xref: /linux/kernel/trace/trace.c (revision 136114e0abf03005e182d75761ab694648e6d388)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56 
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58 
59 #include "trace.h"
60 #include "trace_output.h"
61 
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
63 /*
64  * We need to change this state when a selftest is running.
65  * A selftest will lurk into the ring-buffer to count the
66  * entries inserted during the selftest although some concurrent
67  * insertions into the ring-buffer such as trace_printk could occurred
68  * at the same time, giving false positive or negative results.
69  */
70 static bool __read_mostly tracing_selftest_running;
71 
72 /*
73  * If boot-time tracing including tracers/events via kernel cmdline
74  * is running, we do not want to run SELFTEST.
75  */
76 bool __read_mostly tracing_selftest_disabled;
77 
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #else
86 #define tracing_selftest_running	0
87 #define tracing_selftest_disabled	0
88 #endif
89 
90 /* Pipe tracepoints to printk */
91 static struct trace_iterator *tracepoint_print_iter;
92 int tracepoint_printk;
93 static bool tracepoint_printk_stop_on_boot __initdata;
94 static bool traceoff_after_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96 
97 /* Store tracers and their flags per instance */
98 struct tracers {
99 	struct list_head	list;
100 	struct tracer		*tracer;
101 	struct tracer_flags	*flags;
102 };
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 #define MAX_TRACER_SIZE		100
122 /*
123  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
124  *
125  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126  * is set, then ftrace_dump is called. This will output the contents
127  * of the ftrace buffers to the console.  This is very useful for
128  * capturing traces that lead to crashes and outputting it to a
129  * serial console.
130  *
131  * It is default off, but you can enable it with either specifying
132  * "ftrace_dump_on_oops" in the kernel command line, or setting
133  * /proc/sys/kernel/ftrace_dump_on_oops
134  * Set 1 if you want to dump buffers of all CPUs
135  * Set 2 if you want to dump the buffer of the CPU that triggered oops
136  * Set instance name if you want to dump the specific trace instance
137  * Multiple instance dump is also supported, and instances are separated
138  * by commas.
139  */
140 /* Set to string format zero to disable by default */
141 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 static int __disable_trace_on_warning;
145 
146 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
147 			     void *buffer, size_t *lenp, loff_t *ppos);
148 static const struct ctl_table trace_sysctl_table[] = {
149 	{
150 		.procname	= "ftrace_dump_on_oops",
151 		.data		= &ftrace_dump_on_oops,
152 		.maxlen		= MAX_TRACER_SIZE,
153 		.mode		= 0644,
154 		.proc_handler	= proc_dostring,
155 	},
156 	{
157 		.procname	= "traceoff_on_warning",
158 		.data		= &__disable_trace_on_warning,
159 		.maxlen		= sizeof(__disable_trace_on_warning),
160 		.mode		= 0644,
161 		.proc_handler	= proc_dointvec,
162 	},
163 	{
164 		.procname	= "tracepoint_printk",
165 		.data		= &tracepoint_printk,
166 		.maxlen		= sizeof(tracepoint_printk),
167 		.mode		= 0644,
168 		.proc_handler	= tracepoint_printk_sysctl,
169 	},
170 };
171 
172 static int __init init_trace_sysctls(void)
173 {
174 	register_sysctl_init("kernel", trace_sysctl_table);
175 	return 0;
176 }
177 subsys_initcall(init_trace_sysctls);
178 
179 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
180 /* Map of enums to their values, for "eval_map" file */
181 struct trace_eval_map_head {
182 	struct module			*mod;
183 	unsigned long			length;
184 };
185 
186 union trace_eval_map_item;
187 
188 struct trace_eval_map_tail {
189 	/*
190 	 * "end" is first and points to NULL as it must be different
191 	 * than "mod" or "eval_string"
192 	 */
193 	union trace_eval_map_item	*next;
194 	const char			*end;	/* points to NULL */
195 };
196 
197 static DEFINE_MUTEX(trace_eval_mutex);
198 
199 /*
200  * The trace_eval_maps are saved in an array with two extra elements,
201  * one at the beginning, and one at the end. The beginning item contains
202  * the count of the saved maps (head.length), and the module they
203  * belong to if not built in (head.mod). The ending item contains a
204  * pointer to the next array of saved eval_map items.
205  */
206 union trace_eval_map_item {
207 	struct trace_eval_map		map;
208 	struct trace_eval_map_head	head;
209 	struct trace_eval_map_tail	tail;
210 };
211 
212 static union trace_eval_map_item *trace_eval_maps;
213 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
214 
215 int tracing_set_tracer(struct trace_array *tr, const char *buf);
216 static void ftrace_trace_userstack(struct trace_array *tr,
217 				   struct trace_buffer *buffer,
218 				   unsigned int trace_ctx);
219 
220 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
221 static char *default_bootup_tracer;
222 
223 static bool allocate_snapshot;
224 static bool snapshot_at_boot;
225 
226 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
227 static int boot_instance_index;
228 
229 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_snapshot_index;
231 
232 static int __init set_cmdline_ftrace(char *str)
233 {
234 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
235 	default_bootup_tracer = bootup_tracer_buf;
236 	/* We are using ftrace early, expand it */
237 	trace_set_ring_buffer_expanded(NULL);
238 	return 1;
239 }
240 __setup("ftrace=", set_cmdline_ftrace);
241 
242 int ftrace_dump_on_oops_enabled(void)
243 {
244 	if (!strcmp("0", ftrace_dump_on_oops))
245 		return 0;
246 	else
247 		return 1;
248 }
249 
250 static int __init set_ftrace_dump_on_oops(char *str)
251 {
252 	if (!*str) {
253 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
254 		return 1;
255 	}
256 
257 	if (*str == ',') {
258 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
259 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
260 		return 1;
261 	}
262 
263 	if (*str++ == '=') {
264 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
265 		return 1;
266 	}
267 
268 	return 0;
269 }
270 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
271 
272 static int __init stop_trace_on_warning(char *str)
273 {
274 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
275 		__disable_trace_on_warning = 1;
276 	return 1;
277 }
278 __setup("traceoff_on_warning", stop_trace_on_warning);
279 
280 static int __init boot_alloc_snapshot(char *str)
281 {
282 	char *slot = boot_snapshot_info + boot_snapshot_index;
283 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
284 	int ret;
285 
286 	if (str[0] == '=') {
287 		str++;
288 		if (strlen(str) >= left)
289 			return -1;
290 
291 		ret = snprintf(slot, left, "%s\t", str);
292 		boot_snapshot_index += ret;
293 	} else {
294 		allocate_snapshot = true;
295 		/* We also need the main ring buffer expanded */
296 		trace_set_ring_buffer_expanded(NULL);
297 	}
298 	return 1;
299 }
300 __setup("alloc_snapshot", boot_alloc_snapshot);
301 
302 
303 static int __init boot_snapshot(char *str)
304 {
305 	snapshot_at_boot = true;
306 	boot_alloc_snapshot(str);
307 	return 1;
308 }
309 __setup("ftrace_boot_snapshot", boot_snapshot);
310 
311 
312 static int __init boot_instance(char *str)
313 {
314 	char *slot = boot_instance_info + boot_instance_index;
315 	int left = sizeof(boot_instance_info) - boot_instance_index;
316 	int ret;
317 
318 	if (strlen(str) >= left)
319 		return -1;
320 
321 	ret = snprintf(slot, left, "%s\t", str);
322 	boot_instance_index += ret;
323 
324 	return 1;
325 }
326 __setup("trace_instance=", boot_instance);
327 
328 
329 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
330 
331 static int __init set_trace_boot_options(char *str)
332 {
333 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
334 	return 1;
335 }
336 __setup("trace_options=", set_trace_boot_options);
337 
338 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
339 static char *trace_boot_clock __initdata;
340 
341 static int __init set_trace_boot_clock(char *str)
342 {
343 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
344 	trace_boot_clock = trace_boot_clock_buf;
345 	return 1;
346 }
347 __setup("trace_clock=", set_trace_boot_clock);
348 
349 static int __init set_tracepoint_printk(char *str)
350 {
351 	/* Ignore the "tp_printk_stop_on_boot" param */
352 	if (*str == '_')
353 		return 0;
354 
355 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
356 		tracepoint_printk = 1;
357 	return 1;
358 }
359 __setup("tp_printk", set_tracepoint_printk);
360 
361 static int __init set_tracepoint_printk_stop(char *str)
362 {
363 	tracepoint_printk_stop_on_boot = true;
364 	return 1;
365 }
366 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
367 
368 static int __init set_traceoff_after_boot(char *str)
369 {
370 	traceoff_after_boot = true;
371 	return 1;
372 }
373 __setup("traceoff_after_boot", set_traceoff_after_boot);
374 
375 unsigned long long ns2usecs(u64 nsec)
376 {
377 	nsec += 500;
378 	do_div(nsec, 1000);
379 	return nsec;
380 }
381 
382 static void
383 trace_process_export(struct trace_export *export,
384 	       struct ring_buffer_event *event, int flag)
385 {
386 	struct trace_entry *entry;
387 	unsigned int size = 0;
388 
389 	if (export->flags & flag) {
390 		entry = ring_buffer_event_data(event);
391 		size = ring_buffer_event_length(event);
392 		export->write(export, entry, size);
393 	}
394 }
395 
396 static DEFINE_MUTEX(ftrace_export_lock);
397 
398 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
399 
400 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
402 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
403 
404 static inline void ftrace_exports_enable(struct trace_export *export)
405 {
406 	if (export->flags & TRACE_EXPORT_FUNCTION)
407 		static_branch_inc(&trace_function_exports_enabled);
408 
409 	if (export->flags & TRACE_EXPORT_EVENT)
410 		static_branch_inc(&trace_event_exports_enabled);
411 
412 	if (export->flags & TRACE_EXPORT_MARKER)
413 		static_branch_inc(&trace_marker_exports_enabled);
414 }
415 
416 static inline void ftrace_exports_disable(struct trace_export *export)
417 {
418 	if (export->flags & TRACE_EXPORT_FUNCTION)
419 		static_branch_dec(&trace_function_exports_enabled);
420 
421 	if (export->flags & TRACE_EXPORT_EVENT)
422 		static_branch_dec(&trace_event_exports_enabled);
423 
424 	if (export->flags & TRACE_EXPORT_MARKER)
425 		static_branch_dec(&trace_marker_exports_enabled);
426 }
427 
428 static void ftrace_exports(struct ring_buffer_event *event, int flag)
429 {
430 	struct trace_export *export;
431 
432 	guard(preempt_notrace)();
433 
434 	export = rcu_dereference_raw_check(ftrace_exports_list);
435 	while (export) {
436 		trace_process_export(export, event, flag);
437 		export = rcu_dereference_raw_check(export->next);
438 	}
439 }
440 
441 static inline void
442 add_trace_export(struct trace_export **list, struct trace_export *export)
443 {
444 	rcu_assign_pointer(export->next, *list);
445 	/*
446 	 * We are entering export into the list but another
447 	 * CPU might be walking that list. We need to make sure
448 	 * the export->next pointer is valid before another CPU sees
449 	 * the export pointer included into the list.
450 	 */
451 	rcu_assign_pointer(*list, export);
452 }
453 
454 static inline int
455 rm_trace_export(struct trace_export **list, struct trace_export *export)
456 {
457 	struct trace_export **p;
458 
459 	for (p = list; *p != NULL; p = &(*p)->next)
460 		if (*p == export)
461 			break;
462 
463 	if (*p != export)
464 		return -1;
465 
466 	rcu_assign_pointer(*p, (*p)->next);
467 
468 	return 0;
469 }
470 
471 static inline void
472 add_ftrace_export(struct trace_export **list, struct trace_export *export)
473 {
474 	ftrace_exports_enable(export);
475 
476 	add_trace_export(list, export);
477 }
478 
479 static inline int
480 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
481 {
482 	int ret;
483 
484 	ret = rm_trace_export(list, export);
485 	ftrace_exports_disable(export);
486 
487 	return ret;
488 }
489 
490 int register_ftrace_export(struct trace_export *export)
491 {
492 	if (WARN_ON_ONCE(!export->write))
493 		return -1;
494 
495 	guard(mutex)(&ftrace_export_lock);
496 
497 	add_ftrace_export(&ftrace_exports_list, export);
498 
499 	return 0;
500 }
501 EXPORT_SYMBOL_GPL(register_ftrace_export);
502 
503 int unregister_ftrace_export(struct trace_export *export)
504 {
505 	guard(mutex)(&ftrace_export_lock);
506 	return rm_ftrace_export(&ftrace_exports_list, export);
507 }
508 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
509 
510 /* trace_flags holds trace_options default values */
511 #define TRACE_DEFAULT_FLAGS						\
512 	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
513 	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
514 	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
515 	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
516 	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
517 	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
518 	 TRACE_ITER(COPY_MARKER))
519 
520 /* trace_options that are only supported by global_trace */
521 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
522 	       TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |	\
523 	       TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
524 
525 /* trace_flags that are default zero for instances */
526 #define ZEROED_TRACE_FLAGS \
527 	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
528 	 TRACE_ITER(COPY_MARKER))
529 
530 /*
531  * The global_trace is the descriptor that holds the top-level tracing
532  * buffers for the live tracing.
533  */
534 static struct trace_array global_trace = {
535 	.trace_flags = TRACE_DEFAULT_FLAGS,
536 };
537 
538 static struct trace_array *printk_trace = &global_trace;
539 
540 /* List of trace_arrays interested in the top level trace_marker */
541 static LIST_HEAD(marker_copies);
542 
543 static __always_inline bool printk_binsafe(struct trace_array *tr)
544 {
545 	/*
546 	 * The binary format of traceprintk can cause a crash if used
547 	 * by a buffer from another boot. Force the use of the
548 	 * non binary version of trace_printk if the trace_printk
549 	 * buffer is a boot mapped ring buffer.
550 	 */
551 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
552 }
553 
554 static void update_printk_trace(struct trace_array *tr)
555 {
556 	if (printk_trace == tr)
557 		return;
558 
559 	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
560 	printk_trace = tr;
561 	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
562 }
563 
564 /* Returns true if the status of tr changed */
565 static bool update_marker_trace(struct trace_array *tr, int enabled)
566 {
567 	lockdep_assert_held(&event_mutex);
568 
569 	if (enabled) {
570 		if (!list_empty(&tr->marker_list))
571 			return false;
572 
573 		list_add_rcu(&tr->marker_list, &marker_copies);
574 		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
575 		return true;
576 	}
577 
578 	if (list_empty(&tr->marker_list))
579 		return false;
580 
581 	list_del_init(&tr->marker_list);
582 	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
583 	return true;
584 }
585 
586 void trace_set_ring_buffer_expanded(struct trace_array *tr)
587 {
588 	if (!tr)
589 		tr = &global_trace;
590 	tr->ring_buffer_expanded = true;
591 }
592 
593 LIST_HEAD(ftrace_trace_arrays);
594 
595 int trace_array_get(struct trace_array *this_tr)
596 {
597 	struct trace_array *tr;
598 
599 	guard(mutex)(&trace_types_lock);
600 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
601 		if (tr == this_tr) {
602 			tr->ref++;
603 			return 0;
604 		}
605 	}
606 
607 	return -ENODEV;
608 }
609 
610 static void __trace_array_put(struct trace_array *this_tr)
611 {
612 	WARN_ON(!this_tr->ref);
613 	this_tr->ref--;
614 }
615 
616 /**
617  * trace_array_put - Decrement the reference counter for this trace array.
618  * @this_tr : pointer to the trace array
619  *
620  * NOTE: Use this when we no longer need the trace array returned by
621  * trace_array_get_by_name(). This ensures the trace array can be later
622  * destroyed.
623  *
624  */
625 void trace_array_put(struct trace_array *this_tr)
626 {
627 	if (!this_tr)
628 		return;
629 
630 	guard(mutex)(&trace_types_lock);
631 	__trace_array_put(this_tr);
632 }
633 EXPORT_SYMBOL_GPL(trace_array_put);
634 
635 int tracing_check_open_get_tr(struct trace_array *tr)
636 {
637 	int ret;
638 
639 	ret = security_locked_down(LOCKDOWN_TRACEFS);
640 	if (ret)
641 		return ret;
642 
643 	if (tracing_disabled)
644 		return -ENODEV;
645 
646 	if (tr && trace_array_get(tr) < 0)
647 		return -ENODEV;
648 
649 	return 0;
650 }
651 
652 /**
653  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
654  * @filtered_pids: The list of pids to check
655  * @search_pid: The PID to find in @filtered_pids
656  *
657  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
658  */
659 bool
660 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
661 {
662 	return trace_pid_list_is_set(filtered_pids, search_pid);
663 }
664 
665 /**
666  * trace_ignore_this_task - should a task be ignored for tracing
667  * @filtered_pids: The list of pids to check
668  * @filtered_no_pids: The list of pids not to be traced
669  * @task: The task that should be ignored if not filtered
670  *
671  * Checks if @task should be traced or not from @filtered_pids.
672  * Returns true if @task should *NOT* be traced.
673  * Returns false if @task should be traced.
674  */
675 bool
676 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
677 		       struct trace_pid_list *filtered_no_pids,
678 		       struct task_struct *task)
679 {
680 	/*
681 	 * If filtered_no_pids is not empty, and the task's pid is listed
682 	 * in filtered_no_pids, then return true.
683 	 * Otherwise, if filtered_pids is empty, that means we can
684 	 * trace all tasks. If it has content, then only trace pids
685 	 * within filtered_pids.
686 	 */
687 
688 	return (filtered_pids &&
689 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
690 		(filtered_no_pids &&
691 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
692 }
693 
694 /**
695  * trace_filter_add_remove_task - Add or remove a task from a pid_list
696  * @pid_list: The list to modify
697  * @self: The current task for fork or NULL for exit
698  * @task: The task to add or remove
699  *
700  * If adding a task, if @self is defined, the task is only added if @self
701  * is also included in @pid_list. This happens on fork and tasks should
702  * only be added when the parent is listed. If @self is NULL, then the
703  * @task pid will be removed from the list, which would happen on exit
704  * of a task.
705  */
706 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
707 				  struct task_struct *self,
708 				  struct task_struct *task)
709 {
710 	if (!pid_list)
711 		return;
712 
713 	/* For forks, we only add if the forking task is listed */
714 	if (self) {
715 		if (!trace_find_filtered_pid(pid_list, self->pid))
716 			return;
717 	}
718 
719 	/* "self" is set for forks, and NULL for exits */
720 	if (self)
721 		trace_pid_list_set(pid_list, task->pid);
722 	else
723 		trace_pid_list_clear(pid_list, task->pid);
724 }
725 
726 /**
727  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
728  * @pid_list: The pid list to show
729  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
730  * @pos: The position of the file
731  *
732  * This is used by the seq_file "next" operation to iterate the pids
733  * listed in a trace_pid_list structure.
734  *
735  * Returns the pid+1 as we want to display pid of zero, but NULL would
736  * stop the iteration.
737  */
738 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
739 {
740 	long pid = (unsigned long)v;
741 	unsigned int next;
742 
743 	(*pos)++;
744 
745 	/* pid already is +1 of the actual previous bit */
746 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
747 		return NULL;
748 
749 	pid = next;
750 
751 	/* Return pid + 1 to allow zero to be represented */
752 	return (void *)(pid + 1);
753 }
754 
755 /**
756  * trace_pid_start - Used for seq_file to start reading pid lists
757  * @pid_list: The pid list to show
758  * @pos: The position of the file
759  *
760  * This is used by seq_file "start" operation to start the iteration
761  * of listing pids.
762  *
763  * Returns the pid+1 as we want to display pid of zero, but NULL would
764  * stop the iteration.
765  */
766 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
767 {
768 	unsigned long pid;
769 	unsigned int first;
770 	loff_t l = 0;
771 
772 	if (trace_pid_list_first(pid_list, &first) < 0)
773 		return NULL;
774 
775 	pid = first;
776 
777 	/* Return pid + 1 so that zero can be the exit value */
778 	for (pid++; pid && l < *pos;
779 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
780 		;
781 	return (void *)pid;
782 }
783 
784 /**
785  * trace_pid_show - show the current pid in seq_file processing
786  * @m: The seq_file structure to write into
787  * @v: A void pointer of the pid (+1) value to display
788  *
789  * Can be directly used by seq_file operations to display the current
790  * pid value.
791  */
792 int trace_pid_show(struct seq_file *m, void *v)
793 {
794 	unsigned long pid = (unsigned long)v - 1;
795 
796 	seq_printf(m, "%lu\n", pid);
797 	return 0;
798 }
799 
800 /* 128 should be much more than enough */
801 #define PID_BUF_SIZE		127
802 
803 int trace_pid_write(struct trace_pid_list *filtered_pids,
804 		    struct trace_pid_list **new_pid_list,
805 		    const char __user *ubuf, size_t cnt)
806 {
807 	struct trace_pid_list *pid_list;
808 	struct trace_parser parser;
809 	unsigned long val;
810 	int nr_pids = 0;
811 	ssize_t read = 0;
812 	ssize_t ret;
813 	loff_t pos;
814 	pid_t pid;
815 
816 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
817 		return -ENOMEM;
818 
819 	/*
820 	 * Always recreate a new array. The write is an all or nothing
821 	 * operation. Always create a new array when adding new pids by
822 	 * the user. If the operation fails, then the current list is
823 	 * not modified.
824 	 */
825 	pid_list = trace_pid_list_alloc();
826 	if (!pid_list) {
827 		trace_parser_put(&parser);
828 		return -ENOMEM;
829 	}
830 
831 	if (filtered_pids) {
832 		/* copy the current bits to the new max */
833 		ret = trace_pid_list_first(filtered_pids, &pid);
834 		while (!ret) {
835 			ret = trace_pid_list_set(pid_list, pid);
836 			if (ret < 0)
837 				goto out;
838 
839 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
840 			nr_pids++;
841 		}
842 	}
843 
844 	ret = 0;
845 	while (cnt > 0) {
846 
847 		pos = 0;
848 
849 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
850 		if (ret < 0)
851 			break;
852 
853 		read += ret;
854 		ubuf += ret;
855 		cnt -= ret;
856 
857 		if (!trace_parser_loaded(&parser))
858 			break;
859 
860 		ret = -EINVAL;
861 		if (kstrtoul(parser.buffer, 0, &val))
862 			break;
863 
864 		pid = (pid_t)val;
865 
866 		if (trace_pid_list_set(pid_list, pid) < 0) {
867 			ret = -1;
868 			break;
869 		}
870 		nr_pids++;
871 
872 		trace_parser_clear(&parser);
873 		ret = 0;
874 	}
875  out:
876 	trace_parser_put(&parser);
877 
878 	if (ret < 0) {
879 		trace_pid_list_free(pid_list);
880 		return ret;
881 	}
882 
883 	if (!nr_pids) {
884 		/* Cleared the list of pids */
885 		trace_pid_list_free(pid_list);
886 		pid_list = NULL;
887 	}
888 
889 	*new_pid_list = pid_list;
890 
891 	return read;
892 }
893 
894 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
895 {
896 	u64 ts;
897 
898 	/* Early boot up does not have a buffer yet */
899 	if (!buf->buffer)
900 		return trace_clock_local();
901 
902 	ts = ring_buffer_time_stamp(buf->buffer);
903 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
904 
905 	return ts;
906 }
907 
908 u64 ftrace_now(int cpu)
909 {
910 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
911 }
912 
913 /**
914  * tracing_is_enabled - Show if global_trace has been enabled
915  *
916  * Shows if the global trace has been enabled or not. It uses the
917  * mirror flag "buffer_disabled" to be used in fast paths such as for
918  * the irqsoff tracer. But it may be inaccurate due to races. If you
919  * need to know the accurate state, use tracing_is_on() which is a little
920  * slower, but accurate.
921  */
922 int tracing_is_enabled(void)
923 {
924 	/*
925 	 * For quick access (irqsoff uses this in fast path), just
926 	 * return the mirror variable of the state of the ring buffer.
927 	 * It's a little racy, but we don't really care.
928 	 */
929 	return !global_trace.buffer_disabled;
930 }
931 
932 /*
933  * trace_buf_size is the size in bytes that is allocated
934  * for a buffer. Note, the number of bytes is always rounded
935  * to page size.
936  *
937  * This number is purposely set to a low number of 16384.
938  * If the dump on oops happens, it will be much appreciated
939  * to not have to wait for all that output. Anyway this can be
940  * boot time and run time configurable.
941  */
942 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
943 
944 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
945 
946 /* trace_types holds a link list of available tracers. */
947 static struct tracer		*trace_types __read_mostly;
948 
949 /*
950  * trace_types_lock is used to protect the trace_types list.
951  */
952 DEFINE_MUTEX(trace_types_lock);
953 
954 /*
955  * serialize the access of the ring buffer
956  *
957  * ring buffer serializes readers, but it is low level protection.
958  * The validity of the events (which returns by ring_buffer_peek() ..etc)
959  * are not protected by ring buffer.
960  *
961  * The content of events may become garbage if we allow other process consumes
962  * these events concurrently:
963  *   A) the page of the consumed events may become a normal page
964  *      (not reader page) in ring buffer, and this page will be rewritten
965  *      by events producer.
966  *   B) The page of the consumed events may become a page for splice_read,
967  *      and this page will be returned to system.
968  *
969  * These primitives allow multi process access to different cpu ring buffer
970  * concurrently.
971  *
972  * These primitives don't distinguish read-only and read-consume access.
973  * Multi read-only access are also serialized.
974  */
975 
976 #ifdef CONFIG_SMP
977 static DECLARE_RWSEM(all_cpu_access_lock);
978 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
979 
980 static inline void trace_access_lock(int cpu)
981 {
982 	if (cpu == RING_BUFFER_ALL_CPUS) {
983 		/* gain it for accessing the whole ring buffer. */
984 		down_write(&all_cpu_access_lock);
985 	} else {
986 		/* gain it for accessing a cpu ring buffer. */
987 
988 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
989 		down_read(&all_cpu_access_lock);
990 
991 		/* Secondly block other access to this @cpu ring buffer. */
992 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
993 	}
994 }
995 
996 static inline void trace_access_unlock(int cpu)
997 {
998 	if (cpu == RING_BUFFER_ALL_CPUS) {
999 		up_write(&all_cpu_access_lock);
1000 	} else {
1001 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1002 		up_read(&all_cpu_access_lock);
1003 	}
1004 }
1005 
1006 static inline void trace_access_lock_init(void)
1007 {
1008 	int cpu;
1009 
1010 	for_each_possible_cpu(cpu)
1011 		mutex_init(&per_cpu(cpu_access_lock, cpu));
1012 }
1013 
1014 #else
1015 
1016 static DEFINE_MUTEX(access_lock);
1017 
1018 static inline void trace_access_lock(int cpu)
1019 {
1020 	(void)cpu;
1021 	mutex_lock(&access_lock);
1022 }
1023 
1024 static inline void trace_access_unlock(int cpu)
1025 {
1026 	(void)cpu;
1027 	mutex_unlock(&access_lock);
1028 }
1029 
1030 static inline void trace_access_lock_init(void)
1031 {
1032 }
1033 
1034 #endif
1035 
1036 #ifdef CONFIG_STACKTRACE
1037 static void __ftrace_trace_stack(struct trace_array *tr,
1038 				 struct trace_buffer *buffer,
1039 				 unsigned int trace_ctx,
1040 				 int skip, struct pt_regs *regs);
1041 static inline void ftrace_trace_stack(struct trace_array *tr,
1042 				      struct trace_buffer *buffer,
1043 				      unsigned int trace_ctx,
1044 				      int skip, struct pt_regs *regs);
1045 
1046 #else
1047 static inline void __ftrace_trace_stack(struct trace_array *tr,
1048 					struct trace_buffer *buffer,
1049 					unsigned int trace_ctx,
1050 					int skip, struct pt_regs *regs)
1051 {
1052 }
1053 static inline void ftrace_trace_stack(struct trace_array *tr,
1054 				      struct trace_buffer *buffer,
1055 				      unsigned long trace_ctx,
1056 				      int skip, struct pt_regs *regs)
1057 {
1058 }
1059 
1060 #endif
1061 
1062 static __always_inline void
1063 trace_event_setup(struct ring_buffer_event *event,
1064 		  int type, unsigned int trace_ctx)
1065 {
1066 	struct trace_entry *ent = ring_buffer_event_data(event);
1067 
1068 	tracing_generic_entry_update(ent, type, trace_ctx);
1069 }
1070 
1071 static __always_inline struct ring_buffer_event *
1072 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1073 			  int type,
1074 			  unsigned long len,
1075 			  unsigned int trace_ctx)
1076 {
1077 	struct ring_buffer_event *event;
1078 
1079 	event = ring_buffer_lock_reserve(buffer, len);
1080 	if (event != NULL)
1081 		trace_event_setup(event, type, trace_ctx);
1082 
1083 	return event;
1084 }
1085 
1086 void tracer_tracing_on(struct trace_array *tr)
1087 {
1088 	if (tr->array_buffer.buffer)
1089 		ring_buffer_record_on(tr->array_buffer.buffer);
1090 	/*
1091 	 * This flag is looked at when buffers haven't been allocated
1092 	 * yet, or by some tracers (like irqsoff), that just want to
1093 	 * know if the ring buffer has been disabled, but it can handle
1094 	 * races of where it gets disabled but we still do a record.
1095 	 * As the check is in the fast path of the tracers, it is more
1096 	 * important to be fast than accurate.
1097 	 */
1098 	tr->buffer_disabled = 0;
1099 }
1100 
1101 /**
1102  * tracing_on - enable tracing buffers
1103  *
1104  * This function enables tracing buffers that may have been
1105  * disabled with tracing_off.
1106  */
1107 void tracing_on(void)
1108 {
1109 	tracer_tracing_on(&global_trace);
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_on);
1112 
1113 
1114 static __always_inline void
1115 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1116 {
1117 	__this_cpu_write(trace_taskinfo_save, true);
1118 
1119 	/* If this is the temp buffer, we need to commit fully */
1120 	if (this_cpu_read(trace_buffered_event) == event) {
1121 		/* Length is in event->array[0] */
1122 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1123 		/* Release the temp buffer */
1124 		this_cpu_dec(trace_buffered_event_cnt);
1125 		/* ring_buffer_unlock_commit() enables preemption */
1126 		preempt_enable_notrace();
1127 	} else
1128 		ring_buffer_unlock_commit(buffer);
1129 }
1130 
1131 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1132 		       const char *str, int size)
1133 {
1134 	struct ring_buffer_event *event;
1135 	struct trace_buffer *buffer;
1136 	struct print_entry *entry;
1137 	unsigned int trace_ctx;
1138 	int alloc;
1139 
1140 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1141 		return 0;
1142 
1143 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1144 		return 0;
1145 
1146 	if (unlikely(tracing_disabled))
1147 		return 0;
1148 
1149 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1150 
1151 	trace_ctx = tracing_gen_ctx();
1152 	buffer = tr->array_buffer.buffer;
1153 	guard(ring_buffer_nest)(buffer);
1154 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1155 					    trace_ctx);
1156 	if (!event)
1157 		return 0;
1158 
1159 	entry = ring_buffer_event_data(event);
1160 	entry->ip = ip;
1161 
1162 	memcpy(&entry->buf, str, size);
1163 
1164 	/* Add a newline if necessary */
1165 	if (entry->buf[size - 1] != '\n') {
1166 		entry->buf[size] = '\n';
1167 		entry->buf[size + 1] = '\0';
1168 	} else
1169 		entry->buf[size] = '\0';
1170 
1171 	__buffer_unlock_commit(buffer, event);
1172 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1173 	return size;
1174 }
1175 EXPORT_SYMBOL_GPL(__trace_array_puts);
1176 
1177 /**
1178  * __trace_puts - write a constant string into the trace buffer.
1179  * @ip:	   The address of the caller
1180  * @str:   The constant string to write
1181  */
1182 int __trace_puts(unsigned long ip, const char *str)
1183 {
1184 	return __trace_array_puts(printk_trace, ip, str, strlen(str));
1185 }
1186 EXPORT_SYMBOL_GPL(__trace_puts);
1187 
1188 /**
1189  * __trace_bputs - write the pointer to a constant string into trace buffer
1190  * @ip:	   The address of the caller
1191  * @str:   The constant string to write to the buffer to
1192  */
1193 int __trace_bputs(unsigned long ip, const char *str)
1194 {
1195 	struct trace_array *tr = READ_ONCE(printk_trace);
1196 	struct ring_buffer_event *event;
1197 	struct trace_buffer *buffer;
1198 	struct bputs_entry *entry;
1199 	unsigned int trace_ctx;
1200 	int size = sizeof(struct bputs_entry);
1201 
1202 	if (!printk_binsafe(tr))
1203 		return __trace_puts(ip, str);
1204 
1205 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1206 		return 0;
1207 
1208 	if (unlikely(tracing_selftest_running || tracing_disabled))
1209 		return 0;
1210 
1211 	trace_ctx = tracing_gen_ctx();
1212 	buffer = tr->array_buffer.buffer;
1213 
1214 	guard(ring_buffer_nest)(buffer);
1215 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1216 					    trace_ctx);
1217 	if (!event)
1218 		return 0;
1219 
1220 	entry = ring_buffer_event_data(event);
1221 	entry->ip			= ip;
1222 	entry->str			= str;
1223 
1224 	__buffer_unlock_commit(buffer, event);
1225 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1226 
1227 	return 1;
1228 }
1229 EXPORT_SYMBOL_GPL(__trace_bputs);
1230 
1231 #ifdef CONFIG_TRACER_SNAPSHOT
1232 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1233 					   void *cond_data)
1234 {
1235 	struct tracer *tracer = tr->current_trace;
1236 	unsigned long flags;
1237 
1238 	if (in_nmi()) {
1239 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1240 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1241 		return;
1242 	}
1243 
1244 	if (!tr->allocated_snapshot) {
1245 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1246 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1247 		tracer_tracing_off(tr);
1248 		return;
1249 	}
1250 
1251 	/* Note, snapshot can not be used when the tracer uses it */
1252 	if (tracer->use_max_tr) {
1253 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1254 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1255 		return;
1256 	}
1257 
1258 	if (tr->mapped) {
1259 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1260 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1261 		return;
1262 	}
1263 
1264 	local_irq_save(flags);
1265 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1266 	local_irq_restore(flags);
1267 }
1268 
1269 void tracing_snapshot_instance(struct trace_array *tr)
1270 {
1271 	tracing_snapshot_instance_cond(tr, NULL);
1272 }
1273 
1274 /**
1275  * tracing_snapshot - take a snapshot of the current buffer.
1276  *
1277  * This causes a swap between the snapshot buffer and the current live
1278  * tracing buffer. You can use this to take snapshots of the live
1279  * trace when some condition is triggered, but continue to trace.
1280  *
1281  * Note, make sure to allocate the snapshot with either
1282  * a tracing_snapshot_alloc(), or by doing it manually
1283  * with: echo 1 > /sys/kernel/tracing/snapshot
1284  *
1285  * If the snapshot buffer is not allocated, it will stop tracing.
1286  * Basically making a permanent snapshot.
1287  */
1288 void tracing_snapshot(void)
1289 {
1290 	struct trace_array *tr = &global_trace;
1291 
1292 	tracing_snapshot_instance(tr);
1293 }
1294 EXPORT_SYMBOL_GPL(tracing_snapshot);
1295 
1296 /**
1297  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1298  * @tr:		The tracing instance to snapshot
1299  * @cond_data:	The data to be tested conditionally, and possibly saved
1300  *
1301  * This is the same as tracing_snapshot() except that the snapshot is
1302  * conditional - the snapshot will only happen if the
1303  * cond_snapshot.update() implementation receiving the cond_data
1304  * returns true, which means that the trace array's cond_snapshot
1305  * update() operation used the cond_data to determine whether the
1306  * snapshot should be taken, and if it was, presumably saved it along
1307  * with the snapshot.
1308  */
1309 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1310 {
1311 	tracing_snapshot_instance_cond(tr, cond_data);
1312 }
1313 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1314 
1315 /**
1316  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1317  * @tr:		The tracing instance
1318  *
1319  * When the user enables a conditional snapshot using
1320  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1321  * with the snapshot.  This accessor is used to retrieve it.
1322  *
1323  * Should not be called from cond_snapshot.update(), since it takes
1324  * the tr->max_lock lock, which the code calling
1325  * cond_snapshot.update() has already done.
1326  *
1327  * Returns the cond_data associated with the trace array's snapshot.
1328  */
1329 void *tracing_cond_snapshot_data(struct trace_array *tr)
1330 {
1331 	void *cond_data = NULL;
1332 
1333 	local_irq_disable();
1334 	arch_spin_lock(&tr->max_lock);
1335 
1336 	if (tr->cond_snapshot)
1337 		cond_data = tr->cond_snapshot->cond_data;
1338 
1339 	arch_spin_unlock(&tr->max_lock);
1340 	local_irq_enable();
1341 
1342 	return cond_data;
1343 }
1344 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1345 
1346 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1347 					struct array_buffer *size_buf, int cpu_id);
1348 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1349 
1350 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1351 {
1352 	int order;
1353 	int ret;
1354 
1355 	if (!tr->allocated_snapshot) {
1356 
1357 		/* Make the snapshot buffer have the same order as main buffer */
1358 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1359 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1360 		if (ret < 0)
1361 			return ret;
1362 
1363 		/* allocate spare buffer */
1364 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1365 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1366 		if (ret < 0)
1367 			return ret;
1368 
1369 		tr->allocated_snapshot = true;
1370 	}
1371 
1372 	return 0;
1373 }
1374 
1375 static void free_snapshot(struct trace_array *tr)
1376 {
1377 	/*
1378 	 * We don't free the ring buffer. instead, resize it because
1379 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1380 	 * we want preserve it.
1381 	 */
1382 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1383 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1384 	set_buffer_entries(&tr->max_buffer, 1);
1385 	tracing_reset_online_cpus(&tr->max_buffer);
1386 	tr->allocated_snapshot = false;
1387 }
1388 
1389 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1390 {
1391 	int ret;
1392 
1393 	lockdep_assert_held(&trace_types_lock);
1394 
1395 	spin_lock(&tr->snapshot_trigger_lock);
1396 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1397 		spin_unlock(&tr->snapshot_trigger_lock);
1398 		return -EBUSY;
1399 	}
1400 
1401 	tr->snapshot++;
1402 	spin_unlock(&tr->snapshot_trigger_lock);
1403 
1404 	ret = tracing_alloc_snapshot_instance(tr);
1405 	if (ret) {
1406 		spin_lock(&tr->snapshot_trigger_lock);
1407 		tr->snapshot--;
1408 		spin_unlock(&tr->snapshot_trigger_lock);
1409 	}
1410 
1411 	return ret;
1412 }
1413 
1414 int tracing_arm_snapshot(struct trace_array *tr)
1415 {
1416 	guard(mutex)(&trace_types_lock);
1417 	return tracing_arm_snapshot_locked(tr);
1418 }
1419 
1420 void tracing_disarm_snapshot(struct trace_array *tr)
1421 {
1422 	spin_lock(&tr->snapshot_trigger_lock);
1423 	if (!WARN_ON(!tr->snapshot))
1424 		tr->snapshot--;
1425 	spin_unlock(&tr->snapshot_trigger_lock);
1426 }
1427 
1428 /**
1429  * tracing_alloc_snapshot - allocate snapshot buffer.
1430  *
1431  * This only allocates the snapshot buffer if it isn't already
1432  * allocated - it doesn't also take a snapshot.
1433  *
1434  * This is meant to be used in cases where the snapshot buffer needs
1435  * to be set up for events that can't sleep but need to be able to
1436  * trigger a snapshot.
1437  */
1438 int tracing_alloc_snapshot(void)
1439 {
1440 	struct trace_array *tr = &global_trace;
1441 	int ret;
1442 
1443 	ret = tracing_alloc_snapshot_instance(tr);
1444 	WARN_ON(ret < 0);
1445 
1446 	return ret;
1447 }
1448 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1449 
1450 /**
1451  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1452  *
1453  * This is similar to tracing_snapshot(), but it will allocate the
1454  * snapshot buffer if it isn't already allocated. Use this only
1455  * where it is safe to sleep, as the allocation may sleep.
1456  *
1457  * This causes a swap between the snapshot buffer and the current live
1458  * tracing buffer. You can use this to take snapshots of the live
1459  * trace when some condition is triggered, but continue to trace.
1460  */
1461 void tracing_snapshot_alloc(void)
1462 {
1463 	int ret;
1464 
1465 	ret = tracing_alloc_snapshot();
1466 	if (ret < 0)
1467 		return;
1468 
1469 	tracing_snapshot();
1470 }
1471 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1472 
1473 /**
1474  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1475  * @tr:		The tracing instance
1476  * @cond_data:	User data to associate with the snapshot
1477  * @update:	Implementation of the cond_snapshot update function
1478  *
1479  * Check whether the conditional snapshot for the given instance has
1480  * already been enabled, or if the current tracer is already using a
1481  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1482  * save the cond_data and update function inside.
1483  *
1484  * Returns 0 if successful, error otherwise.
1485  */
1486 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1487 				 cond_update_fn_t update)
1488 {
1489 	struct cond_snapshot *cond_snapshot __free(kfree) =
1490 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1491 	int ret;
1492 
1493 	if (!cond_snapshot)
1494 		return -ENOMEM;
1495 
1496 	cond_snapshot->cond_data = cond_data;
1497 	cond_snapshot->update = update;
1498 
1499 	guard(mutex)(&trace_types_lock);
1500 
1501 	if (tr->current_trace->use_max_tr)
1502 		return -EBUSY;
1503 
1504 	/*
1505 	 * The cond_snapshot can only change to NULL without the
1506 	 * trace_types_lock. We don't care if we race with it going
1507 	 * to NULL, but we want to make sure that it's not set to
1508 	 * something other than NULL when we get here, which we can
1509 	 * do safely with only holding the trace_types_lock and not
1510 	 * having to take the max_lock.
1511 	 */
1512 	if (tr->cond_snapshot)
1513 		return -EBUSY;
1514 
1515 	ret = tracing_arm_snapshot_locked(tr);
1516 	if (ret)
1517 		return ret;
1518 
1519 	local_irq_disable();
1520 	arch_spin_lock(&tr->max_lock);
1521 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1522 	arch_spin_unlock(&tr->max_lock);
1523 	local_irq_enable();
1524 
1525 	return 0;
1526 }
1527 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1528 
1529 /**
1530  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1531  * @tr:		The tracing instance
1532  *
1533  * Check whether the conditional snapshot for the given instance is
1534  * enabled; if so, free the cond_snapshot associated with it,
1535  * otherwise return -EINVAL.
1536  *
1537  * Returns 0 if successful, error otherwise.
1538  */
1539 int tracing_snapshot_cond_disable(struct trace_array *tr)
1540 {
1541 	int ret = 0;
1542 
1543 	local_irq_disable();
1544 	arch_spin_lock(&tr->max_lock);
1545 
1546 	if (!tr->cond_snapshot)
1547 		ret = -EINVAL;
1548 	else {
1549 		kfree(tr->cond_snapshot);
1550 		tr->cond_snapshot = NULL;
1551 	}
1552 
1553 	arch_spin_unlock(&tr->max_lock);
1554 	local_irq_enable();
1555 
1556 	tracing_disarm_snapshot(tr);
1557 
1558 	return ret;
1559 }
1560 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1561 #else
1562 void tracing_snapshot(void)
1563 {
1564 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1565 }
1566 EXPORT_SYMBOL_GPL(tracing_snapshot);
1567 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1568 {
1569 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1570 }
1571 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1572 int tracing_alloc_snapshot(void)
1573 {
1574 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1575 	return -ENODEV;
1576 }
1577 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1578 void tracing_snapshot_alloc(void)
1579 {
1580 	/* Give warning */
1581 	tracing_snapshot();
1582 }
1583 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1584 void *tracing_cond_snapshot_data(struct trace_array *tr)
1585 {
1586 	return NULL;
1587 }
1588 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1589 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1590 {
1591 	return -ENODEV;
1592 }
1593 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1594 int tracing_snapshot_cond_disable(struct trace_array *tr)
1595 {
1596 	return false;
1597 }
1598 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1599 #define free_snapshot(tr)	do { } while (0)
1600 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1601 #endif /* CONFIG_TRACER_SNAPSHOT */
1602 
1603 void tracer_tracing_off(struct trace_array *tr)
1604 {
1605 	if (tr->array_buffer.buffer)
1606 		ring_buffer_record_off(tr->array_buffer.buffer);
1607 	/*
1608 	 * This flag is looked at when buffers haven't been allocated
1609 	 * yet, or by some tracers (like irqsoff), that just want to
1610 	 * know if the ring buffer has been disabled, but it can handle
1611 	 * races of where it gets disabled but we still do a record.
1612 	 * As the check is in the fast path of the tracers, it is more
1613 	 * important to be fast than accurate.
1614 	 */
1615 	tr->buffer_disabled = 1;
1616 }
1617 
1618 /**
1619  * tracer_tracing_disable() - temporary disable the buffer from write
1620  * @tr: The trace array to disable its buffer for
1621  *
1622  * Expects trace_tracing_enable() to re-enable tracing.
1623  * The difference between this and tracer_tracing_off() is that this
1624  * is a counter and can nest, whereas, tracer_tracing_off() can
1625  * be called multiple times and a single trace_tracing_on() will
1626  * enable it.
1627  */
1628 void tracer_tracing_disable(struct trace_array *tr)
1629 {
1630 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1631 		return;
1632 
1633 	ring_buffer_record_disable(tr->array_buffer.buffer);
1634 }
1635 
1636 /**
1637  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1638  * @tr: The trace array that had tracer_tracincg_disable() called on it
1639  *
1640  * This is called after tracer_tracing_disable() has been called on @tr,
1641  * when it's safe to re-enable tracing.
1642  */
1643 void tracer_tracing_enable(struct trace_array *tr)
1644 {
1645 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1646 		return;
1647 
1648 	ring_buffer_record_enable(tr->array_buffer.buffer);
1649 }
1650 
1651 /**
1652  * tracing_off - turn off tracing buffers
1653  *
1654  * This function stops the tracing buffers from recording data.
1655  * It does not disable any overhead the tracers themselves may
1656  * be causing. This function simply causes all recording to
1657  * the ring buffers to fail.
1658  */
1659 void tracing_off(void)
1660 {
1661 	tracer_tracing_off(&global_trace);
1662 }
1663 EXPORT_SYMBOL_GPL(tracing_off);
1664 
1665 void disable_trace_on_warning(void)
1666 {
1667 	if (__disable_trace_on_warning) {
1668 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1669 			"Disabling tracing due to warning\n");
1670 		tracing_off();
1671 	}
1672 }
1673 
1674 /**
1675  * tracer_tracing_is_on - show real state of ring buffer enabled
1676  * @tr : the trace array to know if ring buffer is enabled
1677  *
1678  * Shows real state of the ring buffer if it is enabled or not.
1679  */
1680 bool tracer_tracing_is_on(struct trace_array *tr)
1681 {
1682 	if (tr->array_buffer.buffer)
1683 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1684 	return !tr->buffer_disabled;
1685 }
1686 
1687 /**
1688  * tracing_is_on - show state of ring buffers enabled
1689  */
1690 int tracing_is_on(void)
1691 {
1692 	return tracer_tracing_is_on(&global_trace);
1693 }
1694 EXPORT_SYMBOL_GPL(tracing_is_on);
1695 
1696 static int __init set_buf_size(char *str)
1697 {
1698 	unsigned long buf_size;
1699 
1700 	if (!str)
1701 		return 0;
1702 	buf_size = memparse(str, &str);
1703 	/*
1704 	 * nr_entries can not be zero and the startup
1705 	 * tests require some buffer space. Therefore
1706 	 * ensure we have at least 4096 bytes of buffer.
1707 	 */
1708 	trace_buf_size = max(4096UL, buf_size);
1709 	return 1;
1710 }
1711 __setup("trace_buf_size=", set_buf_size);
1712 
1713 static int __init set_tracing_thresh(char *str)
1714 {
1715 	unsigned long threshold;
1716 	int ret;
1717 
1718 	if (!str)
1719 		return 0;
1720 	ret = kstrtoul(str, 0, &threshold);
1721 	if (ret < 0)
1722 		return 0;
1723 	tracing_thresh = threshold * 1000;
1724 	return 1;
1725 }
1726 __setup("tracing_thresh=", set_tracing_thresh);
1727 
1728 unsigned long nsecs_to_usecs(unsigned long nsecs)
1729 {
1730 	return nsecs / 1000;
1731 }
1732 
1733 /*
1734  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1735  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1736  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1737  * of strings in the order that the evals (enum) were defined.
1738  */
1739 #undef C
1740 #define C(a, b) b
1741 
1742 /* These must match the bit positions in trace_iterator_flags */
1743 static const char *trace_options[] = {
1744 	TRACE_FLAGS
1745 	NULL
1746 };
1747 
1748 static struct {
1749 	u64 (*func)(void);
1750 	const char *name;
1751 	int in_ns;		/* is this clock in nanoseconds? */
1752 } trace_clocks[] = {
1753 	{ trace_clock_local,		"local",	1 },
1754 	{ trace_clock_global,		"global",	1 },
1755 	{ trace_clock_counter,		"counter",	0 },
1756 	{ trace_clock_jiffies,		"uptime",	0 },
1757 	{ trace_clock,			"perf",		1 },
1758 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1759 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1760 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1761 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1762 	ARCH_TRACE_CLOCKS
1763 };
1764 
1765 bool trace_clock_in_ns(struct trace_array *tr)
1766 {
1767 	if (trace_clocks[tr->clock_id].in_ns)
1768 		return true;
1769 
1770 	return false;
1771 }
1772 
1773 /*
1774  * trace_parser_get_init - gets the buffer for trace parser
1775  */
1776 int trace_parser_get_init(struct trace_parser *parser, int size)
1777 {
1778 	memset(parser, 0, sizeof(*parser));
1779 
1780 	parser->buffer = kmalloc(size, GFP_KERNEL);
1781 	if (!parser->buffer)
1782 		return 1;
1783 
1784 	parser->size = size;
1785 	return 0;
1786 }
1787 
1788 /*
1789  * trace_parser_put - frees the buffer for trace parser
1790  */
1791 void trace_parser_put(struct trace_parser *parser)
1792 {
1793 	kfree(parser->buffer);
1794 	parser->buffer = NULL;
1795 }
1796 
1797 /*
1798  * trace_get_user - reads the user input string separated by  space
1799  * (matched by isspace(ch))
1800  *
1801  * For each string found the 'struct trace_parser' is updated,
1802  * and the function returns.
1803  *
1804  * Returns number of bytes read.
1805  *
1806  * See kernel/trace/trace.h for 'struct trace_parser' details.
1807  */
1808 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1809 	size_t cnt, loff_t *ppos)
1810 {
1811 	char ch;
1812 	size_t read = 0;
1813 	ssize_t ret;
1814 
1815 	if (!*ppos)
1816 		trace_parser_clear(parser);
1817 
1818 	ret = get_user(ch, ubuf++);
1819 	if (ret)
1820 		goto fail;
1821 
1822 	read++;
1823 	cnt--;
1824 
1825 	/*
1826 	 * The parser is not finished with the last write,
1827 	 * continue reading the user input without skipping spaces.
1828 	 */
1829 	if (!parser->cont) {
1830 		/* skip white space */
1831 		while (cnt && isspace(ch)) {
1832 			ret = get_user(ch, ubuf++);
1833 			if (ret)
1834 				goto fail;
1835 			read++;
1836 			cnt--;
1837 		}
1838 
1839 		parser->idx = 0;
1840 
1841 		/* only spaces were written */
1842 		if (isspace(ch) || !ch) {
1843 			*ppos += read;
1844 			return read;
1845 		}
1846 	}
1847 
1848 	/* read the non-space input */
1849 	while (cnt && !isspace(ch) && ch) {
1850 		if (parser->idx < parser->size - 1)
1851 			parser->buffer[parser->idx++] = ch;
1852 		else {
1853 			ret = -EINVAL;
1854 			goto fail;
1855 		}
1856 
1857 		ret = get_user(ch, ubuf++);
1858 		if (ret)
1859 			goto fail;
1860 		read++;
1861 		cnt--;
1862 	}
1863 
1864 	/* We either got finished input or we have to wait for another call. */
1865 	if (isspace(ch) || !ch) {
1866 		parser->buffer[parser->idx] = 0;
1867 		parser->cont = false;
1868 	} else if (parser->idx < parser->size - 1) {
1869 		parser->cont = true;
1870 		parser->buffer[parser->idx++] = ch;
1871 		/* Make sure the parsed string always terminates with '\0'. */
1872 		parser->buffer[parser->idx] = 0;
1873 	} else {
1874 		ret = -EINVAL;
1875 		goto fail;
1876 	}
1877 
1878 	*ppos += read;
1879 	return read;
1880 fail:
1881 	trace_parser_fail(parser);
1882 	return ret;
1883 }
1884 
1885 /* TODO add a seq_buf_to_buffer() */
1886 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1887 {
1888 	int len;
1889 
1890 	if (trace_seq_used(s) <= s->readpos)
1891 		return -EBUSY;
1892 
1893 	len = trace_seq_used(s) - s->readpos;
1894 	if (cnt > len)
1895 		cnt = len;
1896 	memcpy(buf, s->buffer + s->readpos, cnt);
1897 
1898 	s->readpos += cnt;
1899 	return cnt;
1900 }
1901 
1902 unsigned long __read_mostly	tracing_thresh;
1903 
1904 #ifdef CONFIG_TRACER_MAX_TRACE
1905 static const struct file_operations tracing_max_lat_fops;
1906 
1907 #ifdef LATENCY_FS_NOTIFY
1908 
1909 static struct workqueue_struct *fsnotify_wq;
1910 
1911 static void latency_fsnotify_workfn(struct work_struct *work)
1912 {
1913 	struct trace_array *tr = container_of(work, struct trace_array,
1914 					      fsnotify_work);
1915 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1916 }
1917 
1918 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1919 {
1920 	struct trace_array *tr = container_of(iwork, struct trace_array,
1921 					      fsnotify_irqwork);
1922 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1923 }
1924 
1925 static void trace_create_maxlat_file(struct trace_array *tr,
1926 				     struct dentry *d_tracer)
1927 {
1928 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1929 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1930 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1931 					      TRACE_MODE_WRITE,
1932 					      d_tracer, tr,
1933 					      &tracing_max_lat_fops);
1934 }
1935 
1936 __init static int latency_fsnotify_init(void)
1937 {
1938 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1939 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1940 	if (!fsnotify_wq) {
1941 		pr_err("Unable to allocate tr_max_lat_wq\n");
1942 		return -ENOMEM;
1943 	}
1944 	return 0;
1945 }
1946 
1947 late_initcall_sync(latency_fsnotify_init);
1948 
1949 void latency_fsnotify(struct trace_array *tr)
1950 {
1951 	if (!fsnotify_wq)
1952 		return;
1953 	/*
1954 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1955 	 * possible that we are called from __schedule() or do_idle(), which
1956 	 * could cause a deadlock.
1957 	 */
1958 	irq_work_queue(&tr->fsnotify_irqwork);
1959 }
1960 
1961 #else /* !LATENCY_FS_NOTIFY */
1962 
1963 #define trace_create_maxlat_file(tr, d_tracer)				\
1964 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1965 			  d_tracer, tr, &tracing_max_lat_fops)
1966 
1967 #endif
1968 
1969 /*
1970  * Copy the new maximum trace into the separate maximum-trace
1971  * structure. (this way the maximum trace is permanently saved,
1972  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1973  */
1974 static void
1975 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1976 {
1977 	struct array_buffer *trace_buf = &tr->array_buffer;
1978 	struct array_buffer *max_buf = &tr->max_buffer;
1979 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1980 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1981 
1982 	max_buf->cpu = cpu;
1983 	max_buf->time_start = data->preempt_timestamp;
1984 
1985 	max_data->saved_latency = tr->max_latency;
1986 	max_data->critical_start = data->critical_start;
1987 	max_data->critical_end = data->critical_end;
1988 
1989 	strscpy(max_data->comm, tsk->comm);
1990 	max_data->pid = tsk->pid;
1991 	/*
1992 	 * If tsk == current, then use current_uid(), as that does not use
1993 	 * RCU. The irq tracer can be called out of RCU scope.
1994 	 */
1995 	if (tsk == current)
1996 		max_data->uid = current_uid();
1997 	else
1998 		max_data->uid = task_uid(tsk);
1999 
2000 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2001 	max_data->policy = tsk->policy;
2002 	max_data->rt_priority = tsk->rt_priority;
2003 
2004 	/* record this tasks comm */
2005 	tracing_record_cmdline(tsk);
2006 	latency_fsnotify(tr);
2007 }
2008 
2009 /**
2010  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2011  * @tr: tracer
2012  * @tsk: the task with the latency
2013  * @cpu: The cpu that initiated the trace.
2014  * @cond_data: User data associated with a conditional snapshot
2015  *
2016  * Flip the buffers between the @tr and the max_tr and record information
2017  * about which task was the cause of this latency.
2018  */
2019 void
2020 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2021 	      void *cond_data)
2022 {
2023 	if (tr->stop_count)
2024 		return;
2025 
2026 	WARN_ON_ONCE(!irqs_disabled());
2027 
2028 	if (!tr->allocated_snapshot) {
2029 		/* Only the nop tracer should hit this when disabling */
2030 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2031 		return;
2032 	}
2033 
2034 	arch_spin_lock(&tr->max_lock);
2035 
2036 	/* Inherit the recordable setting from array_buffer */
2037 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2038 		ring_buffer_record_on(tr->max_buffer.buffer);
2039 	else
2040 		ring_buffer_record_off(tr->max_buffer.buffer);
2041 
2042 #ifdef CONFIG_TRACER_SNAPSHOT
2043 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2044 		arch_spin_unlock(&tr->max_lock);
2045 		return;
2046 	}
2047 #endif
2048 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2049 
2050 	__update_max_tr(tr, tsk, cpu);
2051 
2052 	arch_spin_unlock(&tr->max_lock);
2053 
2054 	/* Any waiters on the old snapshot buffer need to wake up */
2055 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2056 }
2057 
2058 /**
2059  * update_max_tr_single - only copy one trace over, and reset the rest
2060  * @tr: tracer
2061  * @tsk: task with the latency
2062  * @cpu: the cpu of the buffer to copy.
2063  *
2064  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2065  */
2066 void
2067 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2068 {
2069 	int ret;
2070 
2071 	if (tr->stop_count)
2072 		return;
2073 
2074 	WARN_ON_ONCE(!irqs_disabled());
2075 	if (!tr->allocated_snapshot) {
2076 		/* Only the nop tracer should hit this when disabling */
2077 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2078 		return;
2079 	}
2080 
2081 	arch_spin_lock(&tr->max_lock);
2082 
2083 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2084 
2085 	if (ret == -EBUSY) {
2086 		/*
2087 		 * We failed to swap the buffer due to a commit taking
2088 		 * place on this CPU. We fail to record, but we reset
2089 		 * the max trace buffer (no one writes directly to it)
2090 		 * and flag that it failed.
2091 		 * Another reason is resize is in progress.
2092 		 */
2093 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2094 			"Failed to swap buffers due to commit or resize in progress\n");
2095 	}
2096 
2097 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2098 
2099 	__update_max_tr(tr, tsk, cpu);
2100 	arch_spin_unlock(&tr->max_lock);
2101 }
2102 
2103 #endif /* CONFIG_TRACER_MAX_TRACE */
2104 
2105 struct pipe_wait {
2106 	struct trace_iterator		*iter;
2107 	int				wait_index;
2108 };
2109 
2110 static bool wait_pipe_cond(void *data)
2111 {
2112 	struct pipe_wait *pwait = data;
2113 	struct trace_iterator *iter = pwait->iter;
2114 
2115 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2116 		return true;
2117 
2118 	return iter->closed;
2119 }
2120 
2121 static int wait_on_pipe(struct trace_iterator *iter, int full)
2122 {
2123 	struct pipe_wait pwait;
2124 	int ret;
2125 
2126 	/* Iterators are static, they should be filled or empty */
2127 	if (trace_buffer_iter(iter, iter->cpu_file))
2128 		return 0;
2129 
2130 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2131 	pwait.iter = iter;
2132 
2133 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2134 			       wait_pipe_cond, &pwait);
2135 
2136 #ifdef CONFIG_TRACER_MAX_TRACE
2137 	/*
2138 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2139 	 * to happen, this would now be the main buffer.
2140 	 */
2141 	if (iter->snapshot)
2142 		iter->array_buffer = &iter->tr->max_buffer;
2143 #endif
2144 	return ret;
2145 }
2146 
2147 #ifdef CONFIG_FTRACE_STARTUP_TEST
2148 static bool selftests_can_run;
2149 
2150 struct trace_selftests {
2151 	struct list_head		list;
2152 	struct tracer			*type;
2153 };
2154 
2155 static LIST_HEAD(postponed_selftests);
2156 
2157 static int save_selftest(struct tracer *type)
2158 {
2159 	struct trace_selftests *selftest;
2160 
2161 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2162 	if (!selftest)
2163 		return -ENOMEM;
2164 
2165 	selftest->type = type;
2166 	list_add(&selftest->list, &postponed_selftests);
2167 	return 0;
2168 }
2169 
2170 static int run_tracer_selftest(struct tracer *type)
2171 {
2172 	struct trace_array *tr = &global_trace;
2173 	struct tracer_flags *saved_flags = tr->current_trace_flags;
2174 	struct tracer *saved_tracer = tr->current_trace;
2175 	int ret;
2176 
2177 	if (!type->selftest || tracing_selftest_disabled)
2178 		return 0;
2179 
2180 	/*
2181 	 * If a tracer registers early in boot up (before scheduling is
2182 	 * initialized and such), then do not run its selftests yet.
2183 	 * Instead, run it a little later in the boot process.
2184 	 */
2185 	if (!selftests_can_run)
2186 		return save_selftest(type);
2187 
2188 	if (!tracing_is_on()) {
2189 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2190 			type->name);
2191 		return 0;
2192 	}
2193 
2194 	/*
2195 	 * Run a selftest on this tracer.
2196 	 * Here we reset the trace buffer, and set the current
2197 	 * tracer to be this tracer. The tracer can then run some
2198 	 * internal tracing to verify that everything is in order.
2199 	 * If we fail, we do not register this tracer.
2200 	 */
2201 	tracing_reset_online_cpus(&tr->array_buffer);
2202 
2203 	tr->current_trace = type;
2204 	tr->current_trace_flags = type->flags ? : type->default_flags;
2205 
2206 #ifdef CONFIG_TRACER_MAX_TRACE
2207 	if (type->use_max_tr) {
2208 		/* If we expanded the buffers, make sure the max is expanded too */
2209 		if (tr->ring_buffer_expanded)
2210 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2211 					   RING_BUFFER_ALL_CPUS);
2212 		tr->allocated_snapshot = true;
2213 	}
2214 #endif
2215 
2216 	/* the test is responsible for initializing and enabling */
2217 	pr_info("Testing tracer %s: ", type->name);
2218 	ret = type->selftest(type, tr);
2219 	/* the test is responsible for resetting too */
2220 	tr->current_trace = saved_tracer;
2221 	tr->current_trace_flags = saved_flags;
2222 	if (ret) {
2223 		printk(KERN_CONT "FAILED!\n");
2224 		/* Add the warning after printing 'FAILED' */
2225 		WARN_ON(1);
2226 		return -1;
2227 	}
2228 	/* Only reset on passing, to avoid touching corrupted buffers */
2229 	tracing_reset_online_cpus(&tr->array_buffer);
2230 
2231 #ifdef CONFIG_TRACER_MAX_TRACE
2232 	if (type->use_max_tr) {
2233 		tr->allocated_snapshot = false;
2234 
2235 		/* Shrink the max buffer again */
2236 		if (tr->ring_buffer_expanded)
2237 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2238 					   RING_BUFFER_ALL_CPUS);
2239 	}
2240 #endif
2241 
2242 	printk(KERN_CONT "PASSED\n");
2243 	return 0;
2244 }
2245 
2246 static int do_run_tracer_selftest(struct tracer *type)
2247 {
2248 	int ret;
2249 
2250 	/*
2251 	 * Tests can take a long time, especially if they are run one after the
2252 	 * other, as does happen during bootup when all the tracers are
2253 	 * registered. This could cause the soft lockup watchdog to trigger.
2254 	 */
2255 	cond_resched();
2256 
2257 	tracing_selftest_running = true;
2258 	ret = run_tracer_selftest(type);
2259 	tracing_selftest_running = false;
2260 
2261 	return ret;
2262 }
2263 
2264 static __init int init_trace_selftests(void)
2265 {
2266 	struct trace_selftests *p, *n;
2267 	struct tracer *t, **last;
2268 	int ret;
2269 
2270 	selftests_can_run = true;
2271 
2272 	guard(mutex)(&trace_types_lock);
2273 
2274 	if (list_empty(&postponed_selftests))
2275 		return 0;
2276 
2277 	pr_info("Running postponed tracer tests:\n");
2278 
2279 	tracing_selftest_running = true;
2280 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2281 		/* This loop can take minutes when sanitizers are enabled, so
2282 		 * lets make sure we allow RCU processing.
2283 		 */
2284 		cond_resched();
2285 		ret = run_tracer_selftest(p->type);
2286 		/* If the test fails, then warn and remove from available_tracers */
2287 		if (ret < 0) {
2288 			WARN(1, "tracer: %s failed selftest, disabling\n",
2289 			     p->type->name);
2290 			last = &trace_types;
2291 			for (t = trace_types; t; t = t->next) {
2292 				if (t == p->type) {
2293 					*last = t->next;
2294 					break;
2295 				}
2296 				last = &t->next;
2297 			}
2298 		}
2299 		list_del(&p->list);
2300 		kfree(p);
2301 	}
2302 	tracing_selftest_running = false;
2303 
2304 	return 0;
2305 }
2306 core_initcall(init_trace_selftests);
2307 #else
2308 static inline int do_run_tracer_selftest(struct tracer *type)
2309 {
2310 	return 0;
2311 }
2312 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2313 
2314 static int add_tracer(struct trace_array *tr, struct tracer *t);
2315 
2316 static void __init apply_trace_boot_options(void);
2317 
2318 static void free_tracers(struct trace_array *tr)
2319 {
2320 	struct tracers *t, *n;
2321 
2322 	lockdep_assert_held(&trace_types_lock);
2323 
2324 	list_for_each_entry_safe(t, n, &tr->tracers, list) {
2325 		list_del(&t->list);
2326 		kfree(t->flags);
2327 		kfree(t);
2328 	}
2329 }
2330 
2331 /**
2332  * register_tracer - register a tracer with the ftrace system.
2333  * @type: the plugin for the tracer
2334  *
2335  * Register a new plugin tracer.
2336  */
2337 int __init register_tracer(struct tracer *type)
2338 {
2339 	struct trace_array *tr;
2340 	struct tracer *t;
2341 	int ret = 0;
2342 
2343 	if (!type->name) {
2344 		pr_info("Tracer must have a name\n");
2345 		return -1;
2346 	}
2347 
2348 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2349 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2350 		return -1;
2351 	}
2352 
2353 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2354 		pr_warn("Can not register tracer %s due to lockdown\n",
2355 			   type->name);
2356 		return -EPERM;
2357 	}
2358 
2359 	mutex_lock(&trace_types_lock);
2360 
2361 	for (t = trace_types; t; t = t->next) {
2362 		if (strcmp(type->name, t->name) == 0) {
2363 			/* already found */
2364 			pr_info("Tracer %s already registered\n",
2365 				type->name);
2366 			ret = -1;
2367 			goto out;
2368 		}
2369 	}
2370 
2371 	/* store the tracer for __set_tracer_option */
2372 	if (type->flags)
2373 		type->flags->trace = type;
2374 
2375 	ret = do_run_tracer_selftest(type);
2376 	if (ret < 0)
2377 		goto out;
2378 
2379 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2380 		ret = add_tracer(tr, type);
2381 		if (ret < 0) {
2382 			/* The tracer will still exist but without options */
2383 			pr_warn("Failed to create tracer options for %s\n", type->name);
2384 			break;
2385 		}
2386 	}
2387 
2388 	type->next = trace_types;
2389 	trace_types = type;
2390 
2391  out:
2392 	mutex_unlock(&trace_types_lock);
2393 
2394 	if (ret || !default_bootup_tracer)
2395 		return ret;
2396 
2397 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2398 		return 0;
2399 
2400 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2401 	/* Do we want this tracer to start on bootup? */
2402 	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
2403 	default_bootup_tracer = NULL;
2404 
2405 	apply_trace_boot_options();
2406 
2407 	/* disable other selftests, since this will break it. */
2408 	disable_tracing_selftest("running a tracer");
2409 
2410 	return 0;
2411 }
2412 
2413 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2414 {
2415 	struct trace_buffer *buffer = buf->buffer;
2416 
2417 	if (!buffer)
2418 		return;
2419 
2420 	ring_buffer_record_disable(buffer);
2421 
2422 	/* Make sure all commits have finished */
2423 	synchronize_rcu();
2424 	ring_buffer_reset_cpu(buffer, cpu);
2425 
2426 	ring_buffer_record_enable(buffer);
2427 }
2428 
2429 void tracing_reset_online_cpus(struct array_buffer *buf)
2430 {
2431 	struct trace_buffer *buffer = buf->buffer;
2432 
2433 	if (!buffer)
2434 		return;
2435 
2436 	ring_buffer_record_disable(buffer);
2437 
2438 	/* Make sure all commits have finished */
2439 	synchronize_rcu();
2440 
2441 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2442 
2443 	ring_buffer_reset_online_cpus(buffer);
2444 
2445 	ring_buffer_record_enable(buffer);
2446 }
2447 
2448 static void tracing_reset_all_cpus(struct array_buffer *buf)
2449 {
2450 	struct trace_buffer *buffer = buf->buffer;
2451 
2452 	if (!buffer)
2453 		return;
2454 
2455 	ring_buffer_record_disable(buffer);
2456 
2457 	/* Make sure all commits have finished */
2458 	synchronize_rcu();
2459 
2460 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2461 
2462 	ring_buffer_reset(buffer);
2463 
2464 	ring_buffer_record_enable(buffer);
2465 }
2466 
2467 /* Must have trace_types_lock held */
2468 void tracing_reset_all_online_cpus_unlocked(void)
2469 {
2470 	struct trace_array *tr;
2471 
2472 	lockdep_assert_held(&trace_types_lock);
2473 
2474 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2475 		if (!tr->clear_trace)
2476 			continue;
2477 		tr->clear_trace = false;
2478 		tracing_reset_online_cpus(&tr->array_buffer);
2479 #ifdef CONFIG_TRACER_MAX_TRACE
2480 		tracing_reset_online_cpus(&tr->max_buffer);
2481 #endif
2482 	}
2483 }
2484 
2485 void tracing_reset_all_online_cpus(void)
2486 {
2487 	guard(mutex)(&trace_types_lock);
2488 	tracing_reset_all_online_cpus_unlocked();
2489 }
2490 
2491 int is_tracing_stopped(void)
2492 {
2493 	return global_trace.stop_count;
2494 }
2495 
2496 static void tracing_start_tr(struct trace_array *tr)
2497 {
2498 	struct trace_buffer *buffer;
2499 
2500 	if (tracing_disabled)
2501 		return;
2502 
2503 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2504 	if (--tr->stop_count) {
2505 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2506 			/* Someone screwed up their debugging */
2507 			tr->stop_count = 0;
2508 		}
2509 		return;
2510 	}
2511 
2512 	/* Prevent the buffers from switching */
2513 	arch_spin_lock(&tr->max_lock);
2514 
2515 	buffer = tr->array_buffer.buffer;
2516 	if (buffer)
2517 		ring_buffer_record_enable(buffer);
2518 
2519 #ifdef CONFIG_TRACER_MAX_TRACE
2520 	buffer = tr->max_buffer.buffer;
2521 	if (buffer)
2522 		ring_buffer_record_enable(buffer);
2523 #endif
2524 
2525 	arch_spin_unlock(&tr->max_lock);
2526 }
2527 
2528 /**
2529  * tracing_start - quick start of the tracer
2530  *
2531  * If tracing is enabled but was stopped by tracing_stop,
2532  * this will start the tracer back up.
2533  */
2534 void tracing_start(void)
2535 
2536 {
2537 	return tracing_start_tr(&global_trace);
2538 }
2539 
2540 static void tracing_stop_tr(struct trace_array *tr)
2541 {
2542 	struct trace_buffer *buffer;
2543 
2544 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2545 	if (tr->stop_count++)
2546 		return;
2547 
2548 	/* Prevent the buffers from switching */
2549 	arch_spin_lock(&tr->max_lock);
2550 
2551 	buffer = tr->array_buffer.buffer;
2552 	if (buffer)
2553 		ring_buffer_record_disable(buffer);
2554 
2555 #ifdef CONFIG_TRACER_MAX_TRACE
2556 	buffer = tr->max_buffer.buffer;
2557 	if (buffer)
2558 		ring_buffer_record_disable(buffer);
2559 #endif
2560 
2561 	arch_spin_unlock(&tr->max_lock);
2562 }
2563 
2564 /**
2565  * tracing_stop - quick stop of the tracer
2566  *
2567  * Light weight way to stop tracing. Use in conjunction with
2568  * tracing_start.
2569  */
2570 void tracing_stop(void)
2571 {
2572 	return tracing_stop_tr(&global_trace);
2573 }
2574 
2575 /*
2576  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2577  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2578  * simplifies those functions and keeps them in sync.
2579  */
2580 enum print_line_t trace_handle_return(struct trace_seq *s)
2581 {
2582 	return trace_seq_has_overflowed(s) ?
2583 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2584 }
2585 EXPORT_SYMBOL_GPL(trace_handle_return);
2586 
2587 static unsigned short migration_disable_value(void)
2588 {
2589 #if defined(CONFIG_SMP)
2590 	return current->migration_disabled;
2591 #else
2592 	return 0;
2593 #endif
2594 }
2595 
2596 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2597 {
2598 	unsigned int trace_flags = irqs_status;
2599 	unsigned int pc;
2600 
2601 	pc = preempt_count();
2602 
2603 	if (pc & NMI_MASK)
2604 		trace_flags |= TRACE_FLAG_NMI;
2605 	if (pc & HARDIRQ_MASK)
2606 		trace_flags |= TRACE_FLAG_HARDIRQ;
2607 	if (in_serving_softirq())
2608 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2609 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2610 		trace_flags |= TRACE_FLAG_BH_OFF;
2611 
2612 	if (tif_need_resched())
2613 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2614 	if (test_preempt_need_resched())
2615 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2616 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2617 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2618 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2619 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2620 }
2621 
2622 struct ring_buffer_event *
2623 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2624 			  int type,
2625 			  unsigned long len,
2626 			  unsigned int trace_ctx)
2627 {
2628 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2629 }
2630 
2631 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2632 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2633 static int trace_buffered_event_ref;
2634 
2635 /**
2636  * trace_buffered_event_enable - enable buffering events
2637  *
2638  * When events are being filtered, it is quicker to use a temporary
2639  * buffer to write the event data into if there's a likely chance
2640  * that it will not be committed. The discard of the ring buffer
2641  * is not as fast as committing, and is much slower than copying
2642  * a commit.
2643  *
2644  * When an event is to be filtered, allocate per cpu buffers to
2645  * write the event data into, and if the event is filtered and discarded
2646  * it is simply dropped, otherwise, the entire data is to be committed
2647  * in one shot.
2648  */
2649 void trace_buffered_event_enable(void)
2650 {
2651 	struct ring_buffer_event *event;
2652 	struct page *page;
2653 	int cpu;
2654 
2655 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2656 
2657 	if (trace_buffered_event_ref++)
2658 		return;
2659 
2660 	for_each_tracing_cpu(cpu) {
2661 		page = alloc_pages_node(cpu_to_node(cpu),
2662 					GFP_KERNEL | __GFP_NORETRY, 0);
2663 		/* This is just an optimization and can handle failures */
2664 		if (!page) {
2665 			pr_err("Failed to allocate event buffer\n");
2666 			break;
2667 		}
2668 
2669 		event = page_address(page);
2670 		memset(event, 0, sizeof(*event));
2671 
2672 		per_cpu(trace_buffered_event, cpu) = event;
2673 
2674 		scoped_guard(preempt,) {
2675 			if (cpu == smp_processor_id() &&
2676 			    __this_cpu_read(trace_buffered_event) !=
2677 			    per_cpu(trace_buffered_event, cpu))
2678 				WARN_ON_ONCE(1);
2679 		}
2680 	}
2681 }
2682 
2683 static void enable_trace_buffered_event(void *data)
2684 {
2685 	this_cpu_dec(trace_buffered_event_cnt);
2686 }
2687 
2688 static void disable_trace_buffered_event(void *data)
2689 {
2690 	this_cpu_inc(trace_buffered_event_cnt);
2691 }
2692 
2693 /**
2694  * trace_buffered_event_disable - disable buffering events
2695  *
2696  * When a filter is removed, it is faster to not use the buffered
2697  * events, and to commit directly into the ring buffer. Free up
2698  * the temp buffers when there are no more users. This requires
2699  * special synchronization with current events.
2700  */
2701 void trace_buffered_event_disable(void)
2702 {
2703 	int cpu;
2704 
2705 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2706 
2707 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2708 		return;
2709 
2710 	if (--trace_buffered_event_ref)
2711 		return;
2712 
2713 	/* For each CPU, set the buffer as used. */
2714 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2715 			 NULL, true);
2716 
2717 	/* Wait for all current users to finish */
2718 	synchronize_rcu();
2719 
2720 	for_each_tracing_cpu(cpu) {
2721 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2722 		per_cpu(trace_buffered_event, cpu) = NULL;
2723 	}
2724 
2725 	/*
2726 	 * Wait for all CPUs that potentially started checking if they can use
2727 	 * their event buffer only after the previous synchronize_rcu() call and
2728 	 * they still read a valid pointer from trace_buffered_event. It must be
2729 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2730 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2731 	 */
2732 	synchronize_rcu();
2733 
2734 	/* For each CPU, relinquish the buffer */
2735 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2736 			 true);
2737 }
2738 
2739 static struct trace_buffer *temp_buffer;
2740 
2741 struct ring_buffer_event *
2742 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2743 			  struct trace_event_file *trace_file,
2744 			  int type, unsigned long len,
2745 			  unsigned int trace_ctx)
2746 {
2747 	struct ring_buffer_event *entry;
2748 	struct trace_array *tr = trace_file->tr;
2749 	int val;
2750 
2751 	*current_rb = tr->array_buffer.buffer;
2752 
2753 	if (!tr->no_filter_buffering_ref &&
2754 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2755 		preempt_disable_notrace();
2756 		/*
2757 		 * Filtering is on, so try to use the per cpu buffer first.
2758 		 * This buffer will simulate a ring_buffer_event,
2759 		 * where the type_len is zero and the array[0] will
2760 		 * hold the full length.
2761 		 * (see include/linux/ring-buffer.h for details on
2762 		 *  how the ring_buffer_event is structured).
2763 		 *
2764 		 * Using a temp buffer during filtering and copying it
2765 		 * on a matched filter is quicker than writing directly
2766 		 * into the ring buffer and then discarding it when
2767 		 * it doesn't match. That is because the discard
2768 		 * requires several atomic operations to get right.
2769 		 * Copying on match and doing nothing on a failed match
2770 		 * is still quicker than no copy on match, but having
2771 		 * to discard out of the ring buffer on a failed match.
2772 		 */
2773 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2774 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2775 
2776 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2777 
2778 			/*
2779 			 * Preemption is disabled, but interrupts and NMIs
2780 			 * can still come in now. If that happens after
2781 			 * the above increment, then it will have to go
2782 			 * back to the old method of allocating the event
2783 			 * on the ring buffer, and if the filter fails, it
2784 			 * will have to call ring_buffer_discard_commit()
2785 			 * to remove it.
2786 			 *
2787 			 * Need to also check the unlikely case that the
2788 			 * length is bigger than the temp buffer size.
2789 			 * If that happens, then the reserve is pretty much
2790 			 * guaranteed to fail, as the ring buffer currently
2791 			 * only allows events less than a page. But that may
2792 			 * change in the future, so let the ring buffer reserve
2793 			 * handle the failure in that case.
2794 			 */
2795 			if (val == 1 && likely(len <= max_len)) {
2796 				trace_event_setup(entry, type, trace_ctx);
2797 				entry->array[0] = len;
2798 				/* Return with preemption disabled */
2799 				return entry;
2800 			}
2801 			this_cpu_dec(trace_buffered_event_cnt);
2802 		}
2803 		/* __trace_buffer_lock_reserve() disables preemption */
2804 		preempt_enable_notrace();
2805 	}
2806 
2807 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2808 					    trace_ctx);
2809 	/*
2810 	 * If tracing is off, but we have triggers enabled
2811 	 * we still need to look at the event data. Use the temp_buffer
2812 	 * to store the trace event for the trigger to use. It's recursive
2813 	 * safe and will not be recorded anywhere.
2814 	 */
2815 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2816 		*current_rb = temp_buffer;
2817 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2818 						    trace_ctx);
2819 	}
2820 	return entry;
2821 }
2822 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2823 
2824 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2825 static DEFINE_MUTEX(tracepoint_printk_mutex);
2826 
2827 static void output_printk(struct trace_event_buffer *fbuffer)
2828 {
2829 	struct trace_event_call *event_call;
2830 	struct trace_event_file *file;
2831 	struct trace_event *event;
2832 	unsigned long flags;
2833 	struct trace_iterator *iter = tracepoint_print_iter;
2834 
2835 	/* We should never get here if iter is NULL */
2836 	if (WARN_ON_ONCE(!iter))
2837 		return;
2838 
2839 	event_call = fbuffer->trace_file->event_call;
2840 	if (!event_call || !event_call->event.funcs ||
2841 	    !event_call->event.funcs->trace)
2842 		return;
2843 
2844 	file = fbuffer->trace_file;
2845 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2846 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2847 	     !filter_match_preds(file->filter, fbuffer->entry)))
2848 		return;
2849 
2850 	event = &fbuffer->trace_file->event_call->event;
2851 
2852 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2853 	trace_seq_init(&iter->seq);
2854 	iter->ent = fbuffer->entry;
2855 	event_call->event.funcs->trace(iter, 0, event);
2856 	trace_seq_putc(&iter->seq, 0);
2857 	printk("%s", iter->seq.buffer);
2858 
2859 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2860 }
2861 
2862 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2863 			     void *buffer, size_t *lenp,
2864 			     loff_t *ppos)
2865 {
2866 	int save_tracepoint_printk;
2867 	int ret;
2868 
2869 	guard(mutex)(&tracepoint_printk_mutex);
2870 	save_tracepoint_printk = tracepoint_printk;
2871 
2872 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2873 
2874 	/*
2875 	 * This will force exiting early, as tracepoint_printk
2876 	 * is always zero when tracepoint_printk_iter is not allocated
2877 	 */
2878 	if (!tracepoint_print_iter)
2879 		tracepoint_printk = 0;
2880 
2881 	if (save_tracepoint_printk == tracepoint_printk)
2882 		return ret;
2883 
2884 	if (tracepoint_printk)
2885 		static_key_enable(&tracepoint_printk_key.key);
2886 	else
2887 		static_key_disable(&tracepoint_printk_key.key);
2888 
2889 	return ret;
2890 }
2891 
2892 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2893 {
2894 	enum event_trigger_type tt = ETT_NONE;
2895 	struct trace_event_file *file = fbuffer->trace_file;
2896 
2897 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2898 			fbuffer->entry, &tt))
2899 		goto discard;
2900 
2901 	if (static_key_false(&tracepoint_printk_key.key))
2902 		output_printk(fbuffer);
2903 
2904 	if (static_branch_unlikely(&trace_event_exports_enabled))
2905 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2906 
2907 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2908 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2909 
2910 discard:
2911 	if (tt)
2912 		event_triggers_post_call(file, tt);
2913 
2914 }
2915 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2916 
2917 /*
2918  * Skip 3:
2919  *
2920  *   trace_buffer_unlock_commit_regs()
2921  *   trace_event_buffer_commit()
2922  *   trace_event_raw_event_xxx()
2923  */
2924 # define STACK_SKIP 3
2925 
2926 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2927 				     struct trace_buffer *buffer,
2928 				     struct ring_buffer_event *event,
2929 				     unsigned int trace_ctx,
2930 				     struct pt_regs *regs)
2931 {
2932 	__buffer_unlock_commit(buffer, event);
2933 
2934 	/*
2935 	 * If regs is not set, then skip the necessary functions.
2936 	 * Note, we can still get here via blktrace, wakeup tracer
2937 	 * and mmiotrace, but that's ok if they lose a function or
2938 	 * two. They are not that meaningful.
2939 	 */
2940 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2941 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2942 }
2943 
2944 /*
2945  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2946  */
2947 void
2948 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2949 				   struct ring_buffer_event *event)
2950 {
2951 	__buffer_unlock_commit(buffer, event);
2952 }
2953 
2954 void
2955 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2956 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2957 {
2958 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2959 	struct ring_buffer_event *event;
2960 	struct ftrace_entry *entry;
2961 	int size = sizeof(*entry);
2962 
2963 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2964 
2965 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2966 					    trace_ctx);
2967 	if (!event)
2968 		return;
2969 	entry	= ring_buffer_event_data(event);
2970 	entry->ip			= ip;
2971 	entry->parent_ip		= parent_ip;
2972 
2973 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2974 	if (fregs) {
2975 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2976 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2977 	}
2978 #endif
2979 
2980 	if (static_branch_unlikely(&trace_function_exports_enabled))
2981 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2982 	__buffer_unlock_commit(buffer, event);
2983 }
2984 
2985 #ifdef CONFIG_STACKTRACE
2986 
2987 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2988 #define FTRACE_KSTACK_NESTING	4
2989 
2990 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2991 
2992 struct ftrace_stack {
2993 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2994 };
2995 
2996 
2997 struct ftrace_stacks {
2998 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2999 };
3000 
3001 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3002 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3003 
3004 static void __ftrace_trace_stack(struct trace_array *tr,
3005 				 struct trace_buffer *buffer,
3006 				 unsigned int trace_ctx,
3007 				 int skip, struct pt_regs *regs)
3008 {
3009 	struct ring_buffer_event *event;
3010 	unsigned int size, nr_entries;
3011 	struct ftrace_stack *fstack;
3012 	struct stack_entry *entry;
3013 	int stackidx;
3014 	int bit;
3015 
3016 	bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
3017 	if (bit < 0)
3018 		return;
3019 
3020 	/*
3021 	 * Add one, for this function and the call to save_stack_trace()
3022 	 * If regs is set, then these functions will not be in the way.
3023 	 */
3024 #ifndef CONFIG_UNWINDER_ORC
3025 	if (!regs)
3026 		skip++;
3027 #endif
3028 
3029 	guard(preempt_notrace)();
3030 
3031 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3032 
3033 	/* This should never happen. If it does, yell once and skip */
3034 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3035 		goto out;
3036 
3037 	/*
3038 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3039 	 * interrupt will either see the value pre increment or post
3040 	 * increment. If the interrupt happens pre increment it will have
3041 	 * restored the counter when it returns.  We just need a barrier to
3042 	 * keep gcc from moving things around.
3043 	 */
3044 	barrier();
3045 
3046 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3047 	size = ARRAY_SIZE(fstack->calls);
3048 
3049 	if (regs) {
3050 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3051 						   size, skip);
3052 	} else {
3053 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3054 	}
3055 
3056 #ifdef CONFIG_DYNAMIC_FTRACE
3057 	/* Mark entry of stack trace as trampoline code */
3058 	if (tr->ops && tr->ops->trampoline) {
3059 		unsigned long tramp_start = tr->ops->trampoline;
3060 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3061 		unsigned long *calls = fstack->calls;
3062 
3063 		for (int i = 0; i < nr_entries; i++) {
3064 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3065 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3066 		}
3067 	}
3068 #endif
3069 
3070 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3071 				    struct_size(entry, caller, nr_entries),
3072 				    trace_ctx);
3073 	if (!event)
3074 		goto out;
3075 	entry = ring_buffer_event_data(event);
3076 
3077 	entry->size = nr_entries;
3078 	memcpy(&entry->caller, fstack->calls,
3079 	       flex_array_size(entry, caller, nr_entries));
3080 
3081 	__buffer_unlock_commit(buffer, event);
3082 
3083  out:
3084 	/* Again, don't let gcc optimize things here */
3085 	barrier();
3086 	__this_cpu_dec(ftrace_stack_reserve);
3087 	trace_clear_recursion(bit);
3088 }
3089 
3090 static inline void ftrace_trace_stack(struct trace_array *tr,
3091 				      struct trace_buffer *buffer,
3092 				      unsigned int trace_ctx,
3093 				      int skip, struct pt_regs *regs)
3094 {
3095 	if (!(tr->trace_flags & TRACE_ITER(STACKTRACE)))
3096 		return;
3097 
3098 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3099 }
3100 
3101 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3102 		   int skip)
3103 {
3104 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3105 
3106 	if (rcu_is_watching()) {
3107 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3108 		return;
3109 	}
3110 
3111 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3112 		return;
3113 
3114 	/*
3115 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3116 	 * but if the above rcu_is_watching() failed, then the NMI
3117 	 * triggered someplace critical, and ct_irq_enter() should
3118 	 * not be called from NMI.
3119 	 */
3120 	if (unlikely(in_nmi()))
3121 		return;
3122 
3123 	ct_irq_enter_irqson();
3124 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3125 	ct_irq_exit_irqson();
3126 }
3127 
3128 /**
3129  * trace_dump_stack - record a stack back trace in the trace buffer
3130  * @skip: Number of functions to skip (helper handlers)
3131  */
3132 void trace_dump_stack(int skip)
3133 {
3134 	if (tracing_disabled || tracing_selftest_running)
3135 		return;
3136 
3137 #ifndef CONFIG_UNWINDER_ORC
3138 	/* Skip 1 to skip this function. */
3139 	skip++;
3140 #endif
3141 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3142 				tracing_gen_ctx(), skip, NULL);
3143 }
3144 EXPORT_SYMBOL_GPL(trace_dump_stack);
3145 
3146 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3147 static DEFINE_PER_CPU(int, user_stack_count);
3148 
3149 static void
3150 ftrace_trace_userstack(struct trace_array *tr,
3151 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3152 {
3153 	struct ring_buffer_event *event;
3154 	struct userstack_entry *entry;
3155 
3156 	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
3157 		return;
3158 
3159 	/*
3160 	 * NMIs can not handle page faults, even with fix ups.
3161 	 * The save user stack can (and often does) fault.
3162 	 */
3163 	if (unlikely(in_nmi()))
3164 		return;
3165 
3166 	/*
3167 	 * prevent recursion, since the user stack tracing may
3168 	 * trigger other kernel events.
3169 	 */
3170 	guard(preempt)();
3171 	if (__this_cpu_read(user_stack_count))
3172 		return;
3173 
3174 	__this_cpu_inc(user_stack_count);
3175 
3176 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3177 					    sizeof(*entry), trace_ctx);
3178 	if (!event)
3179 		goto out_drop_count;
3180 	entry	= ring_buffer_event_data(event);
3181 
3182 	entry->tgid		= current->tgid;
3183 	memset(&entry->caller, 0, sizeof(entry->caller));
3184 
3185 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3186 	__buffer_unlock_commit(buffer, event);
3187 
3188  out_drop_count:
3189 	__this_cpu_dec(user_stack_count);
3190 }
3191 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3192 static void ftrace_trace_userstack(struct trace_array *tr,
3193 				   struct trace_buffer *buffer,
3194 				   unsigned int trace_ctx)
3195 {
3196 }
3197 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3198 
3199 #endif /* CONFIG_STACKTRACE */
3200 
3201 static inline void
3202 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3203 			  unsigned long long delta)
3204 {
3205 	entry->bottom_delta_ts = delta & U32_MAX;
3206 	entry->top_delta_ts = (delta >> 32);
3207 }
3208 
3209 void trace_last_func_repeats(struct trace_array *tr,
3210 			     struct trace_func_repeats *last_info,
3211 			     unsigned int trace_ctx)
3212 {
3213 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3214 	struct func_repeats_entry *entry;
3215 	struct ring_buffer_event *event;
3216 	u64 delta;
3217 
3218 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3219 					    sizeof(*entry), trace_ctx);
3220 	if (!event)
3221 		return;
3222 
3223 	delta = ring_buffer_event_time_stamp(buffer, event) -
3224 		last_info->ts_last_call;
3225 
3226 	entry = ring_buffer_event_data(event);
3227 	entry->ip = last_info->ip;
3228 	entry->parent_ip = last_info->parent_ip;
3229 	entry->count = last_info->count;
3230 	func_repeats_set_delta_ts(entry, delta);
3231 
3232 	__buffer_unlock_commit(buffer, event);
3233 }
3234 
3235 /* created for use with alloc_percpu */
3236 struct trace_buffer_struct {
3237 	int nesting;
3238 	char buffer[4][TRACE_BUF_SIZE];
3239 };
3240 
3241 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3242 
3243 /*
3244  * This allows for lockless recording.  If we're nested too deeply, then
3245  * this returns NULL.
3246  */
3247 static char *get_trace_buf(void)
3248 {
3249 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3250 
3251 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3252 		return NULL;
3253 
3254 	buffer->nesting++;
3255 
3256 	/* Interrupts must see nesting incremented before we use the buffer */
3257 	barrier();
3258 	return &buffer->buffer[buffer->nesting - 1][0];
3259 }
3260 
3261 static void put_trace_buf(void)
3262 {
3263 	/* Don't let the decrement of nesting leak before this */
3264 	barrier();
3265 	this_cpu_dec(trace_percpu_buffer->nesting);
3266 }
3267 
3268 static int alloc_percpu_trace_buffer(void)
3269 {
3270 	struct trace_buffer_struct __percpu *buffers;
3271 
3272 	if (trace_percpu_buffer)
3273 		return 0;
3274 
3275 	buffers = alloc_percpu(struct trace_buffer_struct);
3276 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3277 		return -ENOMEM;
3278 
3279 	trace_percpu_buffer = buffers;
3280 	return 0;
3281 }
3282 
3283 static int buffers_allocated;
3284 
3285 void trace_printk_init_buffers(void)
3286 {
3287 	if (buffers_allocated)
3288 		return;
3289 
3290 	if (alloc_percpu_trace_buffer())
3291 		return;
3292 
3293 	/* trace_printk() is for debug use only. Don't use it in production. */
3294 
3295 	pr_warn("\n");
3296 	pr_warn("**********************************************************\n");
3297 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3298 	pr_warn("**                                                      **\n");
3299 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3300 	pr_warn("**                                                      **\n");
3301 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3302 	pr_warn("** unsafe for production use.                           **\n");
3303 	pr_warn("**                                                      **\n");
3304 	pr_warn("** If you see this message and you are not debugging    **\n");
3305 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3306 	pr_warn("**                                                      **\n");
3307 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3308 	pr_warn("**********************************************************\n");
3309 
3310 	/* Expand the buffers to set size */
3311 	tracing_update_buffers(&global_trace);
3312 
3313 	buffers_allocated = 1;
3314 
3315 	/*
3316 	 * trace_printk_init_buffers() can be called by modules.
3317 	 * If that happens, then we need to start cmdline recording
3318 	 * directly here. If the global_trace.buffer is already
3319 	 * allocated here, then this was called by module code.
3320 	 */
3321 	if (global_trace.array_buffer.buffer)
3322 		tracing_start_cmdline_record();
3323 }
3324 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3325 
3326 void trace_printk_start_comm(void)
3327 {
3328 	/* Start tracing comms if trace printk is set */
3329 	if (!buffers_allocated)
3330 		return;
3331 	tracing_start_cmdline_record();
3332 }
3333 
3334 static void trace_printk_start_stop_comm(int enabled)
3335 {
3336 	if (!buffers_allocated)
3337 		return;
3338 
3339 	if (enabled)
3340 		tracing_start_cmdline_record();
3341 	else
3342 		tracing_stop_cmdline_record();
3343 }
3344 
3345 /**
3346  * trace_vbprintk - write binary msg to tracing buffer
3347  * @ip:    The address of the caller
3348  * @fmt:   The string format to write to the buffer
3349  * @args:  Arguments for @fmt
3350  */
3351 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3352 {
3353 	struct ring_buffer_event *event;
3354 	struct trace_buffer *buffer;
3355 	struct trace_array *tr = READ_ONCE(printk_trace);
3356 	struct bprint_entry *entry;
3357 	unsigned int trace_ctx;
3358 	char *tbuffer;
3359 	int len = 0, size;
3360 
3361 	if (!printk_binsafe(tr))
3362 		return trace_vprintk(ip, fmt, args);
3363 
3364 	if (unlikely(tracing_selftest_running || tracing_disabled))
3365 		return 0;
3366 
3367 	/* Don't pollute graph traces with trace_vprintk internals */
3368 	pause_graph_tracing();
3369 
3370 	trace_ctx = tracing_gen_ctx();
3371 	guard(preempt_notrace)();
3372 
3373 	tbuffer = get_trace_buf();
3374 	if (!tbuffer) {
3375 		len = 0;
3376 		goto out_nobuffer;
3377 	}
3378 
3379 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3380 
3381 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3382 		goto out_put;
3383 
3384 	size = sizeof(*entry) + sizeof(u32) * len;
3385 	buffer = tr->array_buffer.buffer;
3386 	scoped_guard(ring_buffer_nest, buffer) {
3387 		event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3388 						    trace_ctx);
3389 		if (!event)
3390 			goto out_put;
3391 		entry = ring_buffer_event_data(event);
3392 		entry->ip			= ip;
3393 		entry->fmt			= fmt;
3394 
3395 		memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3396 		__buffer_unlock_commit(buffer, event);
3397 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3398 	}
3399 out_put:
3400 	put_trace_buf();
3401 
3402 out_nobuffer:
3403 	unpause_graph_tracing();
3404 
3405 	return len;
3406 }
3407 EXPORT_SYMBOL_GPL(trace_vbprintk);
3408 
3409 static __printf(3, 0)
3410 int __trace_array_vprintk(struct trace_buffer *buffer,
3411 			  unsigned long ip, const char *fmt, va_list args)
3412 {
3413 	struct ring_buffer_event *event;
3414 	int len = 0, size;
3415 	struct print_entry *entry;
3416 	unsigned int trace_ctx;
3417 	char *tbuffer;
3418 
3419 	if (tracing_disabled)
3420 		return 0;
3421 
3422 	/* Don't pollute graph traces with trace_vprintk internals */
3423 	pause_graph_tracing();
3424 
3425 	trace_ctx = tracing_gen_ctx();
3426 	guard(preempt_notrace)();
3427 
3428 
3429 	tbuffer = get_trace_buf();
3430 	if (!tbuffer) {
3431 		len = 0;
3432 		goto out_nobuffer;
3433 	}
3434 
3435 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3436 
3437 	size = sizeof(*entry) + len + 1;
3438 	scoped_guard(ring_buffer_nest, buffer) {
3439 		event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3440 						    trace_ctx);
3441 		if (!event)
3442 			goto out;
3443 		entry = ring_buffer_event_data(event);
3444 		entry->ip = ip;
3445 
3446 		memcpy(&entry->buf, tbuffer, len + 1);
3447 		__buffer_unlock_commit(buffer, event);
3448 		ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3449 	}
3450 out:
3451 	put_trace_buf();
3452 
3453 out_nobuffer:
3454 	unpause_graph_tracing();
3455 
3456 	return len;
3457 }
3458 
3459 int trace_array_vprintk(struct trace_array *tr,
3460 			unsigned long ip, const char *fmt, va_list args)
3461 {
3462 	if (tracing_selftest_running && tr == &global_trace)
3463 		return 0;
3464 
3465 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3466 }
3467 
3468 /**
3469  * trace_array_printk - Print a message to a specific instance
3470  * @tr: The instance trace_array descriptor
3471  * @ip: The instruction pointer that this is called from.
3472  * @fmt: The format to print (printf format)
3473  *
3474  * If a subsystem sets up its own instance, they have the right to
3475  * printk strings into their tracing instance buffer using this
3476  * function. Note, this function will not write into the top level
3477  * buffer (use trace_printk() for that), as writing into the top level
3478  * buffer should only have events that can be individually disabled.
3479  * trace_printk() is only used for debugging a kernel, and should not
3480  * be ever incorporated in normal use.
3481  *
3482  * trace_array_printk() can be used, as it will not add noise to the
3483  * top level tracing buffer.
3484  *
3485  * Note, trace_array_init_printk() must be called on @tr before this
3486  * can be used.
3487  */
3488 int trace_array_printk(struct trace_array *tr,
3489 		       unsigned long ip, const char *fmt, ...)
3490 {
3491 	int ret;
3492 	va_list ap;
3493 
3494 	if (!tr)
3495 		return -ENOENT;
3496 
3497 	/* This is only allowed for created instances */
3498 	if (tr == &global_trace)
3499 		return 0;
3500 
3501 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
3502 		return 0;
3503 
3504 	va_start(ap, fmt);
3505 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3506 	va_end(ap);
3507 	return ret;
3508 }
3509 EXPORT_SYMBOL_GPL(trace_array_printk);
3510 
3511 /**
3512  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3513  * @tr: The trace array to initialize the buffers for
3514  *
3515  * As trace_array_printk() only writes into instances, they are OK to
3516  * have in the kernel (unlike trace_printk()). This needs to be called
3517  * before trace_array_printk() can be used on a trace_array.
3518  */
3519 int trace_array_init_printk(struct trace_array *tr)
3520 {
3521 	if (!tr)
3522 		return -ENOENT;
3523 
3524 	/* This is only allowed for created instances */
3525 	if (tr == &global_trace)
3526 		return -EINVAL;
3527 
3528 	return alloc_percpu_trace_buffer();
3529 }
3530 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3531 
3532 int trace_array_printk_buf(struct trace_buffer *buffer,
3533 			   unsigned long ip, const char *fmt, ...)
3534 {
3535 	int ret;
3536 	va_list ap;
3537 
3538 	if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK)))
3539 		return 0;
3540 
3541 	va_start(ap, fmt);
3542 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3543 	va_end(ap);
3544 	return ret;
3545 }
3546 
3547 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3548 {
3549 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3550 }
3551 EXPORT_SYMBOL_GPL(trace_vprintk);
3552 
3553 static void trace_iterator_increment(struct trace_iterator *iter)
3554 {
3555 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3556 
3557 	iter->idx++;
3558 	if (buf_iter)
3559 		ring_buffer_iter_advance(buf_iter);
3560 }
3561 
3562 static struct trace_entry *
3563 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3564 		unsigned long *lost_events)
3565 {
3566 	struct ring_buffer_event *event;
3567 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3568 
3569 	if (buf_iter) {
3570 		event = ring_buffer_iter_peek(buf_iter, ts);
3571 		if (lost_events)
3572 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3573 				(unsigned long)-1 : 0;
3574 	} else {
3575 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3576 					 lost_events);
3577 	}
3578 
3579 	if (event) {
3580 		iter->ent_size = ring_buffer_event_length(event);
3581 		return ring_buffer_event_data(event);
3582 	}
3583 	iter->ent_size = 0;
3584 	return NULL;
3585 }
3586 
3587 static struct trace_entry *
3588 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3589 		  unsigned long *missing_events, u64 *ent_ts)
3590 {
3591 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3592 	struct trace_entry *ent, *next = NULL;
3593 	unsigned long lost_events = 0, next_lost = 0;
3594 	int cpu_file = iter->cpu_file;
3595 	u64 next_ts = 0, ts;
3596 	int next_cpu = -1;
3597 	int next_size = 0;
3598 	int cpu;
3599 
3600 	/*
3601 	 * If we are in a per_cpu trace file, don't bother by iterating over
3602 	 * all cpu and peek directly.
3603 	 */
3604 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3605 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3606 			return NULL;
3607 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3608 		if (ent_cpu)
3609 			*ent_cpu = cpu_file;
3610 
3611 		return ent;
3612 	}
3613 
3614 	for_each_tracing_cpu(cpu) {
3615 
3616 		if (ring_buffer_empty_cpu(buffer, cpu))
3617 			continue;
3618 
3619 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3620 
3621 		/*
3622 		 * Pick the entry with the smallest timestamp:
3623 		 */
3624 		if (ent && (!next || ts < next_ts)) {
3625 			next = ent;
3626 			next_cpu = cpu;
3627 			next_ts = ts;
3628 			next_lost = lost_events;
3629 			next_size = iter->ent_size;
3630 		}
3631 	}
3632 
3633 	iter->ent_size = next_size;
3634 
3635 	if (ent_cpu)
3636 		*ent_cpu = next_cpu;
3637 
3638 	if (ent_ts)
3639 		*ent_ts = next_ts;
3640 
3641 	if (missing_events)
3642 		*missing_events = next_lost;
3643 
3644 	return next;
3645 }
3646 
3647 #define STATIC_FMT_BUF_SIZE	128
3648 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3649 
3650 char *trace_iter_expand_format(struct trace_iterator *iter)
3651 {
3652 	char *tmp;
3653 
3654 	/*
3655 	 * iter->tr is NULL when used with tp_printk, which makes
3656 	 * this get called where it is not safe to call krealloc().
3657 	 */
3658 	if (!iter->tr || iter->fmt == static_fmt_buf)
3659 		return NULL;
3660 
3661 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3662 		       GFP_KERNEL);
3663 	if (tmp) {
3664 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3665 		iter->fmt = tmp;
3666 	}
3667 
3668 	return tmp;
3669 }
3670 
3671 /* Returns true if the string is safe to dereference from an event */
3672 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3673 {
3674 	unsigned long addr = (unsigned long)str;
3675 	struct trace_event *trace_event;
3676 	struct trace_event_call *event;
3677 
3678 	/* OK if part of the event data */
3679 	if ((addr >= (unsigned long)iter->ent) &&
3680 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3681 		return true;
3682 
3683 	/* OK if part of the temp seq buffer */
3684 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3685 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3686 		return true;
3687 
3688 	/* Core rodata can not be freed */
3689 	if (is_kernel_rodata(addr))
3690 		return true;
3691 
3692 	if (trace_is_tracepoint_string(str))
3693 		return true;
3694 
3695 	/*
3696 	 * Now this could be a module event, referencing core module
3697 	 * data, which is OK.
3698 	 */
3699 	if (!iter->ent)
3700 		return false;
3701 
3702 	trace_event = ftrace_find_event(iter->ent->type);
3703 	if (!trace_event)
3704 		return false;
3705 
3706 	event = container_of(trace_event, struct trace_event_call, event);
3707 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3708 		return false;
3709 
3710 	/* Would rather have rodata, but this will suffice */
3711 	if (within_module_core(addr, event->module))
3712 		return true;
3713 
3714 	return false;
3715 }
3716 
3717 /**
3718  * ignore_event - Check dereferenced fields while writing to the seq buffer
3719  * @iter: The iterator that holds the seq buffer and the event being printed
3720  *
3721  * At boot up, test_event_printk() will flag any event that dereferences
3722  * a string with "%s" that does exist in the ring buffer. It may still
3723  * be valid, as the string may point to a static string in the kernel
3724  * rodata that never gets freed. But if the string pointer is pointing
3725  * to something that was allocated, there's a chance that it can be freed
3726  * by the time the user reads the trace. This would cause a bad memory
3727  * access by the kernel and possibly crash the system.
3728  *
3729  * This function will check if the event has any fields flagged as needing
3730  * to be checked at runtime and perform those checks.
3731  *
3732  * If it is found that a field is unsafe, it will write into the @iter->seq
3733  * a message stating what was found to be unsafe.
3734  *
3735  * @return: true if the event is unsafe and should be ignored,
3736  *          false otherwise.
3737  */
3738 bool ignore_event(struct trace_iterator *iter)
3739 {
3740 	struct ftrace_event_field *field;
3741 	struct trace_event *trace_event;
3742 	struct trace_event_call *event;
3743 	struct list_head *head;
3744 	struct trace_seq *seq;
3745 	const void *ptr;
3746 
3747 	trace_event = ftrace_find_event(iter->ent->type);
3748 
3749 	seq = &iter->seq;
3750 
3751 	if (!trace_event) {
3752 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3753 		return true;
3754 	}
3755 
3756 	event = container_of(trace_event, struct trace_event_call, event);
3757 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3758 		return false;
3759 
3760 	head = trace_get_fields(event);
3761 	if (!head) {
3762 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3763 				 trace_event_name(event));
3764 		return true;
3765 	}
3766 
3767 	/* Offsets are from the iter->ent that points to the raw event */
3768 	ptr = iter->ent;
3769 
3770 	list_for_each_entry(field, head, link) {
3771 		const char *str;
3772 		bool good;
3773 
3774 		if (!field->needs_test)
3775 			continue;
3776 
3777 		str = *(const char **)(ptr + field->offset);
3778 
3779 		good = trace_safe_str(iter, str);
3780 
3781 		/*
3782 		 * If you hit this warning, it is likely that the
3783 		 * trace event in question used %s on a string that
3784 		 * was saved at the time of the event, but may not be
3785 		 * around when the trace is read. Use __string(),
3786 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3787 		 * instead. See samples/trace_events/trace-events-sample.h
3788 		 * for reference.
3789 		 */
3790 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3791 			      trace_event_name(event), field->name)) {
3792 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3793 					 trace_event_name(event), field->name);
3794 			return true;
3795 		}
3796 	}
3797 	return false;
3798 }
3799 
3800 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3801 {
3802 	const char *p, *new_fmt;
3803 	char *q;
3804 
3805 	if (WARN_ON_ONCE(!fmt))
3806 		return fmt;
3807 
3808 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
3809 		return fmt;
3810 
3811 	p = fmt;
3812 	new_fmt = q = iter->fmt;
3813 	while (*p) {
3814 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3815 			if (!trace_iter_expand_format(iter))
3816 				return fmt;
3817 
3818 			q += iter->fmt - new_fmt;
3819 			new_fmt = iter->fmt;
3820 		}
3821 
3822 		*q++ = *p++;
3823 
3824 		/* Replace %p with %px */
3825 		if (p[-1] == '%') {
3826 			if (p[0] == '%') {
3827 				*q++ = *p++;
3828 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3829 				*q++ = *p++;
3830 				*q++ = 'x';
3831 			}
3832 		}
3833 	}
3834 	*q = '\0';
3835 
3836 	return new_fmt;
3837 }
3838 
3839 #define STATIC_TEMP_BUF_SIZE	128
3840 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3841 
3842 /* Find the next real entry, without updating the iterator itself */
3843 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3844 					  int *ent_cpu, u64 *ent_ts)
3845 {
3846 	/* __find_next_entry will reset ent_size */
3847 	int ent_size = iter->ent_size;
3848 	struct trace_entry *entry;
3849 
3850 	/*
3851 	 * If called from ftrace_dump(), then the iter->temp buffer
3852 	 * will be the static_temp_buf and not created from kmalloc.
3853 	 * If the entry size is greater than the buffer, we can
3854 	 * not save it. Just return NULL in that case. This is only
3855 	 * used to add markers when two consecutive events' time
3856 	 * stamps have a large delta. See trace_print_lat_context()
3857 	 */
3858 	if (iter->temp == static_temp_buf &&
3859 	    STATIC_TEMP_BUF_SIZE < ent_size)
3860 		return NULL;
3861 
3862 	/*
3863 	 * The __find_next_entry() may call peek_next_entry(), which may
3864 	 * call ring_buffer_peek() that may make the contents of iter->ent
3865 	 * undefined. Need to copy iter->ent now.
3866 	 */
3867 	if (iter->ent && iter->ent != iter->temp) {
3868 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3869 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3870 			void *temp;
3871 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3872 			if (!temp)
3873 				return NULL;
3874 			kfree(iter->temp);
3875 			iter->temp = temp;
3876 			iter->temp_size = iter->ent_size;
3877 		}
3878 		memcpy(iter->temp, iter->ent, iter->ent_size);
3879 		iter->ent = iter->temp;
3880 	}
3881 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3882 	/* Put back the original ent_size */
3883 	iter->ent_size = ent_size;
3884 
3885 	return entry;
3886 }
3887 
3888 /* Find the next real entry, and increment the iterator to the next entry */
3889 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3890 {
3891 	iter->ent = __find_next_entry(iter, &iter->cpu,
3892 				      &iter->lost_events, &iter->ts);
3893 
3894 	if (iter->ent)
3895 		trace_iterator_increment(iter);
3896 
3897 	return iter->ent ? iter : NULL;
3898 }
3899 
3900 static void trace_consume(struct trace_iterator *iter)
3901 {
3902 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3903 			    &iter->lost_events);
3904 }
3905 
3906 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3907 {
3908 	struct trace_iterator *iter = m->private;
3909 	int i = (int)*pos;
3910 	void *ent;
3911 
3912 	WARN_ON_ONCE(iter->leftover);
3913 
3914 	(*pos)++;
3915 
3916 	/* can't go backwards */
3917 	if (iter->idx > i)
3918 		return NULL;
3919 
3920 	if (iter->idx < 0)
3921 		ent = trace_find_next_entry_inc(iter);
3922 	else
3923 		ent = iter;
3924 
3925 	while (ent && iter->idx < i)
3926 		ent = trace_find_next_entry_inc(iter);
3927 
3928 	iter->pos = *pos;
3929 
3930 	return ent;
3931 }
3932 
3933 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3934 {
3935 	struct ring_buffer_iter *buf_iter;
3936 	unsigned long entries = 0;
3937 	u64 ts;
3938 
3939 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3940 
3941 	buf_iter = trace_buffer_iter(iter, cpu);
3942 	if (!buf_iter)
3943 		return;
3944 
3945 	ring_buffer_iter_reset(buf_iter);
3946 
3947 	/*
3948 	 * We could have the case with the max latency tracers
3949 	 * that a reset never took place on a cpu. This is evident
3950 	 * by the timestamp being before the start of the buffer.
3951 	 */
3952 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3953 		if (ts >= iter->array_buffer->time_start)
3954 			break;
3955 		entries++;
3956 		ring_buffer_iter_advance(buf_iter);
3957 		/* This could be a big loop */
3958 		cond_resched();
3959 	}
3960 
3961 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3962 }
3963 
3964 /*
3965  * The current tracer is copied to avoid a global locking
3966  * all around.
3967  */
3968 static void *s_start(struct seq_file *m, loff_t *pos)
3969 {
3970 	struct trace_iterator *iter = m->private;
3971 	struct trace_array *tr = iter->tr;
3972 	int cpu_file = iter->cpu_file;
3973 	void *p = NULL;
3974 	loff_t l = 0;
3975 	int cpu;
3976 
3977 	mutex_lock(&trace_types_lock);
3978 	if (unlikely(tr->current_trace != iter->trace)) {
3979 		/* Close iter->trace before switching to the new current tracer */
3980 		if (iter->trace->close)
3981 			iter->trace->close(iter);
3982 		iter->trace = tr->current_trace;
3983 		/* Reopen the new current tracer */
3984 		if (iter->trace->open)
3985 			iter->trace->open(iter);
3986 	}
3987 	mutex_unlock(&trace_types_lock);
3988 
3989 #ifdef CONFIG_TRACER_MAX_TRACE
3990 	if (iter->snapshot && iter->trace->use_max_tr)
3991 		return ERR_PTR(-EBUSY);
3992 #endif
3993 
3994 	if (*pos != iter->pos) {
3995 		iter->ent = NULL;
3996 		iter->cpu = 0;
3997 		iter->idx = -1;
3998 
3999 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4000 			for_each_tracing_cpu(cpu)
4001 				tracing_iter_reset(iter, cpu);
4002 		} else
4003 			tracing_iter_reset(iter, cpu_file);
4004 
4005 		iter->leftover = 0;
4006 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4007 			;
4008 
4009 	} else {
4010 		/*
4011 		 * If we overflowed the seq_file before, then we want
4012 		 * to just reuse the trace_seq buffer again.
4013 		 */
4014 		if (iter->leftover)
4015 			p = iter;
4016 		else {
4017 			l = *pos - 1;
4018 			p = s_next(m, p, &l);
4019 		}
4020 	}
4021 
4022 	trace_event_read_lock();
4023 	trace_access_lock(cpu_file);
4024 	return p;
4025 }
4026 
4027 static void s_stop(struct seq_file *m, void *p)
4028 {
4029 	struct trace_iterator *iter = m->private;
4030 
4031 #ifdef CONFIG_TRACER_MAX_TRACE
4032 	if (iter->snapshot && iter->trace->use_max_tr)
4033 		return;
4034 #endif
4035 
4036 	trace_access_unlock(iter->cpu_file);
4037 	trace_event_read_unlock();
4038 }
4039 
4040 static void
4041 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4042 		      unsigned long *entries, int cpu)
4043 {
4044 	unsigned long count;
4045 
4046 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4047 	/*
4048 	 * If this buffer has skipped entries, then we hold all
4049 	 * entries for the trace and we need to ignore the
4050 	 * ones before the time stamp.
4051 	 */
4052 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4053 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4054 		/* total is the same as the entries */
4055 		*total = count;
4056 	} else
4057 		*total = count +
4058 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4059 	*entries = count;
4060 }
4061 
4062 static void
4063 get_total_entries(struct array_buffer *buf,
4064 		  unsigned long *total, unsigned long *entries)
4065 {
4066 	unsigned long t, e;
4067 	int cpu;
4068 
4069 	*total = 0;
4070 	*entries = 0;
4071 
4072 	for_each_tracing_cpu(cpu) {
4073 		get_total_entries_cpu(buf, &t, &e, cpu);
4074 		*total += t;
4075 		*entries += e;
4076 	}
4077 }
4078 
4079 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4080 {
4081 	unsigned long total, entries;
4082 
4083 	if (!tr)
4084 		tr = &global_trace;
4085 
4086 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4087 
4088 	return entries;
4089 }
4090 
4091 unsigned long trace_total_entries(struct trace_array *tr)
4092 {
4093 	unsigned long total, entries;
4094 
4095 	if (!tr)
4096 		tr = &global_trace;
4097 
4098 	get_total_entries(&tr->array_buffer, &total, &entries);
4099 
4100 	return entries;
4101 }
4102 
4103 static void print_lat_help_header(struct seq_file *m)
4104 {
4105 	seq_puts(m, "#                    _------=> CPU#            \n"
4106 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4107 		    "#                  | / _----=> need-resched    \n"
4108 		    "#                  || / _---=> hardirq/softirq \n"
4109 		    "#                  ||| / _--=> preempt-depth   \n"
4110 		    "#                  |||| / _-=> migrate-disable \n"
4111 		    "#                  ||||| /     delay           \n"
4112 		    "#  cmd     pid     |||||| time  |   caller     \n"
4113 		    "#     \\   /        ||||||  \\    |    /       \n");
4114 }
4115 
4116 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4117 {
4118 	unsigned long total;
4119 	unsigned long entries;
4120 
4121 	get_total_entries(buf, &total, &entries);
4122 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4123 		   entries, total, num_online_cpus());
4124 	seq_puts(m, "#\n");
4125 }
4126 
4127 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4128 				   unsigned int flags)
4129 {
4130 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
4131 
4132 	print_event_info(buf, m);
4133 
4134 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4135 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4136 }
4137 
4138 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4139 				       unsigned int flags)
4140 {
4141 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
4142 	static const char space[] = "            ";
4143 	int prec = tgid ? 12 : 2;
4144 
4145 	print_event_info(buf, m);
4146 
4147 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4148 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4149 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4150 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4151 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4152 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4153 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4154 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4155 }
4156 
4157 void
4158 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4159 {
4160 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4161 	struct array_buffer *buf = iter->array_buffer;
4162 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4163 	struct tracer *type = iter->trace;
4164 	unsigned long entries;
4165 	unsigned long total;
4166 	const char *name = type->name;
4167 
4168 	get_total_entries(buf, &total, &entries);
4169 
4170 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4171 		   name, init_utsname()->release);
4172 	seq_puts(m, "# -----------------------------------"
4173 		 "---------------------------------\n");
4174 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4175 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4176 		   nsecs_to_usecs(data->saved_latency),
4177 		   entries,
4178 		   total,
4179 		   buf->cpu,
4180 		   preempt_model_str(),
4181 		   /* These are reserved for later use */
4182 		   0, 0, 0, 0);
4183 #ifdef CONFIG_SMP
4184 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4185 #else
4186 	seq_puts(m, ")\n");
4187 #endif
4188 	seq_puts(m, "#    -----------------\n");
4189 	seq_printf(m, "#    | task: %.16s-%d "
4190 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4191 		   data->comm, data->pid,
4192 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4193 		   data->policy, data->rt_priority);
4194 	seq_puts(m, "#    -----------------\n");
4195 
4196 	if (data->critical_start) {
4197 		seq_puts(m, "#  => started at: ");
4198 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4199 		trace_print_seq(m, &iter->seq);
4200 		seq_puts(m, "\n#  => ended at:   ");
4201 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4202 		trace_print_seq(m, &iter->seq);
4203 		seq_puts(m, "\n#\n");
4204 	}
4205 
4206 	seq_puts(m, "#\n");
4207 }
4208 
4209 static void test_cpu_buff_start(struct trace_iterator *iter)
4210 {
4211 	struct trace_seq *s = &iter->seq;
4212 	struct trace_array *tr = iter->tr;
4213 
4214 	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
4215 		return;
4216 
4217 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4218 		return;
4219 
4220 	if (cpumask_available(iter->started) &&
4221 	    cpumask_test_cpu(iter->cpu, iter->started))
4222 		return;
4223 
4224 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4225 		return;
4226 
4227 	if (cpumask_available(iter->started))
4228 		cpumask_set_cpu(iter->cpu, iter->started);
4229 
4230 	/* Don't print started cpu buffer for the first entry of the trace */
4231 	if (iter->idx > 1)
4232 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4233 				iter->cpu);
4234 }
4235 
4236 #ifdef CONFIG_FTRACE_SYSCALLS
4237 static bool is_syscall_event(struct trace_event *event)
4238 {
4239 	return (event->funcs == &enter_syscall_print_funcs) ||
4240 	       (event->funcs == &exit_syscall_print_funcs);
4241 
4242 }
4243 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
4244 #else
4245 static inline bool is_syscall_event(struct trace_event *event)
4246 {
4247 	return false;
4248 }
4249 #define syscall_buf_size 0
4250 #endif /* CONFIG_FTRACE_SYSCALLS */
4251 
4252 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4253 {
4254 	struct trace_array *tr = iter->tr;
4255 	struct trace_seq *s = &iter->seq;
4256 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4257 	struct trace_entry *entry;
4258 	struct trace_event *event;
4259 
4260 	entry = iter->ent;
4261 
4262 	test_cpu_buff_start(iter);
4263 
4264 	event = ftrace_find_event(entry->type);
4265 
4266 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4267 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4268 			trace_print_lat_context(iter);
4269 		else
4270 			trace_print_context(iter);
4271 	}
4272 
4273 	if (trace_seq_has_overflowed(s))
4274 		return TRACE_TYPE_PARTIAL_LINE;
4275 
4276 	if (event) {
4277 		if (tr->trace_flags & TRACE_ITER(FIELDS))
4278 			return print_event_fields(iter, event);
4279 		/*
4280 		 * For TRACE_EVENT() events, the print_fmt is not
4281 		 * safe to use if the array has delta offsets
4282 		 * Force printing via the fields.
4283 		 */
4284 		if ((tr->text_delta)) {
4285 			/* ftrace and system call events are still OK */
4286 			if ((event->type > __TRACE_LAST_TYPE) &&
4287 			    !is_syscall_event(event))
4288 			return print_event_fields(iter, event);
4289 		}
4290 		return event->funcs->trace(iter, sym_flags, event);
4291 	}
4292 
4293 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4294 
4295 	return trace_handle_return(s);
4296 }
4297 
4298 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4299 {
4300 	struct trace_array *tr = iter->tr;
4301 	struct trace_seq *s = &iter->seq;
4302 	struct trace_entry *entry;
4303 	struct trace_event *event;
4304 
4305 	entry = iter->ent;
4306 
4307 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
4308 		trace_seq_printf(s, "%d %d %llu ",
4309 				 entry->pid, iter->cpu, iter->ts);
4310 
4311 	if (trace_seq_has_overflowed(s))
4312 		return TRACE_TYPE_PARTIAL_LINE;
4313 
4314 	event = ftrace_find_event(entry->type);
4315 	if (event)
4316 		return event->funcs->raw(iter, 0, event);
4317 
4318 	trace_seq_printf(s, "%d ?\n", entry->type);
4319 
4320 	return trace_handle_return(s);
4321 }
4322 
4323 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4324 {
4325 	struct trace_array *tr = iter->tr;
4326 	struct trace_seq *s = &iter->seq;
4327 	unsigned char newline = '\n';
4328 	struct trace_entry *entry;
4329 	struct trace_event *event;
4330 
4331 	entry = iter->ent;
4332 
4333 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4334 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4335 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4336 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4337 		if (trace_seq_has_overflowed(s))
4338 			return TRACE_TYPE_PARTIAL_LINE;
4339 	}
4340 
4341 	event = ftrace_find_event(entry->type);
4342 	if (event) {
4343 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4344 		if (ret != TRACE_TYPE_HANDLED)
4345 			return ret;
4346 	}
4347 
4348 	SEQ_PUT_FIELD(s, newline);
4349 
4350 	return trace_handle_return(s);
4351 }
4352 
4353 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4354 {
4355 	struct trace_array *tr = iter->tr;
4356 	struct trace_seq *s = &iter->seq;
4357 	struct trace_entry *entry;
4358 	struct trace_event *event;
4359 
4360 	entry = iter->ent;
4361 
4362 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4363 		SEQ_PUT_FIELD(s, entry->pid);
4364 		SEQ_PUT_FIELD(s, iter->cpu);
4365 		SEQ_PUT_FIELD(s, iter->ts);
4366 		if (trace_seq_has_overflowed(s))
4367 			return TRACE_TYPE_PARTIAL_LINE;
4368 	}
4369 
4370 	event = ftrace_find_event(entry->type);
4371 	return event ? event->funcs->binary(iter, 0, event) :
4372 		TRACE_TYPE_HANDLED;
4373 }
4374 
4375 int trace_empty(struct trace_iterator *iter)
4376 {
4377 	struct ring_buffer_iter *buf_iter;
4378 	int cpu;
4379 
4380 	/* If we are looking at one CPU buffer, only check that one */
4381 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4382 		cpu = iter->cpu_file;
4383 		buf_iter = trace_buffer_iter(iter, cpu);
4384 		if (buf_iter) {
4385 			if (!ring_buffer_iter_empty(buf_iter))
4386 				return 0;
4387 		} else {
4388 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4389 				return 0;
4390 		}
4391 		return 1;
4392 	}
4393 
4394 	for_each_tracing_cpu(cpu) {
4395 		buf_iter = trace_buffer_iter(iter, cpu);
4396 		if (buf_iter) {
4397 			if (!ring_buffer_iter_empty(buf_iter))
4398 				return 0;
4399 		} else {
4400 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4401 				return 0;
4402 		}
4403 	}
4404 
4405 	return 1;
4406 }
4407 
4408 /*  Called with trace_event_read_lock() held. */
4409 enum print_line_t print_trace_line(struct trace_iterator *iter)
4410 {
4411 	struct trace_array *tr = iter->tr;
4412 	unsigned long trace_flags = tr->trace_flags;
4413 	enum print_line_t ret;
4414 
4415 	if (iter->lost_events) {
4416 		if (iter->lost_events == (unsigned long)-1)
4417 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4418 					 iter->cpu);
4419 		else
4420 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4421 					 iter->cpu, iter->lost_events);
4422 		if (trace_seq_has_overflowed(&iter->seq))
4423 			return TRACE_TYPE_PARTIAL_LINE;
4424 	}
4425 
4426 	if (iter->trace && iter->trace->print_line) {
4427 		ret = iter->trace->print_line(iter);
4428 		if (ret != TRACE_TYPE_UNHANDLED)
4429 			return ret;
4430 	}
4431 
4432 	if (iter->ent->type == TRACE_BPUTS &&
4433 			trace_flags & TRACE_ITER(PRINTK) &&
4434 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4435 		return trace_print_bputs_msg_only(iter);
4436 
4437 	if (iter->ent->type == TRACE_BPRINT &&
4438 			trace_flags & TRACE_ITER(PRINTK) &&
4439 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4440 		return trace_print_bprintk_msg_only(iter);
4441 
4442 	if (iter->ent->type == TRACE_PRINT &&
4443 			trace_flags & TRACE_ITER(PRINTK) &&
4444 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4445 		return trace_print_printk_msg_only(iter);
4446 
4447 	if (trace_flags & TRACE_ITER(BIN))
4448 		return print_bin_fmt(iter);
4449 
4450 	if (trace_flags & TRACE_ITER(HEX))
4451 		return print_hex_fmt(iter);
4452 
4453 	if (trace_flags & TRACE_ITER(RAW))
4454 		return print_raw_fmt(iter);
4455 
4456 	return print_trace_fmt(iter);
4457 }
4458 
4459 void trace_latency_header(struct seq_file *m)
4460 {
4461 	struct trace_iterator *iter = m->private;
4462 	struct trace_array *tr = iter->tr;
4463 
4464 	/* print nothing if the buffers are empty */
4465 	if (trace_empty(iter))
4466 		return;
4467 
4468 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4469 		print_trace_header(m, iter);
4470 
4471 	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
4472 		print_lat_help_header(m);
4473 }
4474 
4475 void trace_default_header(struct seq_file *m)
4476 {
4477 	struct trace_iterator *iter = m->private;
4478 	struct trace_array *tr = iter->tr;
4479 	unsigned long trace_flags = tr->trace_flags;
4480 
4481 	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
4482 		return;
4483 
4484 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4485 		/* print nothing if the buffers are empty */
4486 		if (trace_empty(iter))
4487 			return;
4488 		print_trace_header(m, iter);
4489 		if (!(trace_flags & TRACE_ITER(VERBOSE)))
4490 			print_lat_help_header(m);
4491 	} else {
4492 		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
4493 			if (trace_flags & TRACE_ITER(IRQ_INFO))
4494 				print_func_help_header_irq(iter->array_buffer,
4495 							   m, trace_flags);
4496 			else
4497 				print_func_help_header(iter->array_buffer, m,
4498 						       trace_flags);
4499 		}
4500 	}
4501 }
4502 
4503 static void test_ftrace_alive(struct seq_file *m)
4504 {
4505 	if (!ftrace_is_dead())
4506 		return;
4507 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4508 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4509 }
4510 
4511 #ifdef CONFIG_TRACER_MAX_TRACE
4512 static void show_snapshot_main_help(struct seq_file *m)
4513 {
4514 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4515 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4516 		    "#                      Takes a snapshot of the main buffer.\n"
4517 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4518 		    "#                      (Doesn't have to be '2' works with any number that\n"
4519 		    "#                       is not a '0' or '1')\n");
4520 }
4521 
4522 static void show_snapshot_percpu_help(struct seq_file *m)
4523 {
4524 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4525 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4526 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4527 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4528 #else
4529 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4530 		    "#                     Must use main snapshot file to allocate.\n");
4531 #endif
4532 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4533 		    "#                      (Doesn't have to be '2' works with any number that\n"
4534 		    "#                       is not a '0' or '1')\n");
4535 }
4536 
4537 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4538 {
4539 	if (iter->tr->allocated_snapshot)
4540 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4541 	else
4542 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4543 
4544 	seq_puts(m, "# Snapshot commands:\n");
4545 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4546 		show_snapshot_main_help(m);
4547 	else
4548 		show_snapshot_percpu_help(m);
4549 }
4550 #else
4551 /* Should never be called */
4552 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4553 #endif
4554 
4555 static int s_show(struct seq_file *m, void *v)
4556 {
4557 	struct trace_iterator *iter = v;
4558 	int ret;
4559 
4560 	if (iter->ent == NULL) {
4561 		if (iter->tr) {
4562 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4563 			seq_puts(m, "#\n");
4564 			test_ftrace_alive(m);
4565 		}
4566 		if (iter->snapshot && trace_empty(iter))
4567 			print_snapshot_help(m, iter);
4568 		else if (iter->trace && iter->trace->print_header)
4569 			iter->trace->print_header(m);
4570 		else
4571 			trace_default_header(m);
4572 
4573 	} else if (iter->leftover) {
4574 		/*
4575 		 * If we filled the seq_file buffer earlier, we
4576 		 * want to just show it now.
4577 		 */
4578 		ret = trace_print_seq(m, &iter->seq);
4579 
4580 		/* ret should this time be zero, but you never know */
4581 		iter->leftover = ret;
4582 
4583 	} else {
4584 		ret = print_trace_line(iter);
4585 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4586 			iter->seq.full = 0;
4587 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4588 		}
4589 		ret = trace_print_seq(m, &iter->seq);
4590 		/*
4591 		 * If we overflow the seq_file buffer, then it will
4592 		 * ask us for this data again at start up.
4593 		 * Use that instead.
4594 		 *  ret is 0 if seq_file write succeeded.
4595 		 *        -1 otherwise.
4596 		 */
4597 		iter->leftover = ret;
4598 	}
4599 
4600 	return 0;
4601 }
4602 
4603 /*
4604  * Should be used after trace_array_get(), trace_types_lock
4605  * ensures that i_cdev was already initialized.
4606  */
4607 static inline int tracing_get_cpu(struct inode *inode)
4608 {
4609 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4610 		return (long)inode->i_cdev - 1;
4611 	return RING_BUFFER_ALL_CPUS;
4612 }
4613 
4614 static const struct seq_operations tracer_seq_ops = {
4615 	.start		= s_start,
4616 	.next		= s_next,
4617 	.stop		= s_stop,
4618 	.show		= s_show,
4619 };
4620 
4621 /*
4622  * Note, as iter itself can be allocated and freed in different
4623  * ways, this function is only used to free its content, and not
4624  * the iterator itself. The only requirement to all the allocations
4625  * is that it must zero all fields (kzalloc), as freeing works with
4626  * ethier allocated content or NULL.
4627  */
4628 static void free_trace_iter_content(struct trace_iterator *iter)
4629 {
4630 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4631 	if (iter->fmt != static_fmt_buf)
4632 		kfree(iter->fmt);
4633 
4634 	kfree(iter->temp);
4635 	kfree(iter->buffer_iter);
4636 	mutex_destroy(&iter->mutex);
4637 	free_cpumask_var(iter->started);
4638 }
4639 
4640 static struct trace_iterator *
4641 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4642 {
4643 	struct trace_array *tr = inode->i_private;
4644 	struct trace_iterator *iter;
4645 	int cpu;
4646 
4647 	if (tracing_disabled)
4648 		return ERR_PTR(-ENODEV);
4649 
4650 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4651 	if (!iter)
4652 		return ERR_PTR(-ENOMEM);
4653 
4654 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4655 				    GFP_KERNEL);
4656 	if (!iter->buffer_iter)
4657 		goto release;
4658 
4659 	/*
4660 	 * trace_find_next_entry() may need to save off iter->ent.
4661 	 * It will place it into the iter->temp buffer. As most
4662 	 * events are less than 128, allocate a buffer of that size.
4663 	 * If one is greater, then trace_find_next_entry() will
4664 	 * allocate a new buffer to adjust for the bigger iter->ent.
4665 	 * It's not critical if it fails to get allocated here.
4666 	 */
4667 	iter->temp = kmalloc(128, GFP_KERNEL);
4668 	if (iter->temp)
4669 		iter->temp_size = 128;
4670 
4671 	/*
4672 	 * trace_event_printf() may need to modify given format
4673 	 * string to replace %p with %px so that it shows real address
4674 	 * instead of hash value. However, that is only for the event
4675 	 * tracing, other tracer may not need. Defer the allocation
4676 	 * until it is needed.
4677 	 */
4678 	iter->fmt = NULL;
4679 	iter->fmt_size = 0;
4680 
4681 	mutex_lock(&trace_types_lock);
4682 	iter->trace = tr->current_trace;
4683 
4684 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4685 		goto fail;
4686 
4687 	iter->tr = tr;
4688 
4689 #ifdef CONFIG_TRACER_MAX_TRACE
4690 	/* Currently only the top directory has a snapshot */
4691 	if (tr->current_trace->print_max || snapshot)
4692 		iter->array_buffer = &tr->max_buffer;
4693 	else
4694 #endif
4695 		iter->array_buffer = &tr->array_buffer;
4696 	iter->snapshot = snapshot;
4697 	iter->pos = -1;
4698 	iter->cpu_file = tracing_get_cpu(inode);
4699 	mutex_init(&iter->mutex);
4700 
4701 	/* Notify the tracer early; before we stop tracing. */
4702 	if (iter->trace->open)
4703 		iter->trace->open(iter);
4704 
4705 	/* Annotate start of buffers if we had overruns */
4706 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4707 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4708 
4709 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4710 	if (trace_clocks[tr->clock_id].in_ns)
4711 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4712 
4713 	/*
4714 	 * If pause-on-trace is enabled, then stop the trace while
4715 	 * dumping, unless this is the "snapshot" file
4716 	 */
4717 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
4718 		iter->iter_flags |= TRACE_FILE_PAUSE;
4719 		tracing_stop_tr(tr);
4720 	}
4721 
4722 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4723 		for_each_tracing_cpu(cpu) {
4724 			iter->buffer_iter[cpu] =
4725 				ring_buffer_read_start(iter->array_buffer->buffer,
4726 						       cpu, GFP_KERNEL);
4727 			tracing_iter_reset(iter, cpu);
4728 		}
4729 	} else {
4730 		cpu = iter->cpu_file;
4731 		iter->buffer_iter[cpu] =
4732 			ring_buffer_read_start(iter->array_buffer->buffer,
4733 					       cpu, GFP_KERNEL);
4734 		tracing_iter_reset(iter, cpu);
4735 	}
4736 
4737 	mutex_unlock(&trace_types_lock);
4738 
4739 	return iter;
4740 
4741  fail:
4742 	mutex_unlock(&trace_types_lock);
4743 	free_trace_iter_content(iter);
4744 release:
4745 	seq_release_private(inode, file);
4746 	return ERR_PTR(-ENOMEM);
4747 }
4748 
4749 int tracing_open_generic(struct inode *inode, struct file *filp)
4750 {
4751 	int ret;
4752 
4753 	ret = tracing_check_open_get_tr(NULL);
4754 	if (ret)
4755 		return ret;
4756 
4757 	filp->private_data = inode->i_private;
4758 	return 0;
4759 }
4760 
4761 bool tracing_is_disabled(void)
4762 {
4763 	return (tracing_disabled) ? true: false;
4764 }
4765 
4766 /*
4767  * Open and update trace_array ref count.
4768  * Must have the current trace_array passed to it.
4769  */
4770 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4771 {
4772 	struct trace_array *tr = inode->i_private;
4773 	int ret;
4774 
4775 	ret = tracing_check_open_get_tr(tr);
4776 	if (ret)
4777 		return ret;
4778 
4779 	filp->private_data = inode->i_private;
4780 
4781 	return 0;
4782 }
4783 
4784 /*
4785  * The private pointer of the inode is the trace_event_file.
4786  * Update the tr ref count associated to it.
4787  */
4788 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4789 {
4790 	struct trace_event_file *file = inode->i_private;
4791 	int ret;
4792 
4793 	ret = tracing_check_open_get_tr(file->tr);
4794 	if (ret)
4795 		return ret;
4796 
4797 	guard(mutex)(&event_mutex);
4798 
4799 	/* Fail if the file is marked for removal */
4800 	if (file->flags & EVENT_FILE_FL_FREED) {
4801 		trace_array_put(file->tr);
4802 		return -ENODEV;
4803 	} else {
4804 		event_file_get(file);
4805 	}
4806 
4807 	filp->private_data = inode->i_private;
4808 
4809 	return 0;
4810 }
4811 
4812 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4813 {
4814 	struct trace_event_file *file = inode->i_private;
4815 
4816 	trace_array_put(file->tr);
4817 	event_file_put(file);
4818 
4819 	return 0;
4820 }
4821 
4822 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4823 {
4824 	tracing_release_file_tr(inode, filp);
4825 	return single_release(inode, filp);
4826 }
4827 
4828 static int tracing_release(struct inode *inode, struct file *file)
4829 {
4830 	struct trace_array *tr = inode->i_private;
4831 	struct seq_file *m = file->private_data;
4832 	struct trace_iterator *iter;
4833 	int cpu;
4834 
4835 	if (!(file->f_mode & FMODE_READ)) {
4836 		trace_array_put(tr);
4837 		return 0;
4838 	}
4839 
4840 	/* Writes do not use seq_file */
4841 	iter = m->private;
4842 	mutex_lock(&trace_types_lock);
4843 
4844 	for_each_tracing_cpu(cpu) {
4845 		if (iter->buffer_iter[cpu])
4846 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4847 	}
4848 
4849 	if (iter->trace && iter->trace->close)
4850 		iter->trace->close(iter);
4851 
4852 	if (iter->iter_flags & TRACE_FILE_PAUSE)
4853 		/* reenable tracing if it was previously enabled */
4854 		tracing_start_tr(tr);
4855 
4856 	__trace_array_put(tr);
4857 
4858 	mutex_unlock(&trace_types_lock);
4859 
4860 	free_trace_iter_content(iter);
4861 	seq_release_private(inode, file);
4862 
4863 	return 0;
4864 }
4865 
4866 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4867 {
4868 	struct trace_array *tr = inode->i_private;
4869 
4870 	trace_array_put(tr);
4871 	return 0;
4872 }
4873 
4874 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4875 {
4876 	struct trace_array *tr = inode->i_private;
4877 
4878 	trace_array_put(tr);
4879 
4880 	return single_release(inode, file);
4881 }
4882 
4883 static int tracing_open(struct inode *inode, struct file *file)
4884 {
4885 	struct trace_array *tr = inode->i_private;
4886 	struct trace_iterator *iter;
4887 	int ret;
4888 
4889 	ret = tracing_check_open_get_tr(tr);
4890 	if (ret)
4891 		return ret;
4892 
4893 	/* If this file was open for write, then erase contents */
4894 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4895 		int cpu = tracing_get_cpu(inode);
4896 		struct array_buffer *trace_buf = &tr->array_buffer;
4897 
4898 #ifdef CONFIG_TRACER_MAX_TRACE
4899 		if (tr->current_trace->print_max)
4900 			trace_buf = &tr->max_buffer;
4901 #endif
4902 
4903 		if (cpu == RING_BUFFER_ALL_CPUS)
4904 			tracing_reset_online_cpus(trace_buf);
4905 		else
4906 			tracing_reset_cpu(trace_buf, cpu);
4907 	}
4908 
4909 	if (file->f_mode & FMODE_READ) {
4910 		iter = __tracing_open(inode, file, false);
4911 		if (IS_ERR(iter))
4912 			ret = PTR_ERR(iter);
4913 		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
4914 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4915 	}
4916 
4917 	if (ret < 0)
4918 		trace_array_put(tr);
4919 
4920 	return ret;
4921 }
4922 
4923 /*
4924  * Some tracers are not suitable for instance buffers.
4925  * A tracer is always available for the global array (toplevel)
4926  * or if it explicitly states that it is.
4927  */
4928 static bool
4929 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4930 {
4931 #ifdef CONFIG_TRACER_SNAPSHOT
4932 	/* arrays with mapped buffer range do not have snapshots */
4933 	if (tr->range_addr_start && t->use_max_tr)
4934 		return false;
4935 #endif
4936 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4937 }
4938 
4939 /* Find the next tracer that this trace array may use */
4940 static struct tracer *
4941 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4942 {
4943 	while (t && !trace_ok_for_array(t, tr))
4944 		t = t->next;
4945 
4946 	return t;
4947 }
4948 
4949 static void *
4950 t_next(struct seq_file *m, void *v, loff_t *pos)
4951 {
4952 	struct trace_array *tr = m->private;
4953 	struct tracer *t = v;
4954 
4955 	(*pos)++;
4956 
4957 	if (t)
4958 		t = get_tracer_for_array(tr, t->next);
4959 
4960 	return t;
4961 }
4962 
4963 static void *t_start(struct seq_file *m, loff_t *pos)
4964 {
4965 	struct trace_array *tr = m->private;
4966 	struct tracer *t;
4967 	loff_t l = 0;
4968 
4969 	mutex_lock(&trace_types_lock);
4970 
4971 	t = get_tracer_for_array(tr, trace_types);
4972 	for (; t && l < *pos; t = t_next(m, t, &l))
4973 			;
4974 
4975 	return t;
4976 }
4977 
4978 static void t_stop(struct seq_file *m, void *p)
4979 {
4980 	mutex_unlock(&trace_types_lock);
4981 }
4982 
4983 static int t_show(struct seq_file *m, void *v)
4984 {
4985 	struct tracer *t = v;
4986 
4987 	if (!t)
4988 		return 0;
4989 
4990 	seq_puts(m, t->name);
4991 	if (t->next)
4992 		seq_putc(m, ' ');
4993 	else
4994 		seq_putc(m, '\n');
4995 
4996 	return 0;
4997 }
4998 
4999 static const struct seq_operations show_traces_seq_ops = {
5000 	.start		= t_start,
5001 	.next		= t_next,
5002 	.stop		= t_stop,
5003 	.show		= t_show,
5004 };
5005 
5006 static int show_traces_open(struct inode *inode, struct file *file)
5007 {
5008 	struct trace_array *tr = inode->i_private;
5009 	struct seq_file *m;
5010 	int ret;
5011 
5012 	ret = tracing_check_open_get_tr(tr);
5013 	if (ret)
5014 		return ret;
5015 
5016 	ret = seq_open(file, &show_traces_seq_ops);
5017 	if (ret) {
5018 		trace_array_put(tr);
5019 		return ret;
5020 	}
5021 
5022 	m = file->private_data;
5023 	m->private = tr;
5024 
5025 	return 0;
5026 }
5027 
5028 static int tracing_seq_release(struct inode *inode, struct file *file)
5029 {
5030 	struct trace_array *tr = inode->i_private;
5031 
5032 	trace_array_put(tr);
5033 	return seq_release(inode, file);
5034 }
5035 
5036 static ssize_t
5037 tracing_write_stub(struct file *filp, const char __user *ubuf,
5038 		   size_t count, loff_t *ppos)
5039 {
5040 	return count;
5041 }
5042 
5043 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5044 {
5045 	int ret;
5046 
5047 	if (file->f_mode & FMODE_READ)
5048 		ret = seq_lseek(file, offset, whence);
5049 	else
5050 		file->f_pos = ret = 0;
5051 
5052 	return ret;
5053 }
5054 
5055 static const struct file_operations tracing_fops = {
5056 	.open		= tracing_open,
5057 	.read		= seq_read,
5058 	.read_iter	= seq_read_iter,
5059 	.splice_read	= copy_splice_read,
5060 	.write		= tracing_write_stub,
5061 	.llseek		= tracing_lseek,
5062 	.release	= tracing_release,
5063 };
5064 
5065 static const struct file_operations show_traces_fops = {
5066 	.open		= show_traces_open,
5067 	.read		= seq_read,
5068 	.llseek		= seq_lseek,
5069 	.release	= tracing_seq_release,
5070 };
5071 
5072 static ssize_t
5073 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5074 		     size_t count, loff_t *ppos)
5075 {
5076 	struct trace_array *tr = file_inode(filp)->i_private;
5077 	char *mask_str __free(kfree) = NULL;
5078 	int len;
5079 
5080 	len = snprintf(NULL, 0, "%*pb\n",
5081 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5082 	mask_str = kmalloc(len, GFP_KERNEL);
5083 	if (!mask_str)
5084 		return -ENOMEM;
5085 
5086 	len = snprintf(mask_str, len, "%*pb\n",
5087 		       cpumask_pr_args(tr->tracing_cpumask));
5088 	if (len >= count)
5089 		return -EINVAL;
5090 
5091 	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5092 }
5093 
5094 int tracing_set_cpumask(struct trace_array *tr,
5095 			cpumask_var_t tracing_cpumask_new)
5096 {
5097 	int cpu;
5098 
5099 	if (!tr)
5100 		return -EINVAL;
5101 
5102 	local_irq_disable();
5103 	arch_spin_lock(&tr->max_lock);
5104 	for_each_tracing_cpu(cpu) {
5105 		/*
5106 		 * Increase/decrease the disabled counter if we are
5107 		 * about to flip a bit in the cpumask:
5108 		 */
5109 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5110 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5111 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5112 #ifdef CONFIG_TRACER_MAX_TRACE
5113 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5114 #endif
5115 		}
5116 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5117 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5118 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5119 #ifdef CONFIG_TRACER_MAX_TRACE
5120 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5121 #endif
5122 		}
5123 	}
5124 	arch_spin_unlock(&tr->max_lock);
5125 	local_irq_enable();
5126 
5127 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5128 
5129 	return 0;
5130 }
5131 
5132 static ssize_t
5133 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5134 		      size_t count, loff_t *ppos)
5135 {
5136 	struct trace_array *tr = file_inode(filp)->i_private;
5137 	cpumask_var_t tracing_cpumask_new;
5138 	int err;
5139 
5140 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5141 		return -EINVAL;
5142 
5143 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5144 		return -ENOMEM;
5145 
5146 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5147 	if (err)
5148 		goto err_free;
5149 
5150 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5151 	if (err)
5152 		goto err_free;
5153 
5154 	free_cpumask_var(tracing_cpumask_new);
5155 
5156 	return count;
5157 
5158 err_free:
5159 	free_cpumask_var(tracing_cpumask_new);
5160 
5161 	return err;
5162 }
5163 
5164 static const struct file_operations tracing_cpumask_fops = {
5165 	.open		= tracing_open_generic_tr,
5166 	.read		= tracing_cpumask_read,
5167 	.write		= tracing_cpumask_write,
5168 	.release	= tracing_release_generic_tr,
5169 	.llseek		= generic_file_llseek,
5170 };
5171 
5172 static int tracing_trace_options_show(struct seq_file *m, void *v)
5173 {
5174 	struct tracer_opt *trace_opts;
5175 	struct trace_array *tr = m->private;
5176 	struct tracer_flags *flags;
5177 	u32 tracer_flags;
5178 	int i;
5179 
5180 	guard(mutex)(&trace_types_lock);
5181 
5182 	for (i = 0; trace_options[i]; i++) {
5183 		if (tr->trace_flags & (1ULL << i))
5184 			seq_printf(m, "%s\n", trace_options[i]);
5185 		else
5186 			seq_printf(m, "no%s\n", trace_options[i]);
5187 	}
5188 
5189 	flags = tr->current_trace_flags;
5190 	if (!flags || !flags->opts)
5191 		return 0;
5192 
5193 	tracer_flags = flags->val;
5194 	trace_opts = flags->opts;
5195 
5196 	for (i = 0; trace_opts[i].name; i++) {
5197 		if (tracer_flags & trace_opts[i].bit)
5198 			seq_printf(m, "%s\n", trace_opts[i].name);
5199 		else
5200 			seq_printf(m, "no%s\n", trace_opts[i].name);
5201 	}
5202 
5203 	return 0;
5204 }
5205 
5206 static int __set_tracer_option(struct trace_array *tr,
5207 			       struct tracer_flags *tracer_flags,
5208 			       struct tracer_opt *opts, int neg)
5209 {
5210 	struct tracer *trace = tracer_flags->trace;
5211 	int ret = 0;
5212 
5213 	if (trace->set_flag)
5214 		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5215 	if (ret)
5216 		return ret;
5217 
5218 	if (neg)
5219 		tracer_flags->val &= ~opts->bit;
5220 	else
5221 		tracer_flags->val |= opts->bit;
5222 	return 0;
5223 }
5224 
5225 /* Try to assign a tracer specific option */
5226 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5227 {
5228 	struct tracer_flags *tracer_flags = tr->current_trace_flags;
5229 	struct tracer_opt *opts = NULL;
5230 	int i;
5231 
5232 	if (!tracer_flags || !tracer_flags->opts)
5233 		return 0;
5234 
5235 	for (i = 0; tracer_flags->opts[i].name; i++) {
5236 		opts = &tracer_flags->opts[i];
5237 
5238 		if (strcmp(cmp, opts->name) == 0)
5239 			return __set_tracer_option(tr, tracer_flags, opts, neg);
5240 	}
5241 
5242 	return -EINVAL;
5243 }
5244 
5245 /* Some tracers require overwrite to stay enabled */
5246 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
5247 {
5248 	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
5249 		return -1;
5250 
5251 	return 0;
5252 }
5253 
5254 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
5255 {
5256 	switch (mask) {
5257 	case TRACE_ITER(RECORD_TGID):
5258 	case TRACE_ITER(RECORD_CMD):
5259 	case TRACE_ITER(TRACE_PRINTK):
5260 	case TRACE_ITER(COPY_MARKER):
5261 		lockdep_assert_held(&event_mutex);
5262 	}
5263 
5264 	/* do nothing if flag is already set */
5265 	if (!!(tr->trace_flags & mask) == !!enabled)
5266 		return 0;
5267 
5268 	/* Give the tracer a chance to approve the change */
5269 	if (tr->current_trace->flag_changed)
5270 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5271 			return -EINVAL;
5272 
5273 	switch (mask) {
5274 	case TRACE_ITER(TRACE_PRINTK):
5275 		if (enabled) {
5276 			update_printk_trace(tr);
5277 		} else {
5278 			/*
5279 			 * The global_trace cannot clear this.
5280 			 * It's flag only gets cleared if another instance sets it.
5281 			 */
5282 			if (printk_trace == &global_trace)
5283 				return -EINVAL;
5284 			/*
5285 			 * An instance must always have it set.
5286 			 * by default, that's the global_trace instance.
5287 			 */
5288 			if (printk_trace == tr)
5289 				update_printk_trace(&global_trace);
5290 		}
5291 		break;
5292 
5293 	case TRACE_ITER(COPY_MARKER):
5294 		update_marker_trace(tr, enabled);
5295 		/* update_marker_trace updates the tr->trace_flags */
5296 		return 0;
5297 	}
5298 
5299 	if (enabled)
5300 		tr->trace_flags |= mask;
5301 	else
5302 		tr->trace_flags &= ~mask;
5303 
5304 	switch (mask) {
5305 	case TRACE_ITER(RECORD_CMD):
5306 		trace_event_enable_cmd_record(enabled);
5307 		break;
5308 
5309 	case TRACE_ITER(RECORD_TGID):
5310 
5311 		if (trace_alloc_tgid_map() < 0) {
5312 			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
5313 			return -ENOMEM;
5314 		}
5315 
5316 		trace_event_enable_tgid_record(enabled);
5317 		break;
5318 
5319 	case TRACE_ITER(EVENT_FORK):
5320 		trace_event_follow_fork(tr, enabled);
5321 		break;
5322 
5323 	case TRACE_ITER(FUNC_FORK):
5324 		ftrace_pid_follow_fork(tr, enabled);
5325 		break;
5326 
5327 	case TRACE_ITER(OVERWRITE):
5328 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5329 #ifdef CONFIG_TRACER_MAX_TRACE
5330 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5331 #endif
5332 		break;
5333 
5334 	case TRACE_ITER(PRINTK):
5335 		trace_printk_start_stop_comm(enabled);
5336 		trace_printk_control(enabled);
5337 		break;
5338 
5339 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
5340 	case TRACE_GRAPH_GRAPH_TIME:
5341 		ftrace_graph_graph_time_control(enabled);
5342 		break;
5343 #endif
5344 	}
5345 
5346 	return 0;
5347 }
5348 
5349 int trace_set_options(struct trace_array *tr, char *option)
5350 {
5351 	char *cmp;
5352 	int neg = 0;
5353 	int ret;
5354 	size_t orig_len = strlen(option);
5355 	int len;
5356 
5357 	cmp = strstrip(option);
5358 
5359 	len = str_has_prefix(cmp, "no");
5360 	if (len)
5361 		neg = 1;
5362 
5363 	cmp += len;
5364 
5365 	mutex_lock(&event_mutex);
5366 	mutex_lock(&trace_types_lock);
5367 
5368 	ret = match_string(trace_options, -1, cmp);
5369 	/* If no option could be set, test the specific tracer options */
5370 	if (ret < 0)
5371 		ret = set_tracer_option(tr, cmp, neg);
5372 	else
5373 		ret = set_tracer_flag(tr, 1ULL << ret, !neg);
5374 
5375 	mutex_unlock(&trace_types_lock);
5376 	mutex_unlock(&event_mutex);
5377 
5378 	/*
5379 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5380 	 * turn it back into a space.
5381 	 */
5382 	if (orig_len > strlen(option))
5383 		option[strlen(option)] = ' ';
5384 
5385 	return ret;
5386 }
5387 
5388 static void __init apply_trace_boot_options(void)
5389 {
5390 	char *buf = trace_boot_options_buf;
5391 	char *option;
5392 
5393 	while (true) {
5394 		option = strsep(&buf, ",");
5395 
5396 		if (!option)
5397 			break;
5398 
5399 		if (*option)
5400 			trace_set_options(&global_trace, option);
5401 
5402 		/* Put back the comma to allow this to be called again */
5403 		if (buf)
5404 			*(buf - 1) = ',';
5405 	}
5406 }
5407 
5408 static ssize_t
5409 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5410 			size_t cnt, loff_t *ppos)
5411 {
5412 	struct seq_file *m = filp->private_data;
5413 	struct trace_array *tr = m->private;
5414 	char buf[64];
5415 	int ret;
5416 
5417 	if (cnt >= sizeof(buf))
5418 		return -EINVAL;
5419 
5420 	if (copy_from_user(buf, ubuf, cnt))
5421 		return -EFAULT;
5422 
5423 	buf[cnt] = 0;
5424 
5425 	ret = trace_set_options(tr, buf);
5426 	if (ret < 0)
5427 		return ret;
5428 
5429 	*ppos += cnt;
5430 
5431 	return cnt;
5432 }
5433 
5434 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5435 {
5436 	struct trace_array *tr = inode->i_private;
5437 	int ret;
5438 
5439 	ret = tracing_check_open_get_tr(tr);
5440 	if (ret)
5441 		return ret;
5442 
5443 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5444 	if (ret < 0)
5445 		trace_array_put(tr);
5446 
5447 	return ret;
5448 }
5449 
5450 static const struct file_operations tracing_iter_fops = {
5451 	.open		= tracing_trace_options_open,
5452 	.read		= seq_read,
5453 	.llseek		= seq_lseek,
5454 	.release	= tracing_single_release_tr,
5455 	.write		= tracing_trace_options_write,
5456 };
5457 
5458 static const char readme_msg[] =
5459 	"tracing mini-HOWTO:\n\n"
5460 	"By default tracefs removes all OTH file permission bits.\n"
5461 	"When mounting tracefs an optional group id can be specified\n"
5462 	"which adds the group to every directory and file in tracefs:\n\n"
5463 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5464 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5465 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5466 	" Important files:\n"
5467 	"  trace\t\t\t- The static contents of the buffer\n"
5468 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5469 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5470 	"  current_tracer\t- function and latency tracers\n"
5471 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5472 	"  error_log\t- error log for failed commands (that support it)\n"
5473 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5474 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5475 	"  trace_clock\t\t- change the clock used to order events\n"
5476 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5477 	"      global:   Synced across CPUs but slows tracing down.\n"
5478 	"     counter:   Not a clock, but just an increment\n"
5479 	"      uptime:   Jiffy counter from time of boot\n"
5480 	"        perf:   Same clock that perf events use\n"
5481 #ifdef CONFIG_X86_64
5482 	"     x86-tsc:   TSC cycle counter\n"
5483 #endif
5484 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5485 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5486 	"    absolute:   Absolute (standalone) timestamp\n"
5487 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5488 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5489 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5490 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5491 	"\t\t\t  Remove sub-buffer with rmdir\n"
5492 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5493 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5494 	"\t\t\t  option name\n"
5495 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5496 #ifdef CONFIG_DYNAMIC_FTRACE
5497 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5498 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5499 	"\t\t\t  functions\n"
5500 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5501 	"\t     modules: Can select a group via module\n"
5502 	"\t      Format: :mod:<module-name>\n"
5503 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5504 	"\t    triggers: a command to perform when function is hit\n"
5505 	"\t      Format: <function>:<trigger>[:count]\n"
5506 	"\t     trigger: traceon, traceoff\n"
5507 	"\t\t      enable_event:<system>:<event>\n"
5508 	"\t\t      disable_event:<system>:<event>\n"
5509 #ifdef CONFIG_STACKTRACE
5510 	"\t\t      stacktrace\n"
5511 #endif
5512 #ifdef CONFIG_TRACER_SNAPSHOT
5513 	"\t\t      snapshot\n"
5514 #endif
5515 	"\t\t      dump\n"
5516 	"\t\t      cpudump\n"
5517 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5518 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5519 	"\t     The first one will disable tracing every time do_fault is hit\n"
5520 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5521 	"\t       The first time do trap is hit and it disables tracing, the\n"
5522 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5523 	"\t       the counter will not decrement. It only decrements when the\n"
5524 	"\t       trigger did work\n"
5525 	"\t     To remove trigger without count:\n"
5526 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5527 	"\t     To remove trigger with a count:\n"
5528 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5529 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5530 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5531 	"\t    modules: Can select a group via module command :mod:\n"
5532 	"\t    Does not accept triggers\n"
5533 #endif /* CONFIG_DYNAMIC_FTRACE */
5534 #ifdef CONFIG_FUNCTION_TRACER
5535 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5536 	"\t\t    (function)\n"
5537 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5538 	"\t\t    (function)\n"
5539 #endif
5540 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5541 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5542 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5543 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5544 #endif
5545 #ifdef CONFIG_TRACER_SNAPSHOT
5546 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5547 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5548 	"\t\t\t  information\n"
5549 #endif
5550 #ifdef CONFIG_STACK_TRACER
5551 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5552 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5553 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5554 	"\t\t\t  new trace)\n"
5555 #ifdef CONFIG_DYNAMIC_FTRACE
5556 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5557 	"\t\t\t  traces\n"
5558 #endif
5559 #endif /* CONFIG_STACK_TRACER */
5560 #ifdef CONFIG_DYNAMIC_EVENTS
5561 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5562 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5563 #endif
5564 #ifdef CONFIG_KPROBE_EVENTS
5565 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5566 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5567 #endif
5568 #ifdef CONFIG_UPROBE_EVENTS
5569 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5570 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5571 #endif
5572 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5573     defined(CONFIG_FPROBE_EVENTS)
5574 	"\t  accepts: event-definitions (one definition per line)\n"
5575 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5576 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5577 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5578 #endif
5579 #ifdef CONFIG_FPROBE_EVENTS
5580 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5581 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5582 #endif
5583 #ifdef CONFIG_HIST_TRIGGERS
5584 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5585 #endif
5586 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5587 	"\t           -:[<group>/][<event>]\n"
5588 #ifdef CONFIG_KPROBE_EVENTS
5589 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5590   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5591 #endif
5592 #ifdef CONFIG_UPROBE_EVENTS
5593   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5594 #endif
5595 	"\t     args: <name>=fetcharg[:type]\n"
5596 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5597 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5598 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5599 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5600 	"\t           <argname>[->field[->field|.field...]],\n"
5601 #endif
5602 #else
5603 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5604 #endif
5605 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5606 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5607 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5608 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5609 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5610 #ifdef CONFIG_HIST_TRIGGERS
5611 	"\t    field: <stype> <name>;\n"
5612 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5613 	"\t           [unsigned] char/int/long\n"
5614 #endif
5615 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5616 	"\t            of the <attached-group>/<attached-event>.\n"
5617 #endif
5618 	"  set_event\t\t- Enables events by name written into it\n"
5619 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5620 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5621 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5622 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5623 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5624 	"\t\t\t  events\n"
5625 	"      filter\t\t- If set, only events passing filter are traced\n"
5626 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5627 	"\t\t\t  <event>:\n"
5628 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5629 	"      filter\t\t- If set, only events passing filter are traced\n"
5630 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5631 	"\t    Format: <trigger>[:count][if <filter>]\n"
5632 	"\t   trigger: traceon, traceoff\n"
5633 	"\t            enable_event:<system>:<event>\n"
5634 	"\t            disable_event:<system>:<event>\n"
5635 #ifdef CONFIG_HIST_TRIGGERS
5636 	"\t            enable_hist:<system>:<event>\n"
5637 	"\t            disable_hist:<system>:<event>\n"
5638 #endif
5639 #ifdef CONFIG_STACKTRACE
5640 	"\t\t    stacktrace\n"
5641 #endif
5642 #ifdef CONFIG_TRACER_SNAPSHOT
5643 	"\t\t    snapshot\n"
5644 #endif
5645 #ifdef CONFIG_HIST_TRIGGERS
5646 	"\t\t    hist (see below)\n"
5647 #endif
5648 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5649 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5650 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5651 	"\t                  events/block/block_unplug/trigger\n"
5652 	"\t   The first disables tracing every time block_unplug is hit.\n"
5653 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5654 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5655 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5656 	"\t   Like function triggers, the counter is only decremented if it\n"
5657 	"\t    enabled or disabled tracing.\n"
5658 	"\t   To remove a trigger without a count:\n"
5659 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5660 	"\t   To remove a trigger with a count:\n"
5661 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5662 	"\t   Filters can be ignored when removing a trigger.\n"
5663 #ifdef CONFIG_HIST_TRIGGERS
5664 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5665 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5666 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5667 	"\t            [:values=<field1[,field2,...]>]\n"
5668 	"\t            [:sort=<field1[,field2,...]>]\n"
5669 	"\t            [:size=#entries]\n"
5670 	"\t            [:pause][:continue][:clear]\n"
5671 	"\t            [:name=histname1]\n"
5672 	"\t            [:nohitcount]\n"
5673 	"\t            [:<handler>.<action>]\n"
5674 	"\t            [if <filter>]\n\n"
5675 	"\t    Note, special fields can be used as well:\n"
5676 	"\t            common_timestamp - to record current timestamp\n"
5677 	"\t            common_cpu - to record the CPU the event happened on\n"
5678 	"\n"
5679 	"\t    A hist trigger variable can be:\n"
5680 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5681 	"\t        - a reference to another variable e.g. y=$x,\n"
5682 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5683 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5684 	"\n"
5685 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5686 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5687 	"\t    variable reference, field or numeric literal.\n"
5688 	"\n"
5689 	"\t    When a matching event is hit, an entry is added to a hash\n"
5690 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5691 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5692 	"\t    correspond to fields in the event's format description.  Keys\n"
5693 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5694 	"\t    Compound keys consisting of up to two fields can be specified\n"
5695 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5696 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5697 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5698 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5699 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5700 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5701 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5702 	"\t    its histogram data will be shared with other triggers of the\n"
5703 	"\t    same name, and trigger hits will update this common data.\n\n"
5704 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5705 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5706 	"\t    triggers attached to an event, there will be a table for each\n"
5707 	"\t    trigger in the output.  The table displayed for a named\n"
5708 	"\t    trigger will be the same as any other instance having the\n"
5709 	"\t    same name.  The default format used to display a given field\n"
5710 	"\t    can be modified by appending any of the following modifiers\n"
5711 	"\t    to the field name, as applicable:\n\n"
5712 	"\t            .hex        display a number as a hex value\n"
5713 	"\t            .sym        display an address as a symbol\n"
5714 	"\t            .sym-offset display an address as a symbol and offset\n"
5715 	"\t            .execname   display a common_pid as a program name\n"
5716 	"\t            .syscall    display a syscall id as a syscall name\n"
5717 	"\t            .log2       display log2 value rather than raw number\n"
5718 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5719 	"\t            .usecs      display a common_timestamp in microseconds\n"
5720 	"\t            .percent    display a number of percentage value\n"
5721 	"\t            .graph      display a bar-graph of a value\n\n"
5722 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5723 	"\t    trigger or to start a hist trigger but not log any events\n"
5724 	"\t    until told to do so.  'continue' can be used to start or\n"
5725 	"\t    restart a paused hist trigger.\n\n"
5726 	"\t    The 'clear' parameter will clear the contents of a running\n"
5727 	"\t    hist trigger and leave its current paused/active state\n"
5728 	"\t    unchanged.\n\n"
5729 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5730 	"\t    raw hitcount in the histogram.\n\n"
5731 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5732 	"\t    have one event conditionally start and stop another event's\n"
5733 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5734 	"\t    the enable_event and disable_event triggers.\n\n"
5735 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5736 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5737 	"\t        <handler>.<action>\n\n"
5738 	"\t    The available handlers are:\n\n"
5739 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5740 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5741 	"\t        onchange(var)            - invoke action if var changes\n\n"
5742 	"\t    The available actions are:\n\n"
5743 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5744 	"\t        save(field,...)                      - save current event fields\n"
5745 #ifdef CONFIG_TRACER_SNAPSHOT
5746 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5747 #endif
5748 #ifdef CONFIG_SYNTH_EVENTS
5749 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5750 	"\t  Write into this file to define/undefine new synthetic events.\n"
5751 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5752 #endif
5753 #endif
5754 ;
5755 
5756 static ssize_t
5757 tracing_readme_read(struct file *filp, char __user *ubuf,
5758 		       size_t cnt, loff_t *ppos)
5759 {
5760 	return simple_read_from_buffer(ubuf, cnt, ppos,
5761 					readme_msg, strlen(readme_msg));
5762 }
5763 
5764 static const struct file_operations tracing_readme_fops = {
5765 	.open		= tracing_open_generic,
5766 	.read		= tracing_readme_read,
5767 	.llseek		= generic_file_llseek,
5768 };
5769 
5770 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5771 static union trace_eval_map_item *
5772 update_eval_map(union trace_eval_map_item *ptr)
5773 {
5774 	if (!ptr->map.eval_string) {
5775 		if (ptr->tail.next) {
5776 			ptr = ptr->tail.next;
5777 			/* Set ptr to the next real item (skip head) */
5778 			ptr++;
5779 		} else
5780 			return NULL;
5781 	}
5782 	return ptr;
5783 }
5784 
5785 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5786 {
5787 	union trace_eval_map_item *ptr = v;
5788 
5789 	/*
5790 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5791 	 * This really should never happen.
5792 	 */
5793 	(*pos)++;
5794 	ptr = update_eval_map(ptr);
5795 	if (WARN_ON_ONCE(!ptr))
5796 		return NULL;
5797 
5798 	ptr++;
5799 	ptr = update_eval_map(ptr);
5800 
5801 	return ptr;
5802 }
5803 
5804 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5805 {
5806 	union trace_eval_map_item *v;
5807 	loff_t l = 0;
5808 
5809 	mutex_lock(&trace_eval_mutex);
5810 
5811 	v = trace_eval_maps;
5812 	if (v)
5813 		v++;
5814 
5815 	while (v && l < *pos) {
5816 		v = eval_map_next(m, v, &l);
5817 	}
5818 
5819 	return v;
5820 }
5821 
5822 static void eval_map_stop(struct seq_file *m, void *v)
5823 {
5824 	mutex_unlock(&trace_eval_mutex);
5825 }
5826 
5827 static int eval_map_show(struct seq_file *m, void *v)
5828 {
5829 	union trace_eval_map_item *ptr = v;
5830 
5831 	seq_printf(m, "%s %ld (%s)\n",
5832 		   ptr->map.eval_string, ptr->map.eval_value,
5833 		   ptr->map.system);
5834 
5835 	return 0;
5836 }
5837 
5838 static const struct seq_operations tracing_eval_map_seq_ops = {
5839 	.start		= eval_map_start,
5840 	.next		= eval_map_next,
5841 	.stop		= eval_map_stop,
5842 	.show		= eval_map_show,
5843 };
5844 
5845 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5846 {
5847 	int ret;
5848 
5849 	ret = tracing_check_open_get_tr(NULL);
5850 	if (ret)
5851 		return ret;
5852 
5853 	return seq_open(filp, &tracing_eval_map_seq_ops);
5854 }
5855 
5856 static const struct file_operations tracing_eval_map_fops = {
5857 	.open		= tracing_eval_map_open,
5858 	.read		= seq_read,
5859 	.llseek		= seq_lseek,
5860 	.release	= seq_release,
5861 };
5862 
5863 static inline union trace_eval_map_item *
5864 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5865 {
5866 	/* Return tail of array given the head */
5867 	return ptr + ptr->head.length + 1;
5868 }
5869 
5870 static void
5871 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5872 			   int len)
5873 {
5874 	struct trace_eval_map **stop;
5875 	struct trace_eval_map **map;
5876 	union trace_eval_map_item *map_array;
5877 	union trace_eval_map_item *ptr;
5878 
5879 	stop = start + len;
5880 
5881 	/*
5882 	 * The trace_eval_maps contains the map plus a head and tail item,
5883 	 * where the head holds the module and length of array, and the
5884 	 * tail holds a pointer to the next list.
5885 	 */
5886 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5887 	if (!map_array) {
5888 		pr_warn("Unable to allocate trace eval mapping\n");
5889 		return;
5890 	}
5891 
5892 	guard(mutex)(&trace_eval_mutex);
5893 
5894 	if (!trace_eval_maps)
5895 		trace_eval_maps = map_array;
5896 	else {
5897 		ptr = trace_eval_maps;
5898 		for (;;) {
5899 			ptr = trace_eval_jmp_to_tail(ptr);
5900 			if (!ptr->tail.next)
5901 				break;
5902 			ptr = ptr->tail.next;
5903 
5904 		}
5905 		ptr->tail.next = map_array;
5906 	}
5907 	map_array->head.mod = mod;
5908 	map_array->head.length = len;
5909 	map_array++;
5910 
5911 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5912 		map_array->map = **map;
5913 		map_array++;
5914 	}
5915 	memset(map_array, 0, sizeof(*map_array));
5916 }
5917 
5918 static void trace_create_eval_file(struct dentry *d_tracer)
5919 {
5920 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5921 			  NULL, &tracing_eval_map_fops);
5922 }
5923 
5924 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5925 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5926 static inline void trace_insert_eval_map_file(struct module *mod,
5927 			      struct trace_eval_map **start, int len) { }
5928 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5929 
5930 static void
5931 trace_event_update_with_eval_map(struct module *mod,
5932 				 struct trace_eval_map **start,
5933 				 int len)
5934 {
5935 	struct trace_eval_map **map;
5936 
5937 	/* Always run sanitizer only if btf_type_tag attr exists. */
5938 	if (len <= 0) {
5939 		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5940 		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5941 		      __has_attribute(btf_type_tag)))
5942 			return;
5943 	}
5944 
5945 	map = start;
5946 
5947 	trace_event_update_all(map, len);
5948 
5949 	if (len <= 0)
5950 		return;
5951 
5952 	trace_insert_eval_map_file(mod, start, len);
5953 }
5954 
5955 static ssize_t
5956 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5957 		       size_t cnt, loff_t *ppos)
5958 {
5959 	struct trace_array *tr = filp->private_data;
5960 	char buf[MAX_TRACER_SIZE+2];
5961 	int r;
5962 
5963 	scoped_guard(mutex, &trace_types_lock) {
5964 		r = sprintf(buf, "%s\n", tr->current_trace->name);
5965 	}
5966 
5967 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5968 }
5969 
5970 int tracer_init(struct tracer *t, struct trace_array *tr)
5971 {
5972 	tracing_reset_online_cpus(&tr->array_buffer);
5973 	return t->init(tr);
5974 }
5975 
5976 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5977 {
5978 	int cpu;
5979 
5980 	for_each_tracing_cpu(cpu)
5981 		per_cpu_ptr(buf->data, cpu)->entries = val;
5982 }
5983 
5984 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5985 {
5986 	if (cpu == RING_BUFFER_ALL_CPUS) {
5987 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5988 	} else {
5989 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5990 	}
5991 }
5992 
5993 #ifdef CONFIG_TRACER_MAX_TRACE
5994 /* resize @tr's buffer to the size of @size_tr's entries */
5995 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5996 					struct array_buffer *size_buf, int cpu_id)
5997 {
5998 	int cpu, ret = 0;
5999 
6000 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6001 		for_each_tracing_cpu(cpu) {
6002 			ret = ring_buffer_resize(trace_buf->buffer,
6003 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6004 			if (ret < 0)
6005 				break;
6006 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6007 				per_cpu_ptr(size_buf->data, cpu)->entries;
6008 		}
6009 	} else {
6010 		ret = ring_buffer_resize(trace_buf->buffer,
6011 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6012 		if (ret == 0)
6013 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6014 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6015 	}
6016 
6017 	return ret;
6018 }
6019 #endif /* CONFIG_TRACER_MAX_TRACE */
6020 
6021 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6022 					unsigned long size, int cpu)
6023 {
6024 	int ret;
6025 
6026 	/*
6027 	 * If kernel or user changes the size of the ring buffer
6028 	 * we use the size that was given, and we can forget about
6029 	 * expanding it later.
6030 	 */
6031 	trace_set_ring_buffer_expanded(tr);
6032 
6033 	/* May be called before buffers are initialized */
6034 	if (!tr->array_buffer.buffer)
6035 		return 0;
6036 
6037 	/* Do not allow tracing while resizing ring buffer */
6038 	tracing_stop_tr(tr);
6039 
6040 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6041 	if (ret < 0)
6042 		goto out_start;
6043 
6044 #ifdef CONFIG_TRACER_MAX_TRACE
6045 	if (!tr->allocated_snapshot)
6046 		goto out;
6047 
6048 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6049 	if (ret < 0) {
6050 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6051 						     &tr->array_buffer, cpu);
6052 		if (r < 0) {
6053 			/*
6054 			 * AARGH! We are left with different
6055 			 * size max buffer!!!!
6056 			 * The max buffer is our "snapshot" buffer.
6057 			 * When a tracer needs a snapshot (one of the
6058 			 * latency tracers), it swaps the max buffer
6059 			 * with the saved snap shot. We succeeded to
6060 			 * update the size of the main buffer, but failed to
6061 			 * update the size of the max buffer. But when we tried
6062 			 * to reset the main buffer to the original size, we
6063 			 * failed there too. This is very unlikely to
6064 			 * happen, but if it does, warn and kill all
6065 			 * tracing.
6066 			 */
6067 			WARN_ON(1);
6068 			tracing_disabled = 1;
6069 		}
6070 		goto out_start;
6071 	}
6072 
6073 	update_buffer_entries(&tr->max_buffer, cpu);
6074 
6075  out:
6076 #endif /* CONFIG_TRACER_MAX_TRACE */
6077 
6078 	update_buffer_entries(&tr->array_buffer, cpu);
6079  out_start:
6080 	tracing_start_tr(tr);
6081 	return ret;
6082 }
6083 
6084 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6085 				  unsigned long size, int cpu_id)
6086 {
6087 	guard(mutex)(&trace_types_lock);
6088 
6089 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6090 		/* make sure, this cpu is enabled in the mask */
6091 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6092 			return -EINVAL;
6093 	}
6094 
6095 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6096 }
6097 
6098 struct trace_mod_entry {
6099 	unsigned long	mod_addr;
6100 	char		mod_name[MODULE_NAME_LEN];
6101 };
6102 
6103 struct trace_scratch {
6104 	unsigned int		clock_id;
6105 	unsigned long		text_addr;
6106 	unsigned long		nr_entries;
6107 	struct trace_mod_entry	entries[];
6108 };
6109 
6110 static DEFINE_MUTEX(scratch_mutex);
6111 
6112 static int cmp_mod_entry(const void *key, const void *pivot)
6113 {
6114 	unsigned long addr = (unsigned long)key;
6115 	const struct trace_mod_entry *ent = pivot;
6116 
6117 	if (addr < ent[0].mod_addr)
6118 		return -1;
6119 
6120 	return addr >= ent[1].mod_addr;
6121 }
6122 
6123 /**
6124  * trace_adjust_address() - Adjust prev boot address to current address.
6125  * @tr: Persistent ring buffer's trace_array.
6126  * @addr: Address in @tr which is adjusted.
6127  */
6128 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6129 {
6130 	struct trace_module_delta *module_delta;
6131 	struct trace_scratch *tscratch;
6132 	struct trace_mod_entry *entry;
6133 	unsigned long raddr;
6134 	int idx = 0, nr_entries;
6135 
6136 	/* If we don't have last boot delta, return the address */
6137 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6138 		return addr;
6139 
6140 	/* tr->module_delta must be protected by rcu. */
6141 	guard(rcu)();
6142 	tscratch = tr->scratch;
6143 	/* if there is no tscrach, module_delta must be NULL. */
6144 	module_delta = READ_ONCE(tr->module_delta);
6145 	if (!module_delta || !tscratch->nr_entries ||
6146 	    tscratch->entries[0].mod_addr > addr) {
6147 		raddr = addr + tr->text_delta;
6148 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6149 			is_kernel_rodata(raddr) ? raddr : addr;
6150 	}
6151 
6152 	/* Note that entries must be sorted. */
6153 	nr_entries = tscratch->nr_entries;
6154 	if (nr_entries == 1 ||
6155 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6156 		idx = nr_entries - 1;
6157 	else {
6158 		entry = __inline_bsearch((void *)addr,
6159 				tscratch->entries,
6160 				nr_entries - 1,
6161 				sizeof(tscratch->entries[0]),
6162 				cmp_mod_entry);
6163 		if (entry)
6164 			idx = entry - tscratch->entries;
6165 	}
6166 
6167 	return addr + module_delta->delta[idx];
6168 }
6169 
6170 #ifdef CONFIG_MODULES
6171 static int save_mod(struct module *mod, void *data)
6172 {
6173 	struct trace_array *tr = data;
6174 	struct trace_scratch *tscratch;
6175 	struct trace_mod_entry *entry;
6176 	unsigned int size;
6177 
6178 	tscratch = tr->scratch;
6179 	if (!tscratch)
6180 		return -1;
6181 	size = tr->scratch_size;
6182 
6183 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6184 		return -1;
6185 
6186 	entry = &tscratch->entries[tscratch->nr_entries];
6187 
6188 	tscratch->nr_entries++;
6189 
6190 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6191 	strscpy(entry->mod_name, mod->name);
6192 
6193 	return 0;
6194 }
6195 #else
6196 static int save_mod(struct module *mod, void *data)
6197 {
6198 	return 0;
6199 }
6200 #endif
6201 
6202 static void update_last_data(struct trace_array *tr)
6203 {
6204 	struct trace_module_delta *module_delta;
6205 	struct trace_scratch *tscratch;
6206 
6207 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6208 		return;
6209 
6210 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6211 		return;
6212 
6213 	/* Only if the buffer has previous boot data clear and update it. */
6214 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6215 
6216 	/* Reset the module list and reload them */
6217 	if (tr->scratch) {
6218 		struct trace_scratch *tscratch = tr->scratch;
6219 
6220 		tscratch->clock_id = tr->clock_id;
6221 		memset(tscratch->entries, 0,
6222 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6223 		tscratch->nr_entries = 0;
6224 
6225 		guard(mutex)(&scratch_mutex);
6226 		module_for_each_mod(save_mod, tr);
6227 	}
6228 
6229 	/*
6230 	 * Need to clear all CPU buffers as there cannot be events
6231 	 * from the previous boot mixed with events with this boot
6232 	 * as that will cause a confusing trace. Need to clear all
6233 	 * CPU buffers, even for those that may currently be offline.
6234 	 */
6235 	tracing_reset_all_cpus(&tr->array_buffer);
6236 
6237 	/* Using current data now */
6238 	tr->text_delta = 0;
6239 
6240 	if (!tr->scratch)
6241 		return;
6242 
6243 	tscratch = tr->scratch;
6244 	module_delta = READ_ONCE(tr->module_delta);
6245 	WRITE_ONCE(tr->module_delta, NULL);
6246 	kfree_rcu(module_delta, rcu);
6247 
6248 	/* Set the persistent ring buffer meta data to this address */
6249 	tscratch->text_addr = (unsigned long)_text;
6250 }
6251 
6252 /**
6253  * tracing_update_buffers - used by tracing facility to expand ring buffers
6254  * @tr: The tracing instance
6255  *
6256  * To save on memory when the tracing is never used on a system with it
6257  * configured in. The ring buffers are set to a minimum size. But once
6258  * a user starts to use the tracing facility, then they need to grow
6259  * to their default size.
6260  *
6261  * This function is to be called when a tracer is about to be used.
6262  */
6263 int tracing_update_buffers(struct trace_array *tr)
6264 {
6265 	int ret = 0;
6266 
6267 	guard(mutex)(&trace_types_lock);
6268 
6269 	update_last_data(tr);
6270 
6271 	if (!tr->ring_buffer_expanded)
6272 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6273 						RING_BUFFER_ALL_CPUS);
6274 	return ret;
6275 }
6276 
6277 /*
6278  * Used to clear out the tracer before deletion of an instance.
6279  * Must have trace_types_lock held.
6280  */
6281 static void tracing_set_nop(struct trace_array *tr)
6282 {
6283 	if (tr->current_trace == &nop_trace)
6284 		return;
6285 
6286 	tr->current_trace->enabled--;
6287 
6288 	if (tr->current_trace->reset)
6289 		tr->current_trace->reset(tr);
6290 
6291 	tr->current_trace = &nop_trace;
6292 	tr->current_trace_flags = nop_trace.flags;
6293 }
6294 
6295 static bool tracer_options_updated;
6296 
6297 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6298 {
6299 	struct tracer *trace = NULL;
6300 	struct tracers *t;
6301 #ifdef CONFIG_TRACER_MAX_TRACE
6302 	bool had_max_tr;
6303 #endif
6304 	int ret;
6305 
6306 	guard(mutex)(&trace_types_lock);
6307 
6308 	update_last_data(tr);
6309 
6310 	if (!tr->ring_buffer_expanded) {
6311 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6312 						RING_BUFFER_ALL_CPUS);
6313 		if (ret < 0)
6314 			return ret;
6315 		ret = 0;
6316 	}
6317 
6318 	list_for_each_entry(t, &tr->tracers, list) {
6319 		if (strcmp(t->tracer->name, buf) == 0) {
6320 			trace = t->tracer;
6321 			break;
6322 		}
6323 	}
6324 	if (!trace)
6325 		return -EINVAL;
6326 
6327 	if (trace == tr->current_trace)
6328 		return 0;
6329 
6330 #ifdef CONFIG_TRACER_SNAPSHOT
6331 	if (trace->use_max_tr) {
6332 		local_irq_disable();
6333 		arch_spin_lock(&tr->max_lock);
6334 		ret = tr->cond_snapshot ? -EBUSY : 0;
6335 		arch_spin_unlock(&tr->max_lock);
6336 		local_irq_enable();
6337 		if (ret)
6338 			return ret;
6339 	}
6340 #endif
6341 	/* Some tracers won't work on kernel command line */
6342 	if (system_state < SYSTEM_RUNNING && trace->noboot) {
6343 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6344 			trace->name);
6345 		return -EINVAL;
6346 	}
6347 
6348 	/* Some tracers are only allowed for the top level buffer */
6349 	if (!trace_ok_for_array(trace, tr))
6350 		return -EINVAL;
6351 
6352 	/* If trace pipe files are being read, we can't change the tracer */
6353 	if (tr->trace_ref)
6354 		return -EBUSY;
6355 
6356 	trace_branch_disable();
6357 
6358 	tr->current_trace->enabled--;
6359 
6360 	if (tr->current_trace->reset)
6361 		tr->current_trace->reset(tr);
6362 
6363 #ifdef CONFIG_TRACER_MAX_TRACE
6364 	had_max_tr = tr->current_trace->use_max_tr;
6365 
6366 	/* Current trace needs to be nop_trace before synchronize_rcu */
6367 	tr->current_trace = &nop_trace;
6368 	tr->current_trace_flags = nop_trace.flags;
6369 
6370 	if (had_max_tr && !trace->use_max_tr) {
6371 		/*
6372 		 * We need to make sure that the update_max_tr sees that
6373 		 * current_trace changed to nop_trace to keep it from
6374 		 * swapping the buffers after we resize it.
6375 		 * The update_max_tr is called from interrupts disabled
6376 		 * so a synchronized_sched() is sufficient.
6377 		 */
6378 		synchronize_rcu();
6379 		free_snapshot(tr);
6380 		tracing_disarm_snapshot(tr);
6381 	}
6382 
6383 	if (!had_max_tr && trace->use_max_tr) {
6384 		ret = tracing_arm_snapshot_locked(tr);
6385 		if (ret)
6386 			return ret;
6387 	}
6388 #else
6389 	tr->current_trace = &nop_trace;
6390 #endif
6391 
6392 	tr->current_trace_flags = t->flags ? : t->tracer->flags;
6393 
6394 	if (trace->init) {
6395 		ret = tracer_init(trace, tr);
6396 		if (ret) {
6397 #ifdef CONFIG_TRACER_MAX_TRACE
6398 			if (trace->use_max_tr)
6399 				tracing_disarm_snapshot(tr);
6400 #endif
6401 			tr->current_trace_flags = nop_trace.flags;
6402 			return ret;
6403 		}
6404 	}
6405 
6406 	tr->current_trace = trace;
6407 	tr->current_trace->enabled++;
6408 	trace_branch_enable(tr);
6409 
6410 	return 0;
6411 }
6412 
6413 static ssize_t
6414 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6415 			size_t cnt, loff_t *ppos)
6416 {
6417 	struct trace_array *tr = filp->private_data;
6418 	char buf[MAX_TRACER_SIZE+1];
6419 	char *name;
6420 	size_t ret;
6421 	int err;
6422 
6423 	ret = cnt;
6424 
6425 	if (cnt > MAX_TRACER_SIZE)
6426 		cnt = MAX_TRACER_SIZE;
6427 
6428 	if (copy_from_user(buf, ubuf, cnt))
6429 		return -EFAULT;
6430 
6431 	buf[cnt] = 0;
6432 
6433 	name = strim(buf);
6434 
6435 	err = tracing_set_tracer(tr, name);
6436 	if (err)
6437 		return err;
6438 
6439 	*ppos += ret;
6440 
6441 	return ret;
6442 }
6443 
6444 static ssize_t
6445 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6446 		   size_t cnt, loff_t *ppos)
6447 {
6448 	char buf[64];
6449 	int r;
6450 
6451 	r = snprintf(buf, sizeof(buf), "%ld\n",
6452 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6453 	if (r > sizeof(buf))
6454 		r = sizeof(buf);
6455 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6456 }
6457 
6458 static ssize_t
6459 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6460 		    size_t cnt, loff_t *ppos)
6461 {
6462 	unsigned long val;
6463 	int ret;
6464 
6465 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6466 	if (ret)
6467 		return ret;
6468 
6469 	*ptr = val * 1000;
6470 
6471 	return cnt;
6472 }
6473 
6474 static ssize_t
6475 tracing_thresh_read(struct file *filp, char __user *ubuf,
6476 		    size_t cnt, loff_t *ppos)
6477 {
6478 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6479 }
6480 
6481 static ssize_t
6482 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6483 		     size_t cnt, loff_t *ppos)
6484 {
6485 	struct trace_array *tr = filp->private_data;
6486 	int ret;
6487 
6488 	guard(mutex)(&trace_types_lock);
6489 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6490 	if (ret < 0)
6491 		return ret;
6492 
6493 	if (tr->current_trace->update_thresh) {
6494 		ret = tr->current_trace->update_thresh(tr);
6495 		if (ret < 0)
6496 			return ret;
6497 	}
6498 
6499 	return cnt;
6500 }
6501 
6502 #ifdef CONFIG_TRACER_MAX_TRACE
6503 
6504 static ssize_t
6505 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6506 		     size_t cnt, loff_t *ppos)
6507 {
6508 	struct trace_array *tr = filp->private_data;
6509 
6510 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6511 }
6512 
6513 static ssize_t
6514 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6515 		      size_t cnt, loff_t *ppos)
6516 {
6517 	struct trace_array *tr = filp->private_data;
6518 
6519 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6520 }
6521 
6522 #endif
6523 
6524 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6525 {
6526 	if (cpu == RING_BUFFER_ALL_CPUS) {
6527 		if (cpumask_empty(tr->pipe_cpumask)) {
6528 			cpumask_setall(tr->pipe_cpumask);
6529 			return 0;
6530 		}
6531 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6532 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6533 		return 0;
6534 	}
6535 	return -EBUSY;
6536 }
6537 
6538 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6539 {
6540 	if (cpu == RING_BUFFER_ALL_CPUS) {
6541 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6542 		cpumask_clear(tr->pipe_cpumask);
6543 	} else {
6544 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6545 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6546 	}
6547 }
6548 
6549 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6550 {
6551 	struct trace_array *tr = inode->i_private;
6552 	struct trace_iterator *iter;
6553 	int cpu;
6554 	int ret;
6555 
6556 	ret = tracing_check_open_get_tr(tr);
6557 	if (ret)
6558 		return ret;
6559 
6560 	guard(mutex)(&trace_types_lock);
6561 	cpu = tracing_get_cpu(inode);
6562 	ret = open_pipe_on_cpu(tr, cpu);
6563 	if (ret)
6564 		goto fail_pipe_on_cpu;
6565 
6566 	/* create a buffer to store the information to pass to userspace */
6567 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6568 	if (!iter) {
6569 		ret = -ENOMEM;
6570 		goto fail_alloc_iter;
6571 	}
6572 
6573 	trace_seq_init(&iter->seq);
6574 	iter->trace = tr->current_trace;
6575 
6576 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6577 		ret = -ENOMEM;
6578 		goto fail;
6579 	}
6580 
6581 	/* trace pipe does not show start of buffer */
6582 	cpumask_setall(iter->started);
6583 
6584 	if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
6585 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6586 
6587 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6588 	if (trace_clocks[tr->clock_id].in_ns)
6589 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6590 
6591 	iter->tr = tr;
6592 	iter->array_buffer = &tr->array_buffer;
6593 	iter->cpu_file = cpu;
6594 	mutex_init(&iter->mutex);
6595 	filp->private_data = iter;
6596 
6597 	if (iter->trace->pipe_open)
6598 		iter->trace->pipe_open(iter);
6599 
6600 	nonseekable_open(inode, filp);
6601 
6602 	tr->trace_ref++;
6603 
6604 	return ret;
6605 
6606 fail:
6607 	kfree(iter);
6608 fail_alloc_iter:
6609 	close_pipe_on_cpu(tr, cpu);
6610 fail_pipe_on_cpu:
6611 	__trace_array_put(tr);
6612 	return ret;
6613 }
6614 
6615 static int tracing_release_pipe(struct inode *inode, struct file *file)
6616 {
6617 	struct trace_iterator *iter = file->private_data;
6618 	struct trace_array *tr = inode->i_private;
6619 
6620 	scoped_guard(mutex, &trace_types_lock) {
6621 		tr->trace_ref--;
6622 
6623 		if (iter->trace->pipe_close)
6624 			iter->trace->pipe_close(iter);
6625 		close_pipe_on_cpu(tr, iter->cpu_file);
6626 	}
6627 
6628 	free_trace_iter_content(iter);
6629 	kfree(iter);
6630 
6631 	trace_array_put(tr);
6632 
6633 	return 0;
6634 }
6635 
6636 static __poll_t
6637 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6638 {
6639 	struct trace_array *tr = iter->tr;
6640 
6641 	/* Iterators are static, they should be filled or empty */
6642 	if (trace_buffer_iter(iter, iter->cpu_file))
6643 		return EPOLLIN | EPOLLRDNORM;
6644 
6645 	if (tr->trace_flags & TRACE_ITER(BLOCK))
6646 		/*
6647 		 * Always select as readable when in blocking mode
6648 		 */
6649 		return EPOLLIN | EPOLLRDNORM;
6650 	else
6651 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6652 					     filp, poll_table, iter->tr->buffer_percent);
6653 }
6654 
6655 static __poll_t
6656 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6657 {
6658 	struct trace_iterator *iter = filp->private_data;
6659 
6660 	return trace_poll(iter, filp, poll_table);
6661 }
6662 
6663 /* Must be called with iter->mutex held. */
6664 static int tracing_wait_pipe(struct file *filp)
6665 {
6666 	struct trace_iterator *iter = filp->private_data;
6667 	int ret;
6668 
6669 	while (trace_empty(iter)) {
6670 
6671 		if ((filp->f_flags & O_NONBLOCK)) {
6672 			return -EAGAIN;
6673 		}
6674 
6675 		/*
6676 		 * We block until we read something and tracing is disabled.
6677 		 * We still block if tracing is disabled, but we have never
6678 		 * read anything. This allows a user to cat this file, and
6679 		 * then enable tracing. But after we have read something,
6680 		 * we give an EOF when tracing is again disabled.
6681 		 *
6682 		 * iter->pos will be 0 if we haven't read anything.
6683 		 */
6684 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6685 			break;
6686 
6687 		mutex_unlock(&iter->mutex);
6688 
6689 		ret = wait_on_pipe(iter, 0);
6690 
6691 		mutex_lock(&iter->mutex);
6692 
6693 		if (ret)
6694 			return ret;
6695 	}
6696 
6697 	return 1;
6698 }
6699 
6700 static bool update_last_data_if_empty(struct trace_array *tr)
6701 {
6702 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6703 		return false;
6704 
6705 	if (!ring_buffer_empty(tr->array_buffer.buffer))
6706 		return false;
6707 
6708 	/*
6709 	 * If the buffer contains the last boot data and all per-cpu
6710 	 * buffers are empty, reset it from the kernel side.
6711 	 */
6712 	update_last_data(tr);
6713 	return true;
6714 }
6715 
6716 /*
6717  * Consumer reader.
6718  */
6719 static ssize_t
6720 tracing_read_pipe(struct file *filp, char __user *ubuf,
6721 		  size_t cnt, loff_t *ppos)
6722 {
6723 	struct trace_iterator *iter = filp->private_data;
6724 	ssize_t sret;
6725 
6726 	/*
6727 	 * Avoid more than one consumer on a single file descriptor
6728 	 * This is just a matter of traces coherency, the ring buffer itself
6729 	 * is protected.
6730 	 */
6731 	guard(mutex)(&iter->mutex);
6732 
6733 	/* return any leftover data */
6734 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6735 	if (sret != -EBUSY)
6736 		return sret;
6737 
6738 	trace_seq_init(&iter->seq);
6739 
6740 	if (iter->trace->read) {
6741 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6742 		if (sret)
6743 			return sret;
6744 	}
6745 
6746 waitagain:
6747 	if (update_last_data_if_empty(iter->tr))
6748 		return 0;
6749 
6750 	sret = tracing_wait_pipe(filp);
6751 	if (sret <= 0)
6752 		return sret;
6753 
6754 	/* stop when tracing is finished */
6755 	if (trace_empty(iter))
6756 		return 0;
6757 
6758 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6759 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6760 
6761 	/* reset all but tr, trace, and overruns */
6762 	trace_iterator_reset(iter);
6763 	cpumask_clear(iter->started);
6764 	trace_seq_init(&iter->seq);
6765 
6766 	trace_event_read_lock();
6767 	trace_access_lock(iter->cpu_file);
6768 	while (trace_find_next_entry_inc(iter) != NULL) {
6769 		enum print_line_t ret;
6770 		int save_len = iter->seq.seq.len;
6771 
6772 		ret = print_trace_line(iter);
6773 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6774 			/*
6775 			 * If one print_trace_line() fills entire trace_seq in one shot,
6776 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6777 			 * In this case, we need to consume it, otherwise, loop will peek
6778 			 * this event next time, resulting in an infinite loop.
6779 			 */
6780 			if (save_len == 0) {
6781 				iter->seq.full = 0;
6782 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6783 				trace_consume(iter);
6784 				break;
6785 			}
6786 
6787 			/* In other cases, don't print partial lines */
6788 			iter->seq.seq.len = save_len;
6789 			break;
6790 		}
6791 		if (ret != TRACE_TYPE_NO_CONSUME)
6792 			trace_consume(iter);
6793 
6794 		if (trace_seq_used(&iter->seq) >= cnt)
6795 			break;
6796 
6797 		/*
6798 		 * Setting the full flag means we reached the trace_seq buffer
6799 		 * size and we should leave by partial output condition above.
6800 		 * One of the trace_seq_* functions is not used properly.
6801 		 */
6802 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6803 			  iter->ent->type);
6804 	}
6805 	trace_access_unlock(iter->cpu_file);
6806 	trace_event_read_unlock();
6807 
6808 	/* Now copy what we have to the user */
6809 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6810 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6811 		trace_seq_init(&iter->seq);
6812 
6813 	/*
6814 	 * If there was nothing to send to user, in spite of consuming trace
6815 	 * entries, go back to wait for more entries.
6816 	 */
6817 	if (sret == -EBUSY)
6818 		goto waitagain;
6819 
6820 	return sret;
6821 }
6822 
6823 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6824 				     unsigned int idx)
6825 {
6826 	__free_page(spd->pages[idx]);
6827 }
6828 
6829 static size_t
6830 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6831 {
6832 	size_t count;
6833 	int save_len;
6834 	int ret;
6835 
6836 	/* Seq buffer is page-sized, exactly what we need. */
6837 	for (;;) {
6838 		save_len = iter->seq.seq.len;
6839 		ret = print_trace_line(iter);
6840 
6841 		if (trace_seq_has_overflowed(&iter->seq)) {
6842 			iter->seq.seq.len = save_len;
6843 			break;
6844 		}
6845 
6846 		/*
6847 		 * This should not be hit, because it should only
6848 		 * be set if the iter->seq overflowed. But check it
6849 		 * anyway to be safe.
6850 		 */
6851 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6852 			iter->seq.seq.len = save_len;
6853 			break;
6854 		}
6855 
6856 		count = trace_seq_used(&iter->seq) - save_len;
6857 		if (rem < count) {
6858 			rem = 0;
6859 			iter->seq.seq.len = save_len;
6860 			break;
6861 		}
6862 
6863 		if (ret != TRACE_TYPE_NO_CONSUME)
6864 			trace_consume(iter);
6865 		rem -= count;
6866 		if (!trace_find_next_entry_inc(iter))	{
6867 			rem = 0;
6868 			iter->ent = NULL;
6869 			break;
6870 		}
6871 	}
6872 
6873 	return rem;
6874 }
6875 
6876 static ssize_t tracing_splice_read_pipe(struct file *filp,
6877 					loff_t *ppos,
6878 					struct pipe_inode_info *pipe,
6879 					size_t len,
6880 					unsigned int flags)
6881 {
6882 	struct page *pages_def[PIPE_DEF_BUFFERS];
6883 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6884 	struct trace_iterator *iter = filp->private_data;
6885 	struct splice_pipe_desc spd = {
6886 		.pages		= pages_def,
6887 		.partial	= partial_def,
6888 		.nr_pages	= 0, /* This gets updated below. */
6889 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6890 		.ops		= &default_pipe_buf_ops,
6891 		.spd_release	= tracing_spd_release_pipe,
6892 	};
6893 	ssize_t ret;
6894 	size_t rem;
6895 	unsigned int i;
6896 
6897 	if (splice_grow_spd(pipe, &spd))
6898 		return -ENOMEM;
6899 
6900 	mutex_lock(&iter->mutex);
6901 
6902 	if (iter->trace->splice_read) {
6903 		ret = iter->trace->splice_read(iter, filp,
6904 					       ppos, pipe, len, flags);
6905 		if (ret)
6906 			goto out_err;
6907 	}
6908 
6909 	ret = tracing_wait_pipe(filp);
6910 	if (ret <= 0)
6911 		goto out_err;
6912 
6913 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6914 		ret = -EFAULT;
6915 		goto out_err;
6916 	}
6917 
6918 	trace_event_read_lock();
6919 	trace_access_lock(iter->cpu_file);
6920 
6921 	/* Fill as many pages as possible. */
6922 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6923 		spd.pages[i] = alloc_page(GFP_KERNEL);
6924 		if (!spd.pages[i])
6925 			break;
6926 
6927 		rem = tracing_fill_pipe_page(rem, iter);
6928 
6929 		/* Copy the data into the page, so we can start over. */
6930 		ret = trace_seq_to_buffer(&iter->seq,
6931 					  page_address(spd.pages[i]),
6932 					  min((size_t)trace_seq_used(&iter->seq),
6933 						  (size_t)PAGE_SIZE));
6934 		if (ret < 0) {
6935 			__free_page(spd.pages[i]);
6936 			break;
6937 		}
6938 		spd.partial[i].offset = 0;
6939 		spd.partial[i].len = ret;
6940 
6941 		trace_seq_init(&iter->seq);
6942 	}
6943 
6944 	trace_access_unlock(iter->cpu_file);
6945 	trace_event_read_unlock();
6946 	mutex_unlock(&iter->mutex);
6947 
6948 	spd.nr_pages = i;
6949 
6950 	if (i)
6951 		ret = splice_to_pipe(pipe, &spd);
6952 	else
6953 		ret = 0;
6954 out:
6955 	splice_shrink_spd(&spd);
6956 	return ret;
6957 
6958 out_err:
6959 	mutex_unlock(&iter->mutex);
6960 	goto out;
6961 }
6962 
6963 static ssize_t
6964 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6965 			 size_t cnt, loff_t *ppos)
6966 {
6967 	struct inode *inode = file_inode(filp);
6968 	struct trace_array *tr = inode->i_private;
6969 	char buf[64];
6970 	int r;
6971 
6972 	r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6973 
6974 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6975 }
6976 
6977 static ssize_t
6978 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6979 			  size_t cnt, loff_t *ppos)
6980 {
6981 	struct inode *inode = file_inode(filp);
6982 	struct trace_array *tr = inode->i_private;
6983 	unsigned long val;
6984 	int ret;
6985 
6986 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6987 	if (ret)
6988 		return ret;
6989 
6990 	if (val > SYSCALL_FAULT_USER_MAX)
6991 		val = SYSCALL_FAULT_USER_MAX;
6992 
6993 	tr->syscall_buf_sz = val;
6994 
6995 	*ppos += cnt;
6996 
6997 	return cnt;
6998 }
6999 
7000 static ssize_t
7001 tracing_entries_read(struct file *filp, char __user *ubuf,
7002 		     size_t cnt, loff_t *ppos)
7003 {
7004 	struct inode *inode = file_inode(filp);
7005 	struct trace_array *tr = inode->i_private;
7006 	int cpu = tracing_get_cpu(inode);
7007 	char buf[64];
7008 	int r = 0;
7009 	ssize_t ret;
7010 
7011 	mutex_lock(&trace_types_lock);
7012 
7013 	if (cpu == RING_BUFFER_ALL_CPUS) {
7014 		int cpu, buf_size_same;
7015 		unsigned long size;
7016 
7017 		size = 0;
7018 		buf_size_same = 1;
7019 		/* check if all cpu sizes are same */
7020 		for_each_tracing_cpu(cpu) {
7021 			/* fill in the size from first enabled cpu */
7022 			if (size == 0)
7023 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7024 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7025 				buf_size_same = 0;
7026 				break;
7027 			}
7028 		}
7029 
7030 		if (buf_size_same) {
7031 			if (!tr->ring_buffer_expanded)
7032 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7033 					    size >> 10,
7034 					    trace_buf_size >> 10);
7035 			else
7036 				r = sprintf(buf, "%lu\n", size >> 10);
7037 		} else
7038 			r = sprintf(buf, "X\n");
7039 	} else
7040 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7041 
7042 	mutex_unlock(&trace_types_lock);
7043 
7044 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7045 	return ret;
7046 }
7047 
7048 static ssize_t
7049 tracing_entries_write(struct file *filp, const char __user *ubuf,
7050 		      size_t cnt, loff_t *ppos)
7051 {
7052 	struct inode *inode = file_inode(filp);
7053 	struct trace_array *tr = inode->i_private;
7054 	unsigned long val;
7055 	int ret;
7056 
7057 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7058 	if (ret)
7059 		return ret;
7060 
7061 	/* must have at least 1 entry */
7062 	if (!val)
7063 		return -EINVAL;
7064 
7065 	/* value is in KB */
7066 	val <<= 10;
7067 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7068 	if (ret < 0)
7069 		return ret;
7070 
7071 	*ppos += cnt;
7072 
7073 	return cnt;
7074 }
7075 
7076 static ssize_t
7077 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7078 				size_t cnt, loff_t *ppos)
7079 {
7080 	struct trace_array *tr = filp->private_data;
7081 	char buf[64];
7082 	int r, cpu;
7083 	unsigned long size = 0, expanded_size = 0;
7084 
7085 	mutex_lock(&trace_types_lock);
7086 	for_each_tracing_cpu(cpu) {
7087 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7088 		if (!tr->ring_buffer_expanded)
7089 			expanded_size += trace_buf_size >> 10;
7090 	}
7091 	if (tr->ring_buffer_expanded)
7092 		r = sprintf(buf, "%lu\n", size);
7093 	else
7094 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7095 	mutex_unlock(&trace_types_lock);
7096 
7097 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7098 }
7099 
7100 #define LAST_BOOT_HEADER ((void *)1)
7101 
7102 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7103 {
7104 	struct trace_array *tr = m->private;
7105 	struct trace_scratch *tscratch = tr->scratch;
7106 	unsigned int index = *pos;
7107 
7108 	(*pos)++;
7109 
7110 	if (*pos == 1)
7111 		return LAST_BOOT_HEADER;
7112 
7113 	/* Only show offsets of the last boot data */
7114 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7115 		return NULL;
7116 
7117 	/* *pos 0 is for the header, 1 is for the first module */
7118 	index--;
7119 
7120 	if (index >= tscratch->nr_entries)
7121 		return NULL;
7122 
7123 	return &tscratch->entries[index];
7124 }
7125 
7126 static void *l_start(struct seq_file *m, loff_t *pos)
7127 {
7128 	mutex_lock(&scratch_mutex);
7129 
7130 	return l_next(m, NULL, pos);
7131 }
7132 
7133 static void l_stop(struct seq_file *m, void *p)
7134 {
7135 	mutex_unlock(&scratch_mutex);
7136 }
7137 
7138 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7139 {
7140 	struct trace_scratch *tscratch = tr->scratch;
7141 
7142 	/*
7143 	 * Do not leak KASLR address. This only shows the KASLR address of
7144 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7145 	 * flag gets cleared, and this should only report "current".
7146 	 * Otherwise it shows the KASLR address from the previous boot which
7147 	 * should not be the same as the current boot.
7148 	 */
7149 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7150 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7151 	else
7152 		seq_puts(m, "# Current\n");
7153 }
7154 
7155 static int l_show(struct seq_file *m, void *v)
7156 {
7157 	struct trace_array *tr = m->private;
7158 	struct trace_mod_entry *entry = v;
7159 
7160 	if (v == LAST_BOOT_HEADER) {
7161 		show_last_boot_header(m, tr);
7162 		return 0;
7163 	}
7164 
7165 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7166 	return 0;
7167 }
7168 
7169 static const struct seq_operations last_boot_seq_ops = {
7170 	.start		= l_start,
7171 	.next		= l_next,
7172 	.stop		= l_stop,
7173 	.show		= l_show,
7174 };
7175 
7176 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7177 {
7178 	struct trace_array *tr = inode->i_private;
7179 	struct seq_file *m;
7180 	int ret;
7181 
7182 	ret = tracing_check_open_get_tr(tr);
7183 	if (ret)
7184 		return ret;
7185 
7186 	ret = seq_open(file, &last_boot_seq_ops);
7187 	if (ret) {
7188 		trace_array_put(tr);
7189 		return ret;
7190 	}
7191 
7192 	m = file->private_data;
7193 	m->private = tr;
7194 
7195 	return 0;
7196 }
7197 
7198 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7199 {
7200 	struct trace_array *tr = inode->i_private;
7201 	int cpu = tracing_get_cpu(inode);
7202 	int ret;
7203 
7204 	ret = tracing_check_open_get_tr(tr);
7205 	if (ret)
7206 		return ret;
7207 
7208 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7209 	if (ret < 0)
7210 		__trace_array_put(tr);
7211 	return ret;
7212 }
7213 
7214 static ssize_t
7215 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7216 			  size_t cnt, loff_t *ppos)
7217 {
7218 	/*
7219 	 * There is no need to read what the user has written, this function
7220 	 * is just to make sure that there is no error when "echo" is used
7221 	 */
7222 
7223 	*ppos += cnt;
7224 
7225 	return cnt;
7226 }
7227 
7228 static int
7229 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7230 {
7231 	struct trace_array *tr = inode->i_private;
7232 
7233 	/* disable tracing ? */
7234 	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
7235 		tracer_tracing_off(tr);
7236 	/* resize the ring buffer to 0 */
7237 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7238 
7239 	trace_array_put(tr);
7240 
7241 	return 0;
7242 }
7243 
7244 #define TRACE_MARKER_MAX_SIZE		4096
7245 
7246 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
7247 				      size_t cnt, unsigned long ip)
7248 {
7249 	struct ring_buffer_event *event;
7250 	enum event_trigger_type tt = ETT_NONE;
7251 	struct trace_buffer *buffer;
7252 	struct print_entry *entry;
7253 	int meta_size;
7254 	ssize_t written;
7255 	size_t size;
7256 
7257 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7258  again:
7259 	size = cnt + meta_size;
7260 
7261 	buffer = tr->array_buffer.buffer;
7262 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7263 					    tracing_gen_ctx());
7264 	if (unlikely(!event)) {
7265 		/*
7266 		 * If the size was greater than what was allowed, then
7267 		 * make it smaller and try again.
7268 		 */
7269 		if (size > ring_buffer_max_event_size(buffer)) {
7270 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7271 			/* The above should only happen once */
7272 			if (WARN_ON_ONCE(cnt + meta_size == size))
7273 				return -EBADF;
7274 			goto again;
7275 		}
7276 
7277 		/* Ring buffer disabled, return as if not open for write */
7278 		return -EBADF;
7279 	}
7280 
7281 	entry = ring_buffer_event_data(event);
7282 	entry->ip = ip;
7283 	memcpy(&entry->buf, buf, cnt);
7284 	written = cnt;
7285 
7286 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7287 		/* do not add \n before testing triggers, but add \0 */
7288 		entry->buf[cnt] = '\0';
7289 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7290 	}
7291 
7292 	if (entry->buf[cnt - 1] != '\n') {
7293 		entry->buf[cnt] = '\n';
7294 		entry->buf[cnt + 1] = '\0';
7295 	} else
7296 		entry->buf[cnt] = '\0';
7297 
7298 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7299 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7300 	__buffer_unlock_commit(buffer, event);
7301 
7302 	if (tt)
7303 		event_triggers_post_call(tr->trace_marker_file, tt);
7304 
7305 	return written;
7306 }
7307 
7308 struct trace_user_buf {
7309 	char		*buf;
7310 };
7311 
7312 static DEFINE_MUTEX(trace_user_buffer_mutex);
7313 static struct trace_user_buf_info *trace_user_buffer;
7314 
7315 /**
7316  * trace_user_fault_destroy - free up allocated memory of a trace user buffer
7317  * @tinfo: The descriptor to free up
7318  *
7319  * Frees any data allocated in the trace info dsecriptor.
7320  */
7321 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
7322 {
7323 	char *buf;
7324 	int cpu;
7325 
7326 	if (!tinfo || !tinfo->tbuf)
7327 		return;
7328 
7329 	for_each_possible_cpu(cpu) {
7330 		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7331 		kfree(buf);
7332 	}
7333 	free_percpu(tinfo->tbuf);
7334 }
7335 
7336 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
7337 {
7338 	char *buf;
7339 	int cpu;
7340 
7341 	lockdep_assert_held(&trace_user_buffer_mutex);
7342 
7343 	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
7344 	if (!tinfo->tbuf)
7345 		return -ENOMEM;
7346 
7347 	tinfo->ref = 1;
7348 	tinfo->size = size;
7349 
7350 	/* Clear each buffer in case of error */
7351 	for_each_possible_cpu(cpu) {
7352 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
7353 	}
7354 
7355 	for_each_possible_cpu(cpu) {
7356 		buf = kmalloc_node(size, GFP_KERNEL,
7357 				   cpu_to_node(cpu));
7358 		if (!buf)
7359 			return -ENOMEM;
7360 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
7361 	}
7362 
7363 	return 0;
7364 }
7365 
7366 /* For internal use. Free and reinitialize */
7367 static void user_buffer_free(struct trace_user_buf_info **tinfo)
7368 {
7369 	lockdep_assert_held(&trace_user_buffer_mutex);
7370 
7371 	trace_user_fault_destroy(*tinfo);
7372 	kfree(*tinfo);
7373 	*tinfo = NULL;
7374 }
7375 
7376 /* For internal use. Initialize and allocate */
7377 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
7378 {
7379 	bool alloc = false;
7380 	int ret;
7381 
7382 	lockdep_assert_held(&trace_user_buffer_mutex);
7383 
7384 	if (!*tinfo) {
7385 		alloc = true;
7386 		*tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL);
7387 		if (!*tinfo)
7388 			return -ENOMEM;
7389 	}
7390 
7391 	ret = user_fault_buffer_enable(*tinfo, size);
7392 	if (ret < 0 && alloc)
7393 		user_buffer_free(tinfo);
7394 
7395 	return ret;
7396 }
7397 
7398 /* For internal use, derefrence and free if necessary */
7399 static void user_buffer_put(struct trace_user_buf_info **tinfo)
7400 {
7401 	guard(mutex)(&trace_user_buffer_mutex);
7402 
7403 	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
7404 		return;
7405 
7406 	if (--(*tinfo)->ref)
7407 		return;
7408 
7409 	user_buffer_free(tinfo);
7410 }
7411 
7412 /**
7413  * trace_user_fault_init - Allocated or reference a per CPU buffer
7414  * @tinfo: A pointer to the trace buffer descriptor
7415  * @size: The size to allocate each per CPU buffer
7416  *
7417  * Create a per CPU buffer that can be used to copy from user space
7418  * in a task context. When calling trace_user_fault_read(), preemption
7419  * must be disabled, and it will enable preemption and copy user
7420  * space data to the buffer. If any schedule switches occur, it will
7421  * retry until it succeeds without a schedule switch knowing the buffer
7422  * is still valid.
7423  *
7424  * Returns 0 on success, negative on failure.
7425  */
7426 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
7427 {
7428 	int ret;
7429 
7430 	if (!tinfo)
7431 		return -EINVAL;
7432 
7433 	guard(mutex)(&trace_user_buffer_mutex);
7434 
7435 	ret = user_buffer_init(&tinfo, size);
7436 	if (ret < 0)
7437 		trace_user_fault_destroy(tinfo);
7438 
7439 	return ret;
7440 }
7441 
7442 /**
7443  * trace_user_fault_get - up the ref count for the user buffer
7444  * @tinfo: A pointer to a pointer to the trace buffer descriptor
7445  *
7446  * Ups the ref count of the trace buffer.
7447  *
7448  * Returns the new ref count.
7449  */
7450 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
7451 {
7452 	if (!tinfo)
7453 		return -1;
7454 
7455 	guard(mutex)(&trace_user_buffer_mutex);
7456 
7457 	tinfo->ref++;
7458 	return tinfo->ref;
7459 }
7460 
7461 /**
7462  * trace_user_fault_put - dereference a per cpu trace buffer
7463  * @tinfo: The @tinfo that was passed to trace_user_fault_get()
7464  *
7465  * Decrement the ref count of @tinfo.
7466  *
7467  * Returns the new refcount (negative on error).
7468  */
7469 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
7470 {
7471 	guard(mutex)(&trace_user_buffer_mutex);
7472 
7473 	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
7474 		return -1;
7475 
7476 	--tinfo->ref;
7477 	return tinfo->ref;
7478 }
7479 
7480 /**
7481  * trace_user_fault_read - Read user space into a per CPU buffer
7482  * @tinfo: The @tinfo allocated by trace_user_fault_get()
7483  * @ptr: The user space pointer to read
7484  * @size: The size of user space to read.
7485  * @copy_func: Optional function to use to copy from user space
7486  * @data: Data to pass to copy_func if it was supplied
7487  *
7488  * Preemption must be disabled when this is called, and must not
7489  * be enabled while using the returned buffer.
7490  * This does the copying from user space into a per CPU buffer.
7491  *
7492  * The @size must not be greater than the size passed in to
7493  * trace_user_fault_init().
7494  *
7495  * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
7496  * otherwise it will call @copy_func. It will call @copy_func with:
7497  *
7498  *   buffer: the per CPU buffer of the @tinfo.
7499  *   ptr: The pointer @ptr to user space to read
7500  *   size: The @size of the ptr to read
7501  *   data: The @data parameter
7502  *
7503  * It is expected that @copy_func will return 0 on success and non zero
7504  * if there was a fault.
7505  *
7506  * Returns a pointer to the buffer with the content read from @ptr.
7507  *   Preemption must remain disabled while the caller accesses the
7508  *   buffer returned by this function.
7509  * Returns NULL if there was a fault, or the size passed in is
7510  *   greater than the size passed to trace_user_fault_init().
7511  */
7512 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
7513 			     const char __user *ptr, size_t size,
7514 			     trace_user_buf_copy copy_func, void *data)
7515 {
7516 	int cpu = smp_processor_id();
7517 	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7518 	unsigned int cnt;
7519 	int trys = 0;
7520 	int ret;
7521 
7522 	lockdep_assert_preemption_disabled();
7523 
7524 	/*
7525 	 * It's up to the caller to not try to copy more than it said
7526 	 * it would.
7527 	 */
7528 	if (size > tinfo->size)
7529 		return NULL;
7530 
7531 	/*
7532 	 * This acts similar to a seqcount. The per CPU context switches are
7533 	 * recorded, migration is disabled and preemption is enabled. The
7534 	 * read of the user space memory is copied into the per CPU buffer.
7535 	 * Preemption is disabled again, and if the per CPU context switches count
7536 	 * is still the same, it means the buffer has not been corrupted.
7537 	 * If the count is different, it is assumed the buffer is corrupted
7538 	 * and reading must be tried again.
7539 	 */
7540 
7541 	do {
7542 		/*
7543 		 * If for some reason, copy_from_user() always causes a context
7544 		 * switch, this would then cause an infinite loop.
7545 		 * If this task is preempted by another user space task, it
7546 		 * will cause this task to try again. But just in case something
7547 		 * changes where the copying from user space causes another task
7548 		 * to run, prevent this from going into an infinite loop.
7549 		 * 100 tries should be plenty.
7550 		 */
7551 		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
7552 			return NULL;
7553 
7554 		/* Read the current CPU context switch counter */
7555 		cnt = nr_context_switches_cpu(cpu);
7556 
7557 		/*
7558 		 * Preemption is going to be enabled, but this task must
7559 		 * remain on this CPU.
7560 		 */
7561 		migrate_disable();
7562 
7563 		/*
7564 		 * Now preemption is being enabled and another task can come in
7565 		 * and use the same buffer and corrupt our data.
7566 		 */
7567 		preempt_enable_notrace();
7568 
7569 		/* Make sure preemption is enabled here */
7570 		lockdep_assert_preemption_enabled();
7571 
7572 		if (copy_func) {
7573 			ret = copy_func(buffer, ptr, size, data);
7574 		} else {
7575 			ret = __copy_from_user(buffer, ptr, size);
7576 		}
7577 
7578 		preempt_disable_notrace();
7579 		migrate_enable();
7580 
7581 		/* if it faulted, no need to test if the buffer was corrupted */
7582 		if (ret)
7583 			return NULL;
7584 
7585 		/*
7586 		 * Preemption is disabled again, now check the per CPU context
7587 		 * switch counter. If it doesn't match, then another user space
7588 		 * process may have schedule in and corrupted our buffer. In that
7589 		 * case the copying must be retried.
7590 		 */
7591 	} while (nr_context_switches_cpu(cpu) != cnt);
7592 
7593 	return buffer;
7594 }
7595 
7596 static ssize_t
7597 tracing_mark_write(struct file *filp, const char __user *ubuf,
7598 					size_t cnt, loff_t *fpos)
7599 {
7600 	struct trace_array *tr = filp->private_data;
7601 	ssize_t written = -ENODEV;
7602 	unsigned long ip;
7603 	char *buf;
7604 
7605 	if (tracing_disabled)
7606 		return -EINVAL;
7607 
7608 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7609 		return -EINVAL;
7610 
7611 	if ((ssize_t)cnt < 0)
7612 		return -EINVAL;
7613 
7614 	if (cnt > TRACE_MARKER_MAX_SIZE)
7615 		cnt = TRACE_MARKER_MAX_SIZE;
7616 
7617 	/* Must have preemption disabled while having access to the buffer */
7618 	guard(preempt_notrace)();
7619 
7620 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7621 	if (!buf)
7622 		return -EFAULT;
7623 
7624 	/* The selftests expect this function to be the IP address */
7625 	ip = _THIS_IP_;
7626 
7627 	/* The global trace_marker can go to multiple instances */
7628 	if (tr == &global_trace) {
7629 		guard(rcu)();
7630 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7631 			written = write_marker_to_buffer(tr, buf, cnt, ip);
7632 			if (written < 0)
7633 				break;
7634 		}
7635 	} else {
7636 		written = write_marker_to_buffer(tr, buf, cnt, ip);
7637 	}
7638 
7639 	return written;
7640 }
7641 
7642 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7643 					  const char *buf, size_t cnt)
7644 {
7645 	struct ring_buffer_event *event;
7646 	struct trace_buffer *buffer;
7647 	struct raw_data_entry *entry;
7648 	ssize_t written;
7649 	size_t size;
7650 
7651 	/* cnt includes both the entry->id and the data behind it. */
7652 	size = struct_offset(entry, id) + cnt;
7653 
7654 	buffer = tr->array_buffer.buffer;
7655 
7656 	if (size > ring_buffer_max_event_size(buffer))
7657 		return -EINVAL;
7658 
7659 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7660 					    tracing_gen_ctx());
7661 	if (!event)
7662 		/* Ring buffer disabled, return as if not open for write */
7663 		return -EBADF;
7664 
7665 	entry = ring_buffer_event_data(event);
7666 	unsafe_memcpy(&entry->id, buf, cnt,
7667 		      "id and content already reserved on ring buffer"
7668 		      "'buf' includes the 'id' and the data."
7669 		      "'entry' was allocated with cnt from 'id'.");
7670 	written = cnt;
7671 
7672 	__buffer_unlock_commit(buffer, event);
7673 
7674 	return written;
7675 }
7676 
7677 static ssize_t
7678 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7679 					size_t cnt, loff_t *fpos)
7680 {
7681 	struct trace_array *tr = filp->private_data;
7682 	ssize_t written = -ENODEV;
7683 	char *buf;
7684 
7685 	if (tracing_disabled)
7686 		return -EINVAL;
7687 
7688 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7689 		return -EINVAL;
7690 
7691 	/* The marker must at least have a tag id */
7692 	if (cnt < sizeof(unsigned int))
7693 		return -EINVAL;
7694 
7695 	/* raw write is all or nothing */
7696 	if (cnt > TRACE_MARKER_MAX_SIZE)
7697 		return -EINVAL;
7698 
7699 	/* Must have preemption disabled while having access to the buffer */
7700 	guard(preempt_notrace)();
7701 
7702 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7703 	if (!buf)
7704 		return -EFAULT;
7705 
7706 	/* The global trace_marker_raw can go to multiple instances */
7707 	if (tr == &global_trace) {
7708 		guard(rcu)();
7709 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7710 			written = write_raw_marker_to_buffer(tr, buf, cnt);
7711 			if (written < 0)
7712 				break;
7713 		}
7714 	} else {
7715 		written = write_raw_marker_to_buffer(tr, buf, cnt);
7716 	}
7717 
7718 	return written;
7719 }
7720 
7721 static int tracing_mark_open(struct inode *inode, struct file *filp)
7722 {
7723 	int ret;
7724 
7725 	scoped_guard(mutex, &trace_user_buffer_mutex) {
7726 		if (!trace_user_buffer) {
7727 			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
7728 			if (ret < 0)
7729 				return ret;
7730 		} else {
7731 			trace_user_buffer->ref++;
7732 		}
7733 	}
7734 
7735 	stream_open(inode, filp);
7736 	ret = tracing_open_generic_tr(inode, filp);
7737 	if (ret < 0)
7738 		user_buffer_put(&trace_user_buffer);
7739 	return ret;
7740 }
7741 
7742 static int tracing_mark_release(struct inode *inode, struct file *file)
7743 {
7744 	user_buffer_put(&trace_user_buffer);
7745 	return tracing_release_generic_tr(inode, file);
7746 }
7747 
7748 static int tracing_clock_show(struct seq_file *m, void *v)
7749 {
7750 	struct trace_array *tr = m->private;
7751 	int i;
7752 
7753 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7754 		seq_printf(m,
7755 			"%s%s%s%s", i ? " " : "",
7756 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7757 			i == tr->clock_id ? "]" : "");
7758 	seq_putc(m, '\n');
7759 
7760 	return 0;
7761 }
7762 
7763 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7764 {
7765 	int i;
7766 
7767 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7768 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7769 			break;
7770 	}
7771 	if (i == ARRAY_SIZE(trace_clocks))
7772 		return -EINVAL;
7773 
7774 	guard(mutex)(&trace_types_lock);
7775 
7776 	tr->clock_id = i;
7777 
7778 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7779 
7780 	/*
7781 	 * New clock may not be consistent with the previous clock.
7782 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7783 	 */
7784 	tracing_reset_online_cpus(&tr->array_buffer);
7785 
7786 #ifdef CONFIG_TRACER_MAX_TRACE
7787 	if (tr->max_buffer.buffer)
7788 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7789 	tracing_reset_online_cpus(&tr->max_buffer);
7790 #endif
7791 
7792 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7793 		struct trace_scratch *tscratch = tr->scratch;
7794 
7795 		tscratch->clock_id = i;
7796 	}
7797 
7798 	return 0;
7799 }
7800 
7801 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7802 				   size_t cnt, loff_t *fpos)
7803 {
7804 	struct seq_file *m = filp->private_data;
7805 	struct trace_array *tr = m->private;
7806 	char buf[64];
7807 	const char *clockstr;
7808 	int ret;
7809 
7810 	if (cnt >= sizeof(buf))
7811 		return -EINVAL;
7812 
7813 	if (copy_from_user(buf, ubuf, cnt))
7814 		return -EFAULT;
7815 
7816 	buf[cnt] = 0;
7817 
7818 	clockstr = strstrip(buf);
7819 
7820 	ret = tracing_set_clock(tr, clockstr);
7821 	if (ret)
7822 		return ret;
7823 
7824 	*fpos += cnt;
7825 
7826 	return cnt;
7827 }
7828 
7829 static int tracing_clock_open(struct inode *inode, struct file *file)
7830 {
7831 	struct trace_array *tr = inode->i_private;
7832 	int ret;
7833 
7834 	ret = tracing_check_open_get_tr(tr);
7835 	if (ret)
7836 		return ret;
7837 
7838 	ret = single_open(file, tracing_clock_show, inode->i_private);
7839 	if (ret < 0)
7840 		trace_array_put(tr);
7841 
7842 	return ret;
7843 }
7844 
7845 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7846 {
7847 	struct trace_array *tr = m->private;
7848 
7849 	guard(mutex)(&trace_types_lock);
7850 
7851 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7852 		seq_puts(m, "delta [absolute]\n");
7853 	else
7854 		seq_puts(m, "[delta] absolute\n");
7855 
7856 	return 0;
7857 }
7858 
7859 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7860 {
7861 	struct trace_array *tr = inode->i_private;
7862 	int ret;
7863 
7864 	ret = tracing_check_open_get_tr(tr);
7865 	if (ret)
7866 		return ret;
7867 
7868 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7869 	if (ret < 0)
7870 		trace_array_put(tr);
7871 
7872 	return ret;
7873 }
7874 
7875 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7876 {
7877 	if (rbe == this_cpu_read(trace_buffered_event))
7878 		return ring_buffer_time_stamp(buffer);
7879 
7880 	return ring_buffer_event_time_stamp(buffer, rbe);
7881 }
7882 
7883 /*
7884  * Set or disable using the per CPU trace_buffer_event when possible.
7885  */
7886 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7887 {
7888 	guard(mutex)(&trace_types_lock);
7889 
7890 	if (set && tr->no_filter_buffering_ref++)
7891 		return 0;
7892 
7893 	if (!set) {
7894 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7895 			return -EINVAL;
7896 
7897 		--tr->no_filter_buffering_ref;
7898 	}
7899 
7900 	return 0;
7901 }
7902 
7903 struct ftrace_buffer_info {
7904 	struct trace_iterator	iter;
7905 	void			*spare;
7906 	unsigned int		spare_cpu;
7907 	unsigned int		spare_size;
7908 	unsigned int		read;
7909 };
7910 
7911 #ifdef CONFIG_TRACER_SNAPSHOT
7912 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7913 {
7914 	struct trace_array *tr = inode->i_private;
7915 	struct trace_iterator *iter;
7916 	struct seq_file *m;
7917 	int ret;
7918 
7919 	ret = tracing_check_open_get_tr(tr);
7920 	if (ret)
7921 		return ret;
7922 
7923 	if (file->f_mode & FMODE_READ) {
7924 		iter = __tracing_open(inode, file, true);
7925 		if (IS_ERR(iter))
7926 			ret = PTR_ERR(iter);
7927 	} else {
7928 		/* Writes still need the seq_file to hold the private data */
7929 		ret = -ENOMEM;
7930 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7931 		if (!m)
7932 			goto out;
7933 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7934 		if (!iter) {
7935 			kfree(m);
7936 			goto out;
7937 		}
7938 		ret = 0;
7939 
7940 		iter->tr = tr;
7941 		iter->array_buffer = &tr->max_buffer;
7942 		iter->cpu_file = tracing_get_cpu(inode);
7943 		m->private = iter;
7944 		file->private_data = m;
7945 	}
7946 out:
7947 	if (ret < 0)
7948 		trace_array_put(tr);
7949 
7950 	return ret;
7951 }
7952 
7953 static void tracing_swap_cpu_buffer(void *tr)
7954 {
7955 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7956 }
7957 
7958 static ssize_t
7959 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7960 		       loff_t *ppos)
7961 {
7962 	struct seq_file *m = filp->private_data;
7963 	struct trace_iterator *iter = m->private;
7964 	struct trace_array *tr = iter->tr;
7965 	unsigned long val;
7966 	int ret;
7967 
7968 	ret = tracing_update_buffers(tr);
7969 	if (ret < 0)
7970 		return ret;
7971 
7972 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7973 	if (ret)
7974 		return ret;
7975 
7976 	guard(mutex)(&trace_types_lock);
7977 
7978 	if (tr->current_trace->use_max_tr)
7979 		return -EBUSY;
7980 
7981 	local_irq_disable();
7982 	arch_spin_lock(&tr->max_lock);
7983 	if (tr->cond_snapshot)
7984 		ret = -EBUSY;
7985 	arch_spin_unlock(&tr->max_lock);
7986 	local_irq_enable();
7987 	if (ret)
7988 		return ret;
7989 
7990 	switch (val) {
7991 	case 0:
7992 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7993 			return -EINVAL;
7994 		if (tr->allocated_snapshot)
7995 			free_snapshot(tr);
7996 		break;
7997 	case 1:
7998 /* Only allow per-cpu swap if the ring buffer supports it */
7999 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
8000 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
8001 			return -EINVAL;
8002 #endif
8003 		if (tr->allocated_snapshot)
8004 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
8005 					&tr->array_buffer, iter->cpu_file);
8006 
8007 		ret = tracing_arm_snapshot_locked(tr);
8008 		if (ret)
8009 			return ret;
8010 
8011 		/* Now, we're going to swap */
8012 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
8013 			local_irq_disable();
8014 			update_max_tr(tr, current, smp_processor_id(), NULL);
8015 			local_irq_enable();
8016 		} else {
8017 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
8018 						 (void *)tr, 1);
8019 		}
8020 		tracing_disarm_snapshot(tr);
8021 		break;
8022 	default:
8023 		if (tr->allocated_snapshot) {
8024 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
8025 				tracing_reset_online_cpus(&tr->max_buffer);
8026 			else
8027 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
8028 		}
8029 		break;
8030 	}
8031 
8032 	if (ret >= 0) {
8033 		*ppos += cnt;
8034 		ret = cnt;
8035 	}
8036 
8037 	return ret;
8038 }
8039 
8040 static int tracing_snapshot_release(struct inode *inode, struct file *file)
8041 {
8042 	struct seq_file *m = file->private_data;
8043 	int ret;
8044 
8045 	ret = tracing_release(inode, file);
8046 
8047 	if (file->f_mode & FMODE_READ)
8048 		return ret;
8049 
8050 	/* If write only, the seq_file is just a stub */
8051 	if (m)
8052 		kfree(m->private);
8053 	kfree(m);
8054 
8055 	return 0;
8056 }
8057 
8058 static int tracing_buffers_open(struct inode *inode, struct file *filp);
8059 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
8060 				    size_t count, loff_t *ppos);
8061 static int tracing_buffers_release(struct inode *inode, struct file *file);
8062 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8063 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
8064 
8065 static int snapshot_raw_open(struct inode *inode, struct file *filp)
8066 {
8067 	struct ftrace_buffer_info *info;
8068 	int ret;
8069 
8070 	/* The following checks for tracefs lockdown */
8071 	ret = tracing_buffers_open(inode, filp);
8072 	if (ret < 0)
8073 		return ret;
8074 
8075 	info = filp->private_data;
8076 
8077 	if (info->iter.trace->use_max_tr) {
8078 		tracing_buffers_release(inode, filp);
8079 		return -EBUSY;
8080 	}
8081 
8082 	info->iter.snapshot = true;
8083 	info->iter.array_buffer = &info->iter.tr->max_buffer;
8084 
8085 	return ret;
8086 }
8087 
8088 #endif /* CONFIG_TRACER_SNAPSHOT */
8089 
8090 
8091 static const struct file_operations tracing_thresh_fops = {
8092 	.open		= tracing_open_generic,
8093 	.read		= tracing_thresh_read,
8094 	.write		= tracing_thresh_write,
8095 	.llseek		= generic_file_llseek,
8096 };
8097 
8098 #ifdef CONFIG_TRACER_MAX_TRACE
8099 static const struct file_operations tracing_max_lat_fops = {
8100 	.open		= tracing_open_generic_tr,
8101 	.read		= tracing_max_lat_read,
8102 	.write		= tracing_max_lat_write,
8103 	.llseek		= generic_file_llseek,
8104 	.release	= tracing_release_generic_tr,
8105 };
8106 #endif
8107 
8108 static const struct file_operations set_tracer_fops = {
8109 	.open		= tracing_open_generic_tr,
8110 	.read		= tracing_set_trace_read,
8111 	.write		= tracing_set_trace_write,
8112 	.llseek		= generic_file_llseek,
8113 	.release	= tracing_release_generic_tr,
8114 };
8115 
8116 static const struct file_operations tracing_pipe_fops = {
8117 	.open		= tracing_open_pipe,
8118 	.poll		= tracing_poll_pipe,
8119 	.read		= tracing_read_pipe,
8120 	.splice_read	= tracing_splice_read_pipe,
8121 	.release	= tracing_release_pipe,
8122 };
8123 
8124 static const struct file_operations tracing_entries_fops = {
8125 	.open		= tracing_open_generic_tr,
8126 	.read		= tracing_entries_read,
8127 	.write		= tracing_entries_write,
8128 	.llseek		= generic_file_llseek,
8129 	.release	= tracing_release_generic_tr,
8130 };
8131 
8132 static const struct file_operations tracing_syscall_buf_fops = {
8133 	.open		= tracing_open_generic_tr,
8134 	.read		= tracing_syscall_buf_read,
8135 	.write		= tracing_syscall_buf_write,
8136 	.llseek		= generic_file_llseek,
8137 	.release	= tracing_release_generic_tr,
8138 };
8139 
8140 static const struct file_operations tracing_buffer_meta_fops = {
8141 	.open		= tracing_buffer_meta_open,
8142 	.read		= seq_read,
8143 	.llseek		= seq_lseek,
8144 	.release	= tracing_seq_release,
8145 };
8146 
8147 static const struct file_operations tracing_total_entries_fops = {
8148 	.open		= tracing_open_generic_tr,
8149 	.read		= tracing_total_entries_read,
8150 	.llseek		= generic_file_llseek,
8151 	.release	= tracing_release_generic_tr,
8152 };
8153 
8154 static const struct file_operations tracing_free_buffer_fops = {
8155 	.open		= tracing_open_generic_tr,
8156 	.write		= tracing_free_buffer_write,
8157 	.release	= tracing_free_buffer_release,
8158 };
8159 
8160 static const struct file_operations tracing_mark_fops = {
8161 	.open		= tracing_mark_open,
8162 	.write		= tracing_mark_write,
8163 	.release	= tracing_mark_release,
8164 };
8165 
8166 static const struct file_operations tracing_mark_raw_fops = {
8167 	.open		= tracing_mark_open,
8168 	.write		= tracing_mark_raw_write,
8169 	.release	= tracing_mark_release,
8170 };
8171 
8172 static const struct file_operations trace_clock_fops = {
8173 	.open		= tracing_clock_open,
8174 	.read		= seq_read,
8175 	.llseek		= seq_lseek,
8176 	.release	= tracing_single_release_tr,
8177 	.write		= tracing_clock_write,
8178 };
8179 
8180 static const struct file_operations trace_time_stamp_mode_fops = {
8181 	.open		= tracing_time_stamp_mode_open,
8182 	.read		= seq_read,
8183 	.llseek		= seq_lseek,
8184 	.release	= tracing_single_release_tr,
8185 };
8186 
8187 static const struct file_operations last_boot_fops = {
8188 	.open		= tracing_last_boot_open,
8189 	.read		= seq_read,
8190 	.llseek		= seq_lseek,
8191 	.release	= tracing_seq_release,
8192 };
8193 
8194 #ifdef CONFIG_TRACER_SNAPSHOT
8195 static const struct file_operations snapshot_fops = {
8196 	.open		= tracing_snapshot_open,
8197 	.read		= seq_read,
8198 	.write		= tracing_snapshot_write,
8199 	.llseek		= tracing_lseek,
8200 	.release	= tracing_snapshot_release,
8201 };
8202 
8203 static const struct file_operations snapshot_raw_fops = {
8204 	.open		= snapshot_raw_open,
8205 	.read		= tracing_buffers_read,
8206 	.release	= tracing_buffers_release,
8207 	.splice_read	= tracing_buffers_splice_read,
8208 };
8209 
8210 #endif /* CONFIG_TRACER_SNAPSHOT */
8211 
8212 /*
8213  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
8214  * @filp: The active open file structure
8215  * @ubuf: The userspace provided buffer to read value into
8216  * @cnt: The maximum number of bytes to read
8217  * @ppos: The current "file" position
8218  *
8219  * This function implements the write interface for a struct trace_min_max_param.
8220  * The filp->private_data must point to a trace_min_max_param structure that
8221  * defines where to write the value, the min and the max acceptable values,
8222  * and a lock to protect the write.
8223  */
8224 static ssize_t
8225 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
8226 {
8227 	struct trace_min_max_param *param = filp->private_data;
8228 	u64 val;
8229 	int err;
8230 
8231 	if (!param)
8232 		return -EFAULT;
8233 
8234 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
8235 	if (err)
8236 		return err;
8237 
8238 	if (param->lock)
8239 		mutex_lock(param->lock);
8240 
8241 	if (param->min && val < *param->min)
8242 		err = -EINVAL;
8243 
8244 	if (param->max && val > *param->max)
8245 		err = -EINVAL;
8246 
8247 	if (!err)
8248 		*param->val = val;
8249 
8250 	if (param->lock)
8251 		mutex_unlock(param->lock);
8252 
8253 	if (err)
8254 		return err;
8255 
8256 	return cnt;
8257 }
8258 
8259 /*
8260  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
8261  * @filp: The active open file structure
8262  * @ubuf: The userspace provided buffer to read value into
8263  * @cnt: The maximum number of bytes to read
8264  * @ppos: The current "file" position
8265  *
8266  * This function implements the read interface for a struct trace_min_max_param.
8267  * The filp->private_data must point to a trace_min_max_param struct with valid
8268  * data.
8269  */
8270 static ssize_t
8271 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8272 {
8273 	struct trace_min_max_param *param = filp->private_data;
8274 	char buf[U64_STR_SIZE];
8275 	int len;
8276 	u64 val;
8277 
8278 	if (!param)
8279 		return -EFAULT;
8280 
8281 	val = *param->val;
8282 
8283 	if (cnt > sizeof(buf))
8284 		cnt = sizeof(buf);
8285 
8286 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
8287 
8288 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
8289 }
8290 
8291 const struct file_operations trace_min_max_fops = {
8292 	.open		= tracing_open_generic,
8293 	.read		= trace_min_max_read,
8294 	.write		= trace_min_max_write,
8295 };
8296 
8297 #define TRACING_LOG_ERRS_MAX	8
8298 #define TRACING_LOG_LOC_MAX	128
8299 
8300 #define CMD_PREFIX "  Command: "
8301 
8302 struct err_info {
8303 	const char	**errs;	/* ptr to loc-specific array of err strings */
8304 	u8		type;	/* index into errs -> specific err string */
8305 	u16		pos;	/* caret position */
8306 	u64		ts;
8307 };
8308 
8309 struct tracing_log_err {
8310 	struct list_head	list;
8311 	struct err_info		info;
8312 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
8313 	char			*cmd;                     /* what caused err */
8314 };
8315 
8316 static DEFINE_MUTEX(tracing_err_log_lock);
8317 
8318 static struct tracing_log_err *alloc_tracing_log_err(int len)
8319 {
8320 	struct tracing_log_err *err;
8321 
8322 	err = kzalloc(sizeof(*err), GFP_KERNEL);
8323 	if (!err)
8324 		return ERR_PTR(-ENOMEM);
8325 
8326 	err->cmd = kzalloc(len, GFP_KERNEL);
8327 	if (!err->cmd) {
8328 		kfree(err);
8329 		return ERR_PTR(-ENOMEM);
8330 	}
8331 
8332 	return err;
8333 }
8334 
8335 static void free_tracing_log_err(struct tracing_log_err *err)
8336 {
8337 	kfree(err->cmd);
8338 	kfree(err);
8339 }
8340 
8341 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8342 						   int len)
8343 {
8344 	struct tracing_log_err *err;
8345 	char *cmd;
8346 
8347 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8348 		err = alloc_tracing_log_err(len);
8349 		if (PTR_ERR(err) != -ENOMEM)
8350 			tr->n_err_log_entries++;
8351 
8352 		return err;
8353 	}
8354 	cmd = kzalloc(len, GFP_KERNEL);
8355 	if (!cmd)
8356 		return ERR_PTR(-ENOMEM);
8357 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8358 	kfree(err->cmd);
8359 	err->cmd = cmd;
8360 	list_del(&err->list);
8361 
8362 	return err;
8363 }
8364 
8365 /**
8366  * err_pos - find the position of a string within a command for error careting
8367  * @cmd: The tracing command that caused the error
8368  * @str: The string to position the caret at within @cmd
8369  *
8370  * Finds the position of the first occurrence of @str within @cmd.  The
8371  * return value can be passed to tracing_log_err() for caret placement
8372  * within @cmd.
8373  *
8374  * Returns the index within @cmd of the first occurrence of @str or 0
8375  * if @str was not found.
8376  */
8377 unsigned int err_pos(char *cmd, const char *str)
8378 {
8379 	char *found;
8380 
8381 	if (WARN_ON(!strlen(cmd)))
8382 		return 0;
8383 
8384 	found = strstr(cmd, str);
8385 	if (found)
8386 		return found - cmd;
8387 
8388 	return 0;
8389 }
8390 
8391 /**
8392  * tracing_log_err - write an error to the tracing error log
8393  * @tr: The associated trace array for the error (NULL for top level array)
8394  * @loc: A string describing where the error occurred
8395  * @cmd: The tracing command that caused the error
8396  * @errs: The array of loc-specific static error strings
8397  * @type: The index into errs[], which produces the specific static err string
8398  * @pos: The position the caret should be placed in the cmd
8399  *
8400  * Writes an error into tracing/error_log of the form:
8401  *
8402  * <loc>: error: <text>
8403  *   Command: <cmd>
8404  *              ^
8405  *
8406  * tracing/error_log is a small log file containing the last
8407  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8408  * unless there has been a tracing error, and the error log can be
8409  * cleared and have its memory freed by writing the empty string in
8410  * truncation mode to it i.e. echo > tracing/error_log.
8411  *
8412  * NOTE: the @errs array along with the @type param are used to
8413  * produce a static error string - this string is not copied and saved
8414  * when the error is logged - only a pointer to it is saved.  See
8415  * existing callers for examples of how static strings are typically
8416  * defined for use with tracing_log_err().
8417  */
8418 void tracing_log_err(struct trace_array *tr,
8419 		     const char *loc, const char *cmd,
8420 		     const char **errs, u8 type, u16 pos)
8421 {
8422 	struct tracing_log_err *err;
8423 	int len = 0;
8424 
8425 	if (!tr)
8426 		tr = &global_trace;
8427 
8428 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8429 
8430 	guard(mutex)(&tracing_err_log_lock);
8431 
8432 	err = get_tracing_log_err(tr, len);
8433 	if (PTR_ERR(err) == -ENOMEM)
8434 		return;
8435 
8436 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8437 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8438 
8439 	err->info.errs = errs;
8440 	err->info.type = type;
8441 	err->info.pos = pos;
8442 	err->info.ts = local_clock();
8443 
8444 	list_add_tail(&err->list, &tr->err_log);
8445 }
8446 
8447 static void clear_tracing_err_log(struct trace_array *tr)
8448 {
8449 	struct tracing_log_err *err, *next;
8450 
8451 	guard(mutex)(&tracing_err_log_lock);
8452 
8453 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8454 		list_del(&err->list);
8455 		free_tracing_log_err(err);
8456 	}
8457 
8458 	tr->n_err_log_entries = 0;
8459 }
8460 
8461 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8462 {
8463 	struct trace_array *tr = m->private;
8464 
8465 	mutex_lock(&tracing_err_log_lock);
8466 
8467 	return seq_list_start(&tr->err_log, *pos);
8468 }
8469 
8470 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8471 {
8472 	struct trace_array *tr = m->private;
8473 
8474 	return seq_list_next(v, &tr->err_log, pos);
8475 }
8476 
8477 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8478 {
8479 	mutex_unlock(&tracing_err_log_lock);
8480 }
8481 
8482 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8483 {
8484 	u16 i;
8485 
8486 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8487 		seq_putc(m, ' ');
8488 	for (i = 0; i < pos; i++)
8489 		seq_putc(m, ' ');
8490 	seq_puts(m, "^\n");
8491 }
8492 
8493 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8494 {
8495 	struct tracing_log_err *err = v;
8496 
8497 	if (err) {
8498 		const char *err_text = err->info.errs[err->info.type];
8499 		u64 sec = err->info.ts;
8500 		u32 nsec;
8501 
8502 		nsec = do_div(sec, NSEC_PER_SEC);
8503 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8504 			   err->loc, err_text);
8505 		seq_printf(m, "%s", err->cmd);
8506 		tracing_err_log_show_pos(m, err->info.pos);
8507 	}
8508 
8509 	return 0;
8510 }
8511 
8512 static const struct seq_operations tracing_err_log_seq_ops = {
8513 	.start  = tracing_err_log_seq_start,
8514 	.next   = tracing_err_log_seq_next,
8515 	.stop   = tracing_err_log_seq_stop,
8516 	.show   = tracing_err_log_seq_show
8517 };
8518 
8519 static int tracing_err_log_open(struct inode *inode, struct file *file)
8520 {
8521 	struct trace_array *tr = inode->i_private;
8522 	int ret = 0;
8523 
8524 	ret = tracing_check_open_get_tr(tr);
8525 	if (ret)
8526 		return ret;
8527 
8528 	/* If this file was opened for write, then erase contents */
8529 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8530 		clear_tracing_err_log(tr);
8531 
8532 	if (file->f_mode & FMODE_READ) {
8533 		ret = seq_open(file, &tracing_err_log_seq_ops);
8534 		if (!ret) {
8535 			struct seq_file *m = file->private_data;
8536 			m->private = tr;
8537 		} else {
8538 			trace_array_put(tr);
8539 		}
8540 	}
8541 	return ret;
8542 }
8543 
8544 static ssize_t tracing_err_log_write(struct file *file,
8545 				     const char __user *buffer,
8546 				     size_t count, loff_t *ppos)
8547 {
8548 	return count;
8549 }
8550 
8551 static int tracing_err_log_release(struct inode *inode, struct file *file)
8552 {
8553 	struct trace_array *tr = inode->i_private;
8554 
8555 	trace_array_put(tr);
8556 
8557 	if (file->f_mode & FMODE_READ)
8558 		seq_release(inode, file);
8559 
8560 	return 0;
8561 }
8562 
8563 static const struct file_operations tracing_err_log_fops = {
8564 	.open           = tracing_err_log_open,
8565 	.write		= tracing_err_log_write,
8566 	.read           = seq_read,
8567 	.llseek         = tracing_lseek,
8568 	.release        = tracing_err_log_release,
8569 };
8570 
8571 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8572 {
8573 	struct trace_array *tr = inode->i_private;
8574 	struct ftrace_buffer_info *info;
8575 	int ret;
8576 
8577 	ret = tracing_check_open_get_tr(tr);
8578 	if (ret)
8579 		return ret;
8580 
8581 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8582 	if (!info) {
8583 		trace_array_put(tr);
8584 		return -ENOMEM;
8585 	}
8586 
8587 	mutex_lock(&trace_types_lock);
8588 
8589 	info->iter.tr		= tr;
8590 	info->iter.cpu_file	= tracing_get_cpu(inode);
8591 	info->iter.trace	= tr->current_trace;
8592 	info->iter.array_buffer = &tr->array_buffer;
8593 	info->spare		= NULL;
8594 	/* Force reading ring buffer for first read */
8595 	info->read		= (unsigned int)-1;
8596 
8597 	filp->private_data = info;
8598 
8599 	tr->trace_ref++;
8600 
8601 	mutex_unlock(&trace_types_lock);
8602 
8603 	ret = nonseekable_open(inode, filp);
8604 	if (ret < 0)
8605 		trace_array_put(tr);
8606 
8607 	return ret;
8608 }
8609 
8610 static __poll_t
8611 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8612 {
8613 	struct ftrace_buffer_info *info = filp->private_data;
8614 	struct trace_iterator *iter = &info->iter;
8615 
8616 	return trace_poll(iter, filp, poll_table);
8617 }
8618 
8619 static ssize_t
8620 tracing_buffers_read(struct file *filp, char __user *ubuf,
8621 		     size_t count, loff_t *ppos)
8622 {
8623 	struct ftrace_buffer_info *info = filp->private_data;
8624 	struct trace_iterator *iter = &info->iter;
8625 	void *trace_data;
8626 	int page_size;
8627 	ssize_t ret = 0;
8628 	ssize_t size;
8629 
8630 	if (!count)
8631 		return 0;
8632 
8633 #ifdef CONFIG_TRACER_MAX_TRACE
8634 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8635 		return -EBUSY;
8636 #endif
8637 
8638 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8639 
8640 	/* Make sure the spare matches the current sub buffer size */
8641 	if (info->spare) {
8642 		if (page_size != info->spare_size) {
8643 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8644 						   info->spare_cpu, info->spare);
8645 			info->spare = NULL;
8646 		}
8647 	}
8648 
8649 	if (!info->spare) {
8650 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8651 							  iter->cpu_file);
8652 		if (IS_ERR(info->spare)) {
8653 			ret = PTR_ERR(info->spare);
8654 			info->spare = NULL;
8655 		} else {
8656 			info->spare_cpu = iter->cpu_file;
8657 			info->spare_size = page_size;
8658 		}
8659 	}
8660 	if (!info->spare)
8661 		return ret;
8662 
8663 	/* Do we have previous read data to read? */
8664 	if (info->read < page_size)
8665 		goto read;
8666 
8667  again:
8668 	trace_access_lock(iter->cpu_file);
8669 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8670 				    info->spare,
8671 				    count,
8672 				    iter->cpu_file, 0);
8673 	trace_access_unlock(iter->cpu_file);
8674 
8675 	if (ret < 0) {
8676 		if (trace_empty(iter) && !iter->closed) {
8677 			if (update_last_data_if_empty(iter->tr))
8678 				return 0;
8679 
8680 			if ((filp->f_flags & O_NONBLOCK))
8681 				return -EAGAIN;
8682 
8683 			ret = wait_on_pipe(iter, 0);
8684 			if (ret)
8685 				return ret;
8686 
8687 			goto again;
8688 		}
8689 		return 0;
8690 	}
8691 
8692 	info->read = 0;
8693  read:
8694 	size = page_size - info->read;
8695 	if (size > count)
8696 		size = count;
8697 	trace_data = ring_buffer_read_page_data(info->spare);
8698 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8699 	if (ret == size)
8700 		return -EFAULT;
8701 
8702 	size -= ret;
8703 
8704 	*ppos += size;
8705 	info->read += size;
8706 
8707 	return size;
8708 }
8709 
8710 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8711 {
8712 	struct ftrace_buffer_info *info = file->private_data;
8713 	struct trace_iterator *iter = &info->iter;
8714 
8715 	iter->closed = true;
8716 	/* Make sure the waiters see the new wait_index */
8717 	(void)atomic_fetch_inc_release(&iter->wait_index);
8718 
8719 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8720 
8721 	return 0;
8722 }
8723 
8724 static int tracing_buffers_release(struct inode *inode, struct file *file)
8725 {
8726 	struct ftrace_buffer_info *info = file->private_data;
8727 	struct trace_iterator *iter = &info->iter;
8728 
8729 	guard(mutex)(&trace_types_lock);
8730 
8731 	iter->tr->trace_ref--;
8732 
8733 	__trace_array_put(iter->tr);
8734 
8735 	if (info->spare)
8736 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8737 					   info->spare_cpu, info->spare);
8738 	kvfree(info);
8739 
8740 	return 0;
8741 }
8742 
8743 struct buffer_ref {
8744 	struct trace_buffer	*buffer;
8745 	void			*page;
8746 	int			cpu;
8747 	refcount_t		refcount;
8748 };
8749 
8750 static void buffer_ref_release(struct buffer_ref *ref)
8751 {
8752 	if (!refcount_dec_and_test(&ref->refcount))
8753 		return;
8754 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8755 	kfree(ref);
8756 }
8757 
8758 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8759 				    struct pipe_buffer *buf)
8760 {
8761 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8762 
8763 	buffer_ref_release(ref);
8764 	buf->private = 0;
8765 }
8766 
8767 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8768 				struct pipe_buffer *buf)
8769 {
8770 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8771 
8772 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8773 		return false;
8774 
8775 	refcount_inc(&ref->refcount);
8776 	return true;
8777 }
8778 
8779 /* Pipe buffer operations for a buffer. */
8780 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8781 	.release		= buffer_pipe_buf_release,
8782 	.get			= buffer_pipe_buf_get,
8783 };
8784 
8785 /*
8786  * Callback from splice_to_pipe(), if we need to release some pages
8787  * at the end of the spd in case we error'ed out in filling the pipe.
8788  */
8789 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8790 {
8791 	struct buffer_ref *ref =
8792 		(struct buffer_ref *)spd->partial[i].private;
8793 
8794 	buffer_ref_release(ref);
8795 	spd->partial[i].private = 0;
8796 }
8797 
8798 static ssize_t
8799 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8800 			    struct pipe_inode_info *pipe, size_t len,
8801 			    unsigned int flags)
8802 {
8803 	struct ftrace_buffer_info *info = file->private_data;
8804 	struct trace_iterator *iter = &info->iter;
8805 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8806 	struct page *pages_def[PIPE_DEF_BUFFERS];
8807 	struct splice_pipe_desc spd = {
8808 		.pages		= pages_def,
8809 		.partial	= partial_def,
8810 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8811 		.ops		= &buffer_pipe_buf_ops,
8812 		.spd_release	= buffer_spd_release,
8813 	};
8814 	struct buffer_ref *ref;
8815 	bool woken = false;
8816 	int page_size;
8817 	int entries, i;
8818 	ssize_t ret = 0;
8819 
8820 #ifdef CONFIG_TRACER_MAX_TRACE
8821 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8822 		return -EBUSY;
8823 #endif
8824 
8825 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8826 	if (*ppos & (page_size - 1))
8827 		return -EINVAL;
8828 
8829 	if (len & (page_size - 1)) {
8830 		if (len < page_size)
8831 			return -EINVAL;
8832 		len &= (~(page_size - 1));
8833 	}
8834 
8835 	if (splice_grow_spd(pipe, &spd))
8836 		return -ENOMEM;
8837 
8838  again:
8839 	trace_access_lock(iter->cpu_file);
8840 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8841 
8842 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8843 		struct page *page;
8844 		int r;
8845 
8846 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8847 		if (!ref) {
8848 			ret = -ENOMEM;
8849 			break;
8850 		}
8851 
8852 		refcount_set(&ref->refcount, 1);
8853 		ref->buffer = iter->array_buffer->buffer;
8854 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8855 		if (IS_ERR(ref->page)) {
8856 			ret = PTR_ERR(ref->page);
8857 			ref->page = NULL;
8858 			kfree(ref);
8859 			break;
8860 		}
8861 		ref->cpu = iter->cpu_file;
8862 
8863 		r = ring_buffer_read_page(ref->buffer, ref->page,
8864 					  len, iter->cpu_file, 1);
8865 		if (r < 0) {
8866 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8867 						   ref->page);
8868 			kfree(ref);
8869 			break;
8870 		}
8871 
8872 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8873 
8874 		spd.pages[i] = page;
8875 		spd.partial[i].len = page_size;
8876 		spd.partial[i].offset = 0;
8877 		spd.partial[i].private = (unsigned long)ref;
8878 		spd.nr_pages++;
8879 		*ppos += page_size;
8880 
8881 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8882 	}
8883 
8884 	trace_access_unlock(iter->cpu_file);
8885 	spd.nr_pages = i;
8886 
8887 	/* did we read anything? */
8888 	if (!spd.nr_pages) {
8889 
8890 		if (ret)
8891 			goto out;
8892 
8893 		if (woken)
8894 			goto out;
8895 
8896 		ret = -EAGAIN;
8897 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8898 			goto out;
8899 
8900 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8901 		if (ret)
8902 			goto out;
8903 
8904 		/* No need to wait after waking up when tracing is off */
8905 		if (!tracer_tracing_is_on(iter->tr))
8906 			goto out;
8907 
8908 		/* Iterate one more time to collect any new data then exit */
8909 		woken = true;
8910 
8911 		goto again;
8912 	}
8913 
8914 	ret = splice_to_pipe(pipe, &spd);
8915 out:
8916 	splice_shrink_spd(&spd);
8917 
8918 	return ret;
8919 }
8920 
8921 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8922 {
8923 	struct ftrace_buffer_info *info = file->private_data;
8924 	struct trace_iterator *iter = &info->iter;
8925 	int err;
8926 
8927 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8928 		if (!(file->f_flags & O_NONBLOCK)) {
8929 			err = ring_buffer_wait(iter->array_buffer->buffer,
8930 					       iter->cpu_file,
8931 					       iter->tr->buffer_percent,
8932 					       NULL, NULL);
8933 			if (err)
8934 				return err;
8935 		}
8936 
8937 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8938 						  iter->cpu_file);
8939 	} else if (cmd) {
8940 		return -ENOTTY;
8941 	}
8942 
8943 	/*
8944 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8945 	 * waiters
8946 	 */
8947 	guard(mutex)(&trace_types_lock);
8948 
8949 	/* Make sure the waiters see the new wait_index */
8950 	(void)atomic_fetch_inc_release(&iter->wait_index);
8951 
8952 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8953 
8954 	return 0;
8955 }
8956 
8957 #ifdef CONFIG_TRACER_MAX_TRACE
8958 static int get_snapshot_map(struct trace_array *tr)
8959 {
8960 	int err = 0;
8961 
8962 	/*
8963 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8964 	 * take trace_types_lock. Instead use the specific
8965 	 * snapshot_trigger_lock.
8966 	 */
8967 	spin_lock(&tr->snapshot_trigger_lock);
8968 
8969 	if (tr->snapshot || tr->mapped == UINT_MAX)
8970 		err = -EBUSY;
8971 	else
8972 		tr->mapped++;
8973 
8974 	spin_unlock(&tr->snapshot_trigger_lock);
8975 
8976 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8977 	if (tr->mapped == 1)
8978 		synchronize_rcu();
8979 
8980 	return err;
8981 
8982 }
8983 static void put_snapshot_map(struct trace_array *tr)
8984 {
8985 	spin_lock(&tr->snapshot_trigger_lock);
8986 	if (!WARN_ON(!tr->mapped))
8987 		tr->mapped--;
8988 	spin_unlock(&tr->snapshot_trigger_lock);
8989 }
8990 #else
8991 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8992 static inline void put_snapshot_map(struct trace_array *tr) { }
8993 #endif
8994 
8995 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8996 {
8997 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8998 	struct trace_iterator *iter = &info->iter;
8999 
9000 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
9001 	put_snapshot_map(iter->tr);
9002 }
9003 
9004 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
9005 {
9006 	/*
9007 	 * Trace buffer mappings require the complete buffer including
9008 	 * the meta page. Partial mappings are not supported.
9009 	 */
9010 	return -EINVAL;
9011 }
9012 
9013 static const struct vm_operations_struct tracing_buffers_vmops = {
9014 	.close		= tracing_buffers_mmap_close,
9015 	.may_split      = tracing_buffers_may_split,
9016 };
9017 
9018 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
9019 {
9020 	struct ftrace_buffer_info *info = filp->private_data;
9021 	struct trace_iterator *iter = &info->iter;
9022 	int ret = 0;
9023 
9024 	/* A memmap'ed and backup buffers are not supported for user space mmap */
9025 	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
9026 		return -ENODEV;
9027 
9028 	ret = get_snapshot_map(iter->tr);
9029 	if (ret)
9030 		return ret;
9031 
9032 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
9033 	if (ret)
9034 		put_snapshot_map(iter->tr);
9035 
9036 	vma->vm_ops = &tracing_buffers_vmops;
9037 
9038 	return ret;
9039 }
9040 
9041 static const struct file_operations tracing_buffers_fops = {
9042 	.open		= tracing_buffers_open,
9043 	.read		= tracing_buffers_read,
9044 	.poll		= tracing_buffers_poll,
9045 	.release	= tracing_buffers_release,
9046 	.flush		= tracing_buffers_flush,
9047 	.splice_read	= tracing_buffers_splice_read,
9048 	.unlocked_ioctl = tracing_buffers_ioctl,
9049 	.mmap		= tracing_buffers_mmap,
9050 };
9051 
9052 static ssize_t
9053 tracing_stats_read(struct file *filp, char __user *ubuf,
9054 		   size_t count, loff_t *ppos)
9055 {
9056 	struct inode *inode = file_inode(filp);
9057 	struct trace_array *tr = inode->i_private;
9058 	struct array_buffer *trace_buf = &tr->array_buffer;
9059 	int cpu = tracing_get_cpu(inode);
9060 	struct trace_seq *s;
9061 	unsigned long cnt;
9062 	unsigned long long t;
9063 	unsigned long usec_rem;
9064 
9065 	s = kmalloc(sizeof(*s), GFP_KERNEL);
9066 	if (!s)
9067 		return -ENOMEM;
9068 
9069 	trace_seq_init(s);
9070 
9071 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
9072 	trace_seq_printf(s, "entries: %ld\n", cnt);
9073 
9074 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
9075 	trace_seq_printf(s, "overrun: %ld\n", cnt);
9076 
9077 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
9078 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
9079 
9080 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
9081 	trace_seq_printf(s, "bytes: %ld\n", cnt);
9082 
9083 	if (trace_clocks[tr->clock_id].in_ns) {
9084 		/* local or global for trace_clock */
9085 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9086 		usec_rem = do_div(t, USEC_PER_SEC);
9087 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
9088 								t, usec_rem);
9089 
9090 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
9091 		usec_rem = do_div(t, USEC_PER_SEC);
9092 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
9093 	} else {
9094 		/* counter or tsc mode for trace_clock */
9095 		trace_seq_printf(s, "oldest event ts: %llu\n",
9096 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9097 
9098 		trace_seq_printf(s, "now ts: %llu\n",
9099 				ring_buffer_time_stamp(trace_buf->buffer));
9100 	}
9101 
9102 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
9103 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
9104 
9105 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
9106 	trace_seq_printf(s, "read events: %ld\n", cnt);
9107 
9108 	count = simple_read_from_buffer(ubuf, count, ppos,
9109 					s->buffer, trace_seq_used(s));
9110 
9111 	kfree(s);
9112 
9113 	return count;
9114 }
9115 
9116 static const struct file_operations tracing_stats_fops = {
9117 	.open		= tracing_open_generic_tr,
9118 	.read		= tracing_stats_read,
9119 	.llseek		= generic_file_llseek,
9120 	.release	= tracing_release_generic_tr,
9121 };
9122 
9123 #ifdef CONFIG_DYNAMIC_FTRACE
9124 
9125 static ssize_t
9126 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
9127 		  size_t cnt, loff_t *ppos)
9128 {
9129 	ssize_t ret;
9130 	char *buf;
9131 	int r;
9132 
9133 	/* 512 should be plenty to hold the amount needed */
9134 #define DYN_INFO_BUF_SIZE	512
9135 
9136 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
9137 	if (!buf)
9138 		return -ENOMEM;
9139 
9140 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
9141 		      "%ld pages:%ld groups: %ld\n"
9142 		      "ftrace boot update time = %llu (ns)\n"
9143 		      "ftrace module total update time = %llu (ns)\n",
9144 		      ftrace_update_tot_cnt,
9145 		      ftrace_number_of_pages,
9146 		      ftrace_number_of_groups,
9147 		      ftrace_update_time,
9148 		      ftrace_total_mod_time);
9149 
9150 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9151 	kfree(buf);
9152 	return ret;
9153 }
9154 
9155 static const struct file_operations tracing_dyn_info_fops = {
9156 	.open		= tracing_open_generic,
9157 	.read		= tracing_read_dyn_info,
9158 	.llseek		= generic_file_llseek,
9159 };
9160 #endif /* CONFIG_DYNAMIC_FTRACE */
9161 
9162 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
9163 static void
9164 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
9165 		struct trace_array *tr, struct ftrace_probe_ops *ops,
9166 		void *data)
9167 {
9168 	tracing_snapshot_instance(tr);
9169 }
9170 
9171 static void
9172 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
9173 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
9174 		      void *data)
9175 {
9176 	struct ftrace_func_mapper *mapper = data;
9177 	long *count = NULL;
9178 
9179 	if (mapper)
9180 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9181 
9182 	if (count) {
9183 
9184 		if (*count <= 0)
9185 			return;
9186 
9187 		(*count)--;
9188 	}
9189 
9190 	tracing_snapshot_instance(tr);
9191 }
9192 
9193 static int
9194 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
9195 		      struct ftrace_probe_ops *ops, void *data)
9196 {
9197 	struct ftrace_func_mapper *mapper = data;
9198 	long *count = NULL;
9199 
9200 	seq_printf(m, "%ps:", (void *)ip);
9201 
9202 	seq_puts(m, "snapshot");
9203 
9204 	if (mapper)
9205 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9206 
9207 	if (count)
9208 		seq_printf(m, ":count=%ld\n", *count);
9209 	else
9210 		seq_puts(m, ":unlimited\n");
9211 
9212 	return 0;
9213 }
9214 
9215 static int
9216 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
9217 		     unsigned long ip, void *init_data, void **data)
9218 {
9219 	struct ftrace_func_mapper *mapper = *data;
9220 
9221 	if (!mapper) {
9222 		mapper = allocate_ftrace_func_mapper();
9223 		if (!mapper)
9224 			return -ENOMEM;
9225 		*data = mapper;
9226 	}
9227 
9228 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
9229 }
9230 
9231 static void
9232 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
9233 		     unsigned long ip, void *data)
9234 {
9235 	struct ftrace_func_mapper *mapper = data;
9236 
9237 	if (!ip) {
9238 		if (!mapper)
9239 			return;
9240 		free_ftrace_func_mapper(mapper, NULL);
9241 		return;
9242 	}
9243 
9244 	ftrace_func_mapper_remove_ip(mapper, ip);
9245 }
9246 
9247 static struct ftrace_probe_ops snapshot_probe_ops = {
9248 	.func			= ftrace_snapshot,
9249 	.print			= ftrace_snapshot_print,
9250 };
9251 
9252 static struct ftrace_probe_ops snapshot_count_probe_ops = {
9253 	.func			= ftrace_count_snapshot,
9254 	.print			= ftrace_snapshot_print,
9255 	.init			= ftrace_snapshot_init,
9256 	.free			= ftrace_snapshot_free,
9257 };
9258 
9259 static int
9260 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
9261 			       char *glob, char *cmd, char *param, int enable)
9262 {
9263 	struct ftrace_probe_ops *ops;
9264 	void *count = (void *)-1;
9265 	char *number;
9266 	int ret;
9267 
9268 	if (!tr)
9269 		return -ENODEV;
9270 
9271 	/* hash funcs only work with set_ftrace_filter */
9272 	if (!enable)
9273 		return -EINVAL;
9274 
9275 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
9276 
9277 	if (glob[0] == '!') {
9278 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
9279 		if (!ret)
9280 			tracing_disarm_snapshot(tr);
9281 
9282 		return ret;
9283 	}
9284 
9285 	if (!param)
9286 		goto out_reg;
9287 
9288 	number = strsep(&param, ":");
9289 
9290 	if (!strlen(number))
9291 		goto out_reg;
9292 
9293 	/*
9294 	 * We use the callback data field (which is a pointer)
9295 	 * as our counter.
9296 	 */
9297 	ret = kstrtoul(number, 0, (unsigned long *)&count);
9298 	if (ret)
9299 		return ret;
9300 
9301  out_reg:
9302 	ret = tracing_arm_snapshot(tr);
9303 	if (ret < 0)
9304 		return ret;
9305 
9306 	ret = register_ftrace_function_probe(glob, tr, ops, count);
9307 	if (ret < 0)
9308 		tracing_disarm_snapshot(tr);
9309 
9310 	return ret < 0 ? ret : 0;
9311 }
9312 
9313 static struct ftrace_func_command ftrace_snapshot_cmd = {
9314 	.name			= "snapshot",
9315 	.func			= ftrace_trace_snapshot_callback,
9316 };
9317 
9318 static __init int register_snapshot_cmd(void)
9319 {
9320 	return register_ftrace_command(&ftrace_snapshot_cmd);
9321 }
9322 #else
9323 static inline __init int register_snapshot_cmd(void) { return 0; }
9324 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
9325 
9326 static struct dentry *tracing_get_dentry(struct trace_array *tr)
9327 {
9328 	/* Top directory uses NULL as the parent */
9329 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
9330 		return NULL;
9331 
9332 	if (WARN_ON(!tr->dir))
9333 		return ERR_PTR(-ENODEV);
9334 
9335 	/* All sub buffers have a descriptor */
9336 	return tr->dir;
9337 }
9338 
9339 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
9340 {
9341 	struct dentry *d_tracer;
9342 
9343 	if (tr->percpu_dir)
9344 		return tr->percpu_dir;
9345 
9346 	d_tracer = tracing_get_dentry(tr);
9347 	if (IS_ERR(d_tracer))
9348 		return NULL;
9349 
9350 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9351 
9352 	MEM_FAIL(!tr->percpu_dir,
9353 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9354 
9355 	return tr->percpu_dir;
9356 }
9357 
9358 static struct dentry *
9359 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9360 		      void *data, long cpu, const struct file_operations *fops)
9361 {
9362 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9363 
9364 	if (ret) /* See tracing_get_cpu() */
9365 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
9366 	return ret;
9367 }
9368 
9369 static void
9370 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9371 {
9372 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9373 	struct dentry *d_cpu;
9374 	char cpu_dir[30]; /* 30 characters should be more than enough */
9375 
9376 	if (!d_percpu)
9377 		return;
9378 
9379 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
9380 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9381 	if (!d_cpu) {
9382 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9383 		return;
9384 	}
9385 
9386 	/* per cpu trace_pipe */
9387 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9388 				tr, cpu, &tracing_pipe_fops);
9389 
9390 	/* per cpu trace */
9391 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9392 				tr, cpu, &tracing_fops);
9393 
9394 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9395 				tr, cpu, &tracing_buffers_fops);
9396 
9397 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9398 				tr, cpu, &tracing_stats_fops);
9399 
9400 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9401 				tr, cpu, &tracing_entries_fops);
9402 
9403 	if (tr->range_addr_start)
9404 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9405 				      tr, cpu, &tracing_buffer_meta_fops);
9406 #ifdef CONFIG_TRACER_SNAPSHOT
9407 	if (!tr->range_addr_start) {
9408 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9409 				      tr, cpu, &snapshot_fops);
9410 
9411 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9412 				      tr, cpu, &snapshot_raw_fops);
9413 	}
9414 #endif
9415 }
9416 
9417 #ifdef CONFIG_FTRACE_SELFTEST
9418 /* Let selftest have access to static functions in this file */
9419 #include "trace_selftest.c"
9420 #endif
9421 
9422 static ssize_t
9423 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9424 			loff_t *ppos)
9425 {
9426 	struct trace_option_dentry *topt = filp->private_data;
9427 	char *buf;
9428 
9429 	if (topt->flags->val & topt->opt->bit)
9430 		buf = "1\n";
9431 	else
9432 		buf = "0\n";
9433 
9434 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9435 }
9436 
9437 static ssize_t
9438 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9439 			 loff_t *ppos)
9440 {
9441 	struct trace_option_dentry *topt = filp->private_data;
9442 	unsigned long val;
9443 	int ret;
9444 
9445 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9446 	if (ret)
9447 		return ret;
9448 
9449 	if (val != 0 && val != 1)
9450 		return -EINVAL;
9451 
9452 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9453 		guard(mutex)(&trace_types_lock);
9454 		ret = __set_tracer_option(topt->tr, topt->flags,
9455 					  topt->opt, !val);
9456 		if (ret)
9457 			return ret;
9458 	}
9459 
9460 	*ppos += cnt;
9461 
9462 	return cnt;
9463 }
9464 
9465 static int tracing_open_options(struct inode *inode, struct file *filp)
9466 {
9467 	struct trace_option_dentry *topt = inode->i_private;
9468 	int ret;
9469 
9470 	ret = tracing_check_open_get_tr(topt->tr);
9471 	if (ret)
9472 		return ret;
9473 
9474 	filp->private_data = inode->i_private;
9475 	return 0;
9476 }
9477 
9478 static int tracing_release_options(struct inode *inode, struct file *file)
9479 {
9480 	struct trace_option_dentry *topt = file->private_data;
9481 
9482 	trace_array_put(topt->tr);
9483 	return 0;
9484 }
9485 
9486 static const struct file_operations trace_options_fops = {
9487 	.open = tracing_open_options,
9488 	.read = trace_options_read,
9489 	.write = trace_options_write,
9490 	.llseek	= generic_file_llseek,
9491 	.release = tracing_release_options,
9492 };
9493 
9494 /*
9495  * In order to pass in both the trace_array descriptor as well as the index
9496  * to the flag that the trace option file represents, the trace_array
9497  * has a character array of trace_flags_index[], which holds the index
9498  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9499  * The address of this character array is passed to the flag option file
9500  * read/write callbacks.
9501  *
9502  * In order to extract both the index and the trace_array descriptor,
9503  * get_tr_index() uses the following algorithm.
9504  *
9505  *   idx = *ptr;
9506  *
9507  * As the pointer itself contains the address of the index (remember
9508  * index[1] == 1).
9509  *
9510  * Then to get the trace_array descriptor, by subtracting that index
9511  * from the ptr, we get to the start of the index itself.
9512  *
9513  *   ptr - idx == &index[0]
9514  *
9515  * Then a simple container_of() from that pointer gets us to the
9516  * trace_array descriptor.
9517  */
9518 static void get_tr_index(void *data, struct trace_array **ptr,
9519 			 unsigned int *pindex)
9520 {
9521 	*pindex = *(unsigned char *)data;
9522 
9523 	*ptr = container_of(data - *pindex, struct trace_array,
9524 			    trace_flags_index);
9525 }
9526 
9527 static ssize_t
9528 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9529 			loff_t *ppos)
9530 {
9531 	void *tr_index = filp->private_data;
9532 	struct trace_array *tr;
9533 	unsigned int index;
9534 	char *buf;
9535 
9536 	get_tr_index(tr_index, &tr, &index);
9537 
9538 	if (tr->trace_flags & (1ULL << index))
9539 		buf = "1\n";
9540 	else
9541 		buf = "0\n";
9542 
9543 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9544 }
9545 
9546 static ssize_t
9547 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9548 			 loff_t *ppos)
9549 {
9550 	void *tr_index = filp->private_data;
9551 	struct trace_array *tr;
9552 	unsigned int index;
9553 	unsigned long val;
9554 	int ret;
9555 
9556 	get_tr_index(tr_index, &tr, &index);
9557 
9558 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9559 	if (ret)
9560 		return ret;
9561 
9562 	if (val != 0 && val != 1)
9563 		return -EINVAL;
9564 
9565 	mutex_lock(&event_mutex);
9566 	mutex_lock(&trace_types_lock);
9567 	ret = set_tracer_flag(tr, 1ULL << index, val);
9568 	mutex_unlock(&trace_types_lock);
9569 	mutex_unlock(&event_mutex);
9570 
9571 	if (ret < 0)
9572 		return ret;
9573 
9574 	*ppos += cnt;
9575 
9576 	return cnt;
9577 }
9578 
9579 static const struct file_operations trace_options_core_fops = {
9580 	.open = tracing_open_generic,
9581 	.read = trace_options_core_read,
9582 	.write = trace_options_core_write,
9583 	.llseek = generic_file_llseek,
9584 };
9585 
9586 struct dentry *trace_create_file(const char *name,
9587 				 umode_t mode,
9588 				 struct dentry *parent,
9589 				 void *data,
9590 				 const struct file_operations *fops)
9591 {
9592 	struct dentry *ret;
9593 
9594 	ret = tracefs_create_file(name, mode, parent, data, fops);
9595 	if (!ret)
9596 		pr_warn("Could not create tracefs '%s' entry\n", name);
9597 
9598 	return ret;
9599 }
9600 
9601 
9602 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9603 {
9604 	struct dentry *d_tracer;
9605 
9606 	if (tr->options)
9607 		return tr->options;
9608 
9609 	d_tracer = tracing_get_dentry(tr);
9610 	if (IS_ERR(d_tracer))
9611 		return NULL;
9612 
9613 	tr->options = tracefs_create_dir("options", d_tracer);
9614 	if (!tr->options) {
9615 		pr_warn("Could not create tracefs directory 'options'\n");
9616 		return NULL;
9617 	}
9618 
9619 	return tr->options;
9620 }
9621 
9622 static void
9623 create_trace_option_file(struct trace_array *tr,
9624 			 struct trace_option_dentry *topt,
9625 			 struct tracer_flags *flags,
9626 			 struct tracer_opt *opt)
9627 {
9628 	struct dentry *t_options;
9629 
9630 	t_options = trace_options_init_dentry(tr);
9631 	if (!t_options)
9632 		return;
9633 
9634 	topt->flags = flags;
9635 	topt->opt = opt;
9636 	topt->tr = tr;
9637 
9638 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9639 					t_options, topt, &trace_options_fops);
9640 }
9641 
9642 static int
9643 create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
9644 			  struct tracer_flags *flags)
9645 {
9646 	struct trace_option_dentry *topts;
9647 	struct trace_options *tr_topts;
9648 	struct tracer_opt *opts;
9649 	int cnt;
9650 
9651 	if (!flags || !flags->opts)
9652 		return 0;
9653 
9654 	opts = flags->opts;
9655 
9656 	for (cnt = 0; opts[cnt].name; cnt++)
9657 		;
9658 
9659 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9660 	if (!topts)
9661 		return 0;
9662 
9663 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9664 			    GFP_KERNEL);
9665 	if (!tr_topts) {
9666 		kfree(topts);
9667 		return -ENOMEM;
9668 	}
9669 
9670 	tr->topts = tr_topts;
9671 	tr->topts[tr->nr_topts].tracer = tracer;
9672 	tr->topts[tr->nr_topts].topts = topts;
9673 	tr->nr_topts++;
9674 
9675 	for (cnt = 0; opts[cnt].name; cnt++) {
9676 		create_trace_option_file(tr, &topts[cnt], flags,
9677 					 &opts[cnt]);
9678 		MEM_FAIL(topts[cnt].entry == NULL,
9679 			  "Failed to create trace option: %s",
9680 			  opts[cnt].name);
9681 	}
9682 	return 0;
9683 }
9684 
9685 static int get_global_flags_val(struct tracer *tracer)
9686 {
9687 	struct tracers *t;
9688 
9689 	list_for_each_entry(t, &global_trace.tracers, list) {
9690 		if (t->tracer != tracer)
9691 			continue;
9692 		if (!t->flags)
9693 			return -1;
9694 		return t->flags->val;
9695 	}
9696 	return -1;
9697 }
9698 
9699 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
9700 {
9701 	struct tracer *tracer = t->tracer;
9702 	struct tracer_flags *flags = t->flags ?: tracer->flags;
9703 
9704 	if (!flags)
9705 		return 0;
9706 
9707 	/* Only add tracer options after update_tracer_options finish */
9708 	if (!tracer_options_updated)
9709 		return 0;
9710 
9711 	return create_trace_option_files(tr, tracer, flags);
9712 }
9713 
9714 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
9715 {
9716 	struct tracer_flags *flags;
9717 	struct tracers *t;
9718 	int ret;
9719 
9720 	/* Only enable if the directory has been created already. */
9721 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
9722 		return 0;
9723 
9724 	/*
9725 	 * If this is an instance, only create flags for tracers
9726 	 * the instance may have.
9727 	 */
9728 	if (!trace_ok_for_array(tracer, tr))
9729 		return 0;
9730 
9731 	t = kmalloc(sizeof(*t), GFP_KERNEL);
9732 	if (!t)
9733 		return -ENOMEM;
9734 
9735 	t->tracer = tracer;
9736 	t->flags = NULL;
9737 	list_add(&t->list, &tr->tracers);
9738 
9739 	flags = tracer->flags;
9740 	if (!flags) {
9741 		if (!tracer->default_flags)
9742 			return 0;
9743 
9744 		/*
9745 		 * If the tracer defines default flags, it means the flags are
9746 		 * per trace instance.
9747 		 */
9748 		flags = kmalloc(sizeof(*flags), GFP_KERNEL);
9749 		if (!flags)
9750 			return -ENOMEM;
9751 
9752 		*flags = *tracer->default_flags;
9753 		flags->trace = tracer;
9754 
9755 		t->flags = flags;
9756 
9757 		/* If this is an instance, inherit the global_trace flags */
9758 		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
9759 			int val = get_global_flags_val(tracer);
9760 			if (!WARN_ON_ONCE(val < 0))
9761 				flags->val = val;
9762 		}
9763 	}
9764 
9765 	ret = add_tracer_options(tr, t);
9766 	if (ret < 0) {
9767 		list_del(&t->list);
9768 		kfree(t->flags);
9769 		kfree(t);
9770 	}
9771 
9772 	return ret;
9773 }
9774 
9775 static struct dentry *
9776 create_trace_option_core_file(struct trace_array *tr,
9777 			      const char *option, long index)
9778 {
9779 	struct dentry *t_options;
9780 
9781 	t_options = trace_options_init_dentry(tr);
9782 	if (!t_options)
9783 		return NULL;
9784 
9785 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9786 				 (void *)&tr->trace_flags_index[index],
9787 				 &trace_options_core_fops);
9788 }
9789 
9790 static void create_trace_options_dir(struct trace_array *tr)
9791 {
9792 	struct dentry *t_options;
9793 	bool top_level = tr == &global_trace;
9794 	int i;
9795 
9796 	t_options = trace_options_init_dentry(tr);
9797 	if (!t_options)
9798 		return;
9799 
9800 	for (i = 0; trace_options[i]; i++) {
9801 		if (top_level ||
9802 		    !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9803 			create_trace_option_core_file(tr, trace_options[i], i);
9804 		}
9805 	}
9806 }
9807 
9808 static ssize_t
9809 rb_simple_read(struct file *filp, char __user *ubuf,
9810 	       size_t cnt, loff_t *ppos)
9811 {
9812 	struct trace_array *tr = filp->private_data;
9813 	char buf[64];
9814 	int r;
9815 
9816 	r = tracer_tracing_is_on(tr);
9817 	r = sprintf(buf, "%d\n", r);
9818 
9819 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9820 }
9821 
9822 static ssize_t
9823 rb_simple_write(struct file *filp, const char __user *ubuf,
9824 		size_t cnt, loff_t *ppos)
9825 {
9826 	struct trace_array *tr = filp->private_data;
9827 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9828 	unsigned long val;
9829 	int ret;
9830 
9831 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9832 	if (ret)
9833 		return ret;
9834 
9835 	if (buffer) {
9836 		guard(mutex)(&trace_types_lock);
9837 		if (!!val == tracer_tracing_is_on(tr)) {
9838 			val = 0; /* do nothing */
9839 		} else if (val) {
9840 			tracer_tracing_on(tr);
9841 			if (tr->current_trace->start)
9842 				tr->current_trace->start(tr);
9843 		} else {
9844 			tracer_tracing_off(tr);
9845 			if (tr->current_trace->stop)
9846 				tr->current_trace->stop(tr);
9847 			/* Wake up any waiters */
9848 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9849 		}
9850 	}
9851 
9852 	(*ppos)++;
9853 
9854 	return cnt;
9855 }
9856 
9857 static const struct file_operations rb_simple_fops = {
9858 	.open		= tracing_open_generic_tr,
9859 	.read		= rb_simple_read,
9860 	.write		= rb_simple_write,
9861 	.release	= tracing_release_generic_tr,
9862 	.llseek		= default_llseek,
9863 };
9864 
9865 static ssize_t
9866 buffer_percent_read(struct file *filp, char __user *ubuf,
9867 		    size_t cnt, loff_t *ppos)
9868 {
9869 	struct trace_array *tr = filp->private_data;
9870 	char buf[64];
9871 	int r;
9872 
9873 	r = tr->buffer_percent;
9874 	r = sprintf(buf, "%d\n", r);
9875 
9876 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9877 }
9878 
9879 static ssize_t
9880 buffer_percent_write(struct file *filp, const char __user *ubuf,
9881 		     size_t cnt, loff_t *ppos)
9882 {
9883 	struct trace_array *tr = filp->private_data;
9884 	unsigned long val;
9885 	int ret;
9886 
9887 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9888 	if (ret)
9889 		return ret;
9890 
9891 	if (val > 100)
9892 		return -EINVAL;
9893 
9894 	tr->buffer_percent = val;
9895 
9896 	(*ppos)++;
9897 
9898 	return cnt;
9899 }
9900 
9901 static const struct file_operations buffer_percent_fops = {
9902 	.open		= tracing_open_generic_tr,
9903 	.read		= buffer_percent_read,
9904 	.write		= buffer_percent_write,
9905 	.release	= tracing_release_generic_tr,
9906 	.llseek		= default_llseek,
9907 };
9908 
9909 static ssize_t
9910 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9911 {
9912 	struct trace_array *tr = filp->private_data;
9913 	size_t size;
9914 	char buf[64];
9915 	int order;
9916 	int r;
9917 
9918 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9919 	size = (PAGE_SIZE << order) / 1024;
9920 
9921 	r = sprintf(buf, "%zd\n", size);
9922 
9923 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9924 }
9925 
9926 static ssize_t
9927 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9928 			 size_t cnt, loff_t *ppos)
9929 {
9930 	struct trace_array *tr = filp->private_data;
9931 	unsigned long val;
9932 	int old_order;
9933 	int order;
9934 	int pages;
9935 	int ret;
9936 
9937 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9938 	if (ret)
9939 		return ret;
9940 
9941 	val *= 1024; /* value passed in is in KB */
9942 
9943 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9944 	order = fls(pages - 1);
9945 
9946 	/* limit between 1 and 128 system pages */
9947 	if (order < 0 || order > 7)
9948 		return -EINVAL;
9949 
9950 	/* Do not allow tracing while changing the order of the ring buffer */
9951 	tracing_stop_tr(tr);
9952 
9953 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9954 	if (old_order == order)
9955 		goto out;
9956 
9957 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9958 	if (ret)
9959 		goto out;
9960 
9961 #ifdef CONFIG_TRACER_MAX_TRACE
9962 
9963 	if (!tr->allocated_snapshot)
9964 		goto out_max;
9965 
9966 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9967 	if (ret) {
9968 		/* Put back the old order */
9969 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9970 		if (WARN_ON_ONCE(cnt)) {
9971 			/*
9972 			 * AARGH! We are left with different orders!
9973 			 * The max buffer is our "snapshot" buffer.
9974 			 * When a tracer needs a snapshot (one of the
9975 			 * latency tracers), it swaps the max buffer
9976 			 * with the saved snap shot. We succeeded to
9977 			 * update the order of the main buffer, but failed to
9978 			 * update the order of the max buffer. But when we tried
9979 			 * to reset the main buffer to the original size, we
9980 			 * failed there too. This is very unlikely to
9981 			 * happen, but if it does, warn and kill all
9982 			 * tracing.
9983 			 */
9984 			tracing_disabled = 1;
9985 		}
9986 		goto out;
9987 	}
9988  out_max:
9989 #endif
9990 	(*ppos)++;
9991  out:
9992 	if (ret)
9993 		cnt = ret;
9994 	tracing_start_tr(tr);
9995 	return cnt;
9996 }
9997 
9998 static const struct file_operations buffer_subbuf_size_fops = {
9999 	.open		= tracing_open_generic_tr,
10000 	.read		= buffer_subbuf_size_read,
10001 	.write		= buffer_subbuf_size_write,
10002 	.release	= tracing_release_generic_tr,
10003 	.llseek		= default_llseek,
10004 };
10005 
10006 static struct dentry *trace_instance_dir;
10007 
10008 static void
10009 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
10010 
10011 #ifdef CONFIG_MODULES
10012 static int make_mod_delta(struct module *mod, void *data)
10013 {
10014 	struct trace_module_delta *module_delta;
10015 	struct trace_scratch *tscratch;
10016 	struct trace_mod_entry *entry;
10017 	struct trace_array *tr = data;
10018 	int i;
10019 
10020 	tscratch = tr->scratch;
10021 	module_delta = READ_ONCE(tr->module_delta);
10022 	for (i = 0; i < tscratch->nr_entries; i++) {
10023 		entry = &tscratch->entries[i];
10024 		if (strcmp(mod->name, entry->mod_name))
10025 			continue;
10026 		if (mod->state == MODULE_STATE_GOING)
10027 			module_delta->delta[i] = 0;
10028 		else
10029 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
10030 						 - entry->mod_addr;
10031 		break;
10032 	}
10033 	return 0;
10034 }
10035 #else
10036 static int make_mod_delta(struct module *mod, void *data)
10037 {
10038 	return 0;
10039 }
10040 #endif
10041 
10042 static int mod_addr_comp(const void *a, const void *b, const void *data)
10043 {
10044 	const struct trace_mod_entry *e1 = a;
10045 	const struct trace_mod_entry *e2 = b;
10046 
10047 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
10048 }
10049 
10050 static void setup_trace_scratch(struct trace_array *tr,
10051 				struct trace_scratch *tscratch, unsigned int size)
10052 {
10053 	struct trace_module_delta *module_delta;
10054 	struct trace_mod_entry *entry;
10055 	int i, nr_entries;
10056 
10057 	if (!tscratch)
10058 		return;
10059 
10060 	tr->scratch = tscratch;
10061 	tr->scratch_size = size;
10062 
10063 	if (tscratch->text_addr)
10064 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
10065 
10066 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
10067 		goto reset;
10068 
10069 	/* Check if each module name is a valid string */
10070 	for (i = 0; i < tscratch->nr_entries; i++) {
10071 		int n;
10072 
10073 		entry = &tscratch->entries[i];
10074 
10075 		for (n = 0; n < MODULE_NAME_LEN; n++) {
10076 			if (entry->mod_name[n] == '\0')
10077 				break;
10078 			if (!isprint(entry->mod_name[n]))
10079 				goto reset;
10080 		}
10081 		if (n == MODULE_NAME_LEN)
10082 			goto reset;
10083 	}
10084 
10085 	/* Sort the entries so that we can find appropriate module from address. */
10086 	nr_entries = tscratch->nr_entries;
10087 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
10088 	       mod_addr_comp, NULL, NULL);
10089 
10090 	if (IS_ENABLED(CONFIG_MODULES)) {
10091 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
10092 		if (!module_delta) {
10093 			pr_info("module_delta allocation failed. Not able to decode module address.");
10094 			goto reset;
10095 		}
10096 		init_rcu_head(&module_delta->rcu);
10097 	} else
10098 		module_delta = NULL;
10099 	WRITE_ONCE(tr->module_delta, module_delta);
10100 
10101 	/* Scan modules to make text delta for modules. */
10102 	module_for_each_mod(make_mod_delta, tr);
10103 
10104 	/* Set trace_clock as the same of the previous boot. */
10105 	if (tscratch->clock_id != tr->clock_id) {
10106 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
10107 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
10108 			pr_info("the previous trace_clock info is not valid.");
10109 			goto reset;
10110 		}
10111 	}
10112 	return;
10113  reset:
10114 	/* Invalid trace modules */
10115 	memset(tscratch, 0, size);
10116 }
10117 
10118 static int
10119 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
10120 {
10121 	enum ring_buffer_flags rb_flags;
10122 	struct trace_scratch *tscratch;
10123 	unsigned int scratch_size = 0;
10124 
10125 	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;
10126 
10127 	buf->tr = tr;
10128 
10129 	if (tr->range_addr_start && tr->range_addr_size) {
10130 		/* Add scratch buffer to handle 128 modules */
10131 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
10132 						      tr->range_addr_start,
10133 						      tr->range_addr_size,
10134 						      struct_size(tscratch, entries, 128));
10135 
10136 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
10137 		setup_trace_scratch(tr, tscratch, scratch_size);
10138 
10139 		/*
10140 		 * This is basically the same as a mapped buffer,
10141 		 * with the same restrictions.
10142 		 */
10143 		tr->mapped++;
10144 	} else {
10145 		buf->buffer = ring_buffer_alloc(size, rb_flags);
10146 	}
10147 	if (!buf->buffer)
10148 		return -ENOMEM;
10149 
10150 	buf->data = alloc_percpu(struct trace_array_cpu);
10151 	if (!buf->data) {
10152 		ring_buffer_free(buf->buffer);
10153 		buf->buffer = NULL;
10154 		return -ENOMEM;
10155 	}
10156 
10157 	/* Allocate the first page for all buffers */
10158 	set_buffer_entries(&tr->array_buffer,
10159 			   ring_buffer_size(tr->array_buffer.buffer, 0));
10160 
10161 	return 0;
10162 }
10163 
10164 static void free_trace_buffer(struct array_buffer *buf)
10165 {
10166 	if (buf->buffer) {
10167 		ring_buffer_free(buf->buffer);
10168 		buf->buffer = NULL;
10169 		free_percpu(buf->data);
10170 		buf->data = NULL;
10171 	}
10172 }
10173 
10174 static int allocate_trace_buffers(struct trace_array *tr, int size)
10175 {
10176 	int ret;
10177 
10178 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
10179 	if (ret)
10180 		return ret;
10181 
10182 #ifdef CONFIG_TRACER_MAX_TRACE
10183 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
10184 	if (tr->range_addr_start)
10185 		return 0;
10186 
10187 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
10188 				    allocate_snapshot ? size : 1);
10189 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
10190 		free_trace_buffer(&tr->array_buffer);
10191 		return -ENOMEM;
10192 	}
10193 	tr->allocated_snapshot = allocate_snapshot;
10194 
10195 	allocate_snapshot = false;
10196 #endif
10197 
10198 	return 0;
10199 }
10200 
10201 static void free_trace_buffers(struct trace_array *tr)
10202 {
10203 	if (!tr)
10204 		return;
10205 
10206 	free_trace_buffer(&tr->array_buffer);
10207 	kfree(tr->module_delta);
10208 
10209 #ifdef CONFIG_TRACER_MAX_TRACE
10210 	free_trace_buffer(&tr->max_buffer);
10211 #endif
10212 }
10213 
10214 static void init_trace_flags_index(struct trace_array *tr)
10215 {
10216 	int i;
10217 
10218 	/* Used by the trace options files */
10219 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
10220 		tr->trace_flags_index[i] = i;
10221 }
10222 
10223 static int __update_tracer(struct trace_array *tr)
10224 {
10225 	struct tracer *t;
10226 	int ret = 0;
10227 
10228 	for (t = trace_types; t && !ret; t = t->next)
10229 		ret = add_tracer(tr, t);
10230 
10231 	return ret;
10232 }
10233 
10234 static __init int __update_tracer_options(struct trace_array *tr)
10235 {
10236 	struct tracers *t;
10237 	int ret = 0;
10238 
10239 	list_for_each_entry(t, &tr->tracers, list) {
10240 		ret = add_tracer_options(tr, t);
10241 		if (ret < 0)
10242 			break;
10243 	}
10244 
10245 	return ret;
10246 }
10247 
10248 static __init void update_tracer_options(void)
10249 {
10250 	struct trace_array *tr;
10251 
10252 	guard(mutex)(&trace_types_lock);
10253 	tracer_options_updated = true;
10254 	list_for_each_entry(tr, &ftrace_trace_arrays, list)
10255 		__update_tracer_options(tr);
10256 }
10257 
10258 /* Must have trace_types_lock held */
10259 struct trace_array *trace_array_find(const char *instance)
10260 {
10261 	struct trace_array *tr, *found = NULL;
10262 
10263 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10264 		if (tr->name && strcmp(tr->name, instance) == 0) {
10265 			found = tr;
10266 			break;
10267 		}
10268 	}
10269 
10270 	return found;
10271 }
10272 
10273 struct trace_array *trace_array_find_get(const char *instance)
10274 {
10275 	struct trace_array *tr;
10276 
10277 	guard(mutex)(&trace_types_lock);
10278 	tr = trace_array_find(instance);
10279 	if (tr)
10280 		tr->ref++;
10281 
10282 	return tr;
10283 }
10284 
10285 static int trace_array_create_dir(struct trace_array *tr)
10286 {
10287 	int ret;
10288 
10289 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
10290 	if (!tr->dir)
10291 		return -EINVAL;
10292 
10293 	ret = event_trace_add_tracer(tr->dir, tr);
10294 	if (ret) {
10295 		tracefs_remove(tr->dir);
10296 		return ret;
10297 	}
10298 
10299 	init_tracer_tracefs(tr, tr->dir);
10300 	ret = __update_tracer(tr);
10301 	if (ret) {
10302 		event_trace_del_tracer(tr);
10303 		tracefs_remove(tr->dir);
10304 		return ret;
10305 	}
10306 	return 0;
10307 }
10308 
10309 static struct trace_array *
10310 trace_array_create_systems(const char *name, const char *systems,
10311 			   unsigned long range_addr_start,
10312 			   unsigned long range_addr_size)
10313 {
10314 	struct trace_array *tr;
10315 	int ret;
10316 
10317 	ret = -ENOMEM;
10318 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
10319 	if (!tr)
10320 		return ERR_PTR(ret);
10321 
10322 	tr->name = kstrdup(name, GFP_KERNEL);
10323 	if (!tr->name)
10324 		goto out_free_tr;
10325 
10326 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
10327 		goto out_free_tr;
10328 
10329 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
10330 		goto out_free_tr;
10331 
10332 	if (systems) {
10333 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
10334 		if (!tr->system_names)
10335 			goto out_free_tr;
10336 	}
10337 
10338 	/* Only for boot up memory mapped ring buffers */
10339 	tr->range_addr_start = range_addr_start;
10340 	tr->range_addr_size = range_addr_size;
10341 
10342 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
10343 
10344 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
10345 
10346 	raw_spin_lock_init(&tr->start_lock);
10347 
10348 	tr->syscall_buf_sz = global_trace.syscall_buf_sz;
10349 
10350 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10351 #ifdef CONFIG_TRACER_MAX_TRACE
10352 	spin_lock_init(&tr->snapshot_trigger_lock);
10353 #endif
10354 	tr->current_trace = &nop_trace;
10355 	tr->current_trace_flags = nop_trace.flags;
10356 
10357 	INIT_LIST_HEAD(&tr->systems);
10358 	INIT_LIST_HEAD(&tr->events);
10359 	INIT_LIST_HEAD(&tr->hist_vars);
10360 	INIT_LIST_HEAD(&tr->err_log);
10361 	INIT_LIST_HEAD(&tr->tracers);
10362 	INIT_LIST_HEAD(&tr->marker_list);
10363 
10364 #ifdef CONFIG_MODULES
10365 	INIT_LIST_HEAD(&tr->mod_events);
10366 #endif
10367 
10368 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
10369 		goto out_free_tr;
10370 
10371 	/* The ring buffer is defaultly expanded */
10372 	trace_set_ring_buffer_expanded(tr);
10373 
10374 	if (ftrace_allocate_ftrace_ops(tr) < 0)
10375 		goto out_free_tr;
10376 
10377 	ftrace_init_trace_array(tr);
10378 
10379 	init_trace_flags_index(tr);
10380 
10381 	if (trace_instance_dir) {
10382 		ret = trace_array_create_dir(tr);
10383 		if (ret)
10384 			goto out_free_tr;
10385 	} else
10386 		__trace_early_add_events(tr);
10387 
10388 	list_add(&tr->list, &ftrace_trace_arrays);
10389 
10390 	tr->ref++;
10391 
10392 	return tr;
10393 
10394  out_free_tr:
10395 	ftrace_free_ftrace_ops(tr);
10396 	free_trace_buffers(tr);
10397 	free_cpumask_var(tr->pipe_cpumask);
10398 	free_cpumask_var(tr->tracing_cpumask);
10399 	kfree_const(tr->system_names);
10400 	kfree(tr->range_name);
10401 	kfree(tr->name);
10402 	kfree(tr);
10403 
10404 	return ERR_PTR(ret);
10405 }
10406 
10407 static struct trace_array *trace_array_create(const char *name)
10408 {
10409 	return trace_array_create_systems(name, NULL, 0, 0);
10410 }
10411 
10412 static int instance_mkdir(const char *name)
10413 {
10414 	struct trace_array *tr;
10415 	int ret;
10416 
10417 	guard(mutex)(&event_mutex);
10418 	guard(mutex)(&trace_types_lock);
10419 
10420 	ret = -EEXIST;
10421 	if (trace_array_find(name))
10422 		return -EEXIST;
10423 
10424 	tr = trace_array_create(name);
10425 
10426 	ret = PTR_ERR_OR_ZERO(tr);
10427 
10428 	return ret;
10429 }
10430 
10431 #ifdef CONFIG_MMU
10432 static u64 map_pages(unsigned long start, unsigned long size)
10433 {
10434 	unsigned long vmap_start, vmap_end;
10435 	struct vm_struct *area;
10436 	int ret;
10437 
10438 	area = get_vm_area(size, VM_IOREMAP);
10439 	if (!area)
10440 		return 0;
10441 
10442 	vmap_start = (unsigned long) area->addr;
10443 	vmap_end = vmap_start + size;
10444 
10445 	ret = vmap_page_range(vmap_start, vmap_end,
10446 			      start, pgprot_nx(PAGE_KERNEL));
10447 	if (ret < 0) {
10448 		free_vm_area(area);
10449 		return 0;
10450 	}
10451 
10452 	return (u64)vmap_start;
10453 }
10454 #else
10455 static inline u64 map_pages(unsigned long start, unsigned long size)
10456 {
10457 	return 0;
10458 }
10459 #endif
10460 
10461 /**
10462  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10463  * @name: The name of the trace array to be looked up/created.
10464  * @systems: A list of systems to create event directories for (NULL for all)
10465  *
10466  * Returns pointer to trace array with given name.
10467  * NULL, if it cannot be created.
10468  *
10469  * NOTE: This function increments the reference counter associated with the
10470  * trace array returned. This makes sure it cannot be freed while in use.
10471  * Use trace_array_put() once the trace array is no longer needed.
10472  * If the trace_array is to be freed, trace_array_destroy() needs to
10473  * be called after the trace_array_put(), or simply let user space delete
10474  * it from the tracefs instances directory. But until the
10475  * trace_array_put() is called, user space can not delete it.
10476  *
10477  */
10478 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10479 {
10480 	struct trace_array *tr;
10481 
10482 	guard(mutex)(&event_mutex);
10483 	guard(mutex)(&trace_types_lock);
10484 
10485 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10486 		if (tr->name && strcmp(tr->name, name) == 0) {
10487 			tr->ref++;
10488 			return tr;
10489 		}
10490 	}
10491 
10492 	tr = trace_array_create_systems(name, systems, 0, 0);
10493 
10494 	if (IS_ERR(tr))
10495 		tr = NULL;
10496 	else
10497 		tr->ref++;
10498 
10499 	return tr;
10500 }
10501 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
10502 
10503 static int __remove_instance(struct trace_array *tr)
10504 {
10505 	int i;
10506 
10507 	/* Reference counter for a newly created trace array = 1. */
10508 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10509 		return -EBUSY;
10510 
10511 	list_del(&tr->list);
10512 
10513 	/* Disable all the flags that were enabled coming in */
10514 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10515 		if ((1ULL << i) & ZEROED_TRACE_FLAGS)
10516 			set_tracer_flag(tr, 1ULL << i, 0);
10517 	}
10518 
10519 	if (printk_trace == tr)
10520 		update_printk_trace(&global_trace);
10521 
10522 	if (update_marker_trace(tr, 0))
10523 		synchronize_rcu();
10524 
10525 	tracing_set_nop(tr);
10526 	clear_ftrace_function_probes(tr);
10527 	event_trace_del_tracer(tr);
10528 	ftrace_clear_pids(tr);
10529 	ftrace_destroy_function_files(tr);
10530 	tracefs_remove(tr->dir);
10531 	free_percpu(tr->last_func_repeats);
10532 	free_trace_buffers(tr);
10533 	clear_tracing_err_log(tr);
10534 	free_tracers(tr);
10535 
10536 	if (tr->range_name) {
10537 		reserve_mem_release_by_name(tr->range_name);
10538 		kfree(tr->range_name);
10539 	}
10540 	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
10541 		vfree((void *)tr->range_addr_start);
10542 
10543 	for (i = 0; i < tr->nr_topts; i++) {
10544 		kfree(tr->topts[i].topts);
10545 	}
10546 	kfree(tr->topts);
10547 
10548 	free_cpumask_var(tr->pipe_cpumask);
10549 	free_cpumask_var(tr->tracing_cpumask);
10550 	kfree_const(tr->system_names);
10551 	kfree(tr->name);
10552 	kfree(tr);
10553 
10554 	return 0;
10555 }
10556 
10557 int trace_array_destroy(struct trace_array *this_tr)
10558 {
10559 	struct trace_array *tr;
10560 
10561 	if (!this_tr)
10562 		return -EINVAL;
10563 
10564 	guard(mutex)(&event_mutex);
10565 	guard(mutex)(&trace_types_lock);
10566 
10567 
10568 	/* Making sure trace array exists before destroying it. */
10569 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10570 		if (tr == this_tr)
10571 			return __remove_instance(tr);
10572 	}
10573 
10574 	return -ENODEV;
10575 }
10576 EXPORT_SYMBOL_GPL(trace_array_destroy);
10577 
10578 static int instance_rmdir(const char *name)
10579 {
10580 	struct trace_array *tr;
10581 
10582 	guard(mutex)(&event_mutex);
10583 	guard(mutex)(&trace_types_lock);
10584 
10585 	tr = trace_array_find(name);
10586 	if (!tr)
10587 		return -ENODEV;
10588 
10589 	return __remove_instance(tr);
10590 }
10591 
10592 static __init void create_trace_instances(struct dentry *d_tracer)
10593 {
10594 	struct trace_array *tr;
10595 
10596 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10597 							 instance_mkdir,
10598 							 instance_rmdir);
10599 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10600 		return;
10601 
10602 	guard(mutex)(&event_mutex);
10603 	guard(mutex)(&trace_types_lock);
10604 
10605 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10606 		if (!tr->name)
10607 			continue;
10608 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10609 			     "Failed to create instance directory\n"))
10610 			return;
10611 	}
10612 }
10613 
10614 static void
10615 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10616 {
10617 	int cpu;
10618 
10619 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10620 			tr, &show_traces_fops);
10621 
10622 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10623 			tr, &set_tracer_fops);
10624 
10625 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10626 			  tr, &tracing_cpumask_fops);
10627 
10628 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10629 			  tr, &tracing_iter_fops);
10630 
10631 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10632 			  tr, &tracing_fops);
10633 
10634 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10635 			  tr, &tracing_pipe_fops);
10636 
10637 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10638 			  tr, &tracing_entries_fops);
10639 
10640 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10641 			  tr, &tracing_total_entries_fops);
10642 
10643 	trace_create_file("free_buffer", 0200, d_tracer,
10644 			  tr, &tracing_free_buffer_fops);
10645 
10646 	trace_create_file("trace_marker", 0220, d_tracer,
10647 			  tr, &tracing_mark_fops);
10648 
10649 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10650 
10651 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10652 			  tr, &tracing_mark_raw_fops);
10653 
10654 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10655 			  &trace_clock_fops);
10656 
10657 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10658 			  tr, &rb_simple_fops);
10659 
10660 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10661 			  &trace_time_stamp_mode_fops);
10662 
10663 	tr->buffer_percent = 50;
10664 
10665 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10666 			tr, &buffer_percent_fops);
10667 
10668 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10669 			  tr, &buffer_subbuf_size_fops);
10670 
10671 	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
10672 			 tr, &tracing_syscall_buf_fops);
10673 
10674 	create_trace_options_dir(tr);
10675 
10676 #ifdef CONFIG_TRACER_MAX_TRACE
10677 	trace_create_maxlat_file(tr, d_tracer);
10678 #endif
10679 
10680 	if (ftrace_create_function_files(tr, d_tracer))
10681 		MEM_FAIL(1, "Could not allocate function filter files");
10682 
10683 	if (tr->range_addr_start) {
10684 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10685 				  tr, &last_boot_fops);
10686 #ifdef CONFIG_TRACER_SNAPSHOT
10687 	} else {
10688 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10689 				  tr, &snapshot_fops);
10690 #endif
10691 	}
10692 
10693 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10694 			  tr, &tracing_err_log_fops);
10695 
10696 	for_each_tracing_cpu(cpu)
10697 		tracing_init_tracefs_percpu(tr, cpu);
10698 
10699 	ftrace_init_tracefs(tr, d_tracer);
10700 }
10701 
10702 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10703 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
10704 {
10705 	struct vfsmount *mnt;
10706 	struct file_system_type *type;
10707 	struct fs_context *fc;
10708 	int ret;
10709 
10710 	/*
10711 	 * To maintain backward compatibility for tools that mount
10712 	 * debugfs to get to the tracing facility, tracefs is automatically
10713 	 * mounted to the debugfs/tracing directory.
10714 	 */
10715 	type = get_fs_type("tracefs");
10716 	if (!type)
10717 		return NULL;
10718 
10719 	fc = fs_context_for_submount(type, mntpt);
10720 	put_filesystem(type);
10721 	if (IS_ERR(fc))
10722 		return ERR_CAST(fc);
10723 
10724 	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10725 
10726 	ret = vfs_parse_fs_string(fc, "source", "tracefs");
10727 	if (!ret)
10728 		mnt = fc_mount(fc);
10729 	else
10730 		mnt = ERR_PTR(ret);
10731 
10732 	put_fs_context(fc);
10733 	return mnt;
10734 }
10735 #endif
10736 
10737 /**
10738  * tracing_init_dentry - initialize top level trace array
10739  *
10740  * This is called when creating files or directories in the tracing
10741  * directory. It is called via fs_initcall() by any of the boot up code
10742  * and expects to return the dentry of the top level tracing directory.
10743  */
10744 int tracing_init_dentry(void)
10745 {
10746 	struct trace_array *tr = &global_trace;
10747 
10748 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10749 		pr_warn("Tracing disabled due to lockdown\n");
10750 		return -EPERM;
10751 	}
10752 
10753 	/* The top level trace array uses  NULL as parent */
10754 	if (tr->dir)
10755 		return 0;
10756 
10757 	if (WARN_ON(!tracefs_initialized()))
10758 		return -ENODEV;
10759 
10760 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10761 	/*
10762 	 * As there may still be users that expect the tracing
10763 	 * files to exist in debugfs/tracing, we must automount
10764 	 * the tracefs file system there, so older tools still
10765 	 * work with the newer kernel.
10766 	 */
10767 	tr->dir = debugfs_create_automount("tracing", NULL,
10768 					   trace_automount, NULL);
10769 #endif
10770 
10771 	return 0;
10772 }
10773 
10774 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10775 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10776 
10777 static struct workqueue_struct *eval_map_wq __initdata;
10778 static struct work_struct eval_map_work __initdata;
10779 static struct work_struct tracerfs_init_work __initdata;
10780 
10781 static void __init eval_map_work_func(struct work_struct *work)
10782 {
10783 	int len;
10784 
10785 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10786 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10787 }
10788 
10789 static int __init trace_eval_init(void)
10790 {
10791 	INIT_WORK(&eval_map_work, eval_map_work_func);
10792 
10793 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10794 	if (!eval_map_wq) {
10795 		pr_err("Unable to allocate eval_map_wq\n");
10796 		/* Do work here */
10797 		eval_map_work_func(&eval_map_work);
10798 		return -ENOMEM;
10799 	}
10800 
10801 	queue_work(eval_map_wq, &eval_map_work);
10802 	return 0;
10803 }
10804 
10805 subsys_initcall(trace_eval_init);
10806 
10807 static int __init trace_eval_sync(void)
10808 {
10809 	/* Make sure the eval map updates are finished */
10810 	if (eval_map_wq)
10811 		destroy_workqueue(eval_map_wq);
10812 	return 0;
10813 }
10814 
10815 late_initcall_sync(trace_eval_sync);
10816 
10817 
10818 #ifdef CONFIG_MODULES
10819 
10820 bool module_exists(const char *module)
10821 {
10822 	/* All modules have the symbol __this_module */
10823 	static const char this_mod[] = "__this_module";
10824 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10825 	unsigned long val;
10826 	int n;
10827 
10828 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10829 
10830 	if (n > sizeof(modname) - 1)
10831 		return false;
10832 
10833 	val = module_kallsyms_lookup_name(modname);
10834 	return val != 0;
10835 }
10836 
10837 static void trace_module_add_evals(struct module *mod)
10838 {
10839 	/*
10840 	 * Modules with bad taint do not have events created, do
10841 	 * not bother with enums either.
10842 	 */
10843 	if (trace_module_has_bad_taint(mod))
10844 		return;
10845 
10846 	/* Even if no trace_evals, this need to sanitize field types. */
10847 	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10848 }
10849 
10850 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10851 static void trace_module_remove_evals(struct module *mod)
10852 {
10853 	union trace_eval_map_item *map;
10854 	union trace_eval_map_item **last = &trace_eval_maps;
10855 
10856 	if (!mod->num_trace_evals)
10857 		return;
10858 
10859 	guard(mutex)(&trace_eval_mutex);
10860 
10861 	map = trace_eval_maps;
10862 
10863 	while (map) {
10864 		if (map->head.mod == mod)
10865 			break;
10866 		map = trace_eval_jmp_to_tail(map);
10867 		last = &map->tail.next;
10868 		map = map->tail.next;
10869 	}
10870 	if (!map)
10871 		return;
10872 
10873 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10874 	kfree(map);
10875 }
10876 #else
10877 static inline void trace_module_remove_evals(struct module *mod) { }
10878 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10879 
10880 static void trace_module_record(struct module *mod, bool add)
10881 {
10882 	struct trace_array *tr;
10883 	unsigned long flags;
10884 
10885 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10886 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10887 		/* Update any persistent trace array that has already been started */
10888 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10889 			guard(mutex)(&scratch_mutex);
10890 			save_mod(mod, tr);
10891 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10892 			/* Update delta if the module loaded in previous boot */
10893 			make_mod_delta(mod, tr);
10894 		}
10895 	}
10896 }
10897 
10898 static int trace_module_notify(struct notifier_block *self,
10899 			       unsigned long val, void *data)
10900 {
10901 	struct module *mod = data;
10902 
10903 	switch (val) {
10904 	case MODULE_STATE_COMING:
10905 		trace_module_add_evals(mod);
10906 		trace_module_record(mod, true);
10907 		break;
10908 	case MODULE_STATE_GOING:
10909 		trace_module_remove_evals(mod);
10910 		trace_module_record(mod, false);
10911 		break;
10912 	}
10913 
10914 	return NOTIFY_OK;
10915 }
10916 
10917 static struct notifier_block trace_module_nb = {
10918 	.notifier_call = trace_module_notify,
10919 	.priority = 0,
10920 };
10921 #endif /* CONFIG_MODULES */
10922 
10923 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10924 {
10925 
10926 	event_trace_init();
10927 
10928 	init_tracer_tracefs(&global_trace, NULL);
10929 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10930 
10931 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10932 			&global_trace, &tracing_thresh_fops);
10933 
10934 	trace_create_file("README", TRACE_MODE_READ, NULL,
10935 			NULL, &tracing_readme_fops);
10936 
10937 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10938 			NULL, &tracing_saved_cmdlines_fops);
10939 
10940 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10941 			  NULL, &tracing_saved_cmdlines_size_fops);
10942 
10943 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10944 			NULL, &tracing_saved_tgids_fops);
10945 
10946 	trace_create_eval_file(NULL);
10947 
10948 #ifdef CONFIG_MODULES
10949 	register_module_notifier(&trace_module_nb);
10950 #endif
10951 
10952 #ifdef CONFIG_DYNAMIC_FTRACE
10953 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10954 			NULL, &tracing_dyn_info_fops);
10955 #endif
10956 
10957 	create_trace_instances(NULL);
10958 
10959 	update_tracer_options();
10960 }
10961 
10962 static __init int tracer_init_tracefs(void)
10963 {
10964 	int ret;
10965 
10966 	trace_access_lock_init();
10967 
10968 	ret = tracing_init_dentry();
10969 	if (ret)
10970 		return 0;
10971 
10972 	if (eval_map_wq) {
10973 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10974 		queue_work(eval_map_wq, &tracerfs_init_work);
10975 	} else {
10976 		tracer_init_tracefs_work_func(NULL);
10977 	}
10978 
10979 	if (rv_init_interface())
10980 		pr_err("RV: Error while creating the RV interface\n");
10981 
10982 	return 0;
10983 }
10984 
10985 fs_initcall(tracer_init_tracefs);
10986 
10987 static int trace_die_panic_handler(struct notifier_block *self,
10988 				unsigned long ev, void *unused);
10989 
10990 static struct notifier_block trace_panic_notifier = {
10991 	.notifier_call = trace_die_panic_handler,
10992 	.priority = INT_MAX - 1,
10993 };
10994 
10995 static struct notifier_block trace_die_notifier = {
10996 	.notifier_call = trace_die_panic_handler,
10997 	.priority = INT_MAX - 1,
10998 };
10999 
11000 /*
11001  * The idea is to execute the following die/panic callback early, in order
11002  * to avoid showing irrelevant information in the trace (like other panic
11003  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
11004  * warnings get disabled (to prevent potential log flooding).
11005  */
11006 static int trace_die_panic_handler(struct notifier_block *self,
11007 				unsigned long ev, void *unused)
11008 {
11009 	if (!ftrace_dump_on_oops_enabled())
11010 		return NOTIFY_DONE;
11011 
11012 	/* The die notifier requires DIE_OOPS to trigger */
11013 	if (self == &trace_die_notifier && ev != DIE_OOPS)
11014 		return NOTIFY_DONE;
11015 
11016 	ftrace_dump(DUMP_PARAM);
11017 
11018 	return NOTIFY_DONE;
11019 }
11020 
11021 /*
11022  * printk is set to max of 1024, we really don't need it that big.
11023  * Nothing should be printing 1000 characters anyway.
11024  */
11025 #define TRACE_MAX_PRINT		1000
11026 
11027 /*
11028  * Define here KERN_TRACE so that we have one place to modify
11029  * it if we decide to change what log level the ftrace dump
11030  * should be at.
11031  */
11032 #define KERN_TRACE		KERN_EMERG
11033 
11034 void
11035 trace_printk_seq(struct trace_seq *s)
11036 {
11037 	/* Probably should print a warning here. */
11038 	if (s->seq.len >= TRACE_MAX_PRINT)
11039 		s->seq.len = TRACE_MAX_PRINT;
11040 
11041 	/*
11042 	 * More paranoid code. Although the buffer size is set to
11043 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
11044 	 * an extra layer of protection.
11045 	 */
11046 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
11047 		s->seq.len = s->seq.size - 1;
11048 
11049 	/* should be zero ended, but we are paranoid. */
11050 	s->buffer[s->seq.len] = 0;
11051 
11052 	printk(KERN_TRACE "%s", s->buffer);
11053 
11054 	trace_seq_init(s);
11055 }
11056 
11057 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
11058 {
11059 	iter->tr = tr;
11060 	iter->trace = iter->tr->current_trace;
11061 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
11062 	iter->array_buffer = &tr->array_buffer;
11063 
11064 	if (iter->trace && iter->trace->open)
11065 		iter->trace->open(iter);
11066 
11067 	/* Annotate start of buffers if we had overruns */
11068 	if (ring_buffer_overruns(iter->array_buffer->buffer))
11069 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
11070 
11071 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
11072 	if (trace_clocks[iter->tr->clock_id].in_ns)
11073 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
11074 
11075 	/* Can not use kmalloc for iter.temp and iter.fmt */
11076 	iter->temp = static_temp_buf;
11077 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
11078 	iter->fmt = static_fmt_buf;
11079 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
11080 }
11081 
11082 void trace_init_global_iter(struct trace_iterator *iter)
11083 {
11084 	trace_init_iter(iter, &global_trace);
11085 }
11086 
11087 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
11088 {
11089 	/* use static because iter can be a bit big for the stack */
11090 	static struct trace_iterator iter;
11091 	unsigned int old_userobj;
11092 	unsigned long flags;
11093 	int cnt = 0;
11094 
11095 	/*
11096 	 * Always turn off tracing when we dump.
11097 	 * We don't need to show trace output of what happens
11098 	 * between multiple crashes.
11099 	 *
11100 	 * If the user does a sysrq-z, then they can re-enable
11101 	 * tracing with echo 1 > tracing_on.
11102 	 */
11103 	tracer_tracing_off(tr);
11104 
11105 	local_irq_save(flags);
11106 
11107 	/* Simulate the iterator */
11108 	trace_init_iter(&iter, tr);
11109 
11110 	/* While dumping, do not allow the buffer to be enable */
11111 	tracer_tracing_disable(tr);
11112 
11113 	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);
11114 
11115 	/* don't look at user memory in panic mode */
11116 	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);
11117 
11118 	if (dump_mode == DUMP_ORIG)
11119 		iter.cpu_file = raw_smp_processor_id();
11120 	else
11121 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
11122 
11123 	if (tr == &global_trace)
11124 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
11125 	else
11126 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
11127 
11128 	/* Did function tracer already get disabled? */
11129 	if (ftrace_is_dead()) {
11130 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
11131 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
11132 	}
11133 
11134 	/*
11135 	 * We need to stop all tracing on all CPUS to read
11136 	 * the next buffer. This is a bit expensive, but is
11137 	 * not done often. We fill all what we can read,
11138 	 * and then release the locks again.
11139 	 */
11140 
11141 	while (!trace_empty(&iter)) {
11142 
11143 		if (!cnt)
11144 			printk(KERN_TRACE "---------------------------------\n");
11145 
11146 		cnt++;
11147 
11148 		trace_iterator_reset(&iter);
11149 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
11150 
11151 		if (trace_find_next_entry_inc(&iter) != NULL) {
11152 			int ret;
11153 
11154 			ret = print_trace_line(&iter);
11155 			if (ret != TRACE_TYPE_NO_CONSUME)
11156 				trace_consume(&iter);
11157 
11158 			trace_printk_seq(&iter.seq);
11159 		}
11160 		touch_nmi_watchdog();
11161 	}
11162 
11163 	if (!cnt)
11164 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
11165 	else
11166 		printk(KERN_TRACE "---------------------------------\n");
11167 
11168 	tr->trace_flags |= old_userobj;
11169 
11170 	tracer_tracing_enable(tr);
11171 	local_irq_restore(flags);
11172 }
11173 
11174 static void ftrace_dump_by_param(void)
11175 {
11176 	bool first_param = true;
11177 	char dump_param[MAX_TRACER_SIZE];
11178 	char *buf, *token, *inst_name;
11179 	struct trace_array *tr;
11180 
11181 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
11182 	buf = dump_param;
11183 
11184 	while ((token = strsep(&buf, ",")) != NULL) {
11185 		if (first_param) {
11186 			first_param = false;
11187 			if (!strcmp("0", token))
11188 				continue;
11189 			else if (!strcmp("1", token)) {
11190 				ftrace_dump_one(&global_trace, DUMP_ALL);
11191 				continue;
11192 			}
11193 			else if (!strcmp("2", token) ||
11194 			  !strcmp("orig_cpu", token)) {
11195 				ftrace_dump_one(&global_trace, DUMP_ORIG);
11196 				continue;
11197 			}
11198 		}
11199 
11200 		inst_name = strsep(&token, "=");
11201 		tr = trace_array_find(inst_name);
11202 		if (!tr) {
11203 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
11204 			continue;
11205 		}
11206 
11207 		if (token && (!strcmp("2", token) ||
11208 			  !strcmp("orig_cpu", token)))
11209 			ftrace_dump_one(tr, DUMP_ORIG);
11210 		else
11211 			ftrace_dump_one(tr, DUMP_ALL);
11212 	}
11213 }
11214 
11215 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
11216 {
11217 	static atomic_t dump_running;
11218 
11219 	/* Only allow one dump user at a time. */
11220 	if (atomic_inc_return(&dump_running) != 1) {
11221 		atomic_dec(&dump_running);
11222 		return;
11223 	}
11224 
11225 	switch (oops_dump_mode) {
11226 	case DUMP_ALL:
11227 		ftrace_dump_one(&global_trace, DUMP_ALL);
11228 		break;
11229 	case DUMP_ORIG:
11230 		ftrace_dump_one(&global_trace, DUMP_ORIG);
11231 		break;
11232 	case DUMP_PARAM:
11233 		ftrace_dump_by_param();
11234 		break;
11235 	case DUMP_NONE:
11236 		break;
11237 	default:
11238 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
11239 		ftrace_dump_one(&global_trace, DUMP_ALL);
11240 	}
11241 
11242 	atomic_dec(&dump_running);
11243 }
11244 EXPORT_SYMBOL_GPL(ftrace_dump);
11245 
11246 #define WRITE_BUFSIZE  4096
11247 
11248 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
11249 				size_t count, loff_t *ppos,
11250 				int (*createfn)(const char *))
11251 {
11252 	char *kbuf __free(kfree) = NULL;
11253 	char *buf, *tmp;
11254 	int ret = 0;
11255 	size_t done = 0;
11256 	size_t size;
11257 
11258 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
11259 	if (!kbuf)
11260 		return -ENOMEM;
11261 
11262 	while (done < count) {
11263 		size = count - done;
11264 
11265 		if (size >= WRITE_BUFSIZE)
11266 			size = WRITE_BUFSIZE - 1;
11267 
11268 		if (copy_from_user(kbuf, buffer + done, size))
11269 			return -EFAULT;
11270 
11271 		kbuf[size] = '\0';
11272 		buf = kbuf;
11273 		do {
11274 			tmp = strchr(buf, '\n');
11275 			if (tmp) {
11276 				*tmp = '\0';
11277 				size = tmp - buf + 1;
11278 			} else {
11279 				size = strlen(buf);
11280 				if (done + size < count) {
11281 					if (buf != kbuf)
11282 						break;
11283 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
11284 					pr_warn("Line length is too long: Should be less than %d\n",
11285 						WRITE_BUFSIZE - 2);
11286 					return -EINVAL;
11287 				}
11288 			}
11289 			done += size;
11290 
11291 			/* Remove comments */
11292 			tmp = strchr(buf, '#');
11293 
11294 			if (tmp)
11295 				*tmp = '\0';
11296 
11297 			ret = createfn(buf);
11298 			if (ret)
11299 				return ret;
11300 			buf += size;
11301 
11302 		} while (done < count);
11303 	}
11304 	return done;
11305 }
11306 
11307 #ifdef CONFIG_TRACER_MAX_TRACE
11308 __init static bool tr_needs_alloc_snapshot(const char *name)
11309 {
11310 	char *test;
11311 	int len = strlen(name);
11312 	bool ret;
11313 
11314 	if (!boot_snapshot_index)
11315 		return false;
11316 
11317 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
11318 	    boot_snapshot_info[len] == '\t')
11319 		return true;
11320 
11321 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
11322 	if (!test)
11323 		return false;
11324 
11325 	sprintf(test, "\t%s\t", name);
11326 	ret = strstr(boot_snapshot_info, test) == NULL;
11327 	kfree(test);
11328 	return ret;
11329 }
11330 
11331 __init static void do_allocate_snapshot(const char *name)
11332 {
11333 	if (!tr_needs_alloc_snapshot(name))
11334 		return;
11335 
11336 	/*
11337 	 * When allocate_snapshot is set, the next call to
11338 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
11339 	 * will allocate the snapshot buffer. That will also clear
11340 	 * this flag.
11341 	 */
11342 	allocate_snapshot = true;
11343 }
11344 #else
11345 static inline void do_allocate_snapshot(const char *name) { }
11346 #endif
11347 
11348 __init static int backup_instance_area(const char *backup,
11349 				       unsigned long *addr, phys_addr_t *size)
11350 {
11351 	struct trace_array *backup_tr;
11352 	void *allocated_vaddr = NULL;
11353 
11354 	backup_tr = trace_array_get_by_name(backup, NULL);
11355 	if (!backup_tr) {
11356 		pr_warn("Tracing: Instance %s is not found.\n", backup);
11357 		return -ENOENT;
11358 	}
11359 
11360 	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
11361 		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
11362 		trace_array_put(backup_tr);
11363 		return -EINVAL;
11364 	}
11365 
11366 	*size = backup_tr->range_addr_size;
11367 
11368 	allocated_vaddr = vzalloc(*size);
11369 	if (!allocated_vaddr) {
11370 		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
11371 			backup, (unsigned long)*size);
11372 		trace_array_put(backup_tr);
11373 		return -ENOMEM;
11374 	}
11375 
11376 	memcpy(allocated_vaddr,
11377 		(void *)backup_tr->range_addr_start, (size_t)*size);
11378 	*addr = (unsigned long)allocated_vaddr;
11379 
11380 	trace_array_put(backup_tr);
11381 	return 0;
11382 }
11383 
11384 __init static void enable_instances(void)
11385 {
11386 	struct trace_array *tr;
11387 	bool memmap_area = false;
11388 	char *curr_str;
11389 	char *name;
11390 	char *str;
11391 	char *tok;
11392 
11393 	/* A tab is always appended */
11394 	boot_instance_info[boot_instance_index - 1] = '\0';
11395 	str = boot_instance_info;
11396 
11397 	while ((curr_str = strsep(&str, "\t"))) {
11398 		phys_addr_t start = 0;
11399 		phys_addr_t size = 0;
11400 		unsigned long addr = 0;
11401 		bool traceprintk = false;
11402 		bool traceoff = false;
11403 		char *flag_delim;
11404 		char *addr_delim;
11405 		char *rname __free(kfree) = NULL;
11406 		char *backup;
11407 
11408 		tok = strsep(&curr_str, ",");
11409 
11410 		name = strsep(&tok, "=");
11411 		backup = tok;
11412 
11413 		flag_delim = strchr(name, '^');
11414 		addr_delim = strchr(name, '@');
11415 
11416 		if (addr_delim)
11417 			*addr_delim++ = '\0';
11418 
11419 		if (flag_delim)
11420 			*flag_delim++ = '\0';
11421 
11422 		if (backup) {
11423 			if (backup_instance_area(backup, &addr, &size) < 0)
11424 				continue;
11425 		}
11426 
11427 		if (flag_delim) {
11428 			char *flag;
11429 
11430 			while ((flag = strsep(&flag_delim, "^"))) {
11431 				if (strcmp(flag, "traceoff") == 0) {
11432 					traceoff = true;
11433 				} else if ((strcmp(flag, "printk") == 0) ||
11434 					   (strcmp(flag, "traceprintk") == 0) ||
11435 					   (strcmp(flag, "trace_printk") == 0)) {
11436 					traceprintk = true;
11437 				} else {
11438 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
11439 						flag, name);
11440 				}
11441 			}
11442 		}
11443 
11444 		tok = addr_delim;
11445 		if (tok && isdigit(*tok)) {
11446 			start = memparse(tok, &tok);
11447 			if (!start) {
11448 				pr_warn("Tracing: Invalid boot instance address for %s\n",
11449 					name);
11450 				continue;
11451 			}
11452 			if (*tok != ':') {
11453 				pr_warn("Tracing: No size specified for instance %s\n", name);
11454 				continue;
11455 			}
11456 			tok++;
11457 			size = memparse(tok, &tok);
11458 			if (!size) {
11459 				pr_warn("Tracing: Invalid boot instance size for %s\n",
11460 					name);
11461 				continue;
11462 			}
11463 			memmap_area = true;
11464 		} else if (tok) {
11465 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
11466 				start = 0;
11467 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
11468 				continue;
11469 			}
11470 			rname = kstrdup(tok, GFP_KERNEL);
11471 		}
11472 
11473 		if (start) {
11474 			/* Start and size must be page aligned */
11475 			if (start & ~PAGE_MASK) {
11476 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
11477 				continue;
11478 			}
11479 			if (size & ~PAGE_MASK) {
11480 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
11481 				continue;
11482 			}
11483 
11484 			if (memmap_area)
11485 				addr = map_pages(start, size);
11486 			else
11487 				addr = (unsigned long)phys_to_virt(start);
11488 			if (addr) {
11489 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11490 					name, &start, (unsigned long)size);
11491 			} else {
11492 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
11493 				continue;
11494 			}
11495 		} else {
11496 			/* Only non mapped buffers have snapshot buffers */
11497 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
11498 				do_allocate_snapshot(name);
11499 		}
11500 
11501 		tr = trace_array_create_systems(name, NULL, addr, size);
11502 		if (IS_ERR(tr)) {
11503 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
11504 			continue;
11505 		}
11506 
11507 		if (traceoff)
11508 			tracer_tracing_off(tr);
11509 
11510 		if (traceprintk)
11511 			update_printk_trace(tr);
11512 
11513 		/*
11514 		 * memmap'd buffers can not be freed.
11515 		 */
11516 		if (memmap_area) {
11517 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11518 			tr->ref++;
11519 		}
11520 
11521 		/*
11522 		 * Backup buffers can be freed but need vfree().
11523 		 */
11524 		if (backup)
11525 			tr->flags |= TRACE_ARRAY_FL_VMALLOC;
11526 
11527 		if (start || backup) {
11528 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11529 			tr->range_name = no_free_ptr(rname);
11530 		}
11531 
11532 		while ((tok = strsep(&curr_str, ","))) {
11533 			early_enable_events(tr, tok, true);
11534 		}
11535 	}
11536 }
11537 
11538 __init static int tracer_alloc_buffers(void)
11539 {
11540 	int ring_buf_size;
11541 	int ret = -ENOMEM;
11542 
11543 
11544 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
11545 		pr_warn("Tracing disabled due to lockdown\n");
11546 		return -EPERM;
11547 	}
11548 
11549 	/*
11550 	 * Make sure we don't accidentally add more trace options
11551 	 * than we have bits for.
11552 	 */
11553 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11554 
11555 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11556 		return -ENOMEM;
11557 
11558 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11559 		goto out_free_buffer_mask;
11560 
11561 	/* Only allocate trace_printk buffers if a trace_printk exists */
11562 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11563 		/* Must be called before global_trace.buffer is allocated */
11564 		trace_printk_init_buffers();
11565 
11566 	/* To save memory, keep the ring buffer size to its minimum */
11567 	if (global_trace.ring_buffer_expanded)
11568 		ring_buf_size = trace_buf_size;
11569 	else
11570 		ring_buf_size = 1;
11571 
11572 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11573 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11574 
11575 	raw_spin_lock_init(&global_trace.start_lock);
11576 
11577 	/*
11578 	 * The prepare callbacks allocates some memory for the ring buffer. We
11579 	 * don't free the buffer if the CPU goes down. If we were to free
11580 	 * the buffer, then the user would lose any trace that was in the
11581 	 * buffer. The memory will be removed once the "instance" is removed.
11582 	 */
11583 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11584 				      "trace/RB:prepare", trace_rb_cpu_prepare,
11585 				      NULL);
11586 	if (ret < 0)
11587 		goto out_free_cpumask;
11588 	/* Used for event triggers */
11589 	ret = -ENOMEM;
11590 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11591 	if (!temp_buffer)
11592 		goto out_rm_hp_state;
11593 
11594 	if (trace_create_savedcmd() < 0)
11595 		goto out_free_temp_buffer;
11596 
11597 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11598 		goto out_free_savedcmd;
11599 
11600 	/* TODO: make the number of buffers hot pluggable with CPUS */
11601 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11602 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11603 		goto out_free_pipe_cpumask;
11604 	}
11605 	if (global_trace.buffer_disabled)
11606 		tracing_off();
11607 
11608 	if (trace_boot_clock) {
11609 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
11610 		if (ret < 0)
11611 			pr_warn("Trace clock %s not defined, going back to default\n",
11612 				trace_boot_clock);
11613 	}
11614 
11615 	/*
11616 	 * register_tracer() might reference current_trace, so it
11617 	 * needs to be set before we register anything. This is
11618 	 * just a bootstrap of current_trace anyway.
11619 	 */
11620 	global_trace.current_trace = &nop_trace;
11621 	global_trace.current_trace_flags = nop_trace.flags;
11622 
11623 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11624 #ifdef CONFIG_TRACER_MAX_TRACE
11625 	spin_lock_init(&global_trace.snapshot_trigger_lock);
11626 #endif
11627 	ftrace_init_global_array_ops(&global_trace);
11628 
11629 #ifdef CONFIG_MODULES
11630 	INIT_LIST_HEAD(&global_trace.mod_events);
11631 #endif
11632 
11633 	init_trace_flags_index(&global_trace);
11634 
11635 	INIT_LIST_HEAD(&global_trace.tracers);
11636 
11637 	/* All seems OK, enable tracing */
11638 	tracing_disabled = 0;
11639 
11640 	atomic_notifier_chain_register(&panic_notifier_list,
11641 				       &trace_panic_notifier);
11642 
11643 	register_die_notifier(&trace_die_notifier);
11644 
11645 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11646 
11647 	global_trace.syscall_buf_sz = syscall_buf_size;
11648 
11649 	INIT_LIST_HEAD(&global_trace.systems);
11650 	INIT_LIST_HEAD(&global_trace.events);
11651 	INIT_LIST_HEAD(&global_trace.hist_vars);
11652 	INIT_LIST_HEAD(&global_trace.err_log);
11653 	list_add(&global_trace.marker_list, &marker_copies);
11654 	list_add(&global_trace.list, &ftrace_trace_arrays);
11655 
11656 	register_tracer(&nop_trace);
11657 
11658 	/* Function tracing may start here (via kernel command line) */
11659 	init_function_trace();
11660 
11661 	apply_trace_boot_options();
11662 
11663 	register_snapshot_cmd();
11664 
11665 	return 0;
11666 
11667 out_free_pipe_cpumask:
11668 	free_cpumask_var(global_trace.pipe_cpumask);
11669 out_free_savedcmd:
11670 	trace_free_saved_cmdlines_buffer();
11671 out_free_temp_buffer:
11672 	ring_buffer_free(temp_buffer);
11673 out_rm_hp_state:
11674 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11675 out_free_cpumask:
11676 	free_cpumask_var(global_trace.tracing_cpumask);
11677 out_free_buffer_mask:
11678 	free_cpumask_var(tracing_buffer_mask);
11679 	return ret;
11680 }
11681 
11682 #ifdef CONFIG_FUNCTION_TRACER
11683 /* Used to set module cached ftrace filtering at boot up */
11684 struct trace_array *trace_get_global_array(void)
11685 {
11686 	return &global_trace;
11687 }
11688 #endif
11689 
11690 void __init ftrace_boot_snapshot(void)
11691 {
11692 #ifdef CONFIG_TRACER_MAX_TRACE
11693 	struct trace_array *tr;
11694 
11695 	if (!snapshot_at_boot)
11696 		return;
11697 
11698 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11699 		if (!tr->allocated_snapshot)
11700 			continue;
11701 
11702 		tracing_snapshot_instance(tr);
11703 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11704 	}
11705 #endif
11706 }
11707 
11708 void __init early_trace_init(void)
11709 {
11710 	if (tracepoint_printk) {
11711 		tracepoint_print_iter =
11712 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11713 		if (MEM_FAIL(!tracepoint_print_iter,
11714 			     "Failed to allocate trace iterator\n"))
11715 			tracepoint_printk = 0;
11716 		else
11717 			static_key_enable(&tracepoint_printk_key.key);
11718 	}
11719 	tracer_alloc_buffers();
11720 
11721 	init_events();
11722 }
11723 
11724 void __init trace_init(void)
11725 {
11726 	trace_event_init();
11727 
11728 	if (boot_instance_index)
11729 		enable_instances();
11730 }
11731 
11732 __init static void clear_boot_tracer(void)
11733 {
11734 	/*
11735 	 * The default tracer at boot buffer is an init section.
11736 	 * This function is called in lateinit. If we did not
11737 	 * find the boot tracer, then clear it out, to prevent
11738 	 * later registration from accessing the buffer that is
11739 	 * about to be freed.
11740 	 */
11741 	if (!default_bootup_tracer)
11742 		return;
11743 
11744 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11745 	       default_bootup_tracer);
11746 	default_bootup_tracer = NULL;
11747 }
11748 
11749 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11750 __init static void tracing_set_default_clock(void)
11751 {
11752 	/* sched_clock_stable() is determined in late_initcall */
11753 	if (!trace_boot_clock && !sched_clock_stable()) {
11754 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11755 			pr_warn("Can not set tracing clock due to lockdown\n");
11756 			return;
11757 		}
11758 
11759 		printk(KERN_WARNING
11760 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11761 		       "If you want to keep using the local clock, then add:\n"
11762 		       "  \"trace_clock=local\"\n"
11763 		       "on the kernel command line\n");
11764 		tracing_set_clock(&global_trace, "global");
11765 	}
11766 }
11767 #else
11768 static inline void tracing_set_default_clock(void) { }
11769 #endif
11770 
11771 __init static int late_trace_init(void)
11772 {
11773 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11774 		static_key_disable(&tracepoint_printk_key.key);
11775 		tracepoint_printk = 0;
11776 	}
11777 
11778 	if (traceoff_after_boot)
11779 		tracing_off();
11780 
11781 	tracing_set_default_clock();
11782 	clear_boot_tracer();
11783 	return 0;
11784 }
11785 
11786 late_initcall_sync(late_trace_init);
11787