xref: /linux/kernel/trace/trace.c (revision 9ecfb2f7287a967b418ba69f10d45ead0d360593)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55 
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57 
58 #include "trace.h"
59 #include "trace_output.h"
60 
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occur
67  * at the same time, giving false positive or negative results.
68  */
69 bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #else
85 #define tracing_selftest_disabled	0
86 #endif
87 
88 /* Pipe tracepoints to printk */
89 static struct trace_iterator *tracepoint_print_iter;
90 int tracepoint_printk;
91 static bool tracepoint_printk_stop_on_boot __initdata;
92 static bool traceoff_after_boot __initdata;
93 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
94 
95 /* Store tracers and their flags per instance */
struct tracers {
	struct list_head	list;		/* list linkage for this entry */
	struct tracer		*tracer;	/* tracer this entry describes */
	struct tracer_flags	*flags;		/* flags stored alongside @tracer */
};
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 #define MAX_TRACER_SIZE		100
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputting it to a
127  * serial console.
128  *
129  * It is default off, but you can enable it with either specifying
130  * "ftrace_dump_on_oops" in the kernel command line, or setting
131  * /proc/sys/kernel/ftrace_dump_on_oops
132  * Set 1 if you want to dump buffers of all CPUs
133  * Set 2 if you want to dump the buffer of the CPU that triggered oops
134  * Set instance name if you want to dump the specific trace instance
135  * Multiple instance dump is also supported, and instances are separated
136  * by commas.
137  */
138 /* Set to string format zero to disable by default */
139 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
140 
141 /* When set, tracing will stop when a WARN*() is hit */
142 static int __disable_trace_on_warning;
143 
144 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
145 			     void *buffer, size_t *lenp, loff_t *ppos);
/*
 * sysctl entries registered under "kernel" by init_trace_sysctls().
 * All three are mode 0644.
 */
static const struct ctl_table trace_sysctl_table[] = {
	{
		/* String value, at most MAX_TRACER_SIZE bytes. */
		.procname	= "ftrace_dump_on_oops",
		.data		= &ftrace_dump_on_oops,
		.maxlen		= MAX_TRACER_SIZE,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		/* Integer backing disable_trace_on_warning(). */
		.procname	= "traceoff_on_warning",
		.data		= &__disable_trace_on_warning,
		.maxlen		= sizeof(__disable_trace_on_warning),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Uses the dedicated tracepoint_printk_sysctl() handler. */
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		.proc_handler	= tracepoint_printk_sysctl,
	},
};
169 
170 static int __init init_trace_sysctls(void)
171 {
172 	register_sysctl_init("kernel", trace_sysctl_table);
173 	return 0;
174 }
175 subsys_initcall(init_trace_sysctls);
176 
177 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
178 /* Map of enums to their values, for "eval_map" file */
/* First element of a saved eval map array. */
struct trace_eval_map_head {
	struct module			*mod;		/* owning module, if not built in */
	unsigned long			length;		/* count of the saved maps */
};
183 
184 union trace_eval_map_item;
185 
/* Last element of a saved eval map array. */
struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "eval_string"
	 *
	 * NOTE(review): as declared here, "next" is the first member and
	 * "end" the second - confirm which layout the comment intends.
	 */
	union trace_eval_map_item	*next;	/* next array of saved items */
	const char			*end;	/* points to NULL */
};
194 
195 static DEFINE_MUTEX(trace_eval_mutex);
196 
197 /*
198  * The trace_eval_maps are saved in an array with two extra elements,
199  * one at the beginning, and one at the end. The beginning item contains
200  * the count of the saved maps (head.length), and the module they
201  * belong to if not built in (head.mod). The ending item contains a
202  * pointer to the next array of saved eval_map items.
203  */
/* One array slot: a regular map entry, the leading head or the trailing tail. */
union trace_eval_map_item {
	struct trace_eval_map		map;	/* regular saved map entry */
	struct trace_eval_map_head	head;	/* beginning item of the array */
	struct trace_eval_map_tail	tail;	/* ending item of the array */
};
209 
210 static union trace_eval_map_item *trace_eval_maps;
211 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
212 
213 int tracing_set_tracer(struct trace_array *tr, const char *buf);
214 static void ftrace_trace_userstack(struct trace_array *tr,
215 				   struct trace_buffer *buffer,
216 				   unsigned int trace_ctx);
217 
218 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
219 static char *default_bootup_tracer;
220 
221 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
222 static int boot_instance_index;
223 
224 /*
225  * Repeated boot parameters, including Bootconfig array expansions, need
226  * to stay in the delimiter form that the existing parser consumes.
227  */
228 void __init trace_append_boot_param(char *buf, const char *str, char sep,
229 				    int size)
230 {
231 	int len, needed, str_len;
232 
233 	if (!*str)
234 		return;
235 
236 	len = strlen(buf);
237 	str_len = strlen(str);
238 	needed = len + str_len + 1;
239 
240 	/* For continuation, account for the separator. */
241 	if (len)
242 		needed++;
243 	if (needed > size)
244 		return;
245 
246 	if (len)
247 		buf[len++] = sep;
248 
249 	strscpy(buf + len, str, size - len);
250 }
251 
252 static int __init set_cmdline_ftrace(char *str)
253 {
254 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
255 	default_bootup_tracer = bootup_tracer_buf;
256 	/* We are using ftrace early, expand it */
257 	trace_set_ring_buffer_expanded(NULL);
258 	return 1;
259 }
260 __setup("ftrace=", set_cmdline_ftrace);
261 
262 int ftrace_dump_on_oops_enabled(void)
263 {
264 	if (!strcmp("0", ftrace_dump_on_oops))
265 		return 0;
266 	else
267 		return 1;
268 }
269 
270 static int __init set_ftrace_dump_on_oops(char *str)
271 {
272 	if (!*str) {
273 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
274 		return 1;
275 	}
276 
277 	if (*str == ',') {
278 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
279 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
280 		return 1;
281 	}
282 
283 	if (*str++ == '=') {
284 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
285 		return 1;
286 	}
287 
288 	return 0;
289 }
290 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
291 
292 static int __init stop_trace_on_warning(char *str)
293 {
294 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
295 		__disable_trace_on_warning = 1;
296 	return 1;
297 }
298 __setup("traceoff_on_warning", stop_trace_on_warning);
299 
300 static int __init boot_instance(char *str)
301 {
302 	char *slot = boot_instance_info + boot_instance_index;
303 	int left = sizeof(boot_instance_info) - boot_instance_index;
304 	int ret;
305 
306 	if (strlen(str) >= left)
307 		return -1;
308 
309 	ret = snprintf(slot, left, "%s\t", str);
310 	boot_instance_index += ret;
311 
312 	return 1;
313 }
314 __setup("trace_instance=", boot_instance);
315 
316 
317 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
318 
319 static int __init set_trace_boot_options(char *str)
320 {
321 	trace_append_boot_param(trace_boot_options_buf, str, ',',
322 				MAX_TRACER_SIZE);
323 	return 1;
324 }
325 __setup("trace_options=", set_trace_boot_options);
326 
327 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
328 static char *trace_boot_clock __initdata;
329 
330 static int __init set_trace_boot_clock(char *str)
331 {
332 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
333 	trace_boot_clock = trace_boot_clock_buf;
334 	return 1;
335 }
336 __setup("trace_clock=", set_trace_boot_clock);
337 
338 static int __init set_tracepoint_printk(char *str)
339 {
340 	/* Ignore the "tp_printk_stop_on_boot" param */
341 	if (*str == '_')
342 		return 0;
343 
344 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
345 		tracepoint_printk = 1;
346 	return 1;
347 }
348 __setup("tp_printk", set_tracepoint_printk);
349 
350 static int __init set_tracepoint_printk_stop(char *str)
351 {
352 	tracepoint_printk_stop_on_boot = true;
353 	return 1;
354 }
355 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
356 
357 static int __init set_traceoff_after_boot(char *str)
358 {
359 	traceoff_after_boot = true;
360 	return 1;
361 }
362 __setup("traceoff_after_boot", set_traceoff_after_boot);
363 
364 unsigned long long ns2usecs(u64 nsec)
365 {
366 	nsec += 500;
367 	do_div(nsec, 1000);
368 	return nsec;
369 }
370 
371 static void
372 trace_process_export(struct trace_export *export,
373 	       struct ring_buffer_event *event, int flag)
374 {
375 	struct trace_entry *entry;
376 	unsigned int size = 0;
377 
378 	if (export->flags & flag) {
379 		entry = ring_buffer_event_data(event);
380 		size = ring_buffer_event_length(event);
381 		export->write(export, entry, size);
382 	}
383 }
384 
385 static DEFINE_MUTEX(ftrace_export_lock);
386 
387 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
388 
389 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
390 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
391 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
392 
393 static inline void ftrace_exports_enable(struct trace_export *export)
394 {
395 	if (export->flags & TRACE_EXPORT_FUNCTION)
396 		static_branch_inc(&trace_function_exports_enabled);
397 
398 	if (export->flags & TRACE_EXPORT_EVENT)
399 		static_branch_inc(&trace_event_exports_enabled);
400 
401 	if (export->flags & TRACE_EXPORT_MARKER)
402 		static_branch_inc(&trace_marker_exports_enabled);
403 }
404 
405 static inline void ftrace_exports_disable(struct trace_export *export)
406 {
407 	if (export->flags & TRACE_EXPORT_FUNCTION)
408 		static_branch_dec(&trace_function_exports_enabled);
409 
410 	if (export->flags & TRACE_EXPORT_EVENT)
411 		static_branch_dec(&trace_event_exports_enabled);
412 
413 	if (export->flags & TRACE_EXPORT_MARKER)
414 		static_branch_dec(&trace_marker_exports_enabled);
415 }
416 
417 static void ftrace_exports(struct ring_buffer_event *event, int flag)
418 {
419 	struct trace_export *export;
420 
421 	guard(preempt_notrace)();
422 
423 	export = rcu_dereference_raw_check(ftrace_exports_list);
424 	while (export) {
425 		trace_process_export(export, event, flag);
426 		export = rcu_dereference_raw_check(export->next);
427 	}
428 }
429 
430 static inline void
431 add_trace_export(struct trace_export **list, struct trace_export *export)
432 {
433 	rcu_assign_pointer(export->next, *list);
434 	/*
435 	 * We are entering export into the list but another
436 	 * CPU might be walking that list. We need to make sure
437 	 * the export->next pointer is valid before another CPU sees
438 	 * the export pointer included into the list.
439 	 */
440 	rcu_assign_pointer(*list, export);
441 }
442 
443 static inline int
444 rm_trace_export(struct trace_export **list, struct trace_export *export)
445 {
446 	struct trace_export **p;
447 
448 	for (p = list; *p != NULL; p = &(*p)->next)
449 		if (*p == export)
450 			break;
451 
452 	if (*p != export)
453 		return -1;
454 
455 	rcu_assign_pointer(*p, (*p)->next);
456 
457 	return 0;
458 }
459 
/*
 * Enable the static keys @export needs, then link it into @list, so the
 * keys are already up when the entry becomes visible.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
467 
/*
 * Unlink @export from @list and, when it really was on the list, drop the
 * static-key references that add_ftrace_export() took for it.
 *
 * Returns 0 on success, -1 if @export was not found on @list.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);

	/*
	 * Only rebalance the static keys for an export that was actually
	 * registered. Decrementing for an unknown export would drop key
	 * counts it never raised.
	 */
	if (!ret)
		ftrace_exports_disable(export);

	return ret;
}
478 
479 int register_ftrace_export(struct trace_export *export)
480 {
481 	if (WARN_ON_ONCE(!export->write))
482 		return -1;
483 
484 	guard(mutex)(&ftrace_export_lock);
485 
486 	add_ftrace_export(&ftrace_exports_list, export);
487 
488 	return 0;
489 }
490 EXPORT_SYMBOL_GPL(register_ftrace_export);
491 
492 int unregister_ftrace_export(struct trace_export *export)
493 {
494 	guard(mutex)(&ftrace_export_lock);
495 	return rm_ftrace_export(&ftrace_exports_list, export);
496 }
497 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
498 
/*
 * trace_flags holds trace_options default values.
 * This mask is the initial .trace_flags of global_trace.
 */
#define TRACE_DEFAULT_FLAGS						\
	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
	 TRACE_ITER(COPY_MARKER))

/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
	       TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |	\
	       TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)

/*
 * trace_flags that are default zero for instances, i.e. for trace arrays
 * other than global_trace.
 */
#define ZEROED_TRACE_FLAGS \
	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
	 TRACE_ITER(COPY_MARKER))
518 
/*
 * The global_trace is the descriptor that holds the top-level tracing
 * buffers for the live tracing. Its flags start out as TRACE_DEFAULT_FLAGS.
 */
static struct trace_array global_trace = {
	.trace_flags = TRACE_DEFAULT_FLAGS,
};

/*
 * Trace array currently carrying TRACE_ITER(TRACE_PRINTK); starts as
 * global_trace and is switched by update_printk_trace().
 */
struct trace_array *printk_trace = &global_trace;
528 
529 /* List of trace_arrays interested in the top level trace_marker */
530 static LIST_HEAD(marker_copies);
531 
532 static void update_printk_trace(struct trace_array *tr)
533 {
534 	if (printk_trace == tr)
535 		return;
536 
537 	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
538 	printk_trace = tr;
539 	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
540 }
541 
542 /* Returns true if the status of tr changed */
543 static bool update_marker_trace(struct trace_array *tr, int enabled)
544 {
545 	lockdep_assert_held(&event_mutex);
546 
547 	if (enabled) {
548 		if (tr->trace_flags & TRACE_ITER(COPY_MARKER))
549 			return false;
550 
551 		list_add_rcu(&tr->marker_list, &marker_copies);
552 		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
553 		return true;
554 	}
555 
556 	if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER)))
557 		return false;
558 
559 	list_del_rcu(&tr->marker_list);
560 	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
561 	return true;
562 }
563 
564 void trace_set_ring_buffer_expanded(struct trace_array *tr)
565 {
566 	if (!tr)
567 		tr = &global_trace;
568 	tr->ring_buffer_expanded = true;
569 }
570 
571 static void trace_array_autoremove(struct work_struct *work)
572 {
573 	struct trace_array *tr = container_of(work, struct trace_array, autoremove_work);
574 
575 	trace_array_destroy(tr);
576 }
577 
578 static struct workqueue_struct *autoremove_wq;
579 
580 static void trace_array_kick_autoremove(struct trace_array *tr)
581 {
582 	if (autoremove_wq)
583 		queue_work(autoremove_wq, &tr->autoremove_work);
584 }
585 
586 static void trace_array_cancel_autoremove(struct trace_array *tr)
587 {
588 	/*
589 	 * Since this can be called inside trace_array_autoremove(),
590 	 * it has to avoid deadlock of the workqueue.
591 	 */
592 	if (work_pending(&tr->autoremove_work))
593 		cancel_work_sync(&tr->autoremove_work);
594 }
595 
596 static void trace_array_init_autoremove(struct trace_array *tr)
597 {
598 	INIT_WORK(&tr->autoremove_work, trace_array_autoremove);
599 }
600 
601 static void trace_array_start_autoremove(void)
602 {
603 	if (autoremove_wq)
604 		return;
605 
606 	autoremove_wq = alloc_workqueue("tr_autoremove_wq",
607 					WQ_UNBOUND | WQ_HIGHPRI, 0);
608 	if (!autoremove_wq)
609 		pr_warn("Unable to allocate tr_autoremove_wq. autoremove disabled.\n");
610 }
611 
612 LIST_HEAD(ftrace_trace_arrays);
613 
614 static int __trace_array_get(struct trace_array *this_tr)
615 {
616 	/* When free_on_close is set, this is not available anymore. */
617 	if (autoremove_wq && this_tr->free_on_close)
618 		return -ENODEV;
619 
620 	this_tr->ref++;
621 	return 0;
622 }
623 
624 int trace_array_get(struct trace_array *this_tr)
625 {
626 	struct trace_array *tr;
627 
628 	guard(mutex)(&trace_types_lock);
629 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
630 		if (tr == this_tr) {
631 			return __trace_array_get(tr);
632 		}
633 	}
634 
635 	return -ENODEV;
636 }
637 
638 static void __trace_array_put(struct trace_array *this_tr)
639 {
640 	WARN_ON(!this_tr->ref);
641 	this_tr->ref--;
642 	/*
643 	 * When free_on_close is set, prepare removing the array
644 	 * when the last reference is released.
645 	 */
646 	if (this_tr->ref == 1 && this_tr->free_on_close)
647 		trace_array_kick_autoremove(this_tr);
648 }
649 
650 /**
651  * trace_array_put - Decrement the reference counter for this trace array.
652  * @this_tr : pointer to the trace array
653  *
654  * NOTE: Use this when we no longer need the trace array returned by
655  * trace_array_get_by_name(). This ensures the trace array can be later
656  * destroyed.
657  *
658  */
659 void trace_array_put(struct trace_array *this_tr)
660 {
661 	if (!this_tr)
662 		return;
663 
664 	guard(mutex)(&trace_types_lock);
665 	__trace_array_put(this_tr);
666 }
667 EXPORT_SYMBOL_GPL(trace_array_put);
668 
669 int tracing_check_open_get_tr(struct trace_array *tr)
670 {
671 	int ret;
672 
673 	ret = security_locked_down(LOCKDOWN_TRACEFS);
674 	if (ret)
675 		return ret;
676 
677 	if (tracing_disabled)
678 		return -ENODEV;
679 
680 	if (tr && trace_array_get(tr) < 0)
681 		return -ENODEV;
682 
683 	return 0;
684 }
685 
686 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
687 {
688 	u64 ts;
689 
690 	/* Early boot up does not have a buffer yet */
691 	if (!buf->buffer)
692 		return trace_clock_local();
693 
694 	ts = ring_buffer_time_stamp(buf->buffer);
695 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
696 
697 	return ts;
698 }
699 
700 u64 ftrace_now(int cpu)
701 {
702 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
703 }
704 
705 /**
706  * tracing_is_enabled - Show if global_trace has been enabled
707  *
708  * Shows if the global trace has been enabled or not. It uses the
709  * mirror flag "buffer_disabled" to be used in fast paths such as for
710  * the irqsoff tracer. But it may be inaccurate due to races. If you
711  * need to know the accurate state, use tracing_is_on() which is a little
712  * slower, but accurate.
713  */
714 int tracing_is_enabled(void)
715 {
716 	/*
717 	 * For quick access (irqsoff uses this in fast path), just
718 	 * return the mirror variable of the state of the ring buffer.
719 	 * It's a little racy, but we don't really care.
720 	 */
721 	return !global_trace.buffer_disabled;
722 }
723 
724 /*
725  * trace_buf_size is the size in bytes that is allocated
726  * for a buffer. Note, the number of bytes is always rounded
727  * to page size.
728  *
729  * This number is purposely set low: room for 16384 entries of 88 bytes.
730  * If the dump on oops happens, it will be much appreciated
731  * to not have to wait for all that output. Anyway this can be
732  * boot time and run time configurable.
733  */
734 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
735 
736 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
737 
738 /* trace_types holds a link list of available tracers. */
739 static struct tracer		*trace_types __read_mostly;
740 
741 /*
742  * trace_types_lock is used to protect the trace_types list.
743  */
744 DEFINE_MUTEX(trace_types_lock);
745 
746 /*
747  * serialize the access of the ring buffer
748  *
749  * ring buffer serializes readers, but it is low level protection.
750  * The validity of the events (which returns by ring_buffer_peek() ..etc)
751  * are not protected by ring buffer.
752  *
753  * The content of events may become garbage if we allow other process consumes
754  * these events concurrently:
755  *   A) the page of the consumed events may become a normal page
756  *      (not reader page) in ring buffer, and this page will be rewritten
757  *      by events producer.
758  *   B) The page of the consumed events may become a page for splice_read,
759  *      and this page will be returned to system.
760  *
761  * These primitives allow multi process access to different cpu ring buffer
762  * concurrently.
763  *
764  * These primitives don't distinguish read-only and read-consume access.
765  * Multi read-only access are also serialized.
766  */
767 
768 #ifdef CONFIG_SMP
769 static DECLARE_RWSEM(all_cpu_access_lock);
770 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
771 
772 static inline void trace_access_lock(int cpu)
773 {
774 	if (cpu == RING_BUFFER_ALL_CPUS) {
775 		/* gain it for accessing the whole ring buffer. */
776 		down_write(&all_cpu_access_lock);
777 	} else {
778 		/* gain it for accessing a cpu ring buffer. */
779 
780 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
781 		down_read(&all_cpu_access_lock);
782 
783 		/* Secondly block other access to this @cpu ring buffer. */
784 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
785 	}
786 }
787 
788 static inline void trace_access_unlock(int cpu)
789 {
790 	if (cpu == RING_BUFFER_ALL_CPUS) {
791 		up_write(&all_cpu_access_lock);
792 	} else {
793 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
794 		up_read(&all_cpu_access_lock);
795 	}
796 }
797 
798 static inline void trace_access_lock_init(void)
799 {
800 	int cpu;
801 
802 	for_each_possible_cpu(cpu)
803 		mutex_init(&per_cpu(cpu_access_lock, cpu));
804 }
805 
806 #else
807 
808 static DEFINE_MUTEX(access_lock);
809 
810 static inline void trace_access_lock(int cpu)
811 {
812 	(void)cpu;
813 	mutex_lock(&access_lock);
814 }
815 
816 static inline void trace_access_unlock(int cpu)
817 {
818 	(void)cpu;
819 	mutex_unlock(&access_lock);
820 }
821 
/* !CONFIG_SMP: access_lock is statically defined, nothing to set up. */
static inline void trace_access_lock_init(void)
{
}
825 
826 #endif
827 
828 void tracer_tracing_on(struct trace_array *tr)
829 {
830 	if (tr->array_buffer.buffer)
831 		ring_buffer_record_on(tr->array_buffer.buffer);
832 	/*
833 	 * This flag is looked at when buffers haven't been allocated
834 	 * yet, or by some tracers (like irqsoff), that just want to
835 	 * know if the ring buffer has been disabled, but it can handle
836 	 * races of where it gets disabled but we still do a record.
837 	 * As the check is in the fast path of the tracers, it is more
838 	 * important to be fast than accurate.
839 	 */
840 	tr->buffer_disabled = 0;
841 }
842 
843 /**
844  * tracing_on - enable tracing buffers
845  *
846  * This function enables tracing buffers that may have been
847  * disabled with tracing_off.
848  */
849 void tracing_on(void)
850 {
851 	tracer_tracing_on(&global_trace);
852 }
853 EXPORT_SYMBOL_GPL(tracing_on);
854 
855 #ifdef CONFIG_TRACER_SNAPSHOT
856 /**
857  * tracing_snapshot - take a snapshot of the current buffer.
858  *
859  * This causes a swap between the snapshot buffer and the current live
860  * tracing buffer. You can use this to take snapshots of the live
861  * trace when some condition is triggered, but continue to trace.
862  *
863  * Note, make sure to allocate the snapshot with either
864  * a tracing_snapshot_alloc(), or by doing it manually
865  * with: echo 1 > /sys/kernel/tracing/snapshot
866  *
867  * If the snapshot buffer is not allocated, it will stop tracing.
868  * Basically making a permanent snapshot.
869  */
870 void tracing_snapshot(void)
871 {
872 	struct trace_array *tr = &global_trace;
873 
874 	tracing_snapshot_instance(tr);
875 }
876 EXPORT_SYMBOL_GPL(tracing_snapshot);
877 
878 /**
879  * tracing_alloc_snapshot - allocate snapshot buffer.
880  *
881  * This only allocates the snapshot buffer if it isn't already
882  * allocated - it doesn't also take a snapshot.
883  *
884  * This is meant to be used in cases where the snapshot buffer needs
885  * to be set up for events that can't sleep but need to be able to
886  * trigger a snapshot.
887  */
888 int tracing_alloc_snapshot(void)
889 {
890 	struct trace_array *tr = &global_trace;
891 	int ret;
892 
893 	ret = tracing_alloc_snapshot_instance(tr);
894 	WARN_ON(ret < 0);
895 
896 	return ret;
897 }
898 #else
/* !CONFIG_TRACER_SNAPSHOT stub: only warns (once) that snapshots are missing. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
903 EXPORT_SYMBOL_GPL(tracing_snapshot);
/* !CONFIG_TRACER_SNAPSHOT stub: defers to tracing_snapshot() for the warning. */
void tracing_snapshot_alloc(void)
{
	tracing_snapshot();
}
909 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
910 #endif /* CONFIG_TRACER_SNAPSHOT */
911 
912 void tracer_tracing_off(struct trace_array *tr)
913 {
914 	if (tr->array_buffer.buffer)
915 		ring_buffer_record_off(tr->array_buffer.buffer);
916 	/*
917 	 * This flag is looked at when buffers haven't been allocated
918 	 * yet, or by some tracers (like irqsoff), that just want to
919 	 * know if the ring buffer has been disabled, but it can handle
920 	 * races of where it gets disabled but we still do a record.
921 	 * As the check is in the fast path of the tracers, it is more
922 	 * important to be fast than accurate.
923 	 */
924 	tr->buffer_disabled = 1;
925 }
926 
927 /**
928  * tracer_tracing_disable() - temporary disable the buffer from write
929  * @tr: The trace array to disable its buffer for
930  *
931  * Expects trace_tracing_enable() to re-enable tracing.
932  * The difference between this and tracer_tracing_off() is that this
933  * is a counter and can nest, whereas, tracer_tracing_off() can
934  * be called multiple times and a single trace_tracing_on() will
935  * enable it.
936  */
937 void tracer_tracing_disable(struct trace_array *tr)
938 {
939 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
940 		return;
941 
942 	ring_buffer_record_disable(tr->array_buffer.buffer);
943 }
944 
945 /**
946  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
947  * @tr: The trace array that had tracer_tracincg_disable() called on it
948  *
949  * This is called after tracer_tracing_disable() has been called on @tr,
950  * when it's safe to re-enable tracing.
951  */
952 void tracer_tracing_enable(struct trace_array *tr)
953 {
954 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
955 		return;
956 
957 	ring_buffer_record_enable(tr->array_buffer.buffer);
958 }
959 
960 /**
961  * tracing_off - turn off tracing buffers
962  *
963  * This function stops the tracing buffers from recording data.
964  * It does not disable any overhead the tracers themselves may
965  * be causing. This function simply causes all recording to
966  * the ring buffers to fail.
967  */
968 void tracing_off(void)
969 {
970 	tracer_tracing_off(&global_trace);
971 }
972 EXPORT_SYMBOL_GPL(tracing_off);
973 
974 void disable_trace_on_warning(void)
975 {
976 	if (__disable_trace_on_warning) {
977 		struct trace_array *tr = READ_ONCE(printk_trace);
978 
979 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
980 			"Disabling tracing due to warning\n");
981 		tracing_off();
982 
983 		/* Disable trace_printk() buffer too */
984 		if (tr != &global_trace) {
985 			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
986 					       "Disabling tracing due to warning\n");
987 			tracer_tracing_off(tr);
988 		}
989 	}
990 }
991 
992 /**
993  * tracer_tracing_is_on - show real state of ring buffer enabled
994  * @tr : the trace array to know if ring buffer is enabled
995  *
996  * Shows real state of the ring buffer if it is enabled or not.
997  */
998 bool tracer_tracing_is_on(struct trace_array *tr)
999 {
1000 	if (tr->array_buffer.buffer)
1001 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1002 	return !tr->buffer_disabled;
1003 }
1004 
1005 /**
1006  * tracing_is_on - show state of ring buffers enabled
1007  */
1008 int tracing_is_on(void)
1009 {
1010 	return tracer_tracing_is_on(&global_trace);
1011 }
1012 EXPORT_SYMBOL_GPL(tracing_is_on);
1013 
1014 static int __init set_buf_size(char *str)
1015 {
1016 	unsigned long buf_size;
1017 
1018 	if (!str)
1019 		return 0;
1020 	buf_size = memparse(str, &str);
1021 	/*
1022 	 * nr_entries can not be zero and the startup
1023 	 * tests require some buffer space. Therefore
1024 	 * ensure we have at least 4096 bytes of buffer.
1025 	 */
1026 	trace_buf_size = max(4096UL, buf_size);
1027 	return 1;
1028 }
1029 __setup("trace_buf_size=", set_buf_size);
1030 
1031 static int __init set_tracing_thresh(char *str)
1032 {
1033 	unsigned long threshold;
1034 	int ret;
1035 
1036 	if (!str)
1037 		return 0;
1038 	ret = kstrtoul(str, 0, &threshold);
1039 	if (ret < 0)
1040 		return 0;
1041 	tracing_thresh = threshold * 1000;
1042 	return 1;
1043 }
1044 __setup("tracing_thresh=", set_tracing_thresh);
1045 
/* Convert @nsecs to microseconds, truncating any remainder. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
1050 
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/*
 * Option name strings, one per TRACE_FLAGS entry.
 * These must match the bit positions in trace_iterator_flags
 */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL	/* terminates the list */
};
1065 
/*
 * Table of the clocks a trace array can select; trace_clock_in_ns()
 * indexes it with tr->clock_id.
 */
static struct {
	u64 (*func)(void);	/* clock callback */
	const char *name;	/* name string of the clock */
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	/* NOTE(review): presumably expands to extra architecture entries - confirm */
	ARCH_TRACE_CLOCKS
};
1082 
1083 bool trace_clock_in_ns(struct trace_array *tr)
1084 {
1085 	if (trace_clocks[tr->clock_id].in_ns)
1086 		return true;
1087 
1088 	return false;
1089 }
1090 
1091 /*
1092  * trace_parser_get_init - gets the buffer for trace parser
1093  */
1094 int trace_parser_get_init(struct trace_parser *parser, int size)
1095 {
1096 	memset(parser, 0, sizeof(*parser));
1097 
1098 	parser->buffer = kmalloc(size, GFP_KERNEL);
1099 	if (!parser->buffer)
1100 		return 1;
1101 
1102 	parser->size = size;
1103 	return 0;
1104 }
1105 
/*
 * trace_parser_put - frees the buffer for trace parser
 * @parser: parser whose buffer is released
 *
 * The buffer pointer is reset to NULL after the kfree(), so calling this
 * again on the same parser only passes NULL to kfree().
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	parser->buffer = NULL;
}
1114 
1115 /*
1116  * trace_get_user - reads the user input string separated by  space
1117  * (matched by isspace(ch))
1118  *
1119  * For each string found the 'struct trace_parser' is updated,
1120  * and the function returns.
1121  *
1122  * Returns number of bytes read.
1123  *
1124  * See kernel/trace/trace.h for 'struct trace_parser' details.
1125  */
1126 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1127 	size_t cnt, loff_t *ppos)
1128 {
1129 	char ch;
1130 	size_t read = 0;
1131 	ssize_t ret;
1132 
1133 	if (!*ppos)
1134 		trace_parser_clear(parser);
1135 
1136 	ret = get_user(ch, ubuf++);
1137 	if (ret)
1138 		goto fail;
1139 
1140 	read++;
1141 	cnt--;
1142 
1143 	/*
1144 	 * The parser is not finished with the last write,
1145 	 * continue reading the user input without skipping spaces.
1146 	 */
1147 	if (!parser->cont) {
1148 		/* skip white space */
1149 		while (cnt && isspace(ch)) {
1150 			ret = get_user(ch, ubuf++);
1151 			if (ret)
1152 				goto fail;
1153 			read++;
1154 			cnt--;
1155 		}
1156 
1157 		parser->idx = 0;
1158 
1159 		/* only spaces were written */
1160 		if (isspace(ch) || !ch) {
1161 			*ppos += read;
1162 			return read;
1163 		}
1164 	}
1165 
1166 	/* read the non-space input */
1167 	while (cnt && !isspace(ch) && ch) {
1168 		if (parser->idx < parser->size - 1)
1169 			parser->buffer[parser->idx++] = ch;
1170 		else {
1171 			ret = -EINVAL;
1172 			goto fail;
1173 		}
1174 
1175 		ret = get_user(ch, ubuf++);
1176 		if (ret)
1177 			goto fail;
1178 		read++;
1179 		cnt--;
1180 	}
1181 
1182 	/* We either got finished input or we have to wait for another call. */
1183 	if (isspace(ch) || !ch) {
1184 		parser->buffer[parser->idx] = 0;
1185 		parser->cont = false;
1186 	} else if (parser->idx < parser->size - 1) {
1187 		parser->cont = true;
1188 		parser->buffer[parser->idx++] = ch;
1189 		/* Make sure the parsed string always terminates with '\0'. */
1190 		parser->buffer[parser->idx] = 0;
1191 	} else {
1192 		ret = -EINVAL;
1193 		goto fail;
1194 	}
1195 
1196 	*ppos += read;
1197 	return read;
1198 fail:
1199 	trace_parser_fail(parser);
1200 	return ret;
1201 }
1202 
1203 /* TODO add a seq_buf_to_buffer() */
1204 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1205 {
1206 	int len;
1207 
1208 	if (trace_seq_used(s) <= s->readpos)
1209 		return -EBUSY;
1210 
1211 	len = trace_seq_used(s) - s->readpos;
1212 	if (cnt > len)
1213 		cnt = len;
1214 	memcpy(buf, s->buffer + s->readpos, cnt);
1215 
1216 	s->readpos += cnt;
1217 	return cnt;
1218 }
1219 
/*
 * Set by set_tracing_thresh() from the "tracing_thresh=" boot parameter
 * (the parsed number multiplied by 1000).
 */
unsigned long __read_mostly	tracing_thresh;
1221 
/* Argument handed to wait_pipe_cond() by wait_on_pipe(). */
struct pipe_wait {
	struct trace_iterator		*iter;		/* iterator being waited on */
	int				wait_index;	/* iter->wait_index sampled before waiting */
};
1226 
1227 static bool wait_pipe_cond(void *data)
1228 {
1229 	struct pipe_wait *pwait = data;
1230 	struct trace_iterator *iter = pwait->iter;
1231 
1232 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
1233 		return true;
1234 
1235 	return iter->closed;
1236 }
1237 
/*
 * Wait on the ring buffer behind @iter via ring_buffer_wait().
 * @iter: the trace iterator to wait for
 * @full: passed through to ring_buffer_wait()
 *
 * Returns 0 immediately when @iter has a buffer iterator for
 * iter->cpu_file, otherwise the return value of ring_buffer_wait().
 */
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	struct pipe_wait pwait;
	int ret;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	/* Sample wait_index first so wait_pipe_cond() can detect a change */
	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
	pwait.iter = iter;

	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
			       wait_pipe_cond, &pwait);

#ifdef CONFIG_TRACER_SNAPSHOT
	/*
	 * Make sure this is still the snapshot buffer, as if a snapshot were
	 * to happen, this would now be the main buffer.
	 */
	if (iter->snapshot)
		iter->array_buffer = &iter->tr->snapshot_buffer;
#endif
	return ret;
}
1263 
1264 #ifdef CONFIG_FTRACE_STARTUP_TEST
/* Set to true by init_trace_selftests(); until then selftests are postponed */
static bool selftests_can_run;

/* One postponed selftest: links a tracer into postponed_selftests */
struct trace_selftests {
	struct list_head		list;
	struct tracer			*type;
};

/* Selftests queued by save_selftest() and run by init_trace_selftests() */
static LIST_HEAD(postponed_selftests);
1273 
1274 static int save_selftest(struct tracer *type)
1275 {
1276 	struct trace_selftests *selftest;
1277 
1278 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1279 	if (!selftest)
1280 		return -ENOMEM;
1281 
1282 	selftest->type = type;
1283 	list_add(&selftest->list, &postponed_selftests);
1284 	return 0;
1285 }
1286 
/*
 * Run the selftest of @type against global_trace.
 *
 * Returns 0 when there is nothing to run (no selftest callback, selftests
 * disabled, or tracing is off), the result of save_selftest() when
 * selftests cannot run yet, -1 when the selftest fails and 0 when it
 * passes. The current tracer and its flags are restored after the
 * selftest callback returns.
 */
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer_flags *saved_flags = tr->current_trace_flags;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;
	/* Use the tracer's own flags when set, else its default flags */
	tr->current_trace_flags = type->flags ? : type->default_flags;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	tr->current_trace_flags = saved_flags;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
1362 
/*
 * Wrapper around run_tracer_selftest() that yields the CPU first and keeps
 * tracing_selftest_running set to true for the duration of the call.
 * Returns whatever run_tracer_selftest() returns.
 */
static int do_run_tracer_selftest(struct tracer *type)
{
	int ret;

	/*
	 * Tests can take a long time, especially if they are run one after the
	 * other, as does happen during bootup when all the tracers are
	 * registered. This could cause the soft lockup watchdog to trigger.
	 */
	cond_resched();

	tracing_selftest_running = true;
	ret = run_tracer_selftest(type);
	tracing_selftest_running = false;

	return ret;
}
1380 
/*
 * core_initcall that allows selftests to run from now on and then runs
 * every selftest queued on postponed_selftests, holding trace_types_lock.
 * A tracer whose selftest fails is unlinked from the trace_types list.
 * Every queue entry is removed and freed. Always returns 0.
 */
static __init int init_trace_selftests(void)
{
	struct trace_selftests *p, *n;
	struct tracer *t, **last;
	int ret;

	selftests_can_run = true;

	guard(mutex)(&trace_types_lock);

	if (list_empty(&postponed_selftests))
		return 0;

	pr_info("Running postponed tracer tests:\n");

	tracing_selftest_running = true;
	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/* This loop can take minutes when sanitizers are enabled, so
		 * lets make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
		/* If the test fails, then warn and remove from available_tracers */
		if (ret < 0) {
			WARN(1, "tracer: %s failed selftest, disabling\n",
			     p->type->name);
			/* 'last' tracks the link that points at 't' so it can be spliced out */
			last = &trace_types;
			for (t = trace_types; t; t = t->next) {
				if (t == p->type) {
					*last = t->next;
					break;
				}
				last = &t->next;
			}
		}
		list_del(&p->list);
		kfree(p);
	}
	tracing_selftest_running = false;

	return 0;
}
core_initcall(init_trace_selftests);
1424 #else
/* Stub used when CONFIG_FTRACE_STARTUP_TEST is not set: always succeeds. */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
1429 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1430 
/* Forward declarations; both are called by register_tracer() below. */
static int add_tracer(struct trace_array *tr, struct tracer *t);

static void __init apply_trace_boot_options(void);
1434 
1435 static void free_tracers(struct trace_array *tr)
1436 {
1437 	struct tracers *t, *n;
1438 
1439 	lockdep_assert_held(&trace_types_lock);
1440 
1441 	list_for_each_entry_safe(t, n, &tr->tracers, list) {
1442 		list_del(&t->list);
1443 		kfree(t->flags);
1444 		kfree(t);
1445 	}
1446 }
1447 
/**
 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
 *
 * Register a new plugin tracer.
 *
 * Return: 0 on success; -EPERM when tracefs is locked down; otherwise a
 * negative value (-1 for a missing, over-long or duplicate name, or the
 * negative result of the selftest or of add_tracer()).
 */
int __init register_tracer(struct tracer *type)
{
	struct trace_array *tr;
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

	if (strlen(type->name) >= MAX_TRACER_SIZE) {
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not register tracer %s due to lockdown\n",
			   type->name);
		return -EPERM;
	}

	mutex_lock(&trace_types_lock);

	/* Reject a second tracer with the same name */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
			pr_info("Tracer %s already registered\n",
				type->name);
			ret = -1;
			goto out;
		}
	}

	/* store the tracer for __set_tracer_option */
	if (type->flags)
		type->flags->trace = type;

	ret = do_run_tracer_selftest(type);
	if (ret < 0)
		goto out;

	/*
	 * NOTE(review): when add_tracer() fails the tracer is still linked
	 * into trace_types below, but the negative ret is what this function
	 * returns - confirm that callers expect this.
	 */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		ret = add_tracer(tr, type);
		if (ret < 0) {
			/* The tracer will still exist but without options */
			pr_warn("Failed to create tracer options for %s\n", type->name);
			break;
		}
	}

	/* Push the tracer on the front of the trace_types list */
	type->next = trace_types;
	trace_types = type;

 out:
	mutex_unlock(&trace_types_lock);

	if (ret || !default_bootup_tracer)
		return ret;

	/* Only continue when this is the tracer named by default_bootup_tracer */
	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
		return 0;

	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
	default_bootup_tracer = NULL;

	apply_trace_boot_options();

	/* disable other selftests, since this will break it. */
	disable_tracing_selftest("running a tracer");

	return 0;
}
1529 
1530 void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1531 {
1532 	struct trace_buffer *buffer = buf->buffer;
1533 
1534 	if (!buffer)
1535 		return;
1536 
1537 	ring_buffer_record_disable(buffer);
1538 
1539 	/* Make sure all commits have finished */
1540 	synchronize_rcu();
1541 	ring_buffer_reset_cpu(buffer, cpu);
1542 
1543 	ring_buffer_record_enable(buffer);
1544 }
1545 
1546 void tracing_reset_online_cpus(struct array_buffer *buf)
1547 {
1548 	struct trace_buffer *buffer = buf->buffer;
1549 
1550 	if (!buffer)
1551 		return;
1552 
1553 	ring_buffer_record_disable(buffer);
1554 
1555 	/* Make sure all commits have finished */
1556 	synchronize_rcu();
1557 
1558 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1559 
1560 	ring_buffer_reset_online_cpus(buffer);
1561 
1562 	ring_buffer_record_enable(buffer);
1563 }
1564 
1565 static void tracing_reset_all_cpus(struct array_buffer *buf)
1566 {
1567 	struct trace_buffer *buffer = buf->buffer;
1568 
1569 	if (!buffer)
1570 		return;
1571 
1572 	ring_buffer_record_disable(buffer);
1573 
1574 	/* Make sure all commits have finished */
1575 	synchronize_rcu();
1576 
1577 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1578 
1579 	ring_buffer_reset(buffer);
1580 
1581 	ring_buffer_record_enable(buffer);
1582 }
1583 
1584 /* Must have trace_types_lock held */
1585 void tracing_reset_all_online_cpus_unlocked(void)
1586 {
1587 	struct trace_array *tr;
1588 
1589 	lockdep_assert_held(&trace_types_lock);
1590 
1591 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1592 		if (!tr->clear_trace)
1593 			continue;
1594 		tr->clear_trace = false;
1595 		tracing_reset_online_cpus(&tr->array_buffer);
1596 #ifdef CONFIG_TRACER_SNAPSHOT
1597 		tracing_reset_online_cpus(&tr->snapshot_buffer);
1598 #endif
1599 	}
1600 }
1601 
/*
 * Locked variant: takes trace_types_lock for the duration of the call and
 * runs tracing_reset_all_online_cpus_unlocked().
 */
void tracing_reset_all_online_cpus(void)
{
	guard(mutex)(&trace_types_lock);
	tracing_reset_all_online_cpus_unlocked();
}
1607 
/*
 * Returns global_trace.stop_count, which is non-zero while global_trace
 * is stopped (see tracing_stop_tr() and tracing_start_tr()).
 */
int is_tracing_stopped(void)
{
	return global_trace.stop_count;
}
1612 
/*
 * Drop one stop reference on @tr and re-enable recording once the count
 * reaches zero. Does nothing when tracing_disabled is set. The counter is
 * protected by tr->start_lock, taken with interrupts saved and disabled.
 */
static void tracing_start_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	if (tracing_disabled)
		return;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	/* Still stopped by someone else (or unbalanced): nothing to enable */
	if (--tr->stop_count) {
		if (WARN_ON_ONCE(tr->stop_count < 0)) {
			/* Someone screwed up their debugging */
			tr->stop_count = 0;
		}
		return;
	}

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
1644 
1645 /**
1646  * tracing_start - quick start of the tracer
1647  *
1648  * If tracing is enabled but was stopped by tracing_stop,
1649  * this will start the tracer back up.
1650  */
1651 void tracing_start(void)
1652 
1653 {
1654 	return tracing_start_tr(&global_trace);
1655 }
1656 
/*
 * Take one stop reference on @tr. Only the first stop (count going from
 * zero to one) disables recording on the ring buffers. The counter is
 * protected by tr->start_lock, taken with interrupts saved and disabled.
 */
static void tracing_stop_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	/* Already stopped: just account for the extra reference */
	if (tr->stop_count++)
		return;

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
1680 
1681 /**
1682  * tracing_stop - quick stop of the tracer
1683  *
1684  * Light weight way to stop tracing. Use in conjunction with
1685  * tracing_start.
1686  */
1687 void tracing_stop(void)
1688 {
1689 	return tracing_stop_tr(&global_trace);
1690 }
1691 
1692 /*
1693  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
1694  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
1695  * simplifies those functions and keeps them in sync.
1696  */
1697 enum print_line_t trace_handle_return(struct trace_seq *s)
1698 {
1699 	return trace_seq_has_overflowed(s) ?
1700 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
1701 }
1702 EXPORT_SYMBOL_GPL(trace_handle_return);
1703 
/*
 * Returns current->migration_disabled when CONFIG_SMP is defined and 0
 * otherwise.
 */
static unsigned short migration_disable_value(void)
{
#if defined(CONFIG_SMP)
	return current->migration_disabled;
#else
	return 0;
#endif
}
1712 
1713 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
1714 {
1715 	unsigned int trace_flags = irqs_status;
1716 	unsigned int pc;
1717 
1718 	pc = preempt_count();
1719 
1720 	if (pc & NMI_MASK)
1721 		trace_flags |= TRACE_FLAG_NMI;
1722 	if (pc & HARDIRQ_MASK)
1723 		trace_flags |= TRACE_FLAG_HARDIRQ;
1724 	if (in_serving_softirq())
1725 		trace_flags |= TRACE_FLAG_SOFTIRQ;
1726 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
1727 		trace_flags |= TRACE_FLAG_BH_OFF;
1728 
1729 	if (tif_need_resched())
1730 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
1731 	if (test_preempt_need_resched())
1732 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
1733 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
1734 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
1735 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
1736 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
1737 }
1738 
/*
 * Non-inline entry point that forwards all arguments unchanged to
 * __trace_buffer_lock_reserve() and returns its result.
 */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
1747 
/* Per-CPU temporary event page, allocated by trace_buffered_event_enable() */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Per-CPU use count; the page is only used when an increment yields 1 */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of trace_buffered_event_enable() calls not yet matched by a disable */
static int trace_buffered_event_ref;
1751 
/**
 * trace_buffered_event_enable - enable buffering events
 *
 * When events are being filtered, it is quicker to use a temporary
 * buffer to write the event data into if there's a likely chance
 * that it will not be committed. The discard of the ring buffer
 * is not as fast as committing, and is much slower than copying
 * a commit.
 *
 * When an event is to be filtered, allocate per cpu buffers to
 * write the event data into, and if the event is filtered and discarded
 * it is simply dropped, otherwise, the entire data is to be committed
 * in one shot.
 *
 * Calls are reference counted: only the first one allocates the pages.
 * A warning is issued when event_mutex is not locked.
 */
void trace_buffered_event_enable(void)
{
	struct ring_buffer_event *event;
	struct page *page;
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	/* Buffers already exist when this is not the first reference */
	if (trace_buffered_event_ref++)
		return;

	for_each_tracing_cpu(cpu) {
		page = alloc_pages_node(cpu_to_node(cpu),
					GFP_KERNEL | __GFP_NORETRY, 0);
		/* This is just an optimization and can handle failures */
		if (!page) {
			pr_err("Failed to allocate event buffer\n");
			break;
		}

		event = page_address(page);
		/* Only the event header is cleared, not the whole page */
		memset(event, 0, sizeof(*event));

		per_cpu(trace_buffered_event, cpu) = event;

		/* Sanity check: on the local CPU both accessors must agree */
		scoped_guard(preempt,) {
			if (cpu == smp_processor_id() &&
			    __this_cpu_read(trace_buffered_event) !=
			    per_cpu(trace_buffered_event, cpu))
				WARN_ON_ONCE(1);
		}
	}
}
1799 
/*
 * Per-CPU callback run by trace_buffered_event_disable(): drops the count
 * taken by disable_trace_buffered_event(). @data is unused.
 */
static void enable_trace_buffered_event(void *data)
{
	this_cpu_dec(trace_buffered_event_cnt);
}
1804 
/*
 * Per-CPU callback run by trace_buffered_event_disable(): raises the count
 * so that trace_event_buffer_lock_reserve() no longer sees a value of 1
 * after its own increment and therefore skips the buffer. @data is unused.
 */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}
1809 
/**
 * trace_buffered_event_disable - disable buffering events
 *
 * When a filter is removed, it is faster to not use the buffered
 * events, and to commit directly into the ring buffer. Free up
 * the temp buffers when there are no more users. This requires
 * special synchronization with current events.
 *
 * Calls are reference counted: only the call that drops the last
 * reference frees the pages. A warning is issued when event_mutex is not
 * locked or when there is no reference to drop.
 */
void trace_buffered_event_disable(void)
{
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	/* Unbalanced disable: nothing to release */
	if (WARN_ON_ONCE(!trace_buffered_event_ref))
		return;

	/* Other users remain */
	if (--trace_buffered_event_ref)
		return;

	/* For each CPU, set the buffer as used. */
	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
			 NULL, true);

	/* Wait for all current users to finish */
	synchronize_rcu();

	for_each_tracing_cpu(cpu) {
		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
		per_cpu(trace_buffered_event, cpu) = NULL;
	}

	/*
	 * Wait for all CPUs that potentially started checking if they can use
	 * their event buffer only after the previous synchronize_rcu() call and
	 * they still read a valid pointer from trace_buffered_event. It must be
	 * ensured they don't see cleared trace_buffered_event_cnt else they
	 * could wrongly decide to use the pointed-to buffer which is now freed.
	 */
	synchronize_rcu();

	/* For each CPU, relinquish the buffer */
	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
			 true);
}
1855 
/*
 * Fallback buffer used by trace_event_buffer_lock_reserve() when the
 * normal reserve fails and the event file has EVENT_FILE_FL_TRIGGER_COND.
 */
static struct trace_buffer *temp_buffer;
1857 
/*
 * Reserve space for an event of @trace_file.
 * @current_rb: set to the buffer the event was reserved on
 * @trace_file: the event file the event belongs to
 * @type:       event type passed on to the reserve/setup helpers
 * @len:        length of the event data
 * @trace_ctx:  trace context passed on to the reserve/setup helpers
 *
 * Returns the reserved event, which is one of: the per-CPU buffered event
 * (returned with preemption disabled), an event reserved on the trace
 * array's ring buffer, or an event reserved on temp_buffer. Returns
 * whatever the last attempted reserve returned otherwise.
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
			  struct trace_event_file *trace_file,
			  int type, unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *entry;
	struct trace_array *tr = trace_file->tr;
	int val;

	*current_rb = tr->array_buffer.buffer;

	if (!tr->no_filter_buffering_ref &&
	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
		preempt_disable_notrace();
		/*
		 * Filtering is on, so try to use the per cpu buffer first.
		 * This buffer will simulate a ring_buffer_event,
		 * where the type_len is zero and the array[0] will
		 * hold the full length.
		 * (see include/linux/ring_buffer.h for details on
		 *  how the ring_buffer_event is structured).
		 *
		 * Using a temp buffer during filtering and copying it
		 * on a matched filter is quicker than writing directly
		 * into the ring buffer and then discarding it when
		 * it doesn't match. That is because the discard
		 * requires several atomic operations to get right.
		 * Copying on match and doing nothing on a failed match
		 * is still quicker than no copy on match, but having
		 * to discard out of the ring buffer on a failed match.
		 */
		if ((entry = __this_cpu_read(trace_buffered_event))) {
			/* Room left in the page after the event header and array[0] */
			int max_len = PAGE_SIZE - struct_size(entry, array, 1);

			val = this_cpu_inc_return(trace_buffered_event_cnt);

			/*
			 * Preemption is disabled, but interrupts and NMIs
			 * can still come in now. If that happens after
			 * the above increment, then it will have to go
			 * back to the old method of allocating the event
			 * on the ring buffer, and if the filter fails, it
			 * will have to call ring_buffer_discard_commit()
			 * to remove it.
			 *
			 * Need to also check the unlikely case that the
			 * length is bigger than the temp buffer size.
			 * If that happens, then the reserve is pretty much
			 * guaranteed to fail, as the ring buffer currently
			 * only allows events less than a page. But that may
			 * change in the future, so let the ring buffer reserve
			 * handle the failure in that case.
			 */
			if (val == 1 && likely(len <= max_len)) {
				trace_event_setup(entry, type, trace_ctx);
				entry->array[0] = len;
				/* Return with preemption disabled */
				return entry;
			}
			/* Buffer busy or event too large: give the count back */
			this_cpu_dec(trace_buffered_event_cnt);
		}
		/* __trace_buffer_lock_reserve() disables preemption */
		preempt_enable_notrace();
	}

	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
					    trace_ctx);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursive
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
						    trace_ctx);
	}
	return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1940 
/* Held by output_printk() while it formats into and prints tracepoint_print_iter */
static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
/* Held by tracepoint_printk_sysctl() for the whole sysctl update */
static DEFINE_MUTEX(tracepoint_printk_mutex);
1943 
1944 static void output_printk(struct trace_event_buffer *fbuffer)
1945 {
1946 	struct trace_event_call *event_call;
1947 	struct trace_event_file *file;
1948 	struct trace_event *event;
1949 	unsigned long flags;
1950 	struct trace_iterator *iter = tracepoint_print_iter;
1951 
1952 	/* We should never get here if iter is NULL */
1953 	if (WARN_ON_ONCE(!iter))
1954 		return;
1955 
1956 	event_call = fbuffer->trace_file->event_call;
1957 	if (!event_call || !event_call->event.funcs ||
1958 	    !event_call->event.funcs->trace)
1959 		return;
1960 
1961 	file = fbuffer->trace_file;
1962 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
1963 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
1964 	     !filter_match_preds(file->filter, fbuffer->entry)))
1965 		return;
1966 
1967 	event = &fbuffer->trace_file->event_call->event;
1968 
1969 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
1970 	trace_seq_init(&iter->seq);
1971 	iter->ent = fbuffer->entry;
1972 	event_call->event.funcs->trace(iter, 0, event);
1973 	trace_seq_putc(&iter->seq, 0);
1974 	printk("%s", iter->seq.buffer);
1975 
1976 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
1977 }
1978 
1979 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
1980 			     void *buffer, size_t *lenp,
1981 			     loff_t *ppos)
1982 {
1983 	int save_tracepoint_printk;
1984 	int ret;
1985 
1986 	guard(mutex)(&tracepoint_printk_mutex);
1987 	save_tracepoint_printk = tracepoint_printk;
1988 
1989 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
1990 
1991 	/*
1992 	 * This will force exiting early, as tracepoint_printk
1993 	 * is always zero when tracepoint_printk_iter is not allocated
1994 	 */
1995 	if (!tracepoint_print_iter)
1996 		tracepoint_printk = 0;
1997 
1998 	if (save_tracepoint_printk == tracepoint_printk)
1999 		return ret;
2000 
2001 	if (tracepoint_printk)
2002 		static_key_enable(&tracepoint_printk_key.key);
2003 	else
2004 		static_key_disable(&tracepoint_printk_key.key);
2005 
2006 	return ret;
2007 }
2008 
2009 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2010 {
2011 	enum event_trigger_type tt = ETT_NONE;
2012 	struct trace_event_file *file = fbuffer->trace_file;
2013 
2014 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2015 			fbuffer->entry, &tt))
2016 		goto discard;
2017 
2018 	if (static_key_false(&tracepoint_printk_key.key))
2019 		output_printk(fbuffer);
2020 
2021 	if (static_branch_unlikely(&trace_event_exports_enabled))
2022 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2023 
2024 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2025 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2026 
2027 discard:
2028 	if (tt)
2029 		event_triggers_post_call(file, tt);
2030 
2031 }
2032 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2033 
/*
 * Skip 3:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 *
 * Passed as the skip count to ftrace_trace_stack() by
 * trace_buffer_unlock_commit_regs() when no regs are supplied.
 */
# define STACK_SKIP 3
2042 
/*
 * Commit @event to @buffer, then call ftrace_trace_stack() and
 * ftrace_trace_userstack() for it.
 * @tr:        trace array passed on to the stack trace helpers
 * @buffer:    buffer holding @event
 * @event:     the reserved event to commit
 * @trace_ctx: trace context passed on to the stack trace helpers
 * @regs:      optional registers; when NULL, STACK_SKIP frames are skipped
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	__buffer_unlock_commit(buffer, event);

	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
2060 
/*
 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
 * Only commits @event to @buffer through __buffer_unlock_commit().
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
2070 
/*
 * Record a TRACE_FN event in @tr's ring buffer.
 * @tr:        trace array whose array_buffer receives the event
 * @ip:        stored in the entry's ip field
 * @parent_ip: stored in the entry's parent_ip field
 * @trace_ctx: trace context passed to the reserve helper
 * @fregs:     optional registers; when non-NULL, room for
 *             FTRACE_REGS_MAX_ARGS longs is reserved after the entry
 *
 * Returns silently when the event cannot be reserved.
 */
void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct ftrace_entry *entry;
	int size = sizeof(*entry);

	/* '!!fregs' is 0 or 1, so the extra space is added only with fregs */
	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);

	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
					    trace_ctx);
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->parent_ip		= parent_ip;

#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	/* Fill the reserved argument slots from the registers */
	if (fregs) {
		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
			entry->args[i] = ftrace_regs_get_argument(fregs, i);
	}
#endif

	if (static_branch_unlikely(&trace_function_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
	__buffer_unlock_commit(buffer, event);
}
2101 
2102 #ifdef CONFIG_STACKTRACE
2103 
/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING	4

/* Number of entries in each per-nesting-level stack */
#define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)

/* Scratch storage for one saved stack trace */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};


/* One ftrace_stack per nesting level */
struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};

static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
/* Per-CPU count of ftrace_stacks slots in use; selects the slot index */
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2120 
/*
 * Save a kernel stack trace and write it to @buffer as a TRACE_STACK event.
 *
 * @tr:        trace array; here only tr->ops is looked at, to find the
 *             trampoline address range (CONFIG_DYNAMIC_FTRACE only)
 * @buffer:    ring buffer the event is reserved in and committed to
 * @trace_ctx: context value handed to the event reservation
 * @skip:      number of leading stack entries to leave out of the trace
 * @regs:      if not NULL, the trace is taken from these registers
 *
 * The trace is first saved into the per-CPU scratch stack that matches the
 * current nesting depth and then copied into the reserved event. Nothing is
 * recorded when the recursion check fails, when the nesting depth exceeds
 * FTRACE_KSTACK_NESTING, or when the event can not be reserved.
 */
void __ftrace_trace_stack(struct trace_array *tr,
			  struct trace_buffer *buffer,
			  unsigned int trace_ctx,
			  int skip, struct pt_regs *regs)
{
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;
	int bit;

	/* A negative value means the recursion check failed: record nothing */
	bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
	if (bit < 0)
		return;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	guard(preempt_notrace)();

	/* Claim the next free scratch stack on this CPU (index is pre-increment) */
	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns.  We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	/* Save at most 'size' entries into the scratch stack */
	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

#ifdef CONFIG_DYNAMIC_FTRACE
	/* Mark entry of stack trace as trampoline code */
	if (tr->ops && tr->ops->trampoline) {
		unsigned long tramp_start = tr->ops->trampoline;
		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
		unsigned long *calls = fstack->calls;

		/* Replace every address inside [tramp_start, tramp_end) */
		for (int i = 0; i < nr_entries; i++) {
			if (calls[i] >= tramp_start && calls[i] < tramp_end)
				calls[i] = FTRACE_TRAMPOLINE_MARKER;
		}
	}
#endif

	/* The event is sized for exactly the entries that were saved */
	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
				    struct_size(entry, caller, nr_entries),
				    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	entry->size = nr_entries;
	memcpy(&entry->caller, fstack->calls,
	       flex_array_size(entry, caller, nr_entries));

	__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	/* Give the scratch stack back and release the recursion bit */
	__this_cpu_dec(ftrace_stack_reserve);
	trace_clear_recursion(bit);
}
2206 
2207 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
2208 		   int skip)
2209 {
2210 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2211 
2212 	if (rcu_is_watching()) {
2213 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
2214 		return;
2215 	}
2216 
2217 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
2218 		return;
2219 
2220 	/*
2221 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
2222 	 * but if the above rcu_is_watching() failed, then the NMI
2223 	 * triggered someplace critical, and ct_irq_enter() should
2224 	 * not be called from NMI.
2225 	 */
2226 	if (unlikely(in_nmi()))
2227 		return;
2228 
2229 	ct_irq_enter_irqson();
2230 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
2231 	ct_irq_exit_irqson();
2232 }
2233 
/**
 * trace_dump_stack - record a stack back trace in the trace buffer
 * @skip: Number of functions to skip (helper handlers)
 *
 * Nothing is recorded while tracing is disabled or a tracing selftest
 * is running. The trace is written to the buffer of printk_trace.
 */
void trace_dump_stack(int skip)
{
	if (tracing_disabled || tracing_selftest_running)
		return;

#ifndef CONFIG_UNWINDER_ORC
	/* Skip 1 to skip this function. */
	skip++;
#endif
	/* No pt_regs here: the trace starts from the current call chain */
	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
				tracing_gen_ctx(), skip, NULL);
}
EXPORT_SYMBOL_GPL(trace_dump_stack);
2251 
2252 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2253 static DEFINE_PER_CPU(int, user_stack_count);
2254 
2255 static void
2256 ftrace_trace_userstack(struct trace_array *tr,
2257 		       struct trace_buffer *buffer, unsigned int trace_ctx)
2258 {
2259 	struct ring_buffer_event *event;
2260 	struct userstack_entry *entry;
2261 
2262 	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
2263 		return;
2264 
2265 	/*
2266 	 * NMIs can not handle page faults, even with fix ups.
2267 	 * The save user stack can (and often does) fault.
2268 	 */
2269 	if (unlikely(in_nmi()))
2270 		return;
2271 
2272 	/*
2273 	 * prevent recursion, since the user stack tracing may
2274 	 * trigger other kernel events.
2275 	 */
2276 	guard(preempt)();
2277 	if (__this_cpu_read(user_stack_count))
2278 		return;
2279 
2280 	__this_cpu_inc(user_stack_count);
2281 
2282 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2283 					    sizeof(*entry), trace_ctx);
2284 	if (!event)
2285 		goto out_drop_count;
2286 	entry	= ring_buffer_event_data(event);
2287 
2288 	entry->tgid		= current->tgid;
2289 	memset(&entry->caller, 0, sizeof(entry->caller));
2290 
2291 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2292 	__buffer_unlock_commit(buffer, event);
2293 
2294  out_drop_count:
2295 	__this_cpu_dec(user_stack_count);
2296 }
2297 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* Without CONFIG_USER_STACKTRACE_SUPPORT there is nothing to record */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
2303 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2304 
2305 #endif /* CONFIG_STACKTRACE */
2306 
2307 static inline void
2308 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
2309 			  unsigned long long delta)
2310 {
2311 	entry->bottom_delta_ts = delta & U32_MAX;
2312 	entry->top_delta_ts = (delta >> 32);
2313 }
2314 
2315 void trace_last_func_repeats(struct trace_array *tr,
2316 			     struct trace_func_repeats *last_info,
2317 			     unsigned int trace_ctx)
2318 {
2319 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2320 	struct func_repeats_entry *entry;
2321 	struct ring_buffer_event *event;
2322 	u64 delta;
2323 
2324 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
2325 					    sizeof(*entry), trace_ctx);
2326 	if (!event)
2327 		return;
2328 
2329 	delta = ring_buffer_event_time_stamp(buffer, event) -
2330 		last_info->ts_last_call;
2331 
2332 	entry = ring_buffer_event_data(event);
2333 	entry->ip = last_info->ip;
2334 	entry->parent_ip = last_info->parent_ip;
2335 	entry->count = last_info->count;
2336 	func_repeats_set_delta_ts(entry, delta);
2337 
2338 	__buffer_unlock_commit(buffer, event);
2339 }
2340 
2341 static struct trace_entry *
2342 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2343 		unsigned long *lost_events)
2344 {
2345 	struct ring_buffer_event *event;
2346 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2347 
2348 	if (buf_iter) {
2349 		event = ring_buffer_iter_peek(buf_iter, ts);
2350 		if (lost_events)
2351 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
2352 				(unsigned long)-1 : 0;
2353 	} else {
2354 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
2355 					 lost_events);
2356 	}
2357 
2358 	if (event) {
2359 		iter->ent_size = ring_buffer_event_length(event);
2360 		return ring_buffer_event_data(event);
2361 	}
2362 	iter->ent_size = 0;
2363 	return NULL;
2364 }
2365 
2366 static struct trace_entry *
2367 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2368 		  unsigned long *missing_events, u64 *ent_ts)
2369 {
2370 	struct trace_buffer *buffer = iter->array_buffer->buffer;
2371 	struct trace_entry *ent, *next = NULL;
2372 	unsigned long lost_events = 0, next_lost = 0;
2373 	int cpu_file = iter->cpu_file;
2374 	u64 next_ts = 0, ts;
2375 	int next_cpu = -1;
2376 	int next_size = 0;
2377 	int cpu;
2378 
2379 	/*
2380 	 * If we are in a per_cpu trace file, don't bother by iterating over
2381 	 * all cpu and peek directly.
2382 	 */
2383 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2384 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2385 			return NULL;
2386 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2387 		if (ent_cpu)
2388 			*ent_cpu = cpu_file;
2389 
2390 		return ent;
2391 	}
2392 
2393 	for_each_tracing_cpu(cpu) {
2394 
2395 		if (ring_buffer_empty_cpu(buffer, cpu))
2396 			continue;
2397 
2398 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2399 
2400 		/*
2401 		 * Pick the entry with the smallest timestamp:
2402 		 */
2403 		if (ent && (!next || ts < next_ts)) {
2404 			next = ent;
2405 			next_cpu = cpu;
2406 			next_ts = ts;
2407 			next_lost = lost_events;
2408 			next_size = iter->ent_size;
2409 		}
2410 	}
2411 
2412 	iter->ent_size = next_size;
2413 
2414 	if (ent_cpu)
2415 		*ent_cpu = next_cpu;
2416 
2417 	if (ent_ts)
2418 		*ent_ts = next_ts;
2419 
2420 	if (missing_events)
2421 		*missing_events = next_lost;
2422 
2423 	return next;
2424 }
2425 
2426 #define STATIC_FMT_BUF_SIZE	128
2427 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
2428 
2429 char *trace_iter_expand_format(struct trace_iterator *iter)
2430 {
2431 	char *tmp;
2432 
2433 	/*
2434 	 * iter->tr is NULL when used with tp_printk, which makes
2435 	 * this get called where it is not safe to call krealloc().
2436 	 */
2437 	if (!iter->tr || iter->fmt == static_fmt_buf)
2438 		return NULL;
2439 
2440 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
2441 		       GFP_KERNEL);
2442 	if (tmp) {
2443 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
2444 		iter->fmt = tmp;
2445 	}
2446 
2447 	return tmp;
2448 }
2449 
2450 /* Returns true if the string is safe to dereference from an event */
2451 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
2452 {
2453 	unsigned long addr = (unsigned long)str;
2454 	struct trace_event *trace_event;
2455 	struct trace_event_call *event;
2456 
2457 	/* OK if part of the event data */
2458 	if ((addr >= (unsigned long)iter->ent) &&
2459 	    (addr < (unsigned long)iter->ent + iter->ent_size))
2460 		return true;
2461 
2462 	/* OK if part of the temp seq buffer */
2463 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
2464 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
2465 		return true;
2466 
2467 	/* Core rodata can not be freed */
2468 	if (is_kernel_rodata(addr))
2469 		return true;
2470 
2471 	if (trace_is_tracepoint_string(str))
2472 		return true;
2473 
2474 	/*
2475 	 * Now this could be a module event, referencing core module
2476 	 * data, which is OK.
2477 	 */
2478 	if (!iter->ent)
2479 		return false;
2480 
2481 	trace_event = ftrace_find_event(iter->ent->type);
2482 	if (!trace_event)
2483 		return false;
2484 
2485 	event = container_of(trace_event, struct trace_event_call, event);
2486 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
2487 		return false;
2488 
2489 	/* Would rather have rodata, but this will suffice */
2490 	if (within_module_core(addr, event->module))
2491 		return true;
2492 
2493 	return false;
2494 }
2495 
/**
 * ignore_event - Check dereferenced fields while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 *
 * At boot up, test_event_printk() will flag any event that dereferences
 * a string with "%s" that does not exist in the ring buffer. It may still
 * be valid, as the string may point to a static string in the kernel
 * rodata that never gets freed. But if the string pointer is pointing
 * to something that was allocated, there's a chance that it can be freed
 * by the time the user reads the trace. This would cause a bad memory
 * access by the kernel and possibly crash the system.
 *
 * This function will check if the event has any fields flagged as needing
 * to be checked at runtime and perform those checks.
 *
 * If it is found that a field is unsafe, it will write into the @iter->seq
 * a message stating what was found to be unsafe. A message is also written
 * when the event type or its field list can not be found.
 *
 * Return: true if the event is unsafe and should be ignored,
 *         false otherwise.
 */
bool ignore_event(struct trace_iterator *iter)
{
	struct ftrace_event_field *field;
	struct trace_event *trace_event;
	struct trace_event_call *event;
	struct list_head *head;
	struct trace_seq *seq;
	const void *ptr;

	trace_event = ftrace_find_event(iter->ent->type);

	seq = &iter->seq;

	if (!trace_event) {
		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
		return true;
	}

	/* Only events flagged with TRACE_EVENT_FL_TEST_STR need the checks */
	event = container_of(trace_event, struct trace_event_call, event);
	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
		return false;

	head = trace_get_fields(event);
	if (!head) {
		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
				 trace_event_name(event));
		return true;
	}

	/* Offsets are from the iter->ent that points to the raw event */
	ptr = iter->ent;

	list_for_each_entry(field, head, link) {
		const char *str;
		bool good;

		if (!field->needs_test)
			continue;

		/* The field holds a pointer to the string, not the string */
		str = *(const char **)(ptr + field->offset);

		good = trace_safe_str(iter, str);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
			      trace_event_name(event), field->name)) {
			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
					 trace_event_name(event), field->name);
			return true;
		}
	}
	return false;
}
2578 
2579 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
2580 {
2581 	const char *p, *new_fmt;
2582 	char *q;
2583 
2584 	if (WARN_ON_ONCE(!fmt))
2585 		return fmt;
2586 
2587 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
2588 		return fmt;
2589 
2590 	p = fmt;
2591 	new_fmt = q = iter->fmt;
2592 	while (*p) {
2593 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
2594 			if (!trace_iter_expand_format(iter))
2595 				return fmt;
2596 
2597 			q += iter->fmt - new_fmt;
2598 			new_fmt = iter->fmt;
2599 		}
2600 
2601 		*q++ = *p++;
2602 
2603 		/* Replace %p with %px */
2604 		if (p[-1] == '%') {
2605 			if (p[0] == '%') {
2606 				*q++ = *p++;
2607 			} else if (p[0] == 'p' && !isalnum(p[1])) {
2608 				*q++ = *p++;
2609 				*q++ = 'x';
2610 			}
2611 		}
2612 	}
2613 	*q = '\0';
2614 
2615 	return new_fmt;
2616 }
2617 
#define STATIC_TEMP_BUF_SIZE	128
static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);

/*
 * Find the next real entry, without updating the iterator itself
 *
 * The current entry is first copied into iter->temp (and iter->ent is
 * pointed at the copy), growing iter->temp with kmalloc() when needed.
 * iter->ent_size is put back to its value on entry before returning.
 *
 * Returns the next entry, or NULL if there is none, if the current entry
 * does not fit in static_temp_buf, or if the temp buffer allocation fails.
 * @ent_cpu and @ent_ts are handed on to __find_next_entry().
 */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	/* __find_next_entry will reset ent_size */
	int ent_size = iter->ent_size;
	struct trace_entry *entry;

	/*
	 * If called from ftrace_dump(), then the iter->temp buffer
	 * will be the static_temp_buf and not created from kmalloc.
	 * If the entry size is greater than the buffer, we can
	 * not save it. Just return NULL in that case. This is only
	 * used to add markers when two consecutive events' time
	 * stamps have a large delta. See trace_print_lat_context()
	 */
	if (iter->temp == static_temp_buf &&
	    STATIC_TEMP_BUF_SIZE < ent_size)
		return NULL;

	/*
	 * The __find_next_entry() may call peek_next_entry(), which may
	 * call ring_buffer_peek() that may make the contents of iter->ent
	 * undefined. Need to copy iter->ent now.
	 */
	if (iter->ent && iter->ent != iter->temp) {
		/* Reallocate only a missing or too small, non-static buffer */
		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
			void *temp;
			temp = kmalloc(iter->ent_size, GFP_KERNEL);
			if (!temp)
				return NULL;
			kfree(iter->temp);
			iter->temp = temp;
			iter->temp_size = iter->ent_size;
		}
		memcpy(iter->temp, iter->ent, iter->ent_size);
		iter->ent = iter->temp;
	}
	/* The lost-event count is not wanted here, hence the NULL */
	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
	/* Put back the original ent_size */
	iter->ent_size = ent_size;

	return entry;
}
2666 
2667 /* Find the next real entry, and increment the iterator to the next entry */
2668 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2669 {
2670 	struct ring_buffer_iter *buf_iter;
2671 
2672 	iter->ent = __find_next_entry(iter, &iter->cpu,
2673 				      &iter->lost_events, &iter->ts);
2674 
2675 	if (iter->ent) {
2676 		iter->idx++;
2677 		buf_iter = trace_buffer_iter(iter, iter->cpu);
2678 		if (buf_iter)
2679 			ring_buffer_iter_advance(buf_iter);
2680 	}
2681 
2682 	return iter->ent ? iter : NULL;
2683 }
2684 
2685 static void trace_consume(struct trace_iterator *iter)
2686 {
2687 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
2688 			    &iter->lost_events);
2689 }
2690 
2691 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2692 {
2693 	struct trace_iterator *iter = m->private;
2694 	int i = (int)*pos;
2695 	void *ent;
2696 
2697 	WARN_ON_ONCE(iter->leftover);
2698 
2699 	(*pos)++;
2700 
2701 	/* can't go backwards */
2702 	if (iter->idx > i)
2703 		return NULL;
2704 
2705 	if (iter->idx < 0)
2706 		ent = trace_find_next_entry_inc(iter);
2707 	else
2708 		ent = iter;
2709 
2710 	while (ent && iter->idx < i)
2711 		ent = trace_find_next_entry_inc(iter);
2712 
2713 	iter->pos = *pos;
2714 
2715 	return ent;
2716 }
2717 
2718 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2719 {
2720 	struct ring_buffer_iter *buf_iter;
2721 	unsigned long entries = 0;
2722 	u64 ts;
2723 
2724 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
2725 
2726 	buf_iter = trace_buffer_iter(iter, cpu);
2727 	if (!buf_iter)
2728 		return;
2729 
2730 	ring_buffer_iter_reset(buf_iter);
2731 
2732 	/*
2733 	 * We could have the case with the max latency tracers
2734 	 * that a reset never took place on a cpu. This is evident
2735 	 * by the timestamp being before the start of the buffer.
2736 	 */
2737 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
2738 		if (ts >= iter->array_buffer->time_start)
2739 			break;
2740 		entries++;
2741 		ring_buffer_iter_advance(buf_iter);
2742 		/* This could be a big loop */
2743 		cond_resched();
2744 	}
2745 
2746 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
2747 }
2748 
/*
 * seq_file ->start callback for reading a trace.
 *
 * The current tracer is copied to avoid a global locking
 * all around.
 *
 * Returns the iterator positioned at *@pos, NULL if there is nothing at
 * that position, or ERR_PTR(-EBUSY) for a snapshot iterator while the
 * tracer uses the snapshot. Except in the -EBUSY case, it returns with
 * trace_event_read_lock() and trace_access_lock() held; s_stop() drops
 * them.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace != iter->trace)) {
		/* Close iter->trace before switching to the new current tracer */
		if (iter->trace->close)
			iter->trace->close(iter);
		iter->trace = tr->current_trace;
		/* Reopen the new current tracer */
		if (iter->trace->open)
			iter->trace->open(iter);
	}
	mutex_unlock(&trace_types_lock);

	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return ERR_PTR(-EBUSY);

	if (*pos != iter->pos) {
		/* Not where the last read stopped: restart and walk to *pos */
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			/* s_next() increments the position, so start one back */
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}
2809 
2810 static void s_stop(struct seq_file *m, void *p)
2811 {
2812 	struct trace_iterator *iter = m->private;
2813 
2814 	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
2815 		return;
2816 
2817 	trace_access_unlock(iter->cpu_file);
2818 	trace_event_read_unlock();
2819 }
2820 
2821 static void
2822 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
2823 		      unsigned long *entries, int cpu)
2824 {
2825 	unsigned long count;
2826 
2827 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
2828 	/*
2829 	 * If this buffer has skipped entries, then we hold all
2830 	 * entries for the trace and we need to ignore the
2831 	 * ones before the time stamp.
2832 	 */
2833 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2834 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2835 		/* total is the same as the entries */
2836 		*total = count;
2837 	} else
2838 		*total = count +
2839 			ring_buffer_overrun_cpu(buf->buffer, cpu);
2840 	*entries = count;
2841 }
2842 
2843 static void
2844 get_total_entries(struct array_buffer *buf,
2845 		  unsigned long *total, unsigned long *entries)
2846 {
2847 	unsigned long t, e;
2848 	int cpu;
2849 
2850 	*total = 0;
2851 	*entries = 0;
2852 
2853 	for_each_tracing_cpu(cpu) {
2854 		get_total_entries_cpu(buf, &t, &e, cpu);
2855 		*total += t;
2856 		*entries += e;
2857 	}
2858 }
2859 
2860 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
2861 {
2862 	unsigned long total, entries;
2863 
2864 	if (!tr)
2865 		tr = &global_trace;
2866 
2867 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
2868 
2869 	return entries;
2870 }
2871 
2872 unsigned long trace_total_entries(struct trace_array *tr)
2873 {
2874 	unsigned long total, entries;
2875 
2876 	if (!tr)
2877 		tr = &global_trace;
2878 
2879 	get_total_entries(&tr->array_buffer, &total, &entries);
2880 
2881 	return entries;
2882 }
2883 
/*
 * Write the column legend of the latency trace format to @m. The whole
 * legend is emitted with a single seq_puts() call.
 */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "#                    _------=> CPU#            \n"
		    "#                   / _-----=> irqs-off/BH-disabled\n"
		    "#                  | / _----=> need-resched    \n"
		    "#                  || / _---=> hardirq/softirq \n"
		    "#                  ||| / _--=> preempt-depth   \n"
		    "#                  |||| / _-=> migrate-disable \n"
		    "#                  ||||| /     delay           \n"
		    "#  cmd     pid     |||||| time  |   caller     \n"
		    "#     \\   /        ||||||  \\    |    /       \n");
}
2896 
/*
 * Write the "entries-in-buffer/entries-written" header line for @buf to
 * @m, together with the number of online CPUs, followed by a "#" line.
 */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long written, in_buffer;

	get_total_entries(buf, &written, &in_buffer);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   in_buffer, written, num_online_cpus());
	seq_puts(m, "#\n");
}
2907 
2908 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
2909 				   unsigned int flags)
2910 {
2911 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
2912 
2913 	print_event_info(buf, m);
2914 
2915 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
2916 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
2917 }
2918 
/*
 * Write the event info followed by the column header that also explains
 * the irq/preempt flag columns. When RECORD_TGID is set in @flags the
 * header is widened to make room for a TGID column.
 */
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
				       unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);
	static const char space[] = "            ";
	/* Number of characters printed from each padding string below */
	int prec = tgid ? 12 : 2;

	print_event_info(buf, m);

	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
	/* With prec == 2 only the two leading blanks of these strings print */
	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
}
2937 
/*
 * Write the latency trace header to @m: tracer name and kernel release,
 * the saved latency with the entry counts and CPU of the buffer, the
 * recorded task information and, if a critical section start was
 * recorded, the symbols where it started and ended.
 */
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
	/* NOTE(review): the symbol flags come from global_trace, not iter->tr */
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
	struct array_buffer *buf = iter->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
	struct tracer *type = iter->trace;
	unsigned long entries;
	unsigned long total;
	const char *name = type->name;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
		   name, init_utsname()->release);
	seq_puts(m, "# -----------------------------------"
		 "---------------------------------\n");
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
		   entries,
		   total,
		   buf->cpu,
		   preempt_model_str(),
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
	seq_puts(m, "#    -----------------\n");
	seq_printf(m, "#    | task: %.16s-%d "
		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
		   data->policy, data->rt_priority);
	seq_puts(m, "#    -----------------\n");

	if (data->critical_start) {
		/* Symbols are formatted into iter->seq, then copied out to m */
		seq_puts(m, "#  => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#  => ended at:   ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#\n");
	}

	seq_puts(m, "#\n");
}
2989 
2990 static void test_cpu_buff_start(struct trace_iterator *iter)
2991 {
2992 	struct trace_seq *s = &iter->seq;
2993 	struct trace_array *tr = iter->tr;
2994 
2995 	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
2996 		return;
2997 
2998 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2999 		return;
3000 
3001 	if (cpumask_available(iter->started) &&
3002 	    cpumask_test_cpu(iter->cpu, iter->started))
3003 		return;
3004 
3005 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3006 		return;
3007 
3008 	if (cpumask_available(iter->started))
3009 		cpumask_set_cpu(iter->cpu, iter->started);
3010 
3011 	/* Don't print started cpu buffer for the first entry of the trace */
3012 	if (iter->idx > 1)
3013 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3014 				iter->cpu);
3015 }
3016 
3017 #ifdef CONFIG_FTRACE_SYSCALLS
3018 static bool is_syscall_event(struct trace_event *event)
3019 {
3020 	return (event->funcs == &enter_syscall_print_funcs) ||
3021 	       (event->funcs == &exit_syscall_print_funcs);
3022 
3023 }
3024 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
3025 #else
3026 static inline bool is_syscall_event(struct trace_event *event)
3027 {
3028 	return false;
3029 }
3030 #define syscall_buf_size 0
3031 #endif /* CONFIG_FTRACE_SYSCALLS */
3032 
3033 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3034 {
3035 	struct trace_array *tr = iter->tr;
3036 	struct trace_seq *s = &iter->seq;
3037 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3038 	struct trace_entry *entry;
3039 	struct trace_event *event;
3040 
3041 	entry = iter->ent;
3042 
3043 	test_cpu_buff_start(iter);
3044 
3045 	event = ftrace_find_event(entry->type);
3046 
3047 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3048 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3049 			trace_print_lat_context(iter);
3050 		else
3051 			trace_print_context(iter);
3052 	}
3053 
3054 	if (trace_seq_has_overflowed(s))
3055 		return TRACE_TYPE_PARTIAL_LINE;
3056 
3057 	if (event) {
3058 		if (tr->trace_flags & TRACE_ITER(FIELDS))
3059 			return print_event_fields(iter, event);
3060 		/*
3061 		 * For TRACE_EVENT() events, the print_fmt is not
3062 		 * safe to use if the array has delta offsets
3063 		 * Force printing via the fields.
3064 		 */
3065 		if ((tr->text_delta)) {
3066 			/* ftrace and system call events are still OK */
3067 			if ((event->type > __TRACE_LAST_TYPE) &&
3068 			    !is_syscall_event(event))
3069 				return print_event_fields(iter, event);
3070 		}
3071 		return event->funcs->trace(iter, sym_flags, event);
3072 	}
3073 
3074 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3075 
3076 	return trace_handle_return(s);
3077 }
3078 
3079 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3080 {
3081 	struct trace_array *tr = iter->tr;
3082 	struct trace_seq *s = &iter->seq;
3083 	struct trace_entry *entry;
3084 	struct trace_event *event;
3085 
3086 	entry = iter->ent;
3087 
3088 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
3089 		trace_seq_printf(s, "%d %d %llu ",
3090 				 entry->pid, iter->cpu, iter->ts);
3091 
3092 	if (trace_seq_has_overflowed(s))
3093 		return TRACE_TYPE_PARTIAL_LINE;
3094 
3095 	event = ftrace_find_event(entry->type);
3096 	if (event)
3097 		return event->funcs->raw(iter, 0, event);
3098 
3099 	trace_seq_printf(s, "%d ?\n", entry->type);
3100 
3101 	return trace_handle_return(s);
3102 }
3103 
3104 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3105 {
3106 	struct trace_array *tr = iter->tr;
3107 	struct trace_seq *s = &iter->seq;
3108 	unsigned char newline = '\n';
3109 	struct trace_entry *entry;
3110 	struct trace_event *event;
3111 
3112 	entry = iter->ent;
3113 
3114 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3115 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3116 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3117 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3118 		if (trace_seq_has_overflowed(s))
3119 			return TRACE_TYPE_PARTIAL_LINE;
3120 	}
3121 
3122 	event = ftrace_find_event(entry->type);
3123 	if (event) {
3124 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3125 		if (ret != TRACE_TYPE_HANDLED)
3126 			return ret;
3127 	}
3128 
3129 	SEQ_PUT_FIELD(s, newline);
3130 
3131 	return trace_handle_return(s);
3132 }
3133 
3134 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3135 {
3136 	struct trace_array *tr = iter->tr;
3137 	struct trace_seq *s = &iter->seq;
3138 	struct trace_entry *entry;
3139 	struct trace_event *event;
3140 
3141 	entry = iter->ent;
3142 
3143 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3144 		SEQ_PUT_FIELD(s, entry->pid);
3145 		SEQ_PUT_FIELD(s, iter->cpu);
3146 		SEQ_PUT_FIELD(s, iter->ts);
3147 		if (trace_seq_has_overflowed(s))
3148 			return TRACE_TYPE_PARTIAL_LINE;
3149 	}
3150 
3151 	event = ftrace_find_event(entry->type);
3152 	return event ? event->funcs->binary(iter, 0, event) :
3153 		TRACE_TYPE_HANDLED;
3154 }
3155 
3156 int trace_empty(struct trace_iterator *iter)
3157 {
3158 	struct ring_buffer_iter *buf_iter;
3159 	int cpu;
3160 
3161 	/* If we are looking at one CPU buffer, only check that one */
3162 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3163 		cpu = iter->cpu_file;
3164 		buf_iter = trace_buffer_iter(iter, cpu);
3165 		if (buf_iter) {
3166 			if (!ring_buffer_iter_empty(buf_iter))
3167 				return 0;
3168 		} else {
3169 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3170 				return 0;
3171 		}
3172 		return 1;
3173 	}
3174 
3175 	for_each_tracing_cpu(cpu) {
3176 		buf_iter = trace_buffer_iter(iter, cpu);
3177 		if (buf_iter) {
3178 			if (!ring_buffer_iter_empty(buf_iter))
3179 				return 0;
3180 		} else {
3181 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3182 				return 0;
3183 		}
3184 	}
3185 
3186 	return 1;
3187 }
3188 
3189 /*  Called with trace_event_read_lock() held. */
3190 enum print_line_t print_trace_line(struct trace_iterator *iter)
3191 {
3192 	struct trace_array *tr = iter->tr;
3193 	unsigned long trace_flags = tr->trace_flags;
3194 	enum print_line_t ret;
3195 
3196 	if (iter->lost_events) {
3197 		if (iter->lost_events == (unsigned long)-1)
3198 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
3199 					 iter->cpu);
3200 		else
3201 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3202 					 iter->cpu, iter->lost_events);
3203 		if (trace_seq_has_overflowed(&iter->seq))
3204 			return TRACE_TYPE_PARTIAL_LINE;
3205 	}
3206 
3207 	if (iter->trace && iter->trace->print_line) {
3208 		ret = iter->trace->print_line(iter);
3209 		if (ret != TRACE_TYPE_UNHANDLED)
3210 			return ret;
3211 	}
3212 
3213 	if (iter->ent->type == TRACE_BPUTS &&
3214 			trace_flags & TRACE_ITER(PRINTK) &&
3215 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3216 		return trace_print_bputs_msg_only(iter);
3217 
3218 	if (iter->ent->type == TRACE_BPRINT &&
3219 			trace_flags & TRACE_ITER(PRINTK) &&
3220 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3221 		return trace_print_bprintk_msg_only(iter);
3222 
3223 	if (iter->ent->type == TRACE_PRINT &&
3224 			trace_flags & TRACE_ITER(PRINTK) &&
3225 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3226 		return trace_print_printk_msg_only(iter);
3227 
3228 	if (trace_flags & TRACE_ITER(BIN))
3229 		return print_bin_fmt(iter);
3230 
3231 	if (trace_flags & TRACE_ITER(HEX))
3232 		return print_hex_fmt(iter);
3233 
3234 	if (trace_flags & TRACE_ITER(RAW))
3235 		return print_raw_fmt(iter);
3236 
3237 	return print_trace_fmt(iter);
3238 }
3239 
3240 void trace_latency_header(struct seq_file *m)
3241 {
3242 	struct trace_iterator *iter = m->private;
3243 	struct trace_array *tr = iter->tr;
3244 
3245 	/* print nothing if the buffers are empty */
3246 	if (trace_empty(iter))
3247 		return;
3248 
3249 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3250 		print_trace_header(m, iter);
3251 
3252 	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
3253 		print_lat_help_header(m);
3254 }
3255 
3256 void trace_default_header(struct seq_file *m)
3257 {
3258 	struct trace_iterator *iter = m->private;
3259 	struct trace_array *tr = iter->tr;
3260 	unsigned long trace_flags = tr->trace_flags;
3261 
3262 	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
3263 		return;
3264 
3265 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3266 		/* print nothing if the buffers are empty */
3267 		if (trace_empty(iter))
3268 			return;
3269 		print_trace_header(m, iter);
3270 		if (!(trace_flags & TRACE_ITER(VERBOSE)))
3271 			print_lat_help_header(m);
3272 	} else {
3273 		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
3274 			if (trace_flags & TRACE_ITER(IRQ_INFO))
3275 				print_func_help_header_irq(iter->array_buffer,
3276 							   m, trace_flags);
3277 			else
3278 				print_func_help_header(iter->array_buffer, m,
3279 						       trace_flags);
3280 		}
3281 	}
3282 }
3283 
/* Add a warning banner to the output when ftrace_is_dead() reports true. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
3291 
3292 static int s_show(struct seq_file *m, void *v)
3293 {
3294 	struct trace_iterator *iter = v;
3295 	int ret;
3296 
3297 	if (iter->ent == NULL) {
3298 		if (iter->tr) {
3299 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3300 			seq_puts(m, "#\n");
3301 			test_ftrace_alive(m);
3302 		}
3303 		if (iter->snapshot && trace_empty(iter))
3304 			print_snapshot_help(m, iter);
3305 		else if (iter->trace && iter->trace->print_header)
3306 			iter->trace->print_header(m);
3307 		else
3308 			trace_default_header(m);
3309 
3310 	} else if (iter->leftover) {
3311 		/*
3312 		 * If we filled the seq_file buffer earlier, we
3313 		 * want to just show it now.
3314 		 */
3315 		ret = trace_print_seq(m, &iter->seq);
3316 
3317 		/* ret should this time be zero, but you never know */
3318 		iter->leftover = ret;
3319 
3320 	} else {
3321 		ret = print_trace_line(iter);
3322 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
3323 			iter->seq.full = 0;
3324 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
3325 		}
3326 		ret = trace_print_seq(m, &iter->seq);
3327 		/*
3328 		 * If we overflow the seq_file buffer, then it will
3329 		 * ask us for this data again at start up.
3330 		 * Use that instead.
3331 		 *  ret is 0 if seq_file write succeeded.
3332 		 *        -1 otherwise.
3333 		 */
3334 		iter->leftover = ret;
3335 	}
3336 
3337 	return 0;
3338 }
3339 
3340 static const struct seq_operations tracer_seq_ops = {
3341 	.start		= s_start,
3342 	.next		= s_next,
3343 	.stop		= s_stop,
3344 	.show		= s_show,
3345 };
3346 
3347 /*
3348  * Note, as iter itself can be allocated and freed in different
3349  * ways, this function is only used to free its content, and not
3350  * the iterator itself. The only requirement to all the allocations
3351  * is that it must zero all fields (kzalloc), as freeing works with
3352  * ethier allocated content or NULL.
3353  */
3354 static void free_trace_iter_content(struct trace_iterator *iter)
3355 {
3356 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
3357 	if (iter->fmt != static_fmt_buf)
3358 		kfree(iter->fmt);
3359 
3360 	kfree(iter->temp);
3361 	kfree(iter->buffer_iter);
3362 	mutex_destroy(&iter->mutex);
3363 	free_cpumask_var(iter->started);
3364 }
3365 
3366 struct trace_iterator *
3367 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3368 {
3369 	struct trace_array *tr = inode->i_private;
3370 	struct trace_iterator *iter;
3371 	int cpu;
3372 
3373 	if (tracing_disabled)
3374 		return ERR_PTR(-ENODEV);
3375 
3376 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3377 	if (!iter)
3378 		return ERR_PTR(-ENOMEM);
3379 
3380 	iter->buffer_iter = kzalloc_objs(*iter->buffer_iter, nr_cpu_ids);
3381 	if (!iter->buffer_iter)
3382 		goto release;
3383 
3384 	/*
3385 	 * trace_find_next_entry() may need to save off iter->ent.
3386 	 * It will place it into the iter->temp buffer. As most
3387 	 * events are less than 128, allocate a buffer of that size.
3388 	 * If one is greater, then trace_find_next_entry() will
3389 	 * allocate a new buffer to adjust for the bigger iter->ent.
3390 	 * It's not critical if it fails to get allocated here.
3391 	 */
3392 	iter->temp = kmalloc(128, GFP_KERNEL);
3393 	if (iter->temp)
3394 		iter->temp_size = 128;
3395 
3396 	/*
3397 	 * trace_event_printf() may need to modify given format
3398 	 * string to replace %p with %px so that it shows real address
3399 	 * instead of hash value. However, that is only for the event
3400 	 * tracing, other tracer may not need. Defer the allocation
3401 	 * until it is needed.
3402 	 */
3403 	iter->fmt = NULL;
3404 	iter->fmt_size = 0;
3405 
3406 	mutex_lock(&trace_types_lock);
3407 	iter->trace = tr->current_trace;
3408 
3409 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3410 		goto fail;
3411 
3412 	iter->tr = tr;
3413 
3414 #ifdef CONFIG_TRACER_SNAPSHOT
3415 	/* Currently only the top directory has a snapshot */
3416 	if (tr->current_trace->print_max || snapshot)
3417 		iter->array_buffer = &tr->snapshot_buffer;
3418 	else
3419 #endif
3420 		iter->array_buffer = &tr->array_buffer;
3421 	iter->snapshot = snapshot;
3422 	iter->pos = -1;
3423 	iter->cpu_file = tracing_get_cpu(inode);
3424 	mutex_init(&iter->mutex);
3425 
3426 	/* Notify the tracer early; before we stop tracing. */
3427 	if (iter->trace->open)
3428 		iter->trace->open(iter);
3429 
3430 	/* Annotate start of buffers if we had overruns */
3431 	if (ring_buffer_overruns(iter->array_buffer->buffer))
3432 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3433 
3434 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3435 	if (trace_clocks[tr->clock_id].in_ns)
3436 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3437 
3438 	/*
3439 	 * If pause-on-trace is enabled, then stop the trace while
3440 	 * dumping, unless this is the "snapshot" file
3441 	 */
3442 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
3443 		iter->iter_flags |= TRACE_FILE_PAUSE;
3444 		tracing_stop_tr(tr);
3445 	}
3446 
3447 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3448 		for_each_tracing_cpu(cpu) {
3449 			iter->buffer_iter[cpu] =
3450 				ring_buffer_read_start(iter->array_buffer->buffer,
3451 						       cpu, GFP_KERNEL);
3452 			tracing_iter_reset(iter, cpu);
3453 		}
3454 	} else {
3455 		cpu = iter->cpu_file;
3456 		iter->buffer_iter[cpu] =
3457 			ring_buffer_read_start(iter->array_buffer->buffer,
3458 					       cpu, GFP_KERNEL);
3459 		tracing_iter_reset(iter, cpu);
3460 	}
3461 
3462 	mutex_unlock(&trace_types_lock);
3463 
3464 	return iter;
3465 
3466  fail:
3467 	mutex_unlock(&trace_types_lock);
3468 	free_trace_iter_content(iter);
3469 release:
3470 	seq_release_private(inode, file);
3471 	return ERR_PTR(-ENOMEM);
3472 }
3473 
3474 int tracing_open_generic(struct inode *inode, struct file *filp)
3475 {
3476 	int ret;
3477 
3478 	ret = tracing_check_open_get_tr(NULL);
3479 	if (ret)
3480 		return ret;
3481 
3482 	filp->private_data = inode->i_private;
3483 	return 0;
3484 }
3485 
3486 /*
3487  * Open and update trace_array ref count.
3488  * Must have the current trace_array passed to it.
3489  */
3490 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3491 {
3492 	struct trace_array *tr = inode->i_private;
3493 	int ret;
3494 
3495 	ret = tracing_check_open_get_tr(tr);
3496 	if (ret)
3497 		return ret;
3498 
3499 	if ((filp->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
3500 		trace_array_put(tr);
3501 		return -EACCES;
3502 	}
3503 
3504 	filp->private_data = inode->i_private;
3505 
3506 	return 0;
3507 }
3508 
3509 /*
3510  * The private pointer of the inode is the trace_event_file.
3511  * Update the tr ref count associated to it.
3512  */
3513 int tracing_open_file_tr(struct inode *inode, struct file *filp)
3514 {
3515 	struct trace_event_file *file = inode->i_private;
3516 	int ret;
3517 
3518 	ret = tracing_check_open_get_tr(file->tr);
3519 	if (ret)
3520 		return ret;
3521 
3522 	guard(mutex)(&event_mutex);
3523 
3524 	/* Fail if the file is marked for removal */
3525 	if (file->flags & EVENT_FILE_FL_FREED) {
3526 		trace_array_put(file->tr);
3527 		return -ENODEV;
3528 	} else {
3529 		event_file_get(file);
3530 	}
3531 
3532 	return 0;
3533 }
3534 
3535 int tracing_release_file_tr(struct inode *inode, struct file *filp)
3536 {
3537 	struct trace_event_file *file = inode->i_private;
3538 
3539 	trace_array_put(file->tr);
3540 	event_file_put(file);
3541 
3542 	return 0;
3543 }
3544 
/*
 * Release for single_open() based event files: run
 * tracing_release_file_tr() and then single_release(), whose result is
 * returned.
 */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);

	return single_release(inode, filp);
}
3550 
3551 int tracing_release(struct inode *inode, struct file *file)
3552 {
3553 	struct trace_array *tr = inode->i_private;
3554 	struct seq_file *m = file->private_data;
3555 	struct trace_iterator *iter;
3556 	int cpu;
3557 
3558 	if (!(file->f_mode & FMODE_READ)) {
3559 		trace_array_put(tr);
3560 		return 0;
3561 	}
3562 
3563 	/* Writes do not use seq_file */
3564 	iter = m->private;
3565 	mutex_lock(&trace_types_lock);
3566 
3567 	for_each_tracing_cpu(cpu) {
3568 		if (iter->buffer_iter[cpu])
3569 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3570 	}
3571 
3572 	if (iter->trace && iter->trace->close)
3573 		iter->trace->close(iter);
3574 
3575 	if (iter->iter_flags & TRACE_FILE_PAUSE)
3576 		/* reenable tracing if it was previously enabled */
3577 		tracing_start_tr(tr);
3578 
3579 	__trace_array_put(tr);
3580 
3581 	mutex_unlock(&trace_types_lock);
3582 
3583 	free_trace_iter_content(iter);
3584 	seq_release_private(inode, file);
3585 
3586 	return 0;
3587 }
3588 
3589 int tracing_release_generic_tr(struct inode *inode, struct file *file)
3590 {
3591 	struct trace_array *tr = inode->i_private;
3592 
3593 	trace_array_put(tr);
3594 	return 0;
3595 }
3596 
3597 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3598 {
3599 	struct trace_array *tr = inode->i_private;
3600 
3601 	trace_array_put(tr);
3602 
3603 	return single_release(inode, file);
3604 }
3605 
3606 static bool update_last_data_if_empty(struct trace_array *tr);
3607 
3608 static int tracing_open(struct inode *inode, struct file *file)
3609 {
3610 	struct trace_array *tr = inode->i_private;
3611 	struct trace_iterator *iter;
3612 	int ret;
3613 
3614 	ret = tracing_check_open_get_tr(tr);
3615 	if (ret)
3616 		return ret;
3617 
3618 	/* If this file was open for write, then erase contents */
3619 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3620 		int cpu = tracing_get_cpu(inode);
3621 		struct array_buffer *trace_buf = &tr->array_buffer;
3622 
3623 #ifdef CONFIG_TRACER_MAX_TRACE
3624 		if (tr->current_trace->print_max)
3625 			trace_buf = &tr->snapshot_buffer;
3626 #endif
3627 
3628 		if (cpu == RING_BUFFER_ALL_CPUS)
3629 			tracing_reset_online_cpus(trace_buf);
3630 		else
3631 			tracing_reset_cpu(trace_buf, cpu);
3632 
3633 		update_last_data_if_empty(tr);
3634 	}
3635 
3636 	if (file->f_mode & FMODE_READ) {
3637 		iter = __tracing_open(inode, file, false);
3638 		if (IS_ERR(iter))
3639 			ret = PTR_ERR(iter);
3640 		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
3641 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3642 	}
3643 
3644 	if (ret < 0)
3645 		trace_array_put(tr);
3646 
3647 	return ret;
3648 }
3649 
3650 /*
3651  * Some tracers are not suitable for instance buffers.
3652  * A tracer is always available for the global array (toplevel)
3653  * or if it explicitly states that it is.
3654  */
3655 static bool
3656 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3657 {
3658 	/* arrays with mapped buffer range do not have snapshots */
3659 	if (tr->range_addr_start && tracer_uses_snapshot(t))
3660 		return false;
3661 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3662 }
3663 
3664 /* Find the next tracer that this trace array may use */
3665 static struct tracer *
3666 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3667 {
3668 	while (t && !trace_ok_for_array(t, tr))
3669 		t = t->next;
3670 
3671 	return t;
3672 }
3673 
3674 static void *
3675 t_next(struct seq_file *m, void *v, loff_t *pos)
3676 {
3677 	struct trace_array *tr = m->private;
3678 	struct tracer *t = v;
3679 
3680 	(*pos)++;
3681 
3682 	if (t)
3683 		t = get_tracer_for_array(tr, t->next);
3684 
3685 	return t;
3686 }
3687 
3688 static void *t_start(struct seq_file *m, loff_t *pos)
3689 {
3690 	struct trace_array *tr = m->private;
3691 	struct tracer *t;
3692 	loff_t l = 0;
3693 
3694 	mutex_lock(&trace_types_lock);
3695 
3696 	t = get_tracer_for_array(tr, trace_types);
3697 	for (; t && l < *pos; t = t_next(m, t, &l))
3698 			;
3699 
3700 	return t;
3701 }
3702 
3703 static void t_stop(struct seq_file *m, void *p)
3704 {
3705 	mutex_unlock(&trace_types_lock);
3706 }
3707 
3708 static int t_show(struct seq_file *m, void *v)
3709 {
3710 	struct tracer *t = v;
3711 
3712 	if (!t)
3713 		return 0;
3714 
3715 	seq_puts(m, t->name);
3716 	if (t->next)
3717 		seq_putc(m, ' ');
3718 	else
3719 		seq_putc(m, '\n');
3720 
3721 	return 0;
3722 }
3723 
3724 static const struct seq_operations show_traces_seq_ops = {
3725 	.start		= t_start,
3726 	.next		= t_next,
3727 	.stop		= t_stop,
3728 	.show		= t_show,
3729 };
3730 
3731 static int show_traces_open(struct inode *inode, struct file *file)
3732 {
3733 	struct trace_array *tr = inode->i_private;
3734 	struct seq_file *m;
3735 	int ret;
3736 
3737 	ret = tracing_check_open_get_tr(tr);
3738 	if (ret)
3739 		return ret;
3740 
3741 	ret = seq_open(file, &show_traces_seq_ops);
3742 	if (ret) {
3743 		trace_array_put(tr);
3744 		return ret;
3745 	}
3746 
3747 	m = file->private_data;
3748 	m->private = tr;
3749 
3750 	return 0;
3751 }
3752 
3753 static int tracing_seq_release(struct inode *inode, struct file *file)
3754 {
3755 	struct trace_array *tr = inode->i_private;
3756 
3757 	trace_array_put(tr);
3758 	return seq_release(inode, file);
3759 }
3760 
3761 static ssize_t
3762 tracing_write_stub(struct file *filp, const char __user *ubuf,
3763 		   size_t count, loff_t *ppos)
3764 {
3765 	return count;
3766 }
3767 
3768 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3769 {
3770 	int ret;
3771 
3772 	if (file->f_mode & FMODE_READ)
3773 		ret = seq_lseek(file, offset, whence);
3774 	else
3775 		file->f_pos = ret = 0;
3776 
3777 	return ret;
3778 }
3779 
3780 static const struct file_operations tracing_fops = {
3781 	.open		= tracing_open,
3782 	.read		= seq_read,
3783 	.read_iter	= seq_read_iter,
3784 	.splice_read	= copy_splice_read,
3785 	.write		= tracing_write_stub,
3786 	.llseek		= tracing_lseek,
3787 	.release	= tracing_release,
3788 };
3789 
3790 static const struct file_operations show_traces_fops = {
3791 	.open		= show_traces_open,
3792 	.read		= seq_read,
3793 	.llseek		= seq_lseek,
3794 	.release	= tracing_seq_release,
3795 };
3796 
3797 static ssize_t
3798 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3799 		     size_t count, loff_t *ppos)
3800 {
3801 	struct trace_array *tr = file_inode(filp)->i_private;
3802 	char *mask_str __free(kfree) = NULL;
3803 	int len;
3804 
3805 	len = snprintf(NULL, 0, "%*pb\n",
3806 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
3807 	mask_str = kmalloc(len, GFP_KERNEL);
3808 	if (!mask_str)
3809 		return -ENOMEM;
3810 
3811 	len = snprintf(mask_str, len, "%*pb\n",
3812 		       cpumask_pr_args(tr->tracing_cpumask));
3813 	if (len >= count)
3814 		return -EINVAL;
3815 
3816 	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3817 }
3818 
3819 int tracing_set_cpumask(struct trace_array *tr,
3820 			cpumask_var_t tracing_cpumask_new)
3821 {
3822 	int cpu;
3823 
3824 	if (!tr)
3825 		return -EINVAL;
3826 
3827 	local_irq_disable();
3828 	arch_spin_lock(&tr->max_lock);
3829 	for_each_tracing_cpu(cpu) {
3830 		/*
3831 		 * Increase/decrease the disabled counter if we are
3832 		 * about to flip a bit in the cpumask:
3833 		 */
3834 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3835 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3836 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
3837 #ifdef CONFIG_TRACER_SNAPSHOT
3838 			ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu);
3839 #endif
3840 		}
3841 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3842 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3843 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
3844 #ifdef CONFIG_TRACER_SNAPSHOT
3845 			ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu);
3846 #endif
3847 		}
3848 	}
3849 	arch_spin_unlock(&tr->max_lock);
3850 	local_irq_enable();
3851 
3852 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3853 
3854 	return 0;
3855 }
3856 
3857 static ssize_t
3858 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3859 		      size_t count, loff_t *ppos)
3860 {
3861 	struct trace_array *tr = file_inode(filp)->i_private;
3862 	cpumask_var_t tracing_cpumask_new;
3863 	int err;
3864 
3865 	if (count == 0 || count > KMALLOC_MAX_SIZE)
3866 		return -EINVAL;
3867 
3868 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3869 		return -ENOMEM;
3870 
3871 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3872 	if (err)
3873 		goto err_free;
3874 
3875 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
3876 	if (err)
3877 		goto err_free;
3878 
3879 	free_cpumask_var(tracing_cpumask_new);
3880 
3881 	return count;
3882 
3883 err_free:
3884 	free_cpumask_var(tracing_cpumask_new);
3885 
3886 	return err;
3887 }
3888 
3889 static const struct file_operations tracing_cpumask_fops = {
3890 	.open		= tracing_open_generic_tr,
3891 	.read		= tracing_cpumask_read,
3892 	.write		= tracing_cpumask_write,
3893 	.release	= tracing_release_generic_tr,
3894 	.llseek		= generic_file_llseek,
3895 };
3896 
3897 static int tracing_trace_options_show(struct seq_file *m, void *v)
3898 {
3899 	struct tracer_opt *trace_opts;
3900 	struct trace_array *tr = m->private;
3901 	struct tracer_flags *flags;
3902 	u32 tracer_flags;
3903 	int i;
3904 
3905 	guard(mutex)(&trace_types_lock);
3906 
3907 	for (i = 0; trace_options[i]; i++) {
3908 		if (tr->trace_flags & (1ULL << i))
3909 			seq_printf(m, "%s\n", trace_options[i]);
3910 		else
3911 			seq_printf(m, "no%s\n", trace_options[i]);
3912 	}
3913 
3914 	flags = tr->current_trace_flags;
3915 	if (!flags || !flags->opts)
3916 		return 0;
3917 
3918 	tracer_flags = flags->val;
3919 	trace_opts = flags->opts;
3920 
3921 	for (i = 0; trace_opts[i].name; i++) {
3922 		if (tracer_flags & trace_opts[i].bit)
3923 			seq_printf(m, "%s\n", trace_opts[i].name);
3924 		else
3925 			seq_printf(m, "no%s\n", trace_opts[i].name);
3926 	}
3927 
3928 	return 0;
3929 }
3930 
3931 static int __set_tracer_option(struct trace_array *tr,
3932 			       struct tracer_flags *tracer_flags,
3933 			       struct tracer_opt *opts, int neg)
3934 {
3935 	struct tracer *trace = tracer_flags->trace;
3936 	int ret = 0;
3937 
3938 	if (trace->set_flag)
3939 		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3940 	if (ret)
3941 		return ret;
3942 
3943 	if (neg)
3944 		tracer_flags->val &= ~opts->bit;
3945 	else
3946 		tracer_flags->val |= opts->bit;
3947 	return 0;
3948 }
3949 
3950 /* Try to assign a tracer specific option */
3951 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3952 {
3953 	struct tracer_flags *tracer_flags = tr->current_trace_flags;
3954 	struct tracer_opt *opts = NULL;
3955 	int i;
3956 
3957 	if (!tracer_flags || !tracer_flags->opts)
3958 		return 0;
3959 
3960 	for (i = 0; tracer_flags->opts[i].name; i++) {
3961 		opts = &tracer_flags->opts[i];
3962 
3963 		if (strcmp(cmp, opts->name) == 0)
3964 			return __set_tracer_option(tr, tracer_flags, opts, neg);
3965 	}
3966 
3967 	return -EINVAL;
3968 }
3969 
3970 /* Some tracers require overwrite to stay enabled */
3971 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
3972 {
3973 	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
3974 		return -1;
3975 
3976 	return 0;
3977 }
3978 
3979 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
3980 {
3981 	switch (mask) {
3982 	case TRACE_ITER(RECORD_TGID):
3983 	case TRACE_ITER(RECORD_CMD):
3984 	case TRACE_ITER(TRACE_PRINTK):
3985 	case TRACE_ITER(COPY_MARKER):
3986 		lockdep_assert_held(&event_mutex);
3987 	}
3988 
3989 	/* do nothing if flag is already set */
3990 	if (!!(tr->trace_flags & mask) == !!enabled)
3991 		return 0;
3992 
3993 	/* Give the tracer a chance to approve the change */
3994 	if (tr->current_trace->flag_changed)
3995 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3996 			return -EINVAL;
3997 
3998 	switch (mask) {
3999 	case TRACE_ITER(TRACE_PRINTK):
4000 		if (enabled) {
4001 			update_printk_trace(tr);
4002 		} else {
4003 			/*
4004 			 * The global_trace cannot clear this.
4005 			 * It's flag only gets cleared if another instance sets it.
4006 			 */
4007 			if (printk_trace == &global_trace)
4008 				return -EINVAL;
4009 			/*
4010 			 * An instance must always have it set.
4011 			 * by default, that's the global_trace instance.
4012 			 */
4013 			if (printk_trace == tr)
4014 				update_printk_trace(&global_trace);
4015 		}
4016 		break;
4017 
4018 	case TRACE_ITER(COPY_MARKER):
4019 		update_marker_trace(tr, enabled);
4020 		/* update_marker_trace updates the tr->trace_flags */
4021 		return 0;
4022 	}
4023 
4024 	if (enabled)
4025 		tr->trace_flags |= mask;
4026 	else
4027 		tr->trace_flags &= ~mask;
4028 
4029 	switch (mask) {
4030 	case TRACE_ITER(RECORD_CMD):
4031 		trace_event_enable_cmd_record(enabled);
4032 		break;
4033 
4034 	case TRACE_ITER(RECORD_TGID):
4035 
4036 		if (trace_alloc_tgid_map() < 0) {
4037 			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
4038 			return -ENOMEM;
4039 		}
4040 
4041 		trace_event_enable_tgid_record(enabled);
4042 		break;
4043 
4044 	case TRACE_ITER(EVENT_FORK):
4045 		trace_event_follow_fork(tr, enabled);
4046 		break;
4047 
4048 	case TRACE_ITER(FUNC_FORK):
4049 		ftrace_pid_follow_fork(tr, enabled);
4050 		break;
4051 
4052 	case TRACE_ITER(OVERWRITE):
4053 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4054 #ifdef CONFIG_TRACER_SNAPSHOT
4055 		ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled);
4056 #endif
4057 		break;
4058 
4059 	case TRACE_ITER(PRINTK):
4060 		trace_printk_start_stop_comm(enabled);
4061 		trace_printk_control(enabled);
4062 		break;
4063 
4064 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
4065 	case TRACE_GRAPH_GRAPH_TIME:
4066 		ftrace_graph_graph_time_control(enabled);
4067 		break;
4068 #endif
4069 	}
4070 
4071 	return 0;
4072 }
4073 
4074 int trace_set_options(struct trace_array *tr, char *option)
4075 {
4076 	char *cmp;
4077 	int neg = 0;
4078 	int ret;
4079 	size_t orig_len = strlen(option);
4080 	int len;
4081 
4082 	cmp = strstrip(option);
4083 
4084 	len = str_has_prefix(cmp, "no");
4085 	if (len)
4086 		neg = 1;
4087 
4088 	cmp += len;
4089 
4090 	mutex_lock(&event_mutex);
4091 	mutex_lock(&trace_types_lock);
4092 
4093 	ret = match_string(trace_options, -1, cmp);
4094 	/* If no option could be set, test the specific tracer options */
4095 	if (ret < 0)
4096 		ret = set_tracer_option(tr, cmp, neg);
4097 	else
4098 		ret = set_tracer_flag(tr, 1ULL << ret, !neg);
4099 
4100 	mutex_unlock(&trace_types_lock);
4101 	mutex_unlock(&event_mutex);
4102 
4103 	/*
4104 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4105 	 * turn it back into a space.
4106 	 */
4107 	if (orig_len > strlen(option))
4108 		option[strlen(option)] = ' ';
4109 
4110 	return ret;
4111 }
4112 
4113 static void __init apply_trace_boot_options(void)
4114 {
4115 	char *buf = trace_boot_options_buf;
4116 	char *option;
4117 
4118 	while (true) {
4119 		option = strsep(&buf, ",");
4120 
4121 		if (!option)
4122 			break;
4123 
4124 		if (*option)
4125 			trace_set_options(&global_trace, option);
4126 
4127 		/* Put back the comma to allow this to be called again */
4128 		if (buf)
4129 			*(buf - 1) = ',';
4130 	}
4131 }
4132 
4133 static ssize_t
4134 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4135 			size_t cnt, loff_t *ppos)
4136 {
4137 	struct seq_file *m = filp->private_data;
4138 	struct trace_array *tr = m->private;
4139 	char buf[64];
4140 	int ret;
4141 
4142 	if (cnt >= sizeof(buf))
4143 		return -EINVAL;
4144 
4145 	if (copy_from_user(buf, ubuf, cnt))
4146 		return -EFAULT;
4147 
4148 	buf[cnt] = 0;
4149 
4150 	ret = trace_set_options(tr, buf);
4151 	if (ret < 0)
4152 		return ret;
4153 
4154 	*ppos += cnt;
4155 
4156 	return cnt;
4157 }
4158 
4159 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4160 {
4161 	struct trace_array *tr = inode->i_private;
4162 	int ret;
4163 
4164 	ret = tracing_check_open_get_tr(tr);
4165 	if (ret)
4166 		return ret;
4167 
4168 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4169 	if (ret < 0)
4170 		trace_array_put(tr);
4171 
4172 	return ret;
4173 }
4174 
4175 static const struct file_operations tracing_iter_fops = {
4176 	.open		= tracing_trace_options_open,
4177 	.read		= seq_read,
4178 	.llseek		= seq_lseek,
4179 	.release	= tracing_single_release_tr,
4180 	.write		= tracing_trace_options_write,
4181 };
4182 
4183 static const char readme_msg[] =
4184 	"tracing mini-HOWTO:\n\n"
4185 	"By default tracefs removes all OTH file permission bits.\n"
4186 	"When mounting tracefs an optional group id can be specified\n"
4187 	"which adds the group to every directory and file in tracefs:\n\n"
4188 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
4189 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4190 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4191 	" Important files:\n"
4192 	"  trace\t\t\t- The static contents of the buffer\n"
4193 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4194 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4195 	"  current_tracer\t- function and latency tracers\n"
4196 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4197 	"  error_log\t- error log for failed commands (that support it)\n"
4198 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4199 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4200 	"  trace_clock\t\t- change the clock used to order events\n"
4201 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4202 	"      global:   Synced across CPUs but slows tracing down.\n"
4203 	"     counter:   Not a clock, but just an increment\n"
4204 	"      uptime:   Jiffy counter from time of boot\n"
4205 	"        perf:   Same clock that perf events use\n"
4206 #ifdef CONFIG_X86_64
4207 	"     x86-tsc:   TSC cycle counter\n"
4208 #endif
4209 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4210 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4211 	"    absolute:   Absolute (standalone) timestamp\n"
4212 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4213 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4214 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4215 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4216 	"\t\t\t  Remove sub-buffer with rmdir\n"
4217 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4218 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4219 	"\t\t\t  option name\n"
4220 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4221 #ifdef CONFIG_DYNAMIC_FTRACE
4222 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4223 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4224 	"\t\t\t  functions\n"
4225 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4226 	"\t     modules: Can select a group via module\n"
4227 	"\t      Format: :mod:<module-name>\n"
4228 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4229 	"\t    triggers: a command to perform when function is hit\n"
4230 	"\t      Format: <function>:<trigger>[:count]\n"
4231 	"\t     trigger: traceon, traceoff\n"
4232 	"\t\t      enable_event:<system>:<event>\n"
4233 	"\t\t      disable_event:<system>:<event>\n"
4234 #ifdef CONFIG_STACKTRACE
4235 	"\t\t      stacktrace\n"
4236 #endif
4237 #ifdef CONFIG_TRACER_SNAPSHOT
4238 	"\t\t      snapshot\n"
4239 #endif
4240 	"\t\t      dump\n"
4241 	"\t\t      cpudump\n"
4242 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4243 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4244 	"\t     The first one will disable tracing every time do_fault is hit\n"
4245 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4246 	"\t       The first time do trap is hit and it disables tracing, the\n"
4247 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4248 	"\t       the counter will not decrement. It only decrements when the\n"
4249 	"\t       trigger did work\n"
4250 	"\t     To remove trigger without count:\n"
4251 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4252 	"\t     To remove trigger with a count:\n"
4253 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4254 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4255 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4256 	"\t    modules: Can select a group via module command :mod:\n"
4257 	"\t    Does not accept triggers\n"
4258 #endif /* CONFIG_DYNAMIC_FTRACE */
4259 #ifdef CONFIG_FUNCTION_TRACER
4260 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4261 	"\t\t    (function)\n"
4262 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
4263 	"\t\t    (function)\n"
4264 #endif
4265 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4266 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4267 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4268 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4269 #endif
4270 #ifdef CONFIG_TRACER_SNAPSHOT
4271 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4272 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4273 	"\t\t\t  information\n"
4274 #endif
4275 #ifdef CONFIG_STACK_TRACER
4276 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4277 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4278 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4279 	"\t\t\t  new trace)\n"
4280 #ifdef CONFIG_DYNAMIC_FTRACE
4281 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4282 	"\t\t\t  traces\n"
4283 #endif
4284 #endif /* CONFIG_STACK_TRACER */
4285 #ifdef CONFIG_DYNAMIC_EVENTS
4286 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4287 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4288 #endif
4289 #ifdef CONFIG_KPROBE_EVENTS
4290 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4291 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4292 #endif
4293 #ifdef CONFIG_UPROBE_EVENTS
4294 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4295 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4296 #endif
4297 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
4298     defined(CONFIG_FPROBE_EVENTS)
4299 	"\t  accepts: event-definitions (one definition per line)\n"
4300 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4301 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
4302 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
4303 #endif
4304 #ifdef CONFIG_FPROBE_EVENTS
4305 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
4306 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
4307 #endif
4308 #ifdef CONFIG_HIST_TRIGGERS
4309 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4310 #endif
4311 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
4312 	"\t           -:[<group>/][<event>]\n"
4313 #ifdef CONFIG_KPROBE_EVENTS
4314 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4315   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
4316 #endif
4317 #ifdef CONFIG_UPROBE_EVENTS
4318   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
4319 #endif
4320 	"\t     args: <name>=fetcharg[:type]\n"
4321 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
4322 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4323 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4324 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
4325 	"\t           <argname>[->field[->field|.field...]],\n"
4326 #endif
4327 #else
4328 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4329 #endif
4330 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4331 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
4332 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
4333 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4334 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
4335 #ifdef CONFIG_HIST_TRIGGERS
4336 	"\t    field: <stype> <name>;\n"
4337 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4338 	"\t           [unsigned] char/int/long\n"
4339 #endif
4340 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
4341 	"\t            of the <attached-group>/<attached-event>.\n"
4342 #endif
4343 	"  set_event\t\t- Enables events by name written into it\n"
4344 	"\t\t\t  Can enable module events via: :mod:<module>\n"
4345 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4346 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4347 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4348 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4349 	"\t\t\t  events\n"
4350 	"      filter\t\t- If set, only events passing filter are traced\n"
4351 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4352 	"\t\t\t  <event>:\n"
4353 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4354 	"      filter\t\t- If set, only events passing filter are traced\n"
4355 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4356 	"\t    Format: <trigger>[:count][if <filter>]\n"
4357 	"\t   trigger: traceon, traceoff\n"
4358 	"\t            enable_event:<system>:<event>\n"
4359 	"\t            disable_event:<system>:<event>\n"
4360 #ifdef CONFIG_HIST_TRIGGERS
4361 	"\t            enable_hist:<system>:<event>\n"
4362 	"\t            disable_hist:<system>:<event>\n"
4363 #endif
4364 #ifdef CONFIG_STACKTRACE
4365 	"\t\t    stacktrace\n"
4366 #endif
4367 #ifdef CONFIG_TRACER_SNAPSHOT
4368 	"\t\t    snapshot\n"
4369 #endif
4370 #ifdef CONFIG_HIST_TRIGGERS
4371 	"\t\t    hist (see below)\n"
4372 #endif
4373 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4374 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4375 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4376 	"\t                  events/block/block_unplug/trigger\n"
4377 	"\t   The first disables tracing every time block_unplug is hit.\n"
4378 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4379 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4380 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4381 	"\t   Like function triggers, the counter is only decremented if it\n"
4382 	"\t    enabled or disabled tracing.\n"
4383 	"\t   To remove a trigger without a count:\n"
4384 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4385 	"\t   To remove a trigger with a count:\n"
4386 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4387 	"\t   Filters can be ignored when removing a trigger.\n"
4388 #ifdef CONFIG_HIST_TRIGGERS
4389 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4390 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4391 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
4392 	"\t            [:values=<field1[,field2,...]>]\n"
4393 	"\t            [:sort=<field1[,field2,...]>]\n"
4394 	"\t            [:size=#entries]\n"
4395 	"\t            [:pause][:continue][:clear]\n"
4396 	"\t            [:name=histname1]\n"
4397 	"\t            [:nohitcount]\n"
4398 	"\t            [:<handler>.<action>]\n"
4399 	"\t            [if <filter>]\n\n"
4400 	"\t    Note, special fields can be used as well:\n"
4401 	"\t            common_timestamp - to record current timestamp\n"
4402 	"\t            common_cpu - to record the CPU the event happened on\n"
4403 	"\n"
4404 	"\t    A hist trigger variable can be:\n"
4405 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
4406 	"\t        - a reference to another variable e.g. y=$x,\n"
4407 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
4408 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
4409 	"\n"
4410 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
4411 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
4412 	"\t    variable reference, field or numeric literal.\n"
4413 	"\n"
4414 	"\t    When a matching event is hit, an entry is added to a hash\n"
4415 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4416 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4417 	"\t    correspond to fields in the event's format description.  Keys\n"
4418 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
4419 	"\t    Compound keys consisting of up to two fields can be specified\n"
4420 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4421 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4422 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4423 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4424 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4425 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4426 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4427 	"\t    its histogram data will be shared with other triggers of the\n"
4428 	"\t    same name, and trigger hits will update this common data.\n\n"
4429 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4430 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4431 	"\t    triggers attached to an event, there will be a table for each\n"
4432 	"\t    trigger in the output.  The table displayed for a named\n"
4433 	"\t    trigger will be the same as any other instance having the\n"
4434 	"\t    same name.  The default format used to display a given field\n"
4435 	"\t    can be modified by appending any of the following modifiers\n"
4436 	"\t    to the field name, as applicable:\n\n"
4437 	"\t            .hex        display a number as a hex value\n"
4438 	"\t            .sym        display an address as a symbol\n"
4439 	"\t            .sym-offset display an address as a symbol and offset\n"
4440 	"\t            .execname   display a common_pid as a program name\n"
4441 	"\t            .syscall    display a syscall id as a syscall name\n"
4442 	"\t            .log2       display log2 value rather than raw number\n"
4443 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
4444 	"\t            .usecs      display a common_timestamp in microseconds\n"
4445 	"\t            .percent    display a number of percentage value\n"
4446 	"\t            .graph      display a bar-graph of a value\n\n"
4447 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4448 	"\t    trigger or to start a hist trigger but not log any events\n"
4449 	"\t    until told to do so.  'continue' can be used to start or\n"
4450 	"\t    restart a paused hist trigger.\n\n"
4451 	"\t    The 'clear' parameter will clear the contents of a running\n"
4452 	"\t    hist trigger and leave its current paused/active state\n"
4453 	"\t    unchanged.\n\n"
4454 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
4455 	"\t    raw hitcount in the histogram.\n\n"
4456 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4457 	"\t    have one event conditionally start and stop another event's\n"
4458 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4459 	"\t    the enable_event and disable_event triggers.\n\n"
4460 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4461 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4462 	"\t        <handler>.<action>\n\n"
4463 	"\t    The available handlers are:\n\n"
4464 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4465 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4466 	"\t        onchange(var)            - invoke action if var changes\n\n"
4467 	"\t    The available actions are:\n\n"
4468 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4469 	"\t        save(field,...)                      - save current event fields\n"
4470 #ifdef CONFIG_TRACER_SNAPSHOT
4471 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
4472 #endif
4473 #ifdef CONFIG_SYNTH_EVENTS
4474 	"  synthetic_events\t- Create/append/remove/show synthetic events\n"
4475 	"\t  Write into this file to define/undefine new synthetic events.\n"
4476 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
4477 #endif
4478 #endif
4479 ;
4480 
4481 static ssize_t
4482 tracing_readme_read(struct file *filp, char __user *ubuf,
4483 		       size_t cnt, loff_t *ppos)
4484 {
4485 	return simple_read_from_buffer(ubuf, cnt, ppos,
4486 					readme_msg, strlen(readme_msg));
4487 }
4488 
/*
 * File operations for the help text file: reads are served from
 * readme_msg by tracing_readme_read(); no write handler is provided.
 */
static const struct file_operations tracing_readme_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_readme_read,
	.llseek		= generic_file_llseek,
};
4494 
4495 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4496 static union trace_eval_map_item *
4497 update_eval_map(union trace_eval_map_item *ptr)
4498 {
4499 	if (!ptr->map.eval_string) {
4500 		if (ptr->tail.next) {
4501 			ptr = ptr->tail.next;
4502 			/* Set ptr to the next real item (skip head) */
4503 			ptr++;
4504 		} else
4505 			return NULL;
4506 	}
4507 	return ptr;
4508 }
4509 
4510 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4511 {
4512 	union trace_eval_map_item *ptr = v;
4513 
4514 	/*
4515 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4516 	 * This really should never happen.
4517 	 */
4518 	(*pos)++;
4519 	ptr = update_eval_map(ptr);
4520 	if (WARN_ON_ONCE(!ptr))
4521 		return NULL;
4522 
4523 	ptr++;
4524 	ptr = update_eval_map(ptr);
4525 
4526 	return ptr;
4527 }
4528 
4529 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4530 {
4531 	union trace_eval_map_item *v;
4532 	loff_t l = 0;
4533 
4534 	mutex_lock(&trace_eval_mutex);
4535 
4536 	v = trace_eval_maps;
4537 	if (v)
4538 		v++;
4539 
4540 	while (v && l < *pos) {
4541 		v = eval_map_next(m, v, &l);
4542 	}
4543 
4544 	return v;
4545 }
4546 
/* seq_file ->stop: release trace_eval_mutex taken by eval_map_start() */
static void eval_map_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&trace_eval_mutex);
}
4551 
4552 static int eval_map_show(struct seq_file *m, void *v)
4553 {
4554 	union trace_eval_map_item *ptr = v;
4555 
4556 	seq_printf(m, "%s %ld (%s)\n",
4557 		   ptr->map.eval_string, ptr->map.eval_value,
4558 		   ptr->map.system);
4559 
4560 	return 0;
4561 }
4562 
/*
 * seq_file iterator over the eval map list.  ->start takes
 * trace_eval_mutex and ->stop releases it.
 */
static const struct seq_operations tracing_eval_map_seq_ops = {
	.start		= eval_map_start,
	.next		= eval_map_next,
	.stop		= eval_map_stop,
	.show		= eval_map_show,
};
4569 
4570 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
4571 {
4572 	int ret;
4573 
4574 	ret = tracing_check_open_get_tr(NULL);
4575 	if (ret)
4576 		return ret;
4577 
4578 	return seq_open(filp, &tracing_eval_map_seq_ops);
4579 }
4580 
/*
 * File operations for the "eval_map" file created by
 * trace_create_eval_file(); a plain seq_file backed by
 * tracing_eval_map_seq_ops.
 */
static const struct file_operations tracing_eval_map_fops = {
	.open		= tracing_eval_map_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
4587 
4588 static inline union trace_eval_map_item *
4589 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
4590 {
4591 	/* Return tail of array given the head */
4592 	return ptr + ptr->head.length + 1;
4593 }
4594 
4595 static void
4596 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
4597 			   int len)
4598 {
4599 	struct trace_eval_map **stop;
4600 	struct trace_eval_map **map;
4601 	union trace_eval_map_item *map_array;
4602 	union trace_eval_map_item *ptr;
4603 
4604 	stop = start + len;
4605 
4606 	/*
4607 	 * The trace_eval_maps contains the map plus a head and tail item,
4608 	 * where the head holds the module and length of array, and the
4609 	 * tail holds a pointer to the next list.
4610 	 */
4611 	map_array = kmalloc_objs(*map_array, len + 2);
4612 	if (!map_array) {
4613 		pr_warn("Unable to allocate trace eval mapping\n");
4614 		return;
4615 	}
4616 
4617 	guard(mutex)(&trace_eval_mutex);
4618 
4619 	if (!trace_eval_maps)
4620 		trace_eval_maps = map_array;
4621 	else {
4622 		ptr = trace_eval_maps;
4623 		for (;;) {
4624 			ptr = trace_eval_jmp_to_tail(ptr);
4625 			if (!ptr->tail.next)
4626 				break;
4627 			ptr = ptr->tail.next;
4628 
4629 		}
4630 		ptr->tail.next = map_array;
4631 	}
4632 	map_array->head.mod = mod;
4633 	map_array->head.length = len;
4634 	map_array++;
4635 
4636 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4637 		map_array->map = **map;
4638 		map_array++;
4639 	}
4640 	memset(map_array, 0, sizeof(*map_array));
4641 }
4642 
/*
 * Create the read-only "eval_map" file under @d_tracer, served by
 * tracing_eval_map_fops with no private data.
 */
static void trace_create_eval_file(struct dentry *d_tracer)
{
	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
			  NULL, &tracing_eval_map_fops);
}
4648 
4649 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No "eval_map" file without CONFIG_TRACE_EVAL_MAP_FILE: nothing to create */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
/* Without CONFIG_TRACE_EVAL_MAP_FILE no eval map list is kept: no-op */
static inline void trace_insert_eval_map_file(struct module *mod,
			      struct trace_eval_map **start, int len) { }
4653 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
4654 
4655 static void
4656 trace_event_update_with_eval_map(struct module *mod,
4657 				 struct trace_eval_map **start,
4658 				 int len)
4659 {
4660 	struct trace_eval_map **map;
4661 
4662 	/* Always run sanitizer only if btf_type_tag attr exists. */
4663 	if (len <= 0) {
4664 		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
4665 		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
4666 		      __has_attribute(btf_type_tag)))
4667 			return;
4668 	}
4669 
4670 	map = start;
4671 
4672 	trace_event_update_all(map, len);
4673 
4674 	if (len <= 0)
4675 		return;
4676 
4677 	trace_insert_eval_map_file(mod, start, len);
4678 }
4679 
4680 static ssize_t
4681 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4682 		       size_t cnt, loff_t *ppos)
4683 {
4684 	struct trace_array *tr = filp->private_data;
4685 	char buf[MAX_TRACER_SIZE+2];
4686 	int r;
4687 
4688 	scoped_guard(mutex, &trace_types_lock) {
4689 		r = sprintf(buf, "%s\n", tr->current_trace->name);
4690 	}
4691 
4692 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4693 }
4694 
/*
 * Prepare @tr for tracer @t: reset the online CPU buffers, run
 * update_last_data_if_empty() and then call the tracer's init callback,
 * whose return value is passed back.
 *
 * @t->init is called unconditionally, so it must not be NULL; the caller
 * visible in this file (tracing_set_tracer()) checks that first.
 */
int tracer_init(struct tracer *t, struct trace_array *tr)
{
	tracing_reset_online_cpus(&tr->array_buffer);
	update_last_data_if_empty(tr);
	return t->init(tr);
}
4701 
/* Record @val as the 'entries' value of every tracing CPU in @buf */
void trace_set_buffer_entries(struct array_buffer *buf, unsigned long val)
{
	int cpu;

	for_each_tracing_cpu(cpu)
		per_cpu_ptr(buf->data, cpu)->entries = val;
}
4709 
4710 static void update_buffer_entries(struct array_buffer *buf, int cpu)
4711 {
4712 	if (cpu == RING_BUFFER_ALL_CPUS) {
4713 		trace_set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
4714 	} else {
4715 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
4716 	}
4717 }
4718 
/*
 * Resize the ring buffer of @tr (and its snapshot buffer, when one is
 * allocated) to @size for @cpu, which may be RING_BUFFER_ALL_CPUS.
 *
 * Tracing on @tr is stopped for the duration of the resize and restarted
 * before returning.  Returns 0 if the main buffer is not set up yet,
 * otherwise the result of the last ring_buffer_resize() that was attempted.
 * The callers visible in this file all hold trace_types_lock.
 */
static int __tracing_resize_ring_buffer(struct trace_array *tr,
					unsigned long size, int cpu)
{
	int ret;

	/*
	 * If kernel or user changes the size of the ring buffer
	 * we use the size that was given, and we can forget about
	 * expanding it later.
	 */
	trace_set_ring_buffer_expanded(tr);

	/* May be called before buffers are initialized */
	if (!tr->array_buffer.buffer)
		return 0;

	/* Do not allow tracing while resizing ring buffer */
	tracing_stop_tr(tr);

	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
	if (ret < 0)
		goto out_start;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* No snapshot buffer allocated: only the main buffer needs updating */
	if (!tr->allocated_snapshot)
		goto out;

	ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu);
	if (ret < 0) {
		/*
		 * NOTE(review): the same buffer is passed as both arguments.
		 * Presumably this restores the old size from the main
		 * buffer's per-CPU 'entries', which are only refreshed by
		 * update_buffer_entries() further down and so still hold the
		 * pre-resize value -- confirm against
		 * resize_buffer_duplicate_size().
		 */
		int r = resize_buffer_duplicate_size(&tr->array_buffer,
						     &tr->array_buffer, cpu);
		if (r < 0) {
			/*
			 * AARGH! We are left with different
			 * size max buffer!!!!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the size of the main buffer, but failed to
			 * update the size of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			WARN_ON(1);
			tracing_disabled = 1;
		}
		/* ret still carries the snapshot resize error */
		goto out_start;
	}

	update_buffer_entries(&tr->snapshot_buffer, cpu);

 out:
#endif /* CONFIG_TRACER_SNAPSHOT */

	update_buffer_entries(&tr->array_buffer, cpu);
 out_start:
	tracing_start_tr(tr);
	return ret;
}
4781 
4782 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4783 				  unsigned long size, int cpu_id)
4784 {
4785 	guard(mutex)(&trace_types_lock);
4786 
4787 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4788 		/* make sure, this cpu is enabled in the mask */
4789 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
4790 			return -EINVAL;
4791 	}
4792 
4793 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
4794 }
4795 
/* One module recorded in the scratch area; filled in by save_mod() */
struct trace_mod_entry {
	unsigned long	mod_addr;	/* base address of the module's MOD_TEXT memory */
	char		mod_name[MODULE_NAME_LEN];	/* copy of the module's name */
};
4800 
/*
 * Layout of a trace_array's scratch area (tr->scratch): bookkeeping
 * followed by a variable number of module entries.
 */
struct trace_scratch {
	unsigned int		clock_id;	/* copied from tr->clock_id */
	unsigned long		text_addr;	/* address of _text when last updated */
	unsigned long		nr_entries;	/* number of valid slots in entries[] */
	struct trace_mod_entry	entries[];	/* must be sorted for trace_adjust_address() */
};
4807 
/* Held by update_last_data() while it reloads the scratch module list */
static DEFINE_MUTEX(scratch_mutex);
4809 
4810 static int cmp_mod_entry(const void *key, const void *pivot)
4811 {
4812 	unsigned long addr = (unsigned long)key;
4813 	const struct trace_mod_entry *ent = pivot;
4814 
4815 	if (addr < ent[0].mod_addr)
4816 		return -1;
4817 
4818 	return addr >= ent[1].mod_addr;
4819 }
4820 
4821 /**
4822  * trace_adjust_address() - Adjust prev boot address to current address.
4823  * @tr: Persistent ring buffer's trace_array.
4824  * @addr: Address in @tr which is adjusted.
4825  */
4826 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
4827 {
4828 	struct trace_module_delta *module_delta;
4829 	struct trace_scratch *tscratch;
4830 	struct trace_mod_entry *entry;
4831 	unsigned long raddr;
4832 	int idx = 0, nr_entries;
4833 
4834 	/* If we don't have last boot delta, return the address */
4835 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
4836 		return addr;
4837 
4838 	/* tr->module_delta must be protected by rcu. */
4839 	guard(rcu)();
4840 	tscratch = tr->scratch;
4841 	/* if there is no tscrach, module_delta must be NULL. */
4842 	module_delta = READ_ONCE(tr->module_delta);
4843 	if (!module_delta || !tscratch->nr_entries ||
4844 	    tscratch->entries[0].mod_addr > addr) {
4845 		raddr = addr + tr->text_delta;
4846 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
4847 			is_kernel_rodata(raddr) ? raddr : addr;
4848 	}
4849 
4850 	/* Note that entries must be sorted. */
4851 	nr_entries = tscratch->nr_entries;
4852 	if (nr_entries == 1 ||
4853 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
4854 		idx = nr_entries - 1;
4855 	else {
4856 		entry = __inline_bsearch((void *)addr,
4857 				tscratch->entries,
4858 				nr_entries - 1,
4859 				sizeof(tscratch->entries[0]),
4860 				cmp_mod_entry);
4861 		if (entry)
4862 			idx = entry - tscratch->entries;
4863 	}
4864 
4865 	return addr + module_delta->delta[idx];
4866 }
4867 
4868 #ifdef CONFIG_MODULES
4869 static int save_mod(struct module *mod, void *data)
4870 {
4871 	struct trace_array *tr = data;
4872 	struct trace_scratch *tscratch;
4873 	struct trace_mod_entry *entry;
4874 	unsigned int size;
4875 
4876 	tscratch = tr->scratch;
4877 	if (!tscratch)
4878 		return -1;
4879 	size = tr->scratch_size;
4880 
4881 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
4882 		return -1;
4883 
4884 	entry = &tscratch->entries[tscratch->nr_entries];
4885 
4886 	tscratch->nr_entries++;
4887 
4888 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
4889 	strscpy(entry->mod_name, mod->name);
4890 
4891 	return 0;
4892 }
4893 #else
/* Without CONFIG_MODULES there is nothing to record; report success */
static int save_mod(struct module *mod, void *data)
{
	return 0;
}
4898 #endif
4899 
4900 static void update_last_data(struct trace_array *tr)
4901 {
4902 	struct trace_module_delta *module_delta;
4903 	struct trace_scratch *tscratch;
4904 
4905 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
4906 		return;
4907 
4908 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
4909 		return;
4910 
4911 	/* Only if the buffer has previous boot data clear and update it. */
4912 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
4913 
4914 	/* If this is a backup instance, mark it for autoremove. */
4915 	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
4916 		tr->free_on_close = true;
4917 
4918 	/* Reset the module list and reload them */
4919 	if (tr->scratch) {
4920 		struct trace_scratch *tscratch = tr->scratch;
4921 
4922 		tscratch->clock_id = tr->clock_id;
4923 		memset(tscratch->entries, 0,
4924 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
4925 		tscratch->nr_entries = 0;
4926 
4927 		guard(mutex)(&scratch_mutex);
4928 		module_for_each_mod(save_mod, tr);
4929 	}
4930 
4931 	/*
4932 	 * Need to clear all CPU buffers as there cannot be events
4933 	 * from the previous boot mixed with events with this boot
4934 	 * as that will cause a confusing trace. Need to clear all
4935 	 * CPU buffers, even for those that may currently be offline.
4936 	 */
4937 	tracing_reset_all_cpus(&tr->array_buffer);
4938 
4939 	/* Using current data now */
4940 	tr->text_delta = 0;
4941 
4942 	if (!tr->scratch)
4943 		return;
4944 
4945 	tscratch = tr->scratch;
4946 	module_delta = READ_ONCE(tr->module_delta);
4947 	WRITE_ONCE(tr->module_delta, NULL);
4948 	kfree_rcu(module_delta, rcu);
4949 
4950 	/* Set the persistent ring buffer meta data to this address */
4951 	tscratch->text_addr = (unsigned long)_text;
4952 }
4953 
4954 /**
4955  * tracing_update_buffers - used by tracing facility to expand ring buffers
4956  * @tr: The tracing instance
4957  *
4958  * To save on memory when the tracing is never used on a system with it
4959  * configured in. The ring buffers are set to a minimum size. But once
4960  * a user starts to use the tracing facility, then they need to grow
4961  * to their default size.
4962  *
4963  * This function is to be called when a tracer is about to be used.
4964  */
4965 int tracing_update_buffers(struct trace_array *tr)
4966 {
4967 	int ret = 0;
4968 
4969 	if (!tr)
4970 		tr = &global_trace;
4971 
4972 	guard(mutex)(&trace_types_lock);
4973 
4974 	update_last_data(tr);
4975 
4976 	if (!tr->ring_buffer_expanded)
4977 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4978 						RING_BUFFER_ALL_CPUS);
4979 	return ret;
4980 }
4981 
4982 /*
4983  * Used to clear out the tracer before deletion of an instance.
4984  * Must have trace_types_lock held.
4985  */
4986 static void tracing_set_nop(struct trace_array *tr)
4987 {
4988 	if (tr->current_trace == &nop_trace)
4989 		return;
4990 
4991 	tr->current_trace->enabled--;
4992 
4993 	if (tr->current_trace->reset)
4994 		tr->current_trace->reset(tr);
4995 
4996 	tr->current_trace = &nop_trace;
4997 	tr->current_trace_flags = nop_trace.flags;
4998 }
4999 
/*
 * NOTE(review): no user of this flag is visible in this part of the file;
 * presumably it records that tracer options have been changed -- confirm
 * against its users.
 */
static bool tracer_options_updated;
5001 
/*
 * Make the tracer named @buf the current tracer of @tr.
 *
 * Takes trace_types_lock.  The ring buffer is first expanded to
 * trace_buf_size if that has not happened yet.
 *
 * Returns 0 on success, including when the named tracer is already
 * current.  Returns -EINVAL if @tr has no tracer of that name, if a
 * 'noboot' tracer is requested before SYSTEM_RUNNING, or if
 * trace_ok_for_array() rejects the tracer.  Returns -EBUSY if the tracer
 * uses the snapshot while a conditional snapshot is set, or if
 * tr->trace_ref is non-zero.  Errors from the resize, from arming the
 * snapshot or from the tracer's init are passed through.
 *
 * Once the old tracer has been torn down, a failure leaves @tr on
 * nop_trace.
 */
int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *trace = NULL;
	struct tracers *t;
	bool had_max_tr;
	int ret;

	guard(mutex)(&trace_types_lock);

	update_last_data(tr);

	if (!tr->ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;
		ret = 0;
	}

	/* Look the tracer up by name among those available to this instance */
	list_for_each_entry(t, &tr->tracers, list) {
		if (strcmp(t->tracer->name, buf) == 0) {
			trace = t->tracer;
			break;
		}
	}
	if (!trace)
		return -EINVAL;

	if (trace == tr->current_trace)
		return 0;

#ifdef CONFIG_TRACER_SNAPSHOT
	if (tracer_uses_snapshot(trace)) {
		/* cond_snapshot is sampled under max_lock with interrupts off */
		local_irq_disable();
		arch_spin_lock(&tr->max_lock);
		ret = tr->cond_snapshot ? -EBUSY : 0;
		arch_spin_unlock(&tr->max_lock);
		local_irq_enable();
		if (ret)
			return ret;
	}
#endif
	/* Some tracers won't work on kernel command line */
	if (system_state < SYSTEM_RUNNING && trace->noboot) {
		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
			trace->name);
		return -EINVAL;
	}

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(trace, tr))
		return -EINVAL;

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->trace_ref)
		return -EBUSY;

	/* All checks passed: tear down the current tracer */
	trace_branch_disable();

	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	had_max_tr = tracer_uses_snapshot(tr->current_trace);

	/* Current trace needs to be nop_trace before synchronize_rcu */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	if (had_max_tr && !tracer_uses_snapshot(trace)) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called from interrupts disabled
		 * so the synchronize_rcu() below is sufficient.
		 */
		synchronize_rcu();
		free_snapshot(tr);
		tracing_disarm_snapshot(tr);
	}

	if (!had_max_tr && tracer_uses_snapshot(trace)) {
		ret = tracing_arm_snapshot_locked(tr);
		if (ret)
			return ret;
	}

	/*
	 * @t still points at the matching list entry because the lookup loop
	 * was left via break.  Its own flags are used when set, otherwise the
	 * tracer's flags.
	 */
	tr->current_trace_flags = t->flags ? : t->tracer->flags;

	if (trace->init) {
		ret = tracer_init(trace, tr);
		if (ret) {
			/* Undo the snapshot arming and fall back to nop flags */
			if (tracer_uses_snapshot(trace))
				tracing_disarm_snapshot(tr);
			tr->current_trace_flags = nop_trace.flags;
			return ret;
		}
	}

	tr->current_trace = trace;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);

	return 0;
}
5109 
5110 static ssize_t
5111 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5112 			size_t cnt, loff_t *ppos)
5113 {
5114 	struct trace_array *tr = filp->private_data;
5115 	char buf[MAX_TRACER_SIZE+1];
5116 	char *name;
5117 	size_t ret;
5118 	int err;
5119 
5120 	ret = cnt;
5121 
5122 	if (cnt > MAX_TRACER_SIZE)
5123 		cnt = MAX_TRACER_SIZE;
5124 
5125 	if (copy_from_user(buf, ubuf, cnt))
5126 		return -EFAULT;
5127 
5128 	buf[cnt] = 0;
5129 
5130 	name = strim(buf);
5131 
5132 	err = tracing_set_tracer(tr, name);
5133 	if (err)
5134 		return err;
5135 
5136 	*ppos += ret;
5137 
5138 	return ret;
5139 }
5140 
5141 ssize_t tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5142 			   size_t cnt, loff_t *ppos)
5143 {
5144 	char buf[64];
5145 	int r;
5146 
5147 	r = snprintf(buf, sizeof(buf), "%ld\n",
5148 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5149 	if (r > sizeof(buf))
5150 		r = sizeof(buf);
5151 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5152 }
5153 
5154 ssize_t tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5155 			    size_t cnt, loff_t *ppos)
5156 {
5157 	unsigned long val;
5158 	int ret;
5159 
5160 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5161 	if (ret)
5162 		return ret;
5163 
5164 	*ptr = val * 1000;
5165 
5166 	return cnt;
5167 }
5168 
5169 static ssize_t
5170 tracing_thresh_read(struct file *filp, char __user *ubuf,
5171 		    size_t cnt, loff_t *ppos)
5172 {
5173 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5174 }
5175 
5176 static ssize_t
5177 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5178 		     size_t cnt, loff_t *ppos)
5179 {
5180 	struct trace_array *tr = filp->private_data;
5181 	int ret;
5182 
5183 	guard(mutex)(&trace_types_lock);
5184 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5185 	if (ret < 0)
5186 		return ret;
5187 
5188 	if (tr->current_trace->update_thresh) {
5189 		ret = tr->current_trace->update_thresh(tr);
5190 		if (ret < 0)
5191 			return ret;
5192 	}
5193 
5194 	return cnt;
5195 }
5196 
5197 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
5198 {
5199 	if (cpu == RING_BUFFER_ALL_CPUS) {
5200 		if (cpumask_empty(tr->pipe_cpumask)) {
5201 			cpumask_setall(tr->pipe_cpumask);
5202 			return 0;
5203 		}
5204 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
5205 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
5206 		return 0;
5207 	}
5208 	return -EBUSY;
5209 }
5210 
5211 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
5212 {
5213 	if (cpu == RING_BUFFER_ALL_CPUS) {
5214 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
5215 		cpumask_clear(tr->pipe_cpumask);
5216 	} else {
5217 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
5218 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
5219 	}
5220 }
5221 
5222 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5223 {
5224 	struct trace_array *tr = inode->i_private;
5225 	struct trace_iterator *iter;
5226 	int cpu;
5227 	int ret;
5228 
5229 	ret = tracing_check_open_get_tr(tr);
5230 	if (ret)
5231 		return ret;
5232 
5233 	guard(mutex)(&trace_types_lock);
5234 	cpu = tracing_get_cpu(inode);
5235 	ret = open_pipe_on_cpu(tr, cpu);
5236 	if (ret)
5237 		goto fail_pipe_on_cpu;
5238 
5239 	/* create a buffer to store the information to pass to userspace */
5240 	iter = kzalloc_obj(*iter);
5241 	if (!iter) {
5242 		ret = -ENOMEM;
5243 		goto fail_alloc_iter;
5244 	}
5245 
5246 	trace_seq_init(&iter->seq);
5247 	iter->trace = tr->current_trace;
5248 
5249 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5250 		ret = -ENOMEM;
5251 		goto fail;
5252 	}
5253 
5254 	/* trace pipe does not show start of buffer */
5255 	cpumask_setall(iter->started);
5256 
5257 	if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
5258 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5259 
5260 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5261 	if (trace_clocks[tr->clock_id].in_ns)
5262 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5263 
5264 	iter->tr = tr;
5265 	iter->array_buffer = &tr->array_buffer;
5266 	iter->cpu_file = cpu;
5267 	mutex_init(&iter->mutex);
5268 	filp->private_data = iter;
5269 
5270 	if (iter->trace->pipe_open)
5271 		iter->trace->pipe_open(iter);
5272 
5273 	nonseekable_open(inode, filp);
5274 
5275 	tr->trace_ref++;
5276 
5277 	return ret;
5278 
5279 fail:
5280 	kfree(iter);
5281 fail_alloc_iter:
5282 	close_pipe_on_cpu(tr, cpu);
5283 fail_pipe_on_cpu:
5284 	__trace_array_put(tr);
5285 	return ret;
5286 }
5287 
5288 static int tracing_release_pipe(struct inode *inode, struct file *file)
5289 {
5290 	struct trace_iterator *iter = file->private_data;
5291 	struct trace_array *tr = inode->i_private;
5292 
5293 	scoped_guard(mutex, &trace_types_lock) {
5294 		tr->trace_ref--;
5295 
5296 		if (iter->trace->pipe_close)
5297 			iter->trace->pipe_close(iter);
5298 		close_pipe_on_cpu(tr, iter->cpu_file);
5299 	}
5300 
5301 	free_trace_iter_content(iter);
5302 	kfree(iter);
5303 
5304 	trace_array_put(tr);
5305 
5306 	return 0;
5307 }
5308 
5309 static __poll_t
5310 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5311 {
5312 	struct trace_array *tr = iter->tr;
5313 
5314 	/* Iterators are static, they should be filled or empty */
5315 	if (trace_buffer_iter(iter, iter->cpu_file))
5316 		return EPOLLIN | EPOLLRDNORM;
5317 
5318 	if (tr->trace_flags & TRACE_ITER(BLOCK))
5319 		/*
5320 		 * Always select as readable when in blocking mode
5321 		 */
5322 		return EPOLLIN | EPOLLRDNORM;
5323 	else
5324 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
5325 					     filp, poll_table, iter->tr->buffer_percent);
5326 }
5327 
5328 static __poll_t
5329 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5330 {
5331 	struct trace_iterator *iter = filp->private_data;
5332 
5333 	return trace_poll(iter, filp, poll_table);
5334 }
5335 
5336 /* Must be called with iter->mutex held. */
5337 static int tracing_wait_pipe(struct file *filp)
5338 {
5339 	struct trace_iterator *iter = filp->private_data;
5340 	int ret;
5341 
5342 	while (trace_empty(iter)) {
5343 
5344 		if ((filp->f_flags & O_NONBLOCK)) {
5345 			return -EAGAIN;
5346 		}
5347 
5348 		/*
5349 		 * We block until we read something and tracing is disabled.
5350 		 * We still block if tracing is disabled, but we have never
5351 		 * read anything. This allows a user to cat this file, and
5352 		 * then enable tracing. But after we have read something,
5353 		 * we give an EOF when tracing is again disabled.
5354 		 *
5355 		 * iter->pos will be 0 if we haven't read anything.
5356 		 */
5357 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5358 			break;
5359 
5360 		mutex_unlock(&iter->mutex);
5361 
5362 		ret = wait_on_pipe(iter, 0);
5363 
5364 		mutex_lock(&iter->mutex);
5365 
5366 		if (ret)
5367 			return ret;
5368 	}
5369 
5370 	return 1;
5371 }
5372 
5373 static bool update_last_data_if_empty(struct trace_array *tr)
5374 {
5375 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5376 		return false;
5377 
5378 	if (!ring_buffer_empty(tr->array_buffer.buffer))
5379 		return false;
5380 
5381 	/*
5382 	 * If the buffer contains the last boot data and all per-cpu
5383 	 * buffers are empty, reset it from the kernel side.
5384 	 */
5385 	update_last_data(tr);
5386 	return true;
5387 }
5388 
5389 /*
5390  * Consumer reader.
5391  */
5392 static ssize_t
5393 tracing_read_pipe(struct file *filp, char __user *ubuf,
5394 		  size_t cnt, loff_t *ppos)
5395 {
5396 	struct trace_iterator *iter = filp->private_data;
5397 	ssize_t sret;
5398 
5399 	/*
5400 	 * Avoid more than one consumer on a single file descriptor
5401 	 * This is just a matter of traces coherency, the ring buffer itself
5402 	 * is protected.
5403 	 */
5404 	guard(mutex)(&iter->mutex);
5405 
5406 	/* return any leftover data */
5407 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5408 	if (sret != -EBUSY)
5409 		return sret;
5410 
5411 	trace_seq_init(&iter->seq);
5412 
5413 	if (iter->trace->read) {
5414 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5415 		if (sret)
5416 			return sret;
5417 	}
5418 
5419 waitagain:
5420 	if (update_last_data_if_empty(iter->tr))
5421 		return 0;
5422 
5423 	sret = tracing_wait_pipe(filp);
5424 	if (sret <= 0)
5425 		return sret;
5426 
5427 	/* stop when tracing is finished */
5428 	if (trace_empty(iter))
5429 		return 0;
5430 
5431 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
5432 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
5433 
5434 	/* reset all but tr, trace, and overruns */
5435 	trace_iterator_reset(iter);
5436 	cpumask_clear(iter->started);
5437 	trace_seq_init(&iter->seq);
5438 
5439 	trace_event_read_lock();
5440 	trace_access_lock(iter->cpu_file);
5441 	while (trace_find_next_entry_inc(iter) != NULL) {
5442 		enum print_line_t ret;
5443 		int save_len = iter->seq.seq.len;
5444 
5445 		ret = print_trace_line(iter);
5446 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5447 			/*
5448 			 * If one print_trace_line() fills entire trace_seq in one shot,
5449 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
5450 			 * In this case, we need to consume it, otherwise, loop will peek
5451 			 * this event next time, resulting in an infinite loop.
5452 			 */
5453 			if (save_len == 0) {
5454 				iter->seq.full = 0;
5455 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
5456 				trace_consume(iter);
5457 				break;
5458 			}
5459 
5460 			/* In other cases, don't print partial lines */
5461 			iter->seq.seq.len = save_len;
5462 			break;
5463 		}
5464 		if (ret != TRACE_TYPE_NO_CONSUME)
5465 			trace_consume(iter);
5466 
5467 		if (trace_seq_used(&iter->seq) >= cnt)
5468 			break;
5469 
5470 		/*
5471 		 * Setting the full flag means we reached the trace_seq buffer
5472 		 * size and we should leave by partial output condition above.
5473 		 * One of the trace_seq_* functions is not used properly.
5474 		 */
5475 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5476 			  iter->ent->type);
5477 	}
5478 	trace_access_unlock(iter->cpu_file);
5479 	trace_event_read_unlock();
5480 
5481 	/* Now copy what we have to the user */
5482 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5483 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
5484 		trace_seq_init(&iter->seq);
5485 
5486 	/*
5487 	 * If there was nothing to send to user, in spite of consuming trace
5488 	 * entries, go back to wait for more entries.
5489 	 */
5490 	if (sret == -EBUSY)
5491 		goto waitagain;
5492 
5493 	return sret;
5494 }
5495 
5496 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5497 				     unsigned int idx)
5498 {
5499 	__free_page(spd->pages[idx]);
5500 }
5501 
5502 static size_t
5503 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5504 {
5505 	size_t count;
5506 	int save_len;
5507 	int ret;
5508 
5509 	/* Seq buffer is page-sized, exactly what we need. */
5510 	for (;;) {
5511 		save_len = iter->seq.seq.len;
5512 		ret = print_trace_line(iter);
5513 
5514 		if (trace_seq_has_overflowed(&iter->seq)) {
5515 			iter->seq.seq.len = save_len;
5516 			break;
5517 		}
5518 
5519 		/*
5520 		 * This should not be hit, because it should only
5521 		 * be set if the iter->seq overflowed. But check it
5522 		 * anyway to be safe.
5523 		 */
5524 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5525 			iter->seq.seq.len = save_len;
5526 			break;
5527 		}
5528 
5529 		count = trace_seq_used(&iter->seq) - save_len;
5530 		if (rem < count) {
5531 			rem = 0;
5532 			iter->seq.seq.len = save_len;
5533 			break;
5534 		}
5535 
5536 		if (ret != TRACE_TYPE_NO_CONSUME)
5537 			trace_consume(iter);
5538 		rem -= count;
5539 		if (!trace_find_next_entry_inc(iter))	{
5540 			rem = 0;
5541 			iter->ent = NULL;
5542 			break;
5543 		}
5544 	}
5545 
5546 	return rem;
5547 }
5548 
5549 static ssize_t tracing_splice_read_pipe(struct file *filp,
5550 					loff_t *ppos,
5551 					struct pipe_inode_info *pipe,
5552 					size_t len,
5553 					unsigned int flags)
5554 {
5555 	struct page *pages_def[PIPE_DEF_BUFFERS];
5556 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5557 	struct trace_iterator *iter = filp->private_data;
5558 	struct splice_pipe_desc spd = {
5559 		.pages		= pages_def,
5560 		.partial	= partial_def,
5561 		.nr_pages	= 0, /* This gets updated below. */
5562 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5563 		.ops		= &default_pipe_buf_ops,
5564 		.spd_release	= tracing_spd_release_pipe,
5565 	};
5566 	ssize_t ret;
5567 	size_t rem;
5568 	unsigned int i;
5569 
5570 	if (splice_grow_spd(pipe, &spd))
5571 		return -ENOMEM;
5572 
5573 	mutex_lock(&iter->mutex);
5574 
5575 	if (iter->trace->splice_read) {
5576 		ret = iter->trace->splice_read(iter, filp,
5577 					       ppos, pipe, len, flags);
5578 		if (ret)
5579 			goto out_err;
5580 	}
5581 
5582 	ret = tracing_wait_pipe(filp);
5583 	if (ret <= 0)
5584 		goto out_err;
5585 
5586 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5587 		ret = -EFAULT;
5588 		goto out_err;
5589 	}
5590 
5591 	trace_event_read_lock();
5592 	trace_access_lock(iter->cpu_file);
5593 
5594 	/* Fill as many pages as possible. */
5595 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5596 		spd.pages[i] = alloc_page(GFP_KERNEL);
5597 		if (!spd.pages[i])
5598 			break;
5599 
5600 		rem = tracing_fill_pipe_page(rem, iter);
5601 
5602 		/* Copy the data into the page, so we can start over. */
5603 		ret = trace_seq_to_buffer(&iter->seq,
5604 					  page_address(spd.pages[i]),
5605 					  min((size_t)trace_seq_used(&iter->seq),
5606 						  (size_t)PAGE_SIZE));
5607 		if (ret < 0) {
5608 			__free_page(spd.pages[i]);
5609 			break;
5610 		}
5611 		spd.partial[i].offset = 0;
5612 		spd.partial[i].len = ret;
5613 
5614 		trace_seq_init(&iter->seq);
5615 	}
5616 
5617 	trace_access_unlock(iter->cpu_file);
5618 	trace_event_read_unlock();
5619 	mutex_unlock(&iter->mutex);
5620 
5621 	spd.nr_pages = i;
5622 
5623 	if (i)
5624 		ret = splice_to_pipe(pipe, &spd);
5625 	else
5626 		ret = 0;
5627 out:
5628 	splice_shrink_spd(&spd);
5629 	return ret;
5630 
5631 out_err:
5632 	mutex_unlock(&iter->mutex);
5633 	goto out;
5634 }
5635 
5636 static ssize_t
5637 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
5638 			 size_t cnt, loff_t *ppos)
5639 {
5640 	struct inode *inode = file_inode(filp);
5641 	struct trace_array *tr = inode->i_private;
5642 	char buf[64];
5643 	int r;
5644 
5645 	r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
5646 
5647 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5648 }
5649 
5650 static ssize_t
5651 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
5652 			  size_t cnt, loff_t *ppos)
5653 {
5654 	struct inode *inode = file_inode(filp);
5655 	struct trace_array *tr = inode->i_private;
5656 	unsigned long val;
5657 	int ret;
5658 
5659 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5660 	if (ret)
5661 		return ret;
5662 
5663 	if (val > SYSCALL_FAULT_USER_MAX)
5664 		val = SYSCALL_FAULT_USER_MAX;
5665 
5666 	tr->syscall_buf_sz = val;
5667 
5668 	*ppos += cnt;
5669 
5670 	return cnt;
5671 }
5672 
5673 static ssize_t
5674 tracing_entries_read(struct file *filp, char __user *ubuf,
5675 		     size_t cnt, loff_t *ppos)
5676 {
5677 	struct inode *inode = file_inode(filp);
5678 	struct trace_array *tr = inode->i_private;
5679 	int cpu = tracing_get_cpu(inode);
5680 	char buf[64];
5681 	int r = 0;
5682 	ssize_t ret;
5683 
5684 	mutex_lock(&trace_types_lock);
5685 
5686 	if (cpu == RING_BUFFER_ALL_CPUS) {
5687 		int cpu, buf_size_same;
5688 		unsigned long size;
5689 
5690 		size = 0;
5691 		buf_size_same = 1;
5692 		/* check if all cpu sizes are same */
5693 		for_each_tracing_cpu(cpu) {
5694 			/* fill in the size from first enabled cpu */
5695 			if (size == 0)
5696 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
5697 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
5698 				buf_size_same = 0;
5699 				break;
5700 			}
5701 		}
5702 
5703 		if (buf_size_same) {
5704 			if (!tr->ring_buffer_expanded)
5705 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5706 					    size >> 10,
5707 					    trace_buf_size >> 10);
5708 			else
5709 				r = sprintf(buf, "%lu\n", size >> 10);
5710 		} else
5711 			r = sprintf(buf, "X\n");
5712 	} else
5713 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
5714 
5715 	mutex_unlock(&trace_types_lock);
5716 
5717 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5718 	return ret;
5719 }
5720 
5721 static ssize_t
5722 tracing_entries_write(struct file *filp, const char __user *ubuf,
5723 		      size_t cnt, loff_t *ppos)
5724 {
5725 	struct inode *inode = file_inode(filp);
5726 	struct trace_array *tr = inode->i_private;
5727 	unsigned long val;
5728 	int ret;
5729 
5730 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5731 	if (ret)
5732 		return ret;
5733 
5734 	/* must have at least 1 entry */
5735 	if (!val)
5736 		return -EINVAL;
5737 
5738 	/* value is in KB */
5739 	val <<= 10;
5740 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5741 	if (ret < 0)
5742 		return ret;
5743 
5744 	*ppos += cnt;
5745 
5746 	return cnt;
5747 }
5748 
5749 static ssize_t
5750 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5751 				size_t cnt, loff_t *ppos)
5752 {
5753 	struct trace_array *tr = filp->private_data;
5754 	char buf[64];
5755 	int r, cpu;
5756 	unsigned long size = 0, expanded_size = 0;
5757 
5758 	mutex_lock(&trace_types_lock);
5759 	for_each_tracing_cpu(cpu) {
5760 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
5761 		if (!tr->ring_buffer_expanded)
5762 			expanded_size += trace_buf_size >> 10;
5763 	}
5764 	if (tr->ring_buffer_expanded)
5765 		r = sprintf(buf, "%lu\n", size);
5766 	else
5767 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5768 	mutex_unlock(&trace_types_lock);
5769 
5770 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5771 }
5772 
5773 #define LAST_BOOT_HEADER ((void *)1)
5774 
5775 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
5776 {
5777 	struct trace_array *tr = m->private;
5778 	struct trace_scratch *tscratch = tr->scratch;
5779 	unsigned int index = *pos;
5780 
5781 	(*pos)++;
5782 
5783 	if (*pos == 1)
5784 		return LAST_BOOT_HEADER;
5785 
5786 	/* Only show offsets of the last boot data */
5787 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5788 		return NULL;
5789 
5790 	/* *pos 0 is for the header, 1 is for the first module */
5791 	index--;
5792 
5793 	if (index >= tscratch->nr_entries)
5794 		return NULL;
5795 
5796 	return &tscratch->entries[index];
5797 }
5798 
5799 static void *l_start(struct seq_file *m, loff_t *pos)
5800 {
5801 	mutex_lock(&scratch_mutex);
5802 
5803 	return l_next(m, NULL, pos);
5804 }
5805 
5806 static void l_stop(struct seq_file *m, void *p)
5807 {
5808 	mutex_unlock(&scratch_mutex);
5809 }
5810 
5811 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
5812 {
5813 	struct trace_scratch *tscratch = tr->scratch;
5814 
5815 	/*
5816 	 * Do not leak KASLR address. This only shows the KASLR address of
5817 	 * the last boot. When the ring buffer is started, the LAST_BOOT
5818 	 * flag gets cleared, and this should only report "current".
5819 	 * Otherwise it shows the KASLR address from the previous boot which
5820 	 * should not be the same as the current boot.
5821 	 */
5822 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5823 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
5824 	else
5825 		seq_puts(m, "# Current\n");
5826 }
5827 
5828 static int l_show(struct seq_file *m, void *v)
5829 {
5830 	struct trace_array *tr = m->private;
5831 	struct trace_mod_entry *entry = v;
5832 
5833 	if (v == LAST_BOOT_HEADER) {
5834 		show_last_boot_header(m, tr);
5835 		return 0;
5836 	}
5837 
5838 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
5839 	return 0;
5840 }
5841 
5842 static const struct seq_operations last_boot_seq_ops = {
5843 	.start		= l_start,
5844 	.next		= l_next,
5845 	.stop		= l_stop,
5846 	.show		= l_show,
5847 };
5848 
5849 static int tracing_last_boot_open(struct inode *inode, struct file *file)
5850 {
5851 	struct trace_array *tr = inode->i_private;
5852 	struct seq_file *m;
5853 	int ret;
5854 
5855 	ret = tracing_check_open_get_tr(tr);
5856 	if (ret)
5857 		return ret;
5858 
5859 	ret = seq_open(file, &last_boot_seq_ops);
5860 	if (ret) {
5861 		trace_array_put(tr);
5862 		return ret;
5863 	}
5864 
5865 	m = file->private_data;
5866 	m->private = tr;
5867 
5868 	return 0;
5869 }
5870 
5871 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
5872 {
5873 	struct trace_array *tr = inode->i_private;
5874 	int cpu = tracing_get_cpu(inode);
5875 	int ret;
5876 
5877 	ret = tracing_check_open_get_tr(tr);
5878 	if (ret)
5879 		return ret;
5880 
5881 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
5882 	if (ret < 0)
5883 		__trace_array_put(tr);
5884 	return ret;
5885 }
5886 
5887 static ssize_t
5888 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5889 			  size_t cnt, loff_t *ppos)
5890 {
5891 	/*
5892 	 * There is no need to read what the user has written, this function
5893 	 * is just to make sure that there is no error when "echo" is used
5894 	 */
5895 
5896 	*ppos += cnt;
5897 
5898 	return cnt;
5899 }
5900 
5901 static int
5902 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5903 {
5904 	struct trace_array *tr = inode->i_private;
5905 
5906 	/* disable tracing ? */
5907 	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
5908 		tracer_tracing_off(tr);
5909 	/* resize the ring buffer to 0 */
5910 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5911 
5912 	trace_array_put(tr);
5913 
5914 	return 0;
5915 }
5916 
5917 #define TRACE_MARKER_MAX_SIZE		4096
5918 
5919 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
5920 				      size_t cnt, unsigned long ip)
5921 {
5922 	struct ring_buffer_event *event;
5923 	enum event_trigger_type tt = ETT_NONE;
5924 	struct trace_buffer *buffer;
5925 	struct print_entry *entry;
5926 	int meta_size;
5927 	ssize_t written;
5928 	size_t size;
5929 
5930 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
5931  again:
5932 	size = cnt + meta_size;
5933 
5934 	buffer = tr->array_buffer.buffer;
5935 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5936 					    tracing_gen_ctx());
5937 	if (unlikely(!event)) {
5938 		/*
5939 		 * If the size was greater than what was allowed, then
5940 		 * make it smaller and try again.
5941 		 */
5942 		if (size > ring_buffer_max_event_size(buffer)) {
5943 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
5944 			/* The above should only happen once */
5945 			if (WARN_ON_ONCE(cnt + meta_size == size))
5946 				return -EBADF;
5947 			goto again;
5948 		}
5949 
5950 		/* Ring buffer disabled, return as if not open for write */
5951 		return -EBADF;
5952 	}
5953 
5954 	entry = ring_buffer_event_data(event);
5955 	entry->ip = ip;
5956 	memcpy(&entry->buf, buf, cnt);
5957 	written = cnt;
5958 
5959 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
5960 		/* do not add \n before testing triggers, but add \0 */
5961 		entry->buf[cnt] = '\0';
5962 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
5963 	}
5964 
5965 	if (entry->buf[cnt - 1] != '\n') {
5966 		entry->buf[cnt] = '\n';
5967 		entry->buf[cnt + 1] = '\0';
5968 	} else
5969 		entry->buf[cnt] = '\0';
5970 
5971 	if (static_branch_unlikely(&trace_marker_exports_enabled))
5972 		ftrace_exports(event, TRACE_EXPORT_MARKER);
5973 	__buffer_unlock_commit(buffer, event);
5974 
5975 	if (tt)
5976 		event_triggers_post_call(tr->trace_marker_file, tt);
5977 
5978 	return written;
5979 }
5980 
5981 struct trace_user_buf {
5982 	char		*buf;
5983 };
5984 
5985 static DEFINE_MUTEX(trace_user_buffer_mutex);
5986 static struct trace_user_buf_info *trace_user_buffer;
5987 
5988 /**
5989  * trace_user_fault_destroy - free up allocated memory of a trace user buffer
5990  * @tinfo: The descriptor to free up
5991  *
5992  * Frees any data allocated in the trace info dsecriptor.
5993  */
5994 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
5995 {
5996 	char *buf;
5997 	int cpu;
5998 
5999 	if (!tinfo || !tinfo->tbuf)
6000 		return;
6001 
6002 	for_each_possible_cpu(cpu) {
6003 		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6004 		kfree(buf);
6005 	}
6006 	free_percpu(tinfo->tbuf);
6007 }
6008 
6009 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
6010 {
6011 	char *buf;
6012 	int cpu;
6013 
6014 	lockdep_assert_held(&trace_user_buffer_mutex);
6015 
6016 	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
6017 	if (!tinfo->tbuf)
6018 		return -ENOMEM;
6019 
6020 	tinfo->ref = 1;
6021 	tinfo->size = size;
6022 
6023 	/* Clear each buffer in case of error */
6024 	for_each_possible_cpu(cpu) {
6025 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
6026 	}
6027 
6028 	for_each_possible_cpu(cpu) {
6029 		buf = kmalloc_node(size, GFP_KERNEL,
6030 				   cpu_to_node(cpu));
6031 		if (!buf)
6032 			return -ENOMEM;
6033 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
6034 	}
6035 
6036 	return 0;
6037 }
6038 
6039 /* For internal use. Free and reinitialize */
6040 static void user_buffer_free(struct trace_user_buf_info **tinfo)
6041 {
6042 	lockdep_assert_held(&trace_user_buffer_mutex);
6043 
6044 	trace_user_fault_destroy(*tinfo);
6045 	kfree(*tinfo);
6046 	*tinfo = NULL;
6047 }
6048 
6049 /* For internal use. Initialize and allocate */
6050 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
6051 {
6052 	bool alloc = false;
6053 	int ret;
6054 
6055 	lockdep_assert_held(&trace_user_buffer_mutex);
6056 
6057 	if (!*tinfo) {
6058 		alloc = true;
6059 		*tinfo = kzalloc_obj(**tinfo);
6060 		if (!*tinfo)
6061 			return -ENOMEM;
6062 	}
6063 
6064 	ret = user_fault_buffer_enable(*tinfo, size);
6065 	if (ret < 0 && alloc)
6066 		user_buffer_free(tinfo);
6067 
6068 	return ret;
6069 }
6070 
6071 /* For internal use, derefrence and free if necessary */
6072 static void user_buffer_put(struct trace_user_buf_info **tinfo)
6073 {
6074 	guard(mutex)(&trace_user_buffer_mutex);
6075 
6076 	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
6077 		return;
6078 
6079 	if (--(*tinfo)->ref)
6080 		return;
6081 
6082 	user_buffer_free(tinfo);
6083 }
6084 
6085 /**
6086  * trace_user_fault_init - Allocated or reference a per CPU buffer
6087  * @tinfo: A pointer to the trace buffer descriptor
6088  * @size: The size to allocate each per CPU buffer
6089  *
6090  * Create a per CPU buffer that can be used to copy from user space
6091  * in a task context. When calling trace_user_fault_read(), preemption
6092  * must be disabled, and it will enable preemption and copy user
6093  * space data to the buffer. If any schedule switches occur, it will
6094  * retry until it succeeds without a schedule switch knowing the buffer
6095  * is still valid.
6096  *
6097  * Returns 0 on success, negative on failure.
6098  */
6099 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
6100 {
6101 	int ret;
6102 
6103 	if (!tinfo)
6104 		return -EINVAL;
6105 
6106 	guard(mutex)(&trace_user_buffer_mutex);
6107 
6108 	ret = user_buffer_init(&tinfo, size);
6109 	if (ret < 0)
6110 		trace_user_fault_destroy(tinfo);
6111 
6112 	return ret;
6113 }
6114 
6115 /**
6116  * trace_user_fault_get - up the ref count for the user buffer
6117  * @tinfo: A pointer to a pointer to the trace buffer descriptor
6118  *
6119  * Ups the ref count of the trace buffer.
6120  *
6121  * Returns the new ref count.
6122  */
6123 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
6124 {
6125 	if (!tinfo)
6126 		return -1;
6127 
6128 	guard(mutex)(&trace_user_buffer_mutex);
6129 
6130 	tinfo->ref++;
6131 	return tinfo->ref;
6132 }
6133 
6134 /**
6135  * trace_user_fault_put - dereference a per cpu trace buffer
6136  * @tinfo: The @tinfo that was passed to trace_user_fault_get()
6137  *
6138  * Decrement the ref count of @tinfo.
6139  *
6140  * Returns the new refcount (negative on error).
6141  */
6142 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
6143 {
6144 	guard(mutex)(&trace_user_buffer_mutex);
6145 
6146 	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
6147 		return -1;
6148 
6149 	--tinfo->ref;
6150 	return tinfo->ref;
6151 }
6152 
6153 /**
6154  * trace_user_fault_read - Read user space into a per CPU buffer
6155  * @tinfo: The @tinfo allocated by trace_user_fault_get()
6156  * @ptr: The user space pointer to read
6157  * @size: The size of user space to read.
6158  * @copy_func: Optional function to use to copy from user space
6159  * @data: Data to pass to copy_func if it was supplied
6160  *
6161  * Preemption must be disabled when this is called, and must not
6162  * be enabled while using the returned buffer.
6163  * This does the copying from user space into a per CPU buffer.
6164  *
6165  * The @size must not be greater than the size passed in to
6166  * trace_user_fault_init().
6167  *
6168  * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
6169  * otherwise it will call @copy_func. It will call @copy_func with:
6170  *
6171  *   buffer: the per CPU buffer of the @tinfo.
6172  *   ptr: The pointer @ptr to user space to read
6173  *   size: The @size of the ptr to read
6174  *   data: The @data parameter
6175  *
6176  * It is expected that @copy_func will return 0 on success and non zero
6177  * if there was a fault.
6178  *
6179  * Returns a pointer to the buffer with the content read from @ptr.
6180  *   Preemption must remain disabled while the caller accesses the
6181  *   buffer returned by this function.
6182  * Returns NULL if there was a fault, or the size passed in is
6183  *   greater than the size passed to trace_user_fault_init().
6184  */
6185 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
6186 			     const char __user *ptr, size_t size,
6187 			     trace_user_buf_copy copy_func, void *data)
6188 {
6189 	int cpu = smp_processor_id();
6190 	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6191 	unsigned int cnt;
6192 	int trys = 0;
6193 	int ret;
6194 
6195 	lockdep_assert_preemption_disabled();
6196 
6197 	/*
6198 	 * It's up to the caller to not try to copy more than it said
6199 	 * it would.
6200 	 */
6201 	if (size > tinfo->size)
6202 		return NULL;
6203 
6204 	/*
6205 	 * This acts similar to a seqcount. The per CPU context switches are
6206 	 * recorded, migration is disabled and preemption is enabled. The
6207 	 * read of the user space memory is copied into the per CPU buffer.
6208 	 * Preemption is disabled again, and if the per CPU context switches count
6209 	 * is still the same, it means the buffer has not been corrupted.
6210 	 * If the count is different, it is assumed the buffer is corrupted
6211 	 * and reading must be tried again.
6212 	 */
6213 
6214 	do {
6215 		/*
6216 		 * It is possible that something is trying to migrate this
6217 		 * task. What happens then, is when preemption is enabled,
6218 		 * the migration thread will preempt this task, try to
6219 		 * migrate it, fail, then let it run again. That will
6220 		 * cause this to loop again and never succeed.
6221 		 * On failures, enabled and disable preemption with
6222 		 * migration enabled, to allow the migration thread to
6223 		 * migrate this task.
6224 		 */
6225 		if (trys) {
6226 			preempt_enable_notrace();
6227 			preempt_disable_notrace();
6228 			cpu = smp_processor_id();
6229 			buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6230 		}
6231 
6232 		/*
6233 		 * If for some reason, copy_from_user() always causes a context
6234 		 * switch, this would then cause an infinite loop.
6235 		 * If this task is preempted by another user space task, it
6236 		 * will cause this task to try again. But just in case something
6237 		 * changes where the copying from user space causes another task
6238 		 * to run, prevent this from going into an infinite loop.
6239 		 * 100 tries should be plenty.
6240 		 */
6241 		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
6242 			return NULL;
6243 
6244 		/* Read the current CPU context switch counter */
6245 		cnt = nr_context_switches_cpu(cpu);
6246 
6247 		/*
6248 		 * Preemption is going to be enabled, but this task must
6249 		 * remain on this CPU.
6250 		 */
6251 		migrate_disable();
6252 
6253 		/*
6254 		 * Now preemption is being enabled and another task can come in
6255 		 * and use the same buffer and corrupt our data.
6256 		 */
6257 		preempt_enable_notrace();
6258 
6259 		/* Make sure preemption is enabled here */
6260 		lockdep_assert_preemption_enabled();
6261 
6262 		if (copy_func) {
6263 			ret = copy_func(buffer, ptr, size, data);
6264 		} else {
6265 			ret = __copy_from_user(buffer, ptr, size);
6266 		}
6267 
6268 		preempt_disable_notrace();
6269 		migrate_enable();
6270 
6271 		/* if it faulted, no need to test if the buffer was corrupted */
6272 		if (ret)
6273 			return NULL;
6274 
6275 		/*
6276 		 * Preemption is disabled again, now check the per CPU context
6277 		 * switch counter. If it doesn't match, then another user space
6278 		 * process may have schedule in and corrupted our buffer. In that
6279 		 * case the copying must be retried.
6280 		 */
6281 	} while (nr_context_switches_cpu(cpu) != cnt);
6282 
6283 	return buffer;
6284 }
6285 
6286 static ssize_t
6287 tracing_mark_write(struct file *filp, const char __user *ubuf,
6288 					size_t cnt, loff_t *fpos)
6289 {
6290 	struct trace_array *tr = filp->private_data;
6291 	ssize_t written = -ENODEV;
6292 	unsigned long ip;
6293 	char *buf;
6294 
6295 	if (unlikely(tracing_disabled))
6296 		return -EINVAL;
6297 
6298 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
6299 		return -EINVAL;
6300 
6301 	if ((ssize_t)cnt < 0)
6302 		return -EINVAL;
6303 
6304 	if (cnt > TRACE_MARKER_MAX_SIZE)
6305 		cnt = TRACE_MARKER_MAX_SIZE;
6306 
6307 	/* Must have preemption disabled while having access to the buffer */
6308 	guard(preempt_notrace)();
6309 
6310 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
6311 	if (!buf)
6312 		return -EFAULT;
6313 
6314 	/* The selftests expect this function to be the IP address */
6315 	ip = _THIS_IP_;
6316 
6317 	/* The global trace_marker can go to multiple instances */
6318 	if (tr == &global_trace) {
6319 		guard(rcu)();
6320 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
6321 			written = write_marker_to_buffer(tr, buf, cnt, ip);
6322 			if (written < 0)
6323 				break;
6324 		}
6325 	} else {
6326 		written = write_marker_to_buffer(tr, buf, cnt, ip);
6327 	}
6328 
6329 	return written;
6330 }
6331 
6332 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
6333 					  const char *buf, size_t cnt)
6334 {
6335 	struct ring_buffer_event *event;
6336 	struct trace_buffer *buffer;
6337 	struct raw_data_entry *entry;
6338 	ssize_t written;
6339 	size_t size;
6340 
6341 	/* cnt includes both the entry->id and the data behind it. */
6342 	size = struct_offset(entry, id) + cnt;
6343 
6344 	buffer = tr->array_buffer.buffer;
6345 
6346 	if (size > ring_buffer_max_event_size(buffer))
6347 		return -EINVAL;
6348 
6349 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6350 					    tracing_gen_ctx());
6351 	if (!event)
6352 		/* Ring buffer disabled, return as if not open for write */
6353 		return -EBADF;
6354 
6355 	entry = ring_buffer_event_data(event);
6356 	unsafe_memcpy(&entry->id, buf, cnt,
6357 		      "id and content already reserved on ring buffer"
6358 		      "'buf' includes the 'id' and the data."
6359 		      "'entry' was allocated with cnt from 'id'.");
6360 	written = cnt;
6361 
6362 	__buffer_unlock_commit(buffer, event);
6363 
6364 	return written;
6365 }
6366 
6367 static ssize_t
6368 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6369 					size_t cnt, loff_t *fpos)
6370 {
6371 	struct trace_array *tr = filp->private_data;
6372 	ssize_t written = -ENODEV;
6373 	char *buf;
6374 
6375 	if (unlikely(tracing_disabled))
6376 		return -EINVAL;
6377 
6378 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
6379 		return -EINVAL;
6380 
6381 	/* The marker must at least have a tag id */
6382 	if (cnt < sizeof(unsigned int))
6383 		return -EINVAL;
6384 
6385 	/* raw write is all or nothing */
6386 	if (cnt > TRACE_MARKER_MAX_SIZE)
6387 		return -EINVAL;
6388 
6389 	/* Must have preemption disabled while having access to the buffer */
6390 	guard(preempt_notrace)();
6391 
6392 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
6393 	if (!buf)
6394 		return -EFAULT;
6395 
6396 	/* The global trace_marker_raw can go to multiple instances */
6397 	if (tr == &global_trace) {
6398 		guard(rcu)();
6399 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
6400 			written = write_raw_marker_to_buffer(tr, buf, cnt);
6401 			if (written < 0)
6402 				break;
6403 		}
6404 	} else {
6405 		written = write_raw_marker_to_buffer(tr, buf, cnt);
6406 	}
6407 
6408 	return written;
6409 }
6410 
6411 static int tracing_mark_open(struct inode *inode, struct file *filp)
6412 {
6413 	int ret;
6414 
6415 	scoped_guard(mutex, &trace_user_buffer_mutex) {
6416 		if (!trace_user_buffer) {
6417 			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
6418 			if (ret < 0)
6419 				return ret;
6420 		} else {
6421 			trace_user_buffer->ref++;
6422 		}
6423 	}
6424 
6425 	stream_open(inode, filp);
6426 	ret = tracing_open_generic_tr(inode, filp);
6427 	if (ret < 0)
6428 		user_buffer_put(&trace_user_buffer);
6429 	return ret;
6430 }
6431 
6432 static int tracing_mark_release(struct inode *inode, struct file *file)
6433 {
6434 	user_buffer_put(&trace_user_buffer);
6435 	return tracing_release_generic_tr(inode, file);
6436 }
6437 
6438 static int tracing_clock_show(struct seq_file *m, void *v)
6439 {
6440 	struct trace_array *tr = m->private;
6441 	int i;
6442 
6443 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6444 		seq_printf(m,
6445 			"%s%s%s%s", i ? " " : "",
6446 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6447 			i == tr->clock_id ? "]" : "");
6448 	seq_putc(m, '\n');
6449 
6450 	return 0;
6451 }
6452 
6453 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6454 {
6455 	int i;
6456 
6457 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6458 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6459 			break;
6460 	}
6461 	if (i == ARRAY_SIZE(trace_clocks))
6462 		return -EINVAL;
6463 
6464 	guard(mutex)(&trace_types_lock);
6465 
6466 	tr->clock_id = i;
6467 
6468 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6469 
6470 	/*
6471 	 * New clock may not be consistent with the previous clock.
6472 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6473 	 */
6474 	tracing_reset_online_cpus(&tr->array_buffer);
6475 
6476 #ifdef CONFIG_TRACER_SNAPSHOT
6477 	if (tr->snapshot_buffer.buffer)
6478 		ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func);
6479 	tracing_reset_online_cpus(&tr->snapshot_buffer);
6480 #endif
6481 	update_last_data_if_empty(tr);
6482 
6483 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
6484 		struct trace_scratch *tscratch = tr->scratch;
6485 
6486 		tscratch->clock_id = i;
6487 	}
6488 
6489 	return 0;
6490 }
6491 
6492 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6493 				   size_t cnt, loff_t *fpos)
6494 {
6495 	struct seq_file *m = filp->private_data;
6496 	struct trace_array *tr = m->private;
6497 	char buf[64];
6498 	const char *clockstr;
6499 	int ret;
6500 
6501 	if (cnt >= sizeof(buf))
6502 		return -EINVAL;
6503 
6504 	if (copy_from_user(buf, ubuf, cnt))
6505 		return -EFAULT;
6506 
6507 	buf[cnt] = 0;
6508 
6509 	clockstr = strstrip(buf);
6510 
6511 	ret = tracing_set_clock(tr, clockstr);
6512 	if (ret)
6513 		return ret;
6514 
6515 	*fpos += cnt;
6516 
6517 	return cnt;
6518 }
6519 
6520 static int tracing_clock_open(struct inode *inode, struct file *file)
6521 {
6522 	struct trace_array *tr = inode->i_private;
6523 	int ret;
6524 
6525 	ret = tracing_check_open_get_tr(tr);
6526 	if (ret)
6527 		return ret;
6528 
6529 	if ((file->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
6530 		trace_array_put(tr);
6531 		return -EACCES;
6532 	}
6533 
6534 	ret = single_open(file, tracing_clock_show, inode->i_private);
6535 	if (ret < 0)
6536 		trace_array_put(tr);
6537 
6538 	return ret;
6539 }
6540 
6541 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6542 {
6543 	struct trace_array *tr = m->private;
6544 
6545 	guard(mutex)(&trace_types_lock);
6546 
6547 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6548 		seq_puts(m, "delta [absolute]\n");
6549 	else
6550 		seq_puts(m, "[delta] absolute\n");
6551 
6552 	return 0;
6553 }
6554 
6555 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6556 {
6557 	struct trace_array *tr = inode->i_private;
6558 	int ret;
6559 
6560 	ret = tracing_check_open_get_tr(tr);
6561 	if (ret)
6562 		return ret;
6563 
6564 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6565 	if (ret < 0)
6566 		trace_array_put(tr);
6567 
6568 	return ret;
6569 }
6570 
6571 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
6572 {
6573 	if (rbe == this_cpu_read(trace_buffered_event))
6574 		return ring_buffer_time_stamp(buffer);
6575 
6576 	return ring_buffer_event_time_stamp(buffer, rbe);
6577 }
6578 
/*
 * Read/write file operations built on tracing_thresh_read() and
 * tracing_thresh_write().  Opened with tracing_open_generic(); no
 * .release callback is set.
 */
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};
6585 
/*
 * Read/write file operations built on tracing_set_trace_read() and
 * tracing_set_trace_write(), using the generic trace array open/release
 * pair.
 */
static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
6593 
/*
 * File operations for the pipe style reader: read, splice_read and poll
 * are provided, there is no .write and no .llseek callback.
 */
static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
};
6601 
/*
 * Read/write file operations built on tracing_entries_read() and
 * tracing_entries_write(), using the generic trace array open/release
 * pair.
 */
static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
6609 
/*
 * Read/write file operations built on tracing_syscall_buf_read() and
 * tracing_syscall_buf_write(), using the generic trace array open/release
 * pair.
 */
static const struct file_operations tracing_syscall_buf_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_syscall_buf_read,
	.write		= tracing_syscall_buf_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
6617 
/*
 * Read-only seq_file operations; the seq_file is set up by
 * tracing_buffer_meta_open() and torn down by tracing_seq_release().
 */
static const struct file_operations tracing_buffer_meta_fops = {
	.open		= tracing_buffer_meta_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};
6624 
/*
 * Read-only file operations built on tracing_total_entries_read(), using
 * the generic trace array open/release pair.
 */
static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
6631 
/*
 * Write-only file operations (no .read callback) with a dedicated release
 * handler, tracing_free_buffer_release().
 */
static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};
6637 
/*
 * Write-only file operations for text markers, handled by
 * tracing_mark_write().  Open and release are shared with
 * tracing_mark_raw_fops.
 */
static const struct file_operations tracing_mark_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_write,
	.release	= tracing_mark_release,
};
6643 
/*
 * Write-only file operations for raw (binary) markers, handled by
 * tracing_mark_raw_write().  Open and release are shared with
 * tracing_mark_fops.
 */
static const struct file_operations tracing_mark_raw_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_raw_write,
	.release	= tracing_mark_release,
};
6649 
/*
 * File operations for the trace clock file: reads go through the seq_file
 * set up by tracing_clock_open(), writes select a clock through
 * tracing_clock_write().
 */
static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};
6657 
/*
 * Read-only seq_file operations for the time stamp mode file; the
 * seq_file is set up by tracing_time_stamp_mode_open().
 */
static const struct file_operations trace_time_stamp_mode_fops = {
	.open		= tracing_time_stamp_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
};
6664 
/*
 * Read-only seq_file operations; the seq_file is set up by
 * tracing_last_boot_open() and torn down by tracing_seq_release().
 */
static const struct file_operations last_boot_fops = {
	.open		= tracing_last_boot_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};
6671 
6672 /*
6673  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
6674  * @filp: The active open file structure
6675  * @ubuf: The userspace provided buffer to read value into
6676  * @cnt: The maximum number of bytes to read
6677  * @ppos: The current "file" position
6678  *
6679  * This function implements the write interface for a struct trace_min_max_param.
6680  * The filp->private_data must point to a trace_min_max_param structure that
6681  * defines where to write the value, the min and the max acceptable values,
6682  * and a lock to protect the write.
6683  */
6684 static ssize_t
6685 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
6686 {
6687 	struct trace_min_max_param *param = filp->private_data;
6688 	u64 val;
6689 	int err;
6690 
6691 	if (!param)
6692 		return -EFAULT;
6693 
6694 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
6695 	if (err)
6696 		return err;
6697 
6698 	if (param->lock)
6699 		mutex_lock(param->lock);
6700 
6701 	if (param->min && val < *param->min)
6702 		err = -EINVAL;
6703 
6704 	if (param->max && val > *param->max)
6705 		err = -EINVAL;
6706 
6707 	if (!err)
6708 		*param->val = val;
6709 
6710 	if (param->lock)
6711 		mutex_unlock(param->lock);
6712 
6713 	if (err)
6714 		return err;
6715 
6716 	return cnt;
6717 }
6718 
6719 /*
6720  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
6721  * @filp: The active open file structure
6722  * @ubuf: The userspace provided buffer to read value into
6723  * @cnt: The maximum number of bytes to read
6724  * @ppos: The current "file" position
6725  *
6726  * This function implements the read interface for a struct trace_min_max_param.
6727  * The filp->private_data must point to a trace_min_max_param struct with valid
6728  * data.
6729  */
6730 static ssize_t
6731 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6732 {
6733 	struct trace_min_max_param *param = filp->private_data;
6734 	char buf[U64_STR_SIZE];
6735 	int len;
6736 	u64 val;
6737 
6738 	if (!param)
6739 		return -EFAULT;
6740 
6741 	val = *param->val;
6742 
6743 	if (cnt > sizeof(buf))
6744 		cnt = sizeof(buf);
6745 
6746 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
6747 
6748 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
6749 }
6750 
/*
 * Non-static file operations for files backed by a struct
 * trace_min_max_param; see trace_min_max_read() and trace_min_max_write().
 */
const struct file_operations trace_min_max_fops = {
	.open		= tracing_open_generic,
	.read		= trace_min_max_read,
	.write		= trace_min_max_write,
};
6756 
/* Upper bound on the entries kept in one trace array's error log */
#define TRACING_LOG_ERRS_MAX	8
/* Size of the tracing_log_err::loc buffer */
#define TRACING_LOG_LOC_MAX	128

/* Text printed in front of the offending command in the error log */
#define CMD_PREFIX "  Command: "
6761 
/* Description of one logged error, stored inside struct tracing_log_err */
struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* local_clock() value taken when logged */
};
6768 
/*
 * One entry of a trace array's error log.  Entries are linked on the
 * trace array's err_log list through @list; @cmd is allocated separately
 * and freed in free_tracing_log_err().
 */
struct tracing_log_err {
	struct list_head	list;
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;                     /* what caused err */
};
6775 
/*
 * Held while entries are added to, removed from or iterated over any
 * trace array's err_log list.
 */
static DEFINE_MUTEX(tracing_err_log_lock);
6777 
6778 static struct tracing_log_err *alloc_tracing_log_err(int len)
6779 {
6780 	struct tracing_log_err *err;
6781 
6782 	err = kzalloc_obj(*err);
6783 	if (!err)
6784 		return ERR_PTR(-ENOMEM);
6785 
6786 	err->cmd = kzalloc(len, GFP_KERNEL);
6787 	if (!err->cmd) {
6788 		kfree(err);
6789 		return ERR_PTR(-ENOMEM);
6790 	}
6791 
6792 	return err;
6793 }
6794 
6795 static void free_tracing_log_err(struct tracing_log_err *err)
6796 {
6797 	kfree(err->cmd);
6798 	kfree(err);
6799 }
6800 
6801 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
6802 						   int len)
6803 {
6804 	struct tracing_log_err *err;
6805 	char *cmd;
6806 
6807 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
6808 		err = alloc_tracing_log_err(len);
6809 		if (PTR_ERR(err) != -ENOMEM)
6810 			tr->n_err_log_entries++;
6811 
6812 		return err;
6813 	}
6814 	cmd = kzalloc(len, GFP_KERNEL);
6815 	if (!cmd)
6816 		return ERR_PTR(-ENOMEM);
6817 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
6818 	kfree(err->cmd);
6819 	err->cmd = cmd;
6820 	list_del(&err->list);
6821 
6822 	return err;
6823 }
6824 
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Looks up the first occurrence of @str in @cmd.  The result is meant to
 * be passed to tracing_log_err() as the caret position within @cmd.  An
 * empty @cmd triggers a WARN_ON().
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found.
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *match;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	match = strstr(cmd, str);

	return match ? match - cmd : 0;
}
6850 
6851 /**
6852  * tracing_log_err - write an error to the tracing error log
6853  * @tr: The associated trace array for the error (NULL for top level array)
6854  * @loc: A string describing where the error occurred
6855  * @cmd: The tracing command that caused the error
6856  * @errs: The array of loc-specific static error strings
6857  * @type: The index into errs[], which produces the specific static err string
6858  * @pos: The position the caret should be placed in the cmd
6859  *
6860  * Writes an error into tracing/error_log of the form:
6861  *
6862  * <loc>: error: <text>
6863  *   Command: <cmd>
6864  *              ^
6865  *
6866  * tracing/error_log is a small log file containing the last
6867  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
6868  * unless there has been a tracing error, and the error log can be
6869  * cleared and have its memory freed by writing the empty string in
6870  * truncation mode to it i.e. echo > tracing/error_log.
6871  *
6872  * NOTE: the @errs array along with the @type param are used to
6873  * produce a static error string - this string is not copied and saved
6874  * when the error is logged - only a pointer to it is saved.  See
6875  * existing callers for examples of how static strings are typically
6876  * defined for use with tracing_log_err().
6877  */
6878 void tracing_log_err(struct trace_array *tr,
6879 		     const char *loc, const char *cmd,
6880 		     const char **errs, u8 type, u16 pos)
6881 {
6882 	struct tracing_log_err *err;
6883 	int len = 0;
6884 
6885 	if (!tr)
6886 		tr = &global_trace;
6887 
6888 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
6889 
6890 	guard(mutex)(&tracing_err_log_lock);
6891 
6892 	err = get_tracing_log_err(tr, len);
6893 	if (PTR_ERR(err) == -ENOMEM)
6894 		return;
6895 
6896 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
6897 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
6898 
6899 	err->info.errs = errs;
6900 	err->info.type = type;
6901 	err->info.pos = pos;
6902 	err->info.ts = local_clock();
6903 
6904 	list_add_tail(&err->list, &tr->err_log);
6905 }
6906 
6907 static void clear_tracing_err_log(struct trace_array *tr)
6908 {
6909 	struct tracing_log_err *err, *next;
6910 
6911 	guard(mutex)(&tracing_err_log_lock);
6912 
6913 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
6914 		list_del(&err->list);
6915 		free_tracing_log_err(err);
6916 	}
6917 
6918 	tr->n_err_log_entries = 0;
6919 }
6920 
6921 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
6922 {
6923 	struct trace_array *tr = m->private;
6924 
6925 	mutex_lock(&tracing_err_log_lock);
6926 
6927 	return seq_list_start(&tr->err_log, *pos);
6928 }
6929 
6930 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
6931 {
6932 	struct trace_array *tr = m->private;
6933 
6934 	return seq_list_next(v, &tr->err_log, pos);
6935 }
6936 
6937 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
6938 {
6939 	mutex_unlock(&tracing_err_log_lock);
6940 }
6941 
6942 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
6943 {
6944 	u16 i;
6945 
6946 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
6947 		seq_putc(m, ' ');
6948 	for (i = 0; i < pos; i++)
6949 		seq_putc(m, ' ');
6950 	seq_puts(m, "^\n");
6951 }
6952 
6953 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
6954 {
6955 	struct tracing_log_err *err = v;
6956 
6957 	if (err) {
6958 		const char *err_text = err->info.errs[err->info.type];
6959 		u64 sec = err->info.ts;
6960 		u32 nsec;
6961 
6962 		nsec = do_div(sec, NSEC_PER_SEC);
6963 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
6964 			   err->loc, err_text);
6965 		seq_printf(m, "%s", err->cmd);
6966 		tracing_err_log_show_pos(m, err->info.pos);
6967 	}
6968 
6969 	return 0;
6970 }
6971 
/*
 * seq_file iterator over a trace array's error log.  tracing_err_log_lock
 * is taken in .start and released in .stop.
 */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start  = tracing_err_log_seq_start,
	.next   = tracing_err_log_seq_next,
	.stop   = tracing_err_log_seq_stop,
	.show   = tracing_err_log_seq_show
};
6978 
6979 static int tracing_err_log_open(struct inode *inode, struct file *file)
6980 {
6981 	struct trace_array *tr = inode->i_private;
6982 	int ret = 0;
6983 
6984 	ret = tracing_check_open_get_tr(tr);
6985 	if (ret)
6986 		return ret;
6987 
6988 	/* If this file was opened for write, then erase contents */
6989 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
6990 		clear_tracing_err_log(tr);
6991 
6992 	if (file->f_mode & FMODE_READ) {
6993 		ret = seq_open(file, &tracing_err_log_seq_ops);
6994 		if (!ret) {
6995 			struct seq_file *m = file->private_data;
6996 			m->private = tr;
6997 		} else {
6998 			trace_array_put(tr);
6999 		}
7000 	}
7001 	return ret;
7002 }
7003 
/*
 * Write handler for the error log file: the written data is ignored and
 * the full @count is reported as consumed.  Clearing of the log happens
 * in tracing_err_log_open() when the file is opened for write with
 * O_TRUNC, not here.
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
7010 
7011 static int tracing_err_log_release(struct inode *inode, struct file *file)
7012 {
7013 	struct trace_array *tr = inode->i_private;
7014 
7015 	trace_array_put(tr);
7016 
7017 	if (file->f_mode & FMODE_READ)
7018 		seq_release(inode, file);
7019 
7020 	return 0;
7021 }
7022 
/*
 * File operations for the error log file.  Reads go through the seq_file
 * set up by tracing_err_log_open(); writes are accepted but ignored by
 * tracing_err_log_write().
 */
static const struct file_operations tracing_err_log_fops = {
	.open           = tracing_err_log_open,
	.write		= tracing_err_log_write,
	.read           = seq_read,
	.llseek         = tracing_lseek,
	.release        = tracing_err_log_release,
};
7030 
7031 int tracing_buffers_open(struct inode *inode, struct file *filp)
7032 {
7033 	struct trace_array *tr = inode->i_private;
7034 	struct ftrace_buffer_info *info;
7035 	int ret;
7036 
7037 	ret = tracing_check_open_get_tr(tr);
7038 	if (ret)
7039 		return ret;
7040 
7041 	info = kvzalloc_obj(*info);
7042 	if (!info) {
7043 		trace_array_put(tr);
7044 		return -ENOMEM;
7045 	}
7046 
7047 	mutex_lock(&trace_types_lock);
7048 
7049 	info->iter.tr		= tr;
7050 	info->iter.cpu_file	= tracing_get_cpu(inode);
7051 	info->iter.trace	= tr->current_trace;
7052 	info->iter.array_buffer = &tr->array_buffer;
7053 	info->spare		= NULL;
7054 	/* Force reading ring buffer for first read */
7055 	info->read		= (unsigned int)-1;
7056 
7057 	filp->private_data = info;
7058 
7059 	tr->trace_ref++;
7060 
7061 	mutex_unlock(&trace_types_lock);
7062 
7063 	ret = nonseekable_open(inode, filp);
7064 	if (ret < 0)
7065 		trace_array_put(tr);
7066 
7067 	return ret;
7068 }
7069 
7070 static __poll_t
7071 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7072 {
7073 	struct ftrace_buffer_info *info = filp->private_data;
7074 	struct trace_iterator *iter = &info->iter;
7075 
7076 	return trace_poll(iter, filp, poll_table);
7077 }
7078 
7079 ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7080 			     size_t count, loff_t *ppos)
7081 {
7082 	struct ftrace_buffer_info *info = filp->private_data;
7083 	struct trace_iterator *iter = &info->iter;
7084 	void *trace_data;
7085 	int page_size;
7086 	ssize_t ret = 0;
7087 	ssize_t size;
7088 
7089 	if (!count)
7090 		return 0;
7091 
7092 	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
7093 		return -EBUSY;
7094 
7095 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7096 
7097 	/* Make sure the spare matches the current sub buffer size */
7098 	if (info->spare) {
7099 		if (page_size != info->spare_size) {
7100 			ring_buffer_free_read_page(iter->array_buffer->buffer,
7101 						   info->spare_cpu, info->spare);
7102 			info->spare = NULL;
7103 		}
7104 	}
7105 
7106 	if (!info->spare) {
7107 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7108 							  iter->cpu_file);
7109 		if (IS_ERR(info->spare)) {
7110 			ret = PTR_ERR(info->spare);
7111 			info->spare = NULL;
7112 		} else {
7113 			info->spare_cpu = iter->cpu_file;
7114 			info->spare_size = page_size;
7115 		}
7116 	}
7117 	if (!info->spare)
7118 		return ret;
7119 
7120 	/* Do we have previous read data to read? */
7121 	if (info->read < page_size)
7122 		goto read;
7123 
7124  again:
7125 	trace_access_lock(iter->cpu_file);
7126 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7127 				    info->spare,
7128 				    count,
7129 				    iter->cpu_file, 0);
7130 	trace_access_unlock(iter->cpu_file);
7131 
7132 	if (ret < 0) {
7133 		if (trace_empty(iter) && !iter->closed) {
7134 			if (update_last_data_if_empty(iter->tr))
7135 				return 0;
7136 
7137 			if ((filp->f_flags & O_NONBLOCK))
7138 				return -EAGAIN;
7139 
7140 			ret = wait_on_pipe(iter, 0);
7141 			if (ret)
7142 				return ret;
7143 
7144 			goto again;
7145 		}
7146 		return 0;
7147 	}
7148 
7149 	info->read = 0;
7150  read:
7151 	size = page_size - info->read;
7152 	if (size > count)
7153 		size = count;
7154 	trace_data = ring_buffer_read_page_data(info->spare);
7155 	ret = copy_to_user(ubuf, trace_data + info->read, size);
7156 	if (ret == size)
7157 		return -EFAULT;
7158 
7159 	size -= ret;
7160 
7161 	*ppos += size;
7162 	info->read += size;
7163 
7164 	return size;
7165 }
7166 
7167 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7168 {
7169 	struct ftrace_buffer_info *info = file->private_data;
7170 	struct trace_iterator *iter = &info->iter;
7171 
7172 	iter->closed = true;
7173 	/* Make sure the waiters see the new wait_index */
7174 	(void)atomic_fetch_inc_release(&iter->wait_index);
7175 
7176 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7177 
7178 	return 0;
7179 }
7180 
7181 int tracing_buffers_release(struct inode *inode, struct file *file)
7182 {
7183 	struct ftrace_buffer_info *info = file->private_data;
7184 	struct trace_iterator *iter = &info->iter;
7185 
7186 	guard(mutex)(&trace_types_lock);
7187 
7188 	iter->tr->trace_ref--;
7189 
7190 	__trace_array_put(iter->tr);
7191 
7192 	if (info->spare)
7193 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7194 					   info->spare_cpu, info->spare);
7195 	kvfree(info);
7196 
7197 	return 0;
7198 }
7199 
/*
 * Reference counted handle for one ring buffer read page that has been
 * handed to a pipe by tracing_buffers_splice_read().  The page is given
 * back to the ring buffer when the last reference is dropped, see
 * buffer_ref_release().
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* ring buffer the page belongs to */
	void			*page;		/* from ring_buffer_alloc_read_page() */
	int			cpu;		/* CPU the page was allocated for */
	refcount_t		refcount;
};
7206 
7207 static void buffer_ref_release(struct buffer_ref *ref)
7208 {
7209 	if (!refcount_dec_and_test(&ref->refcount))
7210 		return;
7211 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7212 	kfree(ref);
7213 }
7214 
7215 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7216 				    struct pipe_buffer *buf)
7217 {
7218 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7219 
7220 	buffer_ref_release(ref);
7221 	buf->private = 0;
7222 }
7223 
7224 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7225 				struct pipe_buffer *buf)
7226 {
7227 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7228 
7229 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7230 		return false;
7231 
7232 	refcount_inc(&ref->refcount);
7233 	return true;
7234 }
7235 
/*
 * Pipe buffer operations for a buffer.  Both callbacks operate on the
 * struct buffer_ref that is stored in pipe_buffer::private.
 */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.release		= buffer_pipe_buf_release,
	.get			= buffer_pipe_buf_get,
};
7241 
7242 /*
7243  * Callback from splice_to_pipe(), if we need to release some pages
7244  * at the end of the spd in case we error'ed out in filling the pipe.
7245  */
7246 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7247 {
7248 	struct buffer_ref *ref =
7249 		(struct buffer_ref *)spd->partial[i].private;
7250 
7251 	buffer_ref_release(ref);
7252 	spd->partial[i].private = 0;
7253 }
7254 
/*
 * Splice handler for the raw buffer files: moves whole ring buffer sub
 * buffers into @pipe without copying the data.
 *
 * *@ppos must be aligned to the sub buffer size and @len is rounded down
 * to a multiple of it; a @len smaller than one sub buffer is rejected.
 * Each page handed to the pipe is wrapped in a struct buffer_ref, which
 * gives the page back to the ring buffer once the pipe drops its last
 * reference.
 *
 * If nothing could be read, the call fails with -EAGAIN for non-blocking
 * requests and otherwise waits on the ring buffer and retries once more
 * after being woken.
 *
 * Returns what splice_to_pipe() returns, 0 if there was still nothing to
 * read after the wait, -EBUSY if the snapshot buffer is in use by the
 * current tracer, -EINVAL for a bad position or length, -ENOMEM, or
 * another negative error.
 */
ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
				    struct pipe_inode_info *pipe, size_t len,
				    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	bool woken = false;
	int page_size;
	int entries, i;
	ssize_t ret = 0;

	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	/* NOTE(review): the masks below assume page_size is a power of two */
	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
	if (*ppos & (page_size - 1))
		return -EINVAL;

	/* Only whole sub buffers are spliced: round len down */
	if (len & (page_size - 1)) {
		if (len < page_size)
			return -EINVAL;
		len &= (~(page_size - 1));
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	/* One spd slot per sub buffer, until the slots, len or entries run out */
	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
		struct page *page;
		int r;

		ref = kzalloc_obj(*ref);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		r = ring_buffer_read_page(ref->buffer, ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			/* Nothing was read into the page; ret is left untouched */
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ring_buffer_read_page_data(ref->page));

		spd.pages[i] = page;
		spd.partial[i].len = page_size;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += page_size;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	/* Every break above happens before i is advanced, so i == slots filled */
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {

		if (ret)
			goto out;

		/* Already waited once and there is still nothing to return */
		if (woken)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Iterate one more time to collect any new data then exit */
		woken = true;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
7374 
7375 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
7376 {
7377 	struct ftrace_buffer_info *info = file->private_data;
7378 	struct trace_iterator *iter = &info->iter;
7379 	int err;
7380 
7381 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
7382 		if (!(file->f_flags & O_NONBLOCK)) {
7383 			err = ring_buffer_wait(iter->array_buffer->buffer,
7384 					       iter->cpu_file,
7385 					       iter->tr->buffer_percent,
7386 					       NULL, NULL);
7387 			if (err)
7388 				return err;
7389 		}
7390 
7391 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
7392 						  iter->cpu_file);
7393 	} else if (cmd) {
7394 		return -ENOTTY;
7395 	}
7396 
7397 	/*
7398 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
7399 	 * waiters
7400 	 */
7401 	guard(mutex)(&trace_types_lock);
7402 
7403 	/* Make sure the waiters see the new wait_index */
7404 	(void)atomic_fetch_inc_release(&iter->wait_index);
7405 
7406 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7407 
7408 	return 0;
7409 }
7410 
7411 /*
7412  * This is called when a VMA is duplicated (e.g., on fork()) to increment
7413  * the user_mapped counter without remapping pages.
7414  */
7415 static void tracing_buffers_mmap_open(struct vm_area_struct *vma)
7416 {
7417 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
7418 	struct trace_iterator *iter = &info->iter;
7419 
7420 	ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file);
7421 }
7422 
7423 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
7424 {
7425 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
7426 	struct trace_iterator *iter = &info->iter;
7427 
7428 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
7429 	put_snapshot_map(iter->tr);
7430 }
7431 
7432 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
7433 {
7434 	/*
7435 	 * Trace buffer mappings require the complete buffer including
7436 	 * the meta page. Partial mappings are not supported.
7437 	 */
7438 	return -EINVAL;
7439 }
7440 
7441 static const struct vm_operations_struct tracing_buffers_vmops = {
7442 	.open		= tracing_buffers_mmap_open,
7443 	.close		= tracing_buffers_mmap_close,
7444 	.may_split      = tracing_buffers_may_split,
7445 };
7446 
7447 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
7448 {
7449 	struct ftrace_buffer_info *info = filp->private_data;
7450 	struct trace_iterator *iter = &info->iter;
7451 	int ret = 0;
7452 
7453 	/* A memmap'ed and backup buffers are not supported for user space mmap */
7454 	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
7455 		return -ENODEV;
7456 
7457 	ret = get_snapshot_map(iter->tr);
7458 	if (ret)
7459 		return ret;
7460 
7461 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
7462 	if (ret)
7463 		put_snapshot_map(iter->tr);
7464 
7465 	vma->vm_ops = &tracing_buffers_vmops;
7466 
7467 	return ret;
7468 }
7469 
7470 static const struct file_operations tracing_buffers_fops = {
7471 	.open		= tracing_buffers_open,
7472 	.read		= tracing_buffers_read,
7473 	.poll		= tracing_buffers_poll,
7474 	.release	= tracing_buffers_release,
7475 	.flush		= tracing_buffers_flush,
7476 	.splice_read	= tracing_buffers_splice_read,
7477 	.unlocked_ioctl = tracing_buffers_ioctl,
7478 	.mmap		= tracing_buffers_mmap,
7479 };
7480 
7481 static ssize_t
7482 tracing_stats_read(struct file *filp, char __user *ubuf,
7483 		   size_t count, loff_t *ppos)
7484 {
7485 	struct inode *inode = file_inode(filp);
7486 	struct trace_array *tr = inode->i_private;
7487 	struct array_buffer *trace_buf = &tr->array_buffer;
7488 	int cpu = tracing_get_cpu(inode);
7489 	struct trace_seq *s;
7490 	unsigned long cnt;
7491 	unsigned long long t;
7492 	unsigned long usec_rem;
7493 
7494 	s = kmalloc_obj(*s);
7495 	if (!s)
7496 		return -ENOMEM;
7497 
7498 	trace_seq_init(s);
7499 
7500 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7501 	trace_seq_printf(s, "entries: %ld\n", cnt);
7502 
7503 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7504 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7505 
7506 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7507 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7508 
7509 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7510 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7511 
7512 	if (trace_clocks[tr->clock_id].in_ns) {
7513 		/* local or global for trace_clock */
7514 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7515 		usec_rem = do_div(t, USEC_PER_SEC);
7516 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7517 								t, usec_rem);
7518 
7519 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
7520 		usec_rem = do_div(t, USEC_PER_SEC);
7521 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7522 	} else {
7523 		/* counter or tsc mode for trace_clock */
7524 		trace_seq_printf(s, "oldest event ts: %llu\n",
7525 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7526 
7527 		trace_seq_printf(s, "now ts: %llu\n",
7528 				ring_buffer_time_stamp(trace_buf->buffer));
7529 	}
7530 
7531 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7532 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7533 
7534 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7535 	trace_seq_printf(s, "read events: %ld\n", cnt);
7536 
7537 	count = simple_read_from_buffer(ubuf, count, ppos,
7538 					s->buffer, trace_seq_used(s));
7539 
7540 	kfree(s);
7541 
7542 	return count;
7543 }
7544 
7545 static const struct file_operations tracing_stats_fops = {
7546 	.open		= tracing_open_generic_tr,
7547 	.read		= tracing_stats_read,
7548 	.llseek		= generic_file_llseek,
7549 	.release	= tracing_release_generic_tr,
7550 };
7551 
7552 #ifdef CONFIG_DYNAMIC_FTRACE
7553 
7554 static ssize_t
7555 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7556 		  size_t cnt, loff_t *ppos)
7557 {
7558 	ssize_t ret;
7559 	char *buf;
7560 	int r;
7561 
7562 	/* 512 should be plenty to hold the amount needed */
7563 #define DYN_INFO_BUF_SIZE	512
7564 
7565 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
7566 	if (!buf)
7567 		return -ENOMEM;
7568 
7569 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
7570 		      "%ld pages:%ld groups: %ld\n"
7571 		      "ftrace boot update time = %llu (ns)\n"
7572 		      "ftrace module total update time = %llu (ns)\n",
7573 		      ftrace_update_tot_cnt,
7574 		      ftrace_number_of_pages,
7575 		      ftrace_number_of_groups,
7576 		      ftrace_update_time,
7577 		      ftrace_total_mod_time);
7578 
7579 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7580 	kfree(buf);
7581 	return ret;
7582 }
7583 
7584 static const struct file_operations tracing_dyn_info_fops = {
7585 	.open		= tracing_open_generic,
7586 	.read		= tracing_read_dyn_info,
7587 	.llseek		= generic_file_llseek,
7588 };
7589 #endif /* CONFIG_DYNAMIC_FTRACE */
7590 
7591 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7592 {
7593 	/* Top directory uses NULL as the parent */
7594 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7595 		return NULL;
7596 
7597 	if (WARN_ON(!tr->dir))
7598 		return ERR_PTR(-ENODEV);
7599 
7600 	/* All sub buffers have a descriptor */
7601 	return tr->dir;
7602 }
7603 
7604 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7605 {
7606 	struct dentry *d_tracer;
7607 
7608 	if (tr->percpu_dir)
7609 		return tr->percpu_dir;
7610 
7611 	d_tracer = tracing_get_dentry(tr);
7612 	if (IS_ERR(d_tracer))
7613 		return NULL;
7614 
7615 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7616 
7617 	MEM_FAIL(!tr->percpu_dir,
7618 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7619 
7620 	return tr->percpu_dir;
7621 }
7622 
7623 struct dentry *
7624 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7625 		      void *data, long cpu, const struct file_operations *fops)
7626 {
7627 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7628 
7629 	if (ret) /* See tracing_get_cpu() */
7630 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7631 	return ret;
7632 }
7633 
7634 static void
7635 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7636 {
7637 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7638 	struct dentry *d_cpu;
7639 	char cpu_dir[30]; /* 30 characters should be more than enough */
7640 
7641 	if (!d_percpu)
7642 		return;
7643 
7644 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7645 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7646 	if (!d_cpu) {
7647 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7648 		return;
7649 	}
7650 
7651 	/* per cpu trace_pipe */
7652 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
7653 				tr, cpu, &tracing_pipe_fops);
7654 
7655 	/* per cpu trace */
7656 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
7657 				tr, cpu, &tracing_fops);
7658 
7659 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
7660 				tr, cpu, &tracing_buffers_fops);
7661 
7662 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
7663 				tr, cpu, &tracing_stats_fops);
7664 
7665 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu,
7666 				tr, cpu, &tracing_entries_fops);
7667 
7668 	if (tr->range_addr_start)
7669 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
7670 				      tr, cpu, &tracing_buffer_meta_fops);
7671 #ifdef CONFIG_TRACER_SNAPSHOT
7672 	if (!tr->range_addr_start) {
7673 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
7674 				      tr, cpu, &snapshot_fops);
7675 
7676 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
7677 				      tr, cpu, &snapshot_raw_fops);
7678 	}
7679 #endif
7680 }
7681 
7682 #ifdef CONFIG_FTRACE_SELFTEST
7683 /* Let selftest have access to static functions in this file */
7684 #include "trace_selftest.c"
7685 #endif
7686 
7687 static ssize_t
7688 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7689 			loff_t *ppos)
7690 {
7691 	struct trace_option_dentry *topt = filp->private_data;
7692 	char *buf;
7693 
7694 	if (topt->flags->val & topt->opt->bit)
7695 		buf = "1\n";
7696 	else
7697 		buf = "0\n";
7698 
7699 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7700 }
7701 
7702 static ssize_t
7703 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7704 			 loff_t *ppos)
7705 {
7706 	struct trace_option_dentry *topt = filp->private_data;
7707 	unsigned long val;
7708 	int ret;
7709 
7710 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7711 	if (ret)
7712 		return ret;
7713 
7714 	if (val != 0 && val != 1)
7715 		return -EINVAL;
7716 
7717 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7718 		guard(mutex)(&trace_types_lock);
7719 		ret = __set_tracer_option(topt->tr, topt->flags,
7720 					  topt->opt, !val);
7721 		if (ret)
7722 			return ret;
7723 	}
7724 
7725 	*ppos += cnt;
7726 
7727 	return cnt;
7728 }
7729 
7730 static int tracing_open_options(struct inode *inode, struct file *filp)
7731 {
7732 	struct trace_option_dentry *topt = inode->i_private;
7733 	int ret;
7734 
7735 	ret = tracing_check_open_get_tr(topt->tr);
7736 	if (ret)
7737 		return ret;
7738 
7739 	filp->private_data = inode->i_private;
7740 	return 0;
7741 }
7742 
7743 static int tracing_release_options(struct inode *inode, struct file *file)
7744 {
7745 	struct trace_option_dentry *topt = file->private_data;
7746 
7747 	trace_array_put(topt->tr);
7748 	return 0;
7749 }
7750 
7751 static const struct file_operations trace_options_fops = {
7752 	.open = tracing_open_options,
7753 	.read = trace_options_read,
7754 	.write = trace_options_write,
7755 	.llseek	= generic_file_llseek,
7756 	.release = tracing_release_options,
7757 };
7758 
7759 /*
7760  * In order to pass in both the trace_array descriptor as well as the index
7761  * to the flag that the trace option file represents, the trace_array
7762  * has a character array of trace_flags_index[], which holds the index
7763  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7764  * The address of this character array is passed to the flag option file
7765  * read/write callbacks.
7766  *
7767  * In order to extract both the index and the trace_array descriptor,
7768  * get_tr_index() uses the following algorithm.
7769  *
7770  *   idx = *ptr;
7771  *
7772  * As the pointer itself contains the address of the index (remember
7773  * index[1] == 1).
7774  *
7775  * Then to get the trace_array descriptor, by subtracting that index
7776  * from the ptr, we get to the start of the index itself.
7777  *
7778  *   ptr - idx == &index[0]
7779  *
7780  * Then a simple container_of() from that pointer gets us to the
7781  * trace_array descriptor.
7782  */
7783 static void get_tr_index(void *data, struct trace_array **ptr,
7784 			 unsigned int *pindex)
7785 {
7786 	*pindex = *(unsigned char *)data;
7787 
7788 	*ptr = container_of(data - *pindex, struct trace_array,
7789 			    trace_flags_index);
7790 }
7791 
7792 static ssize_t
7793 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7794 			loff_t *ppos)
7795 {
7796 	void *tr_index = filp->private_data;
7797 	struct trace_array *tr;
7798 	unsigned int index;
7799 	char *buf;
7800 
7801 	get_tr_index(tr_index, &tr, &index);
7802 
7803 	if (tr->trace_flags & (1ULL << index))
7804 		buf = "1\n";
7805 	else
7806 		buf = "0\n";
7807 
7808 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7809 }
7810 
7811 static ssize_t
7812 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7813 			 loff_t *ppos)
7814 {
7815 	void *tr_index = filp->private_data;
7816 	struct trace_array *tr;
7817 	unsigned int index;
7818 	unsigned long val;
7819 	int ret;
7820 
7821 	get_tr_index(tr_index, &tr, &index);
7822 
7823 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7824 	if (ret)
7825 		return ret;
7826 
7827 	if (val != 0 && val != 1)
7828 		return -EINVAL;
7829 
7830 	mutex_lock(&event_mutex);
7831 	mutex_lock(&trace_types_lock);
7832 	ret = set_tracer_flag(tr, 1ULL << index, val);
7833 	mutex_unlock(&trace_types_lock);
7834 	mutex_unlock(&event_mutex);
7835 
7836 	if (ret < 0)
7837 		return ret;
7838 
7839 	*ppos += cnt;
7840 
7841 	return cnt;
7842 }
7843 
7844 static const struct file_operations trace_options_core_fops = {
7845 	.open = tracing_open_generic,
7846 	.read = trace_options_core_read,
7847 	.write = trace_options_core_write,
7848 	.llseek = generic_file_llseek,
7849 };
7850 
7851 struct dentry *trace_create_file(const char *name,
7852 				 umode_t mode,
7853 				 struct dentry *parent,
7854 				 void *data,
7855 				 const struct file_operations *fops)
7856 {
7857 	struct dentry *ret;
7858 
7859 	ret = tracefs_create_file(name, mode, parent, data, fops);
7860 	if (!ret)
7861 		pr_warn("Could not create tracefs '%s' entry\n", name);
7862 
7863 	return ret;
7864 }
7865 
7866 
7867 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7868 {
7869 	struct dentry *d_tracer;
7870 
7871 	if (tr->options)
7872 		return tr->options;
7873 
7874 	d_tracer = tracing_get_dentry(tr);
7875 	if (IS_ERR(d_tracer))
7876 		return NULL;
7877 
7878 	tr->options = tracefs_create_dir("options", d_tracer);
7879 	if (!tr->options) {
7880 		pr_warn("Could not create tracefs directory 'options'\n");
7881 		return NULL;
7882 	}
7883 
7884 	return tr->options;
7885 }
7886 
7887 static void
7888 create_trace_option_file(struct trace_array *tr,
7889 			 struct trace_option_dentry *topt,
7890 			 struct tracer_flags *flags,
7891 			 struct tracer_opt *opt)
7892 {
7893 	struct dentry *t_options;
7894 
7895 	t_options = trace_options_init_dentry(tr);
7896 	if (!t_options)
7897 		return;
7898 
7899 	topt->flags = flags;
7900 	topt->opt = opt;
7901 	topt->tr = tr;
7902 
7903 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
7904 					t_options, topt, &trace_options_fops);
7905 }
7906 
7907 static int
7908 create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
7909 			  struct tracer_flags *flags)
7910 {
7911 	struct trace_option_dentry *topts;
7912 	struct trace_options *tr_topts;
7913 	struct tracer_opt *opts;
7914 	int cnt;
7915 
7916 	if (!flags || !flags->opts)
7917 		return 0;
7918 
7919 	opts = flags->opts;
7920 
7921 	for (cnt = 0; opts[cnt].name; cnt++)
7922 		;
7923 
7924 	topts = kzalloc_objs(*topts, cnt + 1);
7925 	if (!topts)
7926 		return 0;
7927 
7928 	tr_topts = krealloc_array(tr->topts, tr->nr_topts + 1, sizeof(*tr->topts),
7929 				  GFP_KERNEL);
7930 	if (!tr_topts) {
7931 		kfree(topts);
7932 		return -ENOMEM;
7933 	}
7934 
7935 	tr->topts = tr_topts;
7936 	tr->topts[tr->nr_topts].tracer = tracer;
7937 	tr->topts[tr->nr_topts].topts = topts;
7938 	tr->nr_topts++;
7939 
7940 	for (cnt = 0; opts[cnt].name; cnt++) {
7941 		create_trace_option_file(tr, &topts[cnt], flags,
7942 					 &opts[cnt]);
7943 		MEM_FAIL(topts[cnt].entry == NULL,
7944 			  "Failed to create trace option: %s",
7945 			  opts[cnt].name);
7946 	}
7947 	return 0;
7948 }
7949 
7950 static int get_global_flags_val(struct tracer *tracer)
7951 {
7952 	struct tracers *t;
7953 
7954 	list_for_each_entry(t, &global_trace.tracers, list) {
7955 		if (t->tracer != tracer)
7956 			continue;
7957 		if (!t->flags)
7958 			return -1;
7959 		return t->flags->val;
7960 	}
7961 	return -1;
7962 }
7963 
7964 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
7965 {
7966 	struct tracer *tracer = t->tracer;
7967 	struct tracer_flags *flags = t->flags ?: tracer->flags;
7968 
7969 	if (!flags)
7970 		return 0;
7971 
7972 	/* Only add tracer options after update_tracer_options finish */
7973 	if (!tracer_options_updated)
7974 		return 0;
7975 
7976 	return create_trace_option_files(tr, tracer, flags);
7977 }
7978 
7979 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
7980 {
7981 	struct tracer_flags *flags;
7982 	struct tracers *t;
7983 	int ret;
7984 
7985 	/* Only enable if the directory has been created already. */
7986 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
7987 		return 0;
7988 
7989 	/*
7990 	 * If this is an instance, only create flags for tracers
7991 	 * the instance may have.
7992 	 */
7993 	if (!trace_ok_for_array(tracer, tr))
7994 		return 0;
7995 
7996 	t = kmalloc_obj(*t);
7997 	if (!t)
7998 		return -ENOMEM;
7999 
8000 	t->tracer = tracer;
8001 	t->flags = NULL;
8002 	list_add(&t->list, &tr->tracers);
8003 
8004 	flags = tracer->flags;
8005 	if (!flags) {
8006 		if (!tracer->default_flags)
8007 			return 0;
8008 
8009 		/*
8010 		 * If the tracer defines default flags, it means the flags are
8011 		 * per trace instance.
8012 		 */
8013 		flags = kmalloc_obj(*flags);
8014 		if (!flags)
8015 			return -ENOMEM;
8016 
8017 		*flags = *tracer->default_flags;
8018 		flags->trace = tracer;
8019 
8020 		t->flags = flags;
8021 
8022 		/* If this is an instance, inherit the global_trace flags */
8023 		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
8024 			int val = get_global_flags_val(tracer);
8025 			if (!WARN_ON_ONCE(val < 0))
8026 				flags->val = val;
8027 		}
8028 	}
8029 
8030 	ret = add_tracer_options(tr, t);
8031 	if (ret < 0) {
8032 		list_del(&t->list);
8033 		kfree(t->flags);
8034 		kfree(t);
8035 	}
8036 
8037 	return ret;
8038 }
8039 
8040 static struct dentry *
8041 create_trace_option_core_file(struct trace_array *tr,
8042 			      const char *option, long index)
8043 {
8044 	struct dentry *t_options;
8045 
8046 	t_options = trace_options_init_dentry(tr);
8047 	if (!t_options)
8048 		return NULL;
8049 
8050 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8051 				 (void *)&tr->trace_flags_index[index],
8052 				 &trace_options_core_fops);
8053 }
8054 
8055 static void create_trace_options_dir(struct trace_array *tr)
8056 {
8057 	struct dentry *t_options;
8058 	bool top_level = tr == &global_trace;
8059 	int i;
8060 
8061 	t_options = trace_options_init_dentry(tr);
8062 	if (!t_options)
8063 		return;
8064 
8065 	for (i = 0; trace_options[i]; i++) {
8066 		if (top_level ||
8067 		    !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
8068 			create_trace_option_core_file(tr, trace_options[i], i);
8069 		}
8070 	}
8071 }
8072 
8073 static ssize_t
8074 rb_simple_read(struct file *filp, char __user *ubuf,
8075 	       size_t cnt, loff_t *ppos)
8076 {
8077 	struct trace_array *tr = filp->private_data;
8078 	char buf[64];
8079 	int r;
8080 
8081 	r = tracer_tracing_is_on(tr);
8082 	r = sprintf(buf, "%d\n", r);
8083 
8084 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8085 }
8086 
8087 static ssize_t
8088 rb_simple_write(struct file *filp, const char __user *ubuf,
8089 		size_t cnt, loff_t *ppos)
8090 {
8091 	struct trace_array *tr = filp->private_data;
8092 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8093 	unsigned long val;
8094 	int ret;
8095 
8096 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8097 	if (ret)
8098 		return ret;
8099 
8100 	if (buffer) {
8101 		guard(mutex)(&trace_types_lock);
8102 		if (!!val == tracer_tracing_is_on(tr)) {
8103 			val = 0; /* do nothing */
8104 		} else if (val) {
8105 			tracer_tracing_on(tr);
8106 			if (tr->current_trace->start)
8107 				tr->current_trace->start(tr);
8108 		} else {
8109 			tracer_tracing_off(tr);
8110 			if (tr->current_trace->stop)
8111 				tr->current_trace->stop(tr);
8112 			/* Wake up any waiters */
8113 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
8114 		}
8115 	}
8116 
8117 	(*ppos)++;
8118 
8119 	return cnt;
8120 }
8121 
8122 static const struct file_operations rb_simple_fops = {
8123 	.open		= tracing_open_generic_tr,
8124 	.read		= rb_simple_read,
8125 	.write		= rb_simple_write,
8126 	.release	= tracing_release_generic_tr,
8127 	.llseek		= default_llseek,
8128 };
8129 
8130 static ssize_t
8131 buffer_percent_read(struct file *filp, char __user *ubuf,
8132 		    size_t cnt, loff_t *ppos)
8133 {
8134 	struct trace_array *tr = filp->private_data;
8135 	char buf[64];
8136 	int r;
8137 
8138 	r = tr->buffer_percent;
8139 	r = sprintf(buf, "%d\n", r);
8140 
8141 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8142 }
8143 
8144 static ssize_t
8145 buffer_percent_write(struct file *filp, const char __user *ubuf,
8146 		     size_t cnt, loff_t *ppos)
8147 {
8148 	struct trace_array *tr = filp->private_data;
8149 	unsigned long val;
8150 	int ret;
8151 
8152 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8153 	if (ret)
8154 		return ret;
8155 
8156 	if (val > 100)
8157 		return -EINVAL;
8158 
8159 	tr->buffer_percent = val;
8160 
8161 	(*ppos)++;
8162 
8163 	return cnt;
8164 }
8165 
8166 static const struct file_operations buffer_percent_fops = {
8167 	.open		= tracing_open_generic_tr,
8168 	.read		= buffer_percent_read,
8169 	.write		= buffer_percent_write,
8170 	.release	= tracing_release_generic_tr,
8171 	.llseek		= default_llseek,
8172 };
8173 
8174 static ssize_t
8175 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8176 {
8177 	struct trace_array *tr = filp->private_data;
8178 	size_t size;
8179 	char buf[64];
8180 	int order;
8181 	int r;
8182 
8183 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
8184 	size = (PAGE_SIZE << order) / 1024;
8185 
8186 	r = sprintf(buf, "%zd\n", size);
8187 
8188 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8189 }
8190 
8191 static ssize_t
8192 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
8193 			 size_t cnt, loff_t *ppos)
8194 {
8195 	struct trace_array *tr = filp->private_data;
8196 	unsigned long val;
8197 	int old_order;
8198 	int order;
8199 	int pages;
8200 	int ret;
8201 
8202 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8203 	if (ret)
8204 		return ret;
8205 
8206 	val *= 1024; /* value passed in is in KB */
8207 
8208 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
8209 	order = fls(pages - 1);
8210 
8211 	/* limit between 1 and 128 system pages */
8212 	if (order < 0 || order > 7)
8213 		return -EINVAL;
8214 
8215 	/* Do not allow tracing while changing the order of the ring buffer */
8216 	tracing_stop_tr(tr);
8217 
8218 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
8219 	if (old_order == order)
8220 		goto out;
8221 
8222 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
8223 	if (ret)
8224 		goto out;
8225 
8226 #ifdef CONFIG_TRACER_SNAPSHOT
8227 
8228 	if (!tr->allocated_snapshot)
8229 		goto out_max;
8230 
8231 	ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
8232 	if (ret) {
8233 		/* Put back the old order */
8234 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
8235 		if (WARN_ON_ONCE(cnt)) {
8236 			/*
8237 			 * AARGH! We are left with different orders!
8238 			 * The max buffer is our "snapshot" buffer.
8239 			 * When a tracer needs a snapshot (one of the
8240 			 * latency tracers), it swaps the max buffer
8241 			 * with the saved snap shot. We succeeded to
8242 			 * update the order of the main buffer, but failed to
8243 			 * update the order of the max buffer. But when we tried
8244 			 * to reset the main buffer to the original size, we
8245 			 * failed there too. This is very unlikely to
8246 			 * happen, but if it does, warn and kill all
8247 			 * tracing.
8248 			 */
8249 			tracing_disabled = 1;
8250 		}
8251 		goto out;
8252 	}
8253  out_max:
8254 #endif
8255 	(*ppos)++;
8256  out:
8257 	if (ret)
8258 		cnt = ret;
8259 	tracing_start_tr(tr);
8260 	return cnt;
8261 }
8262 
8263 static const struct file_operations buffer_subbuf_size_fops = {
8264 	.open		= tracing_open_generic_tr,
8265 	.read		= buffer_subbuf_size_read,
8266 	.write		= buffer_subbuf_size_write,
8267 	.release	= tracing_release_generic_tr,
8268 	.llseek		= default_llseek,
8269 };
8270 
8271 static struct dentry *trace_instance_dir;
8272 
8273 static void
8274 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8275 
#ifdef CONFIG_MODULES
/*
 * Per-module callback (see module_for_each_mod() in setup_trace_scratch())
 * that updates the address delta of @mod in the module_delta table of the
 * trace array passed in @data.
 *
 * The first scratch entry with the same name as @mod is looked up. Its
 * delta is cleared when the module is going away, otherwise it is set to
 * the difference between the module's current text base and the address
 * recorded in the scratch entry.
 *
 * Always returns 0.
 */
static int make_mod_delta(struct module *mod, void *data)
{
	struct trace_array *tr = data;
	struct trace_scratch *tscratch = tr->scratch;
	struct trace_module_delta *module_delta = READ_ONCE(tr->module_delta);
	struct trace_mod_entry *entry;
	int i;

	for (i = 0; i < tscratch->nr_entries; i++) {
		entry = &tscratch->entries[i];

		if (strcmp(mod->name, entry->mod_name) != 0)
			continue;

		if (mod->state != MODULE_STATE_GOING)
			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
						 - entry->mod_addr;
		else
			module_delta->delta[i] = 0;

		/* Only the first matching entry is updated */
		break;
	}

	return 0;
}
#else
/* Without module support there is nothing to compute */
static int make_mod_delta(struct module *mod, void *data)
{
	return 0;
}
#endif
8306 
8307 static int mod_addr_comp(const void *a, const void *b, const void *data)
8308 {
8309 	const struct trace_mod_entry *e1 = a;
8310 	const struct trace_mod_entry *e2 = b;
8311 
8312 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
8313 }
8314 
8315 static void setup_trace_scratch(struct trace_array *tr,
8316 				struct trace_scratch *tscratch, unsigned int size)
8317 {
8318 	struct trace_module_delta *module_delta;
8319 	struct trace_mod_entry *entry;
8320 	int i, nr_entries;
8321 
8322 	if (!tscratch)
8323 		return;
8324 
8325 	tr->scratch = tscratch;
8326 	tr->scratch_size = size;
8327 
8328 	if (tscratch->text_addr)
8329 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
8330 
8331 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
8332 		goto reset;
8333 
8334 	/* Check if each module name is a valid string */
8335 	for (i = 0; i < tscratch->nr_entries; i++) {
8336 		int n;
8337 
8338 		entry = &tscratch->entries[i];
8339 
8340 		for (n = 0; n < MODULE_NAME_LEN; n++) {
8341 			if (entry->mod_name[n] == '\0')
8342 				break;
8343 			if (!isprint(entry->mod_name[n]))
8344 				goto reset;
8345 		}
8346 		if (n == MODULE_NAME_LEN)
8347 			goto reset;
8348 	}
8349 
8350 	/* Sort the entries so that we can find appropriate module from address. */
8351 	nr_entries = tscratch->nr_entries;
8352 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
8353 	       mod_addr_comp, NULL, NULL);
8354 
8355 	if (IS_ENABLED(CONFIG_MODULES)) {
8356 		module_delta = kzalloc_flex(*module_delta, delta, nr_entries);
8357 		if (!module_delta) {
8358 			pr_info("module_delta allocation failed. Not able to decode module address.");
8359 			goto reset;
8360 		}
8361 		init_rcu_head(&module_delta->rcu);
8362 	} else
8363 		module_delta = NULL;
8364 	WRITE_ONCE(tr->module_delta, module_delta);
8365 
8366 	/* Scan modules to make text delta for modules. */
8367 	module_for_each_mod(make_mod_delta, tr);
8368 
8369 	/* Set trace_clock as the same of the previous boot. */
8370 	if (tscratch->clock_id != tr->clock_id) {
8371 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
8372 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
8373 			pr_info("the previous trace_clock info is not valid.");
8374 			goto reset;
8375 		}
8376 	}
8377 	return;
8378  reset:
8379 	/* Invalid trace modules */
8380 	memset(tscratch, 0, size);
8381 }
8382 
8383 #define TRACE_TEST_PTRACING_NAME	"ptracingtest"
8384 
8385 int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8386 {
8387 	enum ring_buffer_flags rb_flags;
8388 	struct trace_scratch *tscratch;
8389 	unsigned int scratch_size = 0;
8390 
8391 	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;
8392 
8393 	buf->tr = tr;
8394 
8395 	if (tr->range_addr_start && tr->range_addr_size) {
8396 		if (tr->name && !strcmp(tr->name, TRACE_TEST_PTRACING_NAME))
8397 			rb_flags |= RB_FL_TESTING;
8398 		/* Add scratch buffer to handle 128 modules */
8399 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
8400 						      tr->range_addr_start,
8401 						      tr->range_addr_size,
8402 						      struct_size(tscratch, entries, 128));
8403 
8404 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
8405 		setup_trace_scratch(tr, tscratch, scratch_size);
8406 
8407 		/*
8408 		 * This is basically the same as a mapped buffer,
8409 		 * with the same restrictions.
8410 		 */
8411 		tr->mapped++;
8412 	} else {
8413 		buf->buffer = ring_buffer_alloc(size, rb_flags);
8414 	}
8415 	if (!buf->buffer)
8416 		return -ENOMEM;
8417 
8418 	buf->data = alloc_percpu(struct trace_array_cpu);
8419 	if (!buf->data) {
8420 		ring_buffer_free(buf->buffer);
8421 		buf->buffer = NULL;
8422 		return -ENOMEM;
8423 	}
8424 
8425 	/* Allocate the first page for all buffers */
8426 	trace_set_buffer_entries(&tr->array_buffer,
8427 				 ring_buffer_size(tr->array_buffer.buffer, 0));
8428 
8429 	return 0;
8430 }
8431 
8432 static void free_trace_buffer(struct array_buffer *buf)
8433 {
8434 	if (buf->buffer) {
8435 		ring_buffer_free(buf->buffer);
8436 		buf->buffer = NULL;
8437 		free_percpu(buf->data);
8438 		buf->data = NULL;
8439 	}
8440 }
8441 
8442 static int allocate_trace_buffers(struct trace_array *tr, unsigned long size)
8443 {
8444 	int ret;
8445 
8446 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8447 	if (ret)
8448 		return ret;
8449 
8450 	ret = trace_allocate_snapshot(tr, size);
8451 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n"))
8452 		free_trace_buffer(&tr->array_buffer);
8453 
8454 	return ret;
8455 }
8456 
8457 static void free_trace_buffers(struct trace_array *tr)
8458 {
8459 	if (!tr)
8460 		return;
8461 
8462 	free_trace_buffer(&tr->array_buffer);
8463 	kfree(tr->module_delta);
8464 
8465 #ifdef CONFIG_TRACER_SNAPSHOT
8466 	free_trace_buffer(&tr->snapshot_buffer);
8467 #endif
8468 }
8469 
8470 static void init_trace_flags_index(struct trace_array *tr)
8471 {
8472 	int i;
8473 
8474 	/* Used by the trace options files */
8475 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8476 		tr->trace_flags_index[i] = i;
8477 }
8478 
8479 static int __update_tracer(struct trace_array *tr)
8480 {
8481 	struct tracer *t;
8482 	int ret = 0;
8483 
8484 	for (t = trace_types; t && !ret; t = t->next)
8485 		ret = add_tracer(tr, t);
8486 
8487 	return ret;
8488 }
8489 
8490 static __init int __update_tracer_options(struct trace_array *tr)
8491 {
8492 	struct tracers *t;
8493 	int ret = 0;
8494 
8495 	list_for_each_entry(t, &tr->tracers, list) {
8496 		ret = add_tracer_options(tr, t);
8497 		if (ret < 0)
8498 			break;
8499 	}
8500 
8501 	return ret;
8502 }
8503 
8504 static __init void update_tracer_options(void)
8505 {
8506 	struct trace_array *tr;
8507 
8508 	guard(mutex)(&trace_types_lock);
8509 	tracer_options_updated = true;
8510 	list_for_each_entry(tr, &ftrace_trace_arrays, list)
8511 		__update_tracer_options(tr);
8512 }
8513 
8514 /* Must have trace_types_lock held */
8515 struct trace_array *trace_array_find(const char *instance)
8516 {
8517 	struct trace_array *tr, *found = NULL;
8518 
8519 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8520 		if (tr->name && strcmp(tr->name, instance) == 0) {
8521 			found = tr;
8522 			break;
8523 		}
8524 	}
8525 
8526 	return found;
8527 }
8528 
8529 struct trace_array *trace_array_find_get(const char *instance)
8530 {
8531 	struct trace_array *tr;
8532 
8533 	guard(mutex)(&trace_types_lock);
8534 	tr = trace_array_find(instance);
8535 	if (tr && __trace_array_get(tr) < 0)
8536 		tr = NULL;
8537 
8538 	return tr;
8539 }
8540 
8541 static int trace_array_create_dir(struct trace_array *tr)
8542 {
8543 	int ret;
8544 
8545 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8546 	if (!tr->dir)
8547 		return -EINVAL;
8548 
8549 	ret = event_trace_add_tracer(tr->dir, tr);
8550 	if (ret) {
8551 		tracefs_remove(tr->dir);
8552 		return ret;
8553 	}
8554 
8555 	init_tracer_tracefs(tr, tr->dir);
8556 	ret = __update_tracer(tr);
8557 	if (ret) {
8558 		event_trace_del_tracer(tr);
8559 		tracefs_remove(tr->dir);
8560 		return ret;
8561 	}
8562 	return 0;
8563 }
8564 
/*
 * Allocate and set up a new trace array instance called @name.
 *
 * @systems, when not NULL, is saved in tr->system_names.
 * @range_addr_start and @range_addr_size are stored in the instance before
 * its buffers are allocated; trace_array_create() passes zero for both.
 *
 * On success the instance is added to ftrace_trace_arrays, its reference
 * count is incremented and the instance is returned.  On failure everything
 * set up so far is released and an ERR_PTR() is returned; the error is
 * -ENOMEM unless trace_array_create_dir() reported something else.
 *
 * The callers in view (instance_mkdir() and trace_array_get_by_name()) hold
 * event_mutex and trace_types_lock around this call.
 */
static struct trace_array *
trace_array_create_systems(const char *name, const char *systems,
			   unsigned long range_addr_start,
			   unsigned long range_addr_size)
{
	struct trace_array *tr;
	int ret;

	/* Every failure up to trace_array_create_dir() reports -ENOMEM */
	ret = -ENOMEM;
	tr = kzalloc_obj(*tr);
	if (!tr)
		return ERR_PTR(ret);

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (systems) {
		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
		if (!tr->system_names)
			goto out_free_tr;
	}

	/* Only for boot up memory mapped ring buffers */
	tr->range_addr_start = range_addr_start;
	tr->range_addr_size = range_addr_size;

	/* Start from the global flags, minus the ones in ZEROED_TRACE_FLAGS */
	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->syscall_buf_sz = global_trace.syscall_buf_sz;

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&tr->snapshot_trigger_lock);
#endif
	/* A new instance starts out with the nop tracer */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);
	INIT_LIST_HEAD(&tr->hist_vars);
	INIT_LIST_HEAD(&tr->err_log);
	INIT_LIST_HEAD(&tr->tracers);
	INIT_LIST_HEAD(&tr->marker_list);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&tr->mod_events);
#endif

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	/* The ring buffer is expanded by default */
	trace_set_ring_buffer_expanded(tr);

	if (ftrace_allocate_ftrace_ops(tr) < 0)
		goto out_free_tr;

	trace_array_init_autoremove(tr);

	ftrace_init_trace_array(tr);

	init_trace_flags_index(tr);

	/*
	 * Without an instances directory only the events are added here;
	 * create_trace_instances() creates the directory of every named
	 * instance once the instances directory exists.
	 */
	if (trace_instance_dir) {
		ret = trace_array_create_dir(tr);
		if (ret)
			goto out_free_tr;
	} else
		__trace_early_add_events(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	/* The initial reference of a newly created instance */
	tr->ref++;

	return tr;

 out_free_tr:
	ftrace_free_ftrace_ops(tr);
	free_trace_buffers(tr);
	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->range_name);
	kfree(tr->name);
	kfree(tr);

	return ERR_PTR(ret);
}
8664 
/*
 * Create an instance called @name with no system list and no memory range
 * (both range arguments are zero).  Returns the new trace array or an
 * ERR_PTR(), as trace_array_create_systems() does.
 */
static struct trace_array *trace_array_create(const char *name)
{
	return trace_array_create_systems(name, NULL, 0, 0);
}
8669 
8670 static int instance_mkdir(const char *name)
8671 {
8672 	struct trace_array *tr;
8673 	int ret;
8674 
8675 	guard(mutex)(&event_mutex);
8676 	guard(mutex)(&trace_types_lock);
8677 
8678 	ret = -EEXIST;
8679 	if (trace_array_find(name))
8680 		return -EEXIST;
8681 
8682 	tr = trace_array_create(name);
8683 
8684 	ret = PTR_ERR_OR_ZERO(tr);
8685 
8686 	return ret;
8687 }
8688 
8689 #ifdef CONFIG_MMU
8690 static u64 map_pages(unsigned long start, unsigned long size)
8691 {
8692 	unsigned long vmap_start, vmap_end;
8693 	struct vm_struct *area;
8694 	int ret;
8695 
8696 	area = get_vm_area(size, VM_IOREMAP);
8697 	if (!area)
8698 		return 0;
8699 
8700 	vmap_start = (unsigned long) area->addr;
8701 	vmap_end = vmap_start + size;
8702 
8703 	ret = vmap_page_range(vmap_start, vmap_end,
8704 			      start, pgprot_nx(PAGE_KERNEL));
8705 	if (ret < 0) {
8706 		free_vm_area(area);
8707 		return 0;
8708 	}
8709 
8710 	return (u64)vmap_start;
8711 }
8712 #else
8713 static inline u64 map_pages(unsigned long start, unsigned long size)
8714 {
8715 	return 0;
8716 }
8717 #endif
8718 
8719 /**
8720  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8721  * @name: The name of the trace array to be looked up/created.
8722  * @systems: A list of systems to create event directories for (NULL for all)
8723  *
8724  * Returns pointer to trace array with given name.
8725  * NULL, if it cannot be created.
8726  *
8727  * NOTE: This function increments the reference counter associated with the
8728  * trace array returned. This makes sure it cannot be freed while in use.
8729  * Use trace_array_put() once the trace array is no longer needed.
8730  * If the trace_array is to be freed, trace_array_destroy() needs to
8731  * be called after the trace_array_put(), or simply let user space delete
8732  * it from the tracefs instances directory. But until the
8733  * trace_array_put() is called, user space can not delete it.
8734  *
8735  */
8736 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
8737 {
8738 	struct trace_array *tr;
8739 
8740 	guard(mutex)(&event_mutex);
8741 	guard(mutex)(&trace_types_lock);
8742 
8743 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8744 		if (tr->name && strcmp(tr->name, name) == 0) {
8745 			/* if this fails, @tr is going to be removed. */
8746 			if (__trace_array_get(tr) < 0)
8747 				tr = NULL;
8748 			return tr;
8749 		}
8750 	}
8751 
8752 	tr = trace_array_create_systems(name, systems, 0, 0);
8753 
8754 	if (IS_ERR(tr))
8755 		tr = NULL;
8756 	else
8757 		tr->ref++;
8758 
8759 	return tr;
8760 }
8761 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8762 
/*
 * Unregister the instance @tr, tear down everything attached to it and
 * free it.
 *
 * Returns -EBUSY, leaving @tr untouched, when more than the initial
 * reference is held or when a tracer is set and tr->trace_ref is non-zero.
 * Returns 0 once the instance has been freed.
 *
 * The callers in view (trace_array_destroy() and instance_rmdir()) hold
 * event_mutex and trace_types_lock.
 */
static int __remove_instance(struct trace_array *tr)
{
	int i;

	/* Reference counter for a newly created trace array = 1. */
	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
		return -EBUSY;

	list_del(&tr->list);

	/* Fall back to the top level buffer if printk was using this one */
	if (printk_trace == tr)
		update_printk_trace(&global_trace);

	/* Must be done before disabling all the flags */
	if (update_marker_trace(tr, 0))
		synchronize_rcu();

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1ULL << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1ULL << i, 0);
	}

	trace_array_cancel_autoremove(tr);
	tracing_set_nop(tr);
	clear_ftrace_function_probes(tr);
	event_trace_del_tracer(tr);
	ftrace_clear_pids(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove(tr->dir);
	free_percpu(tr->last_func_repeats);
	free_trace_buffers(tr);
	clear_tracing_err_log(tr);
	free_tracers(tr);

	/* Release the named reserved memory region, if one was recorded */
	if (tr->range_name) {
		reserve_mem_release_by_name(tr->range_name);
		kfree(tr->range_name);
	}
	/* The range is vfree()d only when it is flagged as vmalloc memory */
	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
		vfree((void *)tr->range_addr_start);

	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->name);
	kfree(tr);

	return 0;
}
8818 
8819 int trace_array_destroy(struct trace_array *this_tr)
8820 {
8821 	struct trace_array *tr;
8822 
8823 	if (!this_tr)
8824 		return -EINVAL;
8825 
8826 	guard(mutex)(&event_mutex);
8827 	guard(mutex)(&trace_types_lock);
8828 
8829 
8830 	/* Making sure trace array exists before destroying it. */
8831 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8832 		if (tr == this_tr)
8833 			return __remove_instance(tr);
8834 	}
8835 
8836 	return -ENODEV;
8837 }
8838 EXPORT_SYMBOL_GPL(trace_array_destroy);
8839 
8840 static int instance_rmdir(const char *name)
8841 {
8842 	struct trace_array *tr;
8843 
8844 	guard(mutex)(&event_mutex);
8845 	guard(mutex)(&trace_types_lock);
8846 
8847 	tr = trace_array_find(name);
8848 	if (!tr)
8849 		return -ENODEV;
8850 
8851 	return __remove_instance(tr);
8852 }
8853 
8854 static __init void create_trace_instances(struct dentry *d_tracer)
8855 {
8856 	struct trace_array *tr;
8857 
8858 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8859 							 instance_mkdir,
8860 							 instance_rmdir);
8861 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8862 		return;
8863 
8864 	guard(mutex)(&event_mutex);
8865 	guard(mutex)(&trace_types_lock);
8866 
8867 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8868 		if (!tr->name)
8869 			continue;
8870 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8871 			     "Failed to create instance directory\n"))
8872 			return;
8873 	}
8874 }
8875 
/*
 * Create the tracefs control files of the trace array @tr in the
 * directory @d_tracer.
 *
 * For a read-only trace array (trace_array_is_readonly()) the files that
 * would otherwise use TRACE_MODE_WRITE via writable_mode are created with
 * TRACE_MODE_READ, and the function returns before creating the files
 * below the read-only check.
 *
 * Also sets the default tr->buffer_percent and, for writable trace arrays,
 * looks up tr->trace_marker_file.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	umode_t writable_mode = TRACE_MODE_WRITE;
	int cpu;

	if (trace_array_is_readonly(tr))
		writable_mode = TRACE_MODE_READ;

	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
			  tr, &show_traces_fops);

	trace_create_file("current_tracer", writable_mode, d_tracer,
			  tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", writable_mode, d_tracer,
			  tr, &tracing_cpumask_fops);

	/* Options are used for changing print-format even for readonly instance. */
	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", writable_mode, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("trace_clock", writable_mode, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	/* Default value; set here even though the file is created further down */
	tr->buffer_percent = 50;

	trace_create_file("buffer_subbuf_size_kb", writable_mode, d_tracer,
			  tr, &buffer_subbuf_size_fops);

	create_trace_options_dir(tr);

	/* Only trace arrays with a memory range get last_boot_info */
	if (tr->range_addr_start)
		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
				  tr, &last_boot_fops);

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	/* Read-only instance has above files only. */
	if (trace_array_is_readonly(tr))
		return;

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
			  tr, &buffer_percent_fops);

	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_syscall_buf_fops);

	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_maxlat_file(tr, d_tracer);

	if (ftrace_create_function_files(tr, d_tracer))
		MEM_FAIL(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	/* No snapshot file for trace arrays that have a memory range */
	if (!tr->range_addr_start)
		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
				  tr, &snapshot_fops);
#endif

	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_err_log_fops);

	ftrace_init_tracefs(tr, d_tracer);
}
8970 
#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
/*
 * Automount callback for the debugfs "tracing" directory (installed by
 * tracing_init_dentry()): mounts tracefs on @mntpt.
 *
 * Returns the new mount, NULL if the tracefs filesystem type is not
 * available, or an ERR_PTR() on failure.  The second argument is unused.
 */
static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
{
	struct file_system_type *fstype;
	struct fs_context *fc;
	struct vfsmount *mnt;
	int err;

	/*
	 * To maintain backward compatibility for tools that mount
	 * debugfs to get to the tracing facility, tracefs is automatically
	 * mounted to the debugfs/tracing directory.
	 */
	fstype = get_fs_type("tracefs");
	if (!fstype)
		return NULL;

	fc = fs_context_for_submount(fstype, mntpt);
	/* The filesystem type reference is dropped whether or not that worked */
	put_filesystem(fstype);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");

	err = vfs_parse_fs_string(fc, "source", "tracefs");
	if (err)
		mnt = ERR_PTR(err);
	else
		mnt = fc_mount(fc);

	put_fs_context(fc);
	return mnt;
}
#endif
9005 
/**
 * tracing_init_dentry - initialize top level trace array
 *
 * This is called before files or directories are created in the top level
 * tracing directory, for example by tracer_init_tracefs(), which runs as
 * an fs_initcall().
 *
 * With CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED it also creates the "tracing"
 * automount point in debugfs and stores it in the top level trace array.
 *
 * Returns 0 on success (including when the top level directory has already
 * been set), -EPERM when tracefs is locked down, or -ENODEV when tracefs
 * has not been initialized.
 */
int tracing_init_dentry(void)
{
	struct trace_array *tr = &global_trace;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/* The top level trace array uses NULL as parent */
	if (tr->dir)
		return 0;

	if (WARN_ON(!tracefs_initialized()))
		return -ENODEV;

#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
	/*
	 * As there may still be users that expect the tracing
	 * files to exist in debugfs/tracing, we must automount
	 * the tracefs file system there, so older tools still
	 * work with the newer kernel.
	 */
	tr->dir = debugfs_create_automount("tracing", NULL,
					   trace_automount, NULL);
#endif

	return 0;
}
9042 
/*
 * Start and end of the built-in eval map pointer array;
 * eval_map_work_func() hands the range to
 * trace_event_update_with_eval_map().
 */
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

/*
 * Workqueue for boot time trace initialization work.  It is allocated in
 * trace_eval_init() and destroyed in trace_eval_sync(); it stays NULL if
 * the allocation fails.
 */
struct workqueue_struct *trace_init_wq __initdata;
/* Work item that runs eval_map_work_func() */
static struct work_struct eval_map_work __initdata;
/* Work item that runs tracer_init_tracefs_work_func() */
static struct work_struct tracerfs_init_work __initdata;
9049 
9050 static void __init eval_map_work_func(struct work_struct *work)
9051 {
9052 	int len;
9053 
9054 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9055 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
9056 }
9057 
9058 static int __init trace_eval_init(void)
9059 {
9060 	INIT_WORK(&eval_map_work, eval_map_work_func);
9061 
9062 	trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0);
9063 	if (!trace_init_wq) {
9064 		pr_err("Unable to allocate trace_init_wq\n");
9065 		/* Do work here */
9066 		eval_map_work_func(&eval_map_work);
9067 		return -ENOMEM;
9068 	}
9069 
9070 	queue_work(trace_init_wq, &eval_map_work);
9071 	return 0;
9072 }
9073 
9074 subsys_initcall(trace_eval_init);
9075 
9076 static int __init trace_eval_sync(void)
9077 {
9078 	/* Make sure the eval map updates are finished */
9079 	if (trace_init_wq)
9080 		destroy_workqueue(trace_init_wq);
9081 	return 0;
9082 }
9083 
9084 late_initcall_sync(trace_eval_sync);
9085 
9086 
9087 #ifdef CONFIG_MODULES
9088 
9089 bool module_exists(const char *module)
9090 {
9091 	/* All modules have the symbol __this_module */
9092 	static const char this_mod[] = "__this_module";
9093 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
9094 	unsigned long val;
9095 	int n;
9096 
9097 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
9098 
9099 	if (n > sizeof(modname) - 1)
9100 		return false;
9101 
9102 	val = module_kallsyms_lookup_name(modname);
9103 	return val != 0;
9104 }
9105 
9106 static void trace_module_add_evals(struct module *mod)
9107 {
9108 	/*
9109 	 * Modules with bad taint do not have events created, do
9110 	 * not bother with enums either.
9111 	 */
9112 	if (trace_module_has_bad_taint(mod))
9113 		return;
9114 
9115 	/* Even if no trace_evals, this need to sanitize field types. */
9116 	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9117 }
9118 
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Remove the eval map chunk that belongs to @mod from the trace_eval_maps
 * list and free it.  Nothing is done when the module has no eval maps or
 * when no chunk of the module is on the list.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	/* The link that points at the chunk currently being examined */
	union trace_eval_map_item **last = &trace_eval_maps;

	if (!mod->num_trace_evals)
		return;

	guard(mutex)(&trace_eval_mutex);

	map = trace_eval_maps;

	/* Step from chunk to chunk through each tail until @mod's head is found */
	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		return;

	/* Unlink: make the previous link skip to what follows this chunk */
	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9148 
9149 static void trace_module_record(struct module *mod, bool add)
9150 {
9151 	struct trace_array *tr;
9152 	unsigned long flags;
9153 
9154 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9155 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
9156 		/* Update any persistent trace array that has already been started */
9157 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
9158 			guard(mutex)(&scratch_mutex);
9159 			save_mod(mod, tr);
9160 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
9161 			/* Update delta if the module loaded in previous boot */
9162 			make_mod_delta(mod, tr);
9163 		}
9164 	}
9165 }
9166 
9167 static int trace_module_notify(struct notifier_block *self,
9168 			       unsigned long val, void *data)
9169 {
9170 	struct module *mod = data;
9171 
9172 	switch (val) {
9173 	case MODULE_STATE_COMING:
9174 		trace_module_add_evals(mod);
9175 		trace_module_record(mod, true);
9176 		break;
9177 	case MODULE_STATE_GOING:
9178 		trace_module_remove_evals(mod);
9179 		trace_module_record(mod, false);
9180 		break;
9181 	}
9182 
9183 	return NOTIFY_OK;
9184 }
9185 
/*
 * Module notifier; registered by tracer_init_tracefs_work_func() when
 * CONFIG_MODULES is set.
 */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
9190 #endif /* CONFIG_MODULES */
9191 
/*
 * Populate the top level tracing directory: the trace events, the control
 * files of the global trace array, the files that exist at the top level
 * only, the "instances" directory and finally the tracer options.
 *
 * Run from trace_init_wq when that workqueue exists, or called directly
 * with a NULL @work otherwise (see tracer_init_tracefs()).  @work is not
 * used.
 */
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	/* A NULL parent stands for the top level tracing directory */
	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options();
}
9230 
9231 static __init int tracer_init_tracefs(void)
9232 {
9233 	int ret;
9234 
9235 	trace_access_lock_init();
9236 
9237 	ret = tracing_init_dentry();
9238 	if (ret)
9239 		return 0;
9240 
9241 	if (trace_init_wq) {
9242 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9243 		queue_work(trace_init_wq, &tracerfs_init_work);
9244 	} else {
9245 		tracer_init_tracefs_work_func(NULL);
9246 	}
9247 
9248 	if (rv_init_interface())
9249 		pr_err("RV: Error while creating the RV interface\n");
9250 
9251 	return 0;
9252 }
9253 
9254 fs_initcall(tracer_init_tracefs);
9255 
/* Defined below; declared here because both notifier blocks refer to it */
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

/*
 * The panic and the die notifier share one handler, which tells them apart
 * by the notifier_block it is called with.  Both use priority INT_MAX - 1.
 */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
9268 
9269 /*
9270  * The idea is to execute the following die/panic callback early, in order
9271  * to avoid showing irrelevant information in the trace (like other panic
9272  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9273  * warnings get disabled (to prevent potential log flooding).
9274  */
9275 static int trace_die_panic_handler(struct notifier_block *self,
9276 				unsigned long ev, void *unused)
9277 {
9278 	if (!ftrace_dump_on_oops_enabled())
9279 		return NOTIFY_DONE;
9280 
9281 	/* The die notifier requires DIE_OOPS to trigger */
9282 	if (self == &trace_die_notifier && ev != DIE_OOPS)
9283 		return NOTIFY_DONE;
9284 
9285 	ftrace_dump(DUMP_PARAM);
9286 
9287 	return NOTIFY_DONE;
9288 }
9289 
/*
 * printk is set to a max of 1024; we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 * trace_printk_seq() caps the length of what it prints at this value.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define KERN_TRACE here so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG
9302 
/*
 * Print the contents of the trace_seq @s with printk() at KERN_TRACE level
 * and then reinitialize @s.
 *
 * The length is capped at TRACE_MAX_PRINT, and additionally kept below the
 * buffer size, before the buffer is NUL terminated and printed.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}
9325 
/*
 * Set up @iter for reading the main buffer of @tr on all CPUs with the
 * tracer currently set in @tr.
 *
 * The tracer's open() callback, if it has one, is called on the iterator.
 * The temp and fmt buffers of the iterator are pointed at static buffers
 * instead of allocated memory.
 */
static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
{
	iter->tr = tr;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &tr->array_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
9350 
/* Set up @iter for reading the top level (global) trace array. */
void trace_init_global_iter(struct trace_iterator *iter)
{
	trace_init_iter(iter, &global_trace);
}
9355 
/*
 * Dump the contents of the trace array @tr with printk().
 *
 * With DUMP_ORIG only the buffer of the CPU running the dump is printed;
 * any other @dump_mode prints the buffers of all CPUs.  Entries are
 * consumed as they are printed.  Tracing is turned off for @tr and is not
 * turned back on here.
 *
 * The iterator is a single static object; ftrace_dump() allows only one
 * dump to run at a time.
 */
static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0;

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracer_tracing_off(tr);

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_iter(&iter, tr);

	/* While dumping, do not allow the buffer to be enabled */
	tracer_tracing_disable(tr);

	/* Remembered so that the flag can be restored after the dump */
	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);

	if (dump_mode == DUMP_ORIG)
		iter.cpu_file = raw_smp_processor_id();
	else
		iter.cpu_file = RING_BUFFER_ALL_CPUS;

	if (tr == &global_trace)
		printk(KERN_TRACE "Dumping ftrace buffer:\n");
	else
		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		/* Print the separator once, before the first entry */
		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);

			trace_printk_seq(&iter.seq);
		}
		/* A long dump must not trip the NMI watchdog */
		touch_nmi_watchdog();
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

	tr->trace_flags |= old_userobj;

	tracer_tracing_enable(tr);
	local_irq_restore(flags);
}
9442 
/*
 * Dump the trace arrays selected by the ftrace_dump_on_oops string.
 *
 * The string is a comma separated list.  The first entry may select how
 * the global trace array is dumped: "0" skips it, "1" dumps all CPUs,
 * "2" or "orig_cpu" dumps only the current CPU.  Any other entry,
 * including a first entry that is none of the above, is taken as
 * "<instance>" or "<instance>=<mode>", where a mode of "2" or "orig_cpu"
 * dumps only the current CPU of that instance and anything else dumps all
 * CPUs.  Unknown instances are reported and skipped.
 */
static void ftrace_dump_by_param(void)
{
	bool first_param = true;
	char dump_param[MAX_TRACER_SIZE];
	char *buf, *token, *inst_name;
	struct trace_array *tr;

	/* Work on a copy, as strsep() modifies the string it parses */
	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
	buf = dump_param;

	while ((token = strsep(&buf, ",")) != NULL) {
		if (first_param) {
			first_param = false;
			if (!strcmp("0", token))
				continue;
			else if (!strcmp("1", token)) {
				ftrace_dump_one(&global_trace, DUMP_ALL);
				continue;
			}
			else if (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)) {
				ftrace_dump_one(&global_trace, DUMP_ORIG);
				continue;
			}
		}

		/* After this, token is the text behind '=' or NULL */
		inst_name = strsep(&token, "=");
		tr = trace_array_find(inst_name);
		if (!tr) {
			printk(KERN_TRACE "Instance %s not found\n", inst_name);
			continue;
		}

		if (token && (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)))
			ftrace_dump_one(tr, DUMP_ORIG);
		else
			ftrace_dump_one(tr, DUMP_ALL);
	}
}
9483 
9484 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9485 {
9486 	static atomic_t dump_running;
9487 
9488 	/* Only allow one dump user at a time. */
9489 	if (atomic_inc_return(&dump_running) != 1) {
9490 		atomic_dec(&dump_running);
9491 		return;
9492 	}
9493 
9494 	switch (oops_dump_mode) {
9495 	case DUMP_ALL:
9496 		ftrace_dump_one(&global_trace, DUMP_ALL);
9497 		break;
9498 	case DUMP_ORIG:
9499 		ftrace_dump_one(&global_trace, DUMP_ORIG);
9500 		break;
9501 	case DUMP_PARAM:
9502 		ftrace_dump_by_param();
9503 		break;
9504 	case DUMP_NONE:
9505 		break;
9506 	default:
9507 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9508 		ftrace_dump_one(&global_trace, DUMP_ALL);
9509 	}
9510 
9511 	atomic_dec(&dump_running);
9512 }
9513 EXPORT_SYMBOL_GPL(ftrace_dump);
9514 
/* Size of the kernel buffer that user data is copied into, chunk by chunk */
#define WRITE_BUFSIZE  4096

/*
 * Split what user space wrote into lines and run @createfn on each one.
 *
 * @buffer/@count describe the user data.  It is copied in chunks of at
 * most WRITE_BUFSIZE - 1 bytes.  Each chunk is split at newlines,
 * everything from a '#' to the end of a line is cut off as a comment, and
 * @createfn is called with the result.  A line that is cut by the end of
 * a chunk is read again at the start of the next chunk.  @file and @ppos
 * are not used.
 *
 * Returns the number of bytes processed, -ENOMEM or -EFAULT if the data
 * cannot be copied, -EINVAL if a single line does not fit into the buffer,
 * or the first non-zero value returned by @createfn.
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf __free(kfree) = NULL;
	char *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		/* Leave room for the terminating '\0' */
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size))
			return -EFAULT;

		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				/* The line including its newline */
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				/* No newline although more user data follows */
				if (done + size < count) {
					/*
					 * Not at the start of the chunk: read
					 * again starting with this line.
					 */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					return -EINVAL;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				return ret;
			buf += size;

		} while (done < count);
	}
	return done;
}
9575 
9576 __init static int backup_instance_area(const char *backup,
9577 				       unsigned long *addr, phys_addr_t *size)
9578 {
9579 	struct trace_array *backup_tr;
9580 	void *allocated_vaddr = NULL;
9581 
9582 	backup_tr = trace_array_get_by_name(backup, NULL);
9583 	if (!backup_tr) {
9584 		pr_warn("Tracing: Instance %s is not found.\n", backup);
9585 		return -ENOENT;
9586 	}
9587 
9588 	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
9589 		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
9590 		trace_array_put(backup_tr);
9591 		return -EINVAL;
9592 	}
9593 
9594 	*size = backup_tr->range_addr_size;
9595 
9596 	allocated_vaddr = vzalloc(*size);
9597 	if (!allocated_vaddr) {
9598 		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
9599 			backup, (unsigned long)*size);
9600 		trace_array_put(backup_tr);
9601 		return -ENOMEM;
9602 	}
9603 
9604 	memcpy(allocated_vaddr,
9605 		(void *)backup_tr->range_addr_start, (size_t)*size);
9606 	*addr = (unsigned long)allocated_vaddr;
9607 
9608 	trace_array_put(backup_tr);
9609 	return 0;
9610 }
9611 
9612 __init static void enable_instances(void)
9613 {
9614 	struct trace_array *tr;
9615 	bool memmap_area = false;
9616 	char *curr_str;
9617 	char *name;
9618 	char *str;
9619 	char *tok;
9620 
9621 	/* A tab is always appended */
9622 	boot_instance_info[boot_instance_index - 1] = '\0';
9623 	str = boot_instance_info;
9624 
9625 	while ((curr_str = strsep(&str, "\t"))) {
9626 		phys_addr_t start = 0;
9627 		phys_addr_t size = 0;
9628 		unsigned long addr = 0;
9629 		bool traceprintk = false;
9630 		bool traceoff = false;
9631 		char *flag_delim;
9632 		char *addr_delim;
9633 		char *rname __free(kfree) = NULL;
9634 		char *backup;
9635 
9636 		tok = strsep(&curr_str, ",");
9637 
9638 		name = strsep(&tok, "=");
9639 		backup = tok;
9640 
9641 		flag_delim = strchr(name, '^');
9642 		addr_delim = strchr(name, '@');
9643 
9644 		if (addr_delim)
9645 			*addr_delim++ = '\0';
9646 
9647 		if (flag_delim)
9648 			*flag_delim++ = '\0';
9649 
9650 		if (backup) {
9651 			if (backup_instance_area(backup, &addr, &size) < 0)
9652 				continue;
9653 		}
9654 
9655 		if (flag_delim) {
9656 			char *flag;
9657 
9658 			while ((flag = strsep(&flag_delim, "^"))) {
9659 				if (strcmp(flag, "traceoff") == 0) {
9660 					traceoff = true;
9661 				} else if ((strcmp(flag, "printk") == 0) ||
9662 					   (strcmp(flag, "traceprintk") == 0) ||
9663 					   (strcmp(flag, "trace_printk") == 0)) {
9664 					traceprintk = true;
9665 				} else {
9666 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
9667 						flag, name);
9668 				}
9669 			}
9670 		}
9671 
9672 		tok = addr_delim;
9673 		if (tok && isdigit(*tok)) {
9674 			start = memparse(tok, &tok);
9675 			if (!start) {
9676 				pr_warn("Tracing: Invalid boot instance address for %s\n",
9677 					name);
9678 				continue;
9679 			}
9680 			if (*tok != ':') {
9681 				pr_warn("Tracing: No size specified for instance %s\n", name);
9682 				continue;
9683 			}
9684 			tok++;
9685 			size = memparse(tok, &tok);
9686 			if (!size) {
9687 				pr_warn("Tracing: Invalid boot instance size for %s\n",
9688 					name);
9689 				continue;
9690 			}
9691 			memmap_area = true;
9692 		} else if (tok) {
9693 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
9694 				start = 0;
9695 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
9696 				continue;
9697 			}
9698 			rname = kstrdup(tok, GFP_KERNEL);
9699 		}
9700 
9701 		if (start) {
9702 			/* Start and size must be page aligned */
9703 			if (start & ~PAGE_MASK) {
9704 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
9705 				continue;
9706 			}
9707 			if (size & ~PAGE_MASK) {
9708 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
9709 				continue;
9710 			}
9711 
9712 			if (memmap_area)
9713 				addr = map_pages(start, size);
9714 			else
9715 				addr = (unsigned long)phys_to_virt(start);
9716 			if (addr) {
9717 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
9718 					name, &start, (unsigned long)size);
9719 			} else {
9720 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
9721 				continue;
9722 			}
9723 		} else {
9724 			/* Only non mapped buffers have snapshot buffers */
9725 			do_allocate_snapshot(name);
9726 		}
9727 
9728 		tr = trace_array_create_systems(name, NULL, addr, size);
9729 		if (IS_ERR(tr)) {
9730 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
9731 			continue;
9732 		}
9733 
9734 		if (traceoff)
9735 			tracer_tracing_off(tr);
9736 
9737 		if (traceprintk)
9738 			update_printk_trace(tr);
9739 
9740 		/*
9741 		 * memmap'd buffers can not be freed.
9742 		 */
9743 		if (memmap_area) {
9744 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
9745 			tr->ref++;
9746 		}
9747 
9748 		/*
9749 		 * Backup buffers can be freed but need vfree().
9750 		 */
9751 		if (backup) {
9752 			tr->flags |= TRACE_ARRAY_FL_VMALLOC | TRACE_ARRAY_FL_RDONLY;
9753 			trace_array_start_autoremove();
9754 		}
9755 
9756 		if (start || backup) {
9757 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
9758 			tr->range_name = no_free_ptr(rname);
9759 		}
9760 
9761 		/*
9762 		 * Save the events to start and enabled them after all boot instances
9763 		 * have been created.
9764 		 */
9765 		tr->boot_events = curr_str;
9766 	}
9767 
9768 	/* Enable the events after all boot instances have been created */
9769 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9770 
9771 		if (!tr->boot_events || !(*tr->boot_events)) {
9772 			tr->boot_events = NULL;
9773 			continue;
9774 		}
9775 
9776 		curr_str = tr->boot_events;
9777 
9778 		/* Clear the instance if this is a persistent buffer */
9779 		if (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)
9780 			update_last_data(tr);
9781 
9782 		while ((tok = strsep(&curr_str, ","))) {
9783 			early_enable_events(tr, tok, true);
9784 		}
9785 		tr->boot_events = NULL;
9786 	}
9787 }
9788 
/*
 * tracer_alloc_buffers - boot-time setup of global_trace and its buffers
 *
 * Allocates the cpumasks, the temp_buffer, the saved cmdlines buffer and the
 * ring buffers of global_trace, then initializes the remaining global_trace
 * fields, registers the panic/die notifiers and the nop tracer, and clears
 * tracing_disabled.
 *
 * Returns 0 on success, -EPERM if tracefs is locked down, the error from
 * cpuhp_setup_state_multi() if that fails, or -ENOMEM for any allocation
 * failure. On failure everything allocated so far is released in reverse
 * order through the out_* labels at the end of the function.
 */
__init static int tracer_alloc_buffers(void)
{
	unsigned long ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	/* Nothing to unwind yet, so return directly */
	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		return -ENOMEM;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (global_trace.ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	/* ret was overwritten above; the failures below are allocation failures */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	/*
	 * A failure to set the boot clock is only warned about; ret is not
	 * looked at again because the success path returns 0 explicitly.
	 */
	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;
	global_trace.current_trace_flags = nop_trace.flags;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	INIT_LIST_HEAD(&global_trace.tracers);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	global_trace.syscall_buf_sz = syscall_buf_size;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

	/* Error unwind: each label releases one resource, newest first */
out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}
9932 
#ifdef CONFIG_FUNCTION_TRACER
/*
 * Hand out a pointer to global_trace.
 * Used to set module cached ftrace filtering at boot up.
 */
struct trace_array *trace_get_global_array(void)
{
	struct trace_array *global = &global_trace;

	return global;
}
#endif
9940 
9941 void __init early_trace_init(void)
9942 {
9943 	if (tracepoint_printk) {
9944 		tracepoint_print_iter = kzalloc_obj(*tracepoint_print_iter);
9945 		if (MEM_FAIL(!tracepoint_print_iter,
9946 			     "Failed to allocate trace iterator\n"))
9947 			tracepoint_printk = 0;
9948 		else
9949 			static_key_enable(&tracepoint_printk_key.key);
9950 	}
9951 	tracer_alloc_buffers();
9952 
9953 	init_events();
9954 }
9955 
9956 void __init trace_init(void)
9957 {
9958 	trace_event_init();
9959 
9960 	if (boot_instance_index)
9961 		enable_instances();
9962 }
9963 
9964 __init static void clear_boot_tracer(void)
9965 {
9966 	/*
9967 	 * The default tracer at boot buffer is an init section.
9968 	 * This function is called in lateinit. If we did not
9969 	 * find the boot tracer, then clear it out, to prevent
9970 	 * later registration from accessing the buffer that is
9971 	 * about to be freed.
9972 	 */
9973 	if (!default_bootup_tracer)
9974 		return;
9975 
9976 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9977 	       default_bootup_tracer);
9978 	default_bootup_tracer = NULL;
9979 }
9980 
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * Switch global_trace to the "global" clock when no boot clock was requested
 * and the sched clock is not stable. Nothing is changed under lockdown.
 */
__init static void tracing_set_default_clock(void)
{
	/* A clock given at boot is left untouched */
	if (trace_boot_clock)
		return;

	/* sched_clock_stable() is determined in late_initcall */
	if (sched_clock_stable())
		return;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not set tracing clock due to lockdown\n");
		return;
	}

	printk(KERN_WARNING
	       "Unstable clock detected, switching default tracing clock to \"global\"\n"
	       "If you want to keep using the local clock, then add:\n"
	       "  \"trace_clock=local\"\n"
	       "on the kernel command line\n");
	tracing_set_clock(&global_trace, "global");
}
#else
/* Without CONFIG_HAVE_UNSTABLE_SCHED_CLOCK there is nothing to adjust */
static inline void tracing_set_default_clock(void) { }
#endif
10002 
10003 __init static int late_trace_init(void)
10004 {
10005 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10006 		static_key_disable(&tracepoint_printk_key.key);
10007 		tracepoint_printk = 0;
10008 	}
10009 
10010 	if (traceoff_after_boot)
10011 		tracing_off();
10012 
10013 	tracing_set_default_clock();
10014 	clear_boot_tracer();
10015 	return 0;
10016 }
10017 
10018 late_initcall_sync(late_trace_init);
10019