xref: /linux/kernel/trace/trace.c (revision cb30bf881c5b4ee8b879558a2fce93d7de652955)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55 
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57 
58 #include "trace.h"
59 #include "trace_output.h"
60 
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occurred
67  * at the same time, giving false positive or negative results.
68  */
69 bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #else
85 #define tracing_selftest_disabled	0
86 #endif
87 
88 /* Pipe tracepoints to printk */
89 static struct trace_iterator *tracepoint_print_iter;
90 int tracepoint_printk;
91 static bool tracepoint_printk_stop_on_boot __initdata;
92 static bool traceoff_after_boot __initdata;
93 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
94 
95 /* Store tracers and their flags per instance */
96 struct tracers {
97 	struct list_head	list;
98 	struct tracer		*tracer;
99 	struct tracer_flags	*flags;
100 };
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 #define MAX_TRACER_SIZE		100
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputting it to a
127  * serial console.
128  *
129  * It is default off, but you can enable it with either specifying
130  * "ftrace_dump_on_oops" in the kernel command line, or setting
131  * /proc/sys/kernel/ftrace_dump_on_oops
132  * Set 1 if you want to dump buffers of all CPUs
133  * Set 2 if you want to dump the buffer of the CPU that triggered oops
134  * Set instance name if you want to dump the specific trace instance
135  * Multiple instance dump is also supported, and instances are separated
136  * by commas.
137  */
138 /* Set to string format zero to disable by default */
139 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
140 
141 /* When set, tracing will stop when a WARN*() is hit */
142 static int __disable_trace_on_warning;
143 
144 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
145 			     void *buffer, size_t *lenp, loff_t *ppos);
146 static const struct ctl_table trace_sysctl_table[] = {
147 	{
148 		.procname	= "ftrace_dump_on_oops",
149 		.data		= &ftrace_dump_on_oops,
150 		.maxlen		= MAX_TRACER_SIZE,
151 		.mode		= 0644,
152 		.proc_handler	= proc_dostring,
153 	},
154 	{
155 		.procname	= "traceoff_on_warning",
156 		.data		= &__disable_trace_on_warning,
157 		.maxlen		= sizeof(__disable_trace_on_warning),
158 		.mode		= 0644,
159 		.proc_handler	= proc_dointvec,
160 	},
161 	{
162 		.procname	= "tracepoint_printk",
163 		.data		= &tracepoint_printk,
164 		.maxlen		= sizeof(tracepoint_printk),
165 		.mode		= 0644,
166 		.proc_handler	= tracepoint_printk_sysctl,
167 	},
168 };
169 
/* Register the tracing sysctl knobs under /proc/sys/kernel at boot. */
static int __init init_trace_sysctls(void)
{
	register_sysctl_init("kernel", trace_sysctl_table);
	return 0;
}
subsys_initcall(init_trace_sysctls);
176 
177 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
178 /* Map of enums to their values, for "eval_map" file */
179 struct trace_eval_map_head {
180 	struct module			*mod;
181 	unsigned long			length;
182 };
183 
184 union trace_eval_map_item;
185 
186 struct trace_eval_map_tail {
187 	/*
188 	 * "end" is first and points to NULL as it must be different
189 	 * than "mod" or "eval_string"
190 	 */
191 	union trace_eval_map_item	*next;
192 	const char			*end;	/* points to NULL */
193 };
194 
195 static DEFINE_MUTEX(trace_eval_mutex);
196 
197 /*
198  * The trace_eval_maps are saved in an array with two extra elements,
199  * one at the beginning, and one at the end. The beginning item contains
200  * the count of the saved maps (head.length), and the module they
201  * belong to if not built in (head.mod). The ending item contains a
202  * pointer to the next array of saved eval_map items.
203  */
204 union trace_eval_map_item {
205 	struct trace_eval_map		map;
206 	struct trace_eval_map_head	head;
207 	struct trace_eval_map_tail	tail;
208 };
209 
210 static union trace_eval_map_item *trace_eval_maps;
211 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
212 
213 int tracing_set_tracer(struct trace_array *tr, const char *buf);
214 static void ftrace_trace_userstack(struct trace_array *tr,
215 				   struct trace_buffer *buffer,
216 				   unsigned int trace_ctx);
217 
218 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
219 static char *default_bootup_tracer;
220 
221 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
222 static int boot_instance_index;
223 
224 /*
225  * Repeated boot parameters, including Bootconfig array expansions, need
226  * to stay in the delimiter form that the existing parser consumes.
227  */
228 void __init trace_append_boot_param(char *buf, const char *str, char sep,
229 				    int size)
230 {
231 	int len, needed, str_len;
232 
233 	if (!*str)
234 		return;
235 
236 	len = strlen(buf);
237 	str_len = strlen(str);
238 	needed = len + str_len + 1;
239 
240 	/* For continuation, account for the separator. */
241 	if (len)
242 		needed++;
243 	if (needed > size)
244 		return;
245 
246 	if (len)
247 		buf[len++] = sep;
248 
249 	strscpy(buf + len, str, size - len);
250 }
251 
/* Handle "ftrace=<tracer>": remember the tracer to enable at boot. */
static int __init set_cmdline_ftrace(char *str)
{
	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	trace_set_ring_buffer_expanded(NULL);
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);
261 
262 int ftrace_dump_on_oops_enabled(void)
263 {
264 	if (!strcmp("0", ftrace_dump_on_oops))
265 		return 0;
266 	else
267 		return 1;
268 }
269 
/*
 * Parse "ftrace_dump_on_oops[=val]".  Accepted forms:
 *   ftrace_dump_on_oops             -> dump all CPUs ("1")
 *   ftrace_dump_on_oops=,inst[,..]  -> "1" plus the instance list
 *   ftrace_dump_on_oops=<val>       -> the value copied verbatim
 */
static int __init set_ftrace_dump_on_oops(char *str)
{
	/* Bare parameter: default to dumping every CPU. */
	if (!*str) {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		return 1;
	}

	/* Leading comma: instance list only, prepend the "1" mode. */
	if (*str == ',') {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
		return 1;
	}

	/* "=value": copy the value as-is (mode and/or instance names). */
	if (*str++ == '=') {
		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
291 
292 static int __init stop_trace_on_warning(char *str)
293 {
294 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
295 		__disable_trace_on_warning = 1;
296 	return 1;
297 }
298 __setup("traceoff_on_warning", stop_trace_on_warning);
299 
/*
 * Collect "trace_instance=" boot parameters into boot_instance_info,
 * tab-separated, for later parsing during tracer initialization.
 */
static int __init boot_instance(char *str)
{
	char *slot = boot_instance_info + boot_instance_index;
	int left = sizeof(boot_instance_info) - boot_instance_index;
	int ret;

	/* Reject entries that cannot fit in the remaining space. */
	if (strlen(str) >= left)
		return -1;

	/*
	 * NOTE(review): when strlen(str) == left - 1 the trailing '\t' is
	 * truncated by snprintf while ret still counts it -- confirm the
	 * downstream parser copes with a missing final separator.
	 */
	ret = snprintf(slot, left, "%s\t", str);
	boot_instance_index += ret;

	return 1;
}
__setup("trace_instance=", boot_instance);
315 
316 
317 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
318 
/* Accumulate "trace_options=" boot parameters, comma separated. */
static int __init set_trace_boot_options(char *str)
{
	trace_append_boot_param(trace_boot_options_buf, str, ',',
				MAX_TRACER_SIZE);
	return 1;
}
__setup("trace_options=", set_trace_boot_options);
326 
327 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
328 static char *trace_boot_clock __initdata;
329 
/* "trace_clock=": remember the clock name to select once tracing is up. */
static int __init set_trace_boot_clock(char *str)
{
	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 1;
}
__setup("trace_clock=", set_trace_boot_clock);
337 
338 static int __init set_tracepoint_printk(char *str)
339 {
340 	/* Ignore the "tp_printk_stop_on_boot" param */
341 	if (*str == '_')
342 		return 0;
343 
344 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
345 		tracepoint_printk = 1;
346 	return 1;
347 }
348 __setup("tp_printk", set_tracepoint_printk);
349 
/* "tp_printk_stop_on_boot": stop the tp_printk output once boot finishes. */
static int __init set_tracepoint_printk_stop(char *str)
{
	tracepoint_printk_stop_on_boot = true;
	return 1;
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
356 
/* "traceoff_after_boot": turn tracing off once boot-up completes. */
static int __init set_traceoff_after_boot(char *str)
{
	traceoff_after_boot = true;
	return 1;
}
__setup("traceoff_after_boot", set_traceoff_after_boot);
363 
/* Convert nanoseconds to microseconds, rounding to nearest. */
unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;		/* round half-up before truncating division */
	do_div(nsec, 1000);	/* 64-bit-safe division, result left in nsec */
	return nsec;
}
370 
371 static void
372 trace_process_export(struct trace_export *export,
373 	       struct ring_buffer_event *event, int flag)
374 {
375 	struct trace_entry *entry;
376 	unsigned int size = 0;
377 
378 	if (export->flags & flag) {
379 		entry = ring_buffer_event_data(event);
380 		size = ring_buffer_event_length(event);
381 		export->write(export, entry, size);
382 	}
383 }
384 
385 static DEFINE_MUTEX(ftrace_export_lock);
386 
387 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
388 
389 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
390 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
391 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
392 
393 static inline void ftrace_exports_enable(struct trace_export *export)
394 {
395 	if (export->flags & TRACE_EXPORT_FUNCTION)
396 		static_branch_inc(&trace_function_exports_enabled);
397 
398 	if (export->flags & TRACE_EXPORT_EVENT)
399 		static_branch_inc(&trace_event_exports_enabled);
400 
401 	if (export->flags & TRACE_EXPORT_MARKER)
402 		static_branch_inc(&trace_marker_exports_enabled);
403 }
404 
405 static inline void ftrace_exports_disable(struct trace_export *export)
406 {
407 	if (export->flags & TRACE_EXPORT_FUNCTION)
408 		static_branch_dec(&trace_function_exports_enabled);
409 
410 	if (export->flags & TRACE_EXPORT_EVENT)
411 		static_branch_dec(&trace_event_exports_enabled);
412 
413 	if (export->flags & TRACE_EXPORT_MARKER)
414 		static_branch_dec(&trace_marker_exports_enabled);
415 }
416 
/*
 * Walk the exporter list and feed @event to every exporter that has
 * @flag set.  Preemption is disabled for the walk; raw RCU dereferences
 * are used as this may run from tracing context.
 */
static void ftrace_exports(struct ring_buffer_event *event, int flag)
{
	struct trace_export *export;

	guard(preempt_notrace)();

	export = rcu_dereference_raw_check(ftrace_exports_list);
	while (export) {
		trace_process_export(export, event, flag);
		export = rcu_dereference_raw_check(export->next);
	}
}
429 
/* Insert @export at the head of @list (RCU-publish ordering matters). */
static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included into the list.
	 */
	rcu_assign_pointer(*list, export);
}
442 
/*
 * Unlink @export from @list.  Returns 0 on success, -1 when @export
 * is not on the list.  The unlink is done by re-pointing the previous
 * element's next pointer, so concurrent RCU readers stay safe.
 */
static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	/* Find the link (head or ->next) that points at @export. */
	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	if (*p != export)
		return -1;

	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}
459 
/* Enable the static branches, then publish @export on @list. */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
467 
/* Unlink @export and drop its static-branch counts. */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret = rm_trace_export(list, export);

	/* Counts are dropped even when the export was not on the list. */
	ftrace_exports_disable(export);

	return ret;
}
478 
479 int register_ftrace_export(struct trace_export *export)
480 {
481 	if (WARN_ON_ONCE(!export->write))
482 		return -1;
483 
484 	guard(mutex)(&ftrace_export_lock);
485 
486 	add_ftrace_export(&ftrace_exports_list, export);
487 
488 	return 0;
489 }
490 EXPORT_SYMBOL_GPL(register_ftrace_export);
491 
/* Remove @export from the exporter list; -1 if it was never registered. */
int unregister_ftrace_export(struct trace_export *export)
{
	guard(mutex)(&ftrace_export_lock);
	return rm_ftrace_export(&ftrace_exports_list, export);
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);
498 
499 /* trace_flags holds trace_options default values */
500 #define TRACE_DEFAULT_FLAGS						\
501 	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
502 	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
503 	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
504 	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
505 	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
506 	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
507 	 TRACE_ITER(COPY_MARKER))
508 
509 /* trace_options that are only supported by global_trace */
510 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
511 	       TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |	\
512 	       TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
513 
514 /* trace_flags that are default zero for instances */
515 #define ZEROED_TRACE_FLAGS \
516 	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
517 	 TRACE_ITER(COPY_MARKER))
518 
519 /*
520  * The global_trace is the descriptor that holds the top-level tracing
521  * buffers for the live tracing.
522  */
523 static struct trace_array global_trace = {
524 	.trace_flags = TRACE_DEFAULT_FLAGS,
525 };
526 
527 struct trace_array *printk_trace = &global_trace;
528 
529 /* List of trace_arrays interested in the top level trace_marker */
530 static LIST_HEAD(marker_copies);
531 
532 static void update_printk_trace(struct trace_array *tr)
533 {
534 	if (printk_trace == tr)
535 		return;
536 
537 	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
538 	printk_trace = tr;
539 	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
540 }
541 
542 /* Returns true if the status of tr changed */
543 static bool update_marker_trace(struct trace_array *tr, int enabled)
544 {
545 	lockdep_assert_held(&event_mutex);
546 
547 	if (enabled) {
548 		if (tr->trace_flags & TRACE_ITER(COPY_MARKER))
549 			return false;
550 
551 		list_add_rcu(&tr->marker_list, &marker_copies);
552 		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
553 		return true;
554 	}
555 
556 	if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER)))
557 		return false;
558 
559 	list_del_rcu(&tr->marker_list);
560 	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
561 	return true;
562 }
563 
564 void trace_set_ring_buffer_expanded(struct trace_array *tr)
565 {
566 	if (!tr)
567 		tr = &global_trace;
568 	tr->ring_buffer_expanded = true;
569 }
570 
/* Workqueue callback: destroy a trace array queued for auto-removal. */
static void trace_array_autoremove(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array, autoremove_work);

	trace_array_destroy(tr);
}
577 
578 static struct workqueue_struct *autoremove_wq;
579 
580 static void trace_array_kick_autoremove(struct trace_array *tr)
581 {
582 	if (autoremove_wq)
583 		queue_work(autoremove_wq, &tr->autoremove_work);
584 }
585 
/* Cancel a pending auto-removal of @tr, if any. */
static void trace_array_cancel_autoremove(struct trace_array *tr)
{
	/*
	 * Since this can be called inside trace_array_autoremove(),
	 * it has to avoid deadlock of the workqueue.
	 */
	if (work_pending(&tr->autoremove_work))
		cancel_work_sync(&tr->autoremove_work);
}
595 
/* Prepare @tr's work item used for deferred destruction. */
static void trace_array_init_autoremove(struct trace_array *tr)
{
	INIT_WORK(&tr->autoremove_work, trace_array_autoremove);
}
600 
601 static void trace_array_start_autoremove(void)
602 {
603 	if (autoremove_wq)
604 		return;
605 
606 	autoremove_wq = alloc_workqueue("tr_autoremove_wq",
607 					WQ_UNBOUND | WQ_HIGHPRI, 0);
608 	if (!autoremove_wq)
609 		pr_warn("Unable to allocate tr_autoremove_wq. autoremove disabled.\n");
610 }
611 
612 LIST_HEAD(ftrace_trace_arrays);
613 
/* Take a reference on @this_tr; caller holds trace_types_lock. */
static int __trace_array_get(struct trace_array *this_tr)
{
	/* When free_on_close is set, this is not available anymore. */
	if (autoremove_wq && this_tr->free_on_close)
		return -ENODEV;

	this_tr->ref++;
	return 0;
}
623 
624 int trace_array_get(struct trace_array *this_tr)
625 {
626 	struct trace_array *tr;
627 
628 	guard(mutex)(&trace_types_lock);
629 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
630 		if (tr == this_tr) {
631 			return __trace_array_get(tr);
632 		}
633 	}
634 
635 	return -ENODEV;
636 }
637 
/* Drop a reference on @this_tr; caller holds trace_types_lock. */
static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
	/*
	 * When free_on_close is set, prepare removing the array
	 * when the last reference is released.
	 */
	if (this_tr->ref == 1 && this_tr->free_on_close)
		trace_array_kick_autoremove(this_tr);
}
649 
650 /**
651  * trace_array_put - Decrement the reference counter for this trace array.
652  * @this_tr : pointer to the trace array
653  *
654  * NOTE: Use this when we no longer need the trace array returned by
655  * trace_array_get_by_name(). This ensures the trace array can be later
656  * destroyed.
657  *
658  */
659 void trace_array_put(struct trace_array *this_tr)
660 {
661 	if (!this_tr)
662 		return;
663 
664 	guard(mutex)(&trace_types_lock);
665 	__trace_array_put(this_tr);
666 }
667 EXPORT_SYMBOL_GPL(trace_array_put);
668 
669 int tracing_check_open_get_tr(struct trace_array *tr)
670 {
671 	int ret;
672 
673 	ret = security_locked_down(LOCKDOWN_TRACEFS);
674 	if (ret)
675 		return ret;
676 
677 	if (tracing_disabled)
678 		return -ENODEV;
679 
680 	if (tr && trace_array_get(tr) < 0)
681 		return -ENODEV;
682 
683 	return 0;
684 }
685 
/* Current timestamp for @buf on @cpu, normalized to the buffer's clock. */
static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}
699 
/* Current timestamp of the top-level trace buffer for @cpu. */
u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
}
704 
705 /**
706  * tracing_is_enabled - Show if global_trace has been enabled
707  *
708  * Shows if the global trace has been enabled or not. It uses the
709  * mirror flag "buffer_disabled" to be used in fast paths such as for
710  * the irqsoff tracer. But it may be inaccurate due to races. If you
711  * need to know the accurate state, use tracing_is_on() which is a little
712  * slower, but accurate.
713  */
714 int tracing_is_enabled(void)
715 {
716 	/*
717 	 * For quick access (irqsoff uses this in fast path), just
718 	 * return the mirror variable of the state of the ring buffer.
719 	 * It's a little racy, but we don't really care.
720 	 */
721 	return !global_trace.buffer_disabled;
722 }
723 
724 /*
725  * trace_buf_size is the size in bytes that is allocated
726  * for a buffer. Note, the number of bytes is always rounded
727  * to page size.
728  *
729  * This number is purposely set to a low number of 16384.
730  * If the dump on oops happens, it will be much appreciated
731  * to not have to wait for all that output. Anyway this can be
732  * boot time and run time configurable.
733  */
734 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
735 
736 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
737 
738 /* trace_types holds a link list of available tracers. */
739 static struct tracer		*trace_types __read_mostly;
740 
741 /*
742  * trace_types_lock is used to protect the trace_types list.
743  */
744 DEFINE_MUTEX(trace_types_lock);
745 
746 /*
747  * serialize the access of the ring buffer
748  *
749  * ring buffer serializes readers, but it is low level protection.
750  * The validity of the events (which returns by ring_buffer_peek() ..etc)
751  * are not protected by ring buffer.
752  *
753  * The content of events may become garbage if we allow other process consumes
754  * these events concurrently:
755  *   A) the page of the consumed events may become a normal page
756  *      (not reader page) in ring buffer, and this page will be rewritten
757  *      by events producer.
758  *   B) The page of the consumed events may become a page for splice_read,
759  *      and this page will be returned to system.
760  *
761  * These primitives allow multi process access to different cpu ring buffer
762  * concurrently.
763  *
764  * These primitives don't distinguish read-only and read-consume access.
765  * Multi read-only access are also serialized.
766  */
767 
768 #ifdef CONFIG_SMP
769 static DECLARE_RWSEM(all_cpu_access_lock);
770 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
771 
/*
 * Serialize ring-buffer readers.  RING_BUFFER_ALL_CPUS takes the rwsem
 * for writing (exclusive); a single cpu takes it for reading plus that
 * cpu's mutex.  The lock order (rwsem then per-cpu mutex) must match
 * the unlock order in trace_access_unlock().
 */
static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}
787 
/* Release what trace_access_lock() took, in the reverse order. */
static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}
797 
/* Initialize the per-cpu reader mutexes (SMP build). */
static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}
805 
806 #else
807 
808 static DEFINE_MUTEX(access_lock);
809 
/* UP build: a single mutex serializes all readers; @cpu is irrelevant. */
static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}
815 
/* UP build: release the single reader mutex. */
static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}
821 
/* UP build: static mutex needs no runtime initialization. */
static inline void trace_access_lock_init(void)
{
}
825 
826 #endif
827 
/* Enable recording into @tr's ring buffer and clear the mirror flag. */
void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_on(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
}
842 
843 /**
844  * tracing_on - enable tracing buffers
845  *
846  * This function enables tracing buffers that may have been
847  * disabled with tracing_off.
848  */
849 void tracing_on(void)
850 {
851 	tracer_tracing_on(&global_trace);
852 }
853 EXPORT_SYMBOL_GPL(tracing_on);
854 
855 #ifdef CONFIG_TRACER_SNAPSHOT
856 /**
857  * tracing_snapshot - take a snapshot of the current buffer.
858  *
859  * This causes a swap between the snapshot buffer and the current live
860  * tracing buffer. You can use this to take snapshots of the live
861  * trace when some condition is triggered, but continue to trace.
862  *
863  * Note, make sure to allocate the snapshot with either
864  * a tracing_snapshot_alloc(), or by doing it manually
865  * with: echo 1 > /sys/kernel/tracing/snapshot
866  *
867  * If the snapshot buffer is not allocated, it will stop tracing.
868  * Basically making a permanent snapshot.
869  */
870 void tracing_snapshot(void)
871 {
872 	struct trace_array *tr = &global_trace;
873 
874 	tracing_snapshot_instance(tr);
875 }
876 EXPORT_SYMBOL_GPL(tracing_snapshot);
877 
878 /**
879  * tracing_alloc_snapshot - allocate snapshot buffer.
880  *
881  * This only allocates the snapshot buffer if it isn't already
882  * allocated - it doesn't also take a snapshot.
883  *
884  * This is meant to be used in cases where the snapshot buffer needs
885  * to be set up for events that can't sleep but need to be able to
886  * trigger a snapshot.
887  */
888 int tracing_alloc_snapshot(void)
889 {
890 	struct trace_array *tr = &global_trace;
891 	int ret;
892 
893 	ret = tracing_alloc_snapshot_instance(tr);
894 	WARN_ON(ret < 0);
895 
896 	return ret;
897 }
898 #else
/* Stub when CONFIG_TRACER_SNAPSHOT is off: warn about the misuse. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/* Stub when CONFIG_TRACER_SNAPSHOT is off: delegates to the warning stub. */
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
910 #endif /* CONFIG_TRACER_SNAPSHOT */
911 
/* Disable recording into @tr's ring buffer and set the mirror flag. */
void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
}
926 
927 /**
928  * tracer_tracing_disable() - temporary disable the buffer from write
929  * @tr: The trace array to disable its buffer for
930  *
931  * Expects trace_tracing_enable() to re-enable tracing.
932  * The difference between this and tracer_tracing_off() is that this
933  * is a counter and can nest, whereas, tracer_tracing_off() can
934  * be called multiple times and a single trace_tracing_on() will
935  * enable it.
936  */
937 void tracer_tracing_disable(struct trace_array *tr)
938 {
939 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
940 		return;
941 
942 	ring_buffer_record_disable(tr->array_buffer.buffer);
943 }
944 
945 /**
946  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
947  * @tr: The trace array that had tracer_tracincg_disable() called on it
948  *
949  * This is called after tracer_tracing_disable() has been called on @tr,
950  * when it's safe to re-enable tracing.
951  */
952 void tracer_tracing_enable(struct trace_array *tr)
953 {
954 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
955 		return;
956 
957 	ring_buffer_record_enable(tr->array_buffer.buffer);
958 }
959 
960 /**
961  * tracing_off - turn off tracing buffers
962  *
963  * This function stops the tracing buffers from recording data.
964  * It does not disable any overhead the tracers themselves may
965  * be causing. This function simply causes all recording to
966  * the ring buffers to fail.
967  */
968 void tracing_off(void)
969 {
970 	tracer_tracing_off(&global_trace);
971 }
972 EXPORT_SYMBOL_GPL(tracing_off);
973 
/* Called on WARN*(): stop tracing when traceoff_on_warning was requested. */
void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		/* Sample printk_trace once; it may be retargeted concurrently. */
		struct trace_array *tr = READ_ONCE(printk_trace);

		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
			"Disabling tracing due to warning\n");
		tracing_off();

		/* Disable trace_printk() buffer too */
		if (tr != &global_trace) {
			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
					       "Disabling tracing due to warning\n");
			tracer_tracing_off(tr);
		}
	}
}
991 
992 /**
993  * tracer_tracing_is_on - show real state of ring buffer enabled
994  * @tr : the trace array to know if ring buffer is enabled
995  *
996  * Shows real state of the ring buffer if it is enabled or not.
997  */
998 bool tracer_tracing_is_on(struct trace_array *tr)
999 {
1000 	if (tr->array_buffer.buffer)
1001 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1002 	return !tr->buffer_disabled;
1003 }
1004 
1005 /**
1006  * tracing_is_on - show state of ring buffers enabled
1007  */
1008 int tracing_is_on(void)
1009 {
1010 	return tracer_tracing_is_on(&global_trace);
1011 }
1012 EXPORT_SYMBOL_GPL(tracing_is_on);
1013 
1014 static int __init set_buf_size(char *str)
1015 {
1016 	unsigned long buf_size;
1017 
1018 	if (!str)
1019 		return 0;
1020 	buf_size = memparse(str, &str);
1021 	/*
1022 	 * nr_entries can not be zero and the startup
1023 	 * tests require some buffer space. Therefore
1024 	 * ensure we have at least 4096 bytes of buffer.
1025 	 */
1026 	trace_buf_size = max(4096UL, buf_size);
1027 	return 1;
1028 }
1029 __setup("trace_buf_size=", set_buf_size);
1030 
1031 static int __init set_tracing_thresh(char *str)
1032 {
1033 	unsigned long threshold;
1034 	int ret;
1035 
1036 	if (!str)
1037 		return 0;
1038 	ret = kstrtoul(str, 0, &threshold);
1039 	if (ret < 0)
1040 		return 0;
1041 	tracing_thresh = threshold * 1000;
1042 	return 1;
1043 }
1044 __setup("tracing_thresh=", set_tracing_thresh);
1045 
/* Convert nanoseconds to microseconds (truncating division). */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	/* 1000 ns per microsecond. */
	return nsecs / 1000UL;
}
1050 
1051 /*
1052  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1053  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1054  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1055  * of strings in the order that the evals (enum) were defined.
1056  */
1057 #undef C
1058 #define C(a, b) b
1059 
1060 /* These must match the bit positions in trace_iterator_flags */
1061 static const char *trace_options[] = {
1062 	TRACE_FLAGS
1063 	NULL
1064 };
1065 
/*
 * Table of selectable trace clocks, indexed by tr->clock_id.
 * Each entry pairs the clock callback with its user-visible name.
 */
static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};
1082 
1083 bool trace_clock_in_ns(struct trace_array *tr)
1084 {
1085 	if (trace_clocks[tr->clock_id].in_ns)
1086 		return true;
1087 
1088 	return false;
1089 }
1090 
1091 /*
1092  * trace_parser_get_init - gets the buffer for trace parser
1093  */
1094 int trace_parser_get_init(struct trace_parser *parser, int size)
1095 {
1096 	memset(parser, 0, sizeof(*parser));
1097 
1098 	parser->buffer = kmalloc(size, GFP_KERNEL);
1099 	if (!parser->buffer)
1100 		return 1;
1101 
1102 	parser->size = size;
1103 	return 0;
1104 }
1105 
1106 /*
1107  * trace_parser_put - frees the buffer for trace parser
1108  */
1109 void trace_parser_put(struct trace_parser *parser)
1110 {
1111 	kfree(parser->buffer);
1112 	parser->buffer = NULL;
1113 }
1114 
1115 /*
1116  * trace_get_user - reads the user input string separated by  space
1117  * (matched by isspace(ch))
1118  *
1119  * For each string found the 'struct trace_parser' is updated,
1120  * and the function returns.
1121  *
1122  * Returns number of bytes read.
1123  *
1124  * See kernel/trace/trace.h for 'struct trace_parser' details.
1125  */
1126 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1127 	size_t cnt, loff_t *ppos)
1128 {
1129 	char ch;
1130 	size_t read = 0;
1131 	ssize_t ret;
1132 
1133 	if (!*ppos)
1134 		trace_parser_clear(parser);
1135 
1136 	ret = get_user(ch, ubuf++);
1137 	if (ret)
1138 		goto fail;
1139 
1140 	read++;
1141 	cnt--;
1142 
1143 	/*
1144 	 * The parser is not finished with the last write,
1145 	 * continue reading the user input without skipping spaces.
1146 	 */
1147 	if (!parser->cont) {
1148 		/* skip white space */
1149 		while (cnt && isspace(ch)) {
1150 			ret = get_user(ch, ubuf++);
1151 			if (ret)
1152 				goto fail;
1153 			read++;
1154 			cnt--;
1155 		}
1156 
1157 		parser->idx = 0;
1158 
1159 		/* only spaces were written */
1160 		if (isspace(ch) || !ch) {
1161 			*ppos += read;
1162 			return read;
1163 		}
1164 	}
1165 
1166 	/* read the non-space input */
1167 	while (cnt && !isspace(ch) && ch) {
1168 		if (parser->idx < parser->size - 1)
1169 			parser->buffer[parser->idx++] = ch;
1170 		else {
1171 			ret = -EINVAL;
1172 			goto fail;
1173 		}
1174 
1175 		ret = get_user(ch, ubuf++);
1176 		if (ret)
1177 			goto fail;
1178 		read++;
1179 		cnt--;
1180 	}
1181 
1182 	/* We either got finished input or we have to wait for another call. */
1183 	if (isspace(ch) || !ch) {
1184 		parser->buffer[parser->idx] = 0;
1185 		parser->cont = false;
1186 	} else if (parser->idx < parser->size - 1) {
1187 		parser->cont = true;
1188 		parser->buffer[parser->idx++] = ch;
1189 		/* Make sure the parsed string always terminates with '\0'. */
1190 		parser->buffer[parser->idx] = 0;
1191 	} else {
1192 		ret = -EINVAL;
1193 		goto fail;
1194 	}
1195 
1196 	*ppos += read;
1197 	return read;
1198 fail:
1199 	trace_parser_fail(parser);
1200 	return ret;
1201 }
1202 
1203 /* TODO add a seq_buf_to_buffer() */
1204 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1205 {
1206 	int len;
1207 
1208 	if (trace_seq_used(s) <= s->readpos)
1209 		return -EBUSY;
1210 
1211 	len = trace_seq_used(s) - s->readpos;
1212 	if (cnt > len)
1213 		cnt = len;
1214 	memcpy(buf, s->buffer + s->readpos, cnt);
1215 
1216 	s->readpos += cnt;
1217 	return cnt;
1218 }
1219 
/* Latency threshold in ns; set via the "tracing_thresh=" boot parameter (usecs). */
unsigned long __read_mostly	tracing_thresh;

/* Context handed to wait_pipe_cond() by wait_on_pipe(). */
struct pipe_wait {
	struct trace_iterator		*iter;
	int				wait_index;
};
1226 
1227 static bool wait_pipe_cond(void *data)
1228 {
1229 	struct pipe_wait *pwait = data;
1230 	struct trace_iterator *iter = pwait->iter;
1231 
1232 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
1233 		return true;
1234 
1235 	return iter->closed;
1236 }
1237 
/* Block until @iter's per-CPU ring buffer has data (or is @full percent full). */
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	struct pipe_wait pwait;
	int ret;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	/* Snapshot wait_index; a later bump tells wait_pipe_cond() to bail. */
	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
	pwait.iter = iter;

	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
			       wait_pipe_cond, &pwait);

#ifdef CONFIG_TRACER_SNAPSHOT
	/*
	 * Make sure this is still the snapshot buffer, as if a snapshot were
	 * to happen, this would now be the main buffer.
	 */
	if (iter->snapshot)
		iter->array_buffer = &iter->tr->snapshot_buffer;
#endif
	return ret;
}
1263 
#ifdef CONFIG_FTRACE_STARTUP_TEST
/* Set once boot has progressed far enough to run tracer selftests. */
static bool selftests_can_run;

/* A tracer whose selftest was postponed until selftests_can_run is set. */
struct trace_selftests {
	struct list_head		list;
	struct tracer			*type;
};

/* Filled by save_selftest(), drained by init_trace_selftests(). */
static LIST_HEAD(postponed_selftests);
1273 
1274 static int save_selftest(struct tracer *type)
1275 {
1276 	struct trace_selftests *selftest;
1277 
1278 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1279 	if (!selftest)
1280 		return -ENOMEM;
1281 
1282 	selftest->type = type;
1283 	list_add(&selftest->list, &postponed_selftests);
1284 	return 0;
1285 }
1286 
/*
 * Run @type's selftest against the global trace instance.
 * Returns 0 on pass/skip, -1 on failure. The current tracer and its
 * flags are saved and restored around the test.
 */
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer_flags *saved_flags = tr->current_trace_flags;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;
	tr->current_trace_flags = type->flags ? : type->default_flags;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	tr->current_trace_flags = saved_flags;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
1362 
/* Wrapper that marks the selftest-running state around run_tracer_selftest(). */
static int do_run_tracer_selftest(struct tracer *type)
{
	int ret;

	/*
	 * Tests can take a long time, especially if they are run one after the
	 * other, as does happen during bootup when all the tracers are
	 * registered. This could cause the soft lockup watchdog to trigger.
	 */
	cond_resched();

	tracing_selftest_running = true;
	ret = run_tracer_selftest(type);
	tracing_selftest_running = false;

	return ret;
}
1380 
/*
 * Drain postponed_selftests once boot is far enough along. A tracer
 * whose postponed test fails is unlinked from the trace_types list.
 */
static __init int init_trace_selftests(void)
{
	struct trace_selftests *p, *n;
	struct tracer *t, **last;
	int ret;

	selftests_can_run = true;

	guard(mutex)(&trace_types_lock);

	if (list_empty(&postponed_selftests))
		return 0;

	pr_info("Running postponed tracer tests:\n");

	tracing_selftest_running = true;
	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/* This loop can take minutes when sanitizers are enabled, so
		 * lets make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
		/* If the test fails, then warn and remove from available_tracers */
		if (ret < 0) {
			WARN(1, "tracer: %s failed selftest, disabling\n",
			     p->type->name);
			/* Unlink p->type from the singly linked trace_types list. */
			last = &trace_types;
			for (t = trace_types; t; t = t->next) {
				if (t == p->type) {
					*last = t->next;
					break;
				}
				last = &t->next;
			}
		}
		list_del(&p->list);
		kfree(p);
	}
	tracing_selftest_running = false;

	return 0;
}
core_initcall(init_trace_selftests);
1424 #else
/* Selftests compiled out: registration never fails on a selftest. */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
1429 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1430 
1431 static int add_tracer(struct trace_array *tr, struct tracer *t);
1432 
1433 static void __init apply_trace_boot_options(void);
1434 
1435 static void free_tracers(struct trace_array *tr)
1436 {
1437 	struct tracers *t, *n;
1438 
1439 	lockdep_assert_held(&trace_types_lock);
1440 
1441 	list_for_each_entry_safe(t, n, &tr->tracers, list) {
1442 		list_del(&t->list);
1443 		kfree(t->flags);
1444 		kfree(t);
1445 	}
1446 }
1447 
1448 /**
1449  * register_tracer - register a tracer with the ftrace system.
1450  * @type: the plugin for the tracer
1451  *
1452  * Register a new plugin tracer.
1453  */
1454 int __init register_tracer(struct tracer *type)
1455 {
1456 	struct trace_array *tr;
1457 	struct tracer *t;
1458 	int ret = 0;
1459 
1460 	if (!type->name) {
1461 		pr_info("Tracer must have a name\n");
1462 		return -1;
1463 	}
1464 
1465 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1466 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1467 		return -1;
1468 	}
1469 
1470 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1471 		pr_warn("Can not register tracer %s due to lockdown\n",
1472 			   type->name);
1473 		return -EPERM;
1474 	}
1475 
1476 	mutex_lock(&trace_types_lock);
1477 
1478 	for (t = trace_types; t; t = t->next) {
1479 		if (strcmp(type->name, t->name) == 0) {
1480 			/* already found */
1481 			pr_info("Tracer %s already registered\n",
1482 				type->name);
1483 			ret = -1;
1484 			goto out;
1485 		}
1486 	}
1487 
1488 	/* store the tracer for __set_tracer_option */
1489 	if (type->flags)
1490 		type->flags->trace = type;
1491 
1492 	ret = do_run_tracer_selftest(type);
1493 	if (ret < 0)
1494 		goto out;
1495 
1496 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1497 		ret = add_tracer(tr, type);
1498 		if (ret < 0) {
1499 			/* The tracer will still exist but without options */
1500 			pr_warn("Failed to create tracer options for %s\n", type->name);
1501 			break;
1502 		}
1503 	}
1504 
1505 	type->next = trace_types;
1506 	trace_types = type;
1507 
1508  out:
1509 	mutex_unlock(&trace_types_lock);
1510 
1511 	if (ret || !default_bootup_tracer)
1512 		return ret;
1513 
1514 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1515 		return 0;
1516 
1517 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1518 	/* Do we want this tracer to start on bootup? */
1519 	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
1520 	default_bootup_tracer = NULL;
1521 
1522 	apply_trace_boot_options();
1523 
1524 	/* disable other selftests, since this will break it. */
1525 	disable_tracing_selftest("running a tracer");
1526 
1527 	return 0;
1528 }
1529 
1530 void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1531 {
1532 	struct trace_buffer *buffer = buf->buffer;
1533 
1534 	if (!buffer)
1535 		return;
1536 
1537 	ring_buffer_record_disable(buffer);
1538 
1539 	/* Make sure all commits have finished */
1540 	synchronize_rcu();
1541 	ring_buffer_reset_cpu(buffer, cpu);
1542 
1543 	ring_buffer_record_enable(buffer);
1544 }
1545 
1546 void tracing_reset_online_cpus(struct array_buffer *buf)
1547 {
1548 	struct trace_buffer *buffer = buf->buffer;
1549 
1550 	if (!buffer)
1551 		return;
1552 
1553 	ring_buffer_record_disable(buffer);
1554 
1555 	/* Make sure all commits have finished */
1556 	synchronize_rcu();
1557 
1558 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1559 
1560 	ring_buffer_reset_online_cpus(buffer);
1561 
1562 	ring_buffer_record_enable(buffer);
1563 }
1564 
1565 static void tracing_reset_all_cpus(struct array_buffer *buf)
1566 {
1567 	struct trace_buffer *buffer = buf->buffer;
1568 
1569 	if (!buffer)
1570 		return;
1571 
1572 	ring_buffer_record_disable(buffer);
1573 
1574 	/* Make sure all commits have finished */
1575 	synchronize_rcu();
1576 
1577 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1578 
1579 	ring_buffer_reset(buffer);
1580 
1581 	ring_buffer_record_enable(buffer);
1582 }
1583 
1584 /* Must have trace_types_lock held */
1585 void tracing_reset_all_online_cpus_unlocked(void)
1586 {
1587 	struct trace_array *tr;
1588 
1589 	lockdep_assert_held(&trace_types_lock);
1590 
1591 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1592 		if (!tr->clear_trace)
1593 			continue;
1594 		tr->clear_trace = false;
1595 		tracing_reset_online_cpus(&tr->array_buffer);
1596 #ifdef CONFIG_TRACER_SNAPSHOT
1597 		tracing_reset_online_cpus(&tr->snapshot_buffer);
1598 #endif
1599 	}
1600 }
1601 
/* Locked wrapper around tracing_reset_all_online_cpus_unlocked(). */
void tracing_reset_all_online_cpus(void)
{
	guard(mutex)(&trace_types_lock);
	tracing_reset_all_online_cpus_unlocked();
}
1607 
/* Nonzero while tracing_stop() calls outnumber tracing_start() calls. */
int is_tracing_stopped(void)
{
	return global_trace.stop_count;
}
1612 
/* Decrement @tr's stop count and re-enable its buffers when it hits zero. */
static void tracing_start_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	if (tracing_disabled)
		return;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	if (--tr->stop_count) {
		if (WARN_ON_ONCE(tr->stop_count < 0)) {
			/* Someone screwed up their debugging */
			tr->stop_count = 0;
		}
		return;
	}

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
1644 
1645 /**
1646  * tracing_start - quick start of the tracer
1647  *
1648  * If tracing is enabled but was stopped by tracing_stop,
1649  * this will start the tracer back up.
1650  */
1651 void tracing_start(void)
1652 
1653 {
1654 	return tracing_start_tr(&global_trace);
1655 }
1656 
/* Increment @tr's stop count and disable its buffers on the first stop. */
static void tracing_stop_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	/* Already stopped? Just bump the nesting count. */
	if (tr->stop_count++)
		return;

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
1680 
1681 /**
1682  * tracing_stop - quick stop of the tracer
1683  *
1684  * Light weight way to stop tracing. Use in conjunction with
1685  * tracing_start.
1686  */
1687 void tracing_stop(void)
1688 {
1689 	return tracing_stop_tr(&global_trace);
1690 }
1691 
1692 /*
1693  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
1694  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
1695  * simplifies those functions and keeps them in sync.
1696  */
1697 enum print_line_t trace_handle_return(struct trace_seq *s)
1698 {
1699 	return trace_seq_has_overflowed(s) ?
1700 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
1701 }
1702 EXPORT_SYMBOL_GPL(trace_handle_return);
1703 
/* Current task's migration-disable depth; always 0 on !SMP kernels. */
static unsigned short migration_disable_value(void)
{
#if defined(CONFIG_SMP)
	return current->migration_disabled;
#else
	return 0;
#endif
}
1712 
/*
 * Build the packed trace context word from @irqs_status and the current
 * preemption state: flag bits in the upper 16 bits, the preempt count
 * (clamped to 0xf) in bits 0-3 and the migration-disable depth (clamped
 * to 0xf) in bits 4-7.
 */
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
{
	unsigned int trace_flags = irqs_status;
	unsigned int pc;

	pc = preempt_count();

	if (pc & NMI_MASK)
		trace_flags |= TRACE_FLAG_NMI;
	if (pc & HARDIRQ_MASK)
		trace_flags |= TRACE_FLAG_HARDIRQ;
	if (in_serving_softirq())
		trace_flags |= TRACE_FLAG_SOFTIRQ;
	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
		trace_flags |= TRACE_FLAG_BH_OFF;

	if (tif_need_resched())
		trace_flags |= TRACE_FLAG_NEED_RESCHED;
	if (test_preempt_need_resched())
		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}
1738 
/* Public wrapper around __trace_buffer_lock_reserve(). */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
1747 
/* Per-CPU scratch event page; see trace_buffered_event_enable(). */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Per-CPU in-use counter for the scratch event page. */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of users that have requested event buffering. */
static int trace_buffered_event_ref;
1751 
1752 /**
1753  * trace_buffered_event_enable - enable buffering events
1754  *
1755  * When events are being filtered, it is quicker to use a temporary
1756  * buffer to write the event data into if there's a likely chance
1757  * that it will not be committed. The discard of the ring buffer
1758  * is not as fast as committing, and is much slower than copying
1759  * a commit.
1760  *
1761  * When an event is to be filtered, allocate per cpu buffers to
1762  * write the event data into, and if the event is filtered and discarded
1763  * it is simply dropped, otherwise, the entire data is to be committed
1764  * in one shot.
1765  */
1766 void trace_buffered_event_enable(void)
1767 {
1768 	struct ring_buffer_event *event;
1769 	struct page *page;
1770 	int cpu;
1771 
1772 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1773 
1774 	if (trace_buffered_event_ref++)
1775 		return;
1776 
1777 	for_each_tracing_cpu(cpu) {
1778 		page = alloc_pages_node(cpu_to_node(cpu),
1779 					GFP_KERNEL | __GFP_NORETRY, 0);
1780 		/* This is just an optimization and can handle failures */
1781 		if (!page) {
1782 			pr_err("Failed to allocate event buffer\n");
1783 			break;
1784 		}
1785 
1786 		event = page_address(page);
1787 		memset(event, 0, sizeof(*event));
1788 
1789 		per_cpu(trace_buffered_event, cpu) = event;
1790 
1791 		scoped_guard(preempt,) {
1792 			if (cpu == smp_processor_id() &&
1793 			    __this_cpu_read(trace_buffered_event) !=
1794 			    per_cpu(trace_buffered_event, cpu))
1795 				WARN_ON_ONCE(1);
1796 		}
1797 	}
1798 }
1799 
/* IPI callback: mark this CPU's buffered event available again. */
static void enable_trace_buffered_event(void *data)
{
	this_cpu_dec(trace_buffered_event_cnt);
}
1804 
/* IPI callback: mark this CPU's buffered event in use so writers skip it. */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}
1809 
1810 /**
1811  * trace_buffered_event_disable - disable buffering events
1812  *
1813  * When a filter is removed, it is faster to not use the buffered
1814  * events, and to commit directly into the ring buffer. Free up
1815  * the temp buffers when there are no more users. This requires
1816  * special synchronization with current events.
1817  */
1818 void trace_buffered_event_disable(void)
1819 {
1820 	int cpu;
1821 
1822 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1823 
1824 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
1825 		return;
1826 
1827 	if (--trace_buffered_event_ref)
1828 		return;
1829 
1830 	/* For each CPU, set the buffer as used. */
1831 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
1832 			 NULL, true);
1833 
1834 	/* Wait for all current users to finish */
1835 	synchronize_rcu();
1836 
1837 	for_each_tracing_cpu(cpu) {
1838 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
1839 		per_cpu(trace_buffered_event, cpu) = NULL;
1840 	}
1841 
1842 	/*
1843 	 * Wait for all CPUs that potentially started checking if they can use
1844 	 * their event buffer only after the previous synchronize_rcu() call and
1845 	 * they still read a valid pointer from trace_buffered_event. It must be
1846 	 * ensured they don't see cleared trace_buffered_event_cnt else they
1847 	 * could wrongly decide to use the pointed-to buffer which is now freed.
1848 	 */
1849 	synchronize_rcu();
1850 
1851 	/* For each CPU, relinquish the buffer */
1852 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
1853 			 true);
1854 }
1855 
/* Fallback buffer for trigger evaluation when the main reserve fails. */
static struct trace_buffer *temp_buffer;
1857 
/*
 * Reserve space for a trace event, preferring the per-CPU buffered
 * event when filtering is active. On success preemption is left
 * disabled; the matching commit/discard re-enables it. *current_rb is
 * set to whichever buffer the event was reserved from.
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
			  struct trace_event_file *trace_file,
			  int type, unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *entry;
	struct trace_array *tr = trace_file->tr;
	int val;

	*current_rb = tr->array_buffer.buffer;

	if (!tr->no_filter_buffering_ref &&
	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
		preempt_disable_notrace();
		/*
		 * Filtering is on, so try to use the per cpu buffer first.
		 * This buffer will simulate a ring_buffer_event,
		 * where the type_len is zero and the array[0] will
		 * hold the full length.
		 * (see include/linux/ring-buffer.h for details on
		 *  how the ring_buffer_event is structured).
		 *
		 * Using a temp buffer during filtering and copying it
		 * on a matched filter is quicker than writing directly
		 * into the ring buffer and then discarding it when
		 * it doesn't match. That is because the discard
		 * requires several atomic operations to get right.
		 * Copying on match and doing nothing on a failed match
		 * is still quicker than no copy on match, but having
		 * to discard out of the ring buffer on a failed match.
		 */
		if ((entry = __this_cpu_read(trace_buffered_event))) {
			int max_len = PAGE_SIZE - struct_size(entry, array, 1);

			val = this_cpu_inc_return(trace_buffered_event_cnt);

			/*
			 * Preemption is disabled, but interrupts and NMIs
			 * can still come in now. If that happens after
			 * the above increment, then it will have to go
			 * back to the old method of allocating the event
			 * on the ring buffer, and if the filter fails, it
			 * will have to call ring_buffer_discard_commit()
			 * to remove it.
			 *
			 * Need to also check the unlikely case that the
			 * length is bigger than the temp buffer size.
			 * If that happens, then the reserve is pretty much
			 * guaranteed to fail, as the ring buffer currently
			 * only allows events less than a page. But that may
			 * change in the future, so let the ring buffer reserve
			 * handle the failure in that case.
			 */
			if (val == 1 && likely(len <= max_len)) {
				trace_event_setup(entry, type, trace_ctx);
				entry->array[0] = len;
				/* Return with preemption disabled */
				return entry;
			}
			this_cpu_dec(trace_buffered_event_cnt);
		}
		/* __trace_buffer_lock_reserve() disables preemption */
		preempt_enable_notrace();
	}

	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
					    trace_ctx);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursive
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
						    trace_ctx);
	}
	return entry;
}
1940 
/* Serializes use of tracepoint_print_iter in output_printk(). */
static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
/* Serializes updates of the tracepoint_printk sysctl state. */
static DEFINE_MUTEX(tracepoint_printk_mutex);
1943 
/*
 * Print the event in @fbuffer to the console via printk(), honoring the
 * event file's soft-disable and filter state. Uses the shared
 * tracepoint_print_iter under tracepoint_iter_lock.
 */
static void output_printk(struct trace_event_buffer *fbuffer)
{
	struct trace_event_call *event_call;
	struct trace_event_file *file;
	struct trace_event *event;
	unsigned long flags;
	struct trace_iterator *iter = tracepoint_print_iter;

	/* We should never get here if iter is NULL */
	if (WARN_ON_ONCE(!iter))
		return;

	event_call = fbuffer->trace_file->event_call;
	if (!event_call || !event_call->event.funcs ||
	    !event_call->event.funcs->trace)
		return;

	/* Skip events that are soft disabled or fail their filter. */
	file = fbuffer->trace_file;
	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
	     !filter_match_preds(file->filter, fbuffer->entry)))
		return;

	event = &fbuffer->trace_file->event_call->event;

	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
	trace_seq_init(&iter->seq);
	iter->ent = fbuffer->entry;
	event_call->event.funcs->trace(iter, 0, event);
	trace_seq_putc(&iter->seq, 0);
	printk("%s", iter->seq.buffer);

	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}
1978 
/*
 * sysctl handler for kernel.tracepoint_printk: flips the static key that
 * routes events through output_printk() when the value changes.
 */
int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int save_tracepoint_printk;
	int ret;

	guard(mutex)(&tracepoint_printk_mutex);
	save_tracepoint_printk = tracepoint_printk;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_printk_iter is not allocated
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	/* No state change: leave the static key alone. */
	if (save_tracepoint_printk == tracepoint_printk)
		return ret;

	if (tracepoint_printk)
		static_key_enable(&tracepoint_printk_key.key);
	else
		static_key_disable(&tracepoint_printk_key.key);

	return ret;
}
2008 
/*
 * Commit (or discard, if a trigger/filter says so) the event reserved in
 * @fbuffer, then run any post-commit triggers.
 */
void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
	enum event_trigger_type tt = ETT_NONE;
	struct trace_event_file *file = fbuffer->trace_file;

	/* Triggers/filters may consume the event without recording it. */
	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
			fbuffer->entry, &tt))
		goto discard;

	if (static_key_false(&tracepoint_printk_key.key))
		output_printk(fbuffer);

	if (static_branch_unlikely(&trace_event_exports_enabled))
		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);

	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);

discard:
	if (tt)
		event_triggers_post_call(file, tt);

}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2033 
2034 /*
2035  * Skip 3:
2036  *
2037  *   trace_buffer_unlock_commit_regs()
2038  *   trace_event_buffer_commit()
2039  *   trace_event_raw_event_xxx()
2040  */
2041 # define STACK_SKIP 3
2042 
2043 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2044 				     struct trace_buffer *buffer,
2045 				     struct ring_buffer_event *event,
2046 				     unsigned int trace_ctx,
2047 				     struct pt_regs *regs)
2048 {
2049 	__buffer_unlock_commit(buffer, event);
2050 
2051 	/*
2052 	 * If regs is not set, then skip the necessary functions.
2053 	 * Note, we can still get here via blktrace, wakeup tracer
2054 	 * and mmiotrace, but that's ok if they lose a function or
2055 	 * two. They are not that meaningful.
2056 	 */
2057 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2058 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2059 }
2060 
2061 /*
2062  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2063  */
2064 void
2065 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2066 				   struct ring_buffer_event *event)
2067 {
2068 	__buffer_unlock_commit(buffer, event);
2069 }
2070 
/*
 * Record a function-trace (TRACE_FN) event for @ip/@parent_ip in @tr's
 * buffer. When @fregs is non-NULL the event is sized to also carry the
 * function's argument registers.
 */
void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct ftrace_entry *entry;
	int size = sizeof(*entry);

	/* Extra room for the saved arguments, only when fregs is supplied. */
	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);

	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
					    trace_ctx);
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->parent_ip		= parent_ip;

#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	if (fregs) {
		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
			entry->args[i] = ftrace_regs_get_argument(fregs, i);
	}
#endif

	if (static_branch_unlikely(&trace_function_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
	__buffer_unlock_commit(buffer, event);
}
2101 
#ifdef CONFIG_STACKTRACE

/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING	4

/* Maximum saved return addresses per nesting level. */
#define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)

/* Scratch storage for one captured kernel stack trace. */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};


/* One ftrace_stack per nesting level (see FTRACE_KSTACK_NESTING). */
struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};

/* Per-CPU stack-trace scratch space, one slot per nesting level. */
static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
/* Per-CPU nesting depth into ftrace_stacks (see __ftrace_trace_stack()). */
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2120 
/*
 * __ftrace_trace_stack - record a kernel stack trace into @buffer
 *
 * Saves the current kernel stack into a per CPU, per nesting-level
 * scratch buffer and then copies it into a TRACE_STACK event sized for
 * exactly the number of entries captured.  @skip is the number of
 * innermost callers to leave out; @regs, when non-NULL, supplies the
 * register state to unwind from instead of the current frame.
 */
void __ftrace_trace_stack(struct trace_array *tr,
			  struct trace_buffer *buffer,
			  unsigned int trace_ctx,
			  int skip, struct pt_regs *regs)
{
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;
	int bit;

	/* Guard against recursing into this code from the tracing it does */
	bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
	if (bit < 0)
		return;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	guard(preempt_notrace)();

	/* Claim the next unused nesting level of the per CPU scratch stacks */
	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns.  We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

#ifdef CONFIG_DYNAMIC_FTRACE
	/* Mark entry of stack trace as trampoline code */
	if (tr->ops && tr->ops->trampoline) {
		unsigned long tramp_start = tr->ops->trampoline;
		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
		unsigned long *calls = fstack->calls;

		for (int i = 0; i < nr_entries; i++) {
			if (calls[i] >= tramp_start && calls[i] < tramp_end)
				calls[i] = FTRACE_TRAMPOLINE_MARKER;
		}
	}
#endif

	/* Size the event for exactly the entries that were captured */
	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
				    struct_size(entry, caller, nr_entries),
				    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	entry->size = nr_entries;
	memcpy(&entry->caller, fstack->calls,
	       flex_array_size(entry, caller, nr_entries));

	__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	__this_cpu_dec(ftrace_stack_reserve);
	trace_clear_recursion(bit);
}
2206 
/*
 * __trace_stack - record a kernel stack trace, handling RCU state
 *
 * __ftrace_trace_stack() needs RCU to be watching.  If it is not,
 * enter an RCU-watched section around the call — unless we are in NMI
 * context, where ct_irq_enter() must not be used and the trace is
 * silently dropped instead.
 */
void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
		   int skip)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;

	/* Common case: RCU is already watching */
	if (rcu_is_watching()) {
		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
		return;
	}

	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
		return;

	/*
	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
	 * but if the above rcu_is_watching() failed, then the NMI
	 * triggered someplace critical, and ct_irq_enter() should
	 * not be called from NMI.
	 */
	if (unlikely(in_nmi()))
		return;

	ct_irq_enter_irqson();
	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
	ct_irq_exit_irqson();
}
2233 
2234 /**
2235  * trace_dump_stack - record a stack back trace in the trace buffer
2236  * @skip: Number of functions to skip (helper handlers)
2237  */
2238 void trace_dump_stack(int skip)
2239 {
2240 	if (tracing_disabled || tracing_selftest_running)
2241 		return;
2242 
2243 #ifndef CONFIG_UNWINDER_ORC
2244 	/* Skip 1 to skip this function. */
2245 	skip++;
2246 #endif
2247 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
2248 				tracing_gen_ctx(), skip, NULL);
2249 }
2250 EXPORT_SYMBOL_GPL(trace_dump_stack);
2251 
2252 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Per CPU recursion counter: non-zero while a user stack is being saved */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * ftrace_trace_userstack - record the current user-space stack
 *
 * Writes a TRACE_USER_STACK event when the userstacktrace trace
 * option is enabled.  Must not run in NMI context, because saving
 * the user stack can fault.  Recursion (user stack tracing itself
 * triggering tracing) is blocked via user_stack_count.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	guard(preempt)();
	if (__this_cpu_read(user_stack_count))
		return;

	__this_cpu_inc(user_stack_count);

	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
					    sizeof(*entry), trace_ctx);
	if (!event)
		goto out_drop_count;
	entry	= ring_buffer_event_data(event);

	entry->tgid		= current->tgid;
	/* Zero the whole array: the saved trace may not fill every slot */
	memset(&entry->caller, 0, sizeof(entry->caller));

	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
	__buffer_unlock_commit(buffer, event);

 out_drop_count:
	__this_cpu_dec(user_stack_count);
}
2297 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* User stack tracing not configured: record nothing */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
2303 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2304 
2305 #endif /* CONFIG_STACKTRACE */
2306 
2307 static inline void
2308 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
2309 			  unsigned long long delta)
2310 {
2311 	entry->bottom_delta_ts = delta & U32_MAX;
2312 	entry->top_delta_ts = (delta >> 32);
2313 }
2314 
/*
 * trace_last_func_repeats - emit a TRACE_FUNC_REPEATS event
 *
 * Records the last repeated function call held in @last_info along
 * with its repeat count and the time delta between that last call and
 * this event's own ring buffer timestamp.
 */
void trace_last_func_repeats(struct trace_array *tr,
			     struct trace_func_repeats *last_info,
			     unsigned int trace_ctx)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct func_repeats_entry *entry;
	struct ring_buffer_event *event;
	u64 delta;

	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
					    sizeof(*entry), trace_ctx);
	if (!event)
		return;

	/* The delta is against this event's timestamp, read after reserve */
	delta = ring_buffer_event_time_stamp(buffer, event) -
		last_info->ts_last_call;

	entry = ring_buffer_event_data(event);
	entry->ip = last_info->ip;
	entry->parent_ip = last_info->parent_ip;
	entry->count = last_info->count;
	func_repeats_set_delta_ts(entry, delta);

	__buffer_unlock_commit(buffer, event);
}
2340 
2341 static void trace_iterator_increment(struct trace_iterator *iter)
2342 {
2343 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2344 
2345 	iter->idx++;
2346 	if (buf_iter)
2347 		ring_buffer_iter_advance(buf_iter);
2348 }
2349 
2350 static struct trace_entry *
2351 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2352 		unsigned long *lost_events)
2353 {
2354 	struct ring_buffer_event *event;
2355 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2356 
2357 	if (buf_iter) {
2358 		event = ring_buffer_iter_peek(buf_iter, ts);
2359 		if (lost_events)
2360 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
2361 				(unsigned long)-1 : 0;
2362 	} else {
2363 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
2364 					 lost_events);
2365 	}
2366 
2367 	if (event) {
2368 		iter->ent_size = ring_buffer_event_length(event);
2369 		return ring_buffer_event_data(event);
2370 	}
2371 	iter->ent_size = 0;
2372 	return NULL;
2373 }
2374 
/*
 * __find_next_entry - find the next entry with the lowest timestamp
 *
 * Peeks every tracing CPU (or only iter->cpu_file when a per-cpu file
 * is open) and returns the entry with the smallest timestamp.  The
 * winning entry's CPU, timestamp and lost-event count are returned via
 * the out parameters.  Note that peek_next_entry() overwrites
 * iter->ent_size on every peek, so the winner's size is latched in
 * next_size and written back before returning.
 */
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
		  unsigned long *missing_events, u64 *ent_ts)
{
	struct trace_buffer *buffer = iter->array_buffer->buffer;
	struct trace_entry *ent, *next = NULL;
	unsigned long lost_events = 0, next_lost = 0;
	int cpu_file = iter->cpu_file;
	u64 next_ts = 0, ts;
	int next_cpu = -1;
	int next_size = 0;
	int cpu;

	/*
	 * If we are in a per_cpu trace file, don't bother by iterating over
	 * all cpu and peek directly.
	 */
	if (cpu_file > RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
		if (ent_cpu)
			*ent_cpu = cpu_file;

		return ent;
	}

	for_each_tracing_cpu(cpu) {

		if (ring_buffer_empty_cpu(buffer, cpu))
			continue;

		ent = peek_next_entry(iter, cpu, &ts, &lost_events);

		/*
		 * Pick the entry with the smallest timestamp:
		 */
		if (ent && (!next || ts < next_ts)) {
			next = ent;
			next_cpu = cpu;
			next_ts = ts;
			next_lost = lost_events;
			next_size = iter->ent_size;
		}
	}

	/* Restore the size of the chosen entry (later peeks clobbered it) */
	iter->ent_size = next_size;

	if (ent_cpu)
		*ent_cpu = next_cpu;

	if (ent_ts)
		*ent_ts = next_ts;

	if (missing_events)
		*missing_events = next_lost;

	return next;
}
2434 
/* Fallback format buffer used when iter->fmt cannot be kmalloc'ed */
#define STATIC_FMT_BUF_SIZE	128
static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
2437 
2438 char *trace_iter_expand_format(struct trace_iterator *iter)
2439 {
2440 	char *tmp;
2441 
2442 	/*
2443 	 * iter->tr is NULL when used with tp_printk, which makes
2444 	 * this get called where it is not safe to call krealloc().
2445 	 */
2446 	if (!iter->tr || iter->fmt == static_fmt_buf)
2447 		return NULL;
2448 
2449 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
2450 		       GFP_KERNEL);
2451 	if (tmp) {
2452 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
2453 		iter->fmt = tmp;
2454 	}
2455 
2456 	return tmp;
2457 }
2458 
2459 /* Returns true if the string is safe to dereference from an event */
2460 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
2461 {
2462 	unsigned long addr = (unsigned long)str;
2463 	struct trace_event *trace_event;
2464 	struct trace_event_call *event;
2465 
2466 	/* OK if part of the event data */
2467 	if ((addr >= (unsigned long)iter->ent) &&
2468 	    (addr < (unsigned long)iter->ent + iter->ent_size))
2469 		return true;
2470 
2471 	/* OK if part of the temp seq buffer */
2472 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
2473 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
2474 		return true;
2475 
2476 	/* Core rodata can not be freed */
2477 	if (is_kernel_rodata(addr))
2478 		return true;
2479 
2480 	if (trace_is_tracepoint_string(str))
2481 		return true;
2482 
2483 	/*
2484 	 * Now this could be a module event, referencing core module
2485 	 * data, which is OK.
2486 	 */
2487 	if (!iter->ent)
2488 		return false;
2489 
2490 	trace_event = ftrace_find_event(iter->ent->type);
2491 	if (!trace_event)
2492 		return false;
2493 
2494 	event = container_of(trace_event, struct trace_event_call, event);
2495 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
2496 		return false;
2497 
2498 	/* Would rather have rodata, but this will suffice */
2499 	if (within_module_core(addr, event->module))
2500 		return true;
2501 
2502 	return false;
2503 }
2504 
2505 /**
2506  * ignore_event - Check dereferenced fields while writing to the seq buffer
2507  * @iter: The iterator that holds the seq buffer and the event being printed
2508  *
2509  * At boot up, test_event_printk() will flag any event that dereferences
2510  * a string with "%s" that does exist in the ring buffer. It may still
2511  * be valid, as the string may point to a static string in the kernel
2512  * rodata that never gets freed. But if the string pointer is pointing
2513  * to something that was allocated, there's a chance that it can be freed
2514  * by the time the user reads the trace. This would cause a bad memory
2515  * access by the kernel and possibly crash the system.
2516  *
2517  * This function will check if the event has any fields flagged as needing
2518  * to be checked at runtime and perform those checks.
2519  *
2520  * If it is found that a field is unsafe, it will write into the @iter->seq
2521  * a message stating what was found to be unsafe.
2522  *
2523  * @return: true if the event is unsafe and should be ignored,
2524  *          false otherwise.
2525  */
2526 bool ignore_event(struct trace_iterator *iter)
2527 {
2528 	struct ftrace_event_field *field;
2529 	struct trace_event *trace_event;
2530 	struct trace_event_call *event;
2531 	struct list_head *head;
2532 	struct trace_seq *seq;
2533 	const void *ptr;
2534 
2535 	trace_event = ftrace_find_event(iter->ent->type);
2536 
2537 	seq = &iter->seq;
2538 
2539 	if (!trace_event) {
2540 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
2541 		return true;
2542 	}
2543 
2544 	event = container_of(trace_event, struct trace_event_call, event);
2545 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
2546 		return false;
2547 
2548 	head = trace_get_fields(event);
2549 	if (!head) {
2550 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
2551 				 trace_event_name(event));
2552 		return true;
2553 	}
2554 
2555 	/* Offsets are from the iter->ent that points to the raw event */
2556 	ptr = iter->ent;
2557 
2558 	list_for_each_entry(field, head, link) {
2559 		const char *str;
2560 		bool good;
2561 
2562 		if (!field->needs_test)
2563 			continue;
2564 
2565 		str = *(const char **)(ptr + field->offset);
2566 
2567 		good = trace_safe_str(iter, str);
2568 
2569 		/*
2570 		 * If you hit this warning, it is likely that the
2571 		 * trace event in question used %s on a string that
2572 		 * was saved at the time of the event, but may not be
2573 		 * around when the trace is read. Use __string(),
2574 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
2575 		 * instead. See samples/trace_events/trace-events-sample.h
2576 		 * for reference.
2577 		 */
2578 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
2579 			      trace_event_name(event), field->name)) {
2580 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
2581 					 trace_event_name(event), field->name);
2582 			return true;
2583 		}
2584 	}
2585 	return false;
2586 }
2587 
/*
 * trace_event_format - rewrite an event's format string for printing
 *
 * When the hash-ptr trace option is off, build a copy of @fmt in
 * iter->fmt with every "%p" turned into "%px" so raw pointer values
 * are printed instead of hashed ones.  "%%" is copied through
 * untouched.  Returns @fmt unchanged when no rewrite is needed or the
 * scratch buffer cannot be grown.
 */
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
{
	const char *p, *new_fmt;
	char *q;

	if (WARN_ON_ONCE(!fmt))
		return fmt;

	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
		return fmt;

	p = fmt;
	new_fmt = q = iter->fmt;
	while (*p) {
		/* Worst case this iteration emits 3 bytes ("%px") plus NUL */
		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
			if (!trace_iter_expand_format(iter))
				return fmt;

			/* krealloc() may have moved the buffer; rebase q */
			q += iter->fmt - new_fmt;
			new_fmt = iter->fmt;
		}

		*q++ = *p++;

		/* Replace %p with %px */
		if (p[-1] == '%') {
			if (p[0] == '%') {
				*q++ = *p++;
			} else if (p[0] == 'p' && !isalnum(p[1])) {
				*q++ = *p++;
				*q++ = 'x';
			}
		}
	}
	*q = '\0';

	return new_fmt;
}
2626 
/* Fallback for iter->temp when kmalloc is unavailable (ftrace_dump) */
#define STATIC_TEMP_BUF_SIZE	128
static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
2629 
/*
 * Find the next real entry, without updating the iterator itself.
 *
 * Because peeking may invalidate the current iter->ent, its contents
 * are first preserved in iter->temp (growing it with kmalloc when
 * needed, unless it is the fixed static_temp_buf).
 */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	/* __find_next_entry will reset ent_size */
	int ent_size = iter->ent_size;
	struct trace_entry *entry;

	/*
	 * If called from ftrace_dump(), then the iter->temp buffer
	 * will be the static_temp_buf and not created from kmalloc.
	 * If the entry size is greater than the buffer, we can
	 * not save it. Just return NULL in that case. This is only
	 * used to add markers when two consecutive events' time
	 * stamps have a large delta. See trace_print_lat_context()
	 */
	if (iter->temp == static_temp_buf &&
	    STATIC_TEMP_BUF_SIZE < ent_size)
		return NULL;

	/*
	 * The __find_next_entry() may call peek_next_entry(), which may
	 * call ring_buffer_peek() that may make the contents of iter->ent
	 * undefined. Need to copy iter->ent now.
	 */
	if (iter->ent && iter->ent != iter->temp) {
		/* Grow iter->temp if the current entry does not fit */
		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
			void *temp;
			temp = kmalloc(iter->ent_size, GFP_KERNEL);
			if (!temp)
				return NULL;
			kfree(iter->temp);
			iter->temp = temp;
			iter->temp_size = iter->ent_size;
		}
		memcpy(iter->temp, iter->ent, iter->ent_size);
		iter->ent = iter->temp;
	}
	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
	/* Put back the original ent_size */
	iter->ent_size = ent_size;

	return entry;
}
2675 
2676 /* Find the next real entry, and increment the iterator to the next entry */
2677 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2678 {
2679 	iter->ent = __find_next_entry(iter, &iter->cpu,
2680 				      &iter->lost_events, &iter->ts);
2681 
2682 	if (iter->ent)
2683 		trace_iterator_increment(iter);
2684 
2685 	return iter->ent ? iter : NULL;
2686 }
2687 
/* Read and discard the next entry on iter's current CPU */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}
2693 
2694 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2695 {
2696 	struct trace_iterator *iter = m->private;
2697 	int i = (int)*pos;
2698 	void *ent;
2699 
2700 	WARN_ON_ONCE(iter->leftover);
2701 
2702 	(*pos)++;
2703 
2704 	/* can't go backwards */
2705 	if (iter->idx > i)
2706 		return NULL;
2707 
2708 	if (iter->idx < 0)
2709 		ent = trace_find_next_entry_inc(iter);
2710 	else
2711 		ent = iter;
2712 
2713 	while (ent && iter->idx < i)
2714 		ent = trace_find_next_entry_inc(iter);
2715 
2716 	iter->pos = *pos;
2717 
2718 	return ent;
2719 }
2720 
/*
 * tracing_iter_reset - rewind the iterator for @cpu to the buffer start
 *
 * Entries stamped before array_buffer->time_start are skipped over and
 * their count is recorded in the per-cpu skipped_entries, so the entry
 * statistics stay correct for the latency tracers.
 */
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
	struct ring_buffer_iter *buf_iter;
	unsigned long entries = 0;
	u64 ts;

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;

	buf_iter = trace_buffer_iter(iter, cpu);
	if (!buf_iter)
		return;

	ring_buffer_iter_reset(buf_iter);

	/*
	 * We could have the case with the max latency tracers
	 * that a reset never took place on a cpu. This is evident
	 * by the timestamp being before the start of the buffer.
	 */
	while (ring_buffer_iter_peek(buf_iter, &ts)) {
		if (ts >= iter->array_buffer->time_start)
			break;
		entries++;
		ring_buffer_iter_advance(buf_iter);
		/* This could be a big loop */
		cond_resched();
	}

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
}
2751 
2752 /*
2753  * The current tracer is copied to avoid a global locking
2754  * all around.
2755  */
2756 static void *s_start(struct seq_file *m, loff_t *pos)
2757 {
2758 	struct trace_iterator *iter = m->private;
2759 	struct trace_array *tr = iter->tr;
2760 	int cpu_file = iter->cpu_file;
2761 	void *p = NULL;
2762 	loff_t l = 0;
2763 	int cpu;
2764 
2765 	mutex_lock(&trace_types_lock);
2766 	if (unlikely(tr->current_trace != iter->trace)) {
2767 		/* Close iter->trace before switching to the new current tracer */
2768 		if (iter->trace->close)
2769 			iter->trace->close(iter);
2770 		iter->trace = tr->current_trace;
2771 		/* Reopen the new current tracer */
2772 		if (iter->trace->open)
2773 			iter->trace->open(iter);
2774 	}
2775 	mutex_unlock(&trace_types_lock);
2776 
2777 	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
2778 		return ERR_PTR(-EBUSY);
2779 
2780 	if (*pos != iter->pos) {
2781 		iter->ent = NULL;
2782 		iter->cpu = 0;
2783 		iter->idx = -1;
2784 
2785 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2786 			for_each_tracing_cpu(cpu)
2787 				tracing_iter_reset(iter, cpu);
2788 		} else
2789 			tracing_iter_reset(iter, cpu_file);
2790 
2791 		iter->leftover = 0;
2792 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2793 			;
2794 
2795 	} else {
2796 		/*
2797 		 * If we overflowed the seq_file before, then we want
2798 		 * to just reuse the trace_seq buffer again.
2799 		 */
2800 		if (iter->leftover)
2801 			p = iter;
2802 		else {
2803 			l = *pos - 1;
2804 			p = s_next(m, p, &l);
2805 		}
2806 	}
2807 
2808 	trace_event_read_lock();
2809 	trace_access_lock(cpu_file);
2810 	return p;
2811 }
2812 
2813 static void s_stop(struct seq_file *m, void *p)
2814 {
2815 	struct trace_iterator *iter = m->private;
2816 
2817 	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
2818 		return;
2819 
2820 	trace_access_unlock(iter->cpu_file);
2821 	trace_event_read_unlock();
2822 }
2823 
2824 static void
2825 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
2826 		      unsigned long *entries, int cpu)
2827 {
2828 	unsigned long count;
2829 
2830 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
2831 	/*
2832 	 * If this buffer has skipped entries, then we hold all
2833 	 * entries for the trace and we need to ignore the
2834 	 * ones before the time stamp.
2835 	 */
2836 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2837 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2838 		/* total is the same as the entries */
2839 		*total = count;
2840 	} else
2841 		*total = count +
2842 			ring_buffer_overrun_cpu(buf->buffer, cpu);
2843 	*entries = count;
2844 }
2845 
2846 static void
2847 get_total_entries(struct array_buffer *buf,
2848 		  unsigned long *total, unsigned long *entries)
2849 {
2850 	unsigned long t, e;
2851 	int cpu;
2852 
2853 	*total = 0;
2854 	*entries = 0;
2855 
2856 	for_each_tracing_cpu(cpu) {
2857 		get_total_entries_cpu(buf, &t, &e, cpu);
2858 		*total += t;
2859 		*entries += e;
2860 	}
2861 }
2862 
2863 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
2864 {
2865 	unsigned long total, entries;
2866 
2867 	if (!tr)
2868 		tr = &global_trace;
2869 
2870 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
2871 
2872 	return entries;
2873 }
2874 
2875 unsigned long trace_total_entries(struct trace_array *tr)
2876 {
2877 	unsigned long total, entries;
2878 
2879 	if (!tr)
2880 		tr = &global_trace;
2881 
2882 	get_total_entries(&tr->array_buffer, &total, &entries);
2883 
2884 	return entries;
2885 }
2886 
/* Print the column legend for the latency trace output format */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "#                    _------=> CPU#            \n"
		    "#                   / _-----=> irqs-off/BH-disabled\n"
		    "#                  | / _----=> need-resched    \n"
		    "#                  || / _---=> hardirq/softirq \n"
		    "#                  ||| / _--=> preempt-depth   \n"
		    "#                  |||| / _-=> migrate-disable \n"
		    "#                  ||||| /     delay           \n"
		    "#  cmd     pid     |||||| time  |   caller     \n"
		    "#     \\   /        ||||||  \\    |    /       \n");
}
2899 
/* Print the entries-in-buffer/entries-written summary line for @buf */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long entries, total;

	get_total_entries(buf, &total, &entries);
	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}
2910 
/* Print the basic column header, with an optional TGID column */
static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
				   unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);

	print_event_info(buf, m);

	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
}
2921 
/* Print the irq-info column header; widened by 10 chars when TGID is shown */
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
				       unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);
	static const char space[] = "            ";
	int prec = tgid ? 12 : 2;

	print_event_info(buf, m);

	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
}
2940 
/*
 * print_trace_header - print the latency trace banner
 *
 * Emits the tracer name, kernel release, entry counts, preemption
 * model, the task that triggered the max latency and (when recorded)
 * the critical section's start and end addresses.
 */
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
	struct array_buffer *buf = iter->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
	struct tracer *type = iter->trace;
	unsigned long entries;
	unsigned long total;
	const char *name = type->name;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
		   name, init_utsname()->release);
	seq_puts(m, "# -----------------------------------"
		 "---------------------------------\n");
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
		   entries,
		   total,
		   buf->cpu,
		   preempt_model_str(),
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
	seq_puts(m, "#    -----------------\n");
	seq_printf(m, "#    | task: %.16s-%d "
		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
		   data->policy, data->rt_priority);
	seq_puts(m, "#    -----------------\n");

	/* Only present when the tracer recorded a critical section */
	if (data->critical_start) {
		seq_puts(m, "#  => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#  => ended at:   ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#\n");
	}

	seq_puts(m, "#\n");
}
2992 
2993 static void test_cpu_buff_start(struct trace_iterator *iter)
2994 {
2995 	struct trace_seq *s = &iter->seq;
2996 	struct trace_array *tr = iter->tr;
2997 
2998 	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
2999 		return;
3000 
3001 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3002 		return;
3003 
3004 	if (cpumask_available(iter->started) &&
3005 	    cpumask_test_cpu(iter->cpu, iter->started))
3006 		return;
3007 
3008 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3009 		return;
3010 
3011 	if (cpumask_available(iter->started))
3012 		cpumask_set_cpu(iter->cpu, iter->started);
3013 
3014 	/* Don't print started cpu buffer for the first entry of the trace */
3015 	if (iter->idx > 1)
3016 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3017 				iter->cpu);
3018 }
3019 
3020 #ifdef CONFIG_FTRACE_SYSCALLS
/* Syscall enter/exit events are identified by their shared print ops */
static bool is_syscall_event(struct trace_event *event)
{
	return (event->funcs == &enter_syscall_print_funcs) ||
	       (event->funcs == &exit_syscall_print_funcs);

}
/* Size of the buffer used for syscall argument strings */
#define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
#else
/* No syscall tracing configured: nothing can be a syscall event */
static inline bool is_syscall_event(struct trace_event *event)
{
	return false;
}
#define syscall_buf_size 0
3034 #endif /* CONFIG_FTRACE_SYSCALLS */
3035 
/*
 * print_trace_fmt - default text rendering of the current entry
 *
 * Prints the context columns (unless disabled), then dispatches to the
 * event's trace() callback.  Falls back to field-by-field printing for
 * the "fields" option or when a persistent buffer's text delta makes
 * the compiled-in print_fmt pointers unreliable.
 */
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	test_cpu_buff_start(iter);

	event = ftrace_find_event(entry->type);

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
			trace_print_lat_context(iter);
		else
			trace_print_context(iter);
	}

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	if (event) {
		if (tr->trace_flags & TRACE_ITER(FIELDS))
			return print_event_fields(iter, event);
		/*
		 * For TRACE_EVENT() events, the print_fmt is not
		 * safe to use if the array has delta offsets
		 * Force printing via the fields.
		 */
		if ((tr->text_delta)) {
			/* ftrace and system call events are still OK */
			if ((event->type > __TRACE_LAST_TYPE) &&
			    !is_syscall_event(event))
				return print_event_fields(iter, event);
		}
		return event->funcs->trace(iter, sym_flags, event);
	}

	trace_seq_printf(s, "Unknown type %d\n", entry->type);

	return trace_handle_return(s);
}
3081 
3082 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3083 {
3084 	struct trace_array *tr = iter->tr;
3085 	struct trace_seq *s = &iter->seq;
3086 	struct trace_entry *entry;
3087 	struct trace_event *event;
3088 
3089 	entry = iter->ent;
3090 
3091 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
3092 		trace_seq_printf(s, "%d %d %llu ",
3093 				 entry->pid, iter->cpu, iter->ts);
3094 
3095 	if (trace_seq_has_overflowed(s))
3096 		return TRACE_TYPE_PARTIAL_LINE;
3097 
3098 	event = ftrace_find_event(entry->type);
3099 	if (event)
3100 		return event->funcs->raw(iter, 0, event);
3101 
3102 	trace_seq_printf(s, "%d ?\n", entry->type);
3103 
3104 	return trace_handle_return(s);
3105 }
3106 
3107 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3108 {
3109 	struct trace_array *tr = iter->tr;
3110 	struct trace_seq *s = &iter->seq;
3111 	unsigned char newline = '\n';
3112 	struct trace_entry *entry;
3113 	struct trace_event *event;
3114 
3115 	entry = iter->ent;
3116 
3117 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3118 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3119 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3120 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3121 		if (trace_seq_has_overflowed(s))
3122 			return TRACE_TYPE_PARTIAL_LINE;
3123 	}
3124 
3125 	event = ftrace_find_event(entry->type);
3126 	if (event) {
3127 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3128 		if (ret != TRACE_TYPE_HANDLED)
3129 			return ret;
3130 	}
3131 
3132 	SEQ_PUT_FIELD(s, newline);
3133 
3134 	return trace_handle_return(s);
3135 }
3136 
3137 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3138 {
3139 	struct trace_array *tr = iter->tr;
3140 	struct trace_seq *s = &iter->seq;
3141 	struct trace_entry *entry;
3142 	struct trace_event *event;
3143 
3144 	entry = iter->ent;
3145 
3146 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3147 		SEQ_PUT_FIELD(s, entry->pid);
3148 		SEQ_PUT_FIELD(s, iter->cpu);
3149 		SEQ_PUT_FIELD(s, iter->ts);
3150 		if (trace_seq_has_overflowed(s))
3151 			return TRACE_TYPE_PARTIAL_LINE;
3152 	}
3153 
3154 	event = ftrace_find_event(entry->type);
3155 	return event ? event->funcs->binary(iter, 0, event) :
3156 		TRACE_TYPE_HANDLED;
3157 }
3158 
3159 int trace_empty(struct trace_iterator *iter)
3160 {
3161 	struct ring_buffer_iter *buf_iter;
3162 	int cpu;
3163 
3164 	/* If we are looking at one CPU buffer, only check that one */
3165 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3166 		cpu = iter->cpu_file;
3167 		buf_iter = trace_buffer_iter(iter, cpu);
3168 		if (buf_iter) {
3169 			if (!ring_buffer_iter_empty(buf_iter))
3170 				return 0;
3171 		} else {
3172 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3173 				return 0;
3174 		}
3175 		return 1;
3176 	}
3177 
3178 	for_each_tracing_cpu(cpu) {
3179 		buf_iter = trace_buffer_iter(iter, cpu);
3180 		if (buf_iter) {
3181 			if (!ring_buffer_iter_empty(buf_iter))
3182 				return 0;
3183 		} else {
3184 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3185 				return 0;
3186 		}
3187 	}
3188 
3189 	return 1;
3190 }
3191 
3192 /*  Called with trace_event_read_lock() held. */
3193 enum print_line_t print_trace_line(struct trace_iterator *iter)
3194 {
3195 	struct trace_array *tr = iter->tr;
3196 	unsigned long trace_flags = tr->trace_flags;
3197 	enum print_line_t ret;
3198 
3199 	if (iter->lost_events) {
3200 		if (iter->lost_events == (unsigned long)-1)
3201 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
3202 					 iter->cpu);
3203 		else
3204 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3205 					 iter->cpu, iter->lost_events);
3206 		if (trace_seq_has_overflowed(&iter->seq))
3207 			return TRACE_TYPE_PARTIAL_LINE;
3208 	}
3209 
3210 	if (iter->trace && iter->trace->print_line) {
3211 		ret = iter->trace->print_line(iter);
3212 		if (ret != TRACE_TYPE_UNHANDLED)
3213 			return ret;
3214 	}
3215 
3216 	if (iter->ent->type == TRACE_BPUTS &&
3217 			trace_flags & TRACE_ITER(PRINTK) &&
3218 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3219 		return trace_print_bputs_msg_only(iter);
3220 
3221 	if (iter->ent->type == TRACE_BPRINT &&
3222 			trace_flags & TRACE_ITER(PRINTK) &&
3223 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3224 		return trace_print_bprintk_msg_only(iter);
3225 
3226 	if (iter->ent->type == TRACE_PRINT &&
3227 			trace_flags & TRACE_ITER(PRINTK) &&
3228 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3229 		return trace_print_printk_msg_only(iter);
3230 
3231 	if (trace_flags & TRACE_ITER(BIN))
3232 		return print_bin_fmt(iter);
3233 
3234 	if (trace_flags & TRACE_ITER(HEX))
3235 		return print_hex_fmt(iter);
3236 
3237 	if (trace_flags & TRACE_ITER(RAW))
3238 		return print_raw_fmt(iter);
3239 
3240 	return print_trace_fmt(iter);
3241 }
3242 
3243 void trace_latency_header(struct seq_file *m)
3244 {
3245 	struct trace_iterator *iter = m->private;
3246 	struct trace_array *tr = iter->tr;
3247 
3248 	/* print nothing if the buffers are empty */
3249 	if (trace_empty(iter))
3250 		return;
3251 
3252 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3253 		print_trace_header(m, iter);
3254 
3255 	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
3256 		print_lat_help_header(m);
3257 }
3258 
3259 void trace_default_header(struct seq_file *m)
3260 {
3261 	struct trace_iterator *iter = m->private;
3262 	struct trace_array *tr = iter->tr;
3263 	unsigned long trace_flags = tr->trace_flags;
3264 
3265 	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
3266 		return;
3267 
3268 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3269 		/* print nothing if the buffers are empty */
3270 		if (trace_empty(iter))
3271 			return;
3272 		print_trace_header(m, iter);
3273 		if (!(trace_flags & TRACE_ITER(VERBOSE)))
3274 			print_lat_help_header(m);
3275 	} else {
3276 		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
3277 			if (trace_flags & TRACE_ITER(IRQ_INFO))
3278 				print_func_help_header_irq(iter->array_buffer,
3279 							   m, trace_flags);
3280 			else
3281 				print_func_help_header(iter->array_buffer, m,
3282 						       trace_flags);
3283 		}
3284 	}
3285 }
3286 
/* Add a warning to the output when function tracing has been killed. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
3294 
/*
 * seq_file .show: emit either the header (when no entry is current),
 * a previously formatted leftover line, or the next trace line.
 */
static int s_show(struct seq_file *m, void *v)
{
	struct trace_iterator *iter = v;
	int ret;

	if (iter->ent == NULL) {
		/* No current entry: this pass prints the file header. */
		if (iter->tr) {
			seq_printf(m, "# tracer: %s\n", iter->trace->name);
			seq_puts(m, "#\n");
			test_ftrace_alive(m);
		}
		if (iter->snapshot && trace_empty(iter))
			print_snapshot_help(m, iter);
		else if (iter->trace && iter->trace->print_header)
			iter->trace->print_header(m);
		else
			trace_default_header(m);

	} else if (iter->leftover) {
		/*
		 * If we filled the seq_file buffer earlier, we
		 * want to just show it now.
		 */
		ret = trace_print_seq(m, &iter->seq);

		/* ret should this time be zero, but you never know */
		iter->leftover = ret;

	} else {
		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/* The line overflowed iter->seq; replace it with a marker. */
			iter->seq.full = 0;
			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
		}
		ret = trace_print_seq(m, &iter->seq);
		/*
		 * If we overflow the seq_file buffer, then it will
		 * ask us for this data again at start up.
		 * Use that instead.
		 *  ret is 0 if seq_file write succeeded.
		 *        -1 otherwise.
		 */
		iter->leftover = ret;
	}

	return 0;
}
3342 
/* seq_file operations backing the "trace" file. */
static const struct seq_operations tracer_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
};
3349 
3350 /*
3351  * Note, as iter itself can be allocated and freed in different
3352  * ways, this function is only used to free its content, and not
3353  * the iterator itself. The only requirement to all the allocations
3354  * is that it must zero all fields (kzalloc), as freeing works with
3355  * ethier allocated content or NULL.
3356  */
3357 static void free_trace_iter_content(struct trace_iterator *iter)
3358 {
3359 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
3360 	if (iter->fmt != static_fmt_buf)
3361 		kfree(iter->fmt);
3362 
3363 	kfree(iter->temp);
3364 	kfree(iter->buffer_iter);
3365 	mutex_destroy(&iter->mutex);
3366 	free_cpumask_var(iter->started);
3367 }
3368 
/*
 * Allocate and initialize a trace iterator for reading the "trace" file.
 *
 * The iterator is installed as the seq_file private data and returned;
 * on failure an ERR_PTR() is returned and the seq_file allocation is
 * released.  When @snapshot is true (or the current tracer sets
 * print_max) the iterator reads the snapshot buffer instead of the
 * live buffer.
 */
struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	/* One ring-buffer iterator slot per possible CPU. */
	iter->buffer_iter = kzalloc_objs(*iter->buffer_iter, nr_cpu_ids);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracer may not need. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	mutex_lock(&trace_types_lock);
	iter->trace = tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->snapshot_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
		iter->iter_flags |= TRACE_FILE_PAUSE;
		tracing_stop_tr(tr);
	}

	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_start(iter->array_buffer->buffer,
						       cpu, GFP_KERNEL);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_start(iter->array_buffer->buffer,
					       cpu, GFP_KERNEL);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

 fail:
	mutex_unlock(&trace_types_lock);
	free_trace_iter_content(iter);
release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
3476 
3477 int tracing_open_generic(struct inode *inode, struct file *filp)
3478 {
3479 	int ret;
3480 
3481 	ret = tracing_check_open_get_tr(NULL);
3482 	if (ret)
3483 		return ret;
3484 
3485 	filp->private_data = inode->i_private;
3486 	return 0;
3487 }
3488 
3489 /*
3490  * Open and update trace_array ref count.
3491  * Must have the current trace_array passed to it.
3492  */
3493 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3494 {
3495 	struct trace_array *tr = inode->i_private;
3496 	int ret;
3497 
3498 	ret = tracing_check_open_get_tr(tr);
3499 	if (ret)
3500 		return ret;
3501 
3502 	if ((filp->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
3503 		trace_array_put(tr);
3504 		return -EACCES;
3505 	}
3506 
3507 	filp->private_data = inode->i_private;
3508 
3509 	return 0;
3510 }
3511 
3512 /*
3513  * The private pointer of the inode is the trace_event_file.
3514  * Update the tr ref count associated to it.
3515  */
3516 int tracing_open_file_tr(struct inode *inode, struct file *filp)
3517 {
3518 	struct trace_event_file *file = inode->i_private;
3519 	int ret;
3520 
3521 	ret = tracing_check_open_get_tr(file->tr);
3522 	if (ret)
3523 		return ret;
3524 
3525 	guard(mutex)(&event_mutex);
3526 
3527 	/* Fail if the file is marked for removal */
3528 	if (file->flags & EVENT_FILE_FL_FREED) {
3529 		trace_array_put(file->tr);
3530 		return -ENODEV;
3531 	} else {
3532 		event_file_get(file);
3533 	}
3534 
3535 	return 0;
3536 }
3537 
/* Release counterpart of tracing_open_file_tr(): drop both references. */
int tracing_release_file_tr(struct inode *inode, struct file *filp)
{
	struct trace_event_file *file = inode->i_private;

	trace_array_put(file->tr);
	event_file_put(file);

	return 0;
}
3547 
/* Release for single_open() files backed by a trace_event_file. */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);
	return single_release(inode, filp);
}
3553 
/*
 * Release the "trace" file: tear down the iterator set up by
 * __tracing_open() and restart tracing if it was paused at open.
 */
int tracing_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m = file->private_data;
	struct trace_iterator *iter;
	int cpu;

	if (!(file->f_mode & FMODE_READ)) {
		trace_array_put(tr);
		return 0;
	}

	/* Writes do not use seq_file */
	iter = m->private;
	mutex_lock(&trace_types_lock);

	/* Finish all per-CPU ring-buffer iterators started at open. */
	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

	if (iter->iter_flags & TRACE_FILE_PAUSE)
		/* reenable tracing if it was previously enabled */
		tracing_start_tr(tr);

	__trace_array_put(tr);

	mutex_unlock(&trace_types_lock);

	free_trace_iter_content(iter);
	seq_release_private(inode, file);

	return 0;
}
3591 
/* Generic release: drop the trace_array reference taken at open. */
int tracing_release_generic_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return 0;
}
3599 
/* Release for single_open() files that hold a trace_array reference. */
static int tracing_single_release_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	return single_release(inode, file);
}
3608 
3609 static bool update_last_data_if_empty(struct trace_array *tr);
3610 
/*
 * Open the "trace" file.  Opening with O_TRUNC for writing erases the
 * buffer contents; opening for reading builds a trace iterator.
 */
static int tracing_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was open for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		int cpu = tracing_get_cpu(inode);
		struct array_buffer *trace_buf = &tr->array_buffer;

#ifdef CONFIG_TRACER_MAX_TRACE
		/* Latency tracers expose the snapshot buffer through "trace". */
		if (tr->current_trace->print_max)
			trace_buf = &tr->snapshot_buffer;
#endif

		if (cpu == RING_BUFFER_ALL_CPUS)
			tracing_reset_online_cpus(trace_buf);
		else
			tracing_reset_cpu(trace_buf, cpu);

		update_last_data_if_empty(tr);
	}

	if (file->f_mode & FMODE_READ) {
		iter = __tracing_open(inode, file, false);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
			iter->iter_flags |= TRACE_FILE_LAT_FMT;
	}

	/* On failure, drop the reference taken by the open-check above. */
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
3652 
3653 /*
3654  * Some tracers are not suitable for instance buffers.
3655  * A tracer is always available for the global array (toplevel)
3656  * or if it explicitly states that it is.
3657  */
3658 static bool
3659 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3660 {
3661 	/* arrays with mapped buffer range do not have snapshots */
3662 	if (tr->range_addr_start && tracer_uses_snapshot(t))
3663 		return false;
3664 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3665 }
3666 
3667 /* Find the next tracer that this trace array may use */
3668 static struct tracer *
3669 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3670 {
3671 	while (t && !trace_ok_for_array(t, tr))
3672 		t = t->next;
3673 
3674 	return t;
3675 }
3676 
3677 static void *
3678 t_next(struct seq_file *m, void *v, loff_t *pos)
3679 {
3680 	struct trace_array *tr = m->private;
3681 	struct tracer *t = v;
3682 
3683 	(*pos)++;
3684 
3685 	if (t)
3686 		t = get_tracer_for_array(tr, t->next);
3687 
3688 	return t;
3689 }
3690 
3691 static void *t_start(struct seq_file *m, loff_t *pos)
3692 {
3693 	struct trace_array *tr = m->private;
3694 	struct tracer *t;
3695 	loff_t l = 0;
3696 
3697 	mutex_lock(&trace_types_lock);
3698 
3699 	t = get_tracer_for_array(tr, trace_types);
3700 	for (; t && l < *pos; t = t_next(m, t, &l))
3701 			;
3702 
3703 	return t;
3704 }
3705 
/* seq_file .stop: release the lock taken in t_start(). */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
3710 
3711 static int t_show(struct seq_file *m, void *v)
3712 {
3713 	struct tracer *t = v;
3714 
3715 	if (!t)
3716 		return 0;
3717 
3718 	seq_puts(m, t->name);
3719 	if (t->next)
3720 		seq_putc(m, ' ');
3721 	else
3722 		seq_putc(m, '\n');
3723 
3724 	return 0;
3725 }
3726 
/* seq_file operations backing "available_tracers". */
static const struct seq_operations show_traces_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
};
3733 
3734 static int show_traces_open(struct inode *inode, struct file *file)
3735 {
3736 	struct trace_array *tr = inode->i_private;
3737 	struct seq_file *m;
3738 	int ret;
3739 
3740 	ret = tracing_check_open_get_tr(tr);
3741 	if (ret)
3742 		return ret;
3743 
3744 	ret = seq_open(file, &show_traces_seq_ops);
3745 	if (ret) {
3746 		trace_array_put(tr);
3747 		return ret;
3748 	}
3749 
3750 	m = file->private_data;
3751 	m->private = tr;
3752 
3753 	return 0;
3754 }
3755 
/* seq_file release that also drops the trace_array reference. */
static int tracing_seq_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return seq_release(inode, file);
}
3763 
/*
 * Writes to the "trace" file are accepted but ignored; clearing the
 * buffer is done at open time when O_TRUNC is set (see tracing_open()).
 */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
3770 
3771 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3772 {
3773 	int ret;
3774 
3775 	if (file->f_mode & FMODE_READ)
3776 		ret = seq_lseek(file, offset, whence);
3777 	else
3778 		file->f_pos = ret = 0;
3779 
3780 	return ret;
3781 }
3782 
/* File operations for the "trace" file. */
static const struct file_operations tracing_fops = {
	.open		= tracing_open,
	.read		= seq_read,
	.read_iter	= seq_read_iter,
	.splice_read	= copy_splice_read,
	.write		= tracing_write_stub,
	.llseek		= tracing_lseek,
	.release	= tracing_release,
};
3792 
/* File operations for "available_tracers". */
static const struct file_operations show_traces_fops = {
	.open		= show_traces_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};
3799 
/* Read "tracing_cpumask": print the traced-CPU mask as a "%*pb" bitmap. */
static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct trace_array *tr = file_inode(filp)->i_private;
	/* __free(kfree): freed automatically on every return path. */
	char *mask_str __free(kfree) = NULL;
	int len;

	/* Size the buffer by formatting to NULL first; +1 for the NUL. */
	len = snprintf(NULL, 0, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
	mask_str = kmalloc(len, GFP_KERNEL);
	if (!mask_str)
		return -ENOMEM;

	len = snprintf(mask_str, len, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask));
	if (len >= count)
		return -EINVAL;

	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
}
3821 
/*
 * Install @tracing_cpumask_new as the set of CPUs to trace for @tr,
 * toggling per-CPU ring-buffer recording for every CPU whose bit
 * changed.  Returns 0 on success, -EINVAL if @tr is NULL.
 */
int tracing_set_cpumask(struct trace_array *tr,
			cpumask_var_t tracing_cpumask_new)
{
	int cpu;

	if (!tr)
		return -EINVAL;

	/* max_lock is an arch_spinlock_t; irqs must be off while held. */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		/*
		 * Increase/decrease the disabled counter if we are
		 * about to flip a bit in the cpumask:
		 */
		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);

	return 0;
}
3859 
3860 static ssize_t
3861 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3862 		      size_t count, loff_t *ppos)
3863 {
3864 	struct trace_array *tr = file_inode(filp)->i_private;
3865 	cpumask_var_t tracing_cpumask_new;
3866 	int err;
3867 
3868 	if (count == 0 || count > KMALLOC_MAX_SIZE)
3869 		return -EINVAL;
3870 
3871 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3872 		return -ENOMEM;
3873 
3874 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3875 	if (err)
3876 		goto err_free;
3877 
3878 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
3879 	if (err)
3880 		goto err_free;
3881 
3882 	free_cpumask_var(tracing_cpumask_new);
3883 
3884 	return count;
3885 
3886 err_free:
3887 	free_cpumask_var(tracing_cpumask_new);
3888 
3889 	return err;
3890 }
3891 
/* File operations for "tracing_cpumask". */
static const struct file_operations tracing_cpumask_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_cpumask_read,
	.write		= tracing_cpumask_write,
	.release	= tracing_release_generic_tr,
	.llseek		= generic_file_llseek,
};
3899 
/*
 * Show "trace_options": generic flags first ("name" when set,
 * "noname" when clear), then the current tracer's own options.
 */
static int tracing_trace_options_show(struct seq_file *m, void *v)
{
	struct tracer_opt *trace_opts;
	struct trace_array *tr = m->private;
	struct tracer_flags *flags;
	u32 tracer_flags;
	int i;

	guard(mutex)(&trace_types_lock);

	for (i = 0; trace_options[i]; i++) {
		if (tr->trace_flags & (1ULL << i))
			seq_printf(m, "%s\n", trace_options[i]);
		else
			seq_printf(m, "no%s\n", trace_options[i]);
	}

	/* Some tracers have no options of their own. */
	flags = tr->current_trace_flags;
	if (!flags || !flags->opts)
		return 0;

	tracer_flags = flags->val;
	trace_opts = flags->opts;

	for (i = 0; trace_opts[i].name; i++) {
		if (tracer_flags & trace_opts[i].bit)
			seq_printf(m, "%s\n", trace_opts[i].name);
		else
			seq_printf(m, "no%s\n", trace_opts[i].name);
	}

	return 0;
}
3933 
3934 static int __set_tracer_option(struct trace_array *tr,
3935 			       struct tracer_flags *tracer_flags,
3936 			       struct tracer_opt *opts, int neg)
3937 {
3938 	struct tracer *trace = tracer_flags->trace;
3939 	int ret = 0;
3940 
3941 	if (trace->set_flag)
3942 		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3943 	if (ret)
3944 		return ret;
3945 
3946 	if (neg)
3947 		tracer_flags->val &= ~opts->bit;
3948 	else
3949 		tracer_flags->val |= opts->bit;
3950 	return 0;
3951 }
3952 
3953 /* Try to assign a tracer specific option */
3954 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3955 {
3956 	struct tracer_flags *tracer_flags = tr->current_trace_flags;
3957 	struct tracer_opt *opts = NULL;
3958 	int i;
3959 
3960 	if (!tracer_flags || !tracer_flags->opts)
3961 		return 0;
3962 
3963 	for (i = 0; tracer_flags->opts[i].name; i++) {
3964 		opts = &tracer_flags->opts[i];
3965 
3966 		if (strcmp(cmp, opts->name) == 0)
3967 			return __set_tracer_option(tr, tracer_flags, opts, neg);
3968 	}
3969 
3970 	return -EINVAL;
3971 }
3972 
3973 /* Some tracers require overwrite to stay enabled */
3974 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
3975 {
3976 	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
3977 		return -1;
3978 
3979 	return 0;
3980 }
3981 
/*
 * Set or clear one generic TRACE_ITER() flag on @tr.
 *
 * Returns 0 on success, or a negative errno when the current tracer
 * rejects the change or a required allocation fails.  Flipping
 * RECORD_TGID, RECORD_CMD, TRACE_PRINTK or COPY_MARKER requires
 * event_mutex to be held by the caller.
 */
int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
{
	switch (mask) {
	case TRACE_ITER(RECORD_TGID):
	case TRACE_ITER(RECORD_CMD):
	case TRACE_ITER(TRACE_PRINTK):
	case TRACE_ITER(COPY_MARKER):
		lockdep_assert_held(&event_mutex);
	}

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	/* Flags that need handling before tr->trace_flags is updated. */
	switch (mask) {
	case TRACE_ITER(TRACE_PRINTK):
		if (enabled) {
			update_printk_trace(tr);
		} else {
			/*
			 * The global_trace cannot clear this.
			 * It's flag only gets cleared if another instance sets it.
			 */
			if (printk_trace == &global_trace)
				return -EINVAL;
			/*
			 * An instance must always have it set.
			 * by default, that's the global_trace instance.
			 */
			if (printk_trace == tr)
				update_printk_trace(&global_trace);
		}
		break;

	case TRACE_ITER(COPY_MARKER):
		update_marker_trace(tr, enabled);
		/* update_marker_trace updates the tr->trace_flags */
		return 0;
	}

	if (enabled)
		tr->trace_flags |= mask;
	else
		tr->trace_flags &= ~mask;

	/* Side effects that must run after the flag word is updated. */
	switch (mask) {
	case TRACE_ITER(RECORD_CMD):
		trace_event_enable_cmd_record(enabled);
		break;

	case TRACE_ITER(RECORD_TGID):

		if (trace_alloc_tgid_map() < 0) {
			/* Roll the flag back if the tgid map can't be allocated. */
			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
			return -ENOMEM;
		}

		trace_event_enable_tgid_record(enabled);
		break;

	case TRACE_ITER(EVENT_FORK):
		trace_event_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(FUNC_FORK):
		ftrace_pid_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(OVERWRITE):
		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_SNAPSHOT
		ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled);
#endif
		break;

	case TRACE_ITER(PRINTK):
		trace_printk_start_stop_comm(enabled);
		trace_printk_control(enabled);
		break;

#if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
	case TRACE_GRAPH_GRAPH_TIME:
		ftrace_graph_graph_time_control(enabled);
		break;
#endif
	}

	return 0;
}
4076 
/*
 * Parse and apply one option token ("opt" or "noopt") to @tr,
 * trying the generic trace_options first and falling back to the
 * current tracer's own options.
 */
int trace_set_options(struct trace_array *tr, char *option)
{
	char *cmp;
	int neg = 0;
	int ret;
	size_t orig_len = strlen(option);
	int len;

	cmp = strstrip(option);

	/* A "no" prefix negates the option. */
	len = str_has_prefix(cmp, "no");
	if (len)
		neg = 1;

	cmp += len;

	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = match_string(trace_options, -1, cmp);
	/* If no option could be set, test the specific tracer options */
	if (ret < 0)
		ret = set_tracer_option(tr, cmp, neg);
	else
		ret = set_tracer_flag(tr, 1ULL << ret, !neg);

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	/*
	 * If the first trailing whitespace is replaced with '\0' by strstrip,
	 * turn it back into a space.
	 */
	if (orig_len > strlen(option))
		option[strlen(option)] = ' ';

	return ret;
}
4115 
4116 static void __init apply_trace_boot_options(void)
4117 {
4118 	char *buf = trace_boot_options_buf;
4119 	char *option;
4120 
4121 	while (true) {
4122 		option = strsep(&buf, ",");
4123 
4124 		if (!option)
4125 			break;
4126 
4127 		if (*option)
4128 			trace_set_options(&global_trace, option);
4129 
4130 		/* Put back the comma to allow this to be called again */
4131 		if (buf)
4132 			*(buf - 1) = ',';
4133 	}
4134 }
4135 
4136 static ssize_t
4137 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4138 			size_t cnt, loff_t *ppos)
4139 {
4140 	struct seq_file *m = filp->private_data;
4141 	struct trace_array *tr = m->private;
4142 	char buf[64];
4143 	int ret;
4144 
4145 	if (cnt >= sizeof(buf))
4146 		return -EINVAL;
4147 
4148 	if (copy_from_user(buf, ubuf, cnt))
4149 		return -EFAULT;
4150 
4151 	buf[cnt] = 0;
4152 
4153 	ret = trace_set_options(tr, buf);
4154 	if (ret < 0)
4155 		return ret;
4156 
4157 	*ppos += cnt;
4158 
4159 	return cnt;
4160 }
4161 
4162 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4163 {
4164 	struct trace_array *tr = inode->i_private;
4165 	int ret;
4166 
4167 	ret = tracing_check_open_get_tr(tr);
4168 	if (ret)
4169 		return ret;
4170 
4171 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4172 	if (ret < 0)
4173 		trace_array_put(tr);
4174 
4175 	return ret;
4176 }
4177 
/* File operations for "trace_options". */
static const struct file_operations tracing_iter_fops = {
	.open		= tracing_trace_options_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_trace_options_write,
};
4185 
4186 static const char readme_msg[] =
4187 	"tracing mini-HOWTO:\n\n"
4188 	"By default tracefs removes all OTH file permission bits.\n"
4189 	"When mounting tracefs an optional group id can be specified\n"
4190 	"which adds the group to every directory and file in tracefs:\n\n"
4191 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
4192 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4193 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4194 	" Important files:\n"
4195 	"  trace\t\t\t- The static contents of the buffer\n"
4196 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4197 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4198 	"  current_tracer\t- function and latency tracers\n"
4199 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4200 	"  error_log\t- error log for failed commands (that support it)\n"
4201 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4202 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4203 	"  trace_clock\t\t- change the clock used to order events\n"
4204 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4205 	"      global:   Synced across CPUs but slows tracing down.\n"
4206 	"     counter:   Not a clock, but just an increment\n"
4207 	"      uptime:   Jiffy counter from time of boot\n"
4208 	"        perf:   Same clock that perf events use\n"
4209 #ifdef CONFIG_X86_64
4210 	"     x86-tsc:   TSC cycle counter\n"
4211 #endif
4212 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4213 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4214 	"    absolute:   Absolute (standalone) timestamp\n"
4215 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4216 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4217 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4218 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4219 	"\t\t\t  Remove sub-buffer with rmdir\n"
4220 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4221 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4222 	"\t\t\t  option name\n"
4223 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4224 #ifdef CONFIG_DYNAMIC_FTRACE
4225 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4226 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4227 	"\t\t\t  functions\n"
4228 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4229 	"\t     modules: Can select a group via module\n"
4230 	"\t      Format: :mod:<module-name>\n"
4231 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4232 	"\t    triggers: a command to perform when function is hit\n"
4233 	"\t      Format: <function>:<trigger>[:count]\n"
4234 	"\t     trigger: traceon, traceoff\n"
4235 	"\t\t      enable_event:<system>:<event>\n"
4236 	"\t\t      disable_event:<system>:<event>\n"
4237 #ifdef CONFIG_STACKTRACE
4238 	"\t\t      stacktrace\n"
4239 #endif
4240 #ifdef CONFIG_TRACER_SNAPSHOT
4241 	"\t\t      snapshot\n"
4242 #endif
4243 	"\t\t      dump\n"
4244 	"\t\t      cpudump\n"
4245 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4246 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4247 	"\t     The first one will disable tracing every time do_fault is hit\n"
4248 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4249 	"\t       The first time do trap is hit and it disables tracing, the\n"
4250 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4251 	"\t       the counter will not decrement. It only decrements when the\n"
4252 	"\t       trigger did work\n"
4253 	"\t     To remove trigger without count:\n"
4254 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4255 	"\t     To remove trigger with a count:\n"
4256 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4257 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4258 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4259 	"\t    modules: Can select a group via module command :mod:\n"
4260 	"\t    Does not accept triggers\n"
4261 #endif /* CONFIG_DYNAMIC_FTRACE */
4262 #ifdef CONFIG_FUNCTION_TRACER
4263 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4264 	"\t\t    (function)\n"
4265 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
4266 	"\t\t    (function)\n"
4267 #endif
4268 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4269 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4270 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4271 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4272 #endif
4273 #ifdef CONFIG_TRACER_SNAPSHOT
4274 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4275 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4276 	"\t\t\t  information\n"
4277 #endif
4278 #ifdef CONFIG_STACK_TRACER
4279 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4280 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4281 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4282 	"\t\t\t  new trace)\n"
4283 #ifdef CONFIG_DYNAMIC_FTRACE
4284 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4285 	"\t\t\t  traces\n"
4286 #endif
4287 #endif /* CONFIG_STACK_TRACER */
4288 #ifdef CONFIG_DYNAMIC_EVENTS
4289 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4290 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4291 #endif
4292 #ifdef CONFIG_KPROBE_EVENTS
4293 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4294 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4295 #endif
4296 #ifdef CONFIG_UPROBE_EVENTS
4297 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4298 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4299 #endif
4300 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
4301     defined(CONFIG_FPROBE_EVENTS)
4302 	"\t  accepts: event-definitions (one definition per line)\n"
4303 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4304 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
4305 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
4306 #endif
4307 #ifdef CONFIG_FPROBE_EVENTS
4308 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
4309 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
4310 #endif
4311 #ifdef CONFIG_HIST_TRIGGERS
4312 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4313 #endif
4314 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
4315 	"\t           -:[<group>/][<event>]\n"
4316 #ifdef CONFIG_KPROBE_EVENTS
4317 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4318   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
4319 #endif
4320 #ifdef CONFIG_UPROBE_EVENTS
4321   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
4322 #endif
4323 	"\t     args: <name>=fetcharg[:type]\n"
4324 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
4325 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4326 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4327 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
4328 	"\t           <argname>[->field[->field|.field...]],\n"
4329 #endif
4330 #else
4331 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4332 #endif
4333 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4334 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
4335 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
4336 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4337 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
4338 #ifdef CONFIG_HIST_TRIGGERS
4339 	"\t    field: <stype> <name>;\n"
4340 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4341 	"\t           [unsigned] char/int/long\n"
4342 #endif
4343 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
4344 	"\t            of the <attached-group>/<attached-event>.\n"
4345 #endif
4346 	"  set_event\t\t- Enables events by name written into it\n"
4347 	"\t\t\t  Can enable module events via: :mod:<module>\n"
4348 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4349 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4350 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4351 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4352 	"\t\t\t  events\n"
4353 	"      filter\t\t- If set, only events passing filter are traced\n"
4354 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4355 	"\t\t\t  <event>:\n"
4356 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4357 	"      filter\t\t- If set, only events passing filter are traced\n"
4358 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4359 	"\t    Format: <trigger>[:count][if <filter>]\n"
4360 	"\t   trigger: traceon, traceoff\n"
4361 	"\t            enable_event:<system>:<event>\n"
4362 	"\t            disable_event:<system>:<event>\n"
4363 #ifdef CONFIG_HIST_TRIGGERS
4364 	"\t            enable_hist:<system>:<event>\n"
4365 	"\t            disable_hist:<system>:<event>\n"
4366 #endif
4367 #ifdef CONFIG_STACKTRACE
4368 	"\t\t    stacktrace\n"
4369 #endif
4370 #ifdef CONFIG_TRACER_SNAPSHOT
4371 	"\t\t    snapshot\n"
4372 #endif
4373 #ifdef CONFIG_HIST_TRIGGERS
4374 	"\t\t    hist (see below)\n"
4375 #endif
4376 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4377 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4378 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4379 	"\t                  events/block/block_unplug/trigger\n"
4380 	"\t   The first disables tracing every time block_unplug is hit.\n"
4381 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4382 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4383 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4384 	"\t   Like function triggers, the counter is only decremented if it\n"
4385 	"\t    enabled or disabled tracing.\n"
4386 	"\t   To remove a trigger without a count:\n"
4387 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4388 	"\t   To remove a trigger with a count:\n"
4389 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4390 	"\t   Filters can be ignored when removing a trigger.\n"
4391 #ifdef CONFIG_HIST_TRIGGERS
4392 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4393 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4394 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
4395 	"\t            [:values=<field1[,field2,...]>]\n"
4396 	"\t            [:sort=<field1[,field2,...]>]\n"
4397 	"\t            [:size=#entries]\n"
4398 	"\t            [:pause][:continue][:clear]\n"
4399 	"\t            [:name=histname1]\n"
4400 	"\t            [:nohitcount]\n"
4401 	"\t            [:<handler>.<action>]\n"
4402 	"\t            [if <filter>]\n\n"
4403 	"\t    Note, special fields can be used as well:\n"
4404 	"\t            common_timestamp - to record current timestamp\n"
4405 	"\t            common_cpu - to record the CPU the event happened on\n"
4406 	"\n"
4407 	"\t    A hist trigger variable can be:\n"
4408 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
4409 	"\t        - a reference to another variable e.g. y=$x,\n"
4410 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
4411 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
4412 	"\n"
4413 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
4414 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
4415 	"\t    variable reference, field or numeric literal.\n"
4416 	"\n"
4417 	"\t    When a matching event is hit, an entry is added to a hash\n"
4418 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4419 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4420 	"\t    correspond to fields in the event's format description.  Keys\n"
4421 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
4422 	"\t    Compound keys consisting of up to two fields can be specified\n"
4423 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4424 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4425 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4426 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4427 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4428 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4429 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4430 	"\t    its histogram data will be shared with other triggers of the\n"
4431 	"\t    same name, and trigger hits will update this common data.\n\n"
4432 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4433 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4434 	"\t    triggers attached to an event, there will be a table for each\n"
4435 	"\t    trigger in the output.  The table displayed for a named\n"
4436 	"\t    trigger will be the same as any other instance having the\n"
4437 	"\t    same name.  The default format used to display a given field\n"
4438 	"\t    can be modified by appending any of the following modifiers\n"
4439 	"\t    to the field name, as applicable:\n\n"
4440 	"\t            .hex        display a number as a hex value\n"
4441 	"\t            .sym        display an address as a symbol\n"
4442 	"\t            .sym-offset display an address as a symbol and offset\n"
4443 	"\t            .execname   display a common_pid as a program name\n"
4444 	"\t            .syscall    display a syscall id as a syscall name\n"
4445 	"\t            .log2       display log2 value rather than raw number\n"
4446 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
4447 	"\t            .usecs      display a common_timestamp in microseconds\n"
4448 	"\t            .percent    display a number of percentage value\n"
4449 	"\t            .graph      display a bar-graph of a value\n\n"
4450 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4451 	"\t    trigger or to start a hist trigger but not log any events\n"
4452 	"\t    until told to do so.  'continue' can be used to start or\n"
4453 	"\t    restart a paused hist trigger.\n\n"
4454 	"\t    The 'clear' parameter will clear the contents of a running\n"
4455 	"\t    hist trigger and leave its current paused/active state\n"
4456 	"\t    unchanged.\n\n"
4457 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
4458 	"\t    raw hitcount in the histogram.\n\n"
4459 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4460 	"\t    have one event conditionally start and stop another event's\n"
4461 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4462 	"\t    the enable_event and disable_event triggers.\n\n"
4463 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4464 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4465 	"\t        <handler>.<action>\n\n"
4466 	"\t    The available handlers are:\n\n"
4467 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4468 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4469 	"\t        onchange(var)            - invoke action if var changes\n\n"
4470 	"\t    The available actions are:\n\n"
4471 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4472 	"\t        save(field,...)                      - save current event fields\n"
4473 #ifdef CONFIG_TRACER_SNAPSHOT
4474 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
4475 #endif
4476 #ifdef CONFIG_SYNTH_EVENTS
4477 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
4478 	"\t  Write into this file to define/undefine new synthetic events.\n"
4479 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
4480 #endif
4481 #endif
4482 ;
4483 
4484 static ssize_t
4485 tracing_readme_read(struct file *filp, char __user *ubuf,
4486 		       size_t cnt, loff_t *ppos)
4487 {
4488 	return simple_read_from_buffer(ubuf, cnt, ppos,
4489 					readme_msg, strlen(readme_msg));
4490 }
4491 
/* File operations for the read-only "README" help file. */
static const struct file_operations tracing_readme_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_readme_read,
	.llseek		= generic_file_llseek,
};
4497 
4498 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Advance @ptr past a chunk boundary if it sits on a tail marker.
 *
 * The eval maps are stored as a linked list of chunks.  Each chunk ends
 * with a tail item (eval_string == NULL) whose ->tail.next points to the
 * next chunk; that chunk begins with a head item that is skipped here.
 * Returns the next real map item, or NULL at the end of the list.
 */
static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item *ptr)
{
	if (!ptr->map.eval_string) {
		if (ptr->tail.next) {
			ptr = ptr->tail.next;
			/* Set ptr to the next real item (skip head) */
			ptr++;
		} else
			return NULL;
	}
	return ptr;
}
4512 
/*
 * seq_file ->next: step to the following eval map entry, crossing chunk
 * boundaries (tail marker -> next chunk's first real item) as needed.
 */
static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
{
	union trace_eval_map_item *ptr = v;

	/*
	 * Paranoid! If ptr points to end, we don't want to increment past it.
	 * This really should never happen.
	 */
	(*pos)++;
	ptr = update_eval_map(ptr);
	if (WARN_ON_ONCE(!ptr))
		return NULL;

	ptr++;
	ptr = update_eval_map(ptr);

	return ptr;
}
4531 
/*
 * seq_file ->start: take trace_eval_mutex (released in eval_map_stop())
 * and walk forward to the entry at position *pos.  The initial v++
 * skips the head item of the first chunk.
 */
static void *eval_map_start(struct seq_file *m, loff_t *pos)
{
	union trace_eval_map_item *v;
	loff_t l = 0;

	mutex_lock(&trace_eval_mutex);

	v = trace_eval_maps;
	if (v)
		v++;

	while (v && l < *pos) {
		v = eval_map_next(m, v, &l);
	}

	return v;
}
4549 
/* seq_file ->stop: drop the mutex taken in eval_map_start(). */
static void eval_map_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&trace_eval_mutex);
}
4554 
/* seq_file ->show: print one "<name> <value> (<system>)" map entry. */
static int eval_map_show(struct seq_file *m, void *v)
{
	union trace_eval_map_item *ptr = v;

	seq_printf(m, "%s %ld (%s)\n",
		   ptr->map.eval_string, ptr->map.eval_value,
		   ptr->map.system);

	return 0;
}
4565 
/* seq_file operations backing the "eval_map" file */
static const struct seq_operations tracing_eval_map_seq_ops = {
	.start		= eval_map_start,
	.next		= eval_map_next,
	.stop		= eval_map_stop,
	.show		= eval_map_show,
};
4572 
4573 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
4574 {
4575 	int ret;
4576 
4577 	ret = tracing_check_open_get_tr(NULL);
4578 	if (ret)
4579 		return ret;
4580 
4581 	return seq_open(filp, &tracing_eval_map_seq_ops);
4582 }
4583 
/* File operations for the "eval_map" file */
static const struct file_operations tracing_eval_map_fops = {
	.open		= tracing_eval_map_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
4590 
/*
 * Given the head item of a chunk, return its tail item: the head is
 * followed by ->head.length real entries, then the tail marker.
 */
static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
{
	/* Return tail of array given the head */
	return ptr + ptr->head.length + 1;
}
4597 
/*
 * Append @len eval maps from @start (owned by @mod, or NULL for the core
 * kernel) to the trace_eval_maps list shown by the "eval_map" file.
 * Allocation failure is only warned about; the file will simply lack
 * these entries.
 */
static void
trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
			   int len)
{
	struct trace_eval_map **stop;
	struct trace_eval_map **map;
	union trace_eval_map_item *map_array;
	union trace_eval_map_item *ptr;

	stop = start + len;

	/*
	 * The trace_eval_maps contains the map plus a head and tail item,
	 * where the head holds the module and length of array, and the
	 * tail holds a pointer to the next list.
	 */
	map_array = kmalloc_objs(*map_array, len + 2);
	if (!map_array) {
		pr_warn("Unable to allocate trace eval mapping\n");
		return;
	}

	guard(mutex)(&trace_eval_mutex);

	if (!trace_eval_maps)
		trace_eval_maps = map_array;
	else {
		/* Walk chunk tails to find the end of the list */
		ptr = trace_eval_maps;
		for (;;) {
			ptr = trace_eval_jmp_to_tail(ptr);
			if (!ptr->tail.next)
				break;
			ptr = ptr->tail.next;

		}
		ptr->tail.next = map_array;
	}
	/* First item is the head: record owner and entry count */
	map_array->head.mod = mod;
	map_array->head.length = len;
	map_array++;

	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
		map_array->map = **map;
		map_array++;
	}
	/* Zeroed tail item terminates this chunk (tail.next == NULL) */
	memset(map_array, 0, sizeof(*map_array));
}
4645 
/* Create the read-only "eval_map" file in the given tracefs directory. */
static void trace_create_eval_file(struct dentry *d_tracer)
{
	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
			  NULL, &tracing_eval_map_fops);
}
4651 
4652 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* Stubs used when the "eval_map" file is not configured in. */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
static inline void trace_insert_eval_map_file(struct module *mod,
			      struct trace_eval_map **start, int len) { }
4656 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
4657 
/*
 * Run the trace event update with the eval maps in @start (@len entries
 * from module @mod, or NULL for built-in) and then publish the maps via
 * the "eval_map" file.  With len <= 0 there are no maps to publish; the
 * event update is still run, but only under the BTF type-tag sanitizer
 * configuration (otherwise there is nothing to do).
 */
static void
trace_event_update_with_eval_map(struct module *mod,
				 struct trace_eval_map **start,
				 int len)
{
	struct trace_eval_map **map;

	/* Always run sanitizer only if btf_type_tag attr exists. */
	if (len <= 0) {
		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
		      __has_attribute(btf_type_tag)))
			return;
	}

	map = start;

	trace_event_update_all(map, len);

	/* Nothing to add to the eval_map file without entries */
	if (len <= 0)
		return;

	trace_insert_eval_map_file(mod, start, len);
}
4682 
4683 static ssize_t
4684 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4685 		       size_t cnt, loff_t *ppos)
4686 {
4687 	struct trace_array *tr = filp->private_data;
4688 	char buf[MAX_TRACER_SIZE+2];
4689 	int r;
4690 
4691 	scoped_guard(mutex, &trace_types_lock) {
4692 		r = sprintf(buf, "%s\n", tr->current_trace->name);
4693 	}
4694 
4695 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4696 }
4697 
/*
 * Reset the trace buffer for all online CPUs and run the tracer's
 * ->init() callback.  Returns whatever ->init() returns (0 on success).
 */
int tracer_init(struct tracer *t, struct trace_array *tr)
{
	tracing_reset_online_cpus(&tr->array_buffer);
	update_last_data_if_empty(tr);
	return t->init(tr);
}
4704 
/* Set the recorded per-CPU entry count of @buf to @val on every CPU. */
void trace_set_buffer_entries(struct array_buffer *buf, unsigned long val)
{
	int cpu;

	for_each_tracing_cpu(cpu)
		per_cpu_ptr(buf->data, cpu)->entries = val;
}
4712 
4713 static void update_buffer_entries(struct array_buffer *buf, int cpu)
4714 {
4715 	if (cpu == RING_BUFFER_ALL_CPUS) {
4716 		trace_set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
4717 	} else {
4718 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
4719 	}
4720 }
4721 
/*
 * Resize the main ring buffer (and, if allocated, the snapshot buffer)
 * of @tr to @size for @cpu (or RING_BUFFER_ALL_CPUS).  Tracing is
 * stopped for the duration of the resize.  Returns 0 on success or a
 * negative error code.
 */
static int __tracing_resize_ring_buffer(struct trace_array *tr,
					unsigned long size, int cpu)
{
	int ret;

	/*
	 * If kernel or user changes the size of the ring buffer
	 * we use the size that was given, and we can forget about
	 * expanding it later.
	 */
	trace_set_ring_buffer_expanded(tr);

	/* May be called before buffers are initialized */
	if (!tr->array_buffer.buffer)
		return 0;

	/* Do not allow tracing while resizing ring buffer */
	tracing_stop_tr(tr);

	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
	if (ret < 0)
		goto out_start;

#ifdef CONFIG_TRACER_SNAPSHOT
	if (!tr->allocated_snapshot)
		goto out;

	/* Keep the snapshot buffer the same size as the main buffer */
	ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu);
	if (ret < 0) {
		/* Try to restore the main buffer to its recorded size */
		int r = resize_buffer_duplicate_size(&tr->array_buffer,
						     &tr->array_buffer, cpu);
		if (r < 0) {
			/*
			 * AARGH! We are left with different
			 * size max buffer!!!!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the size of the main buffer, but failed to
			 * update the size of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			WARN_ON(1);
			tracing_disabled = 1;
		}
		goto out_start;
	}

	update_buffer_entries(&tr->snapshot_buffer, cpu);

 out:
#endif /* CONFIG_TRACER_SNAPSHOT */

	update_buffer_entries(&tr->array_buffer, cpu);
 out_start:
	tracing_start_tr(tr);
	return ret;
}
4784 
4785 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4786 				  unsigned long size, int cpu_id)
4787 {
4788 	guard(mutex)(&trace_types_lock);
4789 
4790 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4791 		/* make sure, this cpu is enabled in the mask */
4792 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
4793 			return -EINVAL;
4794 	}
4795 
4796 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
4797 }
4798 
/* One module's load (text base) address and name. */
struct trace_mod_entry {
	unsigned long	mod_addr;
	char		mod_name[MODULE_NAME_LEN];
};

/*
 * Scratch area of a persistent ring buffer: records the trace clock,
 * the kernel text address and the loaded-module table of the boot that
 * wrote the buffer, so that previous-boot addresses can be adjusted
 * (see trace_adjust_address()).  entries[] is kept sorted by mod_addr.
 */
struct trace_scratch {
	unsigned int		clock_id;
	unsigned long		text_addr;
	unsigned long		nr_entries;
	struct trace_mod_entry	entries[];
};

/* Serializes updates to the trace_scratch module table. */
static DEFINE_MUTEX(scratch_mutex);
4812 
4813 static int cmp_mod_entry(const void *key, const void *pivot)
4814 {
4815 	unsigned long addr = (unsigned long)key;
4816 	const struct trace_mod_entry *ent = pivot;
4817 
4818 	if (addr < ent[0].mod_addr)
4819 		return -1;
4820 
4821 	return addr >= ent[1].mod_addr;
4822 }
4823 
/**
 * trace_adjust_address() - Adjust prev boot address to current address.
 * @tr: Persistent ring buffer's trace_array.
 * @addr: Address in @tr which is adjusted.
 *
 * Return: @addr translated into the current boot's address space, or
 * @addr unchanged when no translation applies.
 */
unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	unsigned long raddr;
	int idx = 0, nr_entries;

	/* If we don't have last boot delta, return the address */
	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return addr;

	/* tr->module_delta must be protected by rcu. */
	guard(rcu)();
	tscratch = tr->scratch;
	/* if there is no tscratch, module_delta must be NULL. */
	module_delta = READ_ONCE(tr->module_delta);
	if (!module_delta || !tscratch->nr_entries ||
	    tscratch->entries[0].mod_addr > addr) {
		/* Below the first module: apply the core kernel text delta */
		raddr = addr + tr->text_delta;
		/* Only accept the translation if it lands in kernel ranges */
		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
			is_kernel_rodata(raddr) ? raddr : addr;
	}

	/* Note that entries must be sorted. */
	nr_entries = tscratch->nr_entries;
	if (nr_entries == 1 ||
	    tscratch->entries[nr_entries - 1].mod_addr < addr)
		idx = nr_entries - 1;
	else {
		entry = __inline_bsearch((void *)addr,
				tscratch->entries,
				nr_entries - 1,
				sizeof(tscratch->entries[0]),
				cmp_mod_entry);
		if (entry)
			idx = entry - tscratch->entries;
	}

	return addr + module_delta->delta[idx];
}
4870 
4871 #ifdef CONFIG_MODULES
/*
 * module_for_each_mod() callback: record @mod's text base address and
 * name in the trace_scratch table of the trace_array passed in @data.
 * Returns -1 when there is no scratch area or no room left for another
 * entry, 0 on success.
 */
static int save_mod(struct module *mod, void *data)
{
	struct trace_array *tr = data;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	unsigned int size;

	tscratch = tr->scratch;
	if (!tscratch)
		return -1;
	size = tr->scratch_size;

	/* Overflow-safe check that one more entry fits in the scratch area */
	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
		return -1;

	entry = &tscratch->entries[tscratch->nr_entries];

	tscratch->nr_entries++;

	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
	strscpy(entry->mod_name, mod->name);

	return 0;
}
4896 #else
/* !CONFIG_MODULES: no modules to record. */
static int save_mod(struct module *mod, void *data)
{
	return 0;
}
4901 #endif
4902 
4903 static void update_last_data(struct trace_array *tr)
4904 {
4905 	struct trace_module_delta *module_delta;
4906 	struct trace_scratch *tscratch;
4907 
4908 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
4909 		return;
4910 
4911 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
4912 		return;
4913 
4914 	/* Only if the buffer has previous boot data clear and update it. */
4915 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
4916 
4917 	/* If this is a backup instance, mark it for autoremove. */
4918 	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
4919 		tr->free_on_close = true;
4920 
4921 	/* Reset the module list and reload them */
4922 	if (tr->scratch) {
4923 		struct trace_scratch *tscratch = tr->scratch;
4924 
4925 		tscratch->clock_id = tr->clock_id;
4926 		memset(tscratch->entries, 0,
4927 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
4928 		tscratch->nr_entries = 0;
4929 
4930 		guard(mutex)(&scratch_mutex);
4931 		module_for_each_mod(save_mod, tr);
4932 	}
4933 
4934 	/*
4935 	 * Need to clear all CPU buffers as there cannot be events
4936 	 * from the previous boot mixed with events with this boot
4937 	 * as that will cause a confusing trace. Need to clear all
4938 	 * CPU buffers, even for those that may currently be offline.
4939 	 */
4940 	tracing_reset_all_cpus(&tr->array_buffer);
4941 
4942 	/* Using current data now */
4943 	tr->text_delta = 0;
4944 
4945 	if (!tr->scratch)
4946 		return;
4947 
4948 	tscratch = tr->scratch;
4949 	module_delta = READ_ONCE(tr->module_delta);
4950 	WRITE_ONCE(tr->module_delta, NULL);
4951 	kfree_rcu(module_delta, rcu);
4952 
4953 	/* Set the persistent ring buffer meta data to this address */
4954 	tscratch->text_addr = (unsigned long)_text;
4955 }
4956 
4957 /**
4958  * tracing_update_buffers - used by tracing facility to expand ring buffers
4959  * @tr: The tracing instance
4960  *
4961  * To save on memory when the tracing is never used on a system with it
4962  * configured in. The ring buffers are set to a minimum size. But once
4963  * a user starts to use the tracing facility, then they need to grow
4964  * to their default size.
4965  *
4966  * This function is to be called when a tracer is about to be used.
4967  */
4968 int tracing_update_buffers(struct trace_array *tr)
4969 {
4970 	int ret = 0;
4971 
4972 	if (!tr)
4973 		tr = &global_trace;
4974 
4975 	guard(mutex)(&trace_types_lock);
4976 
4977 	update_last_data(tr);
4978 
4979 	if (!tr->ring_buffer_expanded)
4980 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4981 						RING_BUFFER_ALL_CPUS);
4982 	return ret;
4983 }
4984 
/*
 * Used to clear out the tracer before deletion of an instance.
 * Must have trace_types_lock held.
 */
static void tracing_set_nop(struct trace_array *tr)
{
	/* Already the nop tracer? Nothing to tear down. */
	if (tr->current_trace == &nop_trace)
		return;

	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;
}
5002 
/* NOTE(review): flag set when tracer options change — consumers are outside this chunk */
static bool tracer_options_updated;
5004 
/*
 * tracing_set_tracer - switch trace array @tr to the tracer named @buf
 *
 * Expands the ring buffer if needed, looks the tracer up in tr->tracers,
 * tears down the current tracer, arms or disarms the snapshot buffer as
 * the old and new tracers require, and initializes the new tracer.
 *
 * Returns 0 on success; -EINVAL for an unknown or disallowed tracer;
 * -EBUSY when trace pipe readers or a conditional snapshot block the
 * switch; or a negative error from the buffer resize or the tracer's
 * ->init() callback.  Takes trace_types_lock.
 */
int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *trace = NULL;
	struct tracers *t;
	bool had_max_tr;
	int ret;

	guard(mutex)(&trace_types_lock);

	update_last_data(tr);

	if (!tr->ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;
		ret = 0;
	}

	/* Find the tracer registered under the requested name */
	list_for_each_entry(t, &tr->tracers, list) {
		if (strcmp(t->tracer->name, buf) == 0) {
			trace = t->tracer;
			break;
		}
	}
	if (!trace)
		return -EINVAL;

	if (trace == tr->current_trace)
		return 0;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* A pending conditional snapshot blocks switching to a snapshot user */
	if (tracer_uses_snapshot(trace)) {
		local_irq_disable();
		arch_spin_lock(&tr->max_lock);
		ret = tr->cond_snapshot ? -EBUSY : 0;
		arch_spin_unlock(&tr->max_lock);
		local_irq_enable();
		if (ret)
			return ret;
	}
#endif
	/* Some tracers won't work on kernel command line */
	if (system_state < SYSTEM_RUNNING && trace->noboot) {
		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
			trace->name);
		return -EINVAL;
	}

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(trace, tr))
		return -EINVAL;

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->trace_ref)
		return -EBUSY;

	trace_branch_disable();

	/* Tear down the current tracer */
	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	had_max_tr = tracer_uses_snapshot(tr->current_trace);

	/* Current trace needs to be nop_trace before synchronize_rcu */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	if (had_max_tr && !tracer_uses_snapshot(trace)) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called from interrupts disabled
		 * so a synchronized_sched() is sufficient.
		 */
		synchronize_rcu();
		free_snapshot(tr);
		tracing_disarm_snapshot(tr);
	}

	if (!had_max_tr && tracer_uses_snapshot(trace)) {
		ret = tracing_arm_snapshot_locked(tr);
		if (ret)
			return ret;
	}

	/* @t still points at the entry matched in the lookup loop above */
	tr->current_trace_flags = t->flags ? : t->tracer->flags;

	if (trace->init) {
		ret = tracer_init(trace, tr);
		if (ret) {
			if (tracer_uses_snapshot(trace))
				tracing_disarm_snapshot(tr);
			/* Fall back to the nop tracer flags set above */
			tr->current_trace_flags = nop_trace.flags;
			return ret;
		}
	}

	tr->current_trace = trace;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);

	return 0;
}
5112 
5113 static ssize_t
5114 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5115 			size_t cnt, loff_t *ppos)
5116 {
5117 	struct trace_array *tr = filp->private_data;
5118 	char buf[MAX_TRACER_SIZE+1];
5119 	char *name;
5120 	size_t ret;
5121 	int err;
5122 
5123 	ret = cnt;
5124 
5125 	if (cnt > MAX_TRACER_SIZE)
5126 		cnt = MAX_TRACER_SIZE;
5127 
5128 	if (copy_from_user(buf, ubuf, cnt))
5129 		return -EFAULT;
5130 
5131 	buf[cnt] = 0;
5132 
5133 	name = strim(buf);
5134 
5135 	err = tracing_set_tracer(tr, name);
5136 	if (err)
5137 		return err;
5138 
5139 	*ppos += ret;
5140 
5141 	return ret;
5142 }
5143 
5144 ssize_t tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5145 			   size_t cnt, loff_t *ppos)
5146 {
5147 	char buf[64];
5148 	int r;
5149 
5150 	r = snprintf(buf, sizeof(buf), "%ld\n",
5151 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5152 	if (r > sizeof(buf))
5153 		r = sizeof(buf);
5154 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5155 }
5156 
5157 ssize_t tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5158 			    size_t cnt, loff_t *ppos)
5159 {
5160 	unsigned long val;
5161 	int ret;
5162 
5163 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5164 	if (ret)
5165 		return ret;
5166 
5167 	*ptr = val * 1000;
5168 
5169 	return cnt;
5170 }
5171 
/* Read handler for "tracing_thresh": value reported in microseconds */
static ssize_t
tracing_thresh_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
}
5178 
/*
 * Write handler for "tracing_thresh": update the threshold under
 * trace_types_lock and let the current tracer react to the new value.
 */
static ssize_t
tracing_thresh_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	int ret;

	/* Held until return; serializes against tracer changes */
	guard(mutex)(&trace_types_lock);
	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
	if (ret < 0)
		return ret;

	/* Some tracers (e.g. ones using the threshold) need to be told */
	if (tr->current_trace->update_thresh) {
		ret = tr->current_trace->update_thresh(tr);
		if (ret < 0)
			return ret;
	}

	return cnt;
}
5199 
5200 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
5201 {
5202 	if (cpu == RING_BUFFER_ALL_CPUS) {
5203 		if (cpumask_empty(tr->pipe_cpumask)) {
5204 			cpumask_setall(tr->pipe_cpumask);
5205 			return 0;
5206 		}
5207 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
5208 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
5209 		return 0;
5210 	}
5211 	return -EBUSY;
5212 }
5213 
5214 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
5215 {
5216 	if (cpu == RING_BUFFER_ALL_CPUS) {
5217 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
5218 		cpumask_clear(tr->pipe_cpumask);
5219 	} else {
5220 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
5221 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
5222 	}
5223 }
5224 
/*
 * Open handler for trace_pipe: set up a consuming trace_iterator for
 * the CPU encoded in the inode (or all CPUs) and attach it to the file.
 */
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;
	int ret;

	/* Takes a reference on @tr; dropped on the error paths below */
	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	guard(mutex)(&trace_types_lock);
	cpu = tracing_get_cpu(inode);
	/* Only one reader per CPU (or a single "all CPUs" reader) */
	ret = open_pipe_on_cpu(tr, cpu);
	if (ret)
		goto fail_pipe_on_cpu;

	/* create a buffer to store the information to pass to userspace */
	iter = kzalloc_obj(*iter);
	if (!iter) {
		ret = -ENOMEM;
		goto fail_alloc_iter;
	}

	trace_seq_init(&iter->seq);
	iter->trace = tr->current_trace;

	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto fail;
	}

	/* trace pipe does not show start of buffer */
	cpumask_setall(iter->started);

	if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
		iter->iter_flags |= TRACE_FILE_LAT_FMT;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	iter->tr = tr;
	iter->array_buffer = &tr->array_buffer;
	iter->cpu_file = cpu;
	mutex_init(&iter->mutex);
	filp->private_data = iter;

	/* Let the tracer do per-open setup if it has any */
	if (iter->trace->pipe_open)
		iter->trace->pipe_open(iter);

	nonseekable_open(inode, filp);

	/* Open readers block tracer switching (see tracing_set_tracer()) */
	tr->trace_ref++;

	return ret;

fail:
	kfree(iter);
fail_alloc_iter:
	close_pipe_on_cpu(tr, cpu);
fail_pipe_on_cpu:
	__trace_array_put(tr);
	return ret;
}
5290 
/*
 * Release handler for trace_pipe: undo tracing_open_pipe() — drop the
 * reader reference, notify the tracer, free the iterator and release
 * the per-CPU reader slot.
 */
static int tracing_release_pipe(struct inode *inode, struct file *file)
{
	struct trace_iterator *iter = file->private_data;
	struct trace_array *tr = inode->i_private;

	/* trace_ref and pipe_cpumask are protected by trace_types_lock */
	scoped_guard(mutex, &trace_types_lock) {
		tr->trace_ref--;

		if (iter->trace->pipe_close)
			iter->trace->pipe_close(iter);
		close_pipe_on_cpu(tr, iter->cpu_file);
	}

	/* Safe to free outside the lock; the iter is no longer reachable */
	free_trace_iter_content(iter);
	kfree(iter);

	trace_array_put(tr);

	return 0;
}
5311 
5312 static __poll_t
5313 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5314 {
5315 	struct trace_array *tr = iter->tr;
5316 
5317 	/* Iterators are static, they should be filled or empty */
5318 	if (trace_buffer_iter(iter, iter->cpu_file))
5319 		return EPOLLIN | EPOLLRDNORM;
5320 
5321 	if (tr->trace_flags & TRACE_ITER(BLOCK))
5322 		/*
5323 		 * Always select as readable when in blocking mode
5324 		 */
5325 		return EPOLLIN | EPOLLRDNORM;
5326 	else
5327 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
5328 					     filp, poll_table, iter->tr->buffer_percent);
5329 }
5330 
/* Poll handler for trace_pipe; defers to the common trace_poll() */
static __poll_t
tracing_poll_pipe(struct file *filp, poll_table *poll_table)
{
	struct trace_iterator *iter = filp->private_data;

	return trace_poll(iter, filp, poll_table);
}
5338 
/*
 * Block until the trace buffer has data to read (or return -EAGAIN for
 * non-blocking files). Returns 1 when data may be available, 0/negative
 * to stop reading. Must be called with iter->mutex held; the mutex is
 * dropped around the actual wait so writers/other readers can proceed.
 */
static int tracing_wait_pipe(struct file *filp)
{
	struct trace_iterator *iter = filp->private_data;
	int ret;

	while (trace_empty(iter)) {

		if ((filp->f_flags & O_NONBLOCK)) {
			return -EAGAIN;
		}

		/*
		 * We block until we read something and tracing is disabled.
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
			break;

		/* Drop the lock while sleeping so others can make progress */
		mutex_unlock(&iter->mutex);

		ret = wait_on_pipe(iter, 0);

		mutex_lock(&iter->mutex);

		if (ret)
			return ret;
	}

	return 1;
}
5375 
5376 static bool update_last_data_if_empty(struct trace_array *tr)
5377 {
5378 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5379 		return false;
5380 
5381 	if (!ring_buffer_empty(tr->array_buffer.buffer))
5382 		return false;
5383 
5384 	/*
5385 	 * If the buffer contains the last boot data and all per-cpu
5386 	 * buffers are empty, reset it from the kernel side.
5387 	 */
5388 	update_last_data(tr);
5389 	return true;
5390 }
5391 
5392 /*
5393  * Consumer reader.
5394  */
5395 static ssize_t
5396 tracing_read_pipe(struct file *filp, char __user *ubuf,
5397 		  size_t cnt, loff_t *ppos)
5398 {
5399 	struct trace_iterator *iter = filp->private_data;
5400 	ssize_t sret;
5401 
5402 	/*
5403 	 * Avoid more than one consumer on a single file descriptor
5404 	 * This is just a matter of traces coherency, the ring buffer itself
5405 	 * is protected.
5406 	 */
5407 	guard(mutex)(&iter->mutex);
5408 
5409 	/* return any leftover data */
5410 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5411 	if (sret != -EBUSY)
5412 		return sret;
5413 
5414 	trace_seq_init(&iter->seq);
5415 
5416 	if (iter->trace->read) {
5417 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5418 		if (sret)
5419 			return sret;
5420 	}
5421 
5422 waitagain:
5423 	if (update_last_data_if_empty(iter->tr))
5424 		return 0;
5425 
5426 	sret = tracing_wait_pipe(filp);
5427 	if (sret <= 0)
5428 		return sret;
5429 
5430 	/* stop when tracing is finished */
5431 	if (trace_empty(iter))
5432 		return 0;
5433 
5434 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
5435 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
5436 
5437 	/* reset all but tr, trace, and overruns */
5438 	trace_iterator_reset(iter);
5439 	cpumask_clear(iter->started);
5440 	trace_seq_init(&iter->seq);
5441 
5442 	trace_event_read_lock();
5443 	trace_access_lock(iter->cpu_file);
5444 	while (trace_find_next_entry_inc(iter) != NULL) {
5445 		enum print_line_t ret;
5446 		int save_len = iter->seq.seq.len;
5447 
5448 		ret = print_trace_line(iter);
5449 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5450 			/*
5451 			 * If one print_trace_line() fills entire trace_seq in one shot,
5452 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
5453 			 * In this case, we need to consume it, otherwise, loop will peek
5454 			 * this event next time, resulting in an infinite loop.
5455 			 */
5456 			if (save_len == 0) {
5457 				iter->seq.full = 0;
5458 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
5459 				trace_consume(iter);
5460 				break;
5461 			}
5462 
5463 			/* In other cases, don't print partial lines */
5464 			iter->seq.seq.len = save_len;
5465 			break;
5466 		}
5467 		if (ret != TRACE_TYPE_NO_CONSUME)
5468 			trace_consume(iter);
5469 
5470 		if (trace_seq_used(&iter->seq) >= cnt)
5471 			break;
5472 
5473 		/*
5474 		 * Setting the full flag means we reached the trace_seq buffer
5475 		 * size and we should leave by partial output condition above.
5476 		 * One of the trace_seq_* functions is not used properly.
5477 		 */
5478 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5479 			  iter->ent->type);
5480 	}
5481 	trace_access_unlock(iter->cpu_file);
5482 	trace_event_read_unlock();
5483 
5484 	/* Now copy what we have to the user */
5485 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5486 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
5487 		trace_seq_init(&iter->seq);
5488 
5489 	/*
5490 	 * If there was nothing to send to user, in spite of consuming trace
5491 	 * entries, go back to wait for more entries.
5492 	 */
5493 	if (sret == -EBUSY)
5494 		goto waitagain;
5495 
5496 	return sret;
5497 }
5498 
/* Free a page allocated by tracing_splice_read_pipe() */
static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
				     unsigned int idx)
{
	__free_page(spd->pages[idx]);
}
5504 
/*
 * Fill iter->seq with as many whole formatted trace lines as fit in
 * @rem bytes, consuming entries as they are formatted. Returns the
 * number of bytes still wanted (0 means the splice request is full or
 * the buffer ran out of entries).
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		/* Remember the length so a partial line can be rolled back */
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		/* Bytes added by this line; stop if it exceeds what's wanted */
		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter))	{
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}
5551 
/*
 * splice_read handler for trace_pipe: format consumed trace entries
 * into freshly allocated pages and hand them to the pipe.
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &default_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	/* Serialize against concurrent readers of this iterator */
	mutex_lock(&iter->mutex);

	/* A tracer may provide its own splice implementation */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  min((size_t)trace_seq_used(&iter->seq),
						  (size_t)PAGE_SIZE));
		if (ret < 0) {
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = ret;

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	spd.nr_pages = i;

	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}
5638 
5639 static ssize_t
5640 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
5641 			 size_t cnt, loff_t *ppos)
5642 {
5643 	struct inode *inode = file_inode(filp);
5644 	struct trace_array *tr = inode->i_private;
5645 	char buf[64];
5646 	int r;
5647 
5648 	r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
5649 
5650 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5651 }
5652 
5653 static ssize_t
5654 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
5655 			  size_t cnt, loff_t *ppos)
5656 {
5657 	struct inode *inode = file_inode(filp);
5658 	struct trace_array *tr = inode->i_private;
5659 	unsigned long val;
5660 	int ret;
5661 
5662 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5663 	if (ret)
5664 		return ret;
5665 
5666 	if (val > SYSCALL_FAULT_USER_MAX)
5667 		val = SYSCALL_FAULT_USER_MAX;
5668 
5669 	tr->syscall_buf_sz = val;
5670 
5671 	*ppos += cnt;
5672 
5673 	return cnt;
5674 }
5675 
5676 static ssize_t
5677 tracing_entries_read(struct file *filp, char __user *ubuf,
5678 		     size_t cnt, loff_t *ppos)
5679 {
5680 	struct inode *inode = file_inode(filp);
5681 	struct trace_array *tr = inode->i_private;
5682 	int cpu = tracing_get_cpu(inode);
5683 	char buf[64];
5684 	int r = 0;
5685 	ssize_t ret;
5686 
5687 	mutex_lock(&trace_types_lock);
5688 
5689 	if (cpu == RING_BUFFER_ALL_CPUS) {
5690 		int cpu, buf_size_same;
5691 		unsigned long size;
5692 
5693 		size = 0;
5694 		buf_size_same = 1;
5695 		/* check if all cpu sizes are same */
5696 		for_each_tracing_cpu(cpu) {
5697 			/* fill in the size from first enabled cpu */
5698 			if (size == 0)
5699 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
5700 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
5701 				buf_size_same = 0;
5702 				break;
5703 			}
5704 		}
5705 
5706 		if (buf_size_same) {
5707 			if (!tr->ring_buffer_expanded)
5708 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5709 					    size >> 10,
5710 					    trace_buf_size >> 10);
5711 			else
5712 				r = sprintf(buf, "%lu\n", size >> 10);
5713 		} else
5714 			r = sprintf(buf, "X\n");
5715 	} else
5716 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
5717 
5718 	mutex_unlock(&trace_types_lock);
5719 
5720 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5721 	return ret;
5722 }
5723 
5724 static ssize_t
5725 tracing_entries_write(struct file *filp, const char __user *ubuf,
5726 		      size_t cnt, loff_t *ppos)
5727 {
5728 	struct inode *inode = file_inode(filp);
5729 	struct trace_array *tr = inode->i_private;
5730 	unsigned long val;
5731 	int ret;
5732 
5733 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5734 	if (ret)
5735 		return ret;
5736 
5737 	/* must have at least 1 entry */
5738 	if (!val)
5739 		return -EINVAL;
5740 
5741 	/* value is in KB */
5742 	val <<= 10;
5743 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5744 	if (ret < 0)
5745 		return ret;
5746 
5747 	*ppos += cnt;
5748 
5749 	return cnt;
5750 }
5751 
5752 static ssize_t
5753 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5754 				size_t cnt, loff_t *ppos)
5755 {
5756 	struct trace_array *tr = filp->private_data;
5757 	char buf[64];
5758 	int r, cpu;
5759 	unsigned long size = 0, expanded_size = 0;
5760 
5761 	mutex_lock(&trace_types_lock);
5762 	for_each_tracing_cpu(cpu) {
5763 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
5764 		if (!tr->ring_buffer_expanded)
5765 			expanded_size += trace_buf_size >> 10;
5766 	}
5767 	if (tr->ring_buffer_expanded)
5768 		r = sprintf(buf, "%lu\n", size);
5769 	else
5770 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5771 	mutex_unlock(&trace_types_lock);
5772 
5773 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5774 }
5775 
5776 #define LAST_BOOT_HEADER ((void *)1)
5777 
/*
 * seq_file .next for the last boot data: position 0 is a header
 * pseudo-entry, positions 1..nr_entries map to module entries in the
 * scratch area.
 */
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct trace_scratch *tscratch = tr->scratch;
	unsigned int index = *pos;

	(*pos)++;

	/* The first position emits the header pseudo-entry */
	if (*pos == 1)
		return LAST_BOOT_HEADER;

	/* Only show offsets of the last boot data */
	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return NULL;

	/* *pos 0 is for the header, 1 is for the first module */
	index--;

	if (index >= tscratch->nr_entries)
		return NULL;

	return &tscratch->entries[index];
}
5801 
/* seq_file .start: scratch_mutex is held until l_stop() runs */
static void *l_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&scratch_mutex);

	return l_next(m, NULL, pos);
}
5808 
/* seq_file .stop: release the lock taken in l_start() */
static void l_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&scratch_mutex);
}
5813 
5814 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
5815 {
5816 	struct trace_scratch *tscratch = tr->scratch;
5817 
5818 	/*
5819 	 * Do not leak KASLR address. This only shows the KASLR address of
5820 	 * the last boot. When the ring buffer is started, the LAST_BOOT
5821 	 * flag gets cleared, and this should only report "current".
5822 	 * Otherwise it shows the KASLR address from the previous boot which
5823 	 * should not be the same as the current boot.
5824 	 */
5825 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5826 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
5827 	else
5828 		seq_puts(m, "# Current\n");
5829 }
5830 
5831 static int l_show(struct seq_file *m, void *v)
5832 {
5833 	struct trace_array *tr = m->private;
5834 	struct trace_mod_entry *entry = v;
5835 
5836 	if (v == LAST_BOOT_HEADER) {
5837 		show_last_boot_header(m, tr);
5838 		return 0;
5839 	}
5840 
5841 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
5842 	return 0;
5843 }
5844 
/* seq_file iteration over the last boot data (header + module entries) */
static const struct seq_operations last_boot_seq_ops = {
	.start		= l_start,
	.next		= l_next,
	.stop		= l_stop,
	.show		= l_show,
};
5851 
5852 static int tracing_last_boot_open(struct inode *inode, struct file *file)
5853 {
5854 	struct trace_array *tr = inode->i_private;
5855 	struct seq_file *m;
5856 	int ret;
5857 
5858 	ret = tracing_check_open_get_tr(tr);
5859 	if (ret)
5860 		return ret;
5861 
5862 	ret = seq_open(file, &last_boot_seq_ops);
5863 	if (ret) {
5864 		trace_array_put(tr);
5865 		return ret;
5866 	}
5867 
5868 	m = file->private_data;
5869 	m->private = tr;
5870 
5871 	return 0;
5872 }
5873 
5874 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
5875 {
5876 	struct trace_array *tr = inode->i_private;
5877 	int cpu = tracing_get_cpu(inode);
5878 	int ret;
5879 
5880 	ret = tracing_check_open_get_tr(tr);
5881 	if (ret)
5882 		return ret;
5883 
5884 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
5885 	if (ret < 0)
5886 		__trace_array_put(tr);
5887 	return ret;
5888 }
5889 
/* Accept and discard writes; the actual work happens on release below */
static ssize_t
tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	/*
	 * There is no need to read what the user has written, this function
	 * is just to make sure that there is no error when "echo" is used
	 */

	*ppos += cnt;

	return cnt;
}
5903 
/*
 * Release handler that frees the ring buffer: optionally stop tracing,
 * then shrink all per-CPU buffers to zero entries.
 */
static int
tracing_free_buffer_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	/* disable tracing ? */
	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
		tracer_tracing_off(tr);
	/* resize the ring buffer to 0 */
	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);

	trace_array_put(tr);

	return 0;
}
5919 
5920 #define TRACE_MARKER_MAX_SIZE		4096
5921 
/*
 * Write @cnt bytes of marker text from @buf into @tr's ring buffer as
 * a TRACE_PRINT event tagged with @ip. Guarantees the stored text is
 * '\0' terminated and ends with a newline, and fires any triggers
 * attached to the trace_marker event. Returns the number of payload
 * bytes written, or -EBADF if the event could not be reserved.
 */
static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
				      size_t cnt, unsigned long ip)
{
	struct ring_buffer_event *event;
	enum event_trigger_type tt = ETT_NONE;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	int meta_size;
	ssize_t written;
	size_t size;

	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
 again:
	size = cnt + meta_size;

	buffer = tr->array_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    tracing_gen_ctx());
	if (unlikely(!event)) {
		/*
		 * If the size was greater than what was allowed, then
		 * make it smaller and try again.
		 */
		if (size > ring_buffer_max_event_size(buffer)) {
			cnt = ring_buffer_max_event_size(buffer) - meta_size;
			/* The above should only happen once */
			if (WARN_ON_ONCE(cnt + meta_size == size))
				return -EBADF;
			goto again;
		}

		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;
	}

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	memcpy(&entry->buf, buf, cnt);
	written = cnt;

	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
		/* do not add \n before testing triggers, but add \0 */
		entry->buf[cnt] = '\0';
		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
	}

	/* Terminate with "\n\0" unless the text already ends in a newline */
	if (entry->buf[cnt - 1] != '\n') {
		entry->buf[cnt] = '\n';
		entry->buf[cnt + 1] = '\0';
	} else
		entry->buf[cnt] = '\0';

	if (static_branch_unlikely(&trace_marker_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_MARKER);
	__buffer_unlock_commit(buffer, event);

	/* Post-call triggers must run after the event is committed */
	if (tt)
		event_triggers_post_call(tr->trace_marker_file, tt);

	return written;
}
5983 
/* Per-CPU scratch buffer used to copy data in from user space */
struct trace_user_buf {
	char		*buf;	/* kmalloc'd buffer of tinfo->size bytes */
};

/* Protects trace_user_buffer and the descriptors' ref counts */
static DEFINE_MUTEX(trace_user_buffer_mutex);
/* Descriptor of the per-CPU buffers used by trace_marker writes */
static struct trace_user_buf_info *trace_user_buffer;
5990 
5991 /**
5992  * trace_user_fault_destroy - free up allocated memory of a trace user buffer
5993  * @tinfo: The descriptor to free up
5994  *
5995  * Frees any data allocated in the trace info dsecriptor.
5996  */
5997 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
5998 {
5999 	char *buf;
6000 	int cpu;
6001 
6002 	if (!tinfo || !tinfo->tbuf)
6003 		return;
6004 
6005 	for_each_possible_cpu(cpu) {
6006 		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6007 		kfree(buf);
6008 	}
6009 	free_percpu(tinfo->tbuf);
6010 }
6011 
/*
 * Allocate the per-CPU buffers for @tinfo (each @size bytes) and set
 * its ref count to one. Returns 0 on success or -ENOMEM. On failure,
 * buffers allocated so far are left in place for the caller to clean
 * up via trace_user_fault_destroy().
 */
static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
{
	char *buf;
	int cpu;

	lockdep_assert_held(&trace_user_buffer_mutex);

	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
	if (!tinfo->tbuf)
		return -ENOMEM;

	tinfo->ref = 1;
	tinfo->size = size;

	/* Clear each buffer in case of error */
	for_each_possible_cpu(cpu) {
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
	}

	for_each_possible_cpu(cpu) {
		buf = kmalloc_node(size, GFP_KERNEL,
				   cpu_to_node(cpu));
		if (!buf)
			return -ENOMEM;
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
	}

	return 0;
}
6041 
/* For internal use. Free the buffers and descriptor, clear the pointer */
static void user_buffer_free(struct trace_user_buf_info **tinfo)
{
	lockdep_assert_held(&trace_user_buffer_mutex);

	trace_user_fault_destroy(*tinfo);
	kfree(*tinfo);
	*tinfo = NULL;
}
6051 
/* For internal use. Initialize and allocate */
static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
{
	bool alloc = false;
	int ret;

	lockdep_assert_held(&trace_user_buffer_mutex);

	/* Allocate the descriptor itself only if the caller passed NULL */
	if (!*tinfo) {
		alloc = true;
		*tinfo = kzalloc_obj(**tinfo);
		if (!*tinfo)
			return -ENOMEM;
	}

	/*
	 * NOTE(review): if *tinfo already existed with buffers allocated,
	 * this overwrites its tbuf pointer. Callers here pass a fresh or
	 * NULL descriptor — confirm before reusing this helper elsewhere.
	 */
	ret = user_fault_buffer_enable(*tinfo, size);
	/* Only free on failure if the descriptor was allocated here */
	if (ret < 0 && alloc)
		user_buffer_free(tinfo);

	return ret;
}
6073 
/* For internal use, dereference and free if necessary */
static void user_buffer_put(struct trace_user_buf_info **tinfo)
{
	guard(mutex)(&trace_user_buffer_mutex);

	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
		return;

	/* Free only when the last reference is dropped */
	if (--(*tinfo)->ref)
		return;

	user_buffer_free(tinfo);
}
6087 
6088 /**
6089  * trace_user_fault_init - Allocated or reference a per CPU buffer
6090  * @tinfo: A pointer to the trace buffer descriptor
6091  * @size: The size to allocate each per CPU buffer
6092  *
6093  * Create a per CPU buffer that can be used to copy from user space
6094  * in a task context. When calling trace_user_fault_read(), preemption
6095  * must be disabled, and it will enable preemption and copy user
6096  * space data to the buffer. If any schedule switches occur, it will
6097  * retry until it succeeds without a schedule switch knowing the buffer
6098  * is still valid.
6099  *
6100  * Returns 0 on success, negative on failure.
6101  */
6102 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
6103 {
6104 	int ret;
6105 
6106 	if (!tinfo)
6107 		return -EINVAL;
6108 
6109 	guard(mutex)(&trace_user_buffer_mutex);
6110 
6111 	ret = user_buffer_init(&tinfo, size);
6112 	if (ret < 0)
6113 		trace_user_fault_destroy(tinfo);
6114 
6115 	return ret;
6116 }
6117 
6118 /**
6119  * trace_user_fault_get - up the ref count for the user buffer
6120  * @tinfo: A pointer to a pointer to the trace buffer descriptor
6121  *
6122  * Ups the ref count of the trace buffer.
6123  *
6124  * Returns the new ref count.
6125  */
6126 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
6127 {
6128 	if (!tinfo)
6129 		return -1;
6130 
6131 	guard(mutex)(&trace_user_buffer_mutex);
6132 
6133 	tinfo->ref++;
6134 	return tinfo->ref;
6135 }
6136 
6137 /**
6138  * trace_user_fault_put - dereference a per cpu trace buffer
6139  * @tinfo: The @tinfo that was passed to trace_user_fault_get()
6140  *
6141  * Decrement the ref count of @tinfo.
6142  *
6143  * Returns the new refcount (negative on error).
6144  */
6145 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
6146 {
6147 	guard(mutex)(&trace_user_buffer_mutex);
6148 
6149 	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
6150 		return -1;
6151 
6152 	--tinfo->ref;
6153 	return tinfo->ref;
6154 }
6155 
6156 /**
6157  * trace_user_fault_read - Read user space into a per CPU buffer
6158  * @tinfo: The @tinfo allocated by trace_user_fault_get()
6159  * @ptr: The user space pointer to read
6160  * @size: The size of user space to read.
6161  * @copy_func: Optional function to use to copy from user space
6162  * @data: Data to pass to copy_func if it was supplied
6163  *
6164  * Preemption must be disabled when this is called, and must not
6165  * be enabled while using the returned buffer.
6166  * This does the copying from user space into a per CPU buffer.
6167  *
6168  * The @size must not be greater than the size passed in to
6169  * trace_user_fault_init().
6170  *
6171  * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
6172  * otherwise it will call @copy_func. It will call @copy_func with:
6173  *
6174  *   buffer: the per CPU buffer of the @tinfo.
6175  *   ptr: The pointer @ptr to user space to read
6176  *   size: The @size of the ptr to read
6177  *   data: The @data parameter
6178  *
6179  * It is expected that @copy_func will return 0 on success and non zero
6180  * if there was a fault.
6181  *
6182  * Returns a pointer to the buffer with the content read from @ptr.
6183  *   Preemption must remain disabled while the caller accesses the
6184  *   buffer returned by this function.
6185  * Returns NULL if there was a fault, or the size passed in is
6186  *   greater than the size passed to trace_user_fault_init().
6187  */
6188 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
6189 			     const char __user *ptr, size_t size,
6190 			     trace_user_buf_copy copy_func, void *data)
6191 {
6192 	int cpu = smp_processor_id();
6193 	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6194 	unsigned int cnt;
6195 	int trys = 0;
6196 	int ret;
6197 
6198 	lockdep_assert_preemption_disabled();
6199 
6200 	/*
6201 	 * It's up to the caller to not try to copy more than it said
6202 	 * it would.
6203 	 */
6204 	if (size > tinfo->size)
6205 		return NULL;
6206 
6207 	/*
6208 	 * This acts similar to a seqcount. The per CPU context switches are
6209 	 * recorded, migration is disabled and preemption is enabled. The
6210 	 * read of the user space memory is copied into the per CPU buffer.
6211 	 * Preemption is disabled again, and if the per CPU context switches count
6212 	 * is still the same, it means the buffer has not been corrupted.
6213 	 * If the count is different, it is assumed the buffer is corrupted
6214 	 * and reading must be tried again.
6215 	 */
6216 
6217 	do {
6218 		/*
6219 		 * It is possible that something is trying to migrate this
6220 		 * task. What happens then, is when preemption is enabled,
6221 		 * the migration thread will preempt this task, try to
6222 		 * migrate it, fail, then let it run again. That will
6223 		 * cause this to loop again and never succeed.
6224 		 * On failures, enabled and disable preemption with
6225 		 * migration enabled, to allow the migration thread to
6226 		 * migrate this task.
6227 		 */
6228 		if (trys) {
6229 			preempt_enable_notrace();
6230 			preempt_disable_notrace();
6231 			cpu = smp_processor_id();
6232 			buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6233 		}
6234 
6235 		/*
6236 		 * If for some reason, copy_from_user() always causes a context
6237 		 * switch, this would then cause an infinite loop.
6238 		 * If this task is preempted by another user space task, it
6239 		 * will cause this task to try again. But just in case something
6240 		 * changes where the copying from user space causes another task
6241 		 * to run, prevent this from going into an infinite loop.
6242 		 * 100 tries should be plenty.
6243 		 */
6244 		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
6245 			return NULL;
6246 
6247 		/* Read the current CPU context switch counter */
6248 		cnt = nr_context_switches_cpu(cpu);
6249 
6250 		/*
6251 		 * Preemption is going to be enabled, but this task must
6252 		 * remain on this CPU.
6253 		 */
6254 		migrate_disable();
6255 
6256 		/*
6257 		 * Now preemption is being enabled and another task can come in
6258 		 * and use the same buffer and corrupt our data.
6259 		 */
6260 		preempt_enable_notrace();
6261 
6262 		/* Make sure preemption is enabled here */
6263 		lockdep_assert_preemption_enabled();
6264 
6265 		if (copy_func) {
6266 			ret = copy_func(buffer, ptr, size, data);
6267 		} else {
6268 			ret = __copy_from_user(buffer, ptr, size);
6269 		}
6270 
6271 		preempt_disable_notrace();
6272 		migrate_enable();
6273 
6274 		/* if it faulted, no need to test if the buffer was corrupted */
6275 		if (ret)
6276 			return NULL;
6277 
6278 		/*
6279 		 * Preemption is disabled again, now check the per CPU context
6280 		 * switch counter. If it doesn't match, then another user space
6281 		 * process may have schedule in and corrupted our buffer. In that
6282 		 * case the copying must be retried.
6283 		 */
6284 	} while (nr_context_switches_cpu(cpu) != cnt);
6285 
6286 	return buffer;
6287 }
6288 
/*
 * Write handler for the trace_marker file: copy a string from user space
 * into the trace ring buffer(s) as a marker event.
 */
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	unsigned long ip;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	/* Markers can be turned off per instance via the "markers" option */
	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* Reject sizes that would be negative when interpreted as ssize_t */
	if ((ssize_t)cnt < 0)
		return -EINVAL;

	/* Oversized writes are truncated rather than rejected */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		cnt = TRACE_MARKER_MAX_SIZE;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	/* Copy @ubuf into the per CPU buffer (may briefly re-enable preemption) */
	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The selftests expect this function to be the IP address */
	ip = _THIS_IP_;

	/* The global trace_marker can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_marker_to_buffer(tr, buf, cnt, ip);
			if (written < 0)
				break;
		}
	} else {
		written = write_marker_to_buffer(tr, buf, cnt, ip);
	}

	return written;
}
6334 
/*
 * Append a raw marker to @tr's ring buffer.
 * @buf holds the user supplied id followed by its payload; @cnt is their
 * combined length. Returns bytes written or a negative error.
 */
static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
					  const char *buf, size_t cnt)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct raw_data_entry *entry;
	ssize_t written;
	size_t size;

	/* cnt includes both the entry->id and the data behind it. */
	size = struct_offset(entry, id) + cnt;

	buffer = tr->array_buffer.buffer;

	/* The whole event must fit in a single ring buffer event */
	if (size > ring_buffer_max_event_size(buffer))
		return -EINVAL;

	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
					    tracing_gen_ctx());
	if (!event)
		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;

	entry = ring_buffer_event_data(event);
	unsafe_memcpy(&entry->id, buf, cnt,
		      "id and content already reserved on ring buffer"
		      "'buf' includes the 'id' and the data."
		      "'entry' was allocated with cnt from 'id'.");
	written = cnt;

	__buffer_unlock_commit(buffer, event);

	return written;
}
6369 
/*
 * Write handler for the trace_marker_raw file: copy a binary blob
 * (an id followed by payload) from user space into the ring buffer(s).
 */
static ssize_t
tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	/* Markers can be turned off per instance via the "markers" option */
	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* The marker must at least have a tag id */
	if (cnt < sizeof(unsigned int))
		return -EINVAL;

	/* raw write is all or nothing */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		return -EINVAL;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The global trace_marker_raw can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_raw_marker_to_buffer(tr, buf, cnt);
			if (written < 0)
				break;
		}
	} else {
		written = write_raw_marker_to_buffer(tr, buf, cnt);
	}

	return written;
}
6413 
/*
 * Open handler shared by trace_marker and trace_marker_raw: create the
 * shared user-copy buffer on first open, or take another reference to it,
 * then open the trace array itself.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int ret;

	scoped_guard(mutex, &trace_user_buffer_mutex) {
		if (!trace_user_buffer) {
			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
			if (ret < 0)
				return ret;
		} else {
			/* Buffer already exists; just count this opener */
			trace_user_buffer->ref++;
		}
	}

	stream_open(inode, filp);
	ret = tracing_open_generic_tr(inode, filp);
	if (ret < 0)
		/* Undo the reference (or init) taken above */
		user_buffer_put(&trace_user_buffer);
	return ret;
}
6434 
/* Release handler for trace_marker{,_raw}: drop the buffer and tr references */
static int tracing_mark_release(struct inode *inode, struct file *file)
{
	user_buffer_put(&trace_user_buffer);
	return tracing_release_generic_tr(inode, file);
}
6440 
6441 static int tracing_clock_show(struct seq_file *m, void *v)
6442 {
6443 	struct trace_array *tr = m->private;
6444 	int i;
6445 
6446 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6447 		seq_printf(m,
6448 			"%s%s%s%s", i ? " " : "",
6449 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6450 			i == tr->clock_id ? "]" : "");
6451 	seq_putc(m, '\n');
6452 
6453 	return 0;
6454 }
6455 
/*
 * tracing_set_clock - set the trace clock of a trace array
 * @tr: the trace array to update
 * @clockstr: name of the clock; must match an entry in trace_clocks[]
 *
 * Returns 0 on success or -EINVAL if @clockstr is not a known clock.
 */
int tracing_set_clock(struct trace_array *tr, const char *clockstr)
{
	int i;

	/* Look the name up in the table of known clocks */
	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
		if (strcmp(trace_clocks[i].name, clockstr) == 0)
			break;
	}
	if (i == ARRAY_SIZE(trace_clocks))
		return -EINVAL;

	guard(mutex)(&trace_types_lock);

	tr->clock_id = i;

	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);

	/*
	 * New clock may not be consistent with the previous clock.
	 * Reset the buffer so that it doesn't have incomparable timestamps.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Keep the snapshot buffer's clock in sync, and reset it as well */
	if (tr->snapshot_buffer.buffer)
		ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func);
	tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
	update_last_data_if_empty(tr);

	/* Record the new clock id in the scratch data too */
	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
		struct trace_scratch *tscratch = tr->scratch;

		tscratch->clock_id = i;
	}

	return 0;
}
6494 
6495 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6496 				   size_t cnt, loff_t *fpos)
6497 {
6498 	struct seq_file *m = filp->private_data;
6499 	struct trace_array *tr = m->private;
6500 	char buf[64];
6501 	const char *clockstr;
6502 	int ret;
6503 
6504 	if (cnt >= sizeof(buf))
6505 		return -EINVAL;
6506 
6507 	if (copy_from_user(buf, ubuf, cnt))
6508 		return -EFAULT;
6509 
6510 	buf[cnt] = 0;
6511 
6512 	clockstr = strstrip(buf);
6513 
6514 	ret = tracing_set_clock(tr, clockstr);
6515 	if (ret)
6516 		return ret;
6517 
6518 	*fpos += cnt;
6519 
6520 	return cnt;
6521 }
6522 
6523 static int tracing_clock_open(struct inode *inode, struct file *file)
6524 {
6525 	struct trace_array *tr = inode->i_private;
6526 	int ret;
6527 
6528 	ret = tracing_check_open_get_tr(tr);
6529 	if (ret)
6530 		return ret;
6531 
6532 	if ((file->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
6533 		trace_array_put(tr);
6534 		return -EACCES;
6535 	}
6536 
6537 	ret = single_open(file, tracing_clock_show, inode->i_private);
6538 	if (ret < 0)
6539 		trace_array_put(tr);
6540 
6541 	return ret;
6542 }
6543 
6544 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6545 {
6546 	struct trace_array *tr = m->private;
6547 
6548 	guard(mutex)(&trace_types_lock);
6549 
6550 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6551 		seq_puts(m, "delta [absolute]\n");
6552 	else
6553 		seq_puts(m, "[delta] absolute\n");
6554 
6555 	return 0;
6556 }
6557 
6558 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6559 {
6560 	struct trace_array *tr = inode->i_private;
6561 	int ret;
6562 
6563 	ret = tracing_check_open_get_tr(tr);
6564 	if (ret)
6565 		return ret;
6566 
6567 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6568 	if (ret < 0)
6569 		trace_array_put(tr);
6570 
6571 	return ret;
6572 }
6573 
6574 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
6575 {
6576 	if (rbe == this_cpu_read(trace_buffered_event))
6577 		return ring_buffer_time_stamp(buffer);
6578 
6579 	return ring_buffer_event_time_stamp(buffer, rbe);
6580 }
6581 
/* tracefs file operations for the control files implemented above */
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};

static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
};

static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_syscall_buf_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_syscall_buf_read,
	.write		= tracing_syscall_buf_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_buffer_meta_fops = {
	.open		= tracing_buffer_meta_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};

static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};

/* trace_marker{,_raw} are write-only streams (opened via stream_open()) */
static const struct file_operations tracing_mark_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_write,
	.release	= tracing_mark_release,
};

static const struct file_operations tracing_mark_raw_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_raw_write,
	.release	= tracing_mark_release,
};

static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};

static const struct file_operations trace_time_stamp_mode_fops = {
	.open		= tracing_time_stamp_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
};

static const struct file_operations last_boot_fops = {
	.open		= tracing_last_boot_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};
6674 
6675 /*
6676  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
6677  * @filp: The active open file structure
6678  * @ubuf: The userspace provided buffer to read value into
6679  * @cnt: The maximum number of bytes to read
6680  * @ppos: The current "file" position
6681  *
6682  * This function implements the write interface for a struct trace_min_max_param.
6683  * The filp->private_data must point to a trace_min_max_param structure that
6684  * defines where to write the value, the min and the max acceptable values,
6685  * and a lock to protect the write.
6686  */
6687 static ssize_t
6688 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
6689 {
6690 	struct trace_min_max_param *param = filp->private_data;
6691 	u64 val;
6692 	int err;
6693 
6694 	if (!param)
6695 		return -EFAULT;
6696 
6697 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
6698 	if (err)
6699 		return err;
6700 
6701 	if (param->lock)
6702 		mutex_lock(param->lock);
6703 
6704 	if (param->min && val < *param->min)
6705 		err = -EINVAL;
6706 
6707 	if (param->max && val > *param->max)
6708 		err = -EINVAL;
6709 
6710 	if (!err)
6711 		*param->val = val;
6712 
6713 	if (param->lock)
6714 		mutex_unlock(param->lock);
6715 
6716 	if (err)
6717 		return err;
6718 
6719 	return cnt;
6720 }
6721 
6722 /*
6723  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
6724  * @filp: The active open file structure
6725  * @ubuf: The userspace provided buffer to read value into
6726  * @cnt: The maximum number of bytes to read
6727  * @ppos: The current "file" position
6728  *
6729  * This function implements the read interface for a struct trace_min_max_param.
6730  * The filp->private_data must point to a trace_min_max_param struct with valid
6731  * data.
6732  */
6733 static ssize_t
6734 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6735 {
6736 	struct trace_min_max_param *param = filp->private_data;
6737 	char buf[U64_STR_SIZE];
6738 	int len;
6739 	u64 val;
6740 
6741 	if (!param)
6742 		return -EFAULT;
6743 
6744 	val = *param->val;
6745 
6746 	if (cnt > sizeof(buf))
6747 		cnt = sizeof(buf);
6748 
6749 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
6750 
6751 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
6752 }
6753 
/* Generic file operations for bounded u64 tracefs files */
const struct file_operations trace_min_max_fops = {
	.open		= tracing_open_generic,
	.read		= trace_min_max_read,
	.write		= trace_min_max_write,
};

/* Maximum number of entries kept in the error log */
#define TRACING_LOG_ERRS_MAX	8
/* Maximum length of an entry's location string */
#define TRACING_LOG_LOC_MAX	128

#define CMD_PREFIX "  Command: "

struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* timestamp (local_clock()) of the error */
};

struct tracing_log_err {
	struct list_head	list;
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;                     /* what caused err */
};

/* Serializes access to the err_log lists of all trace arrays */
static DEFINE_MUTEX(tracing_err_log_lock);
6780 
6781 static struct tracing_log_err *alloc_tracing_log_err(int len)
6782 {
6783 	struct tracing_log_err *err;
6784 
6785 	err = kzalloc_obj(*err);
6786 	if (!err)
6787 		return ERR_PTR(-ENOMEM);
6788 
6789 	err->cmd = kzalloc(len, GFP_KERNEL);
6790 	if (!err->cmd) {
6791 		kfree(err);
6792 		return ERR_PTR(-ENOMEM);
6793 	}
6794 
6795 	return err;
6796 }
6797 
/* Free an error log entry along with the command string it owns */
static void free_tracing_log_err(struct tracing_log_err *err)
{
	kfree(err->cmd);
	kfree(err);
}
6803 
/*
 * Get an error log entry to fill in. While fewer than TRACING_LOG_ERRS_MAX
 * entries exist, a new one is allocated; afterwards the oldest entry on
 * @tr->err_log is recycled (its cmd string replaced by a @len byte one).
 * Returns the entry or ERR_PTR(-ENOMEM).
 */
static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
						   int len)
{
	struct tracing_log_err *err;
	char *cmd;

	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
		err = alloc_tracing_log_err(len);
		/* Only count the new entry if allocation succeeded */
		if (PTR_ERR(err) != -ENOMEM)
			tr->n_err_log_entries++;

		return err;
	}
	cmd = kzalloc(len, GFP_KERNEL);
	if (!cmd)
		return ERR_PTR(-ENOMEM);
	/* Recycle the oldest entry; the caller re-adds it at the tail */
	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
	kfree(err->cmd);
	err->cmd = cmd;
	list_del(&err->list);

	return err;
}
6827 
6828 /**
6829  * err_pos - find the position of a string within a command for error careting
6830  * @cmd: The tracing command that caused the error
6831  * @str: The string to position the caret at within @cmd
6832  *
6833  * Finds the position of the first occurrence of @str within @cmd.  The
6834  * return value can be passed to tracing_log_err() for caret placement
6835  * within @cmd.
6836  *
6837  * Returns the index within @cmd of the first occurrence of @str or 0
6838  * if @str was not found.
6839  */
6840 unsigned int err_pos(char *cmd, const char *str)
6841 {
6842 	char *found;
6843 
6844 	if (WARN_ON(!strlen(cmd)))
6845 		return 0;
6846 
6847 	found = strstr(cmd, str);
6848 	if (found)
6849 		return found - cmd;
6850 
6851 	return 0;
6852 }
6853 
6854 /**
6855  * tracing_log_err - write an error to the tracing error log
6856  * @tr: The associated trace array for the error (NULL for top level array)
6857  * @loc: A string describing where the error occurred
6858  * @cmd: The tracing command that caused the error
6859  * @errs: The array of loc-specific static error strings
6860  * @type: The index into errs[], which produces the specific static err string
6861  * @pos: The position the caret should be placed in the cmd
6862  *
6863  * Writes an error into tracing/error_log of the form:
6864  *
6865  * <loc>: error: <text>
6866  *   Command: <cmd>
6867  *              ^
6868  *
6869  * tracing/error_log is a small log file containing the last
6870  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
6871  * unless there has been a tracing error, and the error log can be
6872  * cleared and have its memory freed by writing the empty string in
6873  * truncation mode to it i.e. echo > tracing/error_log.
6874  *
6875  * NOTE: the @errs array along with the @type param are used to
6876  * produce a static error string - this string is not copied and saved
6877  * when the error is logged - only a pointer to it is saved.  See
6878  * existing callers for examples of how static strings are typically
6879  * defined for use with tracing_log_err().
6880  */
6881 void tracing_log_err(struct trace_array *tr,
6882 		     const char *loc, const char *cmd,
6883 		     const char **errs, u8 type, u16 pos)
6884 {
6885 	struct tracing_log_err *err;
6886 	int len = 0;
6887 
6888 	if (!tr)
6889 		tr = &global_trace;
6890 
6891 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
6892 
6893 	guard(mutex)(&tracing_err_log_lock);
6894 
6895 	err = get_tracing_log_err(tr, len);
6896 	if (PTR_ERR(err) == -ENOMEM)
6897 		return;
6898 
6899 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
6900 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
6901 
6902 	err->info.errs = errs;
6903 	err->info.type = type;
6904 	err->info.pos = pos;
6905 	err->info.ts = local_clock();
6906 
6907 	list_add_tail(&err->list, &tr->err_log);
6908 }
6909 
6910 static void clear_tracing_err_log(struct trace_array *tr)
6911 {
6912 	struct tracing_log_err *err, *next;
6913 
6914 	guard(mutex)(&tracing_err_log_lock);
6915 
6916 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
6917 		list_del(&err->list);
6918 		free_tracing_log_err(err);
6919 	}
6920 
6921 	tr->n_err_log_entries = 0;
6922 }
6923 
/* seq_file start: takes tracing_err_log_lock, released in ->stop() */
static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;

	mutex_lock(&tracing_err_log_lock);

	return seq_list_start(&tr->err_log, *pos);
}

/* seq_file next: advance to the next error log entry */
static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;

	return seq_list_next(v, &tr->err_log, pos);
}

/* seq_file stop: drops the lock taken in ->start() */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
6944 
6945 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
6946 {
6947 	u16 i;
6948 
6949 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
6950 		seq_putc(m, ' ');
6951 	for (i = 0; i < pos; i++)
6952 		seq_putc(m, ' ');
6953 	seq_puts(m, "^\n");
6954 }
6955 
/* Print one entry: "[sec.usec] <loc>: error: <text>\n  Command: <cmd>\n  ^" */
static int tracing_err_log_seq_show(struct seq_file *m, void *v)
{
	struct tracing_log_err *err = v;

	if (err) {
		const char *err_text = err->info.errs[err->info.type];
		u64 sec = err->info.ts;
		u32 nsec;

		/* Split the ns timestamp into seconds and remainder ns */
		nsec = do_div(sec, NSEC_PER_SEC);
		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
			   err->loc, err_text);
		seq_printf(m, "%s", err->cmd);
		tracing_err_log_show_pos(m, err->info.pos);
	}

	return 0;
}

static const struct seq_operations tracing_err_log_seq_ops = {
	.start  = tracing_err_log_seq_start,
	.next   = tracing_err_log_seq_next,
	.stop   = tracing_err_log_seq_stop,
	.show   = tracing_err_log_seq_show
};
6981 
6982 static int tracing_err_log_open(struct inode *inode, struct file *file)
6983 {
6984 	struct trace_array *tr = inode->i_private;
6985 	int ret = 0;
6986 
6987 	ret = tracing_check_open_get_tr(tr);
6988 	if (ret)
6989 		return ret;
6990 
6991 	/* If this file was opened for write, then erase contents */
6992 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
6993 		clear_tracing_err_log(tr);
6994 
6995 	if (file->f_mode & FMODE_READ) {
6996 		ret = seq_open(file, &tracing_err_log_seq_ops);
6997 		if (!ret) {
6998 			struct seq_file *m = file->private_data;
6999 			m->private = tr;
7000 		} else {
7001 			trace_array_put(tr);
7002 		}
7003 	}
7004 	return ret;
7005 }
7006 
/* Writes are accepted but ignored (an O_TRUNC open is what clears the log) */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
7013 
/* Release handler for tracing/error_log */
static int tracing_err_log_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	/* Balance the reference taken in tracing_err_log_open() */
	trace_array_put(tr);

	/* Only readers set up a seq_file that needs tearing down */
	if (file->f_mode & FMODE_READ)
		seq_release(inode, file);

	return 0;
}
7025 
/* tracing/error_log file operations */
static const struct file_operations tracing_err_log_fops = {
	.open           = tracing_err_log_open,
	.write		= tracing_err_log_write,
	.read           = seq_read,
	.llseek         = tracing_lseek,
	.release        = tracing_err_log_release,
};
7033 
/*
 * Open handler for the per-CPU binary buffer files: allocate a
 * ftrace_buffer_info whose iterator is bound to the trace array and to
 * the CPU encoded in the inode.
 */
int tracing_buffers_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct ftrace_buffer_info *info;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	info = kvzalloc_obj(*info);
	if (!info) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	mutex_lock(&trace_types_lock);

	info->iter.tr		= tr;
	info->iter.cpu_file	= tracing_get_cpu(inode);
	info->iter.trace	= tr->current_trace;
	info->iter.array_buffer = &tr->array_buffer;
	info->spare		= NULL;
	/* Force reading ring buffer for first read */
	info->read		= (unsigned int)-1;

	filp->private_data = info;

	/* Count this open against the trace array (dropped in release) */
	tr->trace_ref++;

	mutex_unlock(&trace_types_lock);

	ret = nonseekable_open(inode, filp);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7072 
7073 static __poll_t
7074 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7075 {
7076 	struct ftrace_buffer_info *info = filp->private_data;
7077 	struct trace_iterator *iter = &info->iter;
7078 
7079 	return trace_poll(iter, filp, poll_table);
7080 }
7081 
/*
 * Read handler for the per-CPU binary buffer files. Whole sub-buffer
 * pages are pulled out of the ring buffer into a "spare" page, and user
 * space is served from that page across successive reads.
 */
ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
			     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	void *trace_data;
	int page_size;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

	/* Can't read the snapshot while the current tracer is using it */
	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);

	/* Make sure the spare matches the current sub buffer size */
	if (info->spare) {
		if (page_size != info->spare_size) {
			ring_buffer_free_read_page(iter->array_buffer->buffer,
						   info->spare_cpu, info->spare);
			info->spare = NULL;
		}
	}

	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
			info->spare_size = page_size;
		}
	}
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < page_size)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		/* Nothing was read: wait for data unless non-blocking or closed */
		if (trace_empty(iter) && !iter->closed) {
			if (update_last_data_if_empty(iter->tr))
				return 0;

			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	info->read = 0;
 read:
	/* Copy out what remains of the spare page, bounded by @count */
	size = page_size - info->read;
	if (size > count)
		size = count;
	trace_data = ring_buffer_read_page_data(info->spare);
	ret = copy_to_user(ubuf, trace_data + info->read, size);
	/* copy_to_user() returns the number of bytes NOT copied */
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
7169 
/*
 * Flush handler (called on close of a file descriptor): mark the iterator
 * closed and wake all waiters so blocked readers can return.
 */
static int tracing_buffers_flush(struct file *file, fl_owner_t id)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	iter->closed = true;
	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
7183 
/* Release handler: drop the trace array references and free the reader state */
int tracing_buffers_release(struct inode *inode, struct file *file)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	guard(mutex)(&trace_types_lock);

	iter->tr->trace_ref--;

	__trace_array_put(iter->tr);

	/* Return the spare page to the ring buffer */
	if (info->spare)
		ring_buffer_free_read_page(iter->array_buffer->buffer,
					   info->spare_cpu, info->spare);
	kvfree(info);

	return 0;
}
7202 
/*
 * A refcounted reference to a ring-buffer read page that has been handed
 * to a pipe via splice. The page is given back to the ring buffer when
 * the last reference is dropped (see buffer_ref_release()).
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* buffer the page was read from */
	void			*page;		/* page from ring_buffer_alloc_read_page() */
	int			cpu;		/* CPU the page belongs to */
	refcount_t		refcount;
};
7209 
7210 static void buffer_ref_release(struct buffer_ref *ref)
7211 {
7212 	if (!refcount_dec_and_test(&ref->refcount))
7213 		return;
7214 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7215 	kfree(ref);
7216 }
7217 
7218 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7219 				    struct pipe_buffer *buf)
7220 {
7221 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7222 
7223 	buffer_ref_release(ref);
7224 	buf->private = 0;
7225 }
7226 
/*
 * Take an extra reference on the page backing a pipe buffer.
 * Refuse once the count gets close to overflowing.
 */
static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
				struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	if (refcount_read(&ref->refcount) > INT_MAX/2)
		return false;

	refcount_inc(&ref->refcount);
	return true;
}
7238 
/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.release		= buffer_pipe_buf_release,
	.get			= buffer_pipe_buf_get,
};

/*
 * Callback from splice_to_pipe(), if we need to release some pages
 * at the end of the spd in case we error'ed out in filling the pipe.
 */
static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	struct buffer_ref *ref =
		(struct buffer_ref *)spd->partial[i].private;

	/* Drop the reference held by slot @i of the spd */
	buffer_ref_release(ref);
	spd->partial[i].private = 0;
}
7257 
/*
 * splice() handler for the per-CPU binary buffer files: hand whole
 * ring-buffer pages to the pipe without copying, each page wrapped in a
 * refcounted buffer_ref that returns it to the ring buffer on release.
 */
ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
				    struct pipe_inode_info *pipe, size_t len,
				    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	bool woken = false;
	int page_size;
	int entries, i;
	ssize_t ret = 0;

	/* Can't read the snapshot while the current tracer is using it */
	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
	/* Offsets must be sub-buffer aligned */
	if (*ppos & (page_size - 1))
		return -EINVAL;

	/* Lengths are rounded down to whole sub-buffers */
	if (len & (page_size - 1)) {
		if (len < page_size)
			return -EINVAL;
		len &= (~(page_size - 1));
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	/* Fill the spd with up to nr_pages_max pages of trace data */
	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
		struct page *page;
		int r;

		ref = kzalloc_obj(*ref);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		r = ring_buffer_read_page(ref->buffer, ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ring_buffer_read_page_data(ref->page));

		spd.pages[i] = page;
		spd.partial[i].len = page_size;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += page_size;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {

		if (ret)
			goto out;

		/* Already waited and woke once; don't wait again */
		if (woken)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Iterate one more time to collect any new data then exit */
		woken = true;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
7377 
/*
 * ioctl() handler for the per-CPU trace_pipe_raw buffer files.
 *
 * TRACE_MMAP_IOCTL_GET_READER advances the memory-mapped reader page,
 * first blocking for data unless the file was opened with O_NONBLOCK.
 * A cmd of zero wakes up all waiters blocked on this buffer; any other
 * cmd is rejected with -ENOTTY.
 */
static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	int err;

	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
		if (!(file->f_flags & O_NONBLOCK)) {
			/* Wait until the buffer fills to tr->buffer_percent */
			err = ring_buffer_wait(iter->array_buffer->buffer,
					       iter->cpu_file,
					       iter->tr->buffer_percent,
					       NULL, NULL);
			if (err)
				return err;
		}

		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
						  iter->cpu_file);
	} else if (cmd) {
		return -ENOTTY;
	}

	/*
	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
	 * waiters
	 */
	guard(mutex)(&trace_types_lock);

	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
7413 
7414 /*
7415  * This is called when a VMA is duplicated (e.g., on fork()) to increment
7416  * the user_mapped counter without remapping pages.
7417  */
7418 static void tracing_buffers_mmap_open(struct vm_area_struct *vma)
7419 {
7420 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
7421 	struct trace_iterator *iter = &info->iter;
7422 
7423 	ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file);
7424 }
7425 
7426 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
7427 {
7428 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
7429 	struct trace_iterator *iter = &info->iter;
7430 
7431 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
7432 	put_snapshot_map(iter->tr);
7433 }
7434 
/* Refuse any attempt to split a trace buffer VMA (e.g. partial munmap()). */
static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
{
	/*
	 * Trace buffer mappings require the complete buffer including
	 * the meta page. Partial mappings are not supported.
	 */
	return -EINVAL;
}
7443 
/* VMA callbacks for user-space mappings of the ring buffer */
static const struct vm_operations_struct tracing_buffers_vmops = {
	.open		= tracing_buffers_mmap_open,
	.close		= tracing_buffers_mmap_close,
	.may_split      = tracing_buffers_may_split,
};
7449 
/*
 * mmap() handler: map the ring buffer of iter->cpu_file into user space.
 * Buffers backed by reserved memory or vmalloc cannot be mapped.
 */
static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	int ret = 0;

	/* A memmap'ed and backup buffers are not supported for user space mmap */
	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
		return -ENODEV;

	/* Paired with put_snapshot_map() in tracing_buffers_mmap_close() */
	ret = get_snapshot_map(iter->tr);
	if (ret)
		return ret;

	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
	if (ret)
		put_snapshot_map(iter->tr);

	/* On failure the vma is discarded, so setting vm_ops is harmless */
	vma->vm_ops = &tracing_buffers_vmops;

	return ret;
}
7472 
/* File operations for the per-CPU trace_pipe_raw files */
static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.flush		= tracing_buffers_flush,
	.splice_read	= tracing_buffers_splice_read,
	.unlocked_ioctl = tracing_buffers_ioctl,
	.mmap		= tracing_buffers_mmap,
};
7483 
7484 static ssize_t
7485 tracing_stats_read(struct file *filp, char __user *ubuf,
7486 		   size_t count, loff_t *ppos)
7487 {
7488 	struct inode *inode = file_inode(filp);
7489 	struct trace_array *tr = inode->i_private;
7490 	struct array_buffer *trace_buf = &tr->array_buffer;
7491 	int cpu = tracing_get_cpu(inode);
7492 	struct trace_seq *s;
7493 	unsigned long cnt;
7494 	unsigned long long t;
7495 	unsigned long usec_rem;
7496 
7497 	s = kmalloc_obj(*s);
7498 	if (!s)
7499 		return -ENOMEM;
7500 
7501 	trace_seq_init(s);
7502 
7503 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7504 	trace_seq_printf(s, "entries: %ld\n", cnt);
7505 
7506 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7507 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7508 
7509 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7510 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7511 
7512 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7513 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7514 
7515 	if (trace_clocks[tr->clock_id].in_ns) {
7516 		/* local or global for trace_clock */
7517 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7518 		usec_rem = do_div(t, USEC_PER_SEC);
7519 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7520 								t, usec_rem);
7521 
7522 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
7523 		usec_rem = do_div(t, USEC_PER_SEC);
7524 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7525 	} else {
7526 		/* counter or tsc mode for trace_clock */
7527 		trace_seq_printf(s, "oldest event ts: %llu\n",
7528 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7529 
7530 		trace_seq_printf(s, "now ts: %llu\n",
7531 				ring_buffer_time_stamp(trace_buf->buffer));
7532 	}
7533 
7534 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7535 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7536 
7537 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7538 	trace_seq_printf(s, "read events: %ld\n", cnt);
7539 
7540 	count = simple_read_from_buffer(ubuf, count, ppos,
7541 					s->buffer, trace_seq_used(s));
7542 
7543 	kfree(s);
7544 
7545 	return count;
7546 }
7547 
/* File operations for the per-CPU "stats" files */
static const struct file_operations tracing_stats_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_stats_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
7554 
7555 #ifdef CONFIG_DYNAMIC_FTRACE
7556 
7557 static ssize_t
7558 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7559 		  size_t cnt, loff_t *ppos)
7560 {
7561 	ssize_t ret;
7562 	char *buf;
7563 	int r;
7564 
7565 	/* 512 should be plenty to hold the amount needed */
7566 #define DYN_INFO_BUF_SIZE	512
7567 
7568 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
7569 	if (!buf)
7570 		return -ENOMEM;
7571 
7572 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
7573 		      "%ld pages:%ld groups: %ld\n"
7574 		      "ftrace boot update time = %llu (ns)\n"
7575 		      "ftrace module total update time = %llu (ns)\n",
7576 		      ftrace_update_tot_cnt,
7577 		      ftrace_number_of_pages,
7578 		      ftrace_number_of_groups,
7579 		      ftrace_update_time,
7580 		      ftrace_total_mod_time);
7581 
7582 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7583 	kfree(buf);
7584 	return ret;
7585 }
7586 
/* Read-only file operations exposing dynamic ftrace statistics */
static const struct file_operations tracing_dyn_info_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_read_dyn_info,
	.llseek		= generic_file_llseek,
};
7592 #endif /* CONFIG_DYNAMIC_FTRACE */
7593 
7594 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7595 {
7596 	/* Top directory uses NULL as the parent */
7597 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7598 		return NULL;
7599 
7600 	if (WARN_ON(!tr->dir))
7601 		return ERR_PTR(-ENODEV);
7602 
7603 	/* All sub buffers have a descriptor */
7604 	return tr->dir;
7605 }
7606 
7607 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7608 {
7609 	struct dentry *d_tracer;
7610 
7611 	if (tr->percpu_dir)
7612 		return tr->percpu_dir;
7613 
7614 	d_tracer = tracing_get_dentry(tr);
7615 	if (IS_ERR(d_tracer))
7616 		return NULL;
7617 
7618 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7619 
7620 	MEM_FAIL(!tr->percpu_dir,
7621 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7622 
7623 	return tr->percpu_dir;
7624 }
7625 
7626 struct dentry *
7627 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7628 		      void *data, long cpu, const struct file_operations *fops)
7629 {
7630 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7631 
7632 	if (ret) /* See tracing_get_cpu() */
7633 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7634 	return ret;
7635 }
7636 
7637 static void
7638 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7639 {
7640 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7641 	struct dentry *d_cpu;
7642 	char cpu_dir[30]; /* 30 characters should be more than enough */
7643 
7644 	if (!d_percpu)
7645 		return;
7646 
7647 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7648 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7649 	if (!d_cpu) {
7650 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7651 		return;
7652 	}
7653 
7654 	/* per cpu trace_pipe */
7655 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
7656 				tr, cpu, &tracing_pipe_fops);
7657 
7658 	/* per cpu trace */
7659 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
7660 				tr, cpu, &tracing_fops);
7661 
7662 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
7663 				tr, cpu, &tracing_buffers_fops);
7664 
7665 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
7666 				tr, cpu, &tracing_stats_fops);
7667 
7668 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu,
7669 				tr, cpu, &tracing_entries_fops);
7670 
7671 	if (tr->range_addr_start)
7672 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
7673 				      tr, cpu, &tracing_buffer_meta_fops);
7674 #ifdef CONFIG_TRACER_SNAPSHOT
7675 	if (!tr->range_addr_start) {
7676 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
7677 				      tr, cpu, &snapshot_fops);
7678 
7679 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
7680 				      tr, cpu, &snapshot_raw_fops);
7681 	}
7682 #endif
7683 }
7684 
7685 #ifdef CONFIG_FTRACE_SELFTEST
7686 /* Let selftest have access to static functions in this file */
7687 #include "trace_selftest.c"
7688 #endif
7689 
7690 static ssize_t
7691 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7692 			loff_t *ppos)
7693 {
7694 	struct trace_option_dentry *topt = filp->private_data;
7695 	char *buf;
7696 
7697 	if (topt->flags->val & topt->opt->bit)
7698 		buf = "1\n";
7699 	else
7700 		buf = "0\n";
7701 
7702 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7703 }
7704 
7705 static ssize_t
7706 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7707 			 loff_t *ppos)
7708 {
7709 	struct trace_option_dentry *topt = filp->private_data;
7710 	unsigned long val;
7711 	int ret;
7712 
7713 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7714 	if (ret)
7715 		return ret;
7716 
7717 	if (val != 0 && val != 1)
7718 		return -EINVAL;
7719 
7720 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7721 		guard(mutex)(&trace_types_lock);
7722 		ret = __set_tracer_option(topt->tr, topt->flags,
7723 					  topt->opt, !val);
7724 		if (ret)
7725 			return ret;
7726 	}
7727 
7728 	*ppos += cnt;
7729 
7730 	return cnt;
7731 }
7732 
7733 static int tracing_open_options(struct inode *inode, struct file *filp)
7734 {
7735 	struct trace_option_dentry *topt = inode->i_private;
7736 	int ret;
7737 
7738 	ret = tracing_check_open_get_tr(topt->tr);
7739 	if (ret)
7740 		return ret;
7741 
7742 	filp->private_data = inode->i_private;
7743 	return 0;
7744 }
7745 
/* Release a tracer option file; drops the reference taken at open time. */
static int tracing_release_options(struct inode *inode, struct file *file)
{
	struct trace_option_dentry *topt = file->private_data;

	trace_array_put(topt->tr);
	return 0;
}
7753 
/* File operations for the per-tracer files under options/ */
static const struct file_operations trace_options_fops = {
	.open = tracing_open_options,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek	= generic_file_llseek,
	.release = tracing_release_options,
};
7761 
7762 /*
7763  * In order to pass in both the trace_array descriptor as well as the index
7764  * to the flag that the trace option file represents, the trace_array
7765  * has a character array of trace_flags_index[], which holds the index
7766  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7767  * The address of this character array is passed to the flag option file
7768  * read/write callbacks.
7769  *
7770  * In order to extract both the index and the trace_array descriptor,
7771  * get_tr_index() uses the following algorithm.
7772  *
7773  *   idx = *ptr;
7774  *
7775  * As the pointer itself contains the address of the index (remember
7776  * index[1] == 1).
7777  *
7778  * Then to get the trace_array descriptor, by subtracting that index
7779  * from the ptr, we get to the start of the index itself.
7780  *
7781  *   ptr - idx == &index[0]
7782  *
7783  * Then a simple container_of() from that pointer gets us to the
7784  * trace_array descriptor.
7785  */
/* Decode a trace_flags_index pointer into its trace_array and bit index. */
static void get_tr_index(void *data, struct trace_array **ptr,
			 unsigned int *pindex)
{
	/* index[i] == i, so the byte stored there is the flag's bit number */
	*pindex = *(unsigned char *)data;

	/* Step back to index[0] and recover the enclosing trace_array */
	*ptr = container_of(data - *pindex, struct trace_array,
			    trace_flags_index);
}
7794 
7795 static ssize_t
7796 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7797 			loff_t *ppos)
7798 {
7799 	void *tr_index = filp->private_data;
7800 	struct trace_array *tr;
7801 	unsigned int index;
7802 	char *buf;
7803 
7804 	get_tr_index(tr_index, &tr, &index);
7805 
7806 	if (tr->trace_flags & (1ULL << index))
7807 		buf = "1\n";
7808 	else
7809 		buf = "0\n";
7810 
7811 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7812 }
7813 
/*
 * Write handler for a core trace flag file: accepts "0" or "1" and
 * updates the corresponding bit of tr->trace_flags.
 */
static ssize_t
trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	unsigned long val;
	int ret;

	/* Recover the trace_array and flag bit from the index pointer */
	get_tr_index(tr_index, &tr, &index);

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	/* Lock order: event_mutex before trace_types_lock */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);
	ret = set_tracer_flag(tr, 1ULL << index, val);
	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}
7846 
/* File operations for the core trace flag files under options/ */
static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};
7853 
7854 struct dentry *trace_create_file(const char *name,
7855 				 umode_t mode,
7856 				 struct dentry *parent,
7857 				 void *data,
7858 				 const struct file_operations *fops)
7859 {
7860 	struct dentry *ret;
7861 
7862 	ret = tracefs_create_file(name, mode, parent, data, fops);
7863 	if (!ret)
7864 		pr_warn("Could not create tracefs '%s' entry\n", name);
7865 
7866 	return ret;
7867 }
7868 
7869 
7870 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7871 {
7872 	struct dentry *d_tracer;
7873 
7874 	if (tr->options)
7875 		return tr->options;
7876 
7877 	d_tracer = tracing_get_dentry(tr);
7878 	if (IS_ERR(d_tracer))
7879 		return NULL;
7880 
7881 	tr->options = tracefs_create_dir("options", d_tracer);
7882 	if (!tr->options) {
7883 		pr_warn("Could not create tracefs directory 'options'\n");
7884 		return NULL;
7885 	}
7886 
7887 	return tr->options;
7888 }
7889 
7890 static void
7891 create_trace_option_file(struct trace_array *tr,
7892 			 struct trace_option_dentry *topt,
7893 			 struct tracer_flags *flags,
7894 			 struct tracer_opt *opt)
7895 {
7896 	struct dentry *t_options;
7897 
7898 	t_options = trace_options_init_dentry(tr);
7899 	if (!t_options)
7900 		return;
7901 
7902 	topt->flags = flags;
7903 	topt->opt = opt;
7904 	topt->tr = tr;
7905 
7906 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
7907 					t_options, topt, &trace_options_fops);
7908 }
7909 
7910 static int
7911 create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
7912 			  struct tracer_flags *flags)
7913 {
7914 	struct trace_option_dentry *topts;
7915 	struct trace_options *tr_topts;
7916 	struct tracer_opt *opts;
7917 	int cnt;
7918 
7919 	if (!flags || !flags->opts)
7920 		return 0;
7921 
7922 	opts = flags->opts;
7923 
7924 	for (cnt = 0; opts[cnt].name; cnt++)
7925 		;
7926 
7927 	topts = kzalloc_objs(*topts, cnt + 1);
7928 	if (!topts)
7929 		return 0;
7930 
7931 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7932 			    GFP_KERNEL);
7933 	if (!tr_topts) {
7934 		kfree(topts);
7935 		return -ENOMEM;
7936 	}
7937 
7938 	tr->topts = tr_topts;
7939 	tr->topts[tr->nr_topts].tracer = tracer;
7940 	tr->topts[tr->nr_topts].topts = topts;
7941 	tr->nr_topts++;
7942 
7943 	for (cnt = 0; opts[cnt].name; cnt++) {
7944 		create_trace_option_file(tr, &topts[cnt], flags,
7945 					 &opts[cnt]);
7946 		MEM_FAIL(topts[cnt].entry == NULL,
7947 			  "Failed to create trace option: %s",
7948 			  opts[cnt].name);
7949 	}
7950 	return 0;
7951 }
7952 
7953 static int get_global_flags_val(struct tracer *tracer)
7954 {
7955 	struct tracers *t;
7956 
7957 	list_for_each_entry(t, &global_trace.tracers, list) {
7958 		if (t->tracer != tracer)
7959 			continue;
7960 		if (!t->flags)
7961 			return -1;
7962 		return t->flags->val;
7963 	}
7964 	return -1;
7965 }
7966 
7967 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
7968 {
7969 	struct tracer *tracer = t->tracer;
7970 	struct tracer_flags *flags = t->flags ?: tracer->flags;
7971 
7972 	if (!flags)
7973 		return 0;
7974 
7975 	/* Only add tracer options after update_tracer_options finish */
7976 	if (!tracer_options_updated)
7977 		return 0;
7978 
7979 	return create_trace_option_files(tr, tracer, flags);
7980 }
7981 
7982 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
7983 {
7984 	struct tracer_flags *flags;
7985 	struct tracers *t;
7986 	int ret;
7987 
7988 	/* Only enable if the directory has been created already. */
7989 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
7990 		return 0;
7991 
7992 	/*
7993 	 * If this is an instance, only create flags for tracers
7994 	 * the instance may have.
7995 	 */
7996 	if (!trace_ok_for_array(tracer, tr))
7997 		return 0;
7998 
7999 	t = kmalloc_obj(*t);
8000 	if (!t)
8001 		return -ENOMEM;
8002 
8003 	t->tracer = tracer;
8004 	t->flags = NULL;
8005 	list_add(&t->list, &tr->tracers);
8006 
8007 	flags = tracer->flags;
8008 	if (!flags) {
8009 		if (!tracer->default_flags)
8010 			return 0;
8011 
8012 		/*
8013 		 * If the tracer defines default flags, it means the flags are
8014 		 * per trace instance.
8015 		 */
8016 		flags = kmalloc_obj(*flags);
8017 		if (!flags)
8018 			return -ENOMEM;
8019 
8020 		*flags = *tracer->default_flags;
8021 		flags->trace = tracer;
8022 
8023 		t->flags = flags;
8024 
8025 		/* If this is an instance, inherit the global_trace flags */
8026 		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
8027 			int val = get_global_flags_val(tracer);
8028 			if (!WARN_ON_ONCE(val < 0))
8029 				flags->val = val;
8030 		}
8031 	}
8032 
8033 	ret = add_tracer_options(tr, t);
8034 	if (ret < 0) {
8035 		list_del(&t->list);
8036 		kfree(t->flags);
8037 		kfree(t);
8038 	}
8039 
8040 	return ret;
8041 }
8042 
8043 static struct dentry *
8044 create_trace_option_core_file(struct trace_array *tr,
8045 			      const char *option, long index)
8046 {
8047 	struct dentry *t_options;
8048 
8049 	t_options = trace_options_init_dentry(tr);
8050 	if (!t_options)
8051 		return NULL;
8052 
8053 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8054 				 (void *)&tr->trace_flags_index[index],
8055 				 &trace_options_core_fops);
8056 }
8057 
8058 static void create_trace_options_dir(struct trace_array *tr)
8059 {
8060 	struct dentry *t_options;
8061 	bool top_level = tr == &global_trace;
8062 	int i;
8063 
8064 	t_options = trace_options_init_dentry(tr);
8065 	if (!t_options)
8066 		return;
8067 
8068 	for (i = 0; trace_options[i]; i++) {
8069 		if (top_level ||
8070 		    !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
8071 			create_trace_option_core_file(tr, trace_options[i], i);
8072 		}
8073 	}
8074 }
8075 
8076 static ssize_t
8077 rb_simple_read(struct file *filp, char __user *ubuf,
8078 	       size_t cnt, loff_t *ppos)
8079 {
8080 	struct trace_array *tr = filp->private_data;
8081 	char buf[64];
8082 	int r;
8083 
8084 	r = tracer_tracing_is_on(tr);
8085 	r = sprintf(buf, "%d\n", r);
8086 
8087 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8088 }
8089 
/*
 * Enable ("1") or disable ("0") recording into @tr's ring buffer.  The
 * current tracer's start/stop callbacks run on state transitions, and
 * waiters are woken when tracing is switched off so readers don't block
 * on a buffer that will never fill further.
 */
static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (buffer) {
		guard(mutex)(&trace_types_lock);
		/* Only act on an actual state change */
		if (!!val == tracer_tracing_is_on(tr)) {
			val = 0; /* do nothing */
		} else if (val) {
			tracer_tracing_on(tr);
			if (tr->current_trace->start)
				tr->current_trace->start(tr);
		} else {
			tracer_tracing_off(tr);
			if (tr->current_trace->stop)
				tr->current_trace->stop(tr);
			/* Wake up any waiters */
			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
		}
	}

	(*ppos)++;

	return cnt;
}
8124 
/* File operations toggling/reporting whether the ring buffer records */
static const struct file_operations rb_simple_fops = {
	.open		= tracing_open_generic_tr,
	.read		= rb_simple_read,
	.write		= rb_simple_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
8132 
8133 static ssize_t
8134 buffer_percent_read(struct file *filp, char __user *ubuf,
8135 		    size_t cnt, loff_t *ppos)
8136 {
8137 	struct trace_array *tr = filp->private_data;
8138 	char buf[64];
8139 	int r;
8140 
8141 	r = tr->buffer_percent;
8142 	r = sprintf(buf, "%d\n", r);
8143 
8144 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8145 }
8146 
8147 static ssize_t
8148 buffer_percent_write(struct file *filp, const char __user *ubuf,
8149 		     size_t cnt, loff_t *ppos)
8150 {
8151 	struct trace_array *tr = filp->private_data;
8152 	unsigned long val;
8153 	int ret;
8154 
8155 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8156 	if (ret)
8157 		return ret;
8158 
8159 	if (val > 100)
8160 		return -EINVAL;
8161 
8162 	tr->buffer_percent = val;
8163 
8164 	(*ppos)++;
8165 
8166 	return cnt;
8167 }
8168 
/* File operations for reading/writing the buffer fill percentage */
static const struct file_operations buffer_percent_fops = {
	.open		= tracing_open_generic_tr,
	.read		= buffer_percent_read,
	.write		= buffer_percent_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
8176 
8177 static ssize_t
8178 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8179 {
8180 	struct trace_array *tr = filp->private_data;
8181 	size_t size;
8182 	char buf[64];
8183 	int order;
8184 	int r;
8185 
8186 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
8187 	size = (PAGE_SIZE << order) / 1024;
8188 
8189 	r = sprintf(buf, "%zd\n", size);
8190 
8191 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8192 }
8193 
/*
 * Change the ring buffer sub-buffer size.  The value written is in KB
 * and is rounded up to a power-of-two number of system pages (order 0
 * through 7, i.e. 1 to 128 pages).  Tracing is stopped for the duration,
 * and when a snapshot buffer exists its order is changed too; if the
 * snapshot update fails the main buffer is rolled back to the old order.
 */
static ssize_t
buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
			 size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	unsigned long val;
	int old_order;
	int order;
	int pages;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	val *= 1024; /* value passed in is in KB */

	/* Round up to the next power-of-two page count */
	pages = DIV_ROUND_UP(val, PAGE_SIZE);
	order = fls(pages - 1);

	/* limit between 1 and 128 system pages */
	if (order < 0 || order > 7)
		return -EINVAL;

	/* Do not allow tracing while changing the order of the ring buffer */
	tracing_stop_tr(tr);

	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
	if (old_order == order)
		goto out;

	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
	if (ret)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT

	if (!tr->allocated_snapshot)
		goto out_max;

	/* Keep the snapshot buffer's order in sync with the main buffer */
	ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
	if (ret) {
		/* Put back the old order */
		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
		if (WARN_ON_ONCE(cnt)) {
			/*
			 * AARGH! We are left with different orders!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the order of the main buffer, but failed to
			 * update the order of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			tracing_disabled = 1;
		}
		goto out;
	}
 out_max:
#endif
	(*ppos)++;
 out:
	if (ret)
		cnt = ret;
	tracing_start_tr(tr);
	return cnt;
}
8265 
/* File operations for reading/writing the sub-buffer size (in KB) */
static const struct file_operations buffer_subbuf_size_fops = {
	.open		= tracing_open_generic_tr,
	.read		= buffer_subbuf_size_read,
	.write		= buffer_subbuf_size_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
8273 
8274 static struct dentry *trace_instance_dir;
8275 
8276 static void
8277 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8278 
#ifdef CONFIG_MODULES
/*
 * Record the load-address delta of @mod relative to the address saved in
 * the persistent scratch area from a previous boot (zeroed when the
 * module is going away).  Used as a module_for_each_mod() callback;
 * @data is the trace_array whose scratch area is consulted.
 */
static int make_mod_delta(struct module *mod, void *data)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	struct trace_array *tr = data;
	int i;

	tscratch = tr->scratch;
	module_delta = READ_ONCE(tr->module_delta);
	for (i = 0; i < tscratch->nr_entries; i++) {
		entry = &tscratch->entries[i];
		/* Only the entry recorded under this module's name matches */
		if (strcmp(mod->name, entry->mod_name))
			continue;
		if (mod->state == MODULE_STATE_GOING)
			module_delta->delta[i] = 0;
		else
			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
						 - entry->mod_addr;
		break;
	}
	return 0;
}
#else
/* Without module support there are no module deltas to compute. */
static int make_mod_delta(struct module *mod, void *data)
{
	return 0;
}
#endif
8309 
8310 static int mod_addr_comp(const void *a, const void *b, const void *data)
8311 {
8312 	const struct trace_mod_entry *e1 = a;
8313 	const struct trace_mod_entry *e2 = b;
8314 
8315 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
8316 }
8317 
/*
 * Adopt a persistent scratch area left by a previous boot: compute the
 * kernel text delta, validate and sort the recorded module entries,
 * allocate the module delta table, and restore the previous trace clock.
 * If any validation step fails the scratch area is zeroed so stale data
 * is never used for address decoding.
 */
static void setup_trace_scratch(struct trace_array *tr,
				struct trace_scratch *tscratch, unsigned int size)
{
	struct trace_module_delta *module_delta;
	struct trace_mod_entry *entry;
	int i, nr_entries;

	if (!tscratch)
		return;

	tr->scratch = tscratch;
	tr->scratch_size = size;

	/* Offset between this boot's _text and the recorded one */
	if (tscratch->text_addr)
		tr->text_delta = (unsigned long)_text - tscratch->text_addr;

	/* Reject an entry count that does not fit in the scratch area */
	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
		goto reset;

	/* Check if each module name is a valid string */
	for (i = 0; i < tscratch->nr_entries; i++) {
		int n;

		entry = &tscratch->entries[i];

		for (n = 0; n < MODULE_NAME_LEN; n++) {
			if (entry->mod_name[n] == '\0')
				break;
			if (!isprint(entry->mod_name[n]))
				goto reset;
		}
		/* An unterminated name means corrupted data */
		if (n == MODULE_NAME_LEN)
			goto reset;
	}

	/* Sort the entries so that we can find appropriate module from address. */
	nr_entries = tscratch->nr_entries;
	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
	       mod_addr_comp, NULL, NULL);

	if (IS_ENABLED(CONFIG_MODULES)) {
		module_delta = kzalloc_flex(*module_delta, delta, nr_entries);
		if (!module_delta) {
			pr_info("module_delta allocation failed. Not able to decode module address.");
			goto reset;
		}
		init_rcu_head(&module_delta->rcu);
	} else
		module_delta = NULL;
	WRITE_ONCE(tr->module_delta, module_delta);

	/* Scan modules to make text delta for modules. */
	module_for_each_mod(make_mod_delta, tr);

	/* Set trace_clock as the same of the previous boot. */
	if (tscratch->clock_id != tr->clock_id) {
		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
			pr_info("the previous trace_clock info is not valid.");
			goto reset;
		}
	}
	return;
 reset:
	/* Invalid trace modules */
	memset(tscratch, 0, size);
}
8385 
/*
 * Allocate the ring buffer (and per-CPU bookkeeping) for @buf.  When
 * @tr describes a persistent memory range, the buffer is placed there
 * along with a scratch area for module address decoding; otherwise a
 * normal buffer of @size bytes is allocated.  Returns 0 or -ENOMEM.
 */
int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
{
	enum ring_buffer_flags rb_flags;
	struct trace_scratch *tscratch;
	unsigned int scratch_size = 0;

	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;

	buf->tr = tr;

	if (tr->range_addr_start && tr->range_addr_size) {
		/* Add scratch buffer to handle 128 modules */
		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
						      tr->range_addr_start,
						      tr->range_addr_size,
						      struct_size(tscratch, entries, 128));

		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
		setup_trace_scratch(tr, tscratch, scratch_size);

		/*
		 * This is basically the same as a mapped buffer,
		 * with the same restrictions.
		 */
		tr->mapped++;
	} else {
		buf->buffer = ring_buffer_alloc(size, rb_flags);
	}
	if (!buf->buffer)
		return -ENOMEM;

	buf->data = alloc_percpu(struct trace_array_cpu);
	if (!buf->data) {
		ring_buffer_free(buf->buffer);
		buf->buffer = NULL;
		return -ENOMEM;
	}

	/* Allocate the first page for all buffers */
	/*
	 * NOTE(review): this always reads tr->array_buffer even when @buf
	 * is a different buffer (e.g. the snapshot buffer) -- confirm that
	 * is intended rather than using @buf here.
	 */
	trace_set_buffer_entries(&tr->array_buffer,
				 ring_buffer_size(tr->array_buffer.buffer, 0));

	return 0;
}
8430 
8431 static void free_trace_buffer(struct array_buffer *buf)
8432 {
8433 	if (buf->buffer) {
8434 		ring_buffer_free(buf->buffer);
8435 		buf->buffer = NULL;
8436 		free_percpu(buf->data);
8437 		buf->data = NULL;
8438 	}
8439 }
8440 
/*
 * Allocate the main trace buffer for @tr and its snapshot counterpart
 * (the latter presumably a no-op when snapshots are not configured —
 * see trace_allocate_snapshot()).  On snapshot failure the main buffer
 * is freed again so the caller sees all-or-nothing.
 * Returns 0 on success or a negative errno.
 */
static int allocate_trace_buffers(struct trace_array *tr, unsigned long size)
{
	int ret;

	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
	if (ret)
		return ret;

	/* MEM_FAIL() warns on a nonzero condition and returns it */
	ret = trace_allocate_snapshot(tr, size);
	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n"))
		free_trace_buffer(&tr->array_buffer);

	return ret;
}
8455 
/* Free all trace buffers and related allocations owned by @tr. */
static void free_trace_buffers(struct trace_array *tr)
{
	if (!tr)
		return;

	free_trace_buffer(&tr->array_buffer);
	/* Module delta table used by persistent (boot-mapped) buffers */
	kfree(tr->module_delta);

#ifdef CONFIG_TRACER_SNAPSHOT
	free_trace_buffer(&tr->snapshot_buffer);
#endif
}
8468 
8469 static void init_trace_flags_index(struct trace_array *tr)
8470 {
8471 	int i;
8472 
8473 	/* Used by the trace options files */
8474 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8475 		tr->trace_flags_index[i] = i;
8476 }
8477 
8478 static int __update_tracer(struct trace_array *tr)
8479 {
8480 	struct tracer *t;
8481 	int ret = 0;
8482 
8483 	for (t = trace_types; t && !ret; t = t->next)
8484 		ret = add_tracer(tr, t);
8485 
8486 	return ret;
8487 }
8488 
/*
 * Create the per-tracer option files for every tracer already attached
 * to @tr.  Stops at the first failure.
 * Returns 0 on success or the first negative add_tracer_options() error.
 */
static __init int __update_tracer_options(struct trace_array *tr)
{
	struct tracers *t;
	int ret = 0;

	list_for_each_entry(t, &tr->tracers, list) {
		ret = add_tracer_options(tr, t);
		if (ret < 0)
			break;
	}

	return ret;
}
8502 
/*
 * Create tracer option files on all existing trace instances, and flag
 * that this pass has happened (tracer_options_updated is presumably
 * consulted by later tracer registration — verify at its other users).
 */
static __init void update_tracer_options(void)
{
	struct trace_array *tr;

	guard(mutex)(&trace_types_lock);
	tracer_options_updated = true;
	list_for_each_entry(tr, &ftrace_trace_arrays, list)
		__update_tracer_options(tr);
}
8512 
8513 /* Must have trace_types_lock held */
8514 struct trace_array *trace_array_find(const char *instance)
8515 {
8516 	struct trace_array *tr, *found = NULL;
8517 
8518 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8519 		if (tr->name && strcmp(tr->name, instance) == 0) {
8520 			found = tr;
8521 			break;
8522 		}
8523 	}
8524 
8525 	return found;
8526 }
8527 
8528 struct trace_array *trace_array_find_get(const char *instance)
8529 {
8530 	struct trace_array *tr;
8531 
8532 	guard(mutex)(&trace_types_lock);
8533 	tr = trace_array_find(instance);
8534 	if (tr && __trace_array_get(tr) < 0)
8535 		tr = NULL;
8536 
8537 	return tr;
8538 }
8539 
/*
 * Create the tracefs directory for instance @tr and populate it with
 * event files, the standard per-instance files, and per-tracer entries.
 * On failure the partially-created directory is torn down again, in
 * reverse order of setup.
 * Returns 0 on success or a negative errno.
 */
static int trace_array_create_dir(struct trace_array *tr)
{
	int ret;

	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
	if (!tr->dir)
		return -EINVAL;

	ret = event_trace_add_tracer(tr->dir, tr);
	if (ret) {
		tracefs_remove(tr->dir);
		return ret;
	}

	init_tracer_tracefs(tr, tr->dir);
	ret = __update_tracer(tr);
	if (ret) {
		/* Undo event files before removing the directory */
		event_trace_del_tracer(tr);
		tracefs_remove(tr->dir);
		return ret;
	}
	return 0;
}
8563 
/*
 * Allocate and register a new trace instance.
 * @name: instance name (copied)
 * @systems: event systems to expose, or NULL for all (copied)
 * @range_addr_start/@range_addr_size: when non-zero, back the ring
 *	buffer with this boot-reserved memory region (persistent buffer)
 *
 * On success the instance is linked onto ftrace_trace_arrays with an
 * initial reference count of 1.  Returns the new trace_array or an
 * ERR_PTR() on failure.  Callers hold trace_types_lock.
 */
static struct trace_array *
trace_array_create_systems(const char *name, const char *systems,
			   unsigned long range_addr_start,
			   unsigned long range_addr_size)
{
	struct trace_array *tr;
	int ret;

	ret = -ENOMEM;
	tr = kzalloc_obj(*tr);
	if (!tr)
		return ERR_PTR(ret);

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (systems) {
		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
		if (!tr->system_names)
			goto out_free_tr;
	}

	/* Only for boot up memory mapped ring buffers */
	tr->range_addr_start = range_addr_start;
	tr->range_addr_size = range_addr_size;

	/* Inherit the top-level instance's flags, minus per-instance ones */
	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->syscall_buf_sz = global_trace.syscall_buf_sz;

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&tr->snapshot_trigger_lock);
#endif
	/* New instances start with the nop tracer */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);
	INIT_LIST_HEAD(&tr->hist_vars);
	INIT_LIST_HEAD(&tr->err_log);
	INIT_LIST_HEAD(&tr->tracers);
	INIT_LIST_HEAD(&tr->marker_list);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&tr->mod_events);
#endif

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	/* The ring buffer is defaultly expanded */
	trace_set_ring_buffer_expanded(tr);

	if (ftrace_allocate_ftrace_ops(tr) < 0)
		goto out_free_tr;

	trace_array_init_autoremove(tr);

	ftrace_init_trace_array(tr);

	init_trace_flags_index(tr);

	if (trace_instance_dir) {
		ret = trace_array_create_dir(tr);
		if (ret)
			goto out_free_tr;
	} else
		/* tracefs not up yet; events are added when it is created */
		__trace_early_add_events(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	tr->ref++;

	return tr;

 out_free_tr:
	/* Unwind in reverse order; the free helpers tolerate NULLs */
	ftrace_free_ftrace_ops(tr);
	free_trace_buffers(tr);
	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->range_name);
	kfree(tr->name);
	kfree(tr);

	return ERR_PTR(ret);
}
8663 
/* Create an instance named @name with all event systems and no boot mapping. */
static struct trace_array *trace_array_create(const char *name)
{
	return trace_array_create_systems(name, NULL, 0, 0);
}
8668 
8669 static int instance_mkdir(const char *name)
8670 {
8671 	struct trace_array *tr;
8672 	int ret;
8673 
8674 	guard(mutex)(&event_mutex);
8675 	guard(mutex)(&trace_types_lock);
8676 
8677 	ret = -EEXIST;
8678 	if (trace_array_find(name))
8679 		return -EEXIST;
8680 
8681 	tr = trace_array_create(name);
8682 
8683 	ret = PTR_ERR_OR_ZERO(tr);
8684 
8685 	return ret;
8686 }
8687 
#ifdef CONFIG_MMU
/*
 * Map @size bytes of reserved memory at @start into kernel virtual
 * address space with a non-executable kernel mapping.
 * NOTE(review): @start is presumably a physical address handed to
 * vmap_page_range() — confirm against callers.
 * Returns the new virtual address, or 0 on failure.
 */
static u64 map_pages(unsigned long start, unsigned long size)
{
	unsigned long vmap_start, vmap_end;
	struct vm_struct *area;
	int ret;

	area = get_vm_area(size, VM_IOREMAP);
	if (!area)
		return 0;

	vmap_start = (unsigned long) area->addr;
	vmap_end = vmap_start + size;

	ret = vmap_page_range(vmap_start, vmap_end,
			      start, pgprot_nx(PAGE_KERNEL));
	if (ret < 0) {
		/* Mapping failed: give the virtual area back */
		free_vm_area(area);
		return 0;
	}

	return (u64)vmap_start;
}
#else
/* Without an MMU the region cannot be remapped; callers treat 0 as failure. */
static inline u64 map_pages(unsigned long start, unsigned long size)
{
	return 0;
}
#endif
8717 
8718 /**
8719  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8720  * @name: The name of the trace array to be looked up/created.
8721  * @systems: A list of systems to create event directories for (NULL for all)
8722  *
8723  * Returns pointer to trace array with given name.
8724  * NULL, if it cannot be created.
8725  *
8726  * NOTE: This function increments the reference counter associated with the
8727  * trace array returned. This makes sure it cannot be freed while in use.
8728  * Use trace_array_put() once the trace array is no longer needed.
8729  * If the trace_array is to be freed, trace_array_destroy() needs to
8730  * be called after the trace_array_put(), or simply let user space delete
8731  * it from the tracefs instances directory. But until the
8732  * trace_array_put() is called, user space can not delete it.
8733  *
8734  */
8735 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
8736 {
8737 	struct trace_array *tr;
8738 
8739 	guard(mutex)(&event_mutex);
8740 	guard(mutex)(&trace_types_lock);
8741 
8742 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8743 		if (tr->name && strcmp(tr->name, name) == 0) {
8744 			/* if this fails, @tr is going to be removed. */
8745 			if (__trace_array_get(tr) < 0)
8746 				tr = NULL;
8747 			return tr;
8748 		}
8749 	}
8750 
8751 	tr = trace_array_create_systems(name, systems, 0, 0);
8752 
8753 	if (IS_ERR(tr))
8754 		tr = NULL;
8755 	else
8756 		tr->ref++;
8757 
8758 	return tr;
8759 }
8760 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8761 
/*
 * Tear down and free trace instance @tr.  Callers hold event_mutex and
 * trace_types_lock.  Fails with -EBUSY while the instance is still
 * referenced or its buffers are in use.  The teardown order below is
 * deliberate — do not reorder casually.
 */
static int __remove_instance(struct trace_array *tr)
{
	int i;

	/* Reference counter for a newly created trace array = 1. */
	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
		return -EBUSY;

	list_del(&tr->list);

	/* If printk was redirected here, point it back at the top level */
	if (printk_trace == tr)
		update_printk_trace(&global_trace);

	/* Must be done before disabling all the flags */
	if (update_marker_trace(tr, 0))
		synchronize_rcu();

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1ULL << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1ULL << i, 0);
	}

	trace_array_cancel_autoremove(tr);
	tracing_set_nop(tr);
	clear_ftrace_function_probes(tr);
	event_trace_del_tracer(tr);
	ftrace_clear_pids(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove(tr->dir);
	free_percpu(tr->last_func_repeats);
	free_trace_buffers(tr);
	clear_tracing_err_log(tr);
	free_tracers(tr);

	/* Release boot-reserved memory backing a persistent buffer */
	if (tr->range_name) {
		reserve_mem_release_by_name(tr->range_name);
		kfree(tr->range_name);
	}
	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
		vfree((void *)tr->range_addr_start);

	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->name);
	kfree(tr);

	return 0;
}
8817 
/**
 * trace_array_destroy - Destroy a trace array created by the caller
 * @this_tr: The trace array to destroy
 *
 * Only arrays still registered on ftrace_trace_arrays can be destroyed;
 * __remove_instance() returns -EBUSY if the array is still referenced.
 *
 * Returns 0 on success, -EINVAL for a NULL array, -ENODEV if the array
 * is not registered, or -EBUSY if it is still in use.
 */
int trace_array_destroy(struct trace_array *this_tr)
{
	struct trace_array *tr;

	if (!this_tr)
		return -EINVAL;

	guard(mutex)(&event_mutex);
	guard(mutex)(&trace_types_lock);


	/* Making sure trace array exists before destroying it. */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr)
			return __remove_instance(tr);
	}

	return -ENODEV;
}
EXPORT_SYMBOL_GPL(trace_array_destroy);
8838 
8839 static int instance_rmdir(const char *name)
8840 {
8841 	struct trace_array *tr;
8842 
8843 	guard(mutex)(&event_mutex);
8844 	guard(mutex)(&trace_types_lock);
8845 
8846 	tr = trace_array_find(name);
8847 	if (!tr)
8848 		return -ENODEV;
8849 
8850 	return __remove_instance(tr);
8851 }
8852 
/*
 * Create the tracefs "instances" directory with mkdir/rmdir callbacks,
 * then create directories for any instances that were registered before
 * tracefs came up (e.g. boot-time instances).
 */
static __init void create_trace_instances(struct dentry *d_tracer)
{
	struct trace_array *tr;

	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
							 instance_mkdir,
							 instance_rmdir);
	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
		return;

	guard(mutex)(&event_mutex);
	guard(mutex)(&trace_types_lock);

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		/* The top-level instance has no name and no sub-directory */
		if (!tr->name)
			continue;
		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
			     "Failed to create instance directory\n"))
			return;
	}
}
8874 
/*
 * Populate the tracefs directory @d_tracer with the standard control
 * files for instance @tr.  Read-only instances (e.g. last-boot
 * persistent buffers) only get the first group of files; writable
 * control files are skipped for them.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	umode_t writable_mode = TRACE_MODE_WRITE;
	int cpu;

	if (trace_array_is_readonly(tr))
		writable_mode = TRACE_MODE_READ;

	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
			  tr, &show_traces_fops);

	trace_create_file("current_tracer", writable_mode, d_tracer,
			  tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", writable_mode, d_tracer,
			  tr, &tracing_cpumask_fops);

	/* Options are used for changing print-format even for readonly instance. */
	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", writable_mode, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("trace_clock", writable_mode, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	/* Default: wake up readers when the buffer is half full */
	tr->buffer_percent = 50;

	trace_create_file("buffer_subbuf_size_kb", writable_mode, d_tracer,
			  tr, &buffer_subbuf_size_fops);

	create_trace_options_dir(tr);

	/* Only boot-mapped (persistent) buffers carry last-boot info */
	if (tr->range_addr_start)
		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
				  tr, &last_boot_fops);

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	/* Read-only instance has above files only. */
	if (trace_array_is_readonly(tr))
		return;

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
			  tr, &buffer_percent_fops);

	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_syscall_buf_fops);

	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_maxlat_file(tr, d_tracer);

	if (ftrace_create_function_files(tr, d_tracer))
		MEM_FAIL(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Persistent boot buffers do not support snapshots */
	if (!tr->range_addr_start)
		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
				  tr, &snapshot_fops);
#endif

	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_err_log_fops);

	ftrace_init_tracefs(tr, d_tracer);
}
8969 
#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
/*
 * Automount callback for the debugfs "tracing" directory.
 *
 * To maintain backward compatibility for tools that mount debugfs to
 * get to the tracing facility, tracefs is automatically mounted onto
 * the debugfs/tracing directory.  This path is deprecated (removal
 * planned for 2030) and warns on use.
 *
 * @mntpt: the debugfs mount point dentry
 * @ignore: unused automount cookie
 *
 * Returns the mounted tracefs, NULL if tracefs is not registered, or
 * an ERR_PTR() on mount failure.
 */
static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
{
	struct vfsmount *mnt;
	struct file_system_type *type;
	struct fs_context *fc;
	int ret;

	type = get_fs_type("tracefs");
	if (!type)
		return NULL;

	fc = fs_context_for_submount(type, mntpt);
	put_filesystem(type);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");

	ret = vfs_parse_fs_string(fc, "source", "tracefs");
	if (!ret)
		mnt = fc_mount(fc);
	else
		mnt = ERR_PTR(ret);

	put_fs_context(fc);
	return mnt;
}
#endif
9004 
/**
 * tracing_init_dentry - initialize top level trace array
 *
 * This is called when creating files or directories in the tracing
 * directory. It is called via fs_initcall() by any of the boot up code.
 *
 * Returns 0 on success (or if already initialized), -EPERM under
 * lockdown, or -ENODEV if tracefs is not registered.
 */
int tracing_init_dentry(void)
{
	struct trace_array *tr = &global_trace;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/* The top level trace array uses NULL as parent */
	if (tr->dir)
		return 0;

	if (WARN_ON(!tracefs_initialized()))
		return -ENODEV;

#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
	/*
	 * As there may still be users that expect the tracing
	 * files to exist in debugfs/tracing, we must automount
	 * the tracefs file system there, so older tools still
	 * work with the newer kernel.
	 */
	tr->dir = debugfs_create_automount("tracing", NULL,
					   trace_automount, NULL);
#endif

	return 0;
}
9041 
9042 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9043 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9044 
9045 struct workqueue_struct *trace_init_wq __initdata;
9046 static struct work_struct eval_map_work __initdata;
9047 static struct work_struct tracerfs_init_work __initdata;
9048 
9049 static void __init eval_map_work_func(struct work_struct *work)
9050 {
9051 	int len;
9052 
9053 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9054 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
9055 }
9056 
/*
 * Kick off built-in eval-map processing on an unbound workqueue so the
 * rest of boot can proceed in parallel; trace_eval_sync() below waits
 * for it.  Falls back to doing the work synchronously if the workqueue
 * cannot be allocated.
 */
static int __init trace_eval_init(void)
{
	INIT_WORK(&eval_map_work, eval_map_work_func);

	trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0);
	if (!trace_init_wq) {
		pr_err("Unable to allocate trace_init_wq\n");
		/* Do work here */
		eval_map_work_func(&eval_map_work);
		return -ENOMEM;
	}

	queue_work(trace_init_wq, &eval_map_work);
	return 0;
}

subsys_initcall(trace_eval_init);
9074 
/*
 * Late-boot barrier: destroying the workqueue flushes any work queued
 * on it by trace_eval_init() (and tracer_init_tracefs()).
 */
static int __init trace_eval_sync(void)
{
	/* Make sure the eval map updates are finished */
	if (trace_init_wq)
		destroy_workqueue(trace_init_wq);
	return 0;
}

late_initcall_sync(trace_eval_sync);
9084 
9085 
9086 #ifdef CONFIG_MODULES
9087 
9088 bool module_exists(const char *module)
9089 {
9090 	/* All modules have the symbol __this_module */
9091 	static const char this_mod[] = "__this_module";
9092 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
9093 	unsigned long val;
9094 	int n;
9095 
9096 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
9097 
9098 	if (n > sizeof(modname) - 1)
9099 		return false;
9100 
9101 	val = module_kallsyms_lookup_name(modname);
9102 	return val != 0;
9103 }
9104 
/* Process a loading module's eval maps (and sanitize its event field types). */
static void trace_module_add_evals(struct module *mod)
{
	/*
	 * Modules with bad taint do not have events created, do
	 * not bother with enums either.
	 */
	if (trace_module_has_bad_taint(mod))
		return;

	/* Even if no trace_evals, this need to sanitize field types. */
	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
}
9117 
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink and free the eval-map segment belonging to unloading module
 * @mod from the global trace_eval_maps list.  The list stores
 * per-module segments as a union: a head entry identifying the module
 * followed by entries, terminated by a tail entry pointing at the next
 * segment (see trace_eval_jmp_to_tail()).
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	union trace_eval_map_item **last = &trace_eval_maps;

	if (!mod->num_trace_evals)
		return;

	guard(mutex)(&trace_eval_mutex);

	map = trace_eval_maps;

	/* Find this module's segment, tracking the link that points to it */
	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		return;

	/* Splice the segment out of the list, then free it */
	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9147 
/*
 * Keep persistent (boot-mapped) trace instances in sync with module
 * load/unload: record addresses of newly loaded modules in the scratch
 * area, and compute load-address deltas for modules that were present
 * in the previous boot.
 */
static void trace_module_record(struct module *mod, bool add)
{
	struct trace_array *tr;
	unsigned long flags;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
		/* Update any persistent trace array that has already been started */
		if (flags == TRACE_ARRAY_FL_BOOT && add) {
			guard(mutex)(&scratch_mutex);
			save_mod(mod, tr);
		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
			/* Update delta if the module loaded in previous boot */
			make_mod_delta(mod, tr);
		}
	}
}
9165 
/*
 * Module notifier callback: maintain eval maps and persistent-buffer
 * module records as modules come and go.
 */
static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	struct module *mod = data;

	switch (val) {
	case MODULE_STATE_COMING:
		trace_module_add_evals(mod);
		trace_module_record(mod, true);
		break;
	case MODULE_STATE_GOING:
		trace_module_remove_evals(mod);
		trace_module_record(mod, false);
		break;
	}

	return NOTIFY_OK;
}
9184 
/* Registered in tracer_init_tracefs_work_func() to track module state */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
9189 #endif /* CONFIG_MODULES */
9190 
/*
 * Deferred tracefs population: create the top-level tracing files,
 * the instances directory, eval files, and register the module
 * notifier.  Runs on trace_init_wq (or synchronously as a fallback —
 * see tracer_init_tracefs()).
 */
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options();
}
9229 
/*
 * fs_initcall: set up the tracing directory.  The bulk of the file
 * creation is pushed onto trace_init_wq (when trace_eval_init() managed
 * to create it) so boot is not serialized on it; otherwise it runs
 * inline.  Always returns 0 so boot continues even on failure.
 */
static __init int tracer_init_tracefs(void)
{
	int ret;

	trace_access_lock_init();

	ret = tracing_init_dentry();
	if (ret)
		return 0;

	if (trace_init_wq) {
		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
		queue_work(trace_init_wq, &tracerfs_init_work);
	} else {
		tracer_init_tracefs_work_func(NULL);
	}

	if (rv_init_interface())
		pr_err("RV: Error while creating the RV interface\n");

	return 0;
}

fs_initcall(tracer_init_tracefs);
9254 
/* Shared die/panic callback; defined below, after the notifier blocks. */
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

/* High priority (INT_MAX - 1) so the dump runs early in the chain */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
9267 
9268 /*
9269  * The idea is to execute the following die/panic callback early, in order
9270  * to avoid showing irrelevant information in the trace (like other panic
9271  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9272  * warnings get disabled (to prevent potential log flooding).
9273  */
9274 static int trace_die_panic_handler(struct notifier_block *self,
9275 				unsigned long ev, void *unused)
9276 {
9277 	if (!ftrace_dump_on_oops_enabled())
9278 		return NOTIFY_DONE;
9279 
9280 	/* The die notifier requires DIE_OOPS to trigger */
9281 	if (self == &trace_die_notifier && ev != DIE_OOPS)
9282 		return NOTIFY_DONE;
9283 
9284 	ftrace_dump(DUMP_PARAM);
9285 
9286 	return NOTIFY_DONE;
9287 }
9288 
9289 /*
9290  * printk is set to max of 1024, we really don't need it that big.
9291  * Nothing should be printing 1000 characters anyway.
9292  */
9293 #define TRACE_MAX_PRINT		1000
9294 
9295 /*
9296  * Define here KERN_TRACE so that we have one place to modify
9297  * it if we decide to change what log level the ftrace dump
9298  * should be at.
9299  */
9300 #define KERN_TRACE		KERN_EMERG
9301 
/*
 * Emit the contents of trace_seq @s to the console at KERN_TRACE level
 * (clamped to TRACE_MAX_PRINT characters) and reinitialize @s for reuse.
 * Used by the crash-time dump path.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}
9324 
/*
 * Initialize iterator @iter to walk all CPUs of @tr's main buffer,
 * using preallocated static scratch buffers (safe in crash context
 * where kmalloc must be avoided).
 */
static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
{
	iter->tr = tr;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &tr->array_buffer;

	/* Let the current tracer set up its iterator state, if it has any */
	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
9349 
/* Initialize @iter to iterate the global (top-level) trace buffer. */
void trace_init_global_iter(struct trace_iterator *iter)
{
	trace_init_iter(iter, &global_trace);
}
9354 
/*
 * Dump the contents of @tr's ring buffer to the console, entry by
 * entry.  @dump_mode selects either all CPUs (DUMP_ALL) or just the
 * current CPU (DUMP_ORIG).  Runs with IRQs off and the buffer
 * disabled; intended for oops/panic/sysrq context.
 */
static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0;

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracer_tracing_off(tr);

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_iter(&iter, tr);

	/* While dumping, do not allow the buffer to be enable */
	tracer_tracing_disable(tr);

	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);

	if (dump_mode == DUMP_ORIG)
		iter.cpu_file = raw_smp_processor_id();
	else
		iter.cpu_file = RING_BUFFER_ALL_CPUS;

	if (tr == &global_trace)
		printk(KERN_TRACE "Dumping ftrace buffer:\n");
	else
		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);

			trace_printk_seq(&iter.seq);
		}
		/* Dumping can take a while; keep the NMI watchdog quiet */
		touch_nmi_watchdog();
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

	/* Restore the SYM_USEROBJ flag state we cleared above */
	tr->trace_flags |= old_userobj;

	tracer_tracing_enable(tr);
	local_irq_restore(flags);
}
9441 
/*
 * Dump buffers as selected by the ftrace_dump_on_oops boot parameter.
 * The parameter is a comma-separated list: the first token may be the
 * legacy "0"/"1"/"2"/"orig_cpu" global setting; remaining tokens name
 * instances, optionally suffixed with "=2" or "=orig_cpu" to dump only
 * the originating CPU.
 */
static void ftrace_dump_by_param(void)
{
	bool first_param = true;
	char dump_param[MAX_TRACER_SIZE];
	char *buf, *token, *inst_name;
	struct trace_array *tr;

	/* Work on a copy: strsep() modifies the string */
	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
	buf = dump_param;

	while ((token = strsep(&buf, ",")) != NULL) {
		if (first_param) {
			first_param = false;
			if (!strcmp("0", token))
				continue;
			else if (!strcmp("1", token)) {
				ftrace_dump_one(&global_trace, DUMP_ALL);
				continue;
			}
			else if (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)) {
				ftrace_dump_one(&global_trace, DUMP_ORIG);
				continue;
			}
		}

		/* "instance[=mode]": split name from optional mode */
		inst_name = strsep(&token, "=");
		tr = trace_array_find(inst_name);
		if (!tr) {
			printk(KERN_TRACE "Instance %s not found\n", inst_name);
			continue;
		}

		if (token && (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)))
			ftrace_dump_one(tr, DUMP_ORIG);
		else
			ftrace_dump_one(tr, DUMP_ALL);
	}
}
9482 
/*
 * Dump the ftrace ring buffer(s) to the console in the given mode.
 * Safe to call from oops/panic context; concurrent callers beyond the
 * first are rejected via the dump_running atomic.
 */
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	static atomic_t dump_running;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	switch (oops_dump_mode) {
	case DUMP_ALL:
		ftrace_dump_one(&global_trace, DUMP_ALL);
		break;
	case DUMP_ORIG:
		ftrace_dump_one(&global_trace, DUMP_ORIG);
		break;
	case DUMP_PARAM:
		/* Mode/instances come from the ftrace_dump_on_oops parameter */
		ftrace_dump_by_param();
		break;
	case DUMP_NONE:
		break;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		ftrace_dump_one(&global_trace, DUMP_ALL);
	}

	atomic_dec(&dump_running);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
9513 
9514 #define WRITE_BUFSIZE  4096
9515 
/*
 * Parse a user-space write as a sequence of newline-separated commands
 * and run @createfn on each.  Input is consumed in WRITE_BUFSIZE-sized
 * chunks; a line may span two reads of the user buffer, but a single
 * line longer than WRITE_BUFSIZE - 2 is rejected.  Text after '#' on a
 * line is treated as a comment and stripped.
 *
 * Returns the number of bytes consumed, or a negative errno (including
 * the first error returned by @createfn).
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf __free(kfree) = NULL;
	char *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size))
			return -EFAULT;

		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				/* No newline: either last line or a split line */
				size = strlen(buf);
				if (done + size < count) {
					/* Partial line: re-read it at the chunk start */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					return -EINVAL;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				return ret;
			buf += size;

		} while (done < count);
	}
	return done;
}
9574 
9575 __init static int backup_instance_area(const char *backup,
9576 				       unsigned long *addr, phys_addr_t *size)
9577 {
9578 	struct trace_array *backup_tr;
9579 	void *allocated_vaddr = NULL;
9580 
9581 	backup_tr = trace_array_get_by_name(backup, NULL);
9582 	if (!backup_tr) {
9583 		pr_warn("Tracing: Instance %s is not found.\n", backup);
9584 		return -ENOENT;
9585 	}
9586 
9587 	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
9588 		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
9589 		trace_array_put(backup_tr);
9590 		return -EINVAL;
9591 	}
9592 
9593 	*size = backup_tr->range_addr_size;
9594 
9595 	allocated_vaddr = vzalloc(*size);
9596 	if (!allocated_vaddr) {
9597 		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
9598 			backup, (unsigned long)*size);
9599 		trace_array_put(backup_tr);
9600 		return -ENOMEM;
9601 	}
9602 
9603 	memcpy(allocated_vaddr,
9604 		(void *)backup_tr->range_addr_start, (size_t)*size);
9605 	*addr = (unsigned long)allocated_vaddr;
9606 
9607 	trace_array_put(backup_tr);
9608 	return 0;
9609 }
9610 
9611 __init static void enable_instances(void)
9612 {
9613 	struct trace_array *tr;
9614 	bool memmap_area = false;
9615 	char *curr_str;
9616 	char *name;
9617 	char *str;
9618 	char *tok;
9619 
9620 	/* A tab is always appended */
9621 	boot_instance_info[boot_instance_index - 1] = '\0';
9622 	str = boot_instance_info;
9623 
9624 	while ((curr_str = strsep(&str, "\t"))) {
9625 		phys_addr_t start = 0;
9626 		phys_addr_t size = 0;
9627 		unsigned long addr = 0;
9628 		bool traceprintk = false;
9629 		bool traceoff = false;
9630 		char *flag_delim;
9631 		char *addr_delim;
9632 		char *rname __free(kfree) = NULL;
9633 		char *backup;
9634 
9635 		tok = strsep(&curr_str, ",");
9636 
9637 		name = strsep(&tok, "=");
9638 		backup = tok;
9639 
9640 		flag_delim = strchr(name, '^');
9641 		addr_delim = strchr(name, '@');
9642 
9643 		if (addr_delim)
9644 			*addr_delim++ = '\0';
9645 
9646 		if (flag_delim)
9647 			*flag_delim++ = '\0';
9648 
9649 		if (backup) {
9650 			if (backup_instance_area(backup, &addr, &size) < 0)
9651 				continue;
9652 		}
9653 
9654 		if (flag_delim) {
9655 			char *flag;
9656 
9657 			while ((flag = strsep(&flag_delim, "^"))) {
9658 				if (strcmp(flag, "traceoff") == 0) {
9659 					traceoff = true;
9660 				} else if ((strcmp(flag, "printk") == 0) ||
9661 					   (strcmp(flag, "traceprintk") == 0) ||
9662 					   (strcmp(flag, "trace_printk") == 0)) {
9663 					traceprintk = true;
9664 				} else {
9665 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
9666 						flag, name);
9667 				}
9668 			}
9669 		}
9670 
9671 		tok = addr_delim;
9672 		if (tok && isdigit(*tok)) {
9673 			start = memparse(tok, &tok);
9674 			if (!start) {
9675 				pr_warn("Tracing: Invalid boot instance address for %s\n",
9676 					name);
9677 				continue;
9678 			}
9679 			if (*tok != ':') {
9680 				pr_warn("Tracing: No size specified for instance %s\n", name);
9681 				continue;
9682 			}
9683 			tok++;
9684 			size = memparse(tok, &tok);
9685 			if (!size) {
9686 				pr_warn("Tracing: Invalid boot instance size for %s\n",
9687 					name);
9688 				continue;
9689 			}
9690 			memmap_area = true;
9691 		} else if (tok) {
9692 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
9693 				start = 0;
9694 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
9695 				continue;
9696 			}
9697 			rname = kstrdup(tok, GFP_KERNEL);
9698 		}
9699 
9700 		if (start) {
9701 			/* Start and size must be page aligned */
9702 			if (start & ~PAGE_MASK) {
9703 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
9704 				continue;
9705 			}
9706 			if (size & ~PAGE_MASK) {
9707 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
9708 				continue;
9709 			}
9710 
9711 			if (memmap_area)
9712 				addr = map_pages(start, size);
9713 			else
9714 				addr = (unsigned long)phys_to_virt(start);
9715 			if (addr) {
9716 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
9717 					name, &start, (unsigned long)size);
9718 			} else {
9719 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
9720 				continue;
9721 			}
9722 		} else {
9723 			/* Only non mapped buffers have snapshot buffers */
9724 			do_allocate_snapshot(name);
9725 		}
9726 
9727 		tr = trace_array_create_systems(name, NULL, addr, size);
9728 		if (IS_ERR(tr)) {
9729 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
9730 			continue;
9731 		}
9732 
9733 		if (traceoff)
9734 			tracer_tracing_off(tr);
9735 
9736 		if (traceprintk)
9737 			update_printk_trace(tr);
9738 
9739 		/*
9740 		 * memmap'd buffers can not be freed.
9741 		 */
9742 		if (memmap_area) {
9743 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
9744 			tr->ref++;
9745 		}
9746 
9747 		/*
9748 		 * Backup buffers can be freed but need vfree().
9749 		 */
9750 		if (backup) {
9751 			tr->flags |= TRACE_ARRAY_FL_VMALLOC | TRACE_ARRAY_FL_RDONLY;
9752 			trace_array_start_autoremove();
9753 		}
9754 
9755 		if (start || backup) {
9756 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
9757 			tr->range_name = no_free_ptr(rname);
9758 		}
9759 
9760 		/*
9761 		 * Save the events to start and enabled them after all boot instances
9762 		 * have been created.
9763 		 */
9764 		tr->boot_events = curr_str;
9765 	}
9766 
9767 	/* Enable the events after all boot instances have been created */
9768 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9769 
9770 		if (!tr->boot_events || !(*tr->boot_events)) {
9771 			tr->boot_events = NULL;
9772 			continue;
9773 		}
9774 
9775 		curr_str = tr->boot_events;
9776 
9777 		/* Clear the instance if this is a persistent buffer */
9778 		if (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)
9779 			update_last_data(tr);
9780 
9781 		while ((tok = strsep(&curr_str, ","))) {
9782 			early_enable_events(tr, tok, true);
9783 		}
9784 		tr->boot_events = NULL;
9785 	}
9786 }
9787 
/*
 * tracer_alloc_buffers - allocate and initialize the top-level trace array
 *
 * Sets up global_trace: cpumasks, trace_printk buffers (if used), the CPU
 * hotplug ring-buffer state, the event-trigger temp buffer, saved cmdlines,
 * the main ring buffers, notifiers, and the bootstrap nop tracer.  Clears
 * @tracing_disabled at the end to enable tracing.  Called from
 * early_trace_init().  Errors unwind in reverse order through the goto
 * labels at the bottom.
 *
 * Returns 0 on success or a negative error code.
 */
__init static int tracer_alloc_buffers(void)
{
	unsigned long ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		return -ENOMEM;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (global_trace.ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	/* Honor any "traceoff" that happened before the buffers existed */
	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;
	global_trace.current_trace_flags = nop_trace.flags;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	INIT_LIST_HEAD(&global_trace.tracers);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	global_trace.syscall_buf_sz = syscall_buf_size;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

	/* Error unwinding: each label undoes the allocations above it. */
out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}
9931 
9932 #ifdef CONFIG_FUNCTION_TRACER
9933 /* Used to set module cached ftrace filtering at boot up */
/* Return the top-level trace array (global_trace, TRACE_ARRAY_FL_GLOBAL). */
struct trace_array *trace_get_global_array(void)
{
	return &global_trace;
}
9938 #endif
9939 
9940 void __init early_trace_init(void)
9941 {
9942 	if (tracepoint_printk) {
9943 		tracepoint_print_iter = kzalloc_obj(*tracepoint_print_iter);
9944 		if (MEM_FAIL(!tracepoint_print_iter,
9945 			     "Failed to allocate trace iterator\n"))
9946 			tracepoint_printk = 0;
9947 		else
9948 			static_key_enable(&tracepoint_printk_key.key);
9949 	}
9950 	tracer_alloc_buffers();
9951 
9952 	init_events();
9953 }
9954 
9955 void __init trace_init(void)
9956 {
9957 	trace_event_init();
9958 
9959 	if (boot_instance_index)
9960 		enable_instances();
9961 }
9962 
9963 __init static void clear_boot_tracer(void)
9964 {
9965 	/*
9966 	 * The default tracer at boot buffer is an init section.
9967 	 * This function is called in lateinit. If we did not
9968 	 * find the boot tracer, then clear it out, to prevent
9969 	 * later registration from accessing the buffer that is
9970 	 * about to be freed.
9971 	 */
9972 	if (!default_bootup_tracer)
9973 		return;
9974 
9975 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9976 	       default_bootup_tracer);
9977 	default_bootup_tracer = NULL;
9978 }
9979 
9980 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9981 __init static void tracing_set_default_clock(void)
9982 {
9983 	/* sched_clock_stable() is determined in late_initcall */
9984 	if (!trace_boot_clock && !sched_clock_stable()) {
9985 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9986 			pr_warn("Can not set tracing clock due to lockdown\n");
9987 			return;
9988 		}
9989 
9990 		printk(KERN_WARNING
9991 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9992 		       "If you want to keep using the local clock, then add:\n"
9993 		       "  \"trace_clock=local\"\n"
9994 		       "on the kernel command line\n");
9995 		tracing_set_clock(&global_trace, "global");
9996 	}
9997 }
9998 #else
9999 static inline void tracing_set_default_clock(void) { }
10000 #endif
10001 
10002 __init static int late_trace_init(void)
10003 {
10004 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10005 		static_key_disable(&tracepoint_printk_key.key);
10006 		tracepoint_printk = 0;
10007 	}
10008 
10009 	if (traceoff_after_boot)
10010 		tracing_off();
10011 
10012 	tracing_set_default_clock();
10013 	clear_boot_tracer();
10014 	return 0;
10015 }
10016 
10017 late_initcall_sync(late_trace_init);
10018