xref: /linux/kernel/trace/trace.c (revision 9f32a03e3e0d372c520d829dd4da6022fe88832a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 
55 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
56 
57 #include "trace.h"
58 #include "trace_output.h"
59 
60 #ifdef CONFIG_FTRACE_STARTUP_TEST
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, but concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If boot-time tracing (including tracers/events set via the kernel
72  * cmdline) is running, we do not want to run the selftests.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75 
76 void __init disable_tracing_selftest(const char *reason)
77 {
78 	if (!tracing_selftest_disabled) {
79 		tracing_selftest_disabled = true;
80 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
81 	}
82 }
83 #else
84 #define tracing_selftest_running	0
85 #define tracing_selftest_disabled	0
86 #endif
87 
88 /* Pipe tracepoints to printk */
89 static struct trace_iterator *tracepoint_print_iter;
90 int tracepoint_printk;
91 static bool tracepoint_printk_stop_on_boot __initdata;
92 static bool traceoff_after_boot __initdata;
93 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
94 
95 /* For tracers that don't implement custom flags */
96 static struct tracer_opt dummy_tracer_opt[] = {
97 	{ }
98 };
99 
100 static int
101 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
102 {
103 	return 0;
104 }
105 
106 /*
107  * To prevent the comm cache from being overwritten when no
108  * tracing is active, only save the comm when a trace event
109  * occurred.
110  */
111 DEFINE_PER_CPU(bool, trace_taskinfo_save);
112 
113 /*
114  * Kill all tracing for good (never come back).
115  * It is initialized to 1 but will turn to zero if the initialization
116  * of the tracer is successful. But that is the only place that sets
117  * this back to zero.
118  */
119 static int tracing_disabled = 1;
120 
121 cpumask_var_t __read_mostly	tracing_buffer_mask;
122 
123 #define MAX_TRACER_SIZE		100
124 /*
125  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
126  *
127  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
128  * is set, then ftrace_dump is called. This will output the contents
129  * of the ftrace buffers to the console.  This is very useful for
130  * capturing traces that lead to crashes and outputting them to a
131  * serial console.
132  *
133  * It is off by default, but you can enable it either by specifying
134  * "ftrace_dump_on_oops" on the kernel command line, or by setting
135  * /proc/sys/kernel/ftrace_dump_on_oops.
136  * Set it to 1 to dump the buffers of all CPUs.
137  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
138  * Set it to an instance name to dump that specific trace instance.
139  * Dumping multiple instances is also supported; instance names are
140  * separated by commas.
141  */
142 /* Set to string format zero to disable by default */
143 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
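
/*
 * Illustrative command-line examples based on the description above
 * (a sketch, not an exhaustive list of accepted forms):
 *
 *	ftrace_dump_on_oops			dump the buffers of all CPUs
 *	ftrace_dump_on_oops=2			dump only the CPU that oopsed
 *	ftrace_dump_on_oops=foo,bar		dump the "foo" and "bar" instances
 *
 * The same values can also be written to
 * /proc/sys/kernel/ftrace_dump_on_oops at run time.
 */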
144 
145 /* When set, tracing will stop when a WARN*() is hit */
146 static int __disable_trace_on_warning;
147 
148 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
149 			     void *buffer, size_t *lenp, loff_t *ppos);
150 static const struct ctl_table trace_sysctl_table[] = {
151 	{
152 		.procname	= "ftrace_dump_on_oops",
153 		.data		= &ftrace_dump_on_oops,
154 		.maxlen		= MAX_TRACER_SIZE,
155 		.mode		= 0644,
156 		.proc_handler	= proc_dostring,
157 	},
158 	{
159 		.procname	= "traceoff_on_warning",
160 		.data		= &__disable_trace_on_warning,
161 		.maxlen		= sizeof(__disable_trace_on_warning),
162 		.mode		= 0644,
163 		.proc_handler	= proc_dointvec,
164 	},
165 	{
166 		.procname	= "tracepoint_printk",
167 		.data		= &tracepoint_printk,
168 		.maxlen		= sizeof(tracepoint_printk),
169 		.mode		= 0644,
170 		.proc_handler	= tracepoint_printk_sysctl,
171 	},
172 };
173 
174 static int __init init_trace_sysctls(void)
175 {
176 	register_sysctl_init("kernel", trace_sysctl_table);
177 	return 0;
178 }
179 subsys_initcall(init_trace_sysctls);
180 
181 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
182 /* Map of enums to their values, for "eval_map" file */
183 struct trace_eval_map_head {
184 	struct module			*mod;
185 	unsigned long			length;
186 };
187 
188 union trace_eval_map_item;
189 
190 struct trace_eval_map_tail {
191 	/*
192 	 * "end" is first and points to NULL as it must be different
193 	 * than "mod" or "eval_string"
194 	 */
195 	union trace_eval_map_item	*next;
196 	const char			*end;	/* points to NULL */
197 };
198 
199 static DEFINE_MUTEX(trace_eval_mutex);
200 
201 /*
202  * The trace_eval_maps are saved in an array with two extra elements,
203  * one at the beginning, and one at the end. The beginning item contains
204  * the count of the saved maps (head.length), and the module they
205  * belong to if not built in (head.mod). The ending item contains a
206  * pointer to the next array of saved eval_map items.
207  */
208 union trace_eval_map_item {
209 	struct trace_eval_map		map;
210 	struct trace_eval_map_head	head;
211 	struct trace_eval_map_tail	tail;
212 };
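
/*
 * Illustrative layout of one saved array of N maps, following the
 * description above (a sketch):
 *
 *	[0]	head	(head.mod, head.length = N)
 *	[1]	map
 *	...
 *	[N]	map
 *	[N+1]	tail	(tail.next -> next saved array, or NULL at the end)
 */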
213 
214 static union trace_eval_map_item *trace_eval_maps;
215 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
216 
217 int tracing_set_tracer(struct trace_array *tr, const char *buf);
218 static void ftrace_trace_userstack(struct trace_array *tr,
219 				   struct trace_buffer *buffer,
220 				   unsigned int trace_ctx);
221 
222 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
223 static char *default_bootup_tracer;
224 
225 static bool allocate_snapshot;
226 static bool snapshot_at_boot;
227 
228 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
229 static int boot_instance_index;
230 
231 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
232 static int boot_snapshot_index;
233 
234 static int __init set_cmdline_ftrace(char *str)
235 {
236 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
237 	default_bootup_tracer = bootup_tracer_buf;
238 	/* We are using ftrace early, expand it */
239 	trace_set_ring_buffer_expanded(NULL);
240 	return 1;
241 }
242 __setup("ftrace=", set_cmdline_ftrace);
243 
244 int ftrace_dump_on_oops_enabled(void)
245 {
246 	if (!strcmp("0", ftrace_dump_on_oops))
247 		return 0;
248 	else
249 		return 1;
250 }
251 
252 static int __init set_ftrace_dump_on_oops(char *str)
253 {
254 	if (!*str) {
255 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
256 		return 1;
257 	}
258 
259 	if (*str == ',') {
260 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
261 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
262 		return 1;
263 	}
264 
265 	if (*str++ == '=') {
266 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
267 		return 1;
268 	}
269 
270 	return 0;
271 }
272 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
273 
274 static int __init stop_trace_on_warning(char *str)
275 {
276 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
277 		__disable_trace_on_warning = 1;
278 	return 1;
279 }
280 __setup("traceoff_on_warning", stop_trace_on_warning);
281 
282 static int __init boot_alloc_snapshot(char *str)
283 {
284 	char *slot = boot_snapshot_info + boot_snapshot_index;
285 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
286 	int ret;
287 
288 	if (str[0] == '=') {
289 		str++;
290 		if (strlen(str) >= left)
291 			return -1;
292 
293 		ret = snprintf(slot, left, "%s\t", str);
294 		boot_snapshot_index += ret;
295 	} else {
296 		allocate_snapshot = true;
297 		/* We also need the main ring buffer expanded */
298 		trace_set_ring_buffer_expanded(NULL);
299 	}
300 	return 1;
301 }
302 __setup("alloc_snapshot", boot_alloc_snapshot);
303 
304 
305 static int __init boot_snapshot(char *str)
306 {
307 	snapshot_at_boot = true;
308 	boot_alloc_snapshot(str);
309 	return 1;
310 }
311 __setup("ftrace_boot_snapshot", boot_snapshot);
312 
313 
314 static int __init boot_instance(char *str)
315 {
316 	char *slot = boot_instance_info + boot_instance_index;
317 	int left = sizeof(boot_instance_info) - boot_instance_index;
318 	int ret;
319 
320 	if (strlen(str) >= left)
321 		return -1;
322 
323 	ret = snprintf(slot, left, "%s\t", str);
324 	boot_instance_index += ret;
325 
326 	return 1;
327 }
328 __setup("trace_instance=", boot_instance);
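
/*
 * For example, booting with "trace_instance=foo" creates a tracing
 * instance named "foo" at boot time (a minimal illustrative form; see
 * the kernel parameter documentation for the full syntax).
 */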
329 
330 
331 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
332 
333 static int __init set_trace_boot_options(char *str)
334 {
335 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
336 	return 1;
337 }
338 __setup("trace_options=", set_trace_boot_options);
339 
340 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
341 static char *trace_boot_clock __initdata;
342 
343 static int __init set_trace_boot_clock(char *str)
344 {
345 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
346 	trace_boot_clock = trace_boot_clock_buf;
347 	return 1;
348 }
349 __setup("trace_clock=", set_trace_boot_clock);
350 
351 static int __init set_tracepoint_printk(char *str)
352 {
353 	/* Ignore the "tp_printk_stop_on_boot" param */
354 	if (*str == '_')
355 		return 0;
356 
357 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
358 		tracepoint_printk = 1;
359 	return 1;
360 }
361 __setup("tp_printk", set_tracepoint_printk);
362 
363 static int __init set_tracepoint_printk_stop(char *str)
364 {
365 	tracepoint_printk_stop_on_boot = true;
366 	return 1;
367 }
368 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
369 
370 static int __init set_traceoff_after_boot(char *str)
371 {
372 	traceoff_after_boot = true;
373 	return 1;
374 }
375 __setup("traceoff_after_boot", set_traceoff_after_boot);
376 
377 unsigned long long ns2usecs(u64 nsec)
378 {
379 	nsec += 500;
380 	do_div(nsec, 1000);
381 	return nsec;
382 }
383 
384 static void
385 trace_process_export(struct trace_export *export,
386 	       struct ring_buffer_event *event, int flag)
387 {
388 	struct trace_entry *entry;
389 	unsigned int size = 0;
390 
391 	if (export->flags & flag) {
392 		entry = ring_buffer_event_data(event);
393 		size = ring_buffer_event_length(event);
394 		export->write(export, entry, size);
395 	}
396 }
397 
398 static DEFINE_MUTEX(ftrace_export_lock);
399 
400 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
401 
402 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
403 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
405 
406 static inline void ftrace_exports_enable(struct trace_export *export)
407 {
408 	if (export->flags & TRACE_EXPORT_FUNCTION)
409 		static_branch_inc(&trace_function_exports_enabled);
410 
411 	if (export->flags & TRACE_EXPORT_EVENT)
412 		static_branch_inc(&trace_event_exports_enabled);
413 
414 	if (export->flags & TRACE_EXPORT_MARKER)
415 		static_branch_inc(&trace_marker_exports_enabled);
416 }
417 
418 static inline void ftrace_exports_disable(struct trace_export *export)
419 {
420 	if (export->flags & TRACE_EXPORT_FUNCTION)
421 		static_branch_dec(&trace_function_exports_enabled);
422 
423 	if (export->flags & TRACE_EXPORT_EVENT)
424 		static_branch_dec(&trace_event_exports_enabled);
425 
426 	if (export->flags & TRACE_EXPORT_MARKER)
427 		static_branch_dec(&trace_marker_exports_enabled);
428 }
429 
430 static void ftrace_exports(struct ring_buffer_event *event, int flag)
431 {
432 	struct trace_export *export;
433 
434 	preempt_disable_notrace();
435 
436 	export = rcu_dereference_raw_check(ftrace_exports_list);
437 	while (export) {
438 		trace_process_export(export, event, flag);
439 		export = rcu_dereference_raw_check(export->next);
440 	}
441 
442 	preempt_enable_notrace();
443 }
444 
445 static inline void
446 add_trace_export(struct trace_export **list, struct trace_export *export)
447 {
448 	rcu_assign_pointer(export->next, *list);
449 	/*
450 	 * We are adding export to the list, but another
451 	 * CPU might be walking that list. We need to make sure
452 	 * the export->next pointer is valid before another CPU sees
453 	 * the export pointer included in the list.
454 	 */
455 	rcu_assign_pointer(*list, export);
456 }
457 
458 static inline int
459 rm_trace_export(struct trace_export **list, struct trace_export *export)
460 {
461 	struct trace_export **p;
462 
463 	for (p = list; *p != NULL; p = &(*p)->next)
464 		if (*p == export)
465 			break;
466 
467 	if (*p != export)
468 		return -1;
469 
470 	rcu_assign_pointer(*p, (*p)->next);
471 
472 	return 0;
473 }
474 
475 static inline void
476 add_ftrace_export(struct trace_export **list, struct trace_export *export)
477 {
478 	ftrace_exports_enable(export);
479 
480 	add_trace_export(list, export);
481 }
482 
483 static inline int
484 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
485 {
486 	int ret;
487 
488 	ret = rm_trace_export(list, export);
489 	ftrace_exports_disable(export);
490 
491 	return ret;
492 }
493 
494 int register_ftrace_export(struct trace_export *export)
495 {
496 	if (WARN_ON_ONCE(!export->write))
497 		return -1;
498 
499 	mutex_lock(&ftrace_export_lock);
500 
501 	add_ftrace_export(&ftrace_exports_list, export);
502 
503 	mutex_unlock(&ftrace_export_lock);
504 
505 	return 0;
506 }
507 EXPORT_SYMBOL_GPL(register_ftrace_export);
508 
509 int unregister_ftrace_export(struct trace_export *export)
510 {
511 	int ret;
512 
513 	mutex_lock(&ftrace_export_lock);
514 
515 	ret = rm_ftrace_export(&ftrace_exports_list, export);
516 
517 	mutex_unlock(&ftrace_export_lock);
518 
519 	return ret;
520 }
521 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
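
/*
 * Illustrative usage of the export API above (a sketch with made-up
 * names; see include/linux/trace.h for the exact callback signature):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw entry to some external sink
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */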
522 
523 /* trace_flags holds trace_options default values */
524 #define TRACE_DEFAULT_FLAGS						\
525 	(FUNCTION_DEFAULT_FLAGS |					\
526 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
527 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
528 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
529 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
530 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK |		\
531 	 TRACE_ITER_COPY_MARKER)
532 
533 /* trace_options that are only supported by global_trace */
534 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
535 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
536 
537 /* trace_flags that are default zero for instances */
538 #define ZEROED_TRACE_FLAGS \
539 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
540 	 TRACE_ITER_COPY_MARKER)
541 
542 /*
543  * The global_trace is the descriptor that holds the top-level tracing
544  * buffers for the live tracing.
545  */
546 static struct trace_array global_trace = {
547 	.trace_flags = TRACE_DEFAULT_FLAGS,
548 };
549 
550 static struct trace_array *printk_trace = &global_trace;
551 
552 /* List of trace_arrays interested in the top level trace_marker */
553 static LIST_HEAD(marker_copies);
554 
555 static __always_inline bool printk_binsafe(struct trace_array *tr)
556 {
557 	/*
558 	 * The binary format of trace_printk can cause a crash if used
559 	 * by a buffer from another boot. Force the use of the
560 	 * non-binary version of trace_printk if the trace_printk
561 	 * buffer is a boot-mapped ring buffer.
562 	 */
563 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
564 }
565 
566 static void update_printk_trace(struct trace_array *tr)
567 {
568 	if (printk_trace == tr)
569 		return;
570 
571 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
572 	printk_trace = tr;
573 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
574 }
575 
576 /* Returns true if the status of tr changed */
577 static bool update_marker_trace(struct trace_array *tr, int enabled)
578 {
579 	lockdep_assert_held(&event_mutex);
580 
581 	if (enabled) {
582 		if (!list_empty(&tr->marker_list))
583 			return false;
584 
585 		list_add_rcu(&tr->marker_list, &marker_copies);
586 		tr->trace_flags |= TRACE_ITER_COPY_MARKER;
587 		return true;
588 	}
589 
590 	if (list_empty(&tr->marker_list))
591 		return false;
592 
593 	list_del_init(&tr->marker_list);
594 	tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
595 	return true;
596 }
597 
598 void trace_set_ring_buffer_expanded(struct trace_array *tr)
599 {
600 	if (!tr)
601 		tr = &global_trace;
602 	tr->ring_buffer_expanded = true;
603 }
604 
605 LIST_HEAD(ftrace_trace_arrays);
606 
607 int trace_array_get(struct trace_array *this_tr)
608 {
609 	struct trace_array *tr;
610 
611 	guard(mutex)(&trace_types_lock);
612 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
613 		if (tr == this_tr) {
614 			tr->ref++;
615 			return 0;
616 		}
617 	}
618 
619 	return -ENODEV;
620 }
621 
622 static void __trace_array_put(struct trace_array *this_tr)
623 {
624 	WARN_ON(!this_tr->ref);
625 	this_tr->ref--;
626 }
627 
628 /**
629  * trace_array_put - Decrement the reference counter for this trace array.
630  * @this_tr: pointer to the trace array
631  *
632  * NOTE: Use this when we no longer need the trace array returned by
633  * trace_array_get_by_name(). This ensures the trace array can be later
634  * destroyed.
635  *
636  */
637 void trace_array_put(struct trace_array *this_tr)
638 {
639 	if (!this_tr)
640 		return;
641 
642 	mutex_lock(&trace_types_lock);
643 	__trace_array_put(this_tr);
644 	mutex_unlock(&trace_types_lock);
645 }
646 EXPORT_SYMBOL_GPL(trace_array_put);
647 
648 int tracing_check_open_get_tr(struct trace_array *tr)
649 {
650 	int ret;
651 
652 	ret = security_locked_down(LOCKDOWN_TRACEFS);
653 	if (ret)
654 		return ret;
655 
656 	if (tracing_disabled)
657 		return -ENODEV;
658 
659 	if (tr && trace_array_get(tr) < 0)
660 		return -ENODEV;
661 
662 	return 0;
663 }
664 
665 /**
666  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
667  * @filtered_pids: The list of pids to check
668  * @search_pid: The PID to find in @filtered_pids
669  *
670  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
671  */
672 bool
673 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
674 {
675 	return trace_pid_list_is_set(filtered_pids, search_pid);
676 }
677 
678 /**
679  * trace_ignore_this_task - should a task be ignored for tracing
680  * @filtered_pids: The list of pids to check
681  * @filtered_no_pids: The list of pids not to be traced
682  * @task: The task that should be ignored if not filtered
683  *
684  * Checks if @task should be traced or not from @filtered_pids.
685  * Returns true if @task should *NOT* be traced.
686  * Returns false if @task should be traced.
687  */
688 bool
689 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
690 		       struct trace_pid_list *filtered_no_pids,
691 		       struct task_struct *task)
692 {
693 	/*
694 	 * If filtered_no_pids is not empty, and the task's pid is listed
695 	 * in filtered_no_pids, then return true.
696 	 * Otherwise, if filtered_pids is empty, that means we can
697 	 * trace all tasks. If it has content, then only trace pids
698 	 * within filtered_pids.
699 	 */
700 
701 	return (filtered_pids &&
702 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
703 		(filtered_no_pids &&
704 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
705 }
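
/*
 * For example, per the logic above: with filtered_pids = { 42 } and
 * filtered_no_pids empty, only pid 42 is traced; with filtered_pids
 * empty and filtered_no_pids = { 42 }, everything except pid 42 is
 * traced.
 */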
706 
707 /**
708  * trace_filter_add_remove_task - Add or remove a task from a pid_list
709  * @pid_list: The list to modify
710  * @self: The current task for fork or NULL for exit
711  * @task: The task to add or remove
712  *
713  * If @self is defined, the task is being added on fork, and it is only
714  * added if @self is also included in @pid_list, i.e. tasks should
715  * only be added when the parent is listed. If @self is NULL, then the
716  * @task pid will be removed from the list, which would happen on exit
717  * of a task.
718  */
719 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
720 				  struct task_struct *self,
721 				  struct task_struct *task)
722 {
723 	if (!pid_list)
724 		return;
725 
726 	/* For forks, we only add if the forking task is listed */
727 	if (self) {
728 		if (!trace_find_filtered_pid(pid_list, self->pid))
729 			return;
730 	}
731 
732 	/* "self" is set for forks, and NULL for exits */
733 	if (self)
734 		trace_pid_list_set(pid_list, task->pid);
735 	else
736 		trace_pid_list_clear(pid_list, task->pid);
737 }
738 
739 /**
740  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
741  * @pid_list: The pid list to show
742  * @v: The last pid that was shown (+1 of the actual pid so that zero can be displayed)
743  * @pos: The position of the file
744  *
745  * This is used by the seq_file "next" operation to iterate the pids
746  * listed in a trace_pid_list structure.
747  *
748  * Returns the pid+1 as we want to display pid of zero, but NULL would
749  * stop the iteration.
750  */
751 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
752 {
753 	long pid = (unsigned long)v;
754 	unsigned int next;
755 
756 	(*pos)++;
757 
758 	/* pid already is +1 of the actual previous bit */
759 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
760 		return NULL;
761 
762 	pid = next;
763 
764 	/* Return pid + 1 to allow zero to be represented */
765 	return (void *)(pid + 1);
766 }
767 
768 /**
769  * trace_pid_start - Used for seq_file to start reading pid lists
770  * @pid_list: The pid list to show
771  * @pos: The position of the file
772  *
773  * This is used by seq_file "start" operation to start the iteration
774  * of listing pids.
775  *
776  * Returns the pid+1 as we want to display pid of zero, but NULL would
777  * stop the iteration.
778  */
779 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
780 {
781 	unsigned long pid;
782 	unsigned int first;
783 	loff_t l = 0;
784 
785 	if (trace_pid_list_first(pid_list, &first) < 0)
786 		return NULL;
787 
788 	pid = first;
789 
790 	/* Return pid + 1 so that zero can be the exit value */
791 	for (pid++; pid && l < *pos;
792 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
793 		;
794 	return (void *)pid;
795 }
796 
797 /**
798  * trace_pid_show - show the current pid in seq_file processing
799  * @m: The seq_file structure to write into
800  * @v: A void pointer of the pid (+1) value to display
801  *
802  * Can be directly used by seq_file operations to display the current
803  * pid value.
804  */
805 int trace_pid_show(struct seq_file *m, void *v)
806 {
807 	unsigned long pid = (unsigned long)v - 1;
808 
809 	seq_printf(m, "%lu\n", pid);
810 	return 0;
811 }
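
/*
 * The three helpers above are meant to back the seq_file operations of
 * a tracefs pid file. A rough sketch (with hypothetical wrapper names;
 * real users also handle locking and the pid_list lookup):
 *
 *	static const struct seq_operations my_pid_sops = {
 *		.start	= my_pid_start,	// wraps trace_pid_start()
 *		.next	= my_pid_next,	// wraps trace_pid_next()
 *		.stop	= my_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */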
812 
813 /* 128 should be much more than enough */
814 #define PID_BUF_SIZE		127
815 
816 int trace_pid_write(struct trace_pid_list *filtered_pids,
817 		    struct trace_pid_list **new_pid_list,
818 		    const char __user *ubuf, size_t cnt)
819 {
820 	struct trace_pid_list *pid_list;
821 	struct trace_parser parser;
822 	unsigned long val;
823 	int nr_pids = 0;
824 	ssize_t read = 0;
825 	ssize_t ret;
826 	loff_t pos;
827 	pid_t pid;
828 
829 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
830 		return -ENOMEM;
831 
832 	/*
833 	 * Always recreate a new array. The write is an all-or-nothing
834 	 * operation: a new array is always created when the user adds
835 	 * new pids, and if the operation fails, the current list is
836 	 * not modified.
837 	 */
838 	pid_list = trace_pid_list_alloc();
839 	if (!pid_list) {
840 		trace_parser_put(&parser);
841 		return -ENOMEM;
842 	}
843 
844 	if (filtered_pids) {
845 		/* copy the current bits to the new max */
846 		ret = trace_pid_list_first(filtered_pids, &pid);
847 		while (!ret) {
848 			trace_pid_list_set(pid_list, pid);
849 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
850 			nr_pids++;
851 		}
852 	}
853 
854 	ret = 0;
855 	while (cnt > 0) {
856 
857 		pos = 0;
858 
859 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
860 		if (ret < 0)
861 			break;
862 
863 		read += ret;
864 		ubuf += ret;
865 		cnt -= ret;
866 
867 		if (!trace_parser_loaded(&parser))
868 			break;
869 
870 		ret = -EINVAL;
871 		if (kstrtoul(parser.buffer, 0, &val))
872 			break;
873 
874 		pid = (pid_t)val;
875 
876 		if (trace_pid_list_set(pid_list, pid) < 0) {
877 			ret = -1;
878 			break;
879 		}
880 		nr_pids++;
881 
882 		trace_parser_clear(&parser);
883 		ret = 0;
884 	}
885 	trace_parser_put(&parser);
886 
887 	if (ret < 0) {
888 		trace_pid_list_free(pid_list);
889 		return ret;
890 	}
891 
892 	if (!nr_pids) {
893 		/* Cleared the list of pids */
894 		trace_pid_list_free(pid_list);
895 		pid_list = NULL;
896 	}
897 
898 	*new_pid_list = pid_list;
899 
900 	return read;
901 }
902 
903 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
904 {
905 	u64 ts;
906 
907 	/* Early boot up does not have a buffer yet */
908 	if (!buf->buffer)
909 		return trace_clock_local();
910 
911 	ts = ring_buffer_time_stamp(buf->buffer);
912 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
913 
914 	return ts;
915 }
916 
917 u64 ftrace_now(int cpu)
918 {
919 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
920 }
921 
922 /**
923  * tracing_is_enabled - Show if global_trace has been enabled
924  *
925  * Shows if the global trace has been enabled or not. It uses the
926  * mirror flag "buffer_disabled" so that it can be used in fast paths
927  * such as the irqsoff tracer. But it may be inaccurate due to races. If you
928  * need to know the accurate state, use tracing_is_on() which is a little
929  * slower, but accurate.
930  */
931 int tracing_is_enabled(void)
932 {
933 	/*
934 	 * For quick access (irqsoff uses this in fast path), just
935 	 * return the mirror variable of the state of the ring buffer.
936 	 * It's a little racy, but we don't really care.
937 	 */
938 	smp_rmb();
939 	return !global_trace.buffer_disabled;
940 }
941 
942 /*
943  * trace_buf_size is the size in bytes that is allocated
944  * for a buffer. Note, the number of bytes is always rounded
945  * to page size.
946  *
947  * This number is purposely set to a low value of 16384 entries.
948  * If a dump on oops happens, it is much appreciated not to have
949  * to wait for all that output. In any case, this is configurable
950  * at both boot time and run time.
951  */
952 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
953 
954 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
955 
956 /* trace_types holds a link list of available tracers. */
957 static struct tracer		*trace_types __read_mostly;
958 
959 /*
960  * trace_types_lock is used to protect the trace_types list.
961  */
962 DEFINE_MUTEX(trace_types_lock);
963 
964 /*
965  * Serialize access to the ring buffer.
966  *
967  * The ring buffer serializes readers, but that is only low-level protection.
968  * The validity of the events (returned by ring_buffer_peek() etc.)
969  * is not protected by the ring buffer.
970  *
971  * The content of events may become garbage if we allow other processes
972  * to consume these events concurrently:
973  *   A) the page of the consumed events may become a normal page
974  *      (not a reader page) in the ring buffer, and this page will be
975  *      rewritten by the event producer.
976  *   B) the page of the consumed events may become a page for splice_read,
977  *      and this page will be returned to the system.
978  *
979  *   These primitives allow multiple processes to access different CPU
980  * ring buffers concurrently.
981  *
982  *   These primitives don't distinguish read-only and read-consume access.
983  * Multiple read-only accesses are also serialized.
984  */
985 
986 #ifdef CONFIG_SMP
987 static DECLARE_RWSEM(all_cpu_access_lock);
988 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
989 
990 static inline void trace_access_lock(int cpu)
991 {
992 	if (cpu == RING_BUFFER_ALL_CPUS) {
993 		/* gain it for accessing the whole ring buffer. */
994 		down_write(&all_cpu_access_lock);
995 	} else {
996 		/* gain it for accessing a cpu ring buffer. */
997 
998 		/* First, block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
999 		down_read(&all_cpu_access_lock);
1000 
1001 		/* Second, block other access to this @cpu ring buffer. */
1002 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
1003 	}
1004 }
1005 
1006 static inline void trace_access_unlock(int cpu)
1007 {
1008 	if (cpu == RING_BUFFER_ALL_CPUS) {
1009 		up_write(&all_cpu_access_lock);
1010 	} else {
1011 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1012 		up_read(&all_cpu_access_lock);
1013 	}
1014 }
1015 
1016 static inline void trace_access_lock_init(void)
1017 {
1018 	int cpu;
1019 
1020 	for_each_possible_cpu(cpu)
1021 		mutex_init(&per_cpu(cpu_access_lock, cpu));
1022 }
1023 
1024 #else
1025 
1026 static DEFINE_MUTEX(access_lock);
1027 
1028 static inline void trace_access_lock(int cpu)
1029 {
1030 	(void)cpu;
1031 	mutex_lock(&access_lock);
1032 }
1033 
1034 static inline void trace_access_unlock(int cpu)
1035 {
1036 	(void)cpu;
1037 	mutex_unlock(&access_lock);
1038 }
1039 
1040 static inline void trace_access_lock_init(void)
1041 {
1042 }
1043 
1044 #endif
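
/*
 * Typical usage of the primitives above (an illustrative sketch): a
 * consuming reader of a single CPU buffer wraps the consume in the
 * per-cpu lock, e.g.
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 *
 * while an operation that touches all CPUs passes RING_BUFFER_ALL_CPUS.
 */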
1045 
1046 #ifdef CONFIG_STACKTRACE
1047 static void __ftrace_trace_stack(struct trace_array *tr,
1048 				 struct trace_buffer *buffer,
1049 				 unsigned int trace_ctx,
1050 				 int skip, struct pt_regs *regs);
1051 static inline void ftrace_trace_stack(struct trace_array *tr,
1052 				      struct trace_buffer *buffer,
1053 				      unsigned int trace_ctx,
1054 				      int skip, struct pt_regs *regs);
1055 
1056 #else
1057 static inline void __ftrace_trace_stack(struct trace_array *tr,
1058 					struct trace_buffer *buffer,
1059 					unsigned int trace_ctx,
1060 					int skip, struct pt_regs *regs)
1061 {
1062 }
1063 static inline void ftrace_trace_stack(struct trace_array *tr,
1064 				      struct trace_buffer *buffer,
1065 				      unsigned long trace_ctx,
1066 				      int skip, struct pt_regs *regs)
1067 {
1068 }
1069 
1070 #endif
1071 
1072 static __always_inline void
1073 trace_event_setup(struct ring_buffer_event *event,
1074 		  int type, unsigned int trace_ctx)
1075 {
1076 	struct trace_entry *ent = ring_buffer_event_data(event);
1077 
1078 	tracing_generic_entry_update(ent, type, trace_ctx);
1079 }
1080 
1081 static __always_inline struct ring_buffer_event *
1082 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1083 			  int type,
1084 			  unsigned long len,
1085 			  unsigned int trace_ctx)
1086 {
1087 	struct ring_buffer_event *event;
1088 
1089 	event = ring_buffer_lock_reserve(buffer, len);
1090 	if (event != NULL)
1091 		trace_event_setup(event, type, trace_ctx);
1092 
1093 	return event;
1094 }
1095 
1096 void tracer_tracing_on(struct trace_array *tr)
1097 {
1098 	if (tr->array_buffer.buffer)
1099 		ring_buffer_record_on(tr->array_buffer.buffer);
1100 	/*
1101 	 * This flag is looked at when buffers haven't been allocated
1102 	 * yet, or by some tracers (like irqsoff) that just want to
1103 	 * know if the ring buffer has been disabled, but which can handle
1104 	 * races where it gets disabled while we still do a record.
1105 	 * As the check is in the fast path of the tracers, it is more
1106 	 * important to be fast than accurate.
1107 	 */
1108 	tr->buffer_disabled = 0;
1109 	/* Make the flag seen by readers */
1110 	smp_wmb();
1111 }
1112 
1113 /**
1114  * tracing_on - enable tracing buffers
1115  *
1116  * This function enables tracing buffers that may have been
1117  * disabled with tracing_off.
1118  */
1119 void tracing_on(void)
1120 {
1121 	tracer_tracing_on(&global_trace);
1122 }
1123 EXPORT_SYMBOL_GPL(tracing_on);
1124 
1125 
1126 static __always_inline void
1127 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1128 {
1129 	__this_cpu_write(trace_taskinfo_save, true);
1130 
1131 	/* If this is the temp buffer, we need to commit fully */
1132 	if (this_cpu_read(trace_buffered_event) == event) {
1133 		/* Length is in event->array[0] */
1134 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1135 		/* Release the temp buffer */
1136 		this_cpu_dec(trace_buffered_event_cnt);
1137 		/* ring_buffer_unlock_commit() enables preemption */
1138 		preempt_enable_notrace();
1139 	} else
1140 		ring_buffer_unlock_commit(buffer);
1141 }
1142 
1143 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1144 		       const char *str, int size)
1145 {
1146 	struct ring_buffer_event *event;
1147 	struct trace_buffer *buffer;
1148 	struct print_entry *entry;
1149 	unsigned int trace_ctx;
1150 	int alloc;
1151 
1152 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1153 		return 0;
1154 
1155 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1156 		return 0;
1157 
1158 	if (unlikely(tracing_disabled))
1159 		return 0;
1160 
1161 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1162 
1163 	trace_ctx = tracing_gen_ctx();
1164 	buffer = tr->array_buffer.buffer;
1165 	ring_buffer_nest_start(buffer);
1166 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1167 					    trace_ctx);
1168 	if (!event) {
1169 		size = 0;
1170 		goto out;
1171 	}
1172 
1173 	entry = ring_buffer_event_data(event);
1174 	entry->ip = ip;
1175 
1176 	memcpy(&entry->buf, str, size);
1177 
1178 	/* Add a newline if necessary */
1179 	if (entry->buf[size - 1] != '\n') {
1180 		entry->buf[size] = '\n';
1181 		entry->buf[size + 1] = '\0';
1182 	} else
1183 		entry->buf[size] = '\0';
1184 
1185 	__buffer_unlock_commit(buffer, event);
1186 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1187  out:
1188 	ring_buffer_nest_end(buffer);
1189 	return size;
1190 }
1191 EXPORT_SYMBOL_GPL(__trace_array_puts);
1192 
1193 /**
1194  * __trace_puts - write a constant string into the trace buffer.
1195  * @ip:	   The address of the caller
1196  * @str:   The constant string to write
1197  * @size:  The size of the string.
1198  */
1199 int __trace_puts(unsigned long ip, const char *str, int size)
1200 {
1201 	return __trace_array_puts(printk_trace, ip, str, size);
1202 }
1203 EXPORT_SYMBOL_GPL(__trace_puts);
1204 
1205 /**
1206  * __trace_bputs - write the pointer to a constant string into trace buffer
1207  * @ip:	   The address of the caller
1208  * @str:   The constant string to write to the buffer to
1209  */
1210 int __trace_bputs(unsigned long ip, const char *str)
1211 {
1212 	struct trace_array *tr = READ_ONCE(printk_trace);
1213 	struct ring_buffer_event *event;
1214 	struct trace_buffer *buffer;
1215 	struct bputs_entry *entry;
1216 	unsigned int trace_ctx;
1217 	int size = sizeof(struct bputs_entry);
1218 	int ret = 0;
1219 
1220 	if (!printk_binsafe(tr))
1221 		return __trace_puts(ip, str, strlen(str));
1222 
1223 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1224 		return 0;
1225 
1226 	if (unlikely(tracing_selftest_running || tracing_disabled))
1227 		return 0;
1228 
1229 	trace_ctx = tracing_gen_ctx();
1230 	buffer = tr->array_buffer.buffer;
1231 
1232 	ring_buffer_nest_start(buffer);
1233 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1234 					    trace_ctx);
1235 	if (!event)
1236 		goto out;
1237 
1238 	entry = ring_buffer_event_data(event);
1239 	entry->ip			= ip;
1240 	entry->str			= str;
1241 
1242 	__buffer_unlock_commit(buffer, event);
1243 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1244 
1245 	ret = 1;
1246  out:
1247 	ring_buffer_nest_end(buffer);
1248 	return ret;
1249 }
1250 EXPORT_SYMBOL_GPL(__trace_bputs);
1251 
1252 #ifdef CONFIG_TRACER_SNAPSHOT
1253 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1254 					   void *cond_data)
1255 {
1256 	struct tracer *tracer = tr->current_trace;
1257 	unsigned long flags;
1258 
1259 	if (in_nmi()) {
1260 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1261 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1262 		return;
1263 	}
1264 
1265 	if (!tr->allocated_snapshot) {
1266 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1267 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1268 		tracer_tracing_off(tr);
1269 		return;
1270 	}
1271 
1272 	/* Note, the snapshot cannot be used while the current tracer is using it */
1273 	if (tracer->use_max_tr) {
1274 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1275 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1276 		return;
1277 	}
1278 
1279 	if (tr->mapped) {
1280 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1281 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1282 		return;
1283 	}
1284 
1285 	local_irq_save(flags);
1286 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1287 	local_irq_restore(flags);
1288 }
1289 
1290 void tracing_snapshot_instance(struct trace_array *tr)
1291 {
1292 	tracing_snapshot_instance_cond(tr, NULL);
1293 }
1294 
1295 /**
1296  * tracing_snapshot - take a snapshot of the current buffer.
1297  *
1298  * This causes a swap between the snapshot buffer and the current live
1299  * tracing buffer. You can use this to take snapshots of the live
1300  * trace when some condition is triggered, but continue to trace.
1301  *
1302  * Note, make sure to allocate the snapshot with either
1303  * a tracing_snapshot_alloc(), or by doing it manually
1304  * with: echo 1 > /sys/kernel/tracing/snapshot
1305  *
1306  * If the snapshot buffer is not allocated, it will stop tracing.
1307  * Basically making a permanent snapshot.
1308  */
1309 void tracing_snapshot(void)
1310 {
1311 	struct trace_array *tr = &global_trace;
1312 
1313 	tracing_snapshot_instance(tr);
1314 }
1315 EXPORT_SYMBOL_GPL(tracing_snapshot);
1316 
1317 /**
1318  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1319  * @tr:		The tracing instance to snapshot
1320  * @cond_data:	The data to be tested conditionally, and possibly saved
1321  *
1322  * This is the same as tracing_snapshot() except that the snapshot is
1323  * conditional - the snapshot will only happen if the
1324  * cond_snapshot.update() implementation receiving the cond_data
1325  * returns true, which means that the trace array's cond_snapshot
1326  * update() operation used the cond_data to determine whether the
1327  * snapshot should be taken, and if it was, presumably saved it along
1328  * with the snapshot.
1329  */
1330 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1331 {
1332 	tracing_snapshot_instance_cond(tr, cond_data);
1333 }
1334 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1335 
1336 /**
1337  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1338  * @tr:		The tracing instance
1339  *
1340  * When the user enables a conditional snapshot using
1341  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1342  * with the snapshot.  This accessor is used to retrieve it.
1343  *
1344  * Should not be called from cond_snapshot.update(), since it takes
1345  * the tr->max_lock lock, which the code calling
1346  * cond_snapshot.update() has already done.
1347  *
1348  * Returns the cond_data associated with the trace array's snapshot.
1349  */
1350 void *tracing_cond_snapshot_data(struct trace_array *tr)
1351 {
1352 	void *cond_data = NULL;
1353 
1354 	local_irq_disable();
1355 	arch_spin_lock(&tr->max_lock);
1356 
1357 	if (tr->cond_snapshot)
1358 		cond_data = tr->cond_snapshot->cond_data;
1359 
1360 	arch_spin_unlock(&tr->max_lock);
1361 	local_irq_enable();
1362 
1363 	return cond_data;
1364 }
1365 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1366 
1367 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1368 					struct array_buffer *size_buf, int cpu_id);
1369 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1370 
1371 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1372 {
1373 	int order;
1374 	int ret;
1375 
1376 	if (!tr->allocated_snapshot) {
1377 
1378 		/* Make the snapshot buffer have the same order as main buffer */
1379 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1380 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1381 		if (ret < 0)
1382 			return ret;
1383 
1384 		/* allocate spare buffer */
1385 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1386 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1387 		if (ret < 0)
1388 			return ret;
1389 
1390 		tr->allocated_snapshot = true;
1391 	}
1392 
1393 	return 0;
1394 }
1395 
1396 static void free_snapshot(struct trace_array *tr)
1397 {
1398 	/*
1399 	 * We don't free the ring buffer. Instead, we resize it because
1400 	 * the max_tr ring buffer has some state (e.g. ring->clock) that
1401 	 * we want to preserve.
1402 	 */
1403 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1404 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1405 	set_buffer_entries(&tr->max_buffer, 1);
1406 	tracing_reset_online_cpus(&tr->max_buffer);
1407 	tr->allocated_snapshot = false;
1408 }
1409 
1410 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1411 {
1412 	int ret;
1413 
1414 	lockdep_assert_held(&trace_types_lock);
1415 
1416 	spin_lock(&tr->snapshot_trigger_lock);
1417 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1418 		spin_unlock(&tr->snapshot_trigger_lock);
1419 		return -EBUSY;
1420 	}
1421 
1422 	tr->snapshot++;
1423 	spin_unlock(&tr->snapshot_trigger_lock);
1424 
1425 	ret = tracing_alloc_snapshot_instance(tr);
1426 	if (ret) {
1427 		spin_lock(&tr->snapshot_trigger_lock);
1428 		tr->snapshot--;
1429 		spin_unlock(&tr->snapshot_trigger_lock);
1430 	}
1431 
1432 	return ret;
1433 }
1434 
1435 int tracing_arm_snapshot(struct trace_array *tr)
1436 {
1437 	int ret;
1438 
1439 	mutex_lock(&trace_types_lock);
1440 	ret = tracing_arm_snapshot_locked(tr);
1441 	mutex_unlock(&trace_types_lock);
1442 
1443 	return ret;
1444 }
1445 
1446 void tracing_disarm_snapshot(struct trace_array *tr)
1447 {
1448 	spin_lock(&tr->snapshot_trigger_lock);
1449 	if (!WARN_ON(!tr->snapshot))
1450 		tr->snapshot--;
1451 	spin_unlock(&tr->snapshot_trigger_lock);
1452 }
1453 
1454 /**
1455  * tracing_alloc_snapshot - allocate snapshot buffer.
1456  *
1457  * This only allocates the snapshot buffer if it isn't already
1458  * allocated - it doesn't also take a snapshot.
1459  *
1460  * This is meant to be used in cases where the snapshot buffer needs
1461  * to be set up for events that can't sleep but need to be able to
1462  * trigger a snapshot.
1463  */
1464 int tracing_alloc_snapshot(void)
1465 {
1466 	struct trace_array *tr = &global_trace;
1467 	int ret;
1468 
1469 	ret = tracing_alloc_snapshot_instance(tr);
1470 	WARN_ON(ret < 0);
1471 
1472 	return ret;
1473 }
1474 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1475 
1476 /**
1477  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1478  *
1479  * This is similar to tracing_snapshot(), but it will allocate the
1480  * snapshot buffer if it isn't already allocated. Use this only
1481  * where it is safe to sleep, as the allocation may sleep.
1482  *
1483  * This causes a swap between the snapshot buffer and the current live
1484  * tracing buffer. You can use this to take snapshots of the live
1485  * trace when some condition is triggered, but continue to trace.
1486  */
1487 void tracing_snapshot_alloc(void)
1488 {
1489 	int ret;
1490 
1491 	ret = tracing_alloc_snapshot();
1492 	if (ret < 0)
1493 		return;
1494 
1495 	tracing_snapshot();
1496 }
1497 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
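
/*
 * Illustrative in-kernel usage of the snapshot API above (a sketch):
 *
 *	if (some_rare_condition())	// hypothetical trigger
 *		tracing_snapshot_alloc();
 *
 * The saved buffer can then be read from /sys/kernel/tracing/snapshot
 * while the live trace continues to run.
 */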
1498 
1499 /**
1500  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1501  * @tr:		The tracing instance
1502  * @cond_data:	User data to associate with the snapshot
1503  * @update:	Implementation of the cond_snapshot update function
1504  *
1505  * Check whether the conditional snapshot for the given instance has
1506  * already been enabled, or if the current tracer is already using a
1507  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1508  * save the cond_data and update function inside.
1509  *
1510  * Returns 0 if successful, error otherwise.
1511  */
1512 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1513 				 cond_update_fn_t update)
1514 {
1515 	struct cond_snapshot *cond_snapshot __free(kfree) =
1516 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1517 	int ret;
1518 
1519 	if (!cond_snapshot)
1520 		return -ENOMEM;
1521 
1522 	cond_snapshot->cond_data = cond_data;
1523 	cond_snapshot->update = update;
1524 
1525 	guard(mutex)(&trace_types_lock);
1526 
1527 	if (tr->current_trace->use_max_tr)
1528 		return -EBUSY;
1529 
1530 	/*
1531 	 * The cond_snapshot can only change to NULL without the
1532 	 * trace_types_lock. We don't care if we race with it going
1533 	 * to NULL, but we want to make sure that it's not set to
1534 	 * something other than NULL when we get here, which we can
1535 	 * do safely with only holding the trace_types_lock and not
1536 	 * having to take the max_lock.
1537 	 */
1538 	if (tr->cond_snapshot)
1539 		return -EBUSY;
1540 
1541 	ret = tracing_arm_snapshot_locked(tr);
1542 	if (ret)
1543 		return ret;
1544 
1545 	local_irq_disable();
1546 	arch_spin_lock(&tr->max_lock);
1547 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1548 	arch_spin_unlock(&tr->max_lock);
1549 	local_irq_enable();
1550 
1551 	return 0;
1552 }
1553 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1554 
1555 /**
1556  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1557  * @tr:		The tracing instance
1558  *
1559  * Check whether the conditional snapshot for the given instance is
1560  * enabled; if so, free the cond_snapshot associated with it,
1561  * otherwise return -EINVAL.
1562  *
1563  * Returns 0 if successful, error otherwise.
1564  */
1565 int tracing_snapshot_cond_disable(struct trace_array *tr)
1566 {
1567 	int ret = 0;
1568 
1569 	local_irq_disable();
1570 	arch_spin_lock(&tr->max_lock);
1571 
1572 	if (!tr->cond_snapshot)
1573 		ret = -EINVAL;
1574 	else {
1575 		kfree(tr->cond_snapshot);
1576 		tr->cond_snapshot = NULL;
1577 	}
1578 
1579 	arch_spin_unlock(&tr->max_lock);
1580 	local_irq_enable();
1581 
1582 	tracing_disarm_snapshot(tr);
1583 
1584 	return ret;
1585 }
1586 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1587 #else
1588 void tracing_snapshot(void)
1589 {
1590 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1591 }
1592 EXPORT_SYMBOL_GPL(tracing_snapshot);
1593 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1594 {
1595 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1596 }
1597 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1598 int tracing_alloc_snapshot(void)
1599 {
1600 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1601 	return -ENODEV;
1602 }
1603 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1604 void tracing_snapshot_alloc(void)
1605 {
1606 	/* Give warning */
1607 	tracing_snapshot();
1608 }
1609 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1610 void *tracing_cond_snapshot_data(struct trace_array *tr)
1611 {
1612 	return NULL;
1613 }
1614 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1615 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1616 {
1617 	return -ENODEV;
1618 }
1619 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1620 int tracing_snapshot_cond_disable(struct trace_array *tr)
1621 {
1622 	return false;
1623 }
1624 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1625 #define free_snapshot(tr)	do { } while (0)
1626 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1627 #endif /* CONFIG_TRACER_SNAPSHOT */
1628 
1629 void tracer_tracing_off(struct trace_array *tr)
1630 {
1631 	if (tr->array_buffer.buffer)
1632 		ring_buffer_record_off(tr->array_buffer.buffer);
1633 	/*
1634 	 * This flag is looked at when buffers haven't been allocated
1635 	 * yet, or by some tracers (like irqsoff) that just want to
1636 	 * know if the ring buffer has been disabled, but which can handle
1637 	 * races where it gets disabled while we still do a record.
1638 	 * As the check is in the fast path of the tracers, it is more
1639 	 * important to be fast than accurate.
1640 	 */
1641 	tr->buffer_disabled = 1;
1642 	/* Make the flag seen by readers */
1643 	smp_wmb();
1644 }
1645 
1646 /**
1647  * tracer_tracing_disable() - temporarily disable the buffer from write
1648  * @tr: The trace array to disable its buffer for
1649  *
1650  * Expects tracer_tracing_enable() to re-enable tracing.
1651  * The difference between this and tracer_tracing_off() is that this
1652  * is a counter and can nest, whereas tracer_tracing_off() can
1653  * be called multiple times and a single tracer_tracing_on() will
1654  * enable it.
1655  */
1656 void tracer_tracing_disable(struct trace_array *tr)
1657 {
1658 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1659 		return;
1660 
1661 	ring_buffer_record_disable(tr->array_buffer.buffer);
1662 }
1663 
1664 /**
1665  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1666  * @tr: The trace array that had tracer_tracing_disable() called on it
1667  *
1668  * This is called after tracer_tracing_disable() has been called on @tr,
1669  * when it's safe to re-enable tracing.
1670  */
1671 void tracer_tracing_enable(struct trace_array *tr)
1672 {
1673 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1674 		return;
1675 
1676 	ring_buffer_record_enable(tr->array_buffer.buffer);
1677 }
1678 
1679 /**
1680  * tracing_off - turn off tracing buffers
1681  *
1682  * This function stops the tracing buffers from recording data.
1683  * It does not disable any overhead the tracers themselves may
1684  * be causing. This function simply causes all recording to
1685  * the ring buffers to fail.
1686  */
1687 void tracing_off(void)
1688 {
1689 	tracer_tracing_off(&global_trace);
1690 }
1691 EXPORT_SYMBOL_GPL(tracing_off);
1692 
1693 void disable_trace_on_warning(void)
1694 {
1695 	if (__disable_trace_on_warning) {
1696 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1697 			"Disabling tracing due to warning\n");
1698 		tracing_off();
1699 	}
1700 }
1701 
1702 /**
1703  * tracer_tracing_is_on - show real state of ring buffer enabled
1704  * @tr: the trace array to check whether its ring buffer is enabled
1705  *
1706  * Shows the real state of the ring buffer, whether it is enabled or not.
1707  */
1708 bool tracer_tracing_is_on(struct trace_array *tr)
1709 {
1710 	if (tr->array_buffer.buffer)
1711 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1712 	return !tr->buffer_disabled;
1713 }
1714 
1715 /**
1716  * tracing_is_on - show state of ring buffers enabled
1717  */
1718 int tracing_is_on(void)
1719 {
1720 	return tracer_tracing_is_on(&global_trace);
1721 }
1722 EXPORT_SYMBOL_GPL(tracing_is_on);
1723 
1724 static int __init set_buf_size(char *str)
1725 {
1726 	unsigned long buf_size;
1727 
1728 	if (!str)
1729 		return 0;
1730 	buf_size = memparse(str, &str);
1731 	/*
1732 	 * nr_entries cannot be zero and the startup
1733 	 * tests require some buffer space. Therefore
1734 	 * ensure we have at least 4096 bytes of buffer.
1735 	 */
1736 	trace_buf_size = max(4096UL, buf_size);
1737 	return 1;
1738 }
1739 __setup("trace_buf_size=", set_buf_size);
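
/*
 * For example, booting with "trace_buf_size=1M" requests roughly one
 * megabyte of buffer (memparse() accepts K/M/G suffixes); the value is
 * rounded to page size and clamped to at least 4096 bytes as noted
 * above.
 */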
1740 
1741 static int __init set_tracing_thresh(char *str)
1742 {
1743 	unsigned long threshold;
1744 	int ret;
1745 
1746 	if (!str)
1747 		return 0;
1748 	ret = kstrtoul(str, 0, &threshold);
1749 	if (ret < 0)
1750 		return 0;
1751 	tracing_thresh = threshold * 1000;
1752 	return 1;
1753 }
1754 __setup("tracing_thresh=", set_tracing_thresh);
1755 
1756 unsigned long nsecs_to_usecs(unsigned long nsecs)
1757 {
1758 	return nsecs / 1000;
1759 }
1760 
1761 /*
1762  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1763  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1764  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1765  * of strings in the order that the evals (enum) were defined.
1766  */
1767 #undef C
1768 #define C(a, b) b
1769 
1770 /* These must match the bit positions in trace_iterator_flags */
1771 static const char *trace_options[] = {
1772 	TRACE_FLAGS
1773 	NULL
1774 };
1775 
1776 static struct {
1777 	u64 (*func)(void);
1778 	const char *name;
1779 	int in_ns;		/* is this clock in nanoseconds? */
1780 } trace_clocks[] = {
1781 	{ trace_clock_local,		"local",	1 },
1782 	{ trace_clock_global,		"global",	1 },
1783 	{ trace_clock_counter,		"counter",	0 },
1784 	{ trace_clock_jiffies,		"uptime",	0 },
1785 	{ trace_clock,			"perf",		1 },
1786 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1787 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1788 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1789 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1790 	ARCH_TRACE_CLOCKS
1791 };
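
/*
 * The active clock is selected per trace array, either at boot with the
 * "trace_clock=" parameter handled above or at run time via the tracefs
 * "trace_clock" file, e.g. (illustrative):
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 */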
1792 
1793 bool trace_clock_in_ns(struct trace_array *tr)
1794 {
1795 	if (trace_clocks[tr->clock_id].in_ns)
1796 		return true;
1797 
1798 	return false;
1799 }
1800 
1801 /*
1802  * trace_parser_get_init - gets the buffer for trace parser
1803  */
1804 int trace_parser_get_init(struct trace_parser *parser, int size)
1805 {
1806 	memset(parser, 0, sizeof(*parser));
1807 
1808 	parser->buffer = kmalloc(size, GFP_KERNEL);
1809 	if (!parser->buffer)
1810 		return 1;
1811 
1812 	parser->size = size;
1813 	return 0;
1814 }
1815 
1816 /*
1817  * trace_parser_put - frees the buffer for trace parser
1818  */
1819 void trace_parser_put(struct trace_parser *parser)
1820 {
1821 	kfree(parser->buffer);
1822 	parser->buffer = NULL;
1823 }
1824 
1825 /*
1826  * trace_get_user - reads the user input string separated by space
1827  * (matched by isspace(ch))
1828  *
1829  * For each string found the 'struct trace_parser' is updated,
1830  * and the function returns.
1831  *
1832  * Returns number of bytes read.
1833  *
1834  * See kernel/trace/trace.h for 'struct trace_parser' details.
1835  */
1836 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1837 	size_t cnt, loff_t *ppos)
1838 {
1839 	char ch;
1840 	size_t read = 0;
1841 	ssize_t ret;
1842 
1843 	if (!*ppos)
1844 		trace_parser_clear(parser);
1845 
1846 	ret = get_user(ch, ubuf++);
1847 	if (ret)
1848 		goto out;
1849 
1850 	read++;
1851 	cnt--;
1852 
1853 	/*
1854 	 * The parser is not finished with the last write,
1855 	 * continue reading the user input without skipping spaces.
1856 	 */
1857 	if (!parser->cont) {
1858 		/* skip white space */
1859 		while (cnt && isspace(ch)) {
1860 			ret = get_user(ch, ubuf++);
1861 			if (ret)
1862 				goto out;
1863 			read++;
1864 			cnt--;
1865 		}
1866 
1867 		parser->idx = 0;
1868 
1869 		/* only spaces were written */
1870 		if (isspace(ch) || !ch) {
1871 			*ppos += read;
1872 			ret = read;
1873 			goto out;
1874 		}
1875 	}
1876 
1877 	/* read the non-space input */
1878 	while (cnt && !isspace(ch) && ch) {
1879 		if (parser->idx < parser->size - 1)
1880 			parser->buffer[parser->idx++] = ch;
1881 		else {
1882 			ret = -EINVAL;
1883 			goto out;
1884 		}
1885 		ret = get_user(ch, ubuf++);
1886 		if (ret)
1887 			goto out;
1888 		read++;
1889 		cnt--;
1890 	}
1891 
1892 	/* We either got finished input or we have to wait for another call. */
1893 	if (isspace(ch) || !ch) {
1894 		parser->buffer[parser->idx] = 0;
1895 		parser->cont = false;
1896 	} else if (parser->idx < parser->size - 1) {
1897 		parser->cont = true;
1898 		parser->buffer[parser->idx++] = ch;
1899 		/* Make sure the parsed string always terminates with '\0'. */
1900 		parser->buffer[parser->idx] = 0;
1901 	} else {
1902 		ret = -EINVAL;
1903 		goto out;
1904 	}
1905 
1906 	*ppos += read;
1907 	ret = read;
1908 
1909 out:
1910 	return ret;
1911 }
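/*
 * Illustrative sketch (not part of this file): a typical ->write() handler
 * consumes one whitespace-separated token per call using the parser above.
 * my_write() and handle_token() are made-up names standing in for whatever
 * the caller does with the parsed word:
 *
 *	static ssize_t my_write(struct file *filp, const char __user *ubuf,
 *				size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, PAGE_SIZE))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read >= 0 && trace_parser_loaded(&parser) &&
 *		    !trace_parser_cont(&parser))
 *			handle_token(parser.buffer);	// hypothetical
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 *
 * ftrace_regex_write() in kernel/trace/ftrace.c follows a similar pattern.
 */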
1912 
1913 /* TODO add a seq_buf_to_buffer() */
1914 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1915 {
1916 	int len;
1917 
1918 	if (trace_seq_used(s) <= s->readpos)
1919 		return -EBUSY;
1920 
1921 	len = trace_seq_used(s) - s->readpos;
1922 	if (cnt > len)
1923 		cnt = len;
1924 	memcpy(buf, s->buffer + s->readpos, cnt);
1925 
1926 	s->readpos += cnt;
1927 	return cnt;
1928 }
1929 
1930 unsigned long __read_mostly	tracing_thresh;
1931 
1932 #ifdef CONFIG_TRACER_MAX_TRACE
1933 static const struct file_operations tracing_max_lat_fops;
1934 
1935 #ifdef LATENCY_FS_NOTIFY
1936 
1937 static struct workqueue_struct *fsnotify_wq;
1938 
1939 static void latency_fsnotify_workfn(struct work_struct *work)
1940 {
1941 	struct trace_array *tr = container_of(work, struct trace_array,
1942 					      fsnotify_work);
1943 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1944 }
1945 
1946 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1947 {
1948 	struct trace_array *tr = container_of(iwork, struct trace_array,
1949 					      fsnotify_irqwork);
1950 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1951 }
1952 
1953 static void trace_create_maxlat_file(struct trace_array *tr,
1954 				     struct dentry *d_tracer)
1955 {
1956 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1957 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1958 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1959 					      TRACE_MODE_WRITE,
1960 					      d_tracer, tr,
1961 					      &tracing_max_lat_fops);
1962 }
1963 
1964 __init static int latency_fsnotify_init(void)
1965 {
1966 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1967 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1968 	if (!fsnotify_wq) {
1969 		pr_err("Unable to allocate tr_max_lat_wq\n");
1970 		return -ENOMEM;
1971 	}
1972 	return 0;
1973 }
1974 
1975 late_initcall_sync(latency_fsnotify_init);
1976 
1977 void latency_fsnotify(struct trace_array *tr)
1978 {
1979 	if (!fsnotify_wq)
1980 		return;
1981 	/*
1982 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1983 	 * possible that we are called from __schedule() or do_idle(), which
1984 	 * could cause a deadlock.
1985 	 */
1986 	irq_work_queue(&tr->fsnotify_irqwork);
1987 }
1988 
1989 #else /* !LATENCY_FS_NOTIFY */
1990 
1991 #define trace_create_maxlat_file(tr, d_tracer)				\
1992 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1993 			  d_tracer, tr, &tracing_max_lat_fops)
1994 
1995 #endif
1996 
1997 /*
1998  * Copy the new maximum trace into the separate maximum-trace
1999  * structure. (this way the maximum trace is permanently saved,
2000  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
2001  */
2002 static void
2003 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
2004 {
2005 	struct array_buffer *trace_buf = &tr->array_buffer;
2006 	struct array_buffer *max_buf = &tr->max_buffer;
2007 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
2008 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
2009 
2010 	max_buf->cpu = cpu;
2011 	max_buf->time_start = data->preempt_timestamp;
2012 
2013 	max_data->saved_latency = tr->max_latency;
2014 	max_data->critical_start = data->critical_start;
2015 	max_data->critical_end = data->critical_end;
2016 
2017 	strscpy(max_data->comm, tsk->comm);
2018 	max_data->pid = tsk->pid;
2019 	/*
2020 	 * If tsk == current, then use current_uid(), as that does not use
2021 	 * RCU. The irq tracer can be called out of RCU scope.
2022 	 */
2023 	if (tsk == current)
2024 		max_data->uid = current_uid();
2025 	else
2026 		max_data->uid = task_uid(tsk);
2027 
2028 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2029 	max_data->policy = tsk->policy;
2030 	max_data->rt_priority = tsk->rt_priority;
2031 
2032 	/* record this task's comm */
2033 	tracing_record_cmdline(tsk);
2034 	latency_fsnotify(tr);
2035 }
2036 
2037 /**
2038  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2039  * @tr: tracer
2040  * @tsk: the task with the latency
2041  * @cpu: The cpu that initiated the trace.
2042  * @cond_data: User data associated with a conditional snapshot
2043  *
2044  * Flip the buffers between the @tr and the max_tr and record information
2045  * about which task was the cause of this latency.
2046  */
2047 void
2048 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2049 	      void *cond_data)
2050 {
2051 	if (tr->stop_count)
2052 		return;
2053 
2054 	WARN_ON_ONCE(!irqs_disabled());
2055 
2056 	if (!tr->allocated_snapshot) {
2057 		/* Only the nop tracer should hit this when disabling */
2058 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2059 		return;
2060 	}
2061 
2062 	arch_spin_lock(&tr->max_lock);
2063 
2064 	/* Inherit the recordable setting from array_buffer */
2065 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2066 		ring_buffer_record_on(tr->max_buffer.buffer);
2067 	else
2068 		ring_buffer_record_off(tr->max_buffer.buffer);
2069 
2070 #ifdef CONFIG_TRACER_SNAPSHOT
2071 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2072 		arch_spin_unlock(&tr->max_lock);
2073 		return;
2074 	}
2075 #endif
2076 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2077 
2078 	__update_max_tr(tr, tsk, cpu);
2079 
2080 	arch_spin_unlock(&tr->max_lock);
2081 
2082 	/* Any waiters on the old snapshot buffer need to wake up */
2083 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2084 }
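/*
 * Illustrative sketch (simplified, not part of this file): a latency tracer
 * that has just measured a new worst-case latency records it roughly like
 * this, with interrupts already disabled as the WARN_ON_ONCE() above
 * expects:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 *
 * The wakeup and irqsoff tracers in this directory use update_max_tr() and
 * update_max_tr_single() in essentially this way.
 */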
2085 
2086 /**
2087  * update_max_tr_single - only copy one trace over, and reset the rest
2088  * @tr: tracer
2089  * @tsk: task with the latency
2090  * @cpu: the cpu of the buffer to copy.
2091  *
2092  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2093  */
2094 void
2095 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2096 {
2097 	int ret;
2098 
2099 	if (tr->stop_count)
2100 		return;
2101 
2102 	WARN_ON_ONCE(!irqs_disabled());
2103 	if (!tr->allocated_snapshot) {
2104 		/* Only the nop tracer should hit this when disabling */
2105 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2106 		return;
2107 	}
2108 
2109 	arch_spin_lock(&tr->max_lock);
2110 
2111 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2112 
2113 	if (ret == -EBUSY) {
2114 		/*
2115 		 * We failed to swap the buffer due to a commit taking
2116 		 * place on this CPU. We fail to record, but we reset
2117 		 * the max trace buffer (no one writes directly to it)
2118 		 * and flag that it failed.
2119 		 * Another possible reason is that a resize is in progress.
2120 		 */
2121 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2122 			"Failed to swap buffers due to commit or resize in progress\n");
2123 	}
2124 
2125 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2126 
2127 	__update_max_tr(tr, tsk, cpu);
2128 	arch_spin_unlock(&tr->max_lock);
2129 }
2130 
2131 #endif /* CONFIG_TRACER_MAX_TRACE */
2132 
2133 struct pipe_wait {
2134 	struct trace_iterator		*iter;
2135 	int				wait_index;
2136 };
2137 
2138 static bool wait_pipe_cond(void *data)
2139 {
2140 	struct pipe_wait *pwait = data;
2141 	struct trace_iterator *iter = pwait->iter;
2142 
2143 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2144 		return true;
2145 
2146 	return iter->closed;
2147 }
2148 
2149 static int wait_on_pipe(struct trace_iterator *iter, int full)
2150 {
2151 	struct pipe_wait pwait;
2152 	int ret;
2153 
2154 	/* Iterators are static; they should be either filled or empty */
2155 	if (trace_buffer_iter(iter, iter->cpu_file))
2156 		return 0;
2157 
2158 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2159 	pwait.iter = iter;
2160 
2161 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2162 			       wait_pipe_cond, &pwait);
2163 
2164 #ifdef CONFIG_TRACER_MAX_TRACE
2165 	/*
2166 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2167 	 * to happen, this would now be the main buffer.
2168 	 */
2169 	if (iter->snapshot)
2170 		iter->array_buffer = &iter->tr->max_buffer;
2171 #endif
2172 	return ret;
2173 }
2174 
2175 #ifdef CONFIG_FTRACE_STARTUP_TEST
2176 static bool selftests_can_run;
2177 
2178 struct trace_selftests {
2179 	struct list_head		list;
2180 	struct tracer			*type;
2181 };
2182 
2183 static LIST_HEAD(postponed_selftests);
2184 
2185 static int save_selftest(struct tracer *type)
2186 {
2187 	struct trace_selftests *selftest;
2188 
2189 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2190 	if (!selftest)
2191 		return -ENOMEM;
2192 
2193 	selftest->type = type;
2194 	list_add(&selftest->list, &postponed_selftests);
2195 	return 0;
2196 }
2197 
2198 static int run_tracer_selftest(struct tracer *type)
2199 {
2200 	struct trace_array *tr = &global_trace;
2201 	struct tracer *saved_tracer = tr->current_trace;
2202 	int ret;
2203 
2204 	if (!type->selftest || tracing_selftest_disabled)
2205 		return 0;
2206 
2207 	/*
2208 	 * If a tracer registers early in boot up (before scheduling is
2209 	 * initialized and such), then do not run its selftests yet.
2210 	 * Instead, run them a little later in the boot process.
2211 	 */
2212 	if (!selftests_can_run)
2213 		return save_selftest(type);
2214 
2215 	if (!tracing_is_on()) {
2216 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2217 			type->name);
2218 		return 0;
2219 	}
2220 
2221 	/*
2222 	 * Run a selftest on this tracer.
2223 	 * Here we reset the trace buffer, and set the current
2224 	 * tracer to be this tracer. The tracer can then run some
2225 	 * internal tracing to verify that everything is in order.
2226 	 * If we fail, we do not register this tracer.
2227 	 */
2228 	tracing_reset_online_cpus(&tr->array_buffer);
2229 
2230 	tr->current_trace = type;
2231 
2232 #ifdef CONFIG_TRACER_MAX_TRACE
2233 	if (type->use_max_tr) {
2234 		/* If we expanded the buffers, make sure the max is expanded too */
2235 		if (tr->ring_buffer_expanded)
2236 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2237 					   RING_BUFFER_ALL_CPUS);
2238 		tr->allocated_snapshot = true;
2239 	}
2240 #endif
2241 
2242 	/* the test is responsible for initializing and enabling */
2243 	pr_info("Testing tracer %s: ", type->name);
2244 	ret = type->selftest(type, tr);
2245 	/* the test is responsible for resetting too */
2246 	tr->current_trace = saved_tracer;
2247 	if (ret) {
2248 		printk(KERN_CONT "FAILED!\n");
2249 		/* Add the warning after printing 'FAILED' */
2250 		WARN_ON(1);
2251 		return -1;
2252 	}
2253 	/* Only reset on passing, to avoid touching corrupted buffers */
2254 	tracing_reset_online_cpus(&tr->array_buffer);
2255 
2256 #ifdef CONFIG_TRACER_MAX_TRACE
2257 	if (type->use_max_tr) {
2258 		tr->allocated_snapshot = false;
2259 
2260 		/* Shrink the max buffer again */
2261 		if (tr->ring_buffer_expanded)
2262 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2263 					   RING_BUFFER_ALL_CPUS);
2264 	}
2265 #endif
2266 
2267 	printk(KERN_CONT "PASSED\n");
2268 	return 0;
2269 }
2270 
2271 static int do_run_tracer_selftest(struct tracer *type)
2272 {
2273 	int ret;
2274 
2275 	/*
2276 	 * Tests can take a long time, especially if they are run one after the
2277 	 * other, as does happen during bootup when all the tracers are
2278 	 * registered. This could cause the soft lockup watchdog to trigger.
2279 	 */
2280 	cond_resched();
2281 
2282 	tracing_selftest_running = true;
2283 	ret = run_tracer_selftest(type);
2284 	tracing_selftest_running = false;
2285 
2286 	return ret;
2287 }
2288 
2289 static __init int init_trace_selftests(void)
2290 {
2291 	struct trace_selftests *p, *n;
2292 	struct tracer *t, **last;
2293 	int ret;
2294 
2295 	selftests_can_run = true;
2296 
2297 	guard(mutex)(&trace_types_lock);
2298 
2299 	if (list_empty(&postponed_selftests))
2300 		return 0;
2301 
2302 	pr_info("Running postponed tracer tests:\n");
2303 
2304 	tracing_selftest_running = true;
2305 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2306 		/* This loop can take minutes when sanitizers are enabled, so
2307 		 * let's make sure we allow RCU processing.
2308 		 */
2309 		cond_resched();
2310 		ret = run_tracer_selftest(p->type);
2311 		/* If the test fails, then warn and remove from available_tracers */
2312 		if (ret < 0) {
2313 			WARN(1, "tracer: %s failed selftest, disabling\n",
2314 			     p->type->name);
2315 			last = &trace_types;
2316 			for (t = trace_types; t; t = t->next) {
2317 				if (t == p->type) {
2318 					*last = t->next;
2319 					break;
2320 				}
2321 				last = &t->next;
2322 			}
2323 		}
2324 		list_del(&p->list);
2325 		kfree(p);
2326 	}
2327 	tracing_selftest_running = false;
2328 
2329 	return 0;
2330 }
2331 core_initcall(init_trace_selftests);
2332 #else
2333 static inline int do_run_tracer_selftest(struct tracer *type)
2334 {
2335 	return 0;
2336 }
2337 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2338 
2339 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2340 
2341 static void __init apply_trace_boot_options(void);
2342 
2343 /**
2344  * register_tracer - register a tracer with the ftrace system.
2345  * @type: the plugin for the tracer
2346  *
2347  * Register a new plugin tracer.
2348  */
2349 int __init register_tracer(struct tracer *type)
2350 {
2351 	struct tracer *t;
2352 	int ret = 0;
2353 
2354 	if (!type->name) {
2355 		pr_info("Tracer must have a name\n");
2356 		return -1;
2357 	}
2358 
2359 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2360 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2361 		return -1;
2362 	}
2363 
2364 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2365 		pr_warn("Can not register tracer %s due to lockdown\n",
2366 			   type->name);
2367 		return -EPERM;
2368 	}
2369 
2370 	mutex_lock(&trace_types_lock);
2371 
2372 	for (t = trace_types; t; t = t->next) {
2373 		if (strcmp(type->name, t->name) == 0) {
2374 			/* already found */
2375 			pr_info("Tracer %s already registered\n",
2376 				type->name);
2377 			ret = -1;
2378 			goto out;
2379 		}
2380 	}
2381 
2382 	if (!type->set_flag)
2383 		type->set_flag = &dummy_set_flag;
2384 	if (!type->flags) {
2385 		/* allocate a dummy tracer_flags */
2386 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2387 		if (!type->flags) {
2388 			ret = -ENOMEM;
2389 			goto out;
2390 		}
2391 		type->flags->val = 0;
2392 		type->flags->opts = dummy_tracer_opt;
2393 	} else
2394 		if (!type->flags->opts)
2395 			type->flags->opts = dummy_tracer_opt;
2396 
2397 	/* store the tracer for __set_tracer_option */
2398 	type->flags->trace = type;
2399 
2400 	ret = do_run_tracer_selftest(type);
2401 	if (ret < 0)
2402 		goto out;
2403 
2404 	type->next = trace_types;
2405 	trace_types = type;
2406 	add_tracer_options(&global_trace, type);
2407 
2408  out:
2409 	mutex_unlock(&trace_types_lock);
2410 
2411 	if (ret || !default_bootup_tracer)
2412 		goto out_unlock;
2413 
2414 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2415 		goto out_unlock;
2416 
2417 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2418 	/* Do we want this tracer to start on bootup? */
2419 	tracing_set_tracer(&global_trace, type->name);
2420 	default_bootup_tracer = NULL;
2421 
2422 	apply_trace_boot_options();
2423 
2424 	/* Disable other selftests, since running this tracer will break them. */
2425 	disable_tracing_selftest("running a tracer");
2426 
2427  out_unlock:
2428 	return ret;
2429 }
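/*
 * Illustrative sketch (not part of this file): a minimal tracer registers
 * itself from an __init function, since register_tracer() itself is __init.
 * "mytracer" and its callbacks are made-up names:
 *
 *	static struct tracer mytracer __read_mostly = {
 *		.name	= "mytracer",		// shows up in available_tracers
 *		.init	= mytracer_init,	// hypothetical callbacks
 *		.reset	= mytracer_reset,
 *	};
 *
 *	static __init int mytracer_register(void)
 *	{
 *		return register_tracer(&mytracer);
 *	}
 *	core_initcall(mytracer_register);
 *
 * The tracers in this directory (e.g. trace_irqsoff.c) register themselves
 * in much the same way.
 */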
2430 
2431 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2432 {
2433 	struct trace_buffer *buffer = buf->buffer;
2434 
2435 	if (!buffer)
2436 		return;
2437 
2438 	ring_buffer_record_disable(buffer);
2439 
2440 	/* Make sure all commits have finished */
2441 	synchronize_rcu();
2442 	ring_buffer_reset_cpu(buffer, cpu);
2443 
2444 	ring_buffer_record_enable(buffer);
2445 }
2446 
2447 void tracing_reset_online_cpus(struct array_buffer *buf)
2448 {
2449 	struct trace_buffer *buffer = buf->buffer;
2450 
2451 	if (!buffer)
2452 		return;
2453 
2454 	ring_buffer_record_disable(buffer);
2455 
2456 	/* Make sure all commits have finished */
2457 	synchronize_rcu();
2458 
2459 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2460 
2461 	ring_buffer_reset_online_cpus(buffer);
2462 
2463 	ring_buffer_record_enable(buffer);
2464 }
2465 
2466 static void tracing_reset_all_cpus(struct array_buffer *buf)
2467 {
2468 	struct trace_buffer *buffer = buf->buffer;
2469 
2470 	if (!buffer)
2471 		return;
2472 
2473 	ring_buffer_record_disable(buffer);
2474 
2475 	/* Make sure all commits have finished */
2476 	synchronize_rcu();
2477 
2478 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2479 
2480 	ring_buffer_reset(buffer);
2481 
2482 	ring_buffer_record_enable(buffer);
2483 }
2484 
2485 /* Must have trace_types_lock held */
2486 void tracing_reset_all_online_cpus_unlocked(void)
2487 {
2488 	struct trace_array *tr;
2489 
2490 	lockdep_assert_held(&trace_types_lock);
2491 
2492 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2493 		if (!tr->clear_trace)
2494 			continue;
2495 		tr->clear_trace = false;
2496 		tracing_reset_online_cpus(&tr->array_buffer);
2497 #ifdef CONFIG_TRACER_MAX_TRACE
2498 		tracing_reset_online_cpus(&tr->max_buffer);
2499 #endif
2500 	}
2501 }
2502 
2503 void tracing_reset_all_online_cpus(void)
2504 {
2505 	mutex_lock(&trace_types_lock);
2506 	tracing_reset_all_online_cpus_unlocked();
2507 	mutex_unlock(&trace_types_lock);
2508 }
2509 
2510 int is_tracing_stopped(void)
2511 {
2512 	return global_trace.stop_count;
2513 }
2514 
2515 static void tracing_start_tr(struct trace_array *tr)
2516 {
2517 	struct trace_buffer *buffer;
2518 	unsigned long flags;
2519 
2520 	if (tracing_disabled)
2521 		return;
2522 
2523 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2524 	if (--tr->stop_count) {
2525 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2526 			/* Someone screwed up their debugging */
2527 			tr->stop_count = 0;
2528 		}
2529 		goto out;
2530 	}
2531 
2532 	/* Prevent the buffers from switching */
2533 	arch_spin_lock(&tr->max_lock);
2534 
2535 	buffer = tr->array_buffer.buffer;
2536 	if (buffer)
2537 		ring_buffer_record_enable(buffer);
2538 
2539 #ifdef CONFIG_TRACER_MAX_TRACE
2540 	buffer = tr->max_buffer.buffer;
2541 	if (buffer)
2542 		ring_buffer_record_enable(buffer);
2543 #endif
2544 
2545 	arch_spin_unlock(&tr->max_lock);
2546 
2547  out:
2548 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2549 }
2550 
2551 /**
2552  * tracing_start - quick start of the tracer
2553  *
2554  * If tracing is enabled but was stopped by tracing_stop,
2555  * this will start the tracer back up.
2556  */
2557 void tracing_start(void)
2559 {
2560 	return tracing_start_tr(&global_trace);
2561 }
2562 
2563 static void tracing_stop_tr(struct trace_array *tr)
2564 {
2565 	struct trace_buffer *buffer;
2566 	unsigned long flags;
2567 
2568 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2569 	if (tr->stop_count++)
2570 		goto out;
2571 
2572 	/* Prevent the buffers from switching */
2573 	arch_spin_lock(&tr->max_lock);
2574 
2575 	buffer = tr->array_buffer.buffer;
2576 	if (buffer)
2577 		ring_buffer_record_disable(buffer);
2578 
2579 #ifdef CONFIG_TRACER_MAX_TRACE
2580 	buffer = tr->max_buffer.buffer;
2581 	if (buffer)
2582 		ring_buffer_record_disable(buffer);
2583 #endif
2584 
2585 	arch_spin_unlock(&tr->max_lock);
2586 
2587  out:
2588 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2589 }
2590 
2591 /**
2592  * tracing_stop - quick stop of the tracer
2593  *
2594  * Lightweight way to stop tracing. Use in conjunction with
2595  * tracing_start.
2596  */
2597 void tracing_stop(void)
2598 {
2599 	return tracing_stop_tr(&global_trace);
2600 }
2601 
2602 /*
2603  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2604  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2605  * simplifies those functions and keeps them in sync.
2606  */
2607 enum print_line_t trace_handle_return(struct trace_seq *s)
2608 {
2609 	return trace_seq_has_overflowed(s) ?
2610 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2611 }
2612 EXPORT_SYMBOL_GPL(trace_handle_return);
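/*
 * Illustrative sketch (not part of this file): event output callbacks use
 * this helper as their tail call, e.g. (simplified from the handlers in
 * trace_output.c):
 *
 *	static enum print_line_t my_event_print(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "my event\n");
 *		return trace_handle_return(s);
 *	}
 *
 * A partially written line is then reported as TRACE_TYPE_PARTIAL_LINE
 * without every handler having to open-code the overflow check.
 */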
2613 
2614 static unsigned short migration_disable_value(void)
2615 {
2616 #if defined(CONFIG_SMP)
2617 	return current->migration_disabled;
2618 #else
2619 	return 0;
2620 #endif
2621 }
2622 
2623 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2624 {
2625 	unsigned int trace_flags = irqs_status;
2626 	unsigned int pc;
2627 
2628 	pc = preempt_count();
2629 
2630 	if (pc & NMI_MASK)
2631 		trace_flags |= TRACE_FLAG_NMI;
2632 	if (pc & HARDIRQ_MASK)
2633 		trace_flags |= TRACE_FLAG_HARDIRQ;
2634 	if (in_serving_softirq())
2635 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2636 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2637 		trace_flags |= TRACE_FLAG_BH_OFF;
2638 
2639 	if (tif_need_resched())
2640 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2641 	if (test_preempt_need_resched())
2642 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2643 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2644 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2645 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2646 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2647 }
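/*
 * Note on the packed value returned above: its consumers split it as
 * (trace_ctx & 0xff) for the entry's preempt_count byte and
 * (trace_ctx >> 16) for the entry's flags, so the layout is:
 *
 *	bits  0..3	preemption depth, capped at 15
 *	bits  4..7	migration-disable depth, capped at 15
 *	bits  8..15	unused
 *	bits 16..31	TRACE_FLAG_* bits (the irqs_status passed in plus
 *			the flags computed above)
 */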
2648 
2649 struct ring_buffer_event *
2650 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2651 			  int type,
2652 			  unsigned long len,
2653 			  unsigned int trace_ctx)
2654 {
2655 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2656 }
2657 
2658 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2659 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2660 static int trace_buffered_event_ref;
2661 
2662 /**
2663  * trace_buffered_event_enable - enable buffering events
2664  *
2665  * When events are being filtered, it is quicker to use a temporary
2666  * buffer to write the event data into if there's a likely chance
2667  * that it will not be committed. Discarding an event from the ring
2668  * buffer is not as fast as committing one, and is much slower than
2669  * copying the data into a temporary buffer first.
2670  *
2671  * When an event is to be filtered, allocate per-CPU buffers to
2672  * write the event data into; if the event is filtered and discarded
2673  * it is simply dropped, otherwise the entire data is committed
2674  * in one shot.
2675  */
2676 void trace_buffered_event_enable(void)
2677 {
2678 	struct ring_buffer_event *event;
2679 	struct page *page;
2680 	int cpu;
2681 
2682 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2683 
2684 	if (trace_buffered_event_ref++)
2685 		return;
2686 
2687 	for_each_tracing_cpu(cpu) {
2688 		page = alloc_pages_node(cpu_to_node(cpu),
2689 					GFP_KERNEL | __GFP_NORETRY, 0);
2690 		/* This is just an optimization and can handle failures */
2691 		if (!page) {
2692 			pr_err("Failed to allocate event buffer\n");
2693 			break;
2694 		}
2695 
2696 		event = page_address(page);
2697 		memset(event, 0, sizeof(*event));
2698 
2699 		per_cpu(trace_buffered_event, cpu) = event;
2700 
2701 		preempt_disable();
2702 		if (cpu == smp_processor_id() &&
2703 		    __this_cpu_read(trace_buffered_event) !=
2704 		    per_cpu(trace_buffered_event, cpu))
2705 			WARN_ON_ONCE(1);
2706 		preempt_enable();
2707 	}
2708 }
2709 
2710 static void enable_trace_buffered_event(void *data)
2711 {
2712 	/* Probably not needed, but do it anyway */
2713 	smp_rmb();
2714 	this_cpu_dec(trace_buffered_event_cnt);
2715 }
2716 
2717 static void disable_trace_buffered_event(void *data)
2718 {
2719 	this_cpu_inc(trace_buffered_event_cnt);
2720 }
2721 
2722 /**
2723  * trace_buffered_event_disable - disable buffering events
2724  *
2725  * When a filter is removed, it is faster to not use the buffered
2726  * events, and to commit directly into the ring buffer. Free up
2727  * the temp buffers when there are no more users. This requires
2728  * special synchronization with current events.
2729  */
2730 void trace_buffered_event_disable(void)
2731 {
2732 	int cpu;
2733 
2734 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2735 
2736 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2737 		return;
2738 
2739 	if (--trace_buffered_event_ref)
2740 		return;
2741 
2742 	/* For each CPU, set the buffer as used. */
2743 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2744 			 NULL, true);
2745 
2746 	/* Wait for all current users to finish */
2747 	synchronize_rcu();
2748 
2749 	for_each_tracing_cpu(cpu) {
2750 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2751 		per_cpu(trace_buffered_event, cpu) = NULL;
2752 	}
2753 
2754 	/*
2755 	 * Wait for all CPUs that potentially started checking if they can use
2756 	 * their event buffer only after the previous synchronize_rcu() call and
2757 	 * they still read a valid pointer from trace_buffered_event. It must be
2758 	 * ensured that they do not see a cleared trace_buffered_event_cnt, else
2759 	 * they could wrongly decide to use the pointed-to buffer which is now freed.
2760 	 */
2761 	synchronize_rcu();
2762 
2763 	/* For each CPU, relinquish the buffer */
2764 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2765 			 true);
2766 }
2767 
2768 static struct trace_buffer *temp_buffer;
2769 
2770 struct ring_buffer_event *
2771 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2772 			  struct trace_event_file *trace_file,
2773 			  int type, unsigned long len,
2774 			  unsigned int trace_ctx)
2775 {
2776 	struct ring_buffer_event *entry;
2777 	struct trace_array *tr = trace_file->tr;
2778 	int val;
2779 
2780 	*current_rb = tr->array_buffer.buffer;
2781 
2782 	if (!tr->no_filter_buffering_ref &&
2783 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2784 		preempt_disable_notrace();
2785 		/*
2786 		 * Filtering is on, so try to use the per cpu buffer first.
2787 		 * This buffer will simulate a ring_buffer_event,
2788 		 * where the type_len is zero and the array[0] will
2789 		 * hold the full length.
2790 		 * (see include/linux/ring_buffer.h for details on
2791 		 *  how the ring_buffer_event is structured).
2792 		 *
2793 		 * Using a temp buffer during filtering and copying it
2794 		 * on a matched filter is quicker than writing directly
2795 		 * into the ring buffer and then discarding it when
2796 		 * it doesn't match. That is because the discard
2797 		 * requires several atomic operations to get right.
2798 		 * Copying on match and doing nothing on a failed match
2799 		 * is still quicker than not copying on a match but having
2800 		 * to discard out of the ring buffer on a failed match.
2801 		 */
2802 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2803 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2804 
2805 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2806 
2807 			/*
2808 			 * Preemption is disabled, but interrupts and NMIs
2809 			 * can still come in now. If that happens after
2810 			 * the above increment, then it will have to go
2811 			 * back to the old method of allocating the event
2812 			 * on the ring buffer, and if the filter fails, it
2813 			 * will have to call ring_buffer_discard_commit()
2814 			 * to remove it.
2815 			 *
2816 			 * Need to also check the unlikely case that the
2817 			 * length is bigger than the temp buffer size.
2818 			 * If that happens, then the reserve is pretty much
2819 			 * guaranteed to fail, as the ring buffer currently
2820 			 * only allows events less than a page. But that may
2821 			 * change in the future, so let the ring buffer reserve
2822 			 * handle the failure in that case.
2823 			 */
2824 			if (val == 1 && likely(len <= max_len)) {
2825 				trace_event_setup(entry, type, trace_ctx);
2826 				entry->array[0] = len;
2827 				/* Return with preemption disabled */
2828 				return entry;
2829 			}
2830 			this_cpu_dec(trace_buffered_event_cnt);
2831 		}
2832 		/* __trace_buffer_lock_reserve() disables preemption */
2833 		preempt_enable_notrace();
2834 	}
2835 
2836 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2837 					    trace_ctx);
2838 	/*
2839 	 * If tracing is off, but we have triggers enabled,
2840 	 * we still need to look at the event data. Use the temp_buffer
2841 	 * to store the trace event for the trigger to use. It's recursion
2842 	 * safe and will not be recorded anywhere.
2843 	 */
2844 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2845 		*current_rb = temp_buffer;
2846 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2847 						    trace_ctx);
2848 	}
2849 	return entry;
2850 }
2851 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2852 
2853 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2854 static DEFINE_MUTEX(tracepoint_printk_mutex);
2855 
2856 static void output_printk(struct trace_event_buffer *fbuffer)
2857 {
2858 	struct trace_event_call *event_call;
2859 	struct trace_event_file *file;
2860 	struct trace_event *event;
2861 	unsigned long flags;
2862 	struct trace_iterator *iter = tracepoint_print_iter;
2863 
2864 	/* We should never get here if iter is NULL */
2865 	if (WARN_ON_ONCE(!iter))
2866 		return;
2867 
2868 	event_call = fbuffer->trace_file->event_call;
2869 	if (!event_call || !event_call->event.funcs ||
2870 	    !event_call->event.funcs->trace)
2871 		return;
2872 
2873 	file = fbuffer->trace_file;
2874 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2875 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2876 	     !filter_match_preds(file->filter, fbuffer->entry)))
2877 		return;
2878 
2879 	event = &fbuffer->trace_file->event_call->event;
2880 
2881 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2882 	trace_seq_init(&iter->seq);
2883 	iter->ent = fbuffer->entry;
2884 	event_call->event.funcs->trace(iter, 0, event);
2885 	trace_seq_putc(&iter->seq, 0);
2886 	printk("%s", iter->seq.buffer);
2887 
2888 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2889 }
2890 
2891 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2892 			     void *buffer, size_t *lenp,
2893 			     loff_t *ppos)
2894 {
2895 	int save_tracepoint_printk;
2896 	int ret;
2897 
2898 	guard(mutex)(&tracepoint_printk_mutex);
2899 	save_tracepoint_printk = tracepoint_printk;
2900 
2901 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2902 
2903 	/*
2904 	 * This will force exiting early, as tracepoint_printk
2905 	 * is always zero when tracepoint_print_iter is not allocated.
2906 	 */
2907 	if (!tracepoint_print_iter)
2908 		tracepoint_printk = 0;
2909 
2910 	if (save_tracepoint_printk == tracepoint_printk)
2911 		return ret;
2912 
2913 	if (tracepoint_printk)
2914 		static_key_enable(&tracepoint_printk_key.key);
2915 	else
2916 		static_key_disable(&tracepoint_printk_key.key);
2917 
2918 	return ret;
2919 }
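/*
 * Illustrative note (not part of this file): the static key toggled here is
 * also what the "tp_printk" boot option arms early in boot; at run time an
 * administrator can reach the same switch by writing 0 or 1 to
 * /proc/sys/kernel/tracepoint_printk, which lands in this handler and, when
 * enabled, routes enabled trace events through output_printk() above.
 */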
2920 
2921 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2922 {
2923 	enum event_trigger_type tt = ETT_NONE;
2924 	struct trace_event_file *file = fbuffer->trace_file;
2925 
2926 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2927 			fbuffer->entry, &tt))
2928 		goto discard;
2929 
2930 	if (static_key_false(&tracepoint_printk_key.key))
2931 		output_printk(fbuffer);
2932 
2933 	if (static_branch_unlikely(&trace_event_exports_enabled))
2934 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2935 
2936 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2937 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2938 
2939 discard:
2940 	if (tt)
2941 		event_triggers_post_call(file, tt);
2943 }
2944 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2945 
2946 /*
2947  * Skip 3:
2948  *
2949  *   trace_buffer_unlock_commit_regs()
2950  *   trace_event_buffer_commit()
2951  *   trace_event_raw_event_xxx()
2952  */
2953 # define STACK_SKIP 3
2954 
2955 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2956 				     struct trace_buffer *buffer,
2957 				     struct ring_buffer_event *event,
2958 				     unsigned int trace_ctx,
2959 				     struct pt_regs *regs)
2960 {
2961 	__buffer_unlock_commit(buffer, event);
2962 
2963 	/*
2964 	 * If regs is not set, then skip the necessary functions.
2965 	 * Note, we can still get here via blktrace, wakeup tracer
2966 	 * and mmiotrace, but that's ok if they lose a function or
2967 	 * two. They are not that meaningful.
2968 	 */
2969 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2970 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2971 }
2972 
2973 /*
2974  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2975  */
2976 void
2977 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2978 				   struct ring_buffer_event *event)
2979 {
2980 	__buffer_unlock_commit(buffer, event);
2981 }
2982 
2983 void
2984 trace_function(struct trace_array *tr, unsigned long ip,
2985 	       unsigned long parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2986 {
2987 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2988 	struct ring_buffer_event *event;
2989 	struct ftrace_entry *entry;
2990 	int size = sizeof(*entry);
2991 
2992 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2993 
2994 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2995 					    trace_ctx);
2996 	if (!event)
2997 		return;
2998 	entry	= ring_buffer_event_data(event);
2999 	entry->ip			= ip;
3000 	entry->parent_ip		= parent_ip;
3001 
3002 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
3003 	if (fregs) {
3004 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
3005 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
3006 	}
3007 #endif
3008 
3009 	if (static_branch_unlikely(&trace_function_exports_enabled))
3010 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3011 	__buffer_unlock_commit(buffer, event);
3012 }
3013 
3014 #ifdef CONFIG_STACKTRACE
3015 
3016 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3017 #define FTRACE_KSTACK_NESTING	4
3018 
3019 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
3020 
3021 struct ftrace_stack {
3022 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3023 };
3024 
3026 struct ftrace_stacks {
3027 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3028 };
3029 
3030 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3031 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3032 
3033 static void __ftrace_trace_stack(struct trace_array *tr,
3034 				 struct trace_buffer *buffer,
3035 				 unsigned int trace_ctx,
3036 				 int skip, struct pt_regs *regs)
3037 {
3038 	struct ring_buffer_event *event;
3039 	unsigned int size, nr_entries;
3040 	struct ftrace_stack *fstack;
3041 	struct stack_entry *entry;
3042 	int stackidx;
3043 
3044 	/*
3045 	 * Add one, for this function and the call to stack_trace_save().
3046 	 * If regs is set, then these functions will not be in the way.
3047 	 */
3048 #ifndef CONFIG_UNWINDER_ORC
3049 	if (!regs)
3050 		skip++;
3051 #endif
3052 
3053 	preempt_disable_notrace();
3054 
3055 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3056 
3057 	/* This should never happen. If it does, yell once and skip */
3058 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3059 		goto out;
3060 
3061 	/*
3062 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3063 	 * interrupt will either see the value pre-increment or
3064 	 * post-increment. If the interrupt happens pre-increment, it will
3065 	 * have restored the counter when it returns. We just need a barrier
3066 	 * to keep gcc from moving things around.
3067 	 */
3068 	barrier();
3069 
3070 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3071 	size = ARRAY_SIZE(fstack->calls);
3072 
3073 	if (regs) {
3074 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3075 						   size, skip);
3076 	} else {
3077 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3078 	}
3079 
3080 #ifdef CONFIG_DYNAMIC_FTRACE
3081 	/* Mark entry of stack trace as trampoline code */
3082 	if (tr->ops && tr->ops->trampoline) {
3083 		unsigned long tramp_start = tr->ops->trampoline;
3084 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3085 		unsigned long *calls = fstack->calls;
3086 
3087 		for (int i = 0; i < nr_entries; i++) {
3088 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3089 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3090 		}
3091 	}
3092 #endif
3093 
3094 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3095 				    struct_size(entry, caller, nr_entries),
3096 				    trace_ctx);
3097 	if (!event)
3098 		goto out;
3099 	entry = ring_buffer_event_data(event);
3100 
3101 	entry->size = nr_entries;
3102 	memcpy(&entry->caller, fstack->calls,
3103 	       flex_array_size(entry, caller, nr_entries));
3104 
3105 	__buffer_unlock_commit(buffer, event);
3106 
3107  out:
3108 	/* Again, don't let gcc optimize things here */
3109 	barrier();
3110 	__this_cpu_dec(ftrace_stack_reserve);
3111 	preempt_enable_notrace();
3112 
3113 }
3114 
3115 static inline void ftrace_trace_stack(struct trace_array *tr,
3116 				      struct trace_buffer *buffer,
3117 				      unsigned int trace_ctx,
3118 				      int skip, struct pt_regs *regs)
3119 {
3120 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3121 		return;
3122 
3123 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3124 }
3125 
3126 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3127 		   int skip)
3128 {
3129 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3130 
3131 	if (rcu_is_watching()) {
3132 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3133 		return;
3134 	}
3135 
3136 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3137 		return;
3138 
3139 	/*
3140 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3141 	 * but if the above rcu_is_watching() failed, then the NMI
3142 	 * triggered someplace critical, and ct_irq_enter() should
3143 	 * not be called from NMI.
3144 	 */
3145 	if (unlikely(in_nmi()))
3146 		return;
3147 
3148 	ct_irq_enter_irqson();
3149 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3150 	ct_irq_exit_irqson();
3151 }
3152 
3153 /**
3154  * trace_dump_stack - record a stack back trace in the trace buffer
3155  * @skip: Number of functions to skip (helper handlers)
3156  */
3157 void trace_dump_stack(int skip)
3158 {
3159 	if (tracing_disabled || tracing_selftest_running)
3160 		return;
3161 
3162 #ifndef CONFIG_UNWINDER_ORC
3163 	/* Skip 1 to skip this function. */
3164 	skip++;
3165 #endif
3166 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3167 				tracing_gen_ctx(), skip, NULL);
3168 }
3169 EXPORT_SYMBOL_GPL(trace_dump_stack);
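/*
 * Illustrative usage (not part of this file): a debugging site that wants
 * the backtrace in the ring buffer rather than the console can simply do
 *
 *	trace_dump_stack(0);
 *
 * which records a TRACE_STACK entry at the caller, unlike dump_stack(),
 * which prints to the kernel log.
 */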
3170 
3171 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3172 static DEFINE_PER_CPU(int, user_stack_count);
3173 
3174 static void
3175 ftrace_trace_userstack(struct trace_array *tr,
3176 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3177 {
3178 	struct ring_buffer_event *event;
3179 	struct userstack_entry *entry;
3180 
3181 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3182 		return;
3183 
3184 	/*
3185 	 * NMIs cannot handle page faults, even with fixups.
3186 	 * Saving the user stack can (and often does) fault.
3187 	 */
3188 	if (unlikely(in_nmi()))
3189 		return;
3190 
3191 	/*
3192 	 * prevent recursion, since the user stack tracing may
3193 	 * trigger other kernel events.
3194 	 */
3195 	preempt_disable();
3196 	if (__this_cpu_read(user_stack_count))
3197 		goto out;
3198 
3199 	__this_cpu_inc(user_stack_count);
3200 
3201 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3202 					    sizeof(*entry), trace_ctx);
3203 	if (!event)
3204 		goto out_drop_count;
3205 	entry	= ring_buffer_event_data(event);
3206 
3207 	entry->tgid		= current->tgid;
3208 	memset(&entry->caller, 0, sizeof(entry->caller));
3209 
3210 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3211 	__buffer_unlock_commit(buffer, event);
3212 
3213  out_drop_count:
3214 	__this_cpu_dec(user_stack_count);
3215  out:
3216 	preempt_enable();
3217 }
3218 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3219 static void ftrace_trace_userstack(struct trace_array *tr,
3220 				   struct trace_buffer *buffer,
3221 				   unsigned int trace_ctx)
3222 {
3223 }
3224 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3225 
3226 #endif /* CONFIG_STACKTRACE */
3227 
3228 static inline void
3229 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3230 			  unsigned long long delta)
3231 {
3232 	entry->bottom_delta_ts = delta & U32_MAX;
3233 	entry->top_delta_ts = (delta >> 32);
3234 }
3235 
3236 void trace_last_func_repeats(struct trace_array *tr,
3237 			     struct trace_func_repeats *last_info,
3238 			     unsigned int trace_ctx)
3239 {
3240 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3241 	struct func_repeats_entry *entry;
3242 	struct ring_buffer_event *event;
3243 	u64 delta;
3244 
3245 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3246 					    sizeof(*entry), trace_ctx);
3247 	if (!event)
3248 		return;
3249 
3250 	delta = ring_buffer_event_time_stamp(buffer, event) -
3251 		last_info->ts_last_call;
3252 
3253 	entry = ring_buffer_event_data(event);
3254 	entry->ip = last_info->ip;
3255 	entry->parent_ip = last_info->parent_ip;
3256 	entry->count = last_info->count;
3257 	func_repeats_set_delta_ts(entry, delta);
3258 
3259 	__buffer_unlock_commit(buffer, event);
3260 }
3261 
3262 /* created for use with alloc_percpu */
3263 struct trace_buffer_struct {
3264 	int nesting;
3265 	char buffer[4][TRACE_BUF_SIZE];
3266 };
3267 
3268 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3269 
3270 /*
3271  * This allows for lockless recording. If we're nested too deeply, then
3272  * this returns NULL.
3273  */
3274 static char *get_trace_buf(void)
3275 {
3276 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3277 
3278 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3279 		return NULL;
3280 
3281 	buffer->nesting++;
3282 
3283 	/* Interrupts must see nesting incremented before we use the buffer */
3284 	barrier();
3285 	return &buffer->buffer[buffer->nesting - 1][0];
3286 }
3287 
3288 static void put_trace_buf(void)
3289 {
3290 	/* Don't let the decrement of nesting leak before this */
3291 	barrier();
3292 	this_cpu_dec(trace_percpu_buffer->nesting);
3293 }
3294 
3295 static int alloc_percpu_trace_buffer(void)
3296 {
3297 	struct trace_buffer_struct __percpu *buffers;
3298 
3299 	if (trace_percpu_buffer)
3300 		return 0;
3301 
3302 	buffers = alloc_percpu(struct trace_buffer_struct);
3303 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3304 		return -ENOMEM;
3305 
3306 	trace_percpu_buffer = buffers;
3307 	return 0;
3308 }
3309 
3310 static int buffers_allocated;
3311 
3312 void trace_printk_init_buffers(void)
3313 {
3314 	if (buffers_allocated)
3315 		return;
3316 
3317 	if (alloc_percpu_trace_buffer())
3318 		return;
3319 
3320 	/* trace_printk() is for debug use only. Don't use it in production. */
3321 
3322 	pr_warn("\n");
3323 	pr_warn("**********************************************************\n");
3324 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3325 	pr_warn("**                                                      **\n");
3326 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3327 	pr_warn("**                                                      **\n");
3328 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3329 	pr_warn("** unsafe for production use.                           **\n");
3330 	pr_warn("**                                                      **\n");
3331 	pr_warn("** If you see this message and you are not debugging    **\n");
3332 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3333 	pr_warn("**                                                      **\n");
3334 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3335 	pr_warn("**********************************************************\n");
3336 
3337 	/* Expand the buffers to set size */
3338 	tracing_update_buffers(&global_trace);
3339 
3340 	buffers_allocated = 1;
3341 
3342 	/*
3343 	 * trace_printk_init_buffers() can be called by modules.
3344 	 * If that happens, then we need to start cmdline recording
3345 	 * directly here. If the global_trace.array_buffer.buffer is already
3346 	 * allocated here, then this was called by module code.
3347 	 */
3348 	if (global_trace.array_buffer.buffer)
3349 		tracing_start_cmdline_record();
3350 }
3351 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3352 
3353 void trace_printk_start_comm(void)
3354 {
3355 	/* Start tracing comms if trace printk is set */
3356 	if (!buffers_allocated)
3357 		return;
3358 	tracing_start_cmdline_record();
3359 }
3360 
3361 static void trace_printk_start_stop_comm(int enabled)
3362 {
3363 	if (!buffers_allocated)
3364 		return;
3365 
3366 	if (enabled)
3367 		tracing_start_cmdline_record();
3368 	else
3369 		tracing_stop_cmdline_record();
3370 }
3371 
3372 /**
3373  * trace_vbprintk - write binary msg to tracing buffer
3374  * @ip:    The address of the caller
3375  * @fmt:   The string format to write to the buffer
3376  * @args:  Arguments for @fmt
3377  */
3378 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3379 {
3380 	struct ring_buffer_event *event;
3381 	struct trace_buffer *buffer;
3382 	struct trace_array *tr = READ_ONCE(printk_trace);
3383 	struct bprint_entry *entry;
3384 	unsigned int trace_ctx;
3385 	char *tbuffer;
3386 	int len = 0, size;
3387 
3388 	if (!printk_binsafe(tr))
3389 		return trace_vprintk(ip, fmt, args);
3390 
3391 	if (unlikely(tracing_selftest_running || tracing_disabled))
3392 		return 0;
3393 
3394 	/* Don't pollute graph traces with trace_vprintk internals */
3395 	pause_graph_tracing();
3396 
3397 	trace_ctx = tracing_gen_ctx();
3398 	preempt_disable_notrace();
3399 
3400 	tbuffer = get_trace_buf();
3401 	if (!tbuffer) {
3402 		len = 0;
3403 		goto out_nobuffer;
3404 	}
3405 
3406 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3407 
3408 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3409 		goto out_put;
3410 
3411 	size = sizeof(*entry) + sizeof(u32) * len;
3412 	buffer = tr->array_buffer.buffer;
3413 	ring_buffer_nest_start(buffer);
3414 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3415 					    trace_ctx);
3416 	if (!event)
3417 		goto out;
3418 	entry = ring_buffer_event_data(event);
3419 	entry->ip			= ip;
3420 	entry->fmt			= fmt;
3421 
3422 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3423 	__buffer_unlock_commit(buffer, event);
3424 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3425 
3426 out:
3427 	ring_buffer_nest_end(buffer);
3428 out_put:
3429 	put_trace_buf();
3430 
3431 out_nobuffer:
3432 	preempt_enable_notrace();
3433 	unpause_graph_tracing();
3434 
3435 	return len;
3436 }
3437 EXPORT_SYMBOL_GPL(trace_vbprintk);
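/*
 * Illustrative usage (not part of this file): callers normally reach this
 * through the trace_printk() macro, e.g. from code being debugged
 * ("status" here is a made-up local variable):
 *
 *	trace_printk("woke up, status=%d\n", status);
 *
 * With a compile-time constant format the macro takes this binary path and
 * only the format pointer plus the arguments are stored in the ring buffer;
 * otherwise it falls back to the string path in trace_vprintk() below.
 */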
3438 
3439 static __printf(3, 0)
3440 int __trace_array_vprintk(struct trace_buffer *buffer,
3441 			  unsigned long ip, const char *fmt, va_list args)
3442 {
3443 	struct ring_buffer_event *event;
3444 	int len = 0, size;
3445 	struct print_entry *entry;
3446 	unsigned int trace_ctx;
3447 	char *tbuffer;
3448 
3449 	if (tracing_disabled)
3450 		return 0;
3451 
3452 	/* Don't pollute graph traces with trace_vprintk internals */
3453 	pause_graph_tracing();
3454 
3455 	trace_ctx = tracing_gen_ctx();
3456 	preempt_disable_notrace();
3457 
3459 	tbuffer = get_trace_buf();
3460 	if (!tbuffer) {
3461 		len = 0;
3462 		goto out_nobuffer;
3463 	}
3464 
3465 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3466 
3467 	size = sizeof(*entry) + len + 1;
3468 	ring_buffer_nest_start(buffer);
3469 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3470 					    trace_ctx);
3471 	if (!event)
3472 		goto out;
3473 	entry = ring_buffer_event_data(event);
3474 	entry->ip = ip;
3475 
3476 	memcpy(&entry->buf, tbuffer, len + 1);
3477 	__buffer_unlock_commit(buffer, event);
3478 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3479 
3480 out:
3481 	ring_buffer_nest_end(buffer);
3482 	put_trace_buf();
3483 
3484 out_nobuffer:
3485 	preempt_enable_notrace();
3486 	unpause_graph_tracing();
3487 
3488 	return len;
3489 }
3490 
3491 int trace_array_vprintk(struct trace_array *tr,
3492 			unsigned long ip, const char *fmt, va_list args)
3493 {
3494 	if (tracing_selftest_running && tr == &global_trace)
3495 		return 0;
3496 
3497 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3498 }
3499 
3500 /**
3501  * trace_array_printk - Print a message to a specific instance
3502  * @tr: The instance trace_array descriptor
3503  * @ip: The instruction pointer that this is called from.
3504  * @fmt: The format to print (printf format)
3505  *
3506  * If a subsystem sets up its own instance, they have the right to
3507  * printk strings into their tracing instance buffer using this
3508  * function. Note, this function will not write into the top level
3509  * buffer (use trace_printk() for that), as writing into the top level
3510  * buffer should only have events that can be individually disabled.
3511  * trace_printk() is only used for debugging a kernel, and should not
3512  * be ever incorporated in normal use.
3513  *
3514  * trace_array_printk() can be used, as it will not add noise to the
3515  * top level tracing buffer.
3516  *
3517  * Note, trace_array_init_printk() must be called on @tr before this
3518  * can be used.
3519  */
3520 int trace_array_printk(struct trace_array *tr,
3521 		       unsigned long ip, const char *fmt, ...)
3522 {
3523 	int ret;
3524 	va_list ap;
3525 
3526 	if (!tr)
3527 		return -ENOENT;
3528 
3529 	/* This is only allowed for created instances */
3530 	if (tr == &global_trace)
3531 		return 0;
3532 
3533 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3534 		return 0;
3535 
3536 	va_start(ap, fmt);
3537 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3538 	va_end(ap);
3539 	return ret;
3540 }
3541 EXPORT_SYMBOL_GPL(trace_array_printk);
3542 
3543 /**
3544  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3545  * @tr: The trace array to initialize the buffers for
3546  *
3547  * As trace_array_printk() only writes into instances, they are OK to
3548  * have in the kernel (unlike trace_printk()). This needs to be called
3549  * before trace_array_printk() can be used on a trace_array.
3550  */
3551 int trace_array_init_printk(struct trace_array *tr)
3552 {
3553 	if (!tr)
3554 		return -ENOENT;
3555 
3556 	/* This is only allowed for created instances */
3557 	if (tr == &global_trace)
3558 		return -EINVAL;
3559 
3560 	return alloc_percpu_trace_buffer();
3561 }
3562 EXPORT_SYMBOL_GPL(trace_array_init_printk);
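/*
 * Illustrative sketch (not part of this file): a subsystem that owns its own
 * instance (obtained elsewhere, e.g. with trace_array_get_by_name()) could
 * use the pair of functions above like this; my_tr and the message are
 * purely illustrative:
 *
 *	static int my_subsys_trace_setup(struct trace_array *my_tr)
 *	{
 *		int ret;
 *
 *		ret = trace_array_init_printk(my_tr);
 *		if (ret)
 *			return ret;
 *
 *		trace_array_printk(my_tr, _THIS_IP_,
 *				   "subsystem tracing ready\n");
 *		return 0;
 *	}
 *
 * samples/ftrace/sample-trace-array.c has a complete, buildable example.
 */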
3563 
3564 int trace_array_printk_buf(struct trace_buffer *buffer,
3565 			   unsigned long ip, const char *fmt, ...)
3566 {
3567 	int ret;
3568 	va_list ap;
3569 
3570 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3571 		return 0;
3572 
3573 	va_start(ap, fmt);
3574 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3575 	va_end(ap);
3576 	return ret;
3577 }
3578 
3579 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3580 {
3581 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3582 }
3583 EXPORT_SYMBOL_GPL(trace_vprintk);
3584 
3585 static void trace_iterator_increment(struct trace_iterator *iter)
3586 {
3587 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3588 
3589 	iter->idx++;
3590 	if (buf_iter)
3591 		ring_buffer_iter_advance(buf_iter);
3592 }
3593 
3594 static struct trace_entry *
3595 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3596 		unsigned long *lost_events)
3597 {
3598 	struct ring_buffer_event *event;
3599 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3600 
3601 	if (buf_iter) {
3602 		event = ring_buffer_iter_peek(buf_iter, ts);
3603 		if (lost_events)
3604 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3605 				(unsigned long)-1 : 0;
3606 	} else {
3607 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3608 					 lost_events);
3609 	}
3610 
3611 	if (event) {
3612 		iter->ent_size = ring_buffer_event_length(event);
3613 		return ring_buffer_event_data(event);
3614 	}
3615 	iter->ent_size = 0;
3616 	return NULL;
3617 }
3618 
3619 static struct trace_entry *
3620 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3621 		  unsigned long *missing_events, u64 *ent_ts)
3622 {
3623 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3624 	struct trace_entry *ent, *next = NULL;
3625 	unsigned long lost_events = 0, next_lost = 0;
3626 	int cpu_file = iter->cpu_file;
3627 	u64 next_ts = 0, ts;
3628 	int next_cpu = -1;
3629 	int next_size = 0;
3630 	int cpu;
3631 
3632 	/*
3633 	 * If we are in a per_cpu trace file, don't bother by iterating over
3634 	 * all cpu and peek directly.
3635 	 */
3636 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3637 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3638 			return NULL;
3639 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3640 		if (ent_cpu)
3641 			*ent_cpu = cpu_file;
3642 
3643 		return ent;
3644 	}
3645 
3646 	for_each_tracing_cpu(cpu) {
3647 
3648 		if (ring_buffer_empty_cpu(buffer, cpu))
3649 			continue;
3650 
3651 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3652 
3653 		/*
3654 		 * Pick the entry with the smallest timestamp:
3655 		 */
3656 		if (ent && (!next || ts < next_ts)) {
3657 			next = ent;
3658 			next_cpu = cpu;
3659 			next_ts = ts;
3660 			next_lost = lost_events;
3661 			next_size = iter->ent_size;
3662 		}
3663 	}
3664 
3665 	iter->ent_size = next_size;
3666 
3667 	if (ent_cpu)
3668 		*ent_cpu = next_cpu;
3669 
3670 	if (ent_ts)
3671 		*ent_ts = next_ts;
3672 
3673 	if (missing_events)
3674 		*missing_events = next_lost;
3675 
3676 	return next;
3677 }
3678 
3679 #define STATIC_FMT_BUF_SIZE	128
3680 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3681 
3682 char *trace_iter_expand_format(struct trace_iterator *iter)
3683 {
3684 	char *tmp;
3685 
3686 	/*
3687 	 * iter->tr is NULL when used with tp_printk, which makes
3688 	 * this get called where it is not safe to call krealloc().
3689 	 */
3690 	if (!iter->tr || iter->fmt == static_fmt_buf)
3691 		return NULL;
3692 
3693 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3694 		       GFP_KERNEL);
3695 	if (tmp) {
3696 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3697 		iter->fmt = tmp;
3698 	}
3699 
3700 	return tmp;
3701 }
3702 
3703 /* Returns true if the string is safe to dereference from an event */
3704 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3705 {
3706 	unsigned long addr = (unsigned long)str;
3707 	struct trace_event *trace_event;
3708 	struct trace_event_call *event;
3709 
3710 	/* OK if part of the event data */
3711 	if ((addr >= (unsigned long)iter->ent) &&
3712 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3713 		return true;
3714 
3715 	/* OK if part of the temp seq buffer */
3716 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3717 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3718 		return true;
3719 
3720 	/* Core rodata can not be freed */
3721 	if (is_kernel_rodata(addr))
3722 		return true;
3723 
3724 	if (trace_is_tracepoint_string(str))
3725 		return true;
3726 
3727 	/*
3728 	 * Now this could be a module event, referencing core module
3729 	 * data, which is OK.
3730 	 */
3731 	if (!iter->ent)
3732 		return false;
3733 
3734 	trace_event = ftrace_find_event(iter->ent->type);
3735 	if (!trace_event)
3736 		return false;
3737 
3738 	event = container_of(trace_event, struct trace_event_call, event);
3739 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3740 		return false;
3741 
3742 	/* Would rather have rodata, but this will suffice */
3743 	if (within_module_core(addr, event->module))
3744 		return true;
3745 
3746 	return false;
3747 }
3748 
3749 /**
3750  * ignore_event - Check dereferenced fields while writing to the seq buffer
3751  * @iter: The iterator that holds the seq buffer and the event being printed
3752  *
3753  * At boot up, test_event_printk() will flag any event that dereferences
3754  * a string with "%s" that does not exist in the ring buffer. It may still
3755  * be valid, as the string may point to a static string in the kernel
3756  * rodata that never gets freed. But if the string pointer is pointing
3757  * to something that was allocated, there's a chance that it can be freed
3758  * by the time the user reads the trace. This would cause a bad memory
3759  * access by the kernel and possibly crash the system.
3760  *
3761  * This function will check if the event has any fields flagged as needing
3762  * to be checked at runtime and perform those checks.
3763  *
3764  * If it is found that a field is unsafe, it will write into the @iter->seq
3765  * a message stating what was found to be unsafe.
3766  *
3767  * Returns: true if the event is unsafe and should be ignored,
3768  *          false otherwise.
3769  */
3770 bool ignore_event(struct trace_iterator *iter)
3771 {
3772 	struct ftrace_event_field *field;
3773 	struct trace_event *trace_event;
3774 	struct trace_event_call *event;
3775 	struct list_head *head;
3776 	struct trace_seq *seq;
3777 	const void *ptr;
3778 
3779 	trace_event = ftrace_find_event(iter->ent->type);
3780 
3781 	seq = &iter->seq;
3782 
3783 	if (!trace_event) {
3784 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3785 		return true;
3786 	}
3787 
3788 	event = container_of(trace_event, struct trace_event_call, event);
3789 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3790 		return false;
3791 
3792 	head = trace_get_fields(event);
3793 	if (!head) {
3794 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3795 				 trace_event_name(event));
3796 		return true;
3797 	}
3798 
3799 	/* Offsets are from the iter->ent that points to the raw event */
3800 	ptr = iter->ent;
3801 
3802 	list_for_each_entry(field, head, link) {
3803 		const char *str;
3804 		bool good;
3805 
3806 		if (!field->needs_test)
3807 			continue;
3808 
3809 		str = *(const char **)(ptr + field->offset);
3810 
3811 		good = trace_safe_str(iter, str);
3812 
3813 		/*
3814 		 * If you hit this warning, it is likely that the
3815 		 * trace event in question used %s on a string that
3816 		 * was saved at the time of the event, but may not be
3817 		 * around when the trace is read. Use __string(),
3818 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3819 		 * instead. See samples/trace_events/trace-events-sample.h
3820 		 * for reference.
3821 		 */
3822 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3823 			      trace_event_name(event), field->name)) {
3824 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3825 					 trace_event_name(event), field->name);
3826 			return true;
3827 		}
3828 	}
3829 	return false;
3830 }
3831 
3832 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3833 {
3834 	const char *p, *new_fmt;
3835 	char *q;
3836 
3837 	if (WARN_ON_ONCE(!fmt))
3838 		return fmt;
3839 
3840 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3841 		return fmt;
3842 
3843 	p = fmt;
3844 	new_fmt = q = iter->fmt;
3845 	while (*p) {
3846 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3847 			if (!trace_iter_expand_format(iter))
3848 				return fmt;
3849 
3850 			q += iter->fmt - new_fmt;
3851 			new_fmt = iter->fmt;
3852 		}
3853 
3854 		*q++ = *p++;
3855 
3856 		/* Replace %p with %px */
3857 		if (p[-1] == '%') {
3858 			if (p[0] == '%') {
3859 				*q++ = *p++;
3860 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3861 				*q++ = *p++;
3862 				*q++ = 'x';
3863 			}
3864 		}
3865 	}
3866 	*q = '\0';
3867 
3868 	return new_fmt;
3869 }
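/*
 * Example of the rewrite above (illustrative format string): with the
 * hash-ptr trace option cleared, a format such as
 *
 *	"comm=%s ptr=%p\n"
 *
 * becomes "comm=%s ptr=%px\n". Only a bare %p gets the 'x' appended;
 * extended forms like %pS are left alone because the character after
 * the 'p' is alphanumeric, and a literal "%%" is copied through
 * unchanged.
 */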
3870 
3871 #define STATIC_TEMP_BUF_SIZE	128
3872 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3873 
3874 /* Find the next real entry, without updating the iterator itself */
3875 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3876 					  int *ent_cpu, u64 *ent_ts)
3877 {
3878 	/* __find_next_entry will reset ent_size */
3879 	int ent_size = iter->ent_size;
3880 	struct trace_entry *entry;
3881 
3882 	/*
3883 	 * If called from ftrace_dump(), then the iter->temp buffer
3884 	 * will be the static_temp_buf and not created from kmalloc.
3885 	 * If the entry size is greater than the buffer, we can
3886 	 * not save it. Just return NULL in that case. This is only
3887 	 * used to add markers when two consecutive events' time
3888 	 * stamps have a large delta. See trace_print_lat_context().
3889 	 */
3890 	if (iter->temp == static_temp_buf &&
3891 	    STATIC_TEMP_BUF_SIZE < ent_size)
3892 		return NULL;
3893 
3894 	/*
3895 	 * The __find_next_entry() may call peek_next_entry(), which may
3896 	 * call ring_buffer_peek() that may make the contents of iter->ent
3897 	 * undefined. Need to copy iter->ent now.
3898 	 */
3899 	if (iter->ent && iter->ent != iter->temp) {
3900 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3901 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3902 			void *temp;
3903 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3904 			if (!temp)
3905 				return NULL;
3906 			kfree(iter->temp);
3907 			iter->temp = temp;
3908 			iter->temp_size = iter->ent_size;
3909 		}
3910 		memcpy(iter->temp, iter->ent, iter->ent_size);
3911 		iter->ent = iter->temp;
3912 	}
3913 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3914 	/* Put back the original ent_size */
3915 	iter->ent_size = ent_size;
3916 
3917 	return entry;
3918 }
3919 
3920 /* Find the next real entry, and increment the iterator to the next entry */
3921 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3922 {
3923 	iter->ent = __find_next_entry(iter, &iter->cpu,
3924 				      &iter->lost_events, &iter->ts);
3925 
3926 	if (iter->ent)
3927 		trace_iterator_increment(iter);
3928 
3929 	return iter->ent ? iter : NULL;
3930 }
3931 
3932 static void trace_consume(struct trace_iterator *iter)
3933 {
3934 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3935 			    &iter->lost_events);
3936 }
3937 
3938 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3939 {
3940 	struct trace_iterator *iter = m->private;
3941 	int i = (int)*pos;
3942 	void *ent;
3943 
3944 	WARN_ON_ONCE(iter->leftover);
3945 
3946 	(*pos)++;
3947 
3948 	/* can't go backwards */
3949 	if (iter->idx > i)
3950 		return NULL;
3951 
3952 	if (iter->idx < 0)
3953 		ent = trace_find_next_entry_inc(iter);
3954 	else
3955 		ent = iter;
3956 
3957 	while (ent && iter->idx < i)
3958 		ent = trace_find_next_entry_inc(iter);
3959 
3960 	iter->pos = *pos;
3961 
3962 	return ent;
3963 }
3964 
3965 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3966 {
3967 	struct ring_buffer_iter *buf_iter;
3968 	unsigned long entries = 0;
3969 	u64 ts;
3970 
3971 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3972 
3973 	buf_iter = trace_buffer_iter(iter, cpu);
3974 	if (!buf_iter)
3975 		return;
3976 
3977 	ring_buffer_iter_reset(buf_iter);
3978 
3979 	/*
3980 	 * We could have the case with the max latency tracers
3981 	 * that a reset never took place on a cpu. This is evident
3982 	 * by the timestamp being before the start of the buffer.
3983 	 */
3984 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3985 		if (ts >= iter->array_buffer->time_start)
3986 			break;
3987 		entries++;
3988 		ring_buffer_iter_advance(buf_iter);
3989 		/* This could be a big loop */
3990 		cond_resched();
3991 	}
3992 
3993 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3994 }
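/*
 * Illustration of the accounting above (made-up numbers): if a
 * max-latency swap left 40 stale entries time stamped before
 * array_buffer->time_start ahead of 60 newer ones, the loop skips the
 * 40 and records skipped_entries = 40 so that get_total_entries_cpu()
 * can later subtract them from the per-CPU counts.
 */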
3995 
3996 /*
3997  * The current tracer is copied into the iterator to avoid taking
3998  * a global lock all around.
3999  */
4000 static void *s_start(struct seq_file *m, loff_t *pos)
4001 {
4002 	struct trace_iterator *iter = m->private;
4003 	struct trace_array *tr = iter->tr;
4004 	int cpu_file = iter->cpu_file;
4005 	void *p = NULL;
4006 	loff_t l = 0;
4007 	int cpu;
4008 
4009 	mutex_lock(&trace_types_lock);
4010 	if (unlikely(tr->current_trace != iter->trace)) {
4011 		/* Close iter->trace before switching to the new current tracer */
4012 		if (iter->trace->close)
4013 			iter->trace->close(iter);
4014 		iter->trace = tr->current_trace;
4015 		/* Reopen the new current tracer */
4016 		if (iter->trace->open)
4017 			iter->trace->open(iter);
4018 	}
4019 	mutex_unlock(&trace_types_lock);
4020 
4021 #ifdef CONFIG_TRACER_MAX_TRACE
4022 	if (iter->snapshot && iter->trace->use_max_tr)
4023 		return ERR_PTR(-EBUSY);
4024 #endif
4025 
4026 	if (*pos != iter->pos) {
4027 		iter->ent = NULL;
4028 		iter->cpu = 0;
4029 		iter->idx = -1;
4030 
4031 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4032 			for_each_tracing_cpu(cpu)
4033 				tracing_iter_reset(iter, cpu);
4034 		} else
4035 			tracing_iter_reset(iter, cpu_file);
4036 
4037 		iter->leftover = 0;
4038 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4039 			;
4040 
4041 	} else {
4042 		/*
4043 		 * If we overflowed the seq_file before, then we want
4044 		 * to just reuse the trace_seq buffer again.
4045 		 */
4046 		if (iter->leftover)
4047 			p = iter;
4048 		else {
4049 			l = *pos - 1;
4050 			p = s_next(m, p, &l);
4051 		}
4052 	}
4053 
4054 	trace_event_read_lock();
4055 	trace_access_lock(cpu_file);
4056 	return p;
4057 }
4058 
4059 static void s_stop(struct seq_file *m, void *p)
4060 {
4061 	struct trace_iterator *iter = m->private;
4062 
4063 #ifdef CONFIG_TRACER_MAX_TRACE
4064 	if (iter->snapshot && iter->trace->use_max_tr)
4065 		return;
4066 #endif
4067 
4068 	trace_access_unlock(iter->cpu_file);
4069 	trace_event_read_unlock();
4070 }
4071 
4072 static void
4073 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4074 		      unsigned long *entries, int cpu)
4075 {
4076 	unsigned long count;
4077 
4078 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4079 	/*
4080 	 * If this buffer has skipped entries, then we hold all
4081 	 * entries for the trace and we need to ignore the
4082 	 * ones before the time stamp.
4083 	 */
4084 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4085 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4086 		/* total is the same as the entries */
4087 		*total = count;
4088 	} else
4089 		*total = count +
4090 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4091 	*entries = count;
4092 }
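/*
 * Worked example of the two counters (made-up numbers): with 70
 * entries currently in a per-CPU buffer and an overrun of 30
 * overwritten events, *entries is 70 and *total is 100. When
 * skipped_entries is set (see tracing_iter_reset()), the skipped
 * entries are subtracted instead and *total equals *entries, since
 * nothing was actually lost.
 */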
4093 
4094 static void
4095 get_total_entries(struct array_buffer *buf,
4096 		  unsigned long *total, unsigned long *entries)
4097 {
4098 	unsigned long t, e;
4099 	int cpu;
4100 
4101 	*total = 0;
4102 	*entries = 0;
4103 
4104 	for_each_tracing_cpu(cpu) {
4105 		get_total_entries_cpu(buf, &t, &e, cpu);
4106 		*total += t;
4107 		*entries += e;
4108 	}
4109 }
4110 
4111 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4112 {
4113 	unsigned long total, entries;
4114 
4115 	if (!tr)
4116 		tr = &global_trace;
4117 
4118 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4119 
4120 	return entries;
4121 }
4122 
4123 unsigned long trace_total_entries(struct trace_array *tr)
4124 {
4125 	unsigned long total, entries;
4126 
4127 	if (!tr)
4128 		tr = &global_trace;
4129 
4130 	get_total_entries(&tr->array_buffer, &total, &entries);
4131 
4132 	return entries;
4133 }
4134 
4135 static void print_lat_help_header(struct seq_file *m)
4136 {
4137 	seq_puts(m, "#                    _------=> CPU#            \n"
4138 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4139 		    "#                  | / _----=> need-resched    \n"
4140 		    "#                  || / _---=> hardirq/softirq \n"
4141 		    "#                  ||| / _--=> preempt-depth   \n"
4142 		    "#                  |||| / _-=> migrate-disable \n"
4143 		    "#                  ||||| /     delay           \n"
4144 		    "#  cmd     pid     |||||| time  |   caller     \n"
4145 		    "#     \\   /        ||||||  \\    |    /       \n");
4146 }
4147 
4148 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4149 {
4150 	unsigned long total;
4151 	unsigned long entries;
4152 
4153 	get_total_entries(buf, &total, &entries);
4154 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4155 		   entries, total, num_online_cpus());
4156 	seq_puts(m, "#\n");
4157 }
4158 
4159 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4160 				   unsigned int flags)
4161 {
4162 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4163 
4164 	print_event_info(buf, m);
4165 
4166 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4167 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4168 }
4169 
4170 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4171 				       unsigned int flags)
4172 {
4173 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4174 	static const char space[] = "            ";
4175 	int prec = tgid ? 12 : 2;
4176 
4177 	print_event_info(buf, m);
4178 
4179 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4180 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4181 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4182 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4183 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4184 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4185 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4186 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4187 }
4188 
4189 void
4190 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4191 {
4192 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4193 	struct array_buffer *buf = iter->array_buffer;
4194 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4195 	struct tracer *type = iter->trace;
4196 	unsigned long entries;
4197 	unsigned long total;
4198 	const char *name = type->name;
4199 
4200 	get_total_entries(buf, &total, &entries);
4201 
4202 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4203 		   name, init_utsname()->release);
4204 	seq_puts(m, "# -----------------------------------"
4205 		 "---------------------------------\n");
4206 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4207 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4208 		   nsecs_to_usecs(data->saved_latency),
4209 		   entries,
4210 		   total,
4211 		   buf->cpu,
4212 		   preempt_model_str(),
4213 		   /* These are reserved for later use */
4214 		   0, 0, 0, 0);
4215 #ifdef CONFIG_SMP
4216 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4217 #else
4218 	seq_puts(m, ")\n");
4219 #endif
4220 	seq_puts(m, "#    -----------------\n");
4221 	seq_printf(m, "#    | task: %.16s-%d "
4222 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4223 		   data->comm, data->pid,
4224 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4225 		   data->policy, data->rt_priority);
4226 	seq_puts(m, "#    -----------------\n");
4227 
4228 	if (data->critical_start) {
4229 		seq_puts(m, "#  => started at: ");
4230 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4231 		trace_print_seq(m, &iter->seq);
4232 		seq_puts(m, "\n#  => ended at:   ");
4233 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4234 		trace_print_seq(m, &iter->seq);
4235 		seq_puts(m, "\n#\n");
4236 	}
4237 
4238 	seq_puts(m, "#\n");
4239 }
4240 
4241 static void test_cpu_buff_start(struct trace_iterator *iter)
4242 {
4243 	struct trace_seq *s = &iter->seq;
4244 	struct trace_array *tr = iter->tr;
4245 
4246 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4247 		return;
4248 
4249 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4250 		return;
4251 
4252 	if (cpumask_available(iter->started) &&
4253 	    cpumask_test_cpu(iter->cpu, iter->started))
4254 		return;
4255 
4256 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4257 		return;
4258 
4259 	if (cpumask_available(iter->started))
4260 		cpumask_set_cpu(iter->cpu, iter->started);
4261 
4262 	/* Don't print started cpu buffer for the first entry of the trace */
4263 	if (iter->idx > 1)
4264 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4265 				iter->cpu);
4266 }
4267 
4268 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4269 {
4270 	struct trace_array *tr = iter->tr;
4271 	struct trace_seq *s = &iter->seq;
4272 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4273 	struct trace_entry *entry;
4274 	struct trace_event *event;
4275 
4276 	entry = iter->ent;
4277 
4278 	test_cpu_buff_start(iter);
4279 
4280 	event = ftrace_find_event(entry->type);
4281 
4282 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4283 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4284 			trace_print_lat_context(iter);
4285 		else
4286 			trace_print_context(iter);
4287 	}
4288 
4289 	if (trace_seq_has_overflowed(s))
4290 		return TRACE_TYPE_PARTIAL_LINE;
4291 
4292 	if (event) {
4293 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4294 			return print_event_fields(iter, event);
4295 		/*
4296 		 * For TRACE_EVENT() events, the print_fmt is not
4297 		 * safe to use if the array has delta offsets.
4298 		 * Force printing via the fields.
4299 		 */
4300 		if ((tr->text_delta) &&
4301 		    event->type > __TRACE_LAST_TYPE)
4302 			return print_event_fields(iter, event);
4303 
4304 		return event->funcs->trace(iter, sym_flags, event);
4305 	}
4306 
4307 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4308 
4309 	return trace_handle_return(s);
4310 }
4311 
4312 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4313 {
4314 	struct trace_array *tr = iter->tr;
4315 	struct trace_seq *s = &iter->seq;
4316 	struct trace_entry *entry;
4317 	struct trace_event *event;
4318 
4319 	entry = iter->ent;
4320 
4321 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4322 		trace_seq_printf(s, "%d %d %llu ",
4323 				 entry->pid, iter->cpu, iter->ts);
4324 
4325 	if (trace_seq_has_overflowed(s))
4326 		return TRACE_TYPE_PARTIAL_LINE;
4327 
4328 	event = ftrace_find_event(entry->type);
4329 	if (event)
4330 		return event->funcs->raw(iter, 0, event);
4331 
4332 	trace_seq_printf(s, "%d ?\n", entry->type);
4333 
4334 	return trace_handle_return(s);
4335 }
4336 
4337 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4338 {
4339 	struct trace_array *tr = iter->tr;
4340 	struct trace_seq *s = &iter->seq;
4341 	unsigned char newline = '\n';
4342 	struct trace_entry *entry;
4343 	struct trace_event *event;
4344 
4345 	entry = iter->ent;
4346 
4347 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4348 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4349 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4350 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4351 		if (trace_seq_has_overflowed(s))
4352 			return TRACE_TYPE_PARTIAL_LINE;
4353 	}
4354 
4355 	event = ftrace_find_event(entry->type);
4356 	if (event) {
4357 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4358 		if (ret != TRACE_TYPE_HANDLED)
4359 			return ret;
4360 	}
4361 
4362 	SEQ_PUT_FIELD(s, newline);
4363 
4364 	return trace_handle_return(s);
4365 }
4366 
4367 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4368 {
4369 	struct trace_array *tr = iter->tr;
4370 	struct trace_seq *s = &iter->seq;
4371 	struct trace_entry *entry;
4372 	struct trace_event *event;
4373 
4374 	entry = iter->ent;
4375 
4376 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4377 		SEQ_PUT_FIELD(s, entry->pid);
4378 		SEQ_PUT_FIELD(s, iter->cpu);
4379 		SEQ_PUT_FIELD(s, iter->ts);
4380 		if (trace_seq_has_overflowed(s))
4381 			return TRACE_TYPE_PARTIAL_LINE;
4382 	}
4383 
4384 	event = ftrace_find_event(entry->type);
4385 	return event ? event->funcs->binary(iter, 0, event) :
4386 		TRACE_TYPE_HANDLED;
4387 }
4388 
4389 int trace_empty(struct trace_iterator *iter)
4390 {
4391 	struct ring_buffer_iter *buf_iter;
4392 	int cpu;
4393 
4394 	/* If we are looking at one CPU buffer, only check that one */
4395 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4396 		cpu = iter->cpu_file;
4397 		buf_iter = trace_buffer_iter(iter, cpu);
4398 		if (buf_iter) {
4399 			if (!ring_buffer_iter_empty(buf_iter))
4400 				return 0;
4401 		} else {
4402 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4403 				return 0;
4404 		}
4405 		return 1;
4406 	}
4407 
4408 	for_each_tracing_cpu(cpu) {
4409 		buf_iter = trace_buffer_iter(iter, cpu);
4410 		if (buf_iter) {
4411 			if (!ring_buffer_iter_empty(buf_iter))
4412 				return 0;
4413 		} else {
4414 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4415 				return 0;
4416 		}
4417 	}
4418 
4419 	return 1;
4420 }
4421 
4422 /*  Called with trace_event_read_lock() held. */
4423 enum print_line_t print_trace_line(struct trace_iterator *iter)
4424 {
4425 	struct trace_array *tr = iter->tr;
4426 	unsigned long trace_flags = tr->trace_flags;
4427 	enum print_line_t ret;
4428 
4429 	if (iter->lost_events) {
4430 		if (iter->lost_events == (unsigned long)-1)
4431 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4432 					 iter->cpu);
4433 		else
4434 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4435 					 iter->cpu, iter->lost_events);
4436 		if (trace_seq_has_overflowed(&iter->seq))
4437 			return TRACE_TYPE_PARTIAL_LINE;
4438 	}
4439 
4440 	if (iter->trace && iter->trace->print_line) {
4441 		ret = iter->trace->print_line(iter);
4442 		if (ret != TRACE_TYPE_UNHANDLED)
4443 			return ret;
4444 	}
4445 
4446 	if (iter->ent->type == TRACE_BPUTS &&
4447 			trace_flags & TRACE_ITER_PRINTK &&
4448 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4449 		return trace_print_bputs_msg_only(iter);
4450 
4451 	if (iter->ent->type == TRACE_BPRINT &&
4452 			trace_flags & TRACE_ITER_PRINTK &&
4453 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4454 		return trace_print_bprintk_msg_only(iter);
4455 
4456 	if (iter->ent->type == TRACE_PRINT &&
4457 			trace_flags & TRACE_ITER_PRINTK &&
4458 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4459 		return trace_print_printk_msg_only(iter);
4460 
4461 	if (trace_flags & TRACE_ITER_BIN)
4462 		return print_bin_fmt(iter);
4463 
4464 	if (trace_flags & TRACE_ITER_HEX)
4465 		return print_hex_fmt(iter);
4466 
4467 	if (trace_flags & TRACE_ITER_RAW)
4468 		return print_raw_fmt(iter);
4469 
4470 	return print_trace_fmt(iter);
4471 }
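/*
 * Rough precedence of the checks above: a lost-events banner first,
 * then a tracer-specific ->print_line() if one exists, then the
 * message-only output for BPUTS/BPRINT/PRINT entries when both the
 * printk and printk-msg-only options are set, and finally the
 * bin/hex/raw/default formats in that order.
 */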
4472 
4473 void trace_latency_header(struct seq_file *m)
4474 {
4475 	struct trace_iterator *iter = m->private;
4476 	struct trace_array *tr = iter->tr;
4477 
4478 	/* print nothing if the buffers are empty */
4479 	if (trace_empty(iter))
4480 		return;
4481 
4482 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4483 		print_trace_header(m, iter);
4484 
4485 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4486 		print_lat_help_header(m);
4487 }
4488 
4489 void trace_default_header(struct seq_file *m)
4490 {
4491 	struct trace_iterator *iter = m->private;
4492 	struct trace_array *tr = iter->tr;
4493 	unsigned long trace_flags = tr->trace_flags;
4494 
4495 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4496 		return;
4497 
4498 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4499 		/* print nothing if the buffers are empty */
4500 		if (trace_empty(iter))
4501 			return;
4502 		print_trace_header(m, iter);
4503 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4504 			print_lat_help_header(m);
4505 	} else {
4506 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4507 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4508 				print_func_help_header_irq(iter->array_buffer,
4509 							   m, trace_flags);
4510 			else
4511 				print_func_help_header(iter->array_buffer, m,
4512 						       trace_flags);
4513 		}
4514 	}
4515 }
4516 
4517 static void test_ftrace_alive(struct seq_file *m)
4518 {
4519 	if (!ftrace_is_dead())
4520 		return;
4521 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4522 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4523 }
4524 
4525 #ifdef CONFIG_TRACER_MAX_TRACE
4526 static void show_snapshot_main_help(struct seq_file *m)
4527 {
4528 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4529 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4530 		    "#                      Takes a snapshot of the main buffer.\n"
4531 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4532 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4533 		    "#                       is not a '0' or '1')\n");
4534 }
4535 
4536 static void show_snapshot_percpu_help(struct seq_file *m)
4537 {
4538 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4539 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4540 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4541 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4542 #else
4543 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4544 		    "#                     Must use main snapshot file to allocate.\n");
4545 #endif
4546 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4547 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4548 		    "#                       is not a '0' or '1')\n");
4549 }
4550 
4551 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4552 {
4553 	if (iter->tr->allocated_snapshot)
4554 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4555 	else
4556 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4557 
4558 	seq_puts(m, "# Snapshot commands:\n");
4559 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4560 		show_snapshot_main_help(m);
4561 	else
4562 		show_snapshot_percpu_help(m);
4563 }
4564 #else
4565 /* Should never be called */
4566 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4567 #endif
4568 
4569 static int s_show(struct seq_file *m, void *v)
4570 {
4571 	struct trace_iterator *iter = v;
4572 	int ret;
4573 
4574 	if (iter->ent == NULL) {
4575 		if (iter->tr) {
4576 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4577 			seq_puts(m, "#\n");
4578 			test_ftrace_alive(m);
4579 		}
4580 		if (iter->snapshot && trace_empty(iter))
4581 			print_snapshot_help(m, iter);
4582 		else if (iter->trace && iter->trace->print_header)
4583 			iter->trace->print_header(m);
4584 		else
4585 			trace_default_header(m);
4586 
4587 	} else if (iter->leftover) {
4588 		/*
4589 		 * If we filled the seq_file buffer earlier, we
4590 		 * want to just show it now.
4591 		 */
4592 		ret = trace_print_seq(m, &iter->seq);
4593 
4594 		/* ret should this time be zero, but you never know */
4595 		iter->leftover = ret;
4596 
4597 	} else {
4598 		ret = print_trace_line(iter);
4599 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4600 			iter->seq.full = 0;
4601 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4602 		}
4603 		ret = trace_print_seq(m, &iter->seq);
4604 		/*
4605 		 * If we overflow the seq_file buffer, then it will
4606 		 * ask us for this data again at start up.
4607 		 * Use that instead.
4608 		 *  ret is 0 if seq_file write succeeded.
4609 		 *        -1 otherwise.
4610 		 */
4611 		iter->leftover = ret;
4612 	}
4613 
4614 	return 0;
4615 }
4616 
4617 /*
4618  * Should be used after trace_array_get(); trace_types_lock
4619  * ensures that i_cdev was already initialized.
4620  */
4621 static inline int tracing_get_cpu(struct inode *inode)
4622 {
4623 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4624 		return (long)inode->i_cdev - 1;
4625 	return RING_BUFFER_ALL_CPUS;
4626 }
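/*
 * Sketch of the encoding assumed above: trace_create_cpu_file() stores
 * "cpu + 1" in i_cdev, so a NULL i_cdev means this is not a per-CPU
 * file and maps to RING_BUFFER_ALL_CPUS, while e.g. an i_cdev of
 * (void *)3 decodes to CPU 2.
 */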
4627 
4628 static const struct seq_operations tracer_seq_ops = {
4629 	.start		= s_start,
4630 	.next		= s_next,
4631 	.stop		= s_stop,
4632 	.show		= s_show,
4633 };
4634 
4635 /*
4636  * Note, as iter itself can be allocated and freed in different
4637  * ways, this function is only used to free its content, and not
4638  * the iterator itself. The only requirement for all the allocations
4639  * is that they must zero all fields (kzalloc), as freeing works with
4640  * either allocated content or NULL.
4641  */
4642 static void free_trace_iter_content(struct trace_iterator *iter)
4643 {
4644 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4645 	if (iter->fmt != static_fmt_buf)
4646 		kfree(iter->fmt);
4647 
4648 	kfree(iter->temp);
4649 	kfree(iter->buffer_iter);
4650 	mutex_destroy(&iter->mutex);
4651 	free_cpumask_var(iter->started);
4652 }
4653 
4654 static struct trace_iterator *
4655 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4656 {
4657 	struct trace_array *tr = inode->i_private;
4658 	struct trace_iterator *iter;
4659 	int cpu;
4660 
4661 	if (tracing_disabled)
4662 		return ERR_PTR(-ENODEV);
4663 
4664 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4665 	if (!iter)
4666 		return ERR_PTR(-ENOMEM);
4667 
4668 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4669 				    GFP_KERNEL);
4670 	if (!iter->buffer_iter)
4671 		goto release;
4672 
4673 	/*
4674 	 * trace_find_next_entry() may need to save off iter->ent.
4675 	 * It will place it into the iter->temp buffer. As most
4676 	 * events are less than 128 bytes, allocate a buffer of that size.
4677 	 * If one is greater, then trace_find_next_entry() will
4678 	 * allocate a new buffer to adjust for the bigger iter->ent.
4679 	 * It's not critical if it fails to get allocated here.
4680 	 */
4681 	iter->temp = kmalloc(128, GFP_KERNEL);
4682 	if (iter->temp)
4683 		iter->temp_size = 128;
4684 
4685 	/*
4686 	 * trace_event_printf() may need to modify the given format
4687 	 * string to replace %p with %px so that it shows the real address
4688 	 * instead of a hash value. However, that is only needed for event
4689 	 * tracing; other tracers may not need it. Defer the allocation
4690 	 * until it is needed.
4691 	 */
4692 	iter->fmt = NULL;
4693 	iter->fmt_size = 0;
4694 
4695 	mutex_lock(&trace_types_lock);
4696 	iter->trace = tr->current_trace;
4697 
4698 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4699 		goto fail;
4700 
4701 	iter->tr = tr;
4702 
4703 #ifdef CONFIG_TRACER_MAX_TRACE
4704 	/* Currently only the top directory has a snapshot */
4705 	if (tr->current_trace->print_max || snapshot)
4706 		iter->array_buffer = &tr->max_buffer;
4707 	else
4708 #endif
4709 		iter->array_buffer = &tr->array_buffer;
4710 	iter->snapshot = snapshot;
4711 	iter->pos = -1;
4712 	iter->cpu_file = tracing_get_cpu(inode);
4713 	mutex_init(&iter->mutex);
4714 
4715 	/* Notify the tracer early; before we stop tracing. */
4716 	if (iter->trace->open)
4717 		iter->trace->open(iter);
4718 
4719 	/* Annotate start of buffers if we had overruns */
4720 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4721 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4722 
4723 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4724 	if (trace_clocks[tr->clock_id].in_ns)
4725 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4726 
4727 	/*
4728 	 * If pause-on-trace is enabled, then stop the trace while
4729 	 * dumping, unless this is the "snapshot" file.
4730 	 */
4731 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4732 		tracing_stop_tr(tr);
4733 
4734 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4735 		for_each_tracing_cpu(cpu) {
4736 			iter->buffer_iter[cpu] =
4737 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4738 							 cpu, GFP_KERNEL);
4739 		}
4740 		ring_buffer_read_prepare_sync();
4741 		for_each_tracing_cpu(cpu) {
4742 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4743 			tracing_iter_reset(iter, cpu);
4744 		}
4745 	} else {
4746 		cpu = iter->cpu_file;
4747 		iter->buffer_iter[cpu] =
4748 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4749 						 cpu, GFP_KERNEL);
4750 		ring_buffer_read_prepare_sync();
4751 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4752 		tracing_iter_reset(iter, cpu);
4753 	}
4754 
4755 	mutex_unlock(&trace_types_lock);
4756 
4757 	return iter;
4758 
4759  fail:
4760 	mutex_unlock(&trace_types_lock);
4761 	free_trace_iter_content(iter);
4762 release:
4763 	seq_release_private(inode, file);
4764 	return ERR_PTR(-ENOMEM);
4765 }
4766 
4767 int tracing_open_generic(struct inode *inode, struct file *filp)
4768 {
4769 	int ret;
4770 
4771 	ret = tracing_check_open_get_tr(NULL);
4772 	if (ret)
4773 		return ret;
4774 
4775 	filp->private_data = inode->i_private;
4776 	return 0;
4777 }
4778 
4779 bool tracing_is_disabled(void)
4780 {
4781 	return (tracing_disabled) ? true : false;
4782 }
4783 
4784 /*
4785  * Open and update trace_array ref count.
4786  * Must have the current trace_array passed to it.
4787  */
4788 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4789 {
4790 	struct trace_array *tr = inode->i_private;
4791 	int ret;
4792 
4793 	ret = tracing_check_open_get_tr(tr);
4794 	if (ret)
4795 		return ret;
4796 
4797 	filp->private_data = inode->i_private;
4798 
4799 	return 0;
4800 }
4801 
4802 /*
4803  * The private pointer of the inode is the trace_event_file.
4804  * Update the tr ref count associated to it.
4805  */
4806 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4807 {
4808 	struct trace_event_file *file = inode->i_private;
4809 	int ret;
4810 
4811 	ret = tracing_check_open_get_tr(file->tr);
4812 	if (ret)
4813 		return ret;
4814 
4815 	mutex_lock(&event_mutex);
4816 
4817 	/* Fail if the file is marked for removal */
4818 	if (file->flags & EVENT_FILE_FL_FREED) {
4819 		trace_array_put(file->tr);
4820 		ret = -ENODEV;
4821 	} else {
4822 		event_file_get(file);
4823 	}
4824 
4825 	mutex_unlock(&event_mutex);
4826 	if (ret)
4827 		return ret;
4828 
4829 	filp->private_data = inode->i_private;
4830 
4831 	return 0;
4832 }
4833 
4834 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4835 {
4836 	struct trace_event_file *file = inode->i_private;
4837 
4838 	trace_array_put(file->tr);
4839 	event_file_put(file);
4840 
4841 	return 0;
4842 }
4843 
4844 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4845 {
4846 	tracing_release_file_tr(inode, filp);
4847 	return single_release(inode, filp);
4848 }
4849 
4850 static int tracing_mark_open(struct inode *inode, struct file *filp)
4851 {
4852 	stream_open(inode, filp);
4853 	return tracing_open_generic_tr(inode, filp);
4854 }
4855 
4856 static int tracing_release(struct inode *inode, struct file *file)
4857 {
4858 	struct trace_array *tr = inode->i_private;
4859 	struct seq_file *m = file->private_data;
4860 	struct trace_iterator *iter;
4861 	int cpu;
4862 
4863 	if (!(file->f_mode & FMODE_READ)) {
4864 		trace_array_put(tr);
4865 		return 0;
4866 	}
4867 
4868 	/* Writes do not use seq_file */
4869 	iter = m->private;
4870 	mutex_lock(&trace_types_lock);
4871 
4872 	for_each_tracing_cpu(cpu) {
4873 		if (iter->buffer_iter[cpu])
4874 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4875 	}
4876 
4877 	if (iter->trace && iter->trace->close)
4878 		iter->trace->close(iter);
4879 
4880 	if (!iter->snapshot && tr->stop_count)
4881 		/* reenable tracing if it was previously enabled */
4882 		tracing_start_tr(tr);
4883 
4884 	__trace_array_put(tr);
4885 
4886 	mutex_unlock(&trace_types_lock);
4887 
4888 	free_trace_iter_content(iter);
4889 	seq_release_private(inode, file);
4890 
4891 	return 0;
4892 }
4893 
4894 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4895 {
4896 	struct trace_array *tr = inode->i_private;
4897 
4898 	trace_array_put(tr);
4899 	return 0;
4900 }
4901 
4902 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4903 {
4904 	struct trace_array *tr = inode->i_private;
4905 
4906 	trace_array_put(tr);
4907 
4908 	return single_release(inode, file);
4909 }
4910 
4911 static int tracing_open(struct inode *inode, struct file *file)
4912 {
4913 	struct trace_array *tr = inode->i_private;
4914 	struct trace_iterator *iter;
4915 	int ret;
4916 
4917 	ret = tracing_check_open_get_tr(tr);
4918 	if (ret)
4919 		return ret;
4920 
4921 	/* If this file was opened for write, then erase its contents */
4922 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4923 		int cpu = tracing_get_cpu(inode);
4924 		struct array_buffer *trace_buf = &tr->array_buffer;
4925 
4926 #ifdef CONFIG_TRACER_MAX_TRACE
4927 		if (tr->current_trace->print_max)
4928 			trace_buf = &tr->max_buffer;
4929 #endif
4930 
4931 		if (cpu == RING_BUFFER_ALL_CPUS)
4932 			tracing_reset_online_cpus(trace_buf);
4933 		else
4934 			tracing_reset_cpu(trace_buf, cpu);
4935 	}
4936 
4937 	if (file->f_mode & FMODE_READ) {
4938 		iter = __tracing_open(inode, file, false);
4939 		if (IS_ERR(iter))
4940 			ret = PTR_ERR(iter);
4941 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4942 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4943 	}
4944 
4945 	if (ret < 0)
4946 		trace_array_put(tr);
4947 
4948 	return ret;
4949 }
4950 
4951 /*
4952  * Some tracers are not suitable for instance buffers.
4953  * A tracer is always available for the global array (toplevel)
4954  * or if it explicitly states that it is.
4955  */
4956 static bool
4957 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4958 {
4959 #ifdef CONFIG_TRACER_SNAPSHOT
4960 	/* arrays with mapped buffer range do not have snapshots */
4961 	if (tr->range_addr_start && t->use_max_tr)
4962 		return false;
4963 #endif
4964 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4965 }
4966 
4967 /* Find the next tracer that this trace array may use */
4968 static struct tracer *
4969 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4970 {
4971 	while (t && !trace_ok_for_array(t, tr))
4972 		t = t->next;
4973 
4974 	return t;
4975 }
4976 
4977 static void *
4978 t_next(struct seq_file *m, void *v, loff_t *pos)
4979 {
4980 	struct trace_array *tr = m->private;
4981 	struct tracer *t = v;
4982 
4983 	(*pos)++;
4984 
4985 	if (t)
4986 		t = get_tracer_for_array(tr, t->next);
4987 
4988 	return t;
4989 }
4990 
4991 static void *t_start(struct seq_file *m, loff_t *pos)
4992 {
4993 	struct trace_array *tr = m->private;
4994 	struct tracer *t;
4995 	loff_t l = 0;
4996 
4997 	mutex_lock(&trace_types_lock);
4998 
4999 	t = get_tracer_for_array(tr, trace_types);
5000 	for (; t && l < *pos; t = t_next(m, t, &l))
5001 		;
5002 
5003 	return t;
5004 }
5005 
5006 static void t_stop(struct seq_file *m, void *p)
5007 {
5008 	mutex_unlock(&trace_types_lock);
5009 }
5010 
5011 static int t_show(struct seq_file *m, void *v)
5012 {
5013 	struct tracer *t = v;
5014 
5015 	if (!t)
5016 		return 0;
5017 
5018 	seq_puts(m, t->name);
5019 	if (t->next)
5020 		seq_putc(m, ' ');
5021 	else
5022 		seq_putc(m, '\n');
5023 
5024 	return 0;
5025 }
5026 
5027 static const struct seq_operations show_traces_seq_ops = {
5028 	.start		= t_start,
5029 	.next		= t_next,
5030 	.stop		= t_stop,
5031 	.show		= t_show,
5032 };
5033 
5034 static int show_traces_open(struct inode *inode, struct file *file)
5035 {
5036 	struct trace_array *tr = inode->i_private;
5037 	struct seq_file *m;
5038 	int ret;
5039 
5040 	ret = tracing_check_open_get_tr(tr);
5041 	if (ret)
5042 		return ret;
5043 
5044 	ret = seq_open(file, &show_traces_seq_ops);
5045 	if (ret) {
5046 		trace_array_put(tr);
5047 		return ret;
5048 	}
5049 
5050 	m = file->private_data;
5051 	m->private = tr;
5052 
5053 	return 0;
5054 }
5055 
5056 static int tracing_seq_release(struct inode *inode, struct file *file)
5057 {
5058 	struct trace_array *tr = inode->i_private;
5059 
5060 	trace_array_put(tr);
5061 	return seq_release(inode, file);
5062 }
5063 
5064 static ssize_t
5065 tracing_write_stub(struct file *filp, const char __user *ubuf,
5066 		   size_t count, loff_t *ppos)
5067 {
5068 	return count;
5069 }
5070 
5071 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5072 {
5073 	int ret;
5074 
5075 	if (file->f_mode & FMODE_READ)
5076 		ret = seq_lseek(file, offset, whence);
5077 	else
5078 		file->f_pos = ret = 0;
5079 
5080 	return ret;
5081 }
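/*
 * Note on the write-only branch above: a file opened without
 * FMODE_READ never gets a seq_file attached (see tracing_open()), so
 * there is nothing to seek within; the position is simply reset to 0.
 */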
5082 
5083 static const struct file_operations tracing_fops = {
5084 	.open		= tracing_open,
5085 	.read		= seq_read,
5086 	.read_iter	= seq_read_iter,
5087 	.splice_read	= copy_splice_read,
5088 	.write		= tracing_write_stub,
5089 	.llseek		= tracing_lseek,
5090 	.release	= tracing_release,
5091 };
5092 
5093 static const struct file_operations show_traces_fops = {
5094 	.open		= show_traces_open,
5095 	.read		= seq_read,
5096 	.llseek		= seq_lseek,
5097 	.release	= tracing_seq_release,
5098 };
5099 
5100 static ssize_t
5101 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5102 		     size_t count, loff_t *ppos)
5103 {
5104 	struct trace_array *tr = file_inode(filp)->i_private;
5105 	char *mask_str;
5106 	int len;
5107 
5108 	len = snprintf(NULL, 0, "%*pb\n",
5109 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5110 	mask_str = kmalloc(len, GFP_KERNEL);
5111 	if (!mask_str)
5112 		return -ENOMEM;
5113 
5114 	len = snprintf(mask_str, len, "%*pb\n",
5115 		       cpumask_pr_args(tr->tracing_cpumask));
5116 	if (len >= count) {
5117 		count = -EINVAL;
5118 		goto out_err;
5119 	}
5120 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5121 
5122 out_err:
5123 	kfree(mask_str);
5124 
5125 	return count;
5126 }
5127 
5128 int tracing_set_cpumask(struct trace_array *tr,
5129 			cpumask_var_t tracing_cpumask_new)
5130 {
5131 	int cpu;
5132 
5133 	if (!tr)
5134 		return -EINVAL;
5135 
5136 	local_irq_disable();
5137 	arch_spin_lock(&tr->max_lock);
5138 	for_each_tracing_cpu(cpu) {
5139 		/*
5140 		 * Increase/decrease the disabled counter if we are
5141 		 * about to flip a bit in the cpumask:
5142 		 */
5143 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5144 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5145 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5146 #ifdef CONFIG_TRACER_MAX_TRACE
5147 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5148 #endif
5149 		}
5150 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5151 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5152 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5153 #ifdef CONFIG_TRACER_MAX_TRACE
5154 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5155 #endif
5156 		}
5157 	}
5158 	arch_spin_unlock(&tr->max_lock);
5159 	local_irq_enable();
5160 
5161 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5162 
5163 	return 0;
5164 }
5165 
5166 static ssize_t
5167 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5168 		      size_t count, loff_t *ppos)
5169 {
5170 	struct trace_array *tr = file_inode(filp)->i_private;
5171 	cpumask_var_t tracing_cpumask_new;
5172 	int err;
5173 
5174 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5175 		return -EINVAL;
5176 
5177 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5178 		return -ENOMEM;
5179 
5180 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5181 	if (err)
5182 		goto err_free;
5183 
5184 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5185 	if (err)
5186 		goto err_free;
5187 
5188 	free_cpumask_var(tracing_cpumask_new);
5189 
5190 	return count;
5191 
5192 err_free:
5193 	free_cpumask_var(tracing_cpumask_new);
5194 
5195 	return err;
5196 }
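/*
 * Usage sketch (hex mask, as parsed by cpumask_parse_user()):
 *
 *	# echo 3 > tracing_cpumask	limit tracing to CPUs 0 and 1
 *	# echo ff > tracing_cpumask	trace CPUs 0-7
 *
 * CPUs dropped from the mask have ring-buffer recording disabled by
 * tracing_set_cpumask() above; CPUs added back are re-enabled.
 */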
5197 
5198 static const struct file_operations tracing_cpumask_fops = {
5199 	.open		= tracing_open_generic_tr,
5200 	.read		= tracing_cpumask_read,
5201 	.write		= tracing_cpumask_write,
5202 	.release	= tracing_release_generic_tr,
5203 	.llseek		= generic_file_llseek,
5204 };
5205 
5206 static int tracing_trace_options_show(struct seq_file *m, void *v)
5207 {
5208 	struct tracer_opt *trace_opts;
5209 	struct trace_array *tr = m->private;
5210 	u32 tracer_flags;
5211 	int i;
5212 
5213 	guard(mutex)(&trace_types_lock);
5214 
5215 	tracer_flags = tr->current_trace->flags->val;
5216 	trace_opts = tr->current_trace->flags->opts;
5217 
5218 	for (i = 0; trace_options[i]; i++) {
5219 		if (tr->trace_flags & (1 << i))
5220 			seq_printf(m, "%s\n", trace_options[i]);
5221 		else
5222 			seq_printf(m, "no%s\n", trace_options[i]);
5223 	}
5224 
5225 	for (i = 0; trace_opts[i].name; i++) {
5226 		if (tracer_flags & trace_opts[i].bit)
5227 			seq_printf(m, "%s\n", trace_opts[i].name);
5228 		else
5229 			seq_printf(m, "no%s\n", trace_opts[i].name);
5230 	}
5231 
5232 	return 0;
5233 }
5234 
5235 static int __set_tracer_option(struct trace_array *tr,
5236 			       struct tracer_flags *tracer_flags,
5237 			       struct tracer_opt *opts, int neg)
5238 {
5239 	struct tracer *trace = tracer_flags->trace;
5240 	int ret;
5241 
5242 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5243 	if (ret)
5244 		return ret;
5245 
5246 	if (neg)
5247 		tracer_flags->val &= ~opts->bit;
5248 	else
5249 		tracer_flags->val |= opts->bit;
5250 	return 0;
5251 }
5252 
5253 /* Try to assign a tracer specific option */
5254 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5255 {
5256 	struct tracer *trace = tr->current_trace;
5257 	struct tracer_flags *tracer_flags = trace->flags;
5258 	struct tracer_opt *opts = NULL;
5259 	int i;
5260 
5261 	for (i = 0; tracer_flags->opts[i].name; i++) {
5262 		opts = &tracer_flags->opts[i];
5263 
5264 		if (strcmp(cmp, opts->name) == 0)
5265 			return __set_tracer_option(tr, trace->flags, opts, neg);
5266 	}
5267 
5268 	return -EINVAL;
5269 }
5270 
5271 /* Some tracers require overwrite to stay enabled */
5272 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5273 {
5274 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5275 		return -1;
5276 
5277 	return 0;
5278 }
5279 
5280 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5281 {
5282 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5283 	    (mask == TRACE_ITER_RECORD_CMD) ||
5284 	    (mask == TRACE_ITER_TRACE_PRINTK) ||
5285 	    (mask == TRACE_ITER_COPY_MARKER))
5286 		lockdep_assert_held(&event_mutex);
5287 
5288 	/* do nothing if the flag is already in the requested state */
5289 	if (!!(tr->trace_flags & mask) == !!enabled)
5290 		return 0;
5291 
5292 	/* Give the tracer a chance to approve the change */
5293 	if (tr->current_trace->flag_changed)
5294 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5295 			return -EINVAL;
5296 
5297 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5298 		if (enabled) {
5299 			update_printk_trace(tr);
5300 		} else {
5301 			/*
5302 			 * The global_trace cannot clear this.
5303 			 * Its flag only gets cleared if another instance sets it.
5304 			 */
5305 			if (printk_trace == &global_trace)
5306 				return -EINVAL;
5307 			/*
5308 			 * An instance must always have it set;
5309 			 * by default, that's the global_trace instance.
5310 			 */
5311 			if (printk_trace == tr)
5312 				update_printk_trace(&global_trace);
5313 		}
5314 	}
5315 
5316 	if (mask == TRACE_ITER_COPY_MARKER)
5317 		update_marker_trace(tr, enabled);
5318 
5319 	if (enabled)
5320 		tr->trace_flags |= mask;
5321 	else
5322 		tr->trace_flags &= ~mask;
5323 
5324 	if (mask == TRACE_ITER_RECORD_CMD)
5325 		trace_event_enable_cmd_record(enabled);
5326 
5327 	if (mask == TRACE_ITER_RECORD_TGID) {
5328 
5329 		if (trace_alloc_tgid_map() < 0) {
5330 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5331 			return -ENOMEM;
5332 		}
5333 
5334 		trace_event_enable_tgid_record(enabled);
5335 	}
5336 
5337 	if (mask == TRACE_ITER_EVENT_FORK)
5338 		trace_event_follow_fork(tr, enabled);
5339 
5340 	if (mask == TRACE_ITER_FUNC_FORK)
5341 		ftrace_pid_follow_fork(tr, enabled);
5342 
5343 	if (mask == TRACE_ITER_OVERWRITE) {
5344 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5345 #ifdef CONFIG_TRACER_MAX_TRACE
5346 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5347 #endif
5348 	}
5349 
5350 	if (mask == TRACE_ITER_PRINTK) {
5351 		trace_printk_start_stop_comm(enabled);
5352 		trace_printk_control(enabled);
5353 	}
5354 
5355 	return 0;
5356 }
5357 
5358 int trace_set_options(struct trace_array *tr, char *option)
5359 {
5360 	char *cmp;
5361 	int neg = 0;
5362 	int ret;
5363 	size_t orig_len = strlen(option);
5364 	int len;
5365 
5366 	cmp = strstrip(option);
5367 
5368 	len = str_has_prefix(cmp, "no");
5369 	if (len)
5370 		neg = 1;
5371 
5372 	cmp += len;
5373 
5374 	mutex_lock(&event_mutex);
5375 	mutex_lock(&trace_types_lock);
5376 
5377 	ret = match_string(trace_options, -1, cmp);
5378 	/* If no core option matched, try the tracer-specific options */
5379 	if (ret < 0)
5380 		ret = set_tracer_option(tr, cmp, neg);
5381 	else
5382 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5383 
5384 	mutex_unlock(&trace_types_lock);
5385 	mutex_unlock(&event_mutex);
5386 
5387 	/*
5388 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5389 	 * turn it back into a space.
5390 	 */
5391 	if (orig_len > strlen(option))
5392 		option[strlen(option)] = ' ';
5393 
5394 	return ret;
5395 }
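/*
 * Example of the parsing above (illustrative): writing "noirq-info"
 * strips the "no" prefix, leaving cmp = "irq-info" and neg = 1, which
 * clears TRACE_ITER_IRQ_INFO; writing "irq-info" sets it again. A
 * string matching no core option is handed to set_tracer_option() for
 * the current tracer's private flags.
 */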
5396 
5397 static void __init apply_trace_boot_options(void)
5398 {
5399 	char *buf = trace_boot_options_buf;
5400 	char *option;
5401 
5402 	while (true) {
5403 		option = strsep(&buf, ",");
5404 
5405 		if (!option)
5406 			break;
5407 
5408 		if (*option)
5409 			trace_set_options(&global_trace, option);
5410 
5411 		/* Put back the comma to allow this to be called again */
5412 		if (buf)
5413 			*(buf - 1) = ',';
5414 	}
5415 }
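/*
 * Boot-time sketch (assuming the usual "trace_options=" kernel
 * parameter): a command line such as
 *
 *	trace_options=sym-offset,noirq-info
 *
 * lands in trace_boot_options_buf and is split on ',' here, with each
 * piece passed to trace_set_options() for the global trace array.
 */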
5416 
5417 static ssize_t
5418 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5419 			size_t cnt, loff_t *ppos)
5420 {
5421 	struct seq_file *m = filp->private_data;
5422 	struct trace_array *tr = m->private;
5423 	char buf[64];
5424 	int ret;
5425 
5426 	if (cnt >= sizeof(buf))
5427 		return -EINVAL;
5428 
5429 	if (copy_from_user(buf, ubuf, cnt))
5430 		return -EFAULT;
5431 
5432 	buf[cnt] = 0;
5433 
5434 	ret = trace_set_options(tr, buf);
5435 	if (ret < 0)
5436 		return ret;
5437 
5438 	*ppos += cnt;
5439 
5440 	return cnt;
5441 }
5442 
5443 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5444 {
5445 	struct trace_array *tr = inode->i_private;
5446 	int ret;
5447 
5448 	ret = tracing_check_open_get_tr(tr);
5449 	if (ret)
5450 		return ret;
5451 
5452 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5453 	if (ret < 0)
5454 		trace_array_put(tr);
5455 
5456 	return ret;
5457 }
5458 
5459 static const struct file_operations tracing_iter_fops = {
5460 	.open		= tracing_trace_options_open,
5461 	.read		= seq_read,
5462 	.llseek		= seq_lseek,
5463 	.release	= tracing_single_release_tr,
5464 	.write		= tracing_trace_options_write,
5465 };
5466 
5467 static const char readme_msg[] =
5468 	"tracing mini-HOWTO:\n\n"
5469 	"By default tracefs removes all OTH file permission bits.\n"
5470 	"When mounting tracefs an optional group id can be specified\n"
5471 	"which adds the group to every directory and file in tracefs:\n\n"
5472 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5473 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5474 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5475 	" Important files:\n"
5476 	"  trace\t\t\t- The static contents of the buffer\n"
5477 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5478 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5479 	"  current_tracer\t- function and latency tracers\n"
5480 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5481 	"  error_log\t- error log for failed commands (that support it)\n"
5482 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5483 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5484 	"  trace_clock\t\t- change the clock used to order events\n"
5485 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5486 	"      global:   Synced across CPUs but slows tracing down.\n"
5487 	"     counter:   Not a clock, but just an increment\n"
5488 	"      uptime:   Jiffy counter from time of boot\n"
5489 	"        perf:   Same clock that perf events use\n"
5490 #ifdef CONFIG_X86_64
5491 	"     x86-tsc:   TSC cycle counter\n"
5492 #endif
5493 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5494 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5495 	"    absolute:   Absolute (standalone) timestamp\n"
5496 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5497 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5498 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5499 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5500 	"\t\t\t  Remove sub-buffer with rmdir\n"
5501 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5502 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5503 	"\t\t\t  option name\n"
5504 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5505 #ifdef CONFIG_DYNAMIC_FTRACE
5506 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5507 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5508 	"\t\t\t  functions\n"
5509 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5510 	"\t     modules: Can select a group via module\n"
5511 	"\t      Format: :mod:<module-name>\n"
5512 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5513 	"\t    triggers: a command to perform when function is hit\n"
5514 	"\t      Format: <function>:<trigger>[:count]\n"
5515 	"\t     trigger: traceon, traceoff\n"
5516 	"\t\t      enable_event:<system>:<event>\n"
5517 	"\t\t      disable_event:<system>:<event>\n"
5518 #ifdef CONFIG_STACKTRACE
5519 	"\t\t      stacktrace\n"
5520 #endif
5521 #ifdef CONFIG_TRACER_SNAPSHOT
5522 	"\t\t      snapshot\n"
5523 #endif
5524 	"\t\t      dump\n"
5525 	"\t\t      cpudump\n"
5526 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5527 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5528 	"\t     The first one will disable tracing every time do_fault is hit\n"
5529 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5530 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5531 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5532 	"\t       the counter will not decrement. It only decrements when the\n"
5533 	"\t       trigger did work\n"
5534 	"\t     To remove trigger without count:\n"
5535 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5536 	"\t     To remove trigger with a count:\n"
5537 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5538 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5539 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5540 	"\t    modules: Can select a group via module command :mod:\n"
5541 	"\t    Does not accept triggers\n"
5542 #endif /* CONFIG_DYNAMIC_FTRACE */
5543 #ifdef CONFIG_FUNCTION_TRACER
5544 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5545 	"\t\t    (function)\n"
5546 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5547 	"\t\t    (function)\n"
5548 #endif
5549 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5550 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5551 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5552 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5553 #endif
5554 #ifdef CONFIG_TRACER_SNAPSHOT
5555 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5556 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5557 	"\t\t\t  information\n"
5558 #endif
5559 #ifdef CONFIG_STACK_TRACER
5560 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5561 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5562 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5563 	"\t\t\t  new trace)\n"
5564 #ifdef CONFIG_DYNAMIC_FTRACE
5565 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5566 	"\t\t\t  traces\n"
5567 #endif
5568 #endif /* CONFIG_STACK_TRACER */
5569 #ifdef CONFIG_DYNAMIC_EVENTS
5570 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5571 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5572 #endif
5573 #ifdef CONFIG_KPROBE_EVENTS
5574 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5575 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5576 #endif
5577 #ifdef CONFIG_UPROBE_EVENTS
5578 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5579 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5580 #endif
5581 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5582     defined(CONFIG_FPROBE_EVENTS)
5583 	"\t  accepts: event-definitions (one definition per line)\n"
5584 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5585 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5586 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5587 #endif
5588 #ifdef CONFIG_FPROBE_EVENTS
5589 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5590 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5591 #endif
5592 #ifdef CONFIG_HIST_TRIGGERS
5593 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5594 #endif
5595 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5596 	"\t           -:[<group>/][<event>]\n"
5597 #ifdef CONFIG_KPROBE_EVENTS
5598 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5599   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5600 #endif
5601 #ifdef CONFIG_UPROBE_EVENTS
5602   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5603 #endif
5604 	"\t     args: <name>=fetcharg[:type]\n"
5605 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5606 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5607 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5608 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5609 	"\t           <argname>[->field[->field|.field...]],\n"
5610 #endif
5611 #else
5612 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5613 #endif
5614 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5615 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5616 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5617 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5618 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5619 #ifdef CONFIG_HIST_TRIGGERS
5620 	"\t    field: <stype> <name>;\n"
5621 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5622 	"\t           [unsigned] char/int/long\n"
5623 #endif
5624 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5625 	"\t            of the <attached-group>/<attached-event>.\n"
5626 #endif
5627 	"  set_event\t\t- Enables events by name written into it\n"
5628 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5629 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5630 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5631 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5632 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5633 	"\t\t\t  events\n"
5634 	"      filter\t\t- If set, only events passing filter are traced\n"
5635 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5636 	"\t\t\t  <event>:\n"
5637 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5638 	"      filter\t\t- If set, only events passing filter are traced\n"
5639 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5640 	"\t    Format: <trigger>[:count][if <filter>]\n"
5641 	"\t   trigger: traceon, traceoff\n"
5642 	"\t            enable_event:<system>:<event>\n"
5643 	"\t            disable_event:<system>:<event>\n"
5644 #ifdef CONFIG_HIST_TRIGGERS
5645 	"\t            enable_hist:<system>:<event>\n"
5646 	"\t            disable_hist:<system>:<event>\n"
5647 #endif
5648 #ifdef CONFIG_STACKTRACE
5649 	"\t\t    stacktrace\n"
5650 #endif
5651 #ifdef CONFIG_TRACER_SNAPSHOT
5652 	"\t\t    snapshot\n"
5653 #endif
5654 #ifdef CONFIG_HIST_TRIGGERS
5655 	"\t\t    hist (see below)\n"
5656 #endif
5657 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5658 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5659 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5660 	"\t                  events/block/block_unplug/trigger\n"
5661 	"\t   The first disables tracing every time block_unplug is hit.\n"
5662 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5663 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5664 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5665 	"\t   Like function triggers, the counter is only decremented if it\n"
5666 	"\t    enabled or disabled tracing.\n"
5667 	"\t   To remove a trigger without a count:\n"
5668 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5669 	"\t   To remove a trigger with a count:\n"
5670 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5671 	"\t   Filters can be ignored when removing a trigger.\n"
5672 #ifdef CONFIG_HIST_TRIGGERS
5673 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5674 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5675 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5676 	"\t            [:values=<field1[,field2,...]>]\n"
5677 	"\t            [:sort=<field1[,field2,...]>]\n"
5678 	"\t            [:size=#entries]\n"
5679 	"\t            [:pause][:continue][:clear]\n"
5680 	"\t            [:name=histname1]\n"
5681 	"\t            [:nohitcount]\n"
5682 	"\t            [:<handler>.<action>]\n"
5683 	"\t            [if <filter>]\n\n"
5684 	"\t    Note, special fields can be used as well:\n"
5685 	"\t            common_timestamp - to record current timestamp\n"
5686 	"\t            common_cpu - to record the CPU the event happened on\n"
5687 	"\n"
5688 	"\t    A hist trigger variable can be:\n"
5689 	"\t        - a reference to a field e.g. x=common_timestamp,\n"
5690 	"\t        - a reference to another variable e.g. y=$x,\n"
5691 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5692 	"\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5693 	"\n"
5694 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5695 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5696 	"\t    variable reference, field or numeric literal.\n"
5697 	"\n"
5698 	"\t    When a matching event is hit, an entry is added to a hash\n"
5699 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5700 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5701 	"\t    correspond to fields in the event's format description.  Keys\n"
5702 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5703 	"\t    Compound keys consisting of up to two fields can be specified\n"
5704 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5705 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5706 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5707 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5708 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5709 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5710 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5711 	"\t    its histogram data will be shared with other triggers of the\n"
5712 	"\t    same name, and trigger hits will update this common data.\n\n"
5713 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5714 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5715 	"\t    triggers attached to an event, there will be a table for each\n"
5716 	"\t    trigger in the output.  The table displayed for a named\n"
5717 	"\t    trigger will be the same as any other instance having the\n"
5718 	"\t    same name.  The default format used to display a given field\n"
5719 	"\t    can be modified by appending any of the following modifiers\n"
5720 	"\t    to the field name, as applicable:\n\n"
5721 	"\t            .hex        display a number as a hex value\n"
5722 	"\t            .sym        display an address as a symbol\n"
5723 	"\t            .sym-offset display an address as a symbol and offset\n"
5724 	"\t            .execname   display a common_pid as a program name\n"
5725 	"\t            .syscall    display a syscall id as a syscall name\n"
5726 	"\t            .log2       display log2 value rather than raw number\n"
5727 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5728 	"\t            .usecs      display a common_timestamp in microseconds\n"
5729 	"\t            .percent    display a number as a percentage value\n"
5730 	"\t            .graph      display a bar-graph of a value\n\n"
5731 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5732 	"\t    trigger or to start a hist trigger but not log any events\n"
5733 	"\t    until told to do so.  'continue' can be used to start or\n"
5734 	"\t    restart a paused hist trigger.\n\n"
5735 	"\t    The 'clear' parameter will clear the contents of a running\n"
5736 	"\t    hist trigger and leave its current paused/active state\n"
5737 	"\t    unchanged.\n\n"
5738 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5739 	"\t    raw hitcount in the histogram.\n\n"
5740 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5741 	"\t    have one event conditionally start and stop another event's\n"
5742 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5743 	"\t    the enable_event and disable_event triggers.\n\n"
5744 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5745 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5746 	"\t        <handler>.<action>\n\n"
5747 	"\t    The available handlers are:\n\n"
5748 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5749 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5750 	"\t        onchange(var)            - invoke action if var changes\n\n"
5751 	"\t    The available actions are:\n\n"
5752 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5753 	"\t        save(field,...)                      - save current event fields\n"
5754 #ifdef CONFIG_TRACER_SNAPSHOT
5755 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5756 #endif
5757 #ifdef CONFIG_SYNTH_EVENTS
5758 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5759 	"\t  Write into this file to define/undefine new synthetic events.\n"
5760 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5761 #endif
5762 #endif
5763 ;
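
/*
 * Illustrative session (not part of the help text above) combining a few
 * of the interfaces documented in readme_msg, assuming tracefs is mounted
 * at /sys/kernel/tracing; the event and field names are examples only:
 *
 *   # cd /sys/kernel/tracing
 *   # echo do_fault:traceoff > set_ftrace_filter
 *   # echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \
 *           events/block/block_unplug/trigger
 *   # echo 'hist:keys=common_pid:values=bytes_req' > \
 *           events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 */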
5764 
5765 static ssize_t
5766 tracing_readme_read(struct file *filp, char __user *ubuf,
5767 		       size_t cnt, loff_t *ppos)
5768 {
5769 	return simple_read_from_buffer(ubuf, cnt, ppos,
5770 					readme_msg, strlen(readme_msg));
5771 }
5772 
5773 static const struct file_operations tracing_readme_fops = {
5774 	.open		= tracing_open_generic,
5775 	.read		= tracing_readme_read,
5776 	.llseek		= generic_file_llseek,
5777 };
5778 
5779 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5780 static union trace_eval_map_item *
5781 update_eval_map(union trace_eval_map_item *ptr)
5782 {
5783 	if (!ptr->map.eval_string) {
5784 		if (ptr->tail.next) {
5785 			ptr = ptr->tail.next;
5786 			/* Set ptr to the next real item (skip head) */
5787 			ptr++;
5788 		} else
5789 			return NULL;
5790 	}
5791 	return ptr;
5792 }
5793 
5794 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5795 {
5796 	union trace_eval_map_item *ptr = v;
5797 
5798 	/*
5799 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5800 	 * This really should never happen.
5801 	 */
5802 	(*pos)++;
5803 	ptr = update_eval_map(ptr);
5804 	if (WARN_ON_ONCE(!ptr))
5805 		return NULL;
5806 
5807 	ptr++;
5808 	ptr = update_eval_map(ptr);
5809 
5810 	return ptr;
5811 }
5812 
5813 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5814 {
5815 	union trace_eval_map_item *v;
5816 	loff_t l = 0;
5817 
5818 	mutex_lock(&trace_eval_mutex);
5819 
5820 	v = trace_eval_maps;
5821 	if (v)
5822 		v++;
5823 
5824 	while (v && l < *pos) {
5825 		v = eval_map_next(m, v, &l);
5826 	}
5827 
5828 	return v;
5829 }
5830 
5831 static void eval_map_stop(struct seq_file *m, void *v)
5832 {
5833 	mutex_unlock(&trace_eval_mutex);
5834 }
5835 
5836 static int eval_map_show(struct seq_file *m, void *v)
5837 {
5838 	union trace_eval_map_item *ptr = v;
5839 
5840 	seq_printf(m, "%s %ld (%s)\n",
5841 		   ptr->map.eval_string, ptr->map.eval_value,
5842 		   ptr->map.system);
5843 
5844 	return 0;
5845 }
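
/*
 * The resulting eval_map file lists one "<eval string> <value> (<system>)"
 * line per entry, matching the seq_printf() above. Illustrative output
 * (the actual entries depend on the events compiled into the kernel):
 *
 *   HI_SOFTIRQ 0 (irq)
 *   TIMER_SOFTIRQ 1 (irq)
 */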
5846 
5847 static const struct seq_operations tracing_eval_map_seq_ops = {
5848 	.start		= eval_map_start,
5849 	.next		= eval_map_next,
5850 	.stop		= eval_map_stop,
5851 	.show		= eval_map_show,
5852 };
5853 
5854 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5855 {
5856 	int ret;
5857 
5858 	ret = tracing_check_open_get_tr(NULL);
5859 	if (ret)
5860 		return ret;
5861 
5862 	return seq_open(filp, &tracing_eval_map_seq_ops);
5863 }
5864 
5865 static const struct file_operations tracing_eval_map_fops = {
5866 	.open		= tracing_eval_map_open,
5867 	.read		= seq_read,
5868 	.llseek		= seq_lseek,
5869 	.release	= seq_release,
5870 };
5871 
5872 static inline union trace_eval_map_item *
5873 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5874 {
5875 	/* Return tail of array given the head */
5876 	return ptr + ptr->head.length + 1;
5877 }
5878 
5879 static void
5880 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5881 			   int len)
5882 {
5883 	struct trace_eval_map **stop;
5884 	struct trace_eval_map **map;
5885 	union trace_eval_map_item *map_array;
5886 	union trace_eval_map_item *ptr;
5887 
5888 	stop = start + len;
5889 
5890 	/*
5891 	 * The trace_eval_maps contains the map plus a head and tail item,
5892 	 * where the head holds the module and length of array, and the
5893 	 * tail holds a pointer to the next list.
5894 	 */
5895 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5896 	if (!map_array) {
5897 		pr_warn("Unable to allocate trace eval mapping\n");
5898 		return;
5899 	}
5900 
5901 	guard(mutex)(&trace_eval_mutex);
5902 
5903 	if (!trace_eval_maps)
5904 		trace_eval_maps = map_array;
5905 	else {
5906 		ptr = trace_eval_maps;
5907 		for (;;) {
5908 			ptr = trace_eval_jmp_to_tail(ptr);
5909 			if (!ptr->tail.next)
5910 				break;
5911 			ptr = ptr->tail.next;
5913 		}
5914 		ptr->tail.next = map_array;
5915 	}
5916 	map_array->head.mod = mod;
5917 	map_array->head.length = len;
5918 	map_array++;
5919 
5920 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5921 		map_array->map = **map;
5922 		map_array++;
5923 	}
5924 	memset(map_array, 0, sizeof(*map_array));
5925 }
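
/*
 * Layout of one map_array allocation (len + 2 items) as built above:
 *
 *   [0]          head - holds the module pointer and the length
 *   [1 .. len]   map  - one trace_eval_map copy per entry
 *   [len + 1]    tail - zeroed; tail.next chains to the next array
 */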
5926 
5927 static void trace_create_eval_file(struct dentry *d_tracer)
5928 {
5929 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5930 			  NULL, &tracing_eval_map_fops);
5931 }
5932 
5933 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5934 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5935 static inline void trace_insert_eval_map_file(struct module *mod,
5936 			      struct trace_eval_map **start, int len) { }
5937 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5938 
5939 static void trace_insert_eval_map(struct module *mod,
5940 				  struct trace_eval_map **start, int len)
5941 {
5942 	struct trace_eval_map **map;
5943 
5944 	if (len <= 0)
5945 		return;
5946 
5947 	map = start;
5948 
5949 	trace_event_eval_update(map, len);
5950 
5951 	trace_insert_eval_map_file(mod, start, len);
5952 }
5953 
5954 static ssize_t
5955 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5956 		       size_t cnt, loff_t *ppos)
5957 {
5958 	struct trace_array *tr = filp->private_data;
5959 	char buf[MAX_TRACER_SIZE+2];
5960 	int r;
5961 
5962 	mutex_lock(&trace_types_lock);
5963 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5964 	mutex_unlock(&trace_types_lock);
5965 
5966 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5967 }
5968 
5969 int tracer_init(struct tracer *t, struct trace_array *tr)
5970 {
5971 	tracing_reset_online_cpus(&tr->array_buffer);
5972 	return t->init(tr);
5973 }
5974 
5975 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5976 {
5977 	int cpu;
5978 
5979 	for_each_tracing_cpu(cpu)
5980 		per_cpu_ptr(buf->data, cpu)->entries = val;
5981 }
5982 
5983 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5984 {
5985 	if (cpu == RING_BUFFER_ALL_CPUS) {
5986 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5987 	} else {
5988 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5989 	}
5990 }
5991 
5992 #ifdef CONFIG_TRACER_MAX_TRACE
5993 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5994 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5995 					struct array_buffer *size_buf, int cpu_id)
5996 {
5997 	int cpu, ret = 0;
5998 
5999 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6000 		for_each_tracing_cpu(cpu) {
6001 			ret = ring_buffer_resize(trace_buf->buffer,
6002 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6003 			if (ret < 0)
6004 				break;
6005 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6006 				per_cpu_ptr(size_buf->data, cpu)->entries;
6007 		}
6008 	} else {
6009 		ret = ring_buffer_resize(trace_buf->buffer,
6010 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6011 		if (ret == 0)
6012 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6013 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6014 	}
6015 
6016 	return ret;
6017 }
6018 #endif /* CONFIG_TRACER_MAX_TRACE */
6019 
6020 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6021 					unsigned long size, int cpu)
6022 {
6023 	int ret;
6024 
6025 	/*
6026 	 * If the kernel or the user changes the size of the ring buffer,
6027 	 * we use the size that was given, and we can forget about
6028 	 * expanding it later.
6029 	 */
6030 	trace_set_ring_buffer_expanded(tr);
6031 
6032 	/* May be called before buffers are initialized */
6033 	if (!tr->array_buffer.buffer)
6034 		return 0;
6035 
6036 	/* Do not allow tracing while resizing ring buffer */
6037 	tracing_stop_tr(tr);
6038 
6039 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6040 	if (ret < 0)
6041 		goto out_start;
6042 
6043 #ifdef CONFIG_TRACER_MAX_TRACE
6044 	if (!tr->allocated_snapshot)
6045 		goto out;
6046 
6047 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6048 	if (ret < 0) {
6049 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6050 						     &tr->array_buffer, cpu);
6051 		if (r < 0) {
6052 			/*
6053 			 * AARGH! We are left with different
6054 			 * size max buffer!!!!
6055 			 * The max buffer is our "snapshot" buffer.
6056 			 * When a tracer needs a snapshot (one of the
6057 			 * latency tracers), it swaps the max buffer
6058 			 * with the saved snapshot. We succeeded in updating
6059 			 * the size of the main buffer, but failed to update
6060 			 * the size of the max buffer. But when we tried
6061 			 * to reset the main buffer to the original size, we
6062 			 * failed there too. This is very unlikely to
6063 			 * happen, but if it does, warn and kill all
6064 			 * tracing.
6065 			 */
6066 			WARN_ON(1);
6067 			tracing_disabled = 1;
6068 		}
6069 		goto out_start;
6070 	}
6071 
6072 	update_buffer_entries(&tr->max_buffer, cpu);
6073 
6074  out:
6075 #endif /* CONFIG_TRACER_MAX_TRACE */
6076 
6077 	update_buffer_entries(&tr->array_buffer, cpu);
6078  out_start:
6079 	tracing_start_tr(tr);
6080 	return ret;
6081 }
6082 
6083 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6084 				  unsigned long size, int cpu_id)
6085 {
6086 	guard(mutex)(&trace_types_lock);
6087 
6088 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6089 		/* make sure this cpu is enabled in the mask */
6090 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6091 			return -EINVAL;
6092 	}
6093 
6094 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6095 }
6096 
6097 struct trace_mod_entry {
6098 	unsigned long	mod_addr;
6099 	char		mod_name[MODULE_NAME_LEN];
6100 };
6101 
6102 struct trace_scratch {
6103 	unsigned int		clock_id;
6104 	unsigned long		text_addr;
6105 	unsigned long		nr_entries;
6106 	struct trace_mod_entry	entries[];
6107 };
6108 
6109 static DEFINE_MUTEX(scratch_mutex);
6110 
6111 static int cmp_mod_entry(const void *key, const void *pivot)
6112 {
6113 	unsigned long addr = (unsigned long)key;
6114 	const struct trace_mod_entry *ent = pivot;
6115 
6116 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6117 		return 0;
6118 	else
6119 		return addr - ent->mod_addr;
6120 }
6121 
6122 /**
6123  * trace_adjust_address() - Adjust prev boot address to current address.
6124  * @tr: Persistent ring buffer's trace_array.
6125  * @addr: Address recorded in @tr which is to be adjusted.
6126  */
6127 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6128 {
6129 	struct trace_module_delta *module_delta;
6130 	struct trace_scratch *tscratch;
6131 	struct trace_mod_entry *entry;
6132 	unsigned long raddr;
6133 	int idx = 0, nr_entries;
6134 
6135 	/* If we don't have last boot delta, return the address */
6136 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6137 		return addr;
6138 
6139 	/* tr->module_delta must be protected by rcu. */
6140 	guard(rcu)();
6141 	tscratch = tr->scratch;
6142 	/* if there is no tscratch, module_delta must be NULL. */
6143 	module_delta = READ_ONCE(tr->module_delta);
6144 	if (!module_delta || !tscratch->nr_entries ||
6145 	    tscratch->entries[0].mod_addr > addr) {
6146 		raddr = addr + tr->text_delta;
6147 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6148 			is_kernel_rodata(raddr) ? raddr : addr;
6149 	}
6150 
6151 	/* Note that entries must be sorted. */
6152 	nr_entries = tscratch->nr_entries;
6153 	if (nr_entries == 1 ||
6154 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6155 		idx = nr_entries - 1;
6156 	else {
6157 		entry = __inline_bsearch((void *)addr,
6158 				tscratch->entries,
6159 				nr_entries - 1,
6160 				sizeof(tscratch->entries[0]),
6161 				cmp_mod_entry);
6162 		if (entry)
6163 			idx = entry - tscratch->entries;
6164 	}
6165 
6166 	return addr + module_delta->delta[idx];
6167 }
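
/*
 * Worked example with made-up addresses: if the previous boot loaded a
 * module's text at 0xffffffffc0200000 and the current boot loaded it at
 * 0xffffffffc0300000, the delta recorded for that entry (computed
 * elsewhere) would be +0x100000, so an address of 0xffffffffc0200840
 * taken from the persistent buffer is reported as 0xffffffffc0300840.
 */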
6168 
6169 #ifdef CONFIG_MODULES
6170 static int save_mod(struct module *mod, void *data)
6171 {
6172 	struct trace_array *tr = data;
6173 	struct trace_scratch *tscratch;
6174 	struct trace_mod_entry *entry;
6175 	unsigned int size;
6176 
6177 	tscratch = tr->scratch;
6178 	if (!tscratch)
6179 		return -1;
6180 	size = tr->scratch_size;
6181 
6182 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6183 		return -1;
6184 
6185 	entry = &tscratch->entries[tscratch->nr_entries];
6186 
6187 	tscratch->nr_entries++;
6188 
6189 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6190 	strscpy(entry->mod_name, mod->name);
6191 
6192 	return 0;
6193 }
6194 #else
6195 static int save_mod(struct module *mod, void *data)
6196 {
6197 	return 0;
6198 }
6199 #endif
6200 
6201 static void update_last_data(struct trace_array *tr)
6202 {
6203 	struct trace_module_delta *module_delta;
6204 	struct trace_scratch *tscratch;
6205 
6206 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6207 		return;
6208 
6209 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6210 		return;
6211 
6212 	/* Only if the buffer has previous boot data, clear and update it. */
6213 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6214 
6215 	/* Reset the module list and reload them */
6216 	if (tr->scratch) {
6217 		struct trace_scratch *tscratch = tr->scratch;
6218 
6219 		tscratch->clock_id = tr->clock_id;
6220 		memset(tscratch->entries, 0,
6221 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6222 		tscratch->nr_entries = 0;
6223 
6224 		guard(mutex)(&scratch_mutex);
6225 		module_for_each_mod(save_mod, tr);
6226 	}
6227 
6228 	/*
6229 	 * Need to clear all CPU buffers as there cannot be events
6230 	 * from the previous boot mixed with events from this boot,
6231 	 * as that will cause a confusing trace. Need to clear all
6232 	 * CPU buffers, even for those that may currently be offline.
6233 	 */
6234 	tracing_reset_all_cpus(&tr->array_buffer);
6235 
6236 	/* Using current data now */
6237 	tr->text_delta = 0;
6238 
6239 	if (!tr->scratch)
6240 		return;
6241 
6242 	tscratch = tr->scratch;
6243 	module_delta = READ_ONCE(tr->module_delta);
6244 	WRITE_ONCE(tr->module_delta, NULL);
6245 	kfree_rcu(module_delta, rcu);
6246 
6247 	/* Set the persistent ring buffer meta data to this address */
6248 	tscratch->text_addr = (unsigned long)_text;
6249 }
6250 
6251 /**
6252  * tracing_update_buffers - used by tracing facility to expand ring buffers
6253  * @tr: The tracing instance
6254  *
6255  * To save memory when tracing is never used on a system that has it
6256  * configured in, the ring buffers are initially set to a minimum size.
6257  * Once a user starts to use the tracing facility, they need to grow
6258  * to their default size.
6259  *
6260  * This function is to be called when a tracer is about to be used.
6261  */
6262 int tracing_update_buffers(struct trace_array *tr)
6263 {
6264 	int ret = 0;
6265 
6266 	mutex_lock(&trace_types_lock);
6267 
6268 	update_last_data(tr);
6269 
6270 	if (!tr->ring_buffer_expanded)
6271 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6272 						RING_BUFFER_ALL_CPUS);
6273 	mutex_unlock(&trace_types_lock);
6274 
6275 	return ret;
6276 }
6277 
6278 struct trace_option_dentry;
6279 
6280 static void
6281 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6282 
6283 /*
6284  * Used to clear out the tracer before deletion of an instance.
6285  * Must have trace_types_lock held.
6286  */
6287 static void tracing_set_nop(struct trace_array *tr)
6288 {
6289 	if (tr->current_trace == &nop_trace)
6290 		return;
6291 
6292 	tr->current_trace->enabled--;
6293 
6294 	if (tr->current_trace->reset)
6295 		tr->current_trace->reset(tr);
6296 
6297 	tr->current_trace = &nop_trace;
6298 }
6299 
6300 static bool tracer_options_updated;
6301 
6302 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6303 {
6304 	/* Only enable if the directory has been created already. */
6305 	if (!tr->dir)
6306 		return;
6307 
6308 	/* Only create trace option files after update_tracer_options finishes */
6309 	if (!tracer_options_updated)
6310 		return;
6311 
6312 	create_trace_option_files(tr, t);
6313 }
6314 
6315 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6316 {
6317 	struct tracer *t;
6318 #ifdef CONFIG_TRACER_MAX_TRACE
6319 	bool had_max_tr;
6320 #endif
6321 	int ret;
6322 
6323 	guard(mutex)(&trace_types_lock);
6324 
6325 	update_last_data(tr);
6326 
6327 	if (!tr->ring_buffer_expanded) {
6328 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6329 						RING_BUFFER_ALL_CPUS);
6330 		if (ret < 0)
6331 			return ret;
6332 		ret = 0;
6333 	}
6334 
6335 	for (t = trace_types; t; t = t->next) {
6336 		if (strcmp(t->name, buf) == 0)
6337 			break;
6338 	}
6339 	if (!t)
6340 		return -EINVAL;
6341 
6342 	if (t == tr->current_trace)
6343 		return 0;
6344 
6345 #ifdef CONFIG_TRACER_SNAPSHOT
6346 	if (t->use_max_tr) {
6347 		local_irq_disable();
6348 		arch_spin_lock(&tr->max_lock);
6349 		ret = tr->cond_snapshot ? -EBUSY : 0;
6350 		arch_spin_unlock(&tr->max_lock);
6351 		local_irq_enable();
6352 		if (ret)
6353 			return ret;
6354 	}
6355 #endif
6356 	/* Some tracers won't work on kernel command line */
6357 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6358 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6359 			t->name);
6360 		return -EINVAL;
6361 	}
6362 
6363 	/* Some tracers are only allowed for the top level buffer */
6364 	if (!trace_ok_for_array(t, tr))
6365 		return -EINVAL;
6366 
6367 	/* If trace pipe files are being read, we can't change the tracer */
6368 	if (tr->trace_ref)
6369 		return -EBUSY;
6370 
6371 	trace_branch_disable();
6372 
6373 	tr->current_trace->enabled--;
6374 
6375 	if (tr->current_trace->reset)
6376 		tr->current_trace->reset(tr);
6377 
6378 #ifdef CONFIG_TRACER_MAX_TRACE
6379 	had_max_tr = tr->current_trace->use_max_tr;
6380 
6381 	/* Current trace needs to be nop_trace before synchronize_rcu */
6382 	tr->current_trace = &nop_trace;
6383 
6384 	if (had_max_tr && !t->use_max_tr) {
6385 		/*
6386 		 * We need to make sure that the update_max_tr sees that
6387 		 * current_trace changed to nop_trace to keep it from
6388 		 * swapping the buffers after we resize it.
6389 		 * update_max_tr() is called with interrupts disabled,
6390 		 * so a synchronize_rcu() is sufficient.
6391 		 */
6392 		synchronize_rcu();
6393 		free_snapshot(tr);
6394 		tracing_disarm_snapshot(tr);
6395 	}
6396 
6397 	if (!had_max_tr && t->use_max_tr) {
6398 		ret = tracing_arm_snapshot_locked(tr);
6399 		if (ret)
6400 			return ret;
6401 	}
6402 #else
6403 	tr->current_trace = &nop_trace;
6404 #endif
6405 
6406 	if (t->init) {
6407 		ret = tracer_init(t, tr);
6408 		if (ret) {
6409 #ifdef CONFIG_TRACER_MAX_TRACE
6410 			if (t->use_max_tr)
6411 				tracing_disarm_snapshot(tr);
6412 #endif
6413 			return ret;
6414 		}
6415 	}
6416 
6417 	tr->current_trace = t;
6418 	tr->current_trace->enabled++;
6419 	trace_branch_enable(tr);
6420 
6421 	return 0;
6422 }
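
/*
 * From user space this path is normally driven through the current_tracer
 * file (illustrative session):
 *
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *   # cat /sys/kernel/tracing/current_tracer
 *   function_graph
 *   # echo nop > /sys/kernel/tracing/current_tracer
 *
 * A name not found in trace_types fails with EINVAL, and switching is
 * refused with EBUSY while trace_pipe readers hold a reference
 * (tr->trace_ref != 0).
 */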
6423 
6424 static ssize_t
6425 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6426 			size_t cnt, loff_t *ppos)
6427 {
6428 	struct trace_array *tr = filp->private_data;
6429 	char buf[MAX_TRACER_SIZE+1];
6430 	char *name;
6431 	size_t ret;
6432 	int err;
6433 
6434 	ret = cnt;
6435 
6436 	if (cnt > MAX_TRACER_SIZE)
6437 		cnt = MAX_TRACER_SIZE;
6438 
6439 	if (copy_from_user(buf, ubuf, cnt))
6440 		return -EFAULT;
6441 
6442 	buf[cnt] = 0;
6443 
6444 	name = strim(buf);
6445 
6446 	err = tracing_set_tracer(tr, name);
6447 	if (err)
6448 		return err;
6449 
6450 	*ppos += ret;
6451 
6452 	return ret;
6453 }
6454 
6455 static ssize_t
6456 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6457 		   size_t cnt, loff_t *ppos)
6458 {
6459 	char buf[64];
6460 	int r;
6461 
6462 	r = snprintf(buf, sizeof(buf), "%ld\n",
6463 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6464 	if (r > sizeof(buf))
6465 		r = sizeof(buf);
6466 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6467 }
6468 
6469 static ssize_t
6470 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6471 		    size_t cnt, loff_t *ppos)
6472 {
6473 	unsigned long val;
6474 	int ret;
6475 
6476 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6477 	if (ret)
6478 		return ret;
6479 
6480 	*ptr = val * 1000;
6481 
6482 	return cnt;
6483 }
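
/*
 * Unit handling example (illustrative): values written and read through
 * these helpers are in microseconds while the stored value is in
 * nanoseconds, e.g. for the tracing_thresh file:
 *
 *   # echo 500 > tracing_thresh     <- stores 500 * 1000 = 500000 ns
 *   # cat tracing_thresh
 *   500
 */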
6484 
6485 static ssize_t
6486 tracing_thresh_read(struct file *filp, char __user *ubuf,
6487 		    size_t cnt, loff_t *ppos)
6488 {
6489 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6490 }
6491 
6492 static ssize_t
6493 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6494 		     size_t cnt, loff_t *ppos)
6495 {
6496 	struct trace_array *tr = filp->private_data;
6497 	int ret;
6498 
6499 	guard(mutex)(&trace_types_lock);
6500 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6501 	if (ret < 0)
6502 		return ret;
6503 
6504 	if (tr->current_trace->update_thresh) {
6505 		ret = tr->current_trace->update_thresh(tr);
6506 		if (ret < 0)
6507 			return ret;
6508 	}
6509 
6510 	return cnt;
6511 }
6512 
6513 #ifdef CONFIG_TRACER_MAX_TRACE
6514 
6515 static ssize_t
6516 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6517 		     size_t cnt, loff_t *ppos)
6518 {
6519 	struct trace_array *tr = filp->private_data;
6520 
6521 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6522 }
6523 
6524 static ssize_t
6525 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6526 		      size_t cnt, loff_t *ppos)
6527 {
6528 	struct trace_array *tr = filp->private_data;
6529 
6530 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6531 }
6532 
6533 #endif
6534 
6535 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6536 {
6537 	if (cpu == RING_BUFFER_ALL_CPUS) {
6538 		if (cpumask_empty(tr->pipe_cpumask)) {
6539 			cpumask_setall(tr->pipe_cpumask);
6540 			return 0;
6541 		}
6542 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6543 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6544 		return 0;
6545 	}
6546 	return -EBUSY;
6547 }
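
/*
 * The pipe_cpumask bookkeeping above makes trace_pipe readers mutually
 * exclusive: one reader may claim all CPUs (trace_pipe) or individual
 * CPUs (per_cpu/cpuN/trace_pipe), but not both at the same time.
 * Illustrative failure:
 *
 *   # cat trace_pipe &
 *   # cat per_cpu/cpu0/trace_pipe
 *   cat: per_cpu/cpu0/trace_pipe: Device or resource busy
 */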
6548 
6549 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6550 {
6551 	if (cpu == RING_BUFFER_ALL_CPUS) {
6552 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6553 		cpumask_clear(tr->pipe_cpumask);
6554 	} else {
6555 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6556 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6557 	}
6558 }
6559 
6560 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6561 {
6562 	struct trace_array *tr = inode->i_private;
6563 	struct trace_iterator *iter;
6564 	int cpu;
6565 	int ret;
6566 
6567 	ret = tracing_check_open_get_tr(tr);
6568 	if (ret)
6569 		return ret;
6570 
6571 	mutex_lock(&trace_types_lock);
6572 	cpu = tracing_get_cpu(inode);
6573 	ret = open_pipe_on_cpu(tr, cpu);
6574 	if (ret)
6575 		goto fail_pipe_on_cpu;
6576 
6577 	/* create a buffer to store the information to pass to userspace */
6578 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6579 	if (!iter) {
6580 		ret = -ENOMEM;
6581 		goto fail_alloc_iter;
6582 	}
6583 
6584 	trace_seq_init(&iter->seq);
6585 	iter->trace = tr->current_trace;
6586 
6587 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6588 		ret = -ENOMEM;
6589 		goto fail;
6590 	}
6591 
6592 	/* trace pipe does not show start of buffer */
6593 	cpumask_setall(iter->started);
6594 
6595 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6596 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6597 
6598 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6599 	if (trace_clocks[tr->clock_id].in_ns)
6600 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6601 
6602 	iter->tr = tr;
6603 	iter->array_buffer = &tr->array_buffer;
6604 	iter->cpu_file = cpu;
6605 	mutex_init(&iter->mutex);
6606 	filp->private_data = iter;
6607 
6608 	if (iter->trace->pipe_open)
6609 		iter->trace->pipe_open(iter);
6610 
6611 	nonseekable_open(inode, filp);
6612 
6613 	tr->trace_ref++;
6614 
6615 	mutex_unlock(&trace_types_lock);
6616 	return ret;
6617 
6618 fail:
6619 	kfree(iter);
6620 fail_alloc_iter:
6621 	close_pipe_on_cpu(tr, cpu);
6622 fail_pipe_on_cpu:
6623 	__trace_array_put(tr);
6624 	mutex_unlock(&trace_types_lock);
6625 	return ret;
6626 }
6627 
6628 static int tracing_release_pipe(struct inode *inode, struct file *file)
6629 {
6630 	struct trace_iterator *iter = file->private_data;
6631 	struct trace_array *tr = inode->i_private;
6632 
6633 	mutex_lock(&trace_types_lock);
6634 
6635 	tr->trace_ref--;
6636 
6637 	if (iter->trace->pipe_close)
6638 		iter->trace->pipe_close(iter);
6639 	close_pipe_on_cpu(tr, iter->cpu_file);
6640 	mutex_unlock(&trace_types_lock);
6641 
6642 	free_trace_iter_content(iter);
6643 	kfree(iter);
6644 
6645 	trace_array_put(tr);
6646 
6647 	return 0;
6648 }
6649 
6650 static __poll_t
6651 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6652 {
6653 	struct trace_array *tr = iter->tr;
6654 
6655 	/* Iterators are static, they should be filled or empty */
6656 	if (trace_buffer_iter(iter, iter->cpu_file))
6657 		return EPOLLIN | EPOLLRDNORM;
6658 
6659 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6660 		/*
6661 		 * Always select as readable when in blocking mode
6662 		 */
6663 		return EPOLLIN | EPOLLRDNORM;
6664 	else
6665 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6666 					     filp, poll_table, iter->tr->buffer_percent);
6667 }
6668 
6669 static __poll_t
6670 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6671 {
6672 	struct trace_iterator *iter = filp->private_data;
6673 
6674 	return trace_poll(iter, filp, poll_table);
6675 }
6676 
6677 /* Must be called with iter->mutex held. */
6678 static int tracing_wait_pipe(struct file *filp)
6679 {
6680 	struct trace_iterator *iter = filp->private_data;
6681 	int ret;
6682 
6683 	while (trace_empty(iter)) {
6684 
6685 		if ((filp->f_flags & O_NONBLOCK)) {
6686 			return -EAGAIN;
6687 		}
6688 
6689 		/*
6690 		 * We block until we read something and tracing is disabled.
6691 		 * We still block if tracing is disabled, but we have never
6692 		 * read anything. This allows a user to cat this file, and
6693 		 * then enable tracing. But after we have read something,
6694 		 * we give an EOF when tracing is again disabled.
6695 		 *
6696 		 * iter->pos will be 0 if we haven't read anything.
6697 		 */
6698 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6699 			break;
6700 
6701 		mutex_unlock(&iter->mutex);
6702 
6703 		ret = wait_on_pipe(iter, 0);
6704 
6705 		mutex_lock(&iter->mutex);
6706 
6707 		if (ret)
6708 			return ret;
6709 	}
6710 
6711 	return 1;
6712 }
6713 
6714 /*
6715  * Consumer reader.
6716  */
6717 static ssize_t
6718 tracing_read_pipe(struct file *filp, char __user *ubuf,
6719 		  size_t cnt, loff_t *ppos)
6720 {
6721 	struct trace_iterator *iter = filp->private_data;
6722 	ssize_t sret;
6723 
6724 	/*
6725 	 * Avoid more than one consumer on a single file descriptor.
6726 	 * This is just a matter of trace coherency; the ring buffer itself
6727 	 * is protected.
6728 	 */
6729 	guard(mutex)(&iter->mutex);
6730 
6731 	/* return any leftover data */
6732 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6733 	if (sret != -EBUSY)
6734 		return sret;
6735 
6736 	trace_seq_init(&iter->seq);
6737 
6738 	if (iter->trace->read) {
6739 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6740 		if (sret)
6741 			return sret;
6742 	}
6743 
6744 waitagain:
6745 	sret = tracing_wait_pipe(filp);
6746 	if (sret <= 0)
6747 		return sret;
6748 
6749 	/* stop when tracing is finished */
6750 	if (trace_empty(iter))
6751 		return 0;
6752 
6753 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6754 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6755 
6756 	/* reset all but tr, trace, and overruns */
6757 	trace_iterator_reset(iter);
6758 	cpumask_clear(iter->started);
6759 	trace_seq_init(&iter->seq);
6760 
6761 	trace_event_read_lock();
6762 	trace_access_lock(iter->cpu_file);
6763 	while (trace_find_next_entry_inc(iter) != NULL) {
6764 		enum print_line_t ret;
6765 		int save_len = iter->seq.seq.len;
6766 
6767 		ret = print_trace_line(iter);
6768 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6769 			/*
6770 			 * If one print_trace_line() fills entire trace_seq in one shot,
6771 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6772 			 * In this case, we need to consume it, otherwise the loop will peek
6773 			 * this event next time, resulting in an infinite loop.
6774 			 */
6775 			if (save_len == 0) {
6776 				iter->seq.full = 0;
6777 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6778 				trace_consume(iter);
6779 				break;
6780 			}
6781 
6782 			/* In other cases, don't print partial lines */
6783 			iter->seq.seq.len = save_len;
6784 			break;
6785 		}
6786 		if (ret != TRACE_TYPE_NO_CONSUME)
6787 			trace_consume(iter);
6788 
6789 		if (trace_seq_used(&iter->seq) >= cnt)
6790 			break;
6791 
6792 		/*
6793 		 * Setting the full flag means we reached the trace_seq buffer
6794 		 * size and we should have left via the partial output condition above.
6795 		 * One of the trace_seq_* functions is not being used properly.
6796 		 */
6797 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6798 			  iter->ent->type);
6799 	}
6800 	trace_access_unlock(iter->cpu_file);
6801 	trace_event_read_unlock();
6802 
6803 	/* Now copy what we have to the user */
6804 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6805 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6806 		trace_seq_init(&iter->seq);
6807 
6808 	/*
6809 	 * If there was nothing to send to user, in spite of consuming trace
6810 	 * entries, go back to wait for more entries.
6811 	 */
6812 	if (sret == -EBUSY)
6813 		goto waitagain;
6814 
6815 	return sret;
6816 }
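
/*
 * Note on semantics: unlike the "trace" file, trace_pipe is a consuming
 * reader. Each entry handed to user space is removed from the ring buffer
 * via trace_consume() above, so data read here will not show up in a
 * later read of "trace".
 */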
6817 
6818 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6819 				     unsigned int idx)
6820 {
6821 	__free_page(spd->pages[idx]);
6822 }
6823 
6824 static size_t
6825 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6826 {
6827 	size_t count;
6828 	int save_len;
6829 	int ret;
6830 
6831 	/* Seq buffer is page-sized, exactly what we need. */
6832 	for (;;) {
6833 		save_len = iter->seq.seq.len;
6834 		ret = print_trace_line(iter);
6835 
6836 		if (trace_seq_has_overflowed(&iter->seq)) {
6837 			iter->seq.seq.len = save_len;
6838 			break;
6839 		}
6840 
6841 		/*
6842 		 * This should not be hit, because it should only
6843 		 * be set if the iter->seq overflowed. But check it
6844 		 * anyway to be safe.
6845 		 */
6846 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6847 			iter->seq.seq.len = save_len;
6848 			break;
6849 		}
6850 
6851 		count = trace_seq_used(&iter->seq) - save_len;
6852 		if (rem < count) {
6853 			rem = 0;
6854 			iter->seq.seq.len = save_len;
6855 			break;
6856 		}
6857 
6858 		if (ret != TRACE_TYPE_NO_CONSUME)
6859 			trace_consume(iter);
6860 		rem -= count;
6861 		if (!trace_find_next_entry_inc(iter))	{
6862 			rem = 0;
6863 			iter->ent = NULL;
6864 			break;
6865 		}
6866 	}
6867 
6868 	return rem;
6869 }
6870 
6871 static ssize_t tracing_splice_read_pipe(struct file *filp,
6872 					loff_t *ppos,
6873 					struct pipe_inode_info *pipe,
6874 					size_t len,
6875 					unsigned int flags)
6876 {
6877 	struct page *pages_def[PIPE_DEF_BUFFERS];
6878 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6879 	struct trace_iterator *iter = filp->private_data;
6880 	struct splice_pipe_desc spd = {
6881 		.pages		= pages_def,
6882 		.partial	= partial_def,
6883 		.nr_pages	= 0, /* This gets updated below. */
6884 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6885 		.ops		= &default_pipe_buf_ops,
6886 		.spd_release	= tracing_spd_release_pipe,
6887 	};
6888 	ssize_t ret;
6889 	size_t rem;
6890 	unsigned int i;
6891 
6892 	if (splice_grow_spd(pipe, &spd))
6893 		return -ENOMEM;
6894 
6895 	mutex_lock(&iter->mutex);
6896 
6897 	if (iter->trace->splice_read) {
6898 		ret = iter->trace->splice_read(iter, filp,
6899 					       ppos, pipe, len, flags);
6900 		if (ret)
6901 			goto out_err;
6902 	}
6903 
6904 	ret = tracing_wait_pipe(filp);
6905 	if (ret <= 0)
6906 		goto out_err;
6907 
6908 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6909 		ret = -EFAULT;
6910 		goto out_err;
6911 	}
6912 
6913 	trace_event_read_lock();
6914 	trace_access_lock(iter->cpu_file);
6915 
6916 	/* Fill as many pages as possible. */
6917 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6918 		spd.pages[i] = alloc_page(GFP_KERNEL);
6919 		if (!spd.pages[i])
6920 			break;
6921 
6922 		rem = tracing_fill_pipe_page(rem, iter);
6923 
6924 		/* Copy the data into the page, so we can start over. */
6925 		ret = trace_seq_to_buffer(&iter->seq,
6926 					  page_address(spd.pages[i]),
6927 					  min((size_t)trace_seq_used(&iter->seq),
6928 						  (size_t)PAGE_SIZE));
6929 		if (ret < 0) {
6930 			__free_page(spd.pages[i]);
6931 			break;
6932 		}
6933 		spd.partial[i].offset = 0;
6934 		spd.partial[i].len = ret;
6935 
6936 		trace_seq_init(&iter->seq);
6937 	}
6938 
6939 	trace_access_unlock(iter->cpu_file);
6940 	trace_event_read_unlock();
6941 	mutex_unlock(&iter->mutex);
6942 
6943 	spd.nr_pages = i;
6944 
6945 	if (i)
6946 		ret = splice_to_pipe(pipe, &spd);
6947 	else
6948 		ret = 0;
6949 out:
6950 	splice_shrink_spd(&spd);
6951 	return ret;
6952 
6953 out_err:
6954 	mutex_unlock(&iter->mutex);
6955 	goto out;
6956 }
6957 
6958 static ssize_t
6959 tracing_entries_read(struct file *filp, char __user *ubuf,
6960 		     size_t cnt, loff_t *ppos)
6961 {
6962 	struct inode *inode = file_inode(filp);
6963 	struct trace_array *tr = inode->i_private;
6964 	int cpu = tracing_get_cpu(inode);
6965 	char buf[64];
6966 	int r = 0;
6967 	ssize_t ret;
6968 
6969 	mutex_lock(&trace_types_lock);
6970 
6971 	if (cpu == RING_BUFFER_ALL_CPUS) {
6972 		int cpu, buf_size_same;
6973 		unsigned long size;
6974 
6975 		size = 0;
6976 		buf_size_same = 1;
6977 		/* check if all cpu sizes are the same */
6978 		for_each_tracing_cpu(cpu) {
6979 			/* fill in the size from first enabled cpu */
6980 			if (size == 0)
6981 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6982 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6983 				buf_size_same = 0;
6984 				break;
6985 			}
6986 		}
6987 
6988 		if (buf_size_same) {
6989 			if (!tr->ring_buffer_expanded)
6990 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6991 					    size >> 10,
6992 					    trace_buf_size >> 10);
6993 			else
6994 				r = sprintf(buf, "%lu\n", size >> 10);
6995 		} else
6996 			r = sprintf(buf, "X\n");
6997 	} else
6998 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6999 
7000 	mutex_unlock(&trace_types_lock);
7001 
7002 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7003 	return ret;
7004 }
7005 
7006 static ssize_t
7007 tracing_entries_write(struct file *filp, const char __user *ubuf,
7008 		      size_t cnt, loff_t *ppos)
7009 {
7010 	struct inode *inode = file_inode(filp);
7011 	struct trace_array *tr = inode->i_private;
7012 	unsigned long val;
7013 	int ret;
7014 
7015 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7016 	if (ret)
7017 		return ret;
7018 
7019 	/* must have at least 1 entry */
7020 	if (!val)
7021 		return -EINVAL;
7022 
7023 	/* value is in KB */
7024 	val <<= 10;
7025 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7026 	if (ret < 0)
7027 		return ret;
7028 
7029 	*ppos += cnt;
7030 
7031 	return cnt;
7032 }
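
/*
 * Illustrative use of the handlers above through buffer_size_kb (the
 * value is in KiB and must be non-zero):
 *
 *   # echo 8192 > buffer_size_kb               <- all CPUs
 *   # echo 4096 > per_cpu/cpu1/buffer_size_kb  <- one CPU only
 *   # cat buffer_size_kb
 *   X
 *
 * "X" is what tracing_entries_read() prints once the per-CPU sizes no
 * longer match.
 */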
7033 
7034 static ssize_t
7035 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7036 				size_t cnt, loff_t *ppos)
7037 {
7038 	struct trace_array *tr = filp->private_data;
7039 	char buf[64];
7040 	int r, cpu;
7041 	unsigned long size = 0, expanded_size = 0;
7042 
7043 	mutex_lock(&trace_types_lock);
7044 	for_each_tracing_cpu(cpu) {
7045 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7046 		if (!tr->ring_buffer_expanded)
7047 			expanded_size += trace_buf_size >> 10;
7048 	}
7049 	if (tr->ring_buffer_expanded)
7050 		r = sprintf(buf, "%lu\n", size);
7051 	else
7052 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7053 	mutex_unlock(&trace_types_lock);
7054 
7055 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7056 }
7057 
7058 #define LAST_BOOT_HEADER ((void *)1)
7059 
7060 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7061 {
7062 	struct trace_array *tr = m->private;
7063 	struct trace_scratch *tscratch = tr->scratch;
7064 	unsigned int index = *pos;
7065 
7066 	(*pos)++;
7067 
7068 	if (*pos == 1)
7069 		return LAST_BOOT_HEADER;
7070 
7071 	/* Only show offsets of the last boot data */
7072 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7073 		return NULL;
7074 
7075 	/* *pos 0 is for the header, 1 is for the first module */
7076 	index--;
7077 
7078 	if (index >= tscratch->nr_entries)
7079 		return NULL;
7080 
7081 	return &tscratch->entries[index];
7082 }
7083 
7084 static void *l_start(struct seq_file *m, loff_t *pos)
7085 {
7086 	mutex_lock(&scratch_mutex);
7087 
7088 	return l_next(m, NULL, pos);
7089 }
7090 
7091 static void l_stop(struct seq_file *m, void *p)
7092 {
7093 	mutex_unlock(&scratch_mutex);
7094 }
7095 
7096 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7097 {
7098 	struct trace_scratch *tscratch = tr->scratch;
7099 
7100 	/*
7101 	 * Do not leak KASLR address. This only shows the KASLR address of
7102 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7103 	 * flag gets cleared, and this should only report "current".
7104 	 * Otherwise it shows the KASLR address from the previous boot which
7105 	 * should not be the same as the current boot.
7106 	 */
7107 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7108 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7109 	else
7110 		seq_puts(m, "# Current\n");
7111 }
7112 
7113 static int l_show(struct seq_file *m, void *v)
7114 {
7115 	struct trace_array *tr = m->private;
7116 	struct trace_mod_entry *entry = v;
7117 
7118 	if (v == LAST_BOOT_HEADER) {
7119 		show_last_boot_header(m, tr);
7120 		return 0;
7121 	}
7122 
7123 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7124 	return 0;
7125 }
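
/*
 * Illustrative output of this seq_file (shown through the last_boot_info
 * file) while the persistent buffer still holds data from the previous
 * boot; the addresses and module names are made up:
 *
 *   ffffffff81000000	[kernel]
 *   ffffffffc0200000	ext4
 *   ffffffffc0350000	xfs
 *
 * Once the buffer has been reset for the current boot, only "# Current"
 * is printed (see show_last_boot_header() above).
 */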
7126 
7127 static const struct seq_operations last_boot_seq_ops = {
7128 	.start		= l_start,
7129 	.next		= l_next,
7130 	.stop		= l_stop,
7131 	.show		= l_show,
7132 };
7133 
7134 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7135 {
7136 	struct trace_array *tr = inode->i_private;
7137 	struct seq_file *m;
7138 	int ret;
7139 
7140 	ret = tracing_check_open_get_tr(tr);
7141 	if (ret)
7142 		return ret;
7143 
7144 	ret = seq_open(file, &last_boot_seq_ops);
7145 	if (ret) {
7146 		trace_array_put(tr);
7147 		return ret;
7148 	}
7149 
7150 	m = file->private_data;
7151 	m->private = tr;
7152 
7153 	return 0;
7154 }
7155 
7156 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7157 {
7158 	struct trace_array *tr = inode->i_private;
7159 	int cpu = tracing_get_cpu(inode);
7160 	int ret;
7161 
7162 	ret = tracing_check_open_get_tr(tr);
7163 	if (ret)
7164 		return ret;
7165 
7166 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7167 	if (ret < 0)
7168 		__trace_array_put(tr);
7169 	return ret;
7170 }
7171 
7172 static ssize_t
7173 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7174 			  size_t cnt, loff_t *ppos)
7175 {
7176 	/*
7177 	 * There is no need to read what the user has written; this function
7178 	 * only exists so that using "echo" here does not return an error
7179 	 */
7180 
7181 	*ppos += cnt;
7182 
7183 	return cnt;
7184 }
7185 
7186 static int
7187 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7188 {
7189 	struct trace_array *tr = inode->i_private;
7190 
7191 	/* disable tracing ? */
7192 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7193 		tracer_tracing_off(tr);
7194 	/* resize the ring buffer to 0 */
7195 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7196 
7197 	trace_array_put(tr);
7198 
7199 	return 0;
7200 }
7201 
7202 #define TRACE_MARKER_MAX_SIZE		4096
7203 
7204 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
7205 				      size_t cnt, unsigned long ip)
7206 {
7207 	struct ring_buffer_event *event;
7208 	enum event_trigger_type tt = ETT_NONE;
7209 	struct trace_buffer *buffer;
7210 	struct print_entry *entry;
7211 	int meta_size;
7212 	ssize_t written;
7213 	size_t size;
7214 	int len;
7215 
7216 /* Used in tracing_mark_raw_write() as well */
7217 #define FAULTED_STR "<faulted>"
7218 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7219 
7220 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7221  again:
7222 	size = cnt + meta_size;
7223 
7224 	/* If less than "<faulted>", then make sure we can still add that */
7225 	if (cnt < FAULTED_SIZE)
7226 		size += FAULTED_SIZE - cnt;
7227 
7228 	buffer = tr->array_buffer.buffer;
7229 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7230 					    tracing_gen_ctx());
7231 	if (unlikely(!event)) {
7232 		/*
7233 		 * If the size was greater than what was allowed, then
7234 		 * make it smaller and try again.
7235 		 */
7236 		if (size > ring_buffer_max_event_size(buffer)) {
7237 			/* With cnt < FAULTED_SIZE, size should never be bigger than max */
7238 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7239 				return -EBADF;
7240 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7241 			/* The above should only happen once */
7242 			if (WARN_ON_ONCE(cnt + meta_size == size))
7243 				return -EBADF;
7244 			goto again;
7245 		}
7246 
7247 		/* Ring buffer disabled, return as if not open for write */
7248 		return -EBADF;
7249 	}
7250 
7251 	entry = ring_buffer_event_data(event);
7252 	entry->ip = ip;
7253 
7254 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7255 	if (len) {
7256 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7257 		cnt = FAULTED_SIZE;
7258 		written = -EFAULT;
7259 	} else
7260 		written = cnt;
7261 
7262 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7263 		/* do not add \n before testing triggers, but add \0 */
7264 		entry->buf[cnt] = '\0';
7265 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7266 	}
7267 
7268 	if (entry->buf[cnt - 1] != '\n') {
7269 		entry->buf[cnt] = '\n';
7270 		entry->buf[cnt + 1] = '\0';
7271 	} else
7272 		entry->buf[cnt] = '\0';
7273 
7274 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7275 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7276 	__buffer_unlock_commit(buffer, event);
7277 
7278 	if (tt)
7279 		event_triggers_post_call(tr->trace_marker_file, tt);
7280 
7281 	return written;
7282 }
7283 
7284 static ssize_t
7285 tracing_mark_write(struct file *filp, const char __user *ubuf,
7286 					size_t cnt, loff_t *fpos)
7287 {
7288 	struct trace_array *tr = filp->private_data;
7289 	ssize_t written = -ENODEV;
7290 	unsigned long ip;
7291 
7292 	if (tracing_disabled)
7293 		return -EINVAL;
7294 
7295 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7296 		return -EINVAL;
7297 
7298 	if ((ssize_t)cnt < 0)
7299 		return -EINVAL;
7300 
7301 	if (cnt > TRACE_MARKER_MAX_SIZE)
7302 		cnt = TRACE_MARKER_MAX_SIZE;
7303 
7304 	/* The selftests expect the recorded IP to be this function's address */
7305 	ip = _THIS_IP_;
7306 
7307 	/* The global trace_marker can go to multiple instances */
7308 	if (tr == &global_trace) {
7309 		guard(rcu)();
7310 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7311 			written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7312 			if (written < 0)
7313 				break;
7314 		}
7315 	} else {
7316 		written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7317 	}
7318 
7319 	return written;
7320 }
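
/*
 * Illustrative use from user space: any string written to the trace_marker
 * file is injected into the trace as a print event (capped at
 * TRACE_MARKER_MAX_SIZE bytes). The pid/CPU/timestamp below are made up:
 *
 *   # echo "hello from user space" > trace_marker
 *   # grep tracing_mark_write trace
 *       bash-1234  [002] .....  100.123456: tracing_mark_write: hello from user space
 */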
7321 
7322 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7323 					  const char __user *ubuf, size_t cnt)
7324 {
7325 	struct ring_buffer_event *event;
7326 	struct trace_buffer *buffer;
7327 	struct raw_data_entry *entry;
7328 	ssize_t written;
7329 	int size;
7330 	int len;
7331 
7332 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7333 
7334 	size = sizeof(*entry) + cnt;
7335 	if (cnt < FAULT_SIZE_ID)
7336 		size += FAULT_SIZE_ID - cnt;
7337 
7338 	buffer = tr->array_buffer.buffer;
7339 
7340 	if (size > ring_buffer_max_event_size(buffer))
7341 		return -EINVAL;
7342 
7343 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7344 					    tracing_gen_ctx());
7345 	if (!event)
7346 		/* Ring buffer disabled, return as if not open for write */
7347 		return -EBADF;
7348 
7349 	entry = ring_buffer_event_data(event);
7350 
7351 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7352 	if (len) {
7353 		entry->id = -1;
7354 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7355 		written = -EFAULT;
7356 	} else
7357 		written = cnt;
7358 
7359 	__buffer_unlock_commit(buffer, event);
7360 
7361 	return written;
7362 }
7363 
7364 static ssize_t
7365 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7366 					size_t cnt, loff_t *fpos)
7367 {
7368 	struct trace_array *tr = filp->private_data;
7369 	ssize_t written = -ENODEV;
7370 
7371 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7372 
7373 	if (tracing_disabled)
7374 		return -EINVAL;
7375 
7376 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7377 		return -EINVAL;
7378 
7379 	/* The marker must at least have a tag id */
7380 	if (cnt < sizeof(unsigned int))
7381 		return -EINVAL;
7382 
7383 	/* The global trace_marker_raw can go to multiple instances */
7384 	if (tr == &global_trace) {
7385 		guard(rcu)();
7386 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7387 			written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7388 			if (written < 0)
7389 				break;
7390 		}
7391 	} else {
7392 		written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7393 	}
7394 
7395 	return written;
7396 }
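
/*
 * A hedged user-space sketch for the trace_marker_raw interface above.
 * The path, id value and payload layout are assumptions; the kernel only
 * requires that the write begins with an integer tag id:
 *
 *	struct {
 *		int	id;
 *		char	payload[8];
 *	} raw = { .id = 42, .payload = "rawdata" };
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, &raw, sizeof(raw));
 *		close(fd);
 *	}
 */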
7397 
7398 static int tracing_clock_show(struct seq_file *m, void *v)
7399 {
7400 	struct trace_array *tr = m->private;
7401 	int i;
7402 
7403 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7404 		seq_printf(m,
7405 			"%s%s%s%s", i ? " " : "",
7406 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7407 			i == tr->clock_id ? "]" : "");
7408 	seq_putc(m, '\n');
7409 
7410 	return 0;
7411 }
7412 
7413 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7414 {
7415 	int i;
7416 
7417 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7418 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7419 			break;
7420 	}
7421 	if (i == ARRAY_SIZE(trace_clocks))
7422 		return -EINVAL;
7423 
7424 	mutex_lock(&trace_types_lock);
7425 
7426 	tr->clock_id = i;
7427 
7428 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7429 
7430 	/*
7431 	 * New clock may not be consistent with the previous clock.
7432 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7433 	 */
7434 	tracing_reset_online_cpus(&tr->array_buffer);
7435 
7436 #ifdef CONFIG_TRACER_MAX_TRACE
7437 	if (tr->max_buffer.buffer)
7438 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7439 	tracing_reset_online_cpus(&tr->max_buffer);
7440 #endif
7441 
7442 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7443 		struct trace_scratch *tscratch = tr->scratch;
7444 
7445 		tscratch->clock_id = i;
7446 	}
7447 
7448 	mutex_unlock(&trace_types_lock);
7449 
7450 	return 0;
7451 }
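
/*
 * tracing_set_clock() can also be called from kernel code that owns a
 * trace_array (the boot-time tracer does this).  A minimal sketch, under
 * the assumption that "my_tr" is a valid trace_array and "mono" is one of
 * the names listed in trace_clocks[]:
 *
 *	if (tracing_set_clock(my_tr, "mono"))
 *		pr_warn("unknown trace clock\n");
 */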
7452 
7453 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7454 				   size_t cnt, loff_t *fpos)
7455 {
7456 	struct seq_file *m = filp->private_data;
7457 	struct trace_array *tr = m->private;
7458 	char buf[64];
7459 	const char *clockstr;
7460 	int ret;
7461 
7462 	if (cnt >= sizeof(buf))
7463 		return -EINVAL;
7464 
7465 	if (copy_from_user(buf, ubuf, cnt))
7466 		return -EFAULT;
7467 
7468 	buf[cnt] = 0;
7469 
7470 	clockstr = strstrip(buf);
7471 
7472 	ret = tracing_set_clock(tr, clockstr);
7473 	if (ret)
7474 		return ret;
7475 
7476 	*fpos += cnt;
7477 
7478 	return cnt;
7479 }
7480 
7481 static int tracing_clock_open(struct inode *inode, struct file *file)
7482 {
7483 	struct trace_array *tr = inode->i_private;
7484 	int ret;
7485 
7486 	ret = tracing_check_open_get_tr(tr);
7487 	if (ret)
7488 		return ret;
7489 
7490 	ret = single_open(file, tracing_clock_show, inode->i_private);
7491 	if (ret < 0)
7492 		trace_array_put(tr);
7493 
7494 	return ret;
7495 }
7496 
7497 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7498 {
7499 	struct trace_array *tr = m->private;
7500 
7501 	mutex_lock(&trace_types_lock);
7502 
7503 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7504 		seq_puts(m, "delta [absolute]\n");
7505 	else
7506 		seq_puts(m, "[delta] absolute\n");
7507 
7508 	mutex_unlock(&trace_types_lock);
7509 
7510 	return 0;
7511 }
7512 
7513 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7514 {
7515 	struct trace_array *tr = inode->i_private;
7516 	int ret;
7517 
7518 	ret = tracing_check_open_get_tr(tr);
7519 	if (ret)
7520 		return ret;
7521 
7522 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7523 	if (ret < 0)
7524 		trace_array_put(tr);
7525 
7526 	return ret;
7527 }
7528 
7529 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7530 {
7531 	if (rbe == this_cpu_read(trace_buffered_event))
7532 		return ring_buffer_time_stamp(buffer);
7533 
7534 	return ring_buffer_event_time_stamp(buffer, rbe);
7535 }
7536 
7537 /*
7538  * Enable or disable use of the per CPU trace_buffered_event when possible.
7539  */
7540 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7541 {
7542 	guard(mutex)(&trace_types_lock);
7543 
7544 	if (set && tr->no_filter_buffering_ref++)
7545 		return 0;
7546 
7547 	if (!set) {
7548 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7549 			return -EINVAL;
7550 
7551 		--tr->no_filter_buffering_ref;
7552 	}
7553 
7554 	return 0;
7555 }
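
/*
 * Calls to tracing_set_filter_buffering() are reference counted and are
 * expected to come in pairs.  A hedged sketch of the usual pattern (the
 * surrounding code is assumed, e.g. a trigger that needs the timestamp
 * taken at commit time rather than from the buffered event):
 *
 *	tracing_set_filter_buffering(tr, true);
 *	// ... events are committed directly, without the buffered copy ...
 *	tracing_set_filter_buffering(tr, false);
 */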
7556 
7557 struct ftrace_buffer_info {
7558 	struct trace_iterator	iter;
7559 	void			*spare;
7560 	unsigned int		spare_cpu;
7561 	unsigned int		spare_size;
7562 	unsigned int		read;
7563 };
7564 
7565 #ifdef CONFIG_TRACER_SNAPSHOT
7566 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7567 {
7568 	struct trace_array *tr = inode->i_private;
7569 	struct trace_iterator *iter;
7570 	struct seq_file *m;
7571 	int ret;
7572 
7573 	ret = tracing_check_open_get_tr(tr);
7574 	if (ret)
7575 		return ret;
7576 
7577 	if (file->f_mode & FMODE_READ) {
7578 		iter = __tracing_open(inode, file, true);
7579 		if (IS_ERR(iter))
7580 			ret = PTR_ERR(iter);
7581 	} else {
7582 		/* Writes still need the seq_file to hold the private data */
7583 		ret = -ENOMEM;
7584 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7585 		if (!m)
7586 			goto out;
7587 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7588 		if (!iter) {
7589 			kfree(m);
7590 			goto out;
7591 		}
7592 		ret = 0;
7593 
7594 		iter->tr = tr;
7595 		iter->array_buffer = &tr->max_buffer;
7596 		iter->cpu_file = tracing_get_cpu(inode);
7597 		m->private = iter;
7598 		file->private_data = m;
7599 	}
7600 out:
7601 	if (ret < 0)
7602 		trace_array_put(tr);
7603 
7604 	return ret;
7605 }
7606 
7607 static void tracing_swap_cpu_buffer(void *tr)
7608 {
7609 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7610 }
7611 
7612 static ssize_t
7613 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7614 		       loff_t *ppos)
7615 {
7616 	struct seq_file *m = filp->private_data;
7617 	struct trace_iterator *iter = m->private;
7618 	struct trace_array *tr = iter->tr;
7619 	unsigned long val;
7620 	int ret;
7621 
7622 	ret = tracing_update_buffers(tr);
7623 	if (ret < 0)
7624 		return ret;
7625 
7626 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7627 	if (ret)
7628 		return ret;
7629 
7630 	guard(mutex)(&trace_types_lock);
7631 
7632 	if (tr->current_trace->use_max_tr)
7633 		return -EBUSY;
7634 
7635 	local_irq_disable();
7636 	arch_spin_lock(&tr->max_lock);
7637 	if (tr->cond_snapshot)
7638 		ret = -EBUSY;
7639 	arch_spin_unlock(&tr->max_lock);
7640 	local_irq_enable();
7641 	if (ret)
7642 		return ret;
7643 
7644 	switch (val) {
7645 	case 0:
7646 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7647 			return -EINVAL;
7648 		if (tr->allocated_snapshot)
7649 			free_snapshot(tr);
7650 		break;
7651 	case 1:
7652 /* Only allow per-cpu swap if the ring buffer supports it */
7653 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7654 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7655 			return -EINVAL;
7656 #endif
7657 		if (tr->allocated_snapshot)
7658 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7659 					&tr->array_buffer, iter->cpu_file);
7660 
7661 		ret = tracing_arm_snapshot_locked(tr);
7662 		if (ret)
7663 			return ret;
7664 
7665 		/* Now, we're going to swap */
7666 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7667 			local_irq_disable();
7668 			update_max_tr(tr, current, smp_processor_id(), NULL);
7669 			local_irq_enable();
7670 		} else {
7671 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7672 						 (void *)tr, 1);
7673 		}
7674 		tracing_disarm_snapshot(tr);
7675 		break;
7676 	default:
7677 		if (tr->allocated_snapshot) {
7678 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7679 				tracing_reset_online_cpus(&tr->max_buffer);
7680 			else
7681 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7682 		}
7683 		break;
7684 	}
7685 
7686 	if (ret >= 0) {
7687 		*ppos += cnt;
7688 		ret = cnt;
7689 	}
7690 
7691 	return ret;
7692 }
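
/*
 * The value written to the "snapshot" file selects the action taken above:
 * 0 frees the snapshot buffer, 1 allocates it (if needed) and swaps it with
 * the live buffer, and any other value just clears the snapshot contents.
 * A hedged user-space sketch (the tracefs path is an assumption):
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// take a snapshot now
 *		close(fd);
 *	}
 */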
7693 
7694 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7695 {
7696 	struct seq_file *m = file->private_data;
7697 	int ret;
7698 
7699 	ret = tracing_release(inode, file);
7700 
7701 	if (file->f_mode & FMODE_READ)
7702 		return ret;
7703 
7704 	/* If write only, the seq_file is just a stub */
7705 	if (m)
7706 		kfree(m->private);
7707 	kfree(m);
7708 
7709 	return 0;
7710 }
7711 
7712 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7713 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7714 				    size_t count, loff_t *ppos);
7715 static int tracing_buffers_release(struct inode *inode, struct file *file);
7716 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7717 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7718 
7719 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7720 {
7721 	struct ftrace_buffer_info *info;
7722 	int ret;
7723 
7724 	/* The following open call also handles the tracefs lockdown check */
7725 	ret = tracing_buffers_open(inode, filp);
7726 	if (ret < 0)
7727 		return ret;
7728 
7729 	info = filp->private_data;
7730 
7731 	if (info->iter.trace->use_max_tr) {
7732 		tracing_buffers_release(inode, filp);
7733 		return -EBUSY;
7734 	}
7735 
7736 	info->iter.snapshot = true;
7737 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7738 
7739 	return ret;
7740 }
7741 
7742 #endif /* CONFIG_TRACER_SNAPSHOT */
7743 
7744 
7745 static const struct file_operations tracing_thresh_fops = {
7746 	.open		= tracing_open_generic,
7747 	.read		= tracing_thresh_read,
7748 	.write		= tracing_thresh_write,
7749 	.llseek		= generic_file_llseek,
7750 };
7751 
7752 #ifdef CONFIG_TRACER_MAX_TRACE
7753 static const struct file_operations tracing_max_lat_fops = {
7754 	.open		= tracing_open_generic_tr,
7755 	.read		= tracing_max_lat_read,
7756 	.write		= tracing_max_lat_write,
7757 	.llseek		= generic_file_llseek,
7758 	.release	= tracing_release_generic_tr,
7759 };
7760 #endif
7761 
7762 static const struct file_operations set_tracer_fops = {
7763 	.open		= tracing_open_generic_tr,
7764 	.read		= tracing_set_trace_read,
7765 	.write		= tracing_set_trace_write,
7766 	.llseek		= generic_file_llseek,
7767 	.release	= tracing_release_generic_tr,
7768 };
7769 
7770 static const struct file_operations tracing_pipe_fops = {
7771 	.open		= tracing_open_pipe,
7772 	.poll		= tracing_poll_pipe,
7773 	.read		= tracing_read_pipe,
7774 	.splice_read	= tracing_splice_read_pipe,
7775 	.release	= tracing_release_pipe,
7776 };
7777 
7778 static const struct file_operations tracing_entries_fops = {
7779 	.open		= tracing_open_generic_tr,
7780 	.read		= tracing_entries_read,
7781 	.write		= tracing_entries_write,
7782 	.llseek		= generic_file_llseek,
7783 	.release	= tracing_release_generic_tr,
7784 };
7785 
7786 static const struct file_operations tracing_buffer_meta_fops = {
7787 	.open		= tracing_buffer_meta_open,
7788 	.read		= seq_read,
7789 	.llseek		= seq_lseek,
7790 	.release	= tracing_seq_release,
7791 };
7792 
7793 static const struct file_operations tracing_total_entries_fops = {
7794 	.open		= tracing_open_generic_tr,
7795 	.read		= tracing_total_entries_read,
7796 	.llseek		= generic_file_llseek,
7797 	.release	= tracing_release_generic_tr,
7798 };
7799 
7800 static const struct file_operations tracing_free_buffer_fops = {
7801 	.open		= tracing_open_generic_tr,
7802 	.write		= tracing_free_buffer_write,
7803 	.release	= tracing_free_buffer_release,
7804 };
7805 
7806 static const struct file_operations tracing_mark_fops = {
7807 	.open		= tracing_mark_open,
7808 	.write		= tracing_mark_write,
7809 	.release	= tracing_release_generic_tr,
7810 };
7811 
7812 static const struct file_operations tracing_mark_raw_fops = {
7813 	.open		= tracing_mark_open,
7814 	.write		= tracing_mark_raw_write,
7815 	.release	= tracing_release_generic_tr,
7816 };
7817 
7818 static const struct file_operations trace_clock_fops = {
7819 	.open		= tracing_clock_open,
7820 	.read		= seq_read,
7821 	.llseek		= seq_lseek,
7822 	.release	= tracing_single_release_tr,
7823 	.write		= tracing_clock_write,
7824 };
7825 
7826 static const struct file_operations trace_time_stamp_mode_fops = {
7827 	.open		= tracing_time_stamp_mode_open,
7828 	.read		= seq_read,
7829 	.llseek		= seq_lseek,
7830 	.release	= tracing_single_release_tr,
7831 };
7832 
7833 static const struct file_operations last_boot_fops = {
7834 	.open		= tracing_last_boot_open,
7835 	.read		= seq_read,
7836 	.llseek		= seq_lseek,
7837 	.release	= tracing_seq_release,
7838 };
7839 
7840 #ifdef CONFIG_TRACER_SNAPSHOT
7841 static const struct file_operations snapshot_fops = {
7842 	.open		= tracing_snapshot_open,
7843 	.read		= seq_read,
7844 	.write		= tracing_snapshot_write,
7845 	.llseek		= tracing_lseek,
7846 	.release	= tracing_snapshot_release,
7847 };
7848 
7849 static const struct file_operations snapshot_raw_fops = {
7850 	.open		= snapshot_raw_open,
7851 	.read		= tracing_buffers_read,
7852 	.release	= tracing_buffers_release,
7853 	.splice_read	= tracing_buffers_splice_read,
7854 };
7855 
7856 #endif /* CONFIG_TRACER_SNAPSHOT */
7857 
7858 /*
7859  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7860  * @filp: The active open file structure
7861  * @ubuf: The user space buffer containing the value to be written
7862  * @cnt: The number of bytes to read from @ubuf
7863  * @ppos: The current "file" position
7864  *
7865  * This function implements the write interface for a struct trace_min_max_param.
7866  * The filp->private_data must point to a trace_min_max_param structure that
7867  * defines where to write the value, the min and the max acceptable values,
7868  * and a lock to protect the write.
7869  */
7870 static ssize_t
7871 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7872 {
7873 	struct trace_min_max_param *param = filp->private_data;
7874 	u64 val;
7875 	int err;
7876 
7877 	if (!param)
7878 		return -EFAULT;
7879 
7880 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7881 	if (err)
7882 		return err;
7883 
7884 	if (param->lock)
7885 		mutex_lock(param->lock);
7886 
7887 	if (param->min && val < *param->min)
7888 		err = -EINVAL;
7889 
7890 	if (param->max && val > *param->max)
7891 		err = -EINVAL;
7892 
7893 	if (!err)
7894 		*param->val = val;
7895 
7896 	if (param->lock)
7897 		mutex_unlock(param->lock);
7898 
7899 	if (err)
7900 		return err;
7901 
7902 	return cnt;
7903 }
7904 
7905 /*
7906  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7907  * @filp: The active open file structure
7908  * @ubuf: The userspace provided buffer to read value into
7909  * @cnt: The maximum number of bytes to read
7910  * @ppos: The current "file" position
7911  *
7912  * This function implements the read interface for a struct trace_min_max_param.
7913  * The filp->private_data must point to a trace_min_max_param struct with valid
7914  * data.
7915  */
7916 static ssize_t
7917 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7918 {
7919 	struct trace_min_max_param *param = filp->private_data;
7920 	char buf[U64_STR_SIZE];
7921 	int len;
7922 	u64 val;
7923 
7924 	if (!param)
7925 		return -EFAULT;
7926 
7927 	val = *param->val;
7928 
7929 	if (cnt > sizeof(buf))
7930 		cnt = sizeof(buf);
7931 
7932 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7933 
7934 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7935 }
7936 
7937 const struct file_operations trace_min_max_fops = {
7938 	.open		= tracing_open_generic,
7939 	.read		= trace_min_max_read,
7940 	.write		= trace_min_max_write,
7941 };
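
/*
 * A hedged sketch of wiring up a file that uses trace_min_max_fops.  The
 * names, bounds and parent dentry are illustrative; struct trace_min_max_param
 * simply bundles pointers to the u64 being controlled, optional min/max
 * bounds, and an optional lock serializing writes:
 *
 *	static u64 my_val;
 *	static u64 my_min = 1, my_max = 1000;
 *	static struct trace_min_max_param my_param = {
 *		.lock	= NULL,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */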
7942 
7943 #define TRACING_LOG_ERRS_MAX	8
7944 #define TRACING_LOG_LOC_MAX	128
7945 
7946 #define CMD_PREFIX "  Command: "
7947 
7948 struct err_info {
7949 	const char	**errs;	/* ptr to loc-specific array of err strings */
7950 	u8		type;	/* index into errs -> specific err string */
7951 	u16		pos;	/* caret position */
7952 	u64		ts;
7953 };
7954 
7955 struct tracing_log_err {
7956 	struct list_head	list;
7957 	struct err_info		info;
7958 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7959 	char			*cmd;                     /* what caused err */
7960 };
7961 
7962 static DEFINE_MUTEX(tracing_err_log_lock);
7963 
7964 static struct tracing_log_err *alloc_tracing_log_err(int len)
7965 {
7966 	struct tracing_log_err *err;
7967 
7968 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7969 	if (!err)
7970 		return ERR_PTR(-ENOMEM);
7971 
7972 	err->cmd = kzalloc(len, GFP_KERNEL);
7973 	if (!err->cmd) {
7974 		kfree(err);
7975 		return ERR_PTR(-ENOMEM);
7976 	}
7977 
7978 	return err;
7979 }
7980 
7981 static void free_tracing_log_err(struct tracing_log_err *err)
7982 {
7983 	kfree(err->cmd);
7984 	kfree(err);
7985 }
7986 
7987 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7988 						   int len)
7989 {
7990 	struct tracing_log_err *err;
7991 	char *cmd;
7992 
7993 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7994 		err = alloc_tracing_log_err(len);
7995 		if (PTR_ERR(err) != -ENOMEM)
7996 			tr->n_err_log_entries++;
7997 
7998 		return err;
7999 	}
8000 	cmd = kzalloc(len, GFP_KERNEL);
8001 	if (!cmd)
8002 		return ERR_PTR(-ENOMEM);
8003 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8004 	kfree(err->cmd);
8005 	err->cmd = cmd;
8006 	list_del(&err->list);
8007 
8008 	return err;
8009 }
8010 
8011 /**
8012  * err_pos - find the position of a string within a command for error careting
8013  * @cmd: The tracing command that caused the error
8014  * @str: The string to position the caret at within @cmd
8015  *
8016  * Finds the position of the first occurrence of @str within @cmd.  The
8017  * return value can be passed to tracing_log_err() for caret placement
8018  * within @cmd.
8019  *
8020  * Returns the index within @cmd of the first occurrence of @str or 0
8021  * if @str was not found.
8022  */
8023 unsigned int err_pos(char *cmd, const char *str)
8024 {
8025 	char *found;
8026 
8027 	if (WARN_ON(!strlen(cmd)))
8028 		return 0;
8029 
8030 	found = strstr(cmd, str);
8031 	if (found)
8032 		return found - cmd;
8033 
8034 	return 0;
8035 }
8036 
8037 /**
8038  * tracing_log_err - write an error to the tracing error log
8039  * @tr: The associated trace array for the error (NULL for top level array)
8040  * @loc: A string describing where the error occurred
8041  * @cmd: The tracing command that caused the error
8042  * @errs: The array of loc-specific static error strings
8043  * @type: The index into errs[], which produces the specific static err string
8044  * @pos: The position the caret should be placed in the cmd
8045  *
8046  * Writes an error into tracing/error_log of the form:
8047  *
8048  * <loc>: error: <text>
8049  *   Command: <cmd>
8050  *              ^
8051  *
8052  * tracing/error_log is a small log file containing the last
8053  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8054  * unless there has been a tracing error, and the error log can be
8055  * cleared, and its memory freed, by writing the empty string to it in
8056  * truncation mode, i.e. echo > tracing/error_log.
8057  *
8058  * NOTE: the @errs array along with the @type param are used to
8059  * produce a static error string - this string is not copied and saved
8060  * when the error is logged - only a pointer to it is saved.  See
8061  * existing callers for examples of how static strings are typically
8062  * defined for use with tracing_log_err().
8063  */
8064 void tracing_log_err(struct trace_array *tr,
8065 		     const char *loc, const char *cmd,
8066 		     const char **errs, u8 type, u16 pos)
8067 {
8068 	struct tracing_log_err *err;
8069 	int len = 0;
8070 
8071 	if (!tr)
8072 		tr = &global_trace;
8073 
8074 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8075 
8076 	guard(mutex)(&tracing_err_log_lock);
8077 
8078 	err = get_tracing_log_err(tr, len);
8079 	if (PTR_ERR(err) == -ENOMEM)
8080 		return;
8081 
8082 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8083 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8084 
8085 	err->info.errs = errs;
8086 	err->info.type = type;
8087 	err->info.pos = pos;
8088 	err->info.ts = local_clock();
8089 
8090 	list_add_tail(&err->list, &tr->err_log);
8091 }
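
/*
 * A hedged sketch of a typical tracing_log_err() caller.  The subsystem
 * name, command string and error table below are made up; the pattern of
 * a static string table indexed by a small enum mirrors existing users:
 *
 *	static const char *my_errs[] = { "Bad argument", "Duplicate name" };
 *	enum { ERR_BAD_ARG, ERR_DUP_NAME };
 *
 *	static void report_bad_arg(struct trace_array *tr, char *cmd)
 *	{
 *		// Place the caret under the first "foo" found in @cmd.
 *		tracing_log_err(tr, "my_subsys", cmd, my_errs,
 *				ERR_BAD_ARG, err_pos(cmd, "foo"));
 *	}
 */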
8092 
8093 static void clear_tracing_err_log(struct trace_array *tr)
8094 {
8095 	struct tracing_log_err *err, *next;
8096 
8097 	mutex_lock(&tracing_err_log_lock);
8098 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8099 		list_del(&err->list);
8100 		free_tracing_log_err(err);
8101 	}
8102 
8103 	tr->n_err_log_entries = 0;
8104 	mutex_unlock(&tracing_err_log_lock);
8105 }
8106 
8107 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8108 {
8109 	struct trace_array *tr = m->private;
8110 
8111 	mutex_lock(&tracing_err_log_lock);
8112 
8113 	return seq_list_start(&tr->err_log, *pos);
8114 }
8115 
8116 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8117 {
8118 	struct trace_array *tr = m->private;
8119 
8120 	return seq_list_next(v, &tr->err_log, pos);
8121 }
8122 
8123 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8124 {
8125 	mutex_unlock(&tracing_err_log_lock);
8126 }
8127 
8128 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8129 {
8130 	u16 i;
8131 
8132 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8133 		seq_putc(m, ' ');
8134 	for (i = 0; i < pos; i++)
8135 		seq_putc(m, ' ');
8136 	seq_puts(m, "^\n");
8137 }
8138 
8139 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8140 {
8141 	struct tracing_log_err *err = v;
8142 
8143 	if (err) {
8144 		const char *err_text = err->info.errs[err->info.type];
8145 		u64 sec = err->info.ts;
8146 		u32 nsec;
8147 
8148 		nsec = do_div(sec, NSEC_PER_SEC);
8149 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8150 			   err->loc, err_text);
8151 		seq_printf(m, "%s", err->cmd);
8152 		tracing_err_log_show_pos(m, err->info.pos);
8153 	}
8154 
8155 	return 0;
8156 }
8157 
8158 static const struct seq_operations tracing_err_log_seq_ops = {
8159 	.start  = tracing_err_log_seq_start,
8160 	.next   = tracing_err_log_seq_next,
8161 	.stop   = tracing_err_log_seq_stop,
8162 	.show   = tracing_err_log_seq_show
8163 };
8164 
8165 static int tracing_err_log_open(struct inode *inode, struct file *file)
8166 {
8167 	struct trace_array *tr = inode->i_private;
8168 	int ret = 0;
8169 
8170 	ret = tracing_check_open_get_tr(tr);
8171 	if (ret)
8172 		return ret;
8173 
8174 	/* If this file was opened for write, then erase contents */
8175 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8176 		clear_tracing_err_log(tr);
8177 
8178 	if (file->f_mode & FMODE_READ) {
8179 		ret = seq_open(file, &tracing_err_log_seq_ops);
8180 		if (!ret) {
8181 			struct seq_file *m = file->private_data;
8182 			m->private = tr;
8183 		} else {
8184 			trace_array_put(tr);
8185 		}
8186 	}
8187 	return ret;
8188 }
8189 
8190 static ssize_t tracing_err_log_write(struct file *file,
8191 				     const char __user *buffer,
8192 				     size_t count, loff_t *ppos)
8193 {
8194 	return count;
8195 }
8196 
8197 static int tracing_err_log_release(struct inode *inode, struct file *file)
8198 {
8199 	struct trace_array *tr = inode->i_private;
8200 
8201 	trace_array_put(tr);
8202 
8203 	if (file->f_mode & FMODE_READ)
8204 		seq_release(inode, file);
8205 
8206 	return 0;
8207 }
8208 
8209 static const struct file_operations tracing_err_log_fops = {
8210 	.open           = tracing_err_log_open,
8211 	.write		= tracing_err_log_write,
8212 	.read           = seq_read,
8213 	.llseek         = tracing_lseek,
8214 	.release        = tracing_err_log_release,
8215 };
8216 
8217 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8218 {
8219 	struct trace_array *tr = inode->i_private;
8220 	struct ftrace_buffer_info *info;
8221 	int ret;
8222 
8223 	ret = tracing_check_open_get_tr(tr);
8224 	if (ret)
8225 		return ret;
8226 
8227 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8228 	if (!info) {
8229 		trace_array_put(tr);
8230 		return -ENOMEM;
8231 	}
8232 
8233 	mutex_lock(&trace_types_lock);
8234 
8235 	info->iter.tr		= tr;
8236 	info->iter.cpu_file	= tracing_get_cpu(inode);
8237 	info->iter.trace	= tr->current_trace;
8238 	info->iter.array_buffer = &tr->array_buffer;
8239 	info->spare		= NULL;
8240 	/* Force reading ring buffer for first read */
8241 	info->read		= (unsigned int)-1;
8242 
8243 	filp->private_data = info;
8244 
8245 	tr->trace_ref++;
8246 
8247 	mutex_unlock(&trace_types_lock);
8248 
8249 	ret = nonseekable_open(inode, filp);
8250 	if (ret < 0)
8251 		trace_array_put(tr);
8252 
8253 	return ret;
8254 }
8255 
8256 static __poll_t
8257 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8258 {
8259 	struct ftrace_buffer_info *info = filp->private_data;
8260 	struct trace_iterator *iter = &info->iter;
8261 
8262 	return trace_poll(iter, filp, poll_table);
8263 }
8264 
8265 static ssize_t
8266 tracing_buffers_read(struct file *filp, char __user *ubuf,
8267 		     size_t count, loff_t *ppos)
8268 {
8269 	struct ftrace_buffer_info *info = filp->private_data;
8270 	struct trace_iterator *iter = &info->iter;
8271 	void *trace_data;
8272 	int page_size;
8273 	ssize_t ret = 0;
8274 	ssize_t size;
8275 
8276 	if (!count)
8277 		return 0;
8278 
8279 #ifdef CONFIG_TRACER_MAX_TRACE
8280 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8281 		return -EBUSY;
8282 #endif
8283 
8284 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8285 
8286 	/* Make sure the spare matches the current sub buffer size */
8287 	if (info->spare) {
8288 		if (page_size != info->spare_size) {
8289 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8290 						   info->spare_cpu, info->spare);
8291 			info->spare = NULL;
8292 		}
8293 	}
8294 
8295 	if (!info->spare) {
8296 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8297 							  iter->cpu_file);
8298 		if (IS_ERR(info->spare)) {
8299 			ret = PTR_ERR(info->spare);
8300 			info->spare = NULL;
8301 		} else {
8302 			info->spare_cpu = iter->cpu_file;
8303 			info->spare_size = page_size;
8304 		}
8305 	}
8306 	if (!info->spare)
8307 		return ret;
8308 
8309 	/* Do we have previous read data to read? */
8310 	if (info->read < page_size)
8311 		goto read;
8312 
8313  again:
8314 	trace_access_lock(iter->cpu_file);
8315 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8316 				    info->spare,
8317 				    count,
8318 				    iter->cpu_file, 0);
8319 	trace_access_unlock(iter->cpu_file);
8320 
8321 	if (ret < 0) {
8322 		if (trace_empty(iter) && !iter->closed) {
8323 			if ((filp->f_flags & O_NONBLOCK))
8324 				return -EAGAIN;
8325 
8326 			ret = wait_on_pipe(iter, 0);
8327 			if (ret)
8328 				return ret;
8329 
8330 			goto again;
8331 		}
8332 		return 0;
8333 	}
8334 
8335 	info->read = 0;
8336  read:
8337 	size = page_size - info->read;
8338 	if (size > count)
8339 		size = count;
8340 	trace_data = ring_buffer_read_page_data(info->spare);
8341 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8342 	if (ret == size)
8343 		return -EFAULT;
8344 
8345 	size -= ret;
8346 
8347 	*ppos += size;
8348 	info->read += size;
8349 
8350 	return size;
8351 }
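
/*
 * Reads from the per-CPU trace_pipe_raw files return whole binary
 * sub-buffers that tools such as trace-cmd consume directly.  A hedged
 * user-space sketch (the path is an assumption, and the 4096-byte buffer
 * assumes the default sub-buffer size; real consumers should not
 * hard-code it):
 *
 *	char page[4096];
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	ssize_t r = read(fd, page, sizeof(page));  // up to one sub-buffer
 */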
8352 
8353 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8354 {
8355 	struct ftrace_buffer_info *info = file->private_data;
8356 	struct trace_iterator *iter = &info->iter;
8357 
8358 	iter->closed = true;
8359 	/* Make sure the waiters see the new wait_index */
8360 	(void)atomic_fetch_inc_release(&iter->wait_index);
8361 
8362 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8363 
8364 	return 0;
8365 }
8366 
8367 static int tracing_buffers_release(struct inode *inode, struct file *file)
8368 {
8369 	struct ftrace_buffer_info *info = file->private_data;
8370 	struct trace_iterator *iter = &info->iter;
8371 
8372 	mutex_lock(&trace_types_lock);
8373 
8374 	iter->tr->trace_ref--;
8375 
8376 	__trace_array_put(iter->tr);
8377 
8378 	if (info->spare)
8379 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8380 					   info->spare_cpu, info->spare);
8381 	kvfree(info);
8382 
8383 	mutex_unlock(&trace_types_lock);
8384 
8385 	return 0;
8386 }
8387 
8388 struct buffer_ref {
8389 	struct trace_buffer	*buffer;
8390 	void			*page;
8391 	int			cpu;
8392 	refcount_t		refcount;
8393 };
8394 
8395 static void buffer_ref_release(struct buffer_ref *ref)
8396 {
8397 	if (!refcount_dec_and_test(&ref->refcount))
8398 		return;
8399 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8400 	kfree(ref);
8401 }
8402 
8403 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8404 				    struct pipe_buffer *buf)
8405 {
8406 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8407 
8408 	buffer_ref_release(ref);
8409 	buf->private = 0;
8410 }
8411 
8412 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8413 				struct pipe_buffer *buf)
8414 {
8415 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8416 
8417 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8418 		return false;
8419 
8420 	refcount_inc(&ref->refcount);
8421 	return true;
8422 }
8423 
8424 /* Pipe buffer operations for a buffer. */
8425 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8426 	.release		= buffer_pipe_buf_release,
8427 	.get			= buffer_pipe_buf_get,
8428 };
8429 
8430 /*
8431  * Callback from splice_to_pipe(); releases the pages still held by the
8432  * spd if we errored out while filling the pipe.
8433  */
8434 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8435 {
8436 	struct buffer_ref *ref =
8437 		(struct buffer_ref *)spd->partial[i].private;
8438 
8439 	buffer_ref_release(ref);
8440 	spd->partial[i].private = 0;
8441 }
8442 
8443 static ssize_t
8444 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8445 			    struct pipe_inode_info *pipe, size_t len,
8446 			    unsigned int flags)
8447 {
8448 	struct ftrace_buffer_info *info = file->private_data;
8449 	struct trace_iterator *iter = &info->iter;
8450 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8451 	struct page *pages_def[PIPE_DEF_BUFFERS];
8452 	struct splice_pipe_desc spd = {
8453 		.pages		= pages_def,
8454 		.partial	= partial_def,
8455 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8456 		.ops		= &buffer_pipe_buf_ops,
8457 		.spd_release	= buffer_spd_release,
8458 	};
8459 	struct buffer_ref *ref;
8460 	bool woken = false;
8461 	int page_size;
8462 	int entries, i;
8463 	ssize_t ret = 0;
8464 
8465 #ifdef CONFIG_TRACER_MAX_TRACE
8466 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8467 		return -EBUSY;
8468 #endif
8469 
8470 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8471 	if (*ppos & (page_size - 1))
8472 		return -EINVAL;
8473 
8474 	if (len & (page_size - 1)) {
8475 		if (len < page_size)
8476 			return -EINVAL;
8477 		len &= (~(page_size - 1));
8478 	}
8479 
8480 	if (splice_grow_spd(pipe, &spd))
8481 		return -ENOMEM;
8482 
8483  again:
8484 	trace_access_lock(iter->cpu_file);
8485 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8486 
8487 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8488 		struct page *page;
8489 		int r;
8490 
8491 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8492 		if (!ref) {
8493 			ret = -ENOMEM;
8494 			break;
8495 		}
8496 
8497 		refcount_set(&ref->refcount, 1);
8498 		ref->buffer = iter->array_buffer->buffer;
8499 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8500 		if (IS_ERR(ref->page)) {
8501 			ret = PTR_ERR(ref->page);
8502 			ref->page = NULL;
8503 			kfree(ref);
8504 			break;
8505 		}
8506 		ref->cpu = iter->cpu_file;
8507 
8508 		r = ring_buffer_read_page(ref->buffer, ref->page,
8509 					  len, iter->cpu_file, 1);
8510 		if (r < 0) {
8511 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8512 						   ref->page);
8513 			kfree(ref);
8514 			break;
8515 		}
8516 
8517 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8518 
8519 		spd.pages[i] = page;
8520 		spd.partial[i].len = page_size;
8521 		spd.partial[i].offset = 0;
8522 		spd.partial[i].private = (unsigned long)ref;
8523 		spd.nr_pages++;
8524 		*ppos += page_size;
8525 
8526 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8527 	}
8528 
8529 	trace_access_unlock(iter->cpu_file);
8530 	spd.nr_pages = i;
8531 
8532 	/* did we read anything? */
8533 	if (!spd.nr_pages) {
8534 
8535 		if (ret)
8536 			goto out;
8537 
8538 		if (woken)
8539 			goto out;
8540 
8541 		ret = -EAGAIN;
8542 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8543 			goto out;
8544 
8545 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8546 		if (ret)
8547 			goto out;
8548 
8549 		/* No need to wait after waking up when tracing is off */
8550 		if (!tracer_tracing_is_on(iter->tr))
8551 			goto out;
8552 
8553 		/* Iterate one more time to collect any new data then exit */
8554 		woken = true;
8555 
8556 		goto again;
8557 	}
8558 
8559 	ret = splice_to_pipe(pipe, &spd);
8560 out:
8561 	splice_shrink_spd(&spd);
8562 
8563 	return ret;
8564 }
8565 
8566 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8567 {
8568 	struct ftrace_buffer_info *info = file->private_data;
8569 	struct trace_iterator *iter = &info->iter;
8570 	int err;
8571 
8572 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8573 		if (!(file->f_flags & O_NONBLOCK)) {
8574 			err = ring_buffer_wait(iter->array_buffer->buffer,
8575 					       iter->cpu_file,
8576 					       iter->tr->buffer_percent,
8577 					       NULL, NULL);
8578 			if (err)
8579 				return err;
8580 		}
8581 
8582 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8583 						  iter->cpu_file);
8584 	} else if (cmd) {
8585 		return -ENOTTY;
8586 	}
8587 
8588 	/*
8589 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8590 	 * waiters
8591 	 */
8592 	mutex_lock(&trace_types_lock);
8593 
8594 	/* Make sure the waiters see the new wait_index */
8595 	(void)atomic_fetch_inc_release(&iter->wait_index);
8596 
8597 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8598 
8599 	mutex_unlock(&trace_types_lock);
8600 	return 0;
8601 }
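
/*
 * User space drives the ioctl above through the per-CPU trace_pipe_raw
 * file.  A hedged sketch (TRACE_MMAP_IOCTL_GET_READER comes from the
 * trace_mmap UAPI header; the path is an assumption):
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY | O_NONBLOCK);
 *
 *	// Advance the reader page of a memory-mapped ring buffer.
 *	ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);
 *
 *	// cmd == 0 just wakes up everyone waiting on this buffer.
 *	ioctl(fd, 0);
 */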
8602 
8603 #ifdef CONFIG_TRACER_MAX_TRACE
8604 static int get_snapshot_map(struct trace_array *tr)
8605 {
8606 	int err = 0;
8607 
8608 	/*
8609 	 * Called with mmap_lock held. lockdep would be unhappy if we were to
8610 	 * take trace_types_lock here. Instead use the specific
8611 	 * snapshot_trigger_lock.
8612 	 */
8613 	spin_lock(&tr->snapshot_trigger_lock);
8614 
8615 	if (tr->snapshot || tr->mapped == UINT_MAX)
8616 		err = -EBUSY;
8617 	else
8618 		tr->mapped++;
8619 
8620 	spin_unlock(&tr->snapshot_trigger_lock);
8621 
8622 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8623 	if (tr->mapped == 1)
8624 		synchronize_rcu();
8625 
8626 	return err;
8627 
8628 }
8629 static void put_snapshot_map(struct trace_array *tr)
8630 {
8631 	spin_lock(&tr->snapshot_trigger_lock);
8632 	if (!WARN_ON(!tr->mapped))
8633 		tr->mapped--;
8634 	spin_unlock(&tr->snapshot_trigger_lock);
8635 }
8636 #else
8637 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8638 static inline void put_snapshot_map(struct trace_array *tr) { }
8639 #endif
8640 
8641 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8642 {
8643 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8644 	struct trace_iterator *iter = &info->iter;
8645 
8646 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8647 	put_snapshot_map(iter->tr);
8648 }
8649 
8650 static const struct vm_operations_struct tracing_buffers_vmops = {
8651 	.close		= tracing_buffers_mmap_close,
8652 };
8653 
8654 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8655 {
8656 	struct ftrace_buffer_info *info = filp->private_data;
8657 	struct trace_iterator *iter = &info->iter;
8658 	int ret = 0;
8659 
8660 	/* A memmap'ed buffer is not supported for user space mmap */
8661 	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8662 		return -ENODEV;
8663 
8664 	/* Currently the boot mapped buffer is not supported for mmap */
8665 	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
8666 		return -ENODEV;
8667 
8668 	ret = get_snapshot_map(iter->tr);
8669 	if (ret)
8670 		return ret;
8671 
8672 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8673 	if (ret)
8674 		put_snapshot_map(iter->tr);
8675 
8676 	vma->vm_ops = &tracing_buffers_vmops;
8677 
8678 	return ret;
8679 }
8680 
8681 static const struct file_operations tracing_buffers_fops = {
8682 	.open		= tracing_buffers_open,
8683 	.read		= tracing_buffers_read,
8684 	.poll		= tracing_buffers_poll,
8685 	.release	= tracing_buffers_release,
8686 	.flush		= tracing_buffers_flush,
8687 	.splice_read	= tracing_buffers_splice_read,
8688 	.unlocked_ioctl = tracing_buffers_ioctl,
8689 	.mmap		= tracing_buffers_mmap,
8690 };
8691 
8692 static ssize_t
8693 tracing_stats_read(struct file *filp, char __user *ubuf,
8694 		   size_t count, loff_t *ppos)
8695 {
8696 	struct inode *inode = file_inode(filp);
8697 	struct trace_array *tr = inode->i_private;
8698 	struct array_buffer *trace_buf = &tr->array_buffer;
8699 	int cpu = tracing_get_cpu(inode);
8700 	struct trace_seq *s;
8701 	unsigned long cnt;
8702 	unsigned long long t;
8703 	unsigned long usec_rem;
8704 
8705 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8706 	if (!s)
8707 		return -ENOMEM;
8708 
8709 	trace_seq_init(s);
8710 
8711 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8712 	trace_seq_printf(s, "entries: %ld\n", cnt);
8713 
8714 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8715 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8716 
8717 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8718 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8719 
8720 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8721 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8722 
8723 	if (trace_clocks[tr->clock_id].in_ns) {
8724 		/* local or global for trace_clock */
8725 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8726 		usec_rem = do_div(t, USEC_PER_SEC);
8727 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8728 								t, usec_rem);
8729 
8730 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8731 		usec_rem = do_div(t, USEC_PER_SEC);
8732 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8733 	} else {
8734 		/* counter or tsc mode for trace_clock */
8735 		trace_seq_printf(s, "oldest event ts: %llu\n",
8736 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8737 
8738 		trace_seq_printf(s, "now ts: %llu\n",
8739 				ring_buffer_time_stamp(trace_buf->buffer));
8740 	}
8741 
8742 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8743 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8744 
8745 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8746 	trace_seq_printf(s, "read events: %ld\n", cnt);
8747 
8748 	count = simple_read_from_buffer(ubuf, count, ppos,
8749 					s->buffer, trace_seq_used(s));
8750 
8751 	kfree(s);
8752 
8753 	return count;
8754 }
8755 
8756 static const struct file_operations tracing_stats_fops = {
8757 	.open		= tracing_open_generic_tr,
8758 	.read		= tracing_stats_read,
8759 	.llseek		= generic_file_llseek,
8760 	.release	= tracing_release_generic_tr,
8761 };
8762 
8763 #ifdef CONFIG_DYNAMIC_FTRACE
8764 
8765 static ssize_t
8766 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8767 		  size_t cnt, loff_t *ppos)
8768 {
8769 	ssize_t ret;
8770 	char *buf;
8771 	int r;
8772 
8773 	/* 512 should be plenty to hold the amount needed */
8774 #define DYN_INFO_BUF_SIZE	512
8775 
8776 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8777 	if (!buf)
8778 		return -ENOMEM;
8779 
8780 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8781 		      "%ld pages:%ld groups: %ld\n"
8782 		      "ftrace boot update time = %llu (ns)\n"
8783 		      "ftrace module total update time = %llu (ns)\n",
8784 		      ftrace_update_tot_cnt,
8785 		      ftrace_number_of_pages,
8786 		      ftrace_number_of_groups,
8787 		      ftrace_update_time,
8788 		      ftrace_total_mod_time);
8789 
8790 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8791 	kfree(buf);
8792 	return ret;
8793 }
8794 
8795 static const struct file_operations tracing_dyn_info_fops = {
8796 	.open		= tracing_open_generic,
8797 	.read		= tracing_read_dyn_info,
8798 	.llseek		= generic_file_llseek,
8799 };
8800 #endif /* CONFIG_DYNAMIC_FTRACE */
8801 
8802 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8803 static void
8804 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8805 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8806 		void *data)
8807 {
8808 	tracing_snapshot_instance(tr);
8809 }
8810 
8811 static void
8812 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8813 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8814 		      void *data)
8815 {
8816 	struct ftrace_func_mapper *mapper = data;
8817 	long *count = NULL;
8818 
8819 	if (mapper)
8820 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8821 
8822 	if (count) {
8823 
8824 		if (*count <= 0)
8825 			return;
8826 
8827 		(*count)--;
8828 	}
8829 
8830 	tracing_snapshot_instance(tr);
8831 }
8832 
8833 static int
8834 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8835 		      struct ftrace_probe_ops *ops, void *data)
8836 {
8837 	struct ftrace_func_mapper *mapper = data;
8838 	long *count = NULL;
8839 
8840 	seq_printf(m, "%ps:", (void *)ip);
8841 
8842 	seq_puts(m, "snapshot");
8843 
8844 	if (mapper)
8845 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8846 
8847 	if (count)
8848 		seq_printf(m, ":count=%ld\n", *count);
8849 	else
8850 		seq_puts(m, ":unlimited\n");
8851 
8852 	return 0;
8853 }
8854 
8855 static int
8856 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8857 		     unsigned long ip, void *init_data, void **data)
8858 {
8859 	struct ftrace_func_mapper *mapper = *data;
8860 
8861 	if (!mapper) {
8862 		mapper = allocate_ftrace_func_mapper();
8863 		if (!mapper)
8864 			return -ENOMEM;
8865 		*data = mapper;
8866 	}
8867 
8868 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8869 }
8870 
8871 static void
8872 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8873 		     unsigned long ip, void *data)
8874 {
8875 	struct ftrace_func_mapper *mapper = data;
8876 
8877 	if (!ip) {
8878 		if (!mapper)
8879 			return;
8880 		free_ftrace_func_mapper(mapper, NULL);
8881 		return;
8882 	}
8883 
8884 	ftrace_func_mapper_remove_ip(mapper, ip);
8885 }
8886 
8887 static struct ftrace_probe_ops snapshot_probe_ops = {
8888 	.func			= ftrace_snapshot,
8889 	.print			= ftrace_snapshot_print,
8890 };
8891 
8892 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8893 	.func			= ftrace_count_snapshot,
8894 	.print			= ftrace_snapshot_print,
8895 	.init			= ftrace_snapshot_init,
8896 	.free			= ftrace_snapshot_free,
8897 };
8898 
8899 static int
8900 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8901 			       char *glob, char *cmd, char *param, int enable)
8902 {
8903 	struct ftrace_probe_ops *ops;
8904 	void *count = (void *)-1;
8905 	char *number;
8906 	int ret;
8907 
8908 	if (!tr)
8909 		return -ENODEV;
8910 
8911 	/* hash funcs only work with set_ftrace_filter */
8912 	if (!enable)
8913 		return -EINVAL;
8914 
8915 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8916 
8917 	if (glob[0] == '!') {
8918 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8919 		if (!ret)
8920 			tracing_disarm_snapshot(tr);
8921 
8922 		return ret;
8923 	}
8924 
8925 	if (!param)
8926 		goto out_reg;
8927 
8928 	number = strsep(&param, ":");
8929 
8930 	if (!strlen(number))
8931 		goto out_reg;
8932 
8933 	/*
8934 	 * We use the callback data field (which is a pointer)
8935 	 * as our counter.
8936 	 */
8937 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8938 	if (ret)
8939 		return ret;
8940 
8941  out_reg:
8942 	ret = tracing_arm_snapshot(tr);
8943 	if (ret < 0)
8944 		goto out;
8945 
8946 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8947 	if (ret < 0)
8948 		tracing_disarm_snapshot(tr);
8949  out:
8950 	return ret < 0 ? ret : 0;
8951 }
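
/*
 * The callback above implements the "snapshot" command of
 * set_ftrace_filter.  A hedged user-space sketch (the function name,
 * count and tracefs path are illustrative):
 *
 *	// Take at most one snapshot, the first time schedule() is hit.
 *	int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "schedule:snapshot:1", 19);
 *		close(fd);
 *	}
 *
 * Writing the same string prefixed with '!' removes the probe again.
 */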
8952 
8953 static struct ftrace_func_command ftrace_snapshot_cmd = {
8954 	.name			= "snapshot",
8955 	.func			= ftrace_trace_snapshot_callback,
8956 };
8957 
8958 static __init int register_snapshot_cmd(void)
8959 {
8960 	return register_ftrace_command(&ftrace_snapshot_cmd);
8961 }
8962 #else
8963 static inline __init int register_snapshot_cmd(void) { return 0; }
8964 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8965 
8966 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8967 {
8968 	if (WARN_ON(!tr->dir))
8969 		return ERR_PTR(-ENODEV);
8970 
8971 	/* Top directory uses NULL as the parent */
8972 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8973 		return NULL;
8974 
8975 	/* All sub buffers have a descriptor */
8976 	return tr->dir;
8977 }
8978 
8979 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8980 {
8981 	struct dentry *d_tracer;
8982 
8983 	if (tr->percpu_dir)
8984 		return tr->percpu_dir;
8985 
8986 	d_tracer = tracing_get_dentry(tr);
8987 	if (IS_ERR(d_tracer))
8988 		return NULL;
8989 
8990 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8991 
8992 	MEM_FAIL(!tr->percpu_dir,
8993 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8994 
8995 	return tr->percpu_dir;
8996 }
8997 
8998 static struct dentry *
8999 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9000 		      void *data, long cpu, const struct file_operations *fops)
9001 {
9002 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9003 
9004 	if (ret) /* See tracing_get_cpu() */
9005 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
9006 	return ret;
9007 }
9008 
9009 static void
9010 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9011 {
9012 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9013 	struct dentry *d_cpu;
9014 	char cpu_dir[30]; /* 30 characters should be more than enough */
9015 
9016 	if (!d_percpu)
9017 		return;
9018 
9019 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
9020 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9021 	if (!d_cpu) {
9022 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9023 		return;
9024 	}
9025 
9026 	/* per cpu trace_pipe */
9027 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9028 				tr, cpu, &tracing_pipe_fops);
9029 
9030 	/* per cpu trace */
9031 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9032 				tr, cpu, &tracing_fops);
9033 
9034 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9035 				tr, cpu, &tracing_buffers_fops);
9036 
9037 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9038 				tr, cpu, &tracing_stats_fops);
9039 
9040 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9041 				tr, cpu, &tracing_entries_fops);
9042 
9043 	if (tr->range_addr_start)
9044 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9045 				      tr, cpu, &tracing_buffer_meta_fops);
9046 #ifdef CONFIG_TRACER_SNAPSHOT
9047 	if (!tr->range_addr_start) {
9048 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9049 				      tr, cpu, &snapshot_fops);
9050 
9051 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9052 				      tr, cpu, &snapshot_raw_fops);
9053 	}
9054 #endif
9055 }
9056 
9057 #ifdef CONFIG_FTRACE_SELFTEST
9058 /* Let selftest have access to static functions in this file */
9059 #include "trace_selftest.c"
9060 #endif
9061 
9062 static ssize_t
9063 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9064 			loff_t *ppos)
9065 {
9066 	struct trace_option_dentry *topt = filp->private_data;
9067 	char *buf;
9068 
9069 	if (topt->flags->val & topt->opt->bit)
9070 		buf = "1\n";
9071 	else
9072 		buf = "0\n";
9073 
9074 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9075 }
9076 
9077 static ssize_t
9078 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9079 			 loff_t *ppos)
9080 {
9081 	struct trace_option_dentry *topt = filp->private_data;
9082 	unsigned long val;
9083 	int ret;
9084 
9085 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9086 	if (ret)
9087 		return ret;
9088 
9089 	if (val != 0 && val != 1)
9090 		return -EINVAL;
9091 
9092 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9093 		mutex_lock(&trace_types_lock);
9094 		ret = __set_tracer_option(topt->tr, topt->flags,
9095 					  topt->opt, !val);
9096 		mutex_unlock(&trace_types_lock);
9097 		if (ret)
9098 			return ret;
9099 	}
9100 
9101 	*ppos += cnt;
9102 
9103 	return cnt;
9104 }
9105 
9106 static int tracing_open_options(struct inode *inode, struct file *filp)
9107 {
9108 	struct trace_option_dentry *topt = inode->i_private;
9109 	int ret;
9110 
9111 	ret = tracing_check_open_get_tr(topt->tr);
9112 	if (ret)
9113 		return ret;
9114 
9115 	filp->private_data = inode->i_private;
9116 	return 0;
9117 }
9118 
9119 static int tracing_release_options(struct inode *inode, struct file *file)
9120 {
9121 	struct trace_option_dentry *topt = file->private_data;
9122 
9123 	trace_array_put(topt->tr);
9124 	return 0;
9125 }
9126 
9127 static const struct file_operations trace_options_fops = {
9128 	.open = tracing_open_options,
9129 	.read = trace_options_read,
9130 	.write = trace_options_write,
9131 	.llseek	= generic_file_llseek,
9132 	.release = tracing_release_options,
9133 };
9134 
9135 /*
9136  * In order to pass in both the trace_array descriptor as well as the index
9137  * of the flag that the trace option file represents, the trace_array
9138  * has a character array trace_flags_index[], where each element holds the
9139  * index of the bit for the flag it represents: index[0] == 0, index[1] == 1, etc.
9140  * The address of the element for a given flag is passed to the flag option
9141  * file read/write callbacks.
9142  *
9143  * In order to extract both the index and the trace_array descriptor,
9144  * get_tr_index() uses the following algorithm.
9145  *
9146  *   idx = *ptr;
9147  *
9148  * Since each element's value is its own index (remember index[1] == 1),
9149  * dereferencing the pointer yields the index directly.
9150  *
9151  * Then, to get the trace_array descriptor, subtracting that index
9152  * from the pointer takes us back to the start of the array:
9153  *
9154  *   ptr - idx == &index[0]
9155  *
9156  * Then a simple container_of() from that pointer gets us to the
9157  * trace_array descriptor.
9158  */
9159 static void get_tr_index(void *data, struct trace_array **ptr,
9160 			 unsigned int *pindex)
9161 {
9162 	*pindex = *(unsigned char *)data;
9163 
9164 	*ptr = container_of(data - *pindex, struct trace_array,
9165 			    trace_flags_index);
9166 }
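
/*
 * A standalone illustration of the same trick with a hypothetical
 * structure, purely to make the pointer arithmetic above concrete:
 *
 *	struct foo { int x; unsigned char index[4]; } f;  // index[i] == i
 *	void *data = &f.index[2];
 *
 *	unsigned char idx = *(unsigned char *)data;              // == 2
 *	struct foo *p = container_of(data - idx, struct foo, index);  // == &f
 */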
9167 
9168 static ssize_t
9169 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9170 			loff_t *ppos)
9171 {
9172 	void *tr_index = filp->private_data;
9173 	struct trace_array *tr;
9174 	unsigned int index;
9175 	char *buf;
9176 
9177 	get_tr_index(tr_index, &tr, &index);
9178 
9179 	if (tr->trace_flags & (1 << index))
9180 		buf = "1\n";
9181 	else
9182 		buf = "0\n";
9183 
9184 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9185 }
9186 
9187 static ssize_t
9188 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9189 			 loff_t *ppos)
9190 {
9191 	void *tr_index = filp->private_data;
9192 	struct trace_array *tr;
9193 	unsigned int index;
9194 	unsigned long val;
9195 	int ret;
9196 
9197 	get_tr_index(tr_index, &tr, &index);
9198 
9199 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9200 	if (ret)
9201 		return ret;
9202 
9203 	if (val != 0 && val != 1)
9204 		return -EINVAL;
9205 
9206 	mutex_lock(&event_mutex);
9207 	mutex_lock(&trace_types_lock);
9208 	ret = set_tracer_flag(tr, 1 << index, val);
9209 	mutex_unlock(&trace_types_lock);
9210 	mutex_unlock(&event_mutex);
9211 
9212 	if (ret < 0)
9213 		return ret;
9214 
9215 	*ppos += cnt;
9216 
9217 	return cnt;
9218 }
9219 
9220 static const struct file_operations trace_options_core_fops = {
9221 	.open = tracing_open_generic,
9222 	.read = trace_options_core_read,
9223 	.write = trace_options_core_write,
9224 	.llseek = generic_file_llseek,
9225 };
9226 
9227 struct dentry *trace_create_file(const char *name,
9228 				 umode_t mode,
9229 				 struct dentry *parent,
9230 				 void *data,
9231 				 const struct file_operations *fops)
9232 {
9233 	struct dentry *ret;
9234 
9235 	ret = tracefs_create_file(name, mode, parent, data, fops);
9236 	if (!ret)
9237 		pr_warn("Could not create tracefs '%s' entry\n", name);
9238 
9239 	return ret;
9240 }
9241 
9242 
9243 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9244 {
9245 	struct dentry *d_tracer;
9246 
9247 	if (tr->options)
9248 		return tr->options;
9249 
9250 	d_tracer = tracing_get_dentry(tr);
9251 	if (IS_ERR(d_tracer))
9252 		return NULL;
9253 
9254 	tr->options = tracefs_create_dir("options", d_tracer);
9255 	if (!tr->options) {
9256 		pr_warn("Could not create tracefs directory 'options'\n");
9257 		return NULL;
9258 	}
9259 
9260 	return tr->options;
9261 }
9262 
9263 static void
9264 create_trace_option_file(struct trace_array *tr,
9265 			 struct trace_option_dentry *topt,
9266 			 struct tracer_flags *flags,
9267 			 struct tracer_opt *opt)
9268 {
9269 	struct dentry *t_options;
9270 
9271 	t_options = trace_options_init_dentry(tr);
9272 	if (!t_options)
9273 		return;
9274 
9275 	topt->flags = flags;
9276 	topt->opt = opt;
9277 	topt->tr = tr;
9278 
9279 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9280 					t_options, topt, &trace_options_fops);
9281 
9282 }
9283 
9284 static void
9285 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9286 {
9287 	struct trace_option_dentry *topts;
9288 	struct trace_options *tr_topts;
9289 	struct tracer_flags *flags;
9290 	struct tracer_opt *opts;
9291 	int cnt;
9292 	int i;
9293 
9294 	if (!tracer)
9295 		return;
9296 
9297 	flags = tracer->flags;
9298 
9299 	if (!flags || !flags->opts)
9300 		return;
9301 
9302 	/*
9303 	 * If this is an instance, only create flags for tracers
9304 	 * the instance may have.
9305 	 */
9306 	if (!trace_ok_for_array(tracer, tr))
9307 		return;
9308 
9309 	for (i = 0; i < tr->nr_topts; i++) {
9310 		/* Make sure there are no duplicate flags. */
9311 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9312 			return;
9313 	}
9314 
9315 	opts = flags->opts;
9316 
9317 	for (cnt = 0; opts[cnt].name; cnt++)
9318 		;
9319 
9320 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9321 	if (!topts)
9322 		return;
9323 
9324 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9325 			    GFP_KERNEL);
9326 	if (!tr_topts) {
9327 		kfree(topts);
9328 		return;
9329 	}
9330 
9331 	tr->topts = tr_topts;
9332 	tr->topts[tr->nr_topts].tracer = tracer;
9333 	tr->topts[tr->nr_topts].topts = topts;
9334 	tr->nr_topts++;
9335 
9336 	for (cnt = 0; opts[cnt].name; cnt++) {
9337 		create_trace_option_file(tr, &topts[cnt], flags,
9338 					 &opts[cnt]);
9339 		MEM_FAIL(topts[cnt].entry == NULL,
9340 			  "Failed to create trace option: %s",
9341 			  opts[cnt].name);
9342 	}
9343 }
9344 
9345 static struct dentry *
9346 create_trace_option_core_file(struct trace_array *tr,
9347 			      const char *option, long index)
9348 {
9349 	struct dentry *t_options;
9350 
9351 	t_options = trace_options_init_dentry(tr);
9352 	if (!t_options)
9353 		return NULL;
9354 
9355 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9356 				 (void *)&tr->trace_flags_index[index],
9357 				 &trace_options_core_fops);
9358 }
9359 
9360 static void create_trace_options_dir(struct trace_array *tr)
9361 {
9362 	struct dentry *t_options;
9363 	bool top_level = tr == &global_trace;
9364 	int i;
9365 
9366 	t_options = trace_options_init_dentry(tr);
9367 	if (!t_options)
9368 		return;
9369 
9370 	for (i = 0; trace_options[i]; i++) {
9371 		if (top_level ||
9372 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9373 			create_trace_option_core_file(tr, trace_options[i], i);
9374 	}
9375 }
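
/*
 * Example (illustrative; the available option names depend on the kernel
 * config and the current tracer): each file under the per-instance
 * "options" directory created above toggles one flag.  Core flags index
 * into tr->trace_flags_index, tracer-specific flags go through the
 * struct trace_option_dentry set up in create_trace_option_file().
 *
 *   # cd /sys/kernel/tracing
 *   # cat options/sym-offset
 *   0
 *   # echo 1 > options/sym-offset
 *
 * The write handlers reject values other than 0 and 1.
 */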
9376 
9377 static ssize_t
9378 rb_simple_read(struct file *filp, char __user *ubuf,
9379 	       size_t cnt, loff_t *ppos)
9380 {
9381 	struct trace_array *tr = filp->private_data;
9382 	char buf[64];
9383 	int r;
9384 
9385 	r = tracer_tracing_is_on(tr);
9386 	r = sprintf(buf, "%d\n", r);
9387 
9388 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9389 }
9390 
9391 static ssize_t
9392 rb_simple_write(struct file *filp, const char __user *ubuf,
9393 		size_t cnt, loff_t *ppos)
9394 {
9395 	struct trace_array *tr = filp->private_data;
9396 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9397 	unsigned long val;
9398 	int ret;
9399 
9400 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9401 	if (ret)
9402 		return ret;
9403 
9404 	if (buffer) {
9405 		mutex_lock(&trace_types_lock);
9406 		if (!!val == tracer_tracing_is_on(tr)) {
9407 			val = 0; /* do nothing */
9408 		} else if (val) {
9409 			tracer_tracing_on(tr);
9410 			if (tr->current_trace->start)
9411 				tr->current_trace->start(tr);
9412 		} else {
9413 			tracer_tracing_off(tr);
9414 			if (tr->current_trace->stop)
9415 				tr->current_trace->stop(tr);
9416 			/* Wake up any waiters */
9417 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9418 		}
9419 		mutex_unlock(&trace_types_lock);
9420 	}
9421 
9422 	(*ppos)++;
9423 
9424 	return cnt;
9425 }
9426 
9427 static const struct file_operations rb_simple_fops = {
9428 	.open		= tracing_open_generic_tr,
9429 	.read		= rb_simple_read,
9430 	.write		= rb_simple_write,
9431 	.release	= tracing_release_generic_tr,
9432 	.llseek		= default_llseek,
9433 };
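
/*
 * Usage sketch (illustrative): rb_simple_fops backs the per-instance
 * "tracing_on" file created in init_tracer_tracefs() below.  Writing a
 * non-zero/zero value turns the ring buffer on/off and calls the current
 * tracer's start()/stop() callbacks; turning it off also wakes blocked
 * trace_pipe readers so they can drain what is already there.
 *
 *   # echo 0 > /sys/kernel/tracing/tracing_on	# stop recording
 *   # echo 1 > /sys/kernel/tracing/tracing_on	# resume recording
 *
 * Writing the value that is already set is a no-op (no start/stop calls).
 */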
9434 
9435 static ssize_t
9436 buffer_percent_read(struct file *filp, char __user *ubuf,
9437 		    size_t cnt, loff_t *ppos)
9438 {
9439 	struct trace_array *tr = filp->private_data;
9440 	char buf[64];
9441 	int r;
9442 
9443 	r = tr->buffer_percent;
9444 	r = sprintf(buf, "%d\n", r);
9445 
9446 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9447 }
9448 
9449 static ssize_t
9450 buffer_percent_write(struct file *filp, const char __user *ubuf,
9451 		     size_t cnt, loff_t *ppos)
9452 {
9453 	struct trace_array *tr = filp->private_data;
9454 	unsigned long val;
9455 	int ret;
9456 
9457 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9458 	if (ret)
9459 		return ret;
9460 
9461 	if (val > 100)
9462 		return -EINVAL;
9463 
9464 	tr->buffer_percent = val;
9465 
9466 	(*ppos)++;
9467 
9468 	return cnt;
9469 }
9470 
9471 static const struct file_operations buffer_percent_fops = {
9472 	.open		= tracing_open_generic_tr,
9473 	.read		= buffer_percent_read,
9474 	.write		= buffer_percent_write,
9475 	.release	= tracing_release_generic_tr,
9476 	.llseek		= default_llseek,
9477 };
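
/*
 * Usage sketch (illustrative): "buffer_percent" sets how full the ring
 * buffer must be before blocked readers (e.g. of trace_pipe) are woken.
 * 0 means wake as soon as any data is available, 100 means wait until
 * the buffer is full; values above 100 are rejected with -EINVAL.
 *
 *   # echo 50 > /sys/kernel/tracing/buffer_percent
 *
 * The default of 50 is set when the file is created in
 * init_tracer_tracefs().
 */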
9478 
9479 static ssize_t
9480 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9481 {
9482 	struct trace_array *tr = filp->private_data;
9483 	size_t size;
9484 	char buf[64];
9485 	int order;
9486 	int r;
9487 
9488 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9489 	size = (PAGE_SIZE << order) / 1024;
9490 
9491 	r = sprintf(buf, "%zd\n", size);
9492 
9493 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9494 }
9495 
9496 static ssize_t
9497 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9498 			 size_t cnt, loff_t *ppos)
9499 {
9500 	struct trace_array *tr = filp->private_data;
9501 	unsigned long val;
9502 	int old_order;
9503 	int order;
9504 	int pages;
9505 	int ret;
9506 
9507 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9508 	if (ret)
9509 		return ret;
9510 
9511 	val *= 1024; /* value passed in is in KB */
9512 
9513 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9514 	order = fls(pages - 1);
9515 
9516 	/* limit between 1 and 128 system pages */
9517 	if (order < 0 || order > 7)
9518 		return -EINVAL;
9519 
9520 	/* Do not allow tracing while changing the order of the ring buffer */
9521 	tracing_stop_tr(tr);
9522 
9523 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9524 	if (old_order == order)
9525 		goto out;
9526 
9527 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9528 	if (ret)
9529 		goto out;
9530 
9531 #ifdef CONFIG_TRACER_MAX_TRACE
9532 
9533 	if (!tr->allocated_snapshot)
9534 		goto out_max;
9535 
9536 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9537 	if (ret) {
9538 		/* Put back the old order */
9539 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9540 		if (WARN_ON_ONCE(cnt)) {
9541 			/*
9542 			 * AARGH! We are left with different orders!
9543 			 * The max buffer is our "snapshot" buffer.
9544 			 * When a tracer needs a snapshot (one of the
9545 			 * latency tracers), it swaps the max buffer
9546 			 * with the saved snapshot. We succeeded in updating
9547 			 * the order of the main buffer, but failed to update
9548 			 * the order of the max buffer. But when we tried
9549 			 * to reset the main buffer to the original size, we
9550 			 * failed there too. This is very unlikely to
9551 			 * happen, but if it does, warn and kill all
9552 			 * tracing.
9553 			 */
9554 			tracing_disabled = 1;
9555 		}
9556 		goto out;
9557 	}
9558  out_max:
9559 #endif
9560 	(*ppos)++;
9561  out:
9562 	if (ret)
9563 		cnt = ret;
9564 	tracing_start_tr(tr);
9565 	return cnt;
9566 }
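
/*
 * Worked example for the size/order math above (assuming 4K pages): the
 * value written to "buffer_subbuf_size_kb" is rounded up to a
 * power-of-two number of system pages.  Writing "64" gives val = 65536,
 * pages = 16 and order = fls(15) = 4, i.e. 64K sub-buffers; writing "5"
 * gives pages = 2 and order = 1, i.e. 8K sub-buffers.  Orders outside
 * 0..7 (1 to 128 pages) are rejected with -EINVAL.
 */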
9567 
9568 static const struct file_operations buffer_subbuf_size_fops = {
9569 	.open		= tracing_open_generic_tr,
9570 	.read		= buffer_subbuf_size_read,
9571 	.write		= buffer_subbuf_size_write,
9572 	.release	= tracing_release_generic_tr,
9573 	.llseek		= default_llseek,
9574 };
9575 
9576 static struct dentry *trace_instance_dir;
9577 
9578 static void
9579 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9580 
9581 #ifdef CONFIG_MODULES
9582 static int make_mod_delta(struct module *mod, void *data)
9583 {
9584 	struct trace_module_delta *module_delta;
9585 	struct trace_scratch *tscratch;
9586 	struct trace_mod_entry *entry;
9587 	struct trace_array *tr = data;
9588 	int i;
9589 
9590 	tscratch = tr->scratch;
9591 	module_delta = READ_ONCE(tr->module_delta);
9592 	for (i = 0; i < tscratch->nr_entries; i++) {
9593 		entry = &tscratch->entries[i];
9594 		if (strcmp(mod->name, entry->mod_name))
9595 			continue;
9596 		if (mod->state == MODULE_STATE_GOING)
9597 			module_delta->delta[i] = 0;
9598 		else
9599 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9600 						 - entry->mod_addr;
9601 		break;
9602 	}
9603 	return 0;
9604 }
9605 #else
9606 static int make_mod_delta(struct module *mod, void *data)
9607 {
9608 	return 0;
9609 }
9610 #endif
9611 
9612 static int mod_addr_comp(const void *a, const void *b, const void *data)
9613 {
9614 	const struct trace_mod_entry *e1 = a;
9615 	const struct trace_mod_entry *e2 = b;
9616 
9617 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9618 }
9619 
9620 static void setup_trace_scratch(struct trace_array *tr,
9621 				struct trace_scratch *tscratch, unsigned int size)
9622 {
9623 	struct trace_module_delta *module_delta;
9624 	struct trace_mod_entry *entry;
9625 	int i, nr_entries;
9626 
9627 	if (!tscratch)
9628 		return;
9629 
9630 	tr->scratch = tscratch;
9631 	tr->scratch_size = size;
9632 
9633 	if (tscratch->text_addr)
9634 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9635 
9636 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9637 		goto reset;
9638 
9639 	/* Check if each module name is a valid string */
9640 	for (i = 0; i < tscratch->nr_entries; i++) {
9641 		int n;
9642 
9643 		entry = &tscratch->entries[i];
9644 
9645 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9646 			if (entry->mod_name[n] == '\0')
9647 				break;
9648 			if (!isprint(entry->mod_name[n]))
9649 				goto reset;
9650 		}
9651 		if (n == MODULE_NAME_LEN)
9652 			goto reset;
9653 	}
9654 
9655 	/* Sort the entries so that we can find the appropriate module by address. */
9656 	nr_entries = tscratch->nr_entries;
9657 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9658 	       mod_addr_comp, NULL, NULL);
9659 
9660 	if (IS_ENABLED(CONFIG_MODULES)) {
9661 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9662 		if (!module_delta) {
9663 			pr_info("module_delta allocation failed. Not able to decode module address.\n");
9664 			goto reset;
9665 		}
9666 		init_rcu_head(&module_delta->rcu);
9667 	} else
9668 		module_delta = NULL;
9669 	WRITE_ONCE(tr->module_delta, module_delta);
9670 
9671 	/* Scan modules to make text delta for modules. */
9672 	module_for_each_mod(make_mod_delta, tr);
9673 
9674 	/* Set trace_clock to the same as in the previous boot. */
9675 	if (tscratch->clock_id != tr->clock_id) {
9676 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9677 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9678 			pr_info("The previous trace_clock info is not valid.\n");
9679 			goto reset;
9680 		}
9681 	}
9682 	return;
9683  reset:
9684 	/* Invalid trace modules */
9685 	memset(tscratch, 0, size);
9686 }
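
/*
 * Illustrative example of the delta bookkeeping above (addresses are
 * made up): the scratch area of a persistent ("last boot") buffer
 * records each module's name and the text address it had in the
 * previous boot.  When the same module shows up in this boot,
 * make_mod_delta() stores
 *
 *	delta[i] = mod->mem[MOD_TEXT].base - entry->mod_addr
 *
 * so an address such as 0xffffffffc0a01234 recorded last boot resolves
 * against a module now loaded 0x200000 higher by adding a delta of
 * 0x200000.  A delta of 0 (module gone) means the address cannot be
 * adjusted.
 */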
9687 
9688 static int
9689 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9690 {
9691 	enum ring_buffer_flags rb_flags;
9692 	struct trace_scratch *tscratch;
9693 	unsigned int scratch_size = 0;
9694 
9695 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9696 
9697 	buf->tr = tr;
9698 
9699 	if (tr->range_addr_start && tr->range_addr_size) {
9700 		/* Add scratch buffer to handle 128 modules */
9701 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9702 						      tr->range_addr_start,
9703 						      tr->range_addr_size,
9704 						      struct_size(tscratch, entries, 128));
9705 
9706 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9707 		setup_trace_scratch(tr, tscratch, scratch_size);
9708 
9709 		/*
9710 		 * This is basically the same as a mapped buffer,
9711 		 * with the same restrictions.
9712 		 */
9713 		tr->mapped++;
9714 	} else {
9715 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9716 	}
9717 	if (!buf->buffer)
9718 		return -ENOMEM;
9719 
9720 	buf->data = alloc_percpu(struct trace_array_cpu);
9721 	if (!buf->data) {
9722 		ring_buffer_free(buf->buffer);
9723 		buf->buffer = NULL;
9724 		return -ENOMEM;
9725 	}
9726 
9727 	/* Allocate the first page for all buffers */
9728 	set_buffer_entries(&tr->array_buffer,
9729 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9730 
9731 	return 0;
9732 }
9733 
9734 static void free_trace_buffer(struct array_buffer *buf)
9735 {
9736 	if (buf->buffer) {
9737 		ring_buffer_free(buf->buffer);
9738 		buf->buffer = NULL;
9739 		free_percpu(buf->data);
9740 		buf->data = NULL;
9741 	}
9742 }
9743 
9744 static int allocate_trace_buffers(struct trace_array *tr, int size)
9745 {
9746 	int ret;
9747 
9748 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9749 	if (ret)
9750 		return ret;
9751 
9752 #ifdef CONFIG_TRACER_MAX_TRACE
9753 	/* Fixed-memory mapped buffer trace arrays do not have snapshot buffers */
9754 	if (tr->range_addr_start)
9755 		return 0;
9756 
9757 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9758 				    allocate_snapshot ? size : 1);
9759 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9760 		free_trace_buffer(&tr->array_buffer);
9761 		return -ENOMEM;
9762 	}
9763 	tr->allocated_snapshot = allocate_snapshot;
9764 
9765 	allocate_snapshot = false;
9766 #endif
9767 
9768 	return 0;
9769 }
9770 
9771 static void free_trace_buffers(struct trace_array *tr)
9772 {
9773 	if (!tr)
9774 		return;
9775 
9776 	free_trace_buffer(&tr->array_buffer);
9777 	kfree(tr->module_delta);
9778 
9779 #ifdef CONFIG_TRACER_MAX_TRACE
9780 	free_trace_buffer(&tr->max_buffer);
9781 #endif
9782 }
9783 
9784 static void init_trace_flags_index(struct trace_array *tr)
9785 {
9786 	int i;
9787 
9788 	/* Used by the trace options files */
9789 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9790 		tr->trace_flags_index[i] = i;
9791 }
9792 
9793 static void __update_tracer_options(struct trace_array *tr)
9794 {
9795 	struct tracer *t;
9796 
9797 	for (t = trace_types; t; t = t->next)
9798 		add_tracer_options(tr, t);
9799 }
9800 
9801 static void update_tracer_options(struct trace_array *tr)
9802 {
9803 	mutex_lock(&trace_types_lock);
9804 	tracer_options_updated = true;
9805 	__update_tracer_options(tr);
9806 	mutex_unlock(&trace_types_lock);
9807 }
9808 
9809 /* Must have trace_types_lock held */
9810 struct trace_array *trace_array_find(const char *instance)
9811 {
9812 	struct trace_array *tr, *found = NULL;
9813 
9814 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9815 		if (tr->name && strcmp(tr->name, instance) == 0) {
9816 			found = tr;
9817 			break;
9818 		}
9819 	}
9820 
9821 	return found;
9822 }
9823 
9824 struct trace_array *trace_array_find_get(const char *instance)
9825 {
9826 	struct trace_array *tr;
9827 
9828 	mutex_lock(&trace_types_lock);
9829 	tr = trace_array_find(instance);
9830 	if (tr)
9831 		tr->ref++;
9832 	mutex_unlock(&trace_types_lock);
9833 
9834 	return tr;
9835 }
9836 
9837 static int trace_array_create_dir(struct trace_array *tr)
9838 {
9839 	int ret;
9840 
9841 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9842 	if (!tr->dir)
9843 		return -EINVAL;
9844 
9845 	ret = event_trace_add_tracer(tr->dir, tr);
9846 	if (ret) {
9847 		tracefs_remove(tr->dir);
9848 		return ret;
9849 	}
9850 
9851 	init_tracer_tracefs(tr, tr->dir);
9852 	__update_tracer_options(tr);
9853 
9854 	return ret;
9855 }
9856 
9857 static struct trace_array *
9858 trace_array_create_systems(const char *name, const char *systems,
9859 			   unsigned long range_addr_start,
9860 			   unsigned long range_addr_size)
9861 {
9862 	struct trace_array *tr;
9863 	int ret;
9864 
9865 	ret = -ENOMEM;
9866 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9867 	if (!tr)
9868 		return ERR_PTR(ret);
9869 
9870 	tr->name = kstrdup(name, GFP_KERNEL);
9871 	if (!tr->name)
9872 		goto out_free_tr;
9873 
9874 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9875 		goto out_free_tr;
9876 
9877 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9878 		goto out_free_tr;
9879 
9880 	if (systems) {
9881 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9882 		if (!tr->system_names)
9883 			goto out_free_tr;
9884 	}
9885 
9886 	/* Only for boot up memory mapped ring buffers */
9887 	tr->range_addr_start = range_addr_start;
9888 	tr->range_addr_size = range_addr_size;
9889 
9890 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9891 
9892 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9893 
9894 	raw_spin_lock_init(&tr->start_lock);
9895 
9896 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9897 #ifdef CONFIG_TRACER_MAX_TRACE
9898 	spin_lock_init(&tr->snapshot_trigger_lock);
9899 #endif
9900 	tr->current_trace = &nop_trace;
9901 
9902 	INIT_LIST_HEAD(&tr->systems);
9903 	INIT_LIST_HEAD(&tr->events);
9904 	INIT_LIST_HEAD(&tr->hist_vars);
9905 	INIT_LIST_HEAD(&tr->err_log);
9906 	INIT_LIST_HEAD(&tr->marker_list);
9907 
9908 #ifdef CONFIG_MODULES
9909 	INIT_LIST_HEAD(&tr->mod_events);
9910 #endif
9911 
9912 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9913 		goto out_free_tr;
9914 
9915 	/* The ring buffer is expanded by default */
9916 	trace_set_ring_buffer_expanded(tr);
9917 
9918 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9919 		goto out_free_tr;
9920 
9921 	ftrace_init_trace_array(tr);
9922 
9923 	init_trace_flags_index(tr);
9924 
9925 	if (trace_instance_dir) {
9926 		ret = trace_array_create_dir(tr);
9927 		if (ret)
9928 			goto out_free_tr;
9929 	} else
9930 		__trace_early_add_events(tr);
9931 
9932 	list_add(&tr->list, &ftrace_trace_arrays);
9933 
9934 	tr->ref++;
9935 
9936 	return tr;
9937 
9938  out_free_tr:
9939 	ftrace_free_ftrace_ops(tr);
9940 	free_trace_buffers(tr);
9941 	free_cpumask_var(tr->pipe_cpumask);
9942 	free_cpumask_var(tr->tracing_cpumask);
9943 	kfree_const(tr->system_names);
9944 	kfree(tr->range_name);
9945 	kfree(tr->name);
9946 	kfree(tr);
9947 
9948 	return ERR_PTR(ret);
9949 }
9950 
9951 static struct trace_array *trace_array_create(const char *name)
9952 {
9953 	return trace_array_create_systems(name, NULL, 0, 0);
9954 }
9955 
9956 static int instance_mkdir(const char *name)
9957 {
9958 	struct trace_array *tr;
9959 	int ret;
9960 
9961 	guard(mutex)(&event_mutex);
9962 	guard(mutex)(&trace_types_lock);
9963 
9965 	if (trace_array_find(name))
9966 		return -EEXIST;
9967 
9968 	tr = trace_array_create(name);
9969 
9970 	ret = PTR_ERR_OR_ZERO(tr);
9971 
9972 	return ret;
9973 }
9974 
9975 #ifdef CONFIG_MMU
9976 static u64 map_pages(unsigned long start, unsigned long size)
9977 {
9978 	unsigned long vmap_start, vmap_end;
9979 	struct vm_struct *area;
9980 	int ret;
9981 
9982 	area = get_vm_area(size, VM_IOREMAP);
9983 	if (!area)
9984 		return 0;
9985 
9986 	vmap_start = (unsigned long) area->addr;
9987 	vmap_end = vmap_start + size;
9988 
9989 	ret = vmap_page_range(vmap_start, vmap_end,
9990 			      start, pgprot_nx(PAGE_KERNEL));
9991 	if (ret < 0) {
9992 		free_vm_area(area);
9993 		return 0;
9994 	}
9995 
9996 	return (u64)vmap_start;
9997 }
9998 #else
9999 static inline u64 map_pages(unsigned long start, unsigned long size)
10000 {
10001 	return 0;
10002 }
10003 #endif
10004 
10005 /**
10006  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10007  * @name: The name of the trace array to be looked up/created.
10008  * @systems: A list of systems to create event directories for (NULL for all)
10009  *
10010  * Returns a pointer to the trace array with the given name, or
10011  * NULL if it cannot be created.
10012  *
10013  * NOTE: This function increments the reference counter associated with the
10014  * trace array returned. This makes sure it cannot be freed while in use.
10015  * Use trace_array_put() once the trace array is no longer needed.
10016  * If the trace_array is to be freed, trace_array_destroy() needs to
10017  * be called after the trace_array_put(), or simply let user space delete
10018  * be called after trace_array_put(), or simply let user space delete
10019  * it from the tracefs instances directory. But until
10020  * trace_array_put() is called, user space cannot delete it.
10021  */
10022 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10023 {
10024 	struct trace_array *tr;
10025 
10026 	guard(mutex)(&event_mutex);
10027 	guard(mutex)(&trace_types_lock);
10028 
10029 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10030 		if (tr->name && strcmp(tr->name, name) == 0) {
10031 			tr->ref++;
10032 			return tr;
10033 		}
10034 	}
10035 
10036 	tr = trace_array_create_systems(name, systems, 0, 0);
10037 
10038 	if (IS_ERR(tr))
10039 		tr = NULL;
10040 	else
10041 		tr->ref++;
10042 
10043 	return tr;
10044 }
10045 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
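
/*
 * Minimal in-kernel usage sketch (illustrative; error handling trimmed
 * and "my_instance" is a made-up name):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 *
 * trace_array_destroy() is only needed if the module also wants the
 * instance removed; per the comment above it must come after
 * trace_array_put(), and user space cannot rmdir the instance while the
 * reference is held.
 */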
10046 
10047 static int __remove_instance(struct trace_array *tr)
10048 {
10049 	int i;
10050 
10051 	/* Reference counter for a newly created trace array = 1. */
10052 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10053 		return -EBUSY;
10054 
10055 	list_del(&tr->list);
10056 
10057 	/* Disable all the flags that were enabled coming in */
10058 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10059 		if ((1 << i) & ZEROED_TRACE_FLAGS)
10060 			set_tracer_flag(tr, 1 << i, 0);
10061 	}
10062 
10063 	if (printk_trace == tr)
10064 		update_printk_trace(&global_trace);
10065 
10066 	if (update_marker_trace(tr, 0))
10067 		synchronize_rcu();
10068 
10069 	tracing_set_nop(tr);
10070 	clear_ftrace_function_probes(tr);
10071 	event_trace_del_tracer(tr);
10072 	ftrace_clear_pids(tr);
10073 	ftrace_destroy_function_files(tr);
10074 	tracefs_remove(tr->dir);
10075 	free_percpu(tr->last_func_repeats);
10076 	free_trace_buffers(tr);
10077 	clear_tracing_err_log(tr);
10078 
10079 	if (tr->range_name) {
10080 		reserve_mem_release_by_name(tr->range_name);
10081 		kfree(tr->range_name);
10082 	}
10083 
10084 	for (i = 0; i < tr->nr_topts; i++) {
10085 		kfree(tr->topts[i].topts);
10086 	}
10087 	kfree(tr->topts);
10088 
10089 	free_cpumask_var(tr->pipe_cpumask);
10090 	free_cpumask_var(tr->tracing_cpumask);
10091 	kfree_const(tr->system_names);
10092 	kfree(tr->name);
10093 	kfree(tr);
10094 
10095 	return 0;
10096 }
10097 
10098 int trace_array_destroy(struct trace_array *this_tr)
10099 {
10100 	struct trace_array *tr;
10101 
10102 	if (!this_tr)
10103 		return -EINVAL;
10104 
10105 	guard(mutex)(&event_mutex);
10106 	guard(mutex)(&trace_types_lock);
10107 
10109 	/* Make sure the trace array exists before destroying it. */
10110 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10111 		if (tr == this_tr)
10112 			return __remove_instance(tr);
10113 	}
10114 
10115 	return -ENODEV;
10116 }
10117 EXPORT_SYMBOL_GPL(trace_array_destroy);
10118 
10119 static int instance_rmdir(const char *name)
10120 {
10121 	struct trace_array *tr;
10122 
10123 	guard(mutex)(&event_mutex);
10124 	guard(mutex)(&trace_types_lock);
10125 
10126 	tr = trace_array_find(name);
10127 	if (!tr)
10128 		return -ENODEV;
10129 
10130 	return __remove_instance(tr);
10131 }
10132 
10133 static __init void create_trace_instances(struct dentry *d_tracer)
10134 {
10135 	struct trace_array *tr;
10136 
10137 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10138 							 instance_mkdir,
10139 							 instance_rmdir);
10140 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10141 		return;
10142 
10143 	guard(mutex)(&event_mutex);
10144 	guard(mutex)(&trace_types_lock);
10145 
10146 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10147 		if (!tr->name)
10148 			continue;
10149 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10150 			     "Failed to create instance directory\n"))
10151 			return;
10152 	}
10153 }
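
/*
 * User-space view (illustrative): the "instances" directory wired up
 * above lets trace arrays be created and removed with plain mkdir/rmdir:
 *
 *   # mkdir /sys/kernel/tracing/instances/foo
 *   # echo 1 > /sys/kernel/tracing/instances/foo/events/sched/enable
 *   # rmdir /sys/kernel/tracing/instances/foo
 *
 * instance_rmdir() returns -EBUSY while the instance still has
 * references (see __remove_instance() above) and -ENODEV if no instance
 * of that name exists.
 */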
10154 
10155 static void
10156 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10157 {
10158 	int cpu;
10159 
10160 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10161 			tr, &show_traces_fops);
10162 
10163 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10164 			tr, &set_tracer_fops);
10165 
10166 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10167 			  tr, &tracing_cpumask_fops);
10168 
10169 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10170 			  tr, &tracing_iter_fops);
10171 
10172 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10173 			  tr, &tracing_fops);
10174 
10175 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10176 			  tr, &tracing_pipe_fops);
10177 
10178 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10179 			  tr, &tracing_entries_fops);
10180 
10181 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10182 			  tr, &tracing_total_entries_fops);
10183 
10184 	trace_create_file("free_buffer", 0200, d_tracer,
10185 			  tr, &tracing_free_buffer_fops);
10186 
10187 	trace_create_file("trace_marker", 0220, d_tracer,
10188 			  tr, &tracing_mark_fops);
10189 
10190 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10191 
10192 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10193 			  tr, &tracing_mark_raw_fops);
10194 
10195 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10196 			  &trace_clock_fops);
10197 
10198 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10199 			  tr, &rb_simple_fops);
10200 
10201 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10202 			  &trace_time_stamp_mode_fops);
10203 
10204 	tr->buffer_percent = 50;
10205 
10206 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10207 			tr, &buffer_percent_fops);
10208 
10209 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10210 			  tr, &buffer_subbuf_size_fops);
10211 
10212 	create_trace_options_dir(tr);
10213 
10214 #ifdef CONFIG_TRACER_MAX_TRACE
10215 	trace_create_maxlat_file(tr, d_tracer);
10216 #endif
10217 
10218 	if (ftrace_create_function_files(tr, d_tracer))
10219 		MEM_FAIL(1, "Could not allocate function filter files");
10220 
10221 	if (tr->range_addr_start) {
10222 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10223 				  tr, &last_boot_fops);
10224 #ifdef CONFIG_TRACER_SNAPSHOT
10225 	} else {
10226 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10227 				  tr, &snapshot_fops);
10228 #endif
10229 	}
10230 
10231 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10232 			  tr, &tracing_err_log_fops);
10233 
10234 	for_each_tracing_cpu(cpu)
10235 		tracing_init_tracefs_percpu(tr, cpu);
10236 
10237 	ftrace_init_tracefs(tr, d_tracer);
10238 }
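
/*
 * Rough sketch of the per-instance layout created above (not exhaustive,
 * some entries are config dependent):
 *
 *   <instance>/
 *	available_tracers  current_tracer   trace            trace_pipe
 *	trace_options      options/         tracing_on       trace_clock
 *	buffer_size_kb     buffer_percent   buffer_subbuf_size_kb
 *	trace_marker       trace_marker_raw error_log        per_cpu/
 *	snapshot (CONFIG_TRACER_SNAPSHOT) or last_boot_info (mapped buffers)
 *
 * The top-level tracing directory gets the same set plus the global-only
 * files added in tracer_init_tracefs_work_func() below.
 */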
10239 
10240 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10241 {
10242 	struct vfsmount *mnt;
10243 	struct file_system_type *type;
10244 
10245 	/*
10246 	 * To maintain backward compatibility for tools that mount
10247 	 * debugfs to get to the tracing facility, tracefs is automatically
10248 	 * mounted to the debugfs/tracing directory.
10249 	 */
10250 	type = get_fs_type("tracefs");
10251 	if (!type)
10252 		return NULL;
10253 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10254 	put_filesystem(type);
10255 	if (IS_ERR(mnt))
10256 		return NULL;
10257 	mntget(mnt);
10258 
10259 	return mnt;
10260 }
10261 
10262 /**
10263  * tracing_init_dentry - initialize top level trace array
10264  *
10265  * This is called when creating files or directories in the tracing
10266  * directory. It is called via fs_initcall() by any of the boot up code
10267  * and expects to return the dentry of the top level tracing directory.
10268  */
10269 int tracing_init_dentry(void)
10270 {
10271 	struct trace_array *tr = &global_trace;
10272 
10273 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10274 		pr_warn("Tracing disabled due to lockdown\n");
10275 		return -EPERM;
10276 	}
10277 
10278 	/* The top level trace array uses NULL as parent */
10279 	if (tr->dir)
10280 		return 0;
10281 
10282 	if (WARN_ON(!tracefs_initialized()))
10283 		return -ENODEV;
10284 
10285 	/*
10286 	 * As there may still be users that expect the tracing
10287 	 * files to exist in debugfs/tracing, we must automount
10288 	 * the tracefs file system there, so older tools still
10289 	 * work with the newer kernel.
10290 	 */
10291 	tr->dir = debugfs_create_automount("tracing", NULL,
10292 					   trace_automount, NULL);
10293 
10294 	return 0;
10295 }
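
/*
 * Practical effect of the automount above (illustrative, assuming the
 * usual mount points): both paths reach the same tracefs files, even if
 * nothing mounts a tracing filesystem under debugfs explicitly:
 *
 *   # ls /sys/kernel/tracing		(canonical tracefs mount point)
 *   # ls /sys/kernel/debug/tracing	(tracefs automounted on debugfs)
 */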
10296 
10297 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10298 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10299 
10300 static struct workqueue_struct *eval_map_wq __initdata;
10301 static struct work_struct eval_map_work __initdata;
10302 static struct work_struct tracerfs_init_work __initdata;
10303 
10304 static void __init eval_map_work_func(struct work_struct *work)
10305 {
10306 	int len;
10307 
10308 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10309 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10310 }
10311 
10312 static int __init trace_eval_init(void)
10313 {
10314 	INIT_WORK(&eval_map_work, eval_map_work_func);
10315 
10316 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10317 	if (!eval_map_wq) {
10318 		pr_err("Unable to allocate eval_map_wq\n");
10319 		/* Do work here */
10320 		eval_map_work_func(&eval_map_work);
10321 		return -ENOMEM;
10322 	}
10323 
10324 	queue_work(eval_map_wq, &eval_map_work);
10325 	return 0;
10326 }
10327 
10328 subsys_initcall(trace_eval_init);
10329 
10330 static int __init trace_eval_sync(void)
10331 {
10332 	/* Make sure the eval map updates are finished */
10333 	if (eval_map_wq)
10334 		destroy_workqueue(eval_map_wq);
10335 	return 0;
10336 }
10337 
10338 late_initcall_sync(trace_eval_sync);
10339 
10341 #ifdef CONFIG_MODULES
10342 
10343 bool module_exists(const char *module)
10344 {
10345 	/* All modules have the symbol __this_module */
10346 	static const char this_mod[] = "__this_module";
10347 	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
10348 	unsigned long val;
10349 	int n;
10350 
10351 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10352 
10353 	if (n > sizeof(modname) - 1)
10354 		return false;
10355 
10356 	val = module_kallsyms_lookup_name(modname);
10357 	return val != 0;
10358 }
10359 
10360 static void trace_module_add_evals(struct module *mod)
10361 {
10362 	if (!mod->num_trace_evals)
10363 		return;
10364 
10365 	/*
10366 	 * Modules with bad taint do not have events created, so do
10367 	 * not bother with their eval maps (enums) either.
10368 	 */
10369 	if (trace_module_has_bad_taint(mod))
10370 		return;
10371 
10372 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10373 }
10374 
10375 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10376 static void trace_module_remove_evals(struct module *mod)
10377 {
10378 	union trace_eval_map_item *map;
10379 	union trace_eval_map_item **last = &trace_eval_maps;
10380 
10381 	if (!mod->num_trace_evals)
10382 		return;
10383 
10384 	guard(mutex)(&trace_eval_mutex);
10385 
10386 	map = trace_eval_maps;
10387 
10388 	while (map) {
10389 		if (map->head.mod == mod)
10390 			break;
10391 		map = trace_eval_jmp_to_tail(map);
10392 		last = &map->tail.next;
10393 		map = map->tail.next;
10394 	}
10395 	if (!map)
10396 		return;
10397 
10398 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10399 	kfree(map);
10400 }
10401 #else
10402 static inline void trace_module_remove_evals(struct module *mod) { }
10403 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10404 
10405 static void trace_module_record(struct module *mod, bool add)
10406 {
10407 	struct trace_array *tr;
10408 	unsigned long flags;
10409 
10410 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10411 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10412 		/* Update any persistent trace array that has already been started */
10413 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10414 			guard(mutex)(&scratch_mutex);
10415 			save_mod(mod, tr);
10416 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10417 			/* Update delta if the module loaded in previous boot */
10418 			make_mod_delta(mod, tr);
10419 		}
10420 	}
10421 }
10422 
10423 static int trace_module_notify(struct notifier_block *self,
10424 			       unsigned long val, void *data)
10425 {
10426 	struct module *mod = data;
10427 
10428 	switch (val) {
10429 	case MODULE_STATE_COMING:
10430 		trace_module_add_evals(mod);
10431 		trace_module_record(mod, true);
10432 		break;
10433 	case MODULE_STATE_GOING:
10434 		trace_module_remove_evals(mod);
10435 		trace_module_record(mod, false);
10436 		break;
10437 	}
10438 
10439 	return NOTIFY_OK;
10440 }
10441 
10442 static struct notifier_block trace_module_nb = {
10443 	.notifier_call = trace_module_notify,
10444 	.priority = 0,
10445 };
10446 #endif /* CONFIG_MODULES */
10447 
10448 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10449 {
10450 
10452 
10453 	init_tracer_tracefs(&global_trace, NULL);
10454 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10455 
10456 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10457 			&global_trace, &tracing_thresh_fops);
10458 
10459 	trace_create_file("README", TRACE_MODE_READ, NULL,
10460 			NULL, &tracing_readme_fops);
10461 
10462 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10463 			NULL, &tracing_saved_cmdlines_fops);
10464 
10465 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10466 			  NULL, &tracing_saved_cmdlines_size_fops);
10467 
10468 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10469 			NULL, &tracing_saved_tgids_fops);
10470 
10471 	trace_create_eval_file(NULL);
10472 
10473 #ifdef CONFIG_MODULES
10474 	register_module_notifier(&trace_module_nb);
10475 #endif
10476 
10477 #ifdef CONFIG_DYNAMIC_FTRACE
10478 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10479 			NULL, &tracing_dyn_info_fops);
10480 #endif
10481 
10482 	create_trace_instances(NULL);
10483 
10484 	update_tracer_options(&global_trace);
10485 }
10486 
10487 static __init int tracer_init_tracefs(void)
10488 {
10489 	int ret;
10490 
10491 	trace_access_lock_init();
10492 
10493 	ret = tracing_init_dentry();
10494 	if (ret)
10495 		return 0;
10496 
10497 	if (eval_map_wq) {
10498 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10499 		queue_work(eval_map_wq, &tracerfs_init_work);
10500 	} else {
10501 		tracer_init_tracefs_work_func(NULL);
10502 	}
10503 
10504 	rv_init_interface();
10505 
10506 	return 0;
10507 }
10508 
10509 fs_initcall(tracer_init_tracefs);
10510 
10511 static int trace_die_panic_handler(struct notifier_block *self,
10512 				unsigned long ev, void *unused);
10513 
10514 static struct notifier_block trace_panic_notifier = {
10515 	.notifier_call = trace_die_panic_handler,
10516 	.priority = INT_MAX - 1,
10517 };
10518 
10519 static struct notifier_block trace_die_notifier = {
10520 	.notifier_call = trace_die_panic_handler,
10521 	.priority = INT_MAX - 1,
10522 };
10523 
10524 /*
10525  * The idea is to execute the following die/panic callback early, in order
10526  * to avoid showing irrelevant information in the trace (like other panic
10527  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10528  * warnings get disabled (to prevent potential log flooding).
10529  */
10530 static int trace_die_panic_handler(struct notifier_block *self,
10531 				unsigned long ev, void *unused)
10532 {
10533 	if (!ftrace_dump_on_oops_enabled())
10534 		return NOTIFY_DONE;
10535 
10536 	/* The die notifier requires DIE_OOPS to trigger */
10537 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10538 		return NOTIFY_DONE;
10539 
10540 	ftrace_dump(DUMP_PARAM);
10541 
10542 	return NOTIFY_DONE;
10543 }
10544 
10545 /*
10546  * printk is set to a max of 1024; we really don't need it that big.
10547  * Nothing should be printing 1000 characters anyway.
10548  */
10549 #define TRACE_MAX_PRINT		1000
10550 
10551 /*
10552  * Define here KERN_TRACE so that we have one place to modify
10553  * it if we decide to change what log level the ftrace dump
10554  * should be at.
10555  */
10556 #define KERN_TRACE		KERN_EMERG
10557 
10558 void
10559 trace_printk_seq(struct trace_seq *s)
10560 {
10561 	/* Probably should print a warning here. */
10562 	if (s->seq.len >= TRACE_MAX_PRINT)
10563 		s->seq.len = TRACE_MAX_PRINT;
10564 
10565 	/*
10566 	 * More paranoid code. Although the buffer size is set to
10567 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10568 	 * an extra layer of protection.
10569 	 */
10570 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10571 		s->seq.len = s->seq.size - 1;
10572 
10573 	/* Should be NUL-terminated, but we are paranoid. */
10574 	s->buffer[s->seq.len] = 0;
10575 
10576 	printk(KERN_TRACE "%s", s->buffer);
10577 
10578 	trace_seq_init(s);
10579 }
10580 
10581 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10582 {
10583 	iter->tr = tr;
10584 	iter->trace = iter->tr->current_trace;
10585 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10586 	iter->array_buffer = &tr->array_buffer;
10587 
10588 	if (iter->trace && iter->trace->open)
10589 		iter->trace->open(iter);
10590 
10591 	/* Annotate start of buffers if we had overruns */
10592 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10593 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10594 
10595 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10596 	if (trace_clocks[iter->tr->clock_id].in_ns)
10597 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10598 
10599 	/* Cannot use kmalloc for iter.temp and iter.fmt */
10600 	iter->temp = static_temp_buf;
10601 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10602 	iter->fmt = static_fmt_buf;
10603 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10604 }
10605 
10606 void trace_init_global_iter(struct trace_iterator *iter)
10607 {
10608 	trace_init_iter(iter, &global_trace);
10609 }
10610 
10611 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10612 {
10613 	/* use static because iter can be a bit big for the stack */
10614 	static struct trace_iterator iter;
10615 	unsigned int old_userobj;
10616 	unsigned long flags;
10617 	int cnt = 0;
10618 
10619 	/*
10620 	 * Always turn off tracing when we dump.
10621 	 * We don't need to show trace output of what happens
10622 	 * between multiple crashes.
10623 	 *
10624 	 * If the user does a sysrq-z, then they can re-enable
10625 	 * tracing with echo 1 > tracing_on.
10626 	 */
10627 	tracer_tracing_off(tr);
10628 
10629 	local_irq_save(flags);
10630 
10631 	/* Simulate the iterator */
10632 	trace_init_iter(&iter, tr);
10633 
10634 	/* While dumping, do not allow the buffer to be enabled */
10635 	tracer_tracing_disable(tr);
10636 
10637 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10638 
10639 	/* don't look at user memory in panic mode */
10640 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10641 
10642 	if (dump_mode == DUMP_ORIG)
10643 		iter.cpu_file = raw_smp_processor_id();
10644 	else
10645 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10646 
10647 	if (tr == &global_trace)
10648 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10649 	else
10650 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10651 
10652 	/* Did function tracer already get disabled? */
10653 	if (ftrace_is_dead()) {
10654 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10655 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10656 	}
10657 
10658 	/*
10659 	 * We need to stop all tracing on all CPUs to read
10660 	 * the next buffer. This is a bit expensive, but is
10661 	 * not done often. We fill in all that we can read,
10662 	 * and then release the locks again.
10663 	 */
10664 
10665 	while (!trace_empty(&iter)) {
10666 
10667 		if (!cnt)
10668 			printk(KERN_TRACE "---------------------------------\n");
10669 
10670 		cnt++;
10671 
10672 		trace_iterator_reset(&iter);
10673 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10674 
10675 		if (trace_find_next_entry_inc(&iter) != NULL) {
10676 			int ret;
10677 
10678 			ret = print_trace_line(&iter);
10679 			if (ret != TRACE_TYPE_NO_CONSUME)
10680 				trace_consume(&iter);
10681 		}
10682 		touch_nmi_watchdog();
10683 
10684 		trace_printk_seq(&iter.seq);
10685 	}
10686 
10687 	if (!cnt)
10688 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10689 	else
10690 		printk(KERN_TRACE "---------------------------------\n");
10691 
10692 	tr->trace_flags |= old_userobj;
10693 
10694 	tracer_tracing_enable(tr);
10695 	local_irq_restore(flags);
10696 }
10697 
10698 static void ftrace_dump_by_param(void)
10699 {
10700 	bool first_param = true;
10701 	char dump_param[MAX_TRACER_SIZE];
10702 	char *buf, *token, *inst_name;
10703 	struct trace_array *tr;
10704 
10705 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10706 	buf = dump_param;
10707 
10708 	while ((token = strsep(&buf, ",")) != NULL) {
10709 		if (first_param) {
10710 			first_param = false;
10711 			if (!strcmp("0", token))
10712 				continue;
10713 			else if (!strcmp("1", token)) {
10714 				ftrace_dump_one(&global_trace, DUMP_ALL);
10715 				continue;
10716 			} else if (!strcmp("2", token) ||
10717 				   !strcmp("orig_cpu", token)) {
10719 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10720 				continue;
10721 			}
10722 		}
10723 
10724 		inst_name = strsep(&token, "=");
10725 		tr = trace_array_find(inst_name);
10726 		if (!tr) {
10727 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10728 			continue;
10729 		}
10730 
10731 		if (token && (!strcmp("2", token) ||
10732 			  !strcmp("orig_cpu", token)))
10733 			ftrace_dump_one(tr, DUMP_ORIG);
10734 		else
10735 			ftrace_dump_one(tr, DUMP_ALL);
10736 	}
10737 }
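
/*
 * Example parameter strings accepted by the parser above (illustrative;
 * "foo" is a made-up instance name):
 *
 *   ftrace_dump_on_oops=1			global buffer, all CPUs
 *   ftrace_dump_on_oops=orig_cpu		global buffer, originating CPU
 *   ftrace_dump_on_oops=1,foo=orig_cpu		global buffer plus instance "foo"
 *
 * Instance names that cannot be found are reported and skipped.
 */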
10738 
10739 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10740 {
10741 	static atomic_t dump_running;
10742 
10743 	/* Only allow one dump user at a time. */
10744 	if (atomic_inc_return(&dump_running) != 1) {
10745 		atomic_dec(&dump_running);
10746 		return;
10747 	}
10748 
10749 	switch (oops_dump_mode) {
10750 	case DUMP_ALL:
10751 		ftrace_dump_one(&global_trace, DUMP_ALL);
10752 		break;
10753 	case DUMP_ORIG:
10754 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10755 		break;
10756 	case DUMP_PARAM:
10757 		ftrace_dump_by_param();
10758 		break;
10759 	case DUMP_NONE:
10760 		break;
10761 	default:
10762 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10763 		ftrace_dump_one(&global_trace, DUMP_ALL);
10764 	}
10765 
10766 	atomic_dec(&dump_running);
10767 }
10768 EXPORT_SYMBOL_GPL(ftrace_dump);
10769 
10770 #define WRITE_BUFSIZE  4096
10771 
10772 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10773 				size_t count, loff_t *ppos,
10774 				int (*createfn)(const char *))
10775 {
10776 	char *kbuf, *buf, *tmp;
10777 	int ret = 0;
10778 	size_t done = 0;
10779 	size_t size;
10780 
10781 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10782 	if (!kbuf)
10783 		return -ENOMEM;
10784 
10785 	while (done < count) {
10786 		size = count - done;
10787 
10788 		if (size >= WRITE_BUFSIZE)
10789 			size = WRITE_BUFSIZE - 1;
10790 
10791 		if (copy_from_user(kbuf, buffer + done, size)) {
10792 			ret = -EFAULT;
10793 			goto out;
10794 		}
10795 		kbuf[size] = '\0';
10796 		buf = kbuf;
10797 		do {
10798 			tmp = strchr(buf, '\n');
10799 			if (tmp) {
10800 				*tmp = '\0';
10801 				size = tmp - buf + 1;
10802 			} else {
10803 				size = strlen(buf);
10804 				if (done + size < count) {
10805 					if (buf != kbuf)
10806 						break;
10807 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10808 					pr_warn("Line length is too long: Should be less than %d\n",
10809 						WRITE_BUFSIZE - 2);
10810 					ret = -EINVAL;
10811 					goto out;
10812 				}
10813 			}
10814 			done += size;
10815 
10816 			/* Remove comments */
10817 			tmp = strchr(buf, '#');
10818 
10819 			if (tmp)
10820 				*tmp = '\0';
10821 
10822 			ret = createfn(buf);
10823 			if (ret)
10824 				goto out;
10825 			buf += size;
10826 
10827 		} while (done < count);
10828 	}
10829 	ret = done;
10830 
10831 out:
10832 	kfree(kbuf);
10833 
10834 	return ret;
10835 }
10836 
10837 #ifdef CONFIG_TRACER_MAX_TRACE
10838 __init static bool tr_needs_alloc_snapshot(const char *name)
10839 {
10840 	char *test;
10841 	int len = strlen(name);
10842 	bool ret;
10843 
10844 	if (!boot_snapshot_index)
10845 		return false;
10846 
10847 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10848 	    boot_snapshot_info[len] == '\t')
10849 		return true;
10850 
10851 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10852 	if (!test)
10853 		return false;
10854 
10855 	sprintf(test, "\t%s\t", name);
10856 	ret = strstr(boot_snapshot_info, test) == NULL;
10857 	kfree(test);
10858 	return ret;
10859 }
10860 
10861 __init static void do_allocate_snapshot(const char *name)
10862 {
10863 	if (!tr_needs_alloc_snapshot(name))
10864 		return;
10865 
10866 	/*
10867 	 * When allocate_snapshot is set, the next call to
10868 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10869 	 * will allocate the snapshot buffer. That will alse clear
10870 	 * will allocate the snapshot buffer. That will also clear
10871 	 */
10872 	allocate_snapshot = true;
10873 }
10874 #else
10875 static inline void do_allocate_snapshot(const char *name) { }
10876 #endif
10877 
10878 __init static void enable_instances(void)
10879 {
10880 	struct trace_array *tr;
10881 	bool memmap_area = false;
10882 	char *curr_str;
10883 	char *name;
10884 	char *str;
10885 	char *tok;
10886 
10887 	/* A tab is always appended */
10888 	boot_instance_info[boot_instance_index - 1] = '\0';
10889 	str = boot_instance_info;
10890 
10891 	while ((curr_str = strsep(&str, "\t"))) {
10892 		phys_addr_t start = 0;
10893 		phys_addr_t size = 0;
10894 		unsigned long addr = 0;
10895 		bool traceprintk = false;
10896 		bool traceoff = false;
10897 		char *flag_delim;
10898 		char *addr_delim;
10899 		char *rname __free(kfree) = NULL;
10900 
10901 		tok = strsep(&curr_str, ",");
10902 
10903 		flag_delim = strchr(tok, '^');
10904 		addr_delim = strchr(tok, '@');
10905 
10906 		if (addr_delim)
10907 			*addr_delim++ = '\0';
10908 
10909 		if (flag_delim)
10910 			*flag_delim++ = '\0';
10911 
10912 		name = tok;
10913 
10914 		if (flag_delim) {
10915 			char *flag;
10916 
10917 			while ((flag = strsep(&flag_delim, "^"))) {
10918 				if (strcmp(flag, "traceoff") == 0) {
10919 					traceoff = true;
10920 				} else if ((strcmp(flag, "printk") == 0) ||
10921 					   (strcmp(flag, "traceprintk") == 0) ||
10922 					   (strcmp(flag, "trace_printk") == 0)) {
10923 					traceprintk = true;
10924 				} else {
10925 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10926 						flag, name);
10927 				}
10928 			}
10929 		}
10930 
10931 		tok = addr_delim;
10932 		if (tok && isdigit(*tok)) {
10933 			start = memparse(tok, &tok);
10934 			if (!start) {
10935 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10936 					name);
10937 				continue;
10938 			}
10939 			if (*tok != ':') {
10940 				pr_warn("Tracing: No size specified for instance %s\n", name);
10941 				continue;
10942 			}
10943 			tok++;
10944 			size = memparse(tok, &tok);
10945 			if (!size) {
10946 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10947 					name);
10948 				continue;
10949 			}
10950 			memmap_area = true;
10951 		} else if (tok) {
10952 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10953 				start = 0;
10954 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10955 				continue;
10956 			}
10957 			rname = kstrdup(tok, GFP_KERNEL);
10958 		}
10959 
10960 		if (start) {
10961 			/* Start and size must be page aligned */
10962 			if (start & ~PAGE_MASK) {
10963 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10964 				continue;
10965 			}
10966 			if (size & ~PAGE_MASK) {
10967 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10968 				continue;
10969 			}
10970 
10971 			if (memmap_area)
10972 				addr = map_pages(start, size);
10973 			else
10974 				addr = (unsigned long)phys_to_virt(start);
10975 			if (addr) {
10976 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10977 					name, &start, (unsigned long)size);
10978 			} else {
10979 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10980 				continue;
10981 			}
10982 		} else {
10983 			/* Only non-mapped buffers have snapshot buffers */
10984 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10985 				do_allocate_snapshot(name);
10986 		}
10987 
10988 		tr = trace_array_create_systems(name, NULL, addr, size);
10989 		if (IS_ERR(tr)) {
10990 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10991 			continue;
10992 		}
10993 
10994 		if (traceoff)
10995 			tracer_tracing_off(tr);
10996 
10997 		if (traceprintk)
10998 			update_printk_trace(tr);
10999 
11000 		/*
11001 		 * memmap'd buffers cannot be freed.
11002 		 */
11003 		if (memmap_area) {
11004 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11005 			tr->ref++;
11006 		}
11007 
11008 		if (start) {
11009 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11010 			tr->range_name = no_free_ptr(rname);
11011 		}
11012 
11013 		while ((tok = strsep(&curr_str, ","))) {
11014 			early_enable_events(tr, tok, true);
11015 		}
11016 	}
11017 }
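
/*
 * Examples of the boot-time syntax the loop above parses (illustrative
 * names; the kernel-parameters documentation has the authoritative
 * description):
 *
 *   trace_instance=foo				plain instance
 *   trace_instance=foo^traceoff^traceprintk	flags follow '^'
 *   trace_instance=foo@0x1000000:2M		memmap'd persistent buffer
 *   trace_instance=foo@my_region,sched:sched_switch
 *						reserve_mem region, followed
 *						by a comma-separated event list
 */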
11018 
11019 __init static int tracer_alloc_buffers(void)
11020 {
11021 	int ring_buf_size;
11022 	int ret = -ENOMEM;
11023 
11025 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
11026 		pr_warn("Tracing disabled due to lockdown\n");
11027 		return -EPERM;
11028 	}
11029 
11030 	/*
11031 	 * Make sure we don't accidentally add more trace options
11032 	 * than we have bits for.
11033 	 */
11034 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11035 
11036 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11037 		goto out;
11038 
11039 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11040 		goto out_free_buffer_mask;
11041 
11042 	/* Only allocate trace_printk buffers if a trace_printk exists */
11043 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11044 		/* Must be called before global_trace.buffer is allocated */
11045 		trace_printk_init_buffers();
11046 
11047 	/* To save memory, keep the ring buffer size to its minimum */
11048 	if (global_trace.ring_buffer_expanded)
11049 		ring_buf_size = trace_buf_size;
11050 	else
11051 		ring_buf_size = 1;
11052 
11053 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11054 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11055 
11056 	raw_spin_lock_init(&global_trace.start_lock);
11057 
11058 	/*
11059 	 * The prepare callback allocates some memory for the ring buffer. We
11060 	 * don't free the buffer if the CPU goes down. If we were to free
11061 	 * the buffer, then the user would lose any trace that was in the
11062 	 * buffer. The memory will be removed once the "instance" is removed.
11063 	 */
11064 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11065 				      "trace/RB:prepare", trace_rb_cpu_prepare,
11066 				      NULL);
11067 	if (ret < 0)
11068 		goto out_free_cpumask;
11069 	/* Used for event triggers */
11070 	ret = -ENOMEM;
11071 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11072 	if (!temp_buffer)
11073 		goto out_rm_hp_state;
11074 
11075 	if (trace_create_savedcmd() < 0)
11076 		goto out_free_temp_buffer;
11077 
11078 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11079 		goto out_free_savedcmd;
11080 
11081 	/* TODO: make the number of buffers hot pluggable with CPUS */
11082 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11083 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11084 		goto out_free_pipe_cpumask;
11085 	}
11086 	if (global_trace.buffer_disabled)
11087 		tracing_off();
11088 
11089 	if (trace_boot_clock) {
11090 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
11091 		if (ret < 0)
11092 			pr_warn("Trace clock %s not defined, going back to default\n",
11093 				trace_boot_clock);
11094 	}
11095 
11096 	/*
11097 	 * register_tracer() might reference current_trace, so it
11098 	 * needs to be set before we register anything. This is
11099 	 * just a bootstrap of current_trace anyway.
11100 	 */
11101 	global_trace.current_trace = &nop_trace;
11102 
11103 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11104 #ifdef CONFIG_TRACER_MAX_TRACE
11105 	spin_lock_init(&global_trace.snapshot_trigger_lock);
11106 #endif
11107 	ftrace_init_global_array_ops(&global_trace);
11108 
11109 #ifdef CONFIG_MODULES
11110 	INIT_LIST_HEAD(&global_trace.mod_events);
11111 #endif
11112 
11113 	init_trace_flags_index(&global_trace);
11114 
11115 	register_tracer(&nop_trace);
11116 
11117 	/* Function tracing may start here (via kernel command line) */
11118 	init_function_trace();
11119 
11120 	/* All seems OK, enable tracing */
11121 	tracing_disabled = 0;
11122 
11123 	atomic_notifier_chain_register(&panic_notifier_list,
11124 				       &trace_panic_notifier);
11125 
11126 	register_die_notifier(&trace_die_notifier);
11127 
11128 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11129 
11130 	INIT_LIST_HEAD(&global_trace.systems);
11131 	INIT_LIST_HEAD(&global_trace.events);
11132 	INIT_LIST_HEAD(&global_trace.hist_vars);
11133 	INIT_LIST_HEAD(&global_trace.err_log);
11134 	list_add(&global_trace.marker_list, &marker_copies);
11135 	list_add(&global_trace.list, &ftrace_trace_arrays);
11136 
11137 	apply_trace_boot_options();
11138 
11139 	register_snapshot_cmd();
11140 
11141 	return 0;
11142 
11143 out_free_pipe_cpumask:
11144 	free_cpumask_var(global_trace.pipe_cpumask);
11145 out_free_savedcmd:
11146 	trace_free_saved_cmdlines_buffer();
11147 out_free_temp_buffer:
11148 	ring_buffer_free(temp_buffer);
11149 out_rm_hp_state:
11150 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11151 out_free_cpumask:
11152 	free_cpumask_var(global_trace.tracing_cpumask);
11153 out_free_buffer_mask:
11154 	free_cpumask_var(tracing_buffer_mask);
11155 out:
11156 	return ret;
11157 }
11158 
11159 #ifdef CONFIG_FUNCTION_TRACER
11160 /* Used to set module cached ftrace filtering at boot up */
11161 __init struct trace_array *trace_get_global_array(void)
11162 {
11163 	return &global_trace;
11164 }
11165 #endif
11166 
11167 void __init ftrace_boot_snapshot(void)
11168 {
11169 #ifdef CONFIG_TRACER_MAX_TRACE
11170 	struct trace_array *tr;
11171 
11172 	if (!snapshot_at_boot)
11173 		return;
11174 
11175 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11176 		if (!tr->allocated_snapshot)
11177 			continue;
11178 
11179 		tracing_snapshot_instance(tr);
11180 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11181 	}
11182 #endif
11183 }
11184 
11185 void __init early_trace_init(void)
11186 {
11187 	if (tracepoint_printk) {
11188 		tracepoint_print_iter =
11189 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11190 		if (MEM_FAIL(!tracepoint_print_iter,
11191 			     "Failed to allocate trace iterator\n"))
11192 			tracepoint_printk = 0;
11193 		else
11194 			static_key_enable(&tracepoint_printk_key.key);
11195 	}
11196 	tracer_alloc_buffers();
11197 
11198 	init_events();
11199 }
11200 
11201 void __init trace_init(void)
11202 {
11203 	trace_event_init();
11204 
11205 	if (boot_instance_index)
11206 		enable_instances();
11207 }
11208 
11209 __init static void clear_boot_tracer(void)
11210 {
11211 	/*
11212 	 * The default bootup tracer name is stored in an init section.
11213 	 * This function is called at late_initcall time. If we did not
11214 	 * find the boot tracer by then, clear it out to prevent a
11215 	 * later registration from accessing the buffer that is
11216 	 * about to be freed.
11217 	 */
11218 	if (!default_bootup_tracer)
11219 		return;
11220 
11221 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11222 	       default_bootup_tracer);
11223 	default_bootup_tracer = NULL;
11224 }
11225 
11226 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11227 __init static void tracing_set_default_clock(void)
11228 {
11229 	/* sched_clock_stable() is determined in late_initcall */
11230 	if (!trace_boot_clock && !sched_clock_stable()) {
11231 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11232 			pr_warn("Can not set tracing clock due to lockdown\n");
11233 			return;
11234 		}
11235 
11236 		printk(KERN_WARNING
11237 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11238 		       "If you want to keep using the local clock, then add:\n"
11239 		       "  \"trace_clock=local\"\n"
11240 		       "on the kernel command line\n");
11241 		tracing_set_clock(&global_trace, "global");
11242 	}
11243 }
11244 #else
11245 static inline void tracing_set_default_clock(void) { }
11246 #endif
11247 
11248 __init static int late_trace_init(void)
11249 {
11250 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11251 		static_key_disable(&tracepoint_printk_key.key);
11252 		tracepoint_printk = 0;
11253 	}
11254 
11255 	if (traceoff_after_boot)
11256 		tracing_off();
11257 
11258 	tracing_set_default_clock();
11259 	clear_boot_tracer();
11260 	return 0;
11261 }
11262 
11263 late_initcall_sync(late_trace_init);
11264