xref: /linux/kernel/trace/trace.c (revision 40840afa53bed05b990b201d749dfee3bd6e7e42)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 
55 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
56 
57 #include "trace.h"
58 #include "trace_output.h"
59 
60 #ifdef CONFIG_FTRACE_STARTUP_TEST
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75 
76 void __init disable_tracing_selftest(const char *reason)
77 {
78 	if (!tracing_selftest_disabled) {
79 		tracing_selftest_disabled = true;
80 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
81 	}
82 }
83 #else
84 #define tracing_selftest_running	0
85 #define tracing_selftest_disabled	0
86 #endif
87 
88 /* Pipe tracepoints to printk */
89 static struct trace_iterator *tracepoint_print_iter;
90 int tracepoint_printk;
91 static bool tracepoint_printk_stop_on_boot __initdata;
92 static bool traceoff_after_boot __initdata;
93 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
94 
95 /* For tracers that don't implement custom flags */
96 static struct tracer_opt dummy_tracer_opt[] = {
97 	{ }
98 };
99 
100 static int
101 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
102 {
103 	return 0;
104 }
105 
106 /*
107  * To prevent the comm cache from being overwritten when no
108  * tracing is active, only save the comm when a trace event
109  * occurred.
110  */
111 DEFINE_PER_CPU(bool, trace_taskinfo_save);
112 
113 /*
114  * Kill all tracing for good (never come back).
115  * It is initialized to 1 but will turn to zero if the initialization
116  * of the tracer is successful. But that is the only place that sets
117  * this back to zero.
118  */
119 static int tracing_disabled = 1;
120 
121 cpumask_var_t __read_mostly	tracing_buffer_mask;
122 
123 #define MAX_TRACER_SIZE		100
124 /*
125  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
126  *
127  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
128  * is set, then ftrace_dump is called. This will output the contents
129  * of the ftrace buffers to the console.  This is very useful for
130  * capturing traces that lead to crashes and outputing it to a
131  * serial console.
132  *
133  * It is default off, but you can enable it with either specifying
134  * "ftrace_dump_on_oops" in the kernel command line, or setting
135  * /proc/sys/kernel/ftrace_dump_on_oops
136  * Set 1 if you want to dump buffers of all CPUs
137  * Set 2 if you want to dump the buffer of the CPU that triggered oops
138  * Set instance name if you want to dump the specific trace instance
139  * Multiple instance dump is also supported, and instances are seperated
140  * by commas.
141  */
142 /* Set to string format zero to disable by default */
143 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
144 
145 /* When set, tracing will stop when a WARN*() is hit */
146 static int __disable_trace_on_warning;
147 
148 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
149 			     void *buffer, size_t *lenp, loff_t *ppos);
150 static const struct ctl_table trace_sysctl_table[] = {
151 	{
152 		.procname	= "ftrace_dump_on_oops",
153 		.data		= &ftrace_dump_on_oops,
154 		.maxlen		= MAX_TRACER_SIZE,
155 		.mode		= 0644,
156 		.proc_handler	= proc_dostring,
157 	},
158 	{
159 		.procname	= "traceoff_on_warning",
160 		.data		= &__disable_trace_on_warning,
161 		.maxlen		= sizeof(__disable_trace_on_warning),
162 		.mode		= 0644,
163 		.proc_handler	= proc_dointvec,
164 	},
165 	{
166 		.procname	= "tracepoint_printk",
167 		.data		= &tracepoint_printk,
168 		.maxlen		= sizeof(tracepoint_printk),
169 		.mode		= 0644,
170 		.proc_handler	= tracepoint_printk_sysctl,
171 	},
172 };
173 
174 static int __init init_trace_sysctls(void)
175 {
176 	register_sysctl_init("kernel", trace_sysctl_table);
177 	return 0;
178 }
179 subsys_initcall(init_trace_sysctls);
180 
181 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
182 /* Map of enums to their values, for "eval_map" file */
183 struct trace_eval_map_head {
184 	struct module			*mod;
185 	unsigned long			length;
186 };
187 
188 union trace_eval_map_item;
189 
190 struct trace_eval_map_tail {
191 	/*
192 	 * "end" is first and points to NULL as it must be different
193 	 * than "mod" or "eval_string"
194 	 */
195 	union trace_eval_map_item	*next;
196 	const char			*end;	/* points to NULL */
197 };
198 
199 static DEFINE_MUTEX(trace_eval_mutex);
200 
201 /*
202  * The trace_eval_maps are saved in an array with two extra elements,
203  * one at the beginning, and one at the end. The beginning item contains
204  * the count of the saved maps (head.length), and the module they
205  * belong to if not built in (head.mod). The ending item contains a
206  * pointer to the next array of saved eval_map items.
207  */
208 union trace_eval_map_item {
209 	struct trace_eval_map		map;
210 	struct trace_eval_map_head	head;
211 	struct trace_eval_map_tail	tail;
212 };
213 
214 static union trace_eval_map_item *trace_eval_maps;
215 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
216 
217 int tracing_set_tracer(struct trace_array *tr, const char *buf);
218 static void ftrace_trace_userstack(struct trace_array *tr,
219 				   struct trace_buffer *buffer,
220 				   unsigned int trace_ctx);
221 
222 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
223 static char *default_bootup_tracer;
224 
225 static bool allocate_snapshot;
226 static bool snapshot_at_boot;
227 
228 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
229 static int boot_instance_index;
230 
231 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
232 static int boot_snapshot_index;
233 
234 static int __init set_cmdline_ftrace(char *str)
235 {
236 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
237 	default_bootup_tracer = bootup_tracer_buf;
238 	/* We are using ftrace early, expand it */
239 	trace_set_ring_buffer_expanded(NULL);
240 	return 1;
241 }
242 __setup("ftrace=", set_cmdline_ftrace);
243 
244 int ftrace_dump_on_oops_enabled(void)
245 {
246 	if (!strcmp("0", ftrace_dump_on_oops))
247 		return 0;
248 	else
249 		return 1;
250 }
251 
252 static int __init set_ftrace_dump_on_oops(char *str)
253 {
254 	if (!*str) {
255 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
256 		return 1;
257 	}
258 
259 	if (*str == ',') {
260 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
261 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
262 		return 1;
263 	}
264 
265 	if (*str++ == '=') {
266 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
267 		return 1;
268 	}
269 
270 	return 0;
271 }
272 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
273 
274 static int __init stop_trace_on_warning(char *str)
275 {
276 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
277 		__disable_trace_on_warning = 1;
278 	return 1;
279 }
280 __setup("traceoff_on_warning", stop_trace_on_warning);
281 
282 static int __init boot_alloc_snapshot(char *str)
283 {
284 	char *slot = boot_snapshot_info + boot_snapshot_index;
285 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
286 	int ret;
287 
288 	if (str[0] == '=') {
289 		str++;
290 		if (strlen(str) >= left)
291 			return -1;
292 
293 		ret = snprintf(slot, left, "%s\t", str);
294 		boot_snapshot_index += ret;
295 	} else {
296 		allocate_snapshot = true;
297 		/* We also need the main ring buffer expanded */
298 		trace_set_ring_buffer_expanded(NULL);
299 	}
300 	return 1;
301 }
302 __setup("alloc_snapshot", boot_alloc_snapshot);
303 
304 
305 static int __init boot_snapshot(char *str)
306 {
307 	snapshot_at_boot = true;
308 	boot_alloc_snapshot(str);
309 	return 1;
310 }
311 __setup("ftrace_boot_snapshot", boot_snapshot);
312 
313 
314 static int __init boot_instance(char *str)
315 {
316 	char *slot = boot_instance_info + boot_instance_index;
317 	int left = sizeof(boot_instance_info) - boot_instance_index;
318 	int ret;
319 
320 	if (strlen(str) >= left)
321 		return -1;
322 
323 	ret = snprintf(slot, left, "%s\t", str);
324 	boot_instance_index += ret;
325 
326 	return 1;
327 }
328 __setup("trace_instance=", boot_instance);
329 
330 
331 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
332 
333 static int __init set_trace_boot_options(char *str)
334 {
335 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
336 	return 1;
337 }
338 __setup("trace_options=", set_trace_boot_options);
339 
340 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
341 static char *trace_boot_clock __initdata;
342 
343 static int __init set_trace_boot_clock(char *str)
344 {
345 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
346 	trace_boot_clock = trace_boot_clock_buf;
347 	return 1;
348 }
349 __setup("trace_clock=", set_trace_boot_clock);
350 
351 static int __init set_tracepoint_printk(char *str)
352 {
353 	/* Ignore the "tp_printk_stop_on_boot" param */
354 	if (*str == '_')
355 		return 0;
356 
357 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
358 		tracepoint_printk = 1;
359 	return 1;
360 }
361 __setup("tp_printk", set_tracepoint_printk);
362 
363 static int __init set_tracepoint_printk_stop(char *str)
364 {
365 	tracepoint_printk_stop_on_boot = true;
366 	return 1;
367 }
368 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
369 
370 static int __init set_traceoff_after_boot(char *str)
371 {
372 	traceoff_after_boot = true;
373 	return 1;
374 }
375 __setup("traceoff_after_boot", set_traceoff_after_boot);
376 
377 unsigned long long ns2usecs(u64 nsec)
378 {
379 	nsec += 500;
380 	do_div(nsec, 1000);
381 	return nsec;
382 }
383 
384 static void
385 trace_process_export(struct trace_export *export,
386 	       struct ring_buffer_event *event, int flag)
387 {
388 	struct trace_entry *entry;
389 	unsigned int size = 0;
390 
391 	if (export->flags & flag) {
392 		entry = ring_buffer_event_data(event);
393 		size = ring_buffer_event_length(event);
394 		export->write(export, entry, size);
395 	}
396 }
397 
398 static DEFINE_MUTEX(ftrace_export_lock);
399 
400 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
401 
402 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
403 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
405 
406 static inline void ftrace_exports_enable(struct trace_export *export)
407 {
408 	if (export->flags & TRACE_EXPORT_FUNCTION)
409 		static_branch_inc(&trace_function_exports_enabled);
410 
411 	if (export->flags & TRACE_EXPORT_EVENT)
412 		static_branch_inc(&trace_event_exports_enabled);
413 
414 	if (export->flags & TRACE_EXPORT_MARKER)
415 		static_branch_inc(&trace_marker_exports_enabled);
416 }
417 
418 static inline void ftrace_exports_disable(struct trace_export *export)
419 {
420 	if (export->flags & TRACE_EXPORT_FUNCTION)
421 		static_branch_dec(&trace_function_exports_enabled);
422 
423 	if (export->flags & TRACE_EXPORT_EVENT)
424 		static_branch_dec(&trace_event_exports_enabled);
425 
426 	if (export->flags & TRACE_EXPORT_MARKER)
427 		static_branch_dec(&trace_marker_exports_enabled);
428 }
429 
430 static void ftrace_exports(struct ring_buffer_event *event, int flag)
431 {
432 	struct trace_export *export;
433 
434 	preempt_disable_notrace();
435 
436 	export = rcu_dereference_raw_check(ftrace_exports_list);
437 	while (export) {
438 		trace_process_export(export, event, flag);
439 		export = rcu_dereference_raw_check(export->next);
440 	}
441 
442 	preempt_enable_notrace();
443 }
444 
445 static inline void
446 add_trace_export(struct trace_export **list, struct trace_export *export)
447 {
448 	rcu_assign_pointer(export->next, *list);
449 	/*
450 	 * We are entering export into the list but another
451 	 * CPU might be walking that list. We need to make sure
452 	 * the export->next pointer is valid before another CPU sees
453 	 * the export pointer included into the list.
454 	 */
455 	rcu_assign_pointer(*list, export);
456 }
457 
458 static inline int
459 rm_trace_export(struct trace_export **list, struct trace_export *export)
460 {
461 	struct trace_export **p;
462 
463 	for (p = list; *p != NULL; p = &(*p)->next)
464 		if (*p == export)
465 			break;
466 
467 	if (*p != export)
468 		return -1;
469 
470 	rcu_assign_pointer(*p, (*p)->next);
471 
472 	return 0;
473 }
474 
475 static inline void
476 add_ftrace_export(struct trace_export **list, struct trace_export *export)
477 {
478 	ftrace_exports_enable(export);
479 
480 	add_trace_export(list, export);
481 }
482 
483 static inline int
484 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
485 {
486 	int ret;
487 
488 	ret = rm_trace_export(list, export);
489 	ftrace_exports_disable(export);
490 
491 	return ret;
492 }
493 
494 int register_ftrace_export(struct trace_export *export)
495 {
496 	if (WARN_ON_ONCE(!export->write))
497 		return -1;
498 
499 	mutex_lock(&ftrace_export_lock);
500 
501 	add_ftrace_export(&ftrace_exports_list, export);
502 
503 	mutex_unlock(&ftrace_export_lock);
504 
505 	return 0;
506 }
507 EXPORT_SYMBOL_GPL(register_ftrace_export);
508 
509 int unregister_ftrace_export(struct trace_export *export)
510 {
511 	int ret;
512 
513 	mutex_lock(&ftrace_export_lock);
514 
515 	ret = rm_ftrace_export(&ftrace_exports_list, export);
516 
517 	mutex_unlock(&ftrace_export_lock);
518 
519 	return ret;
520 }
521 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
522 
523 /* trace_flags holds trace_options default values */
524 #define TRACE_DEFAULT_FLAGS						\
525 	(FUNCTION_DEFAULT_FLAGS |					\
526 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
527 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
528 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
529 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
530 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
531 
532 /* trace_options that are only supported by global_trace */
533 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
534 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
535 
536 /* trace_flags that are default zero for instances */
537 #define ZEROED_TRACE_FLAGS \
538 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
539 
540 /*
541  * The global_trace is the descriptor that holds the top-level tracing
542  * buffers for the live tracing.
543  */
544 static struct trace_array global_trace = {
545 	.trace_flags = TRACE_DEFAULT_FLAGS,
546 };
547 
548 static struct trace_array *printk_trace = &global_trace;
549 
550 static __always_inline bool printk_binsafe(struct trace_array *tr)
551 {
552 	/*
553 	 * The binary format of traceprintk can cause a crash if used
554 	 * by a buffer from another boot. Force the use of the
555 	 * non binary version of trace_printk if the trace_printk
556 	 * buffer is a boot mapped ring buffer.
557 	 */
558 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
559 }
560 
561 static void update_printk_trace(struct trace_array *tr)
562 {
563 	if (printk_trace == tr)
564 		return;
565 
566 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
567 	printk_trace = tr;
568 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
569 }
570 
571 void trace_set_ring_buffer_expanded(struct trace_array *tr)
572 {
573 	if (!tr)
574 		tr = &global_trace;
575 	tr->ring_buffer_expanded = true;
576 }
577 
578 LIST_HEAD(ftrace_trace_arrays);
579 
580 int trace_array_get(struct trace_array *this_tr)
581 {
582 	struct trace_array *tr;
583 
584 	guard(mutex)(&trace_types_lock);
585 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
586 		if (tr == this_tr) {
587 			tr->ref++;
588 			return 0;
589 		}
590 	}
591 
592 	return -ENODEV;
593 }
594 
595 static void __trace_array_put(struct trace_array *this_tr)
596 {
597 	WARN_ON(!this_tr->ref);
598 	this_tr->ref--;
599 }
600 
601 /**
602  * trace_array_put - Decrement the reference counter for this trace array.
603  * @this_tr : pointer to the trace array
604  *
605  * NOTE: Use this when we no longer need the trace array returned by
606  * trace_array_get_by_name(). This ensures the trace array can be later
607  * destroyed.
608  *
609  */
610 void trace_array_put(struct trace_array *this_tr)
611 {
612 	if (!this_tr)
613 		return;
614 
615 	mutex_lock(&trace_types_lock);
616 	__trace_array_put(this_tr);
617 	mutex_unlock(&trace_types_lock);
618 }
619 EXPORT_SYMBOL_GPL(trace_array_put);
620 
621 int tracing_check_open_get_tr(struct trace_array *tr)
622 {
623 	int ret;
624 
625 	ret = security_locked_down(LOCKDOWN_TRACEFS);
626 	if (ret)
627 		return ret;
628 
629 	if (tracing_disabled)
630 		return -ENODEV;
631 
632 	if (tr && trace_array_get(tr) < 0)
633 		return -ENODEV;
634 
635 	return 0;
636 }
637 
638 /**
639  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
640  * @filtered_pids: The list of pids to check
641  * @search_pid: The PID to find in @filtered_pids
642  *
643  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
644  */
645 bool
646 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
647 {
648 	return trace_pid_list_is_set(filtered_pids, search_pid);
649 }
650 
651 /**
652  * trace_ignore_this_task - should a task be ignored for tracing
653  * @filtered_pids: The list of pids to check
654  * @filtered_no_pids: The list of pids not to be traced
655  * @task: The task that should be ignored if not filtered
656  *
657  * Checks if @task should be traced or not from @filtered_pids.
658  * Returns true if @task should *NOT* be traced.
659  * Returns false if @task should be traced.
660  */
661 bool
662 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
663 		       struct trace_pid_list *filtered_no_pids,
664 		       struct task_struct *task)
665 {
666 	/*
667 	 * If filtered_no_pids is not empty, and the task's pid is listed
668 	 * in filtered_no_pids, then return true.
669 	 * Otherwise, if filtered_pids is empty, that means we can
670 	 * trace all tasks. If it has content, then only trace pids
671 	 * within filtered_pids.
672 	 */
673 
674 	return (filtered_pids &&
675 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
676 		(filtered_no_pids &&
677 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
678 }
679 
680 /**
681  * trace_filter_add_remove_task - Add or remove a task from a pid_list
682  * @pid_list: The list to modify
683  * @self: The current task for fork or NULL for exit
684  * @task: The task to add or remove
685  *
686  * If adding a task, if @self is defined, the task is only added if @self
687  * is also included in @pid_list. This happens on fork and tasks should
688  * only be added when the parent is listed. If @self is NULL, then the
689  * @task pid will be removed from the list, which would happen on exit
690  * of a task.
691  */
692 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
693 				  struct task_struct *self,
694 				  struct task_struct *task)
695 {
696 	if (!pid_list)
697 		return;
698 
699 	/* For forks, we only add if the forking task is listed */
700 	if (self) {
701 		if (!trace_find_filtered_pid(pid_list, self->pid))
702 			return;
703 	}
704 
705 	/* "self" is set for forks, and NULL for exits */
706 	if (self)
707 		trace_pid_list_set(pid_list, task->pid);
708 	else
709 		trace_pid_list_clear(pid_list, task->pid);
710 }
711 
712 /**
713  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
714  * @pid_list: The pid list to show
715  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
716  * @pos: The position of the file
717  *
718  * This is used by the seq_file "next" operation to iterate the pids
719  * listed in a trace_pid_list structure.
720  *
721  * Returns the pid+1 as we want to display pid of zero, but NULL would
722  * stop the iteration.
723  */
724 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
725 {
726 	long pid = (unsigned long)v;
727 	unsigned int next;
728 
729 	(*pos)++;
730 
731 	/* pid already is +1 of the actual previous bit */
732 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
733 		return NULL;
734 
735 	pid = next;
736 
737 	/* Return pid + 1 to allow zero to be represented */
738 	return (void *)(pid + 1);
739 }
740 
741 /**
742  * trace_pid_start - Used for seq_file to start reading pid lists
743  * @pid_list: The pid list to show
744  * @pos: The position of the file
745  *
746  * This is used by seq_file "start" operation to start the iteration
747  * of listing pids.
748  *
749  * Returns the pid+1 as we want to display pid of zero, but NULL would
750  * stop the iteration.
751  */
752 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
753 {
754 	unsigned long pid;
755 	unsigned int first;
756 	loff_t l = 0;
757 
758 	if (trace_pid_list_first(pid_list, &first) < 0)
759 		return NULL;
760 
761 	pid = first;
762 
763 	/* Return pid + 1 so that zero can be the exit value */
764 	for (pid++; pid && l < *pos;
765 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
766 		;
767 	return (void *)pid;
768 }
769 
770 /**
771  * trace_pid_show - show the current pid in seq_file processing
772  * @m: The seq_file structure to write into
773  * @v: A void pointer of the pid (+1) value to display
774  *
775  * Can be directly used by seq_file operations to display the current
776  * pid value.
777  */
778 int trace_pid_show(struct seq_file *m, void *v)
779 {
780 	unsigned long pid = (unsigned long)v - 1;
781 
782 	seq_printf(m, "%lu\n", pid);
783 	return 0;
784 }
785 
786 /* 128 should be much more than enough */
787 #define PID_BUF_SIZE		127
788 
789 int trace_pid_write(struct trace_pid_list *filtered_pids,
790 		    struct trace_pid_list **new_pid_list,
791 		    const char __user *ubuf, size_t cnt)
792 {
793 	struct trace_pid_list *pid_list;
794 	struct trace_parser parser;
795 	unsigned long val;
796 	int nr_pids = 0;
797 	ssize_t read = 0;
798 	ssize_t ret;
799 	loff_t pos;
800 	pid_t pid;
801 
802 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
803 		return -ENOMEM;
804 
805 	/*
806 	 * Always recreate a new array. The write is an all or nothing
807 	 * operation. Always create a new array when adding new pids by
808 	 * the user. If the operation fails, then the current list is
809 	 * not modified.
810 	 */
811 	pid_list = trace_pid_list_alloc();
812 	if (!pid_list) {
813 		trace_parser_put(&parser);
814 		return -ENOMEM;
815 	}
816 
817 	if (filtered_pids) {
818 		/* copy the current bits to the new max */
819 		ret = trace_pid_list_first(filtered_pids, &pid);
820 		while (!ret) {
821 			trace_pid_list_set(pid_list, pid);
822 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
823 			nr_pids++;
824 		}
825 	}
826 
827 	ret = 0;
828 	while (cnt > 0) {
829 
830 		pos = 0;
831 
832 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
833 		if (ret < 0)
834 			break;
835 
836 		read += ret;
837 		ubuf += ret;
838 		cnt -= ret;
839 
840 		if (!trace_parser_loaded(&parser))
841 			break;
842 
843 		ret = -EINVAL;
844 		if (kstrtoul(parser.buffer, 0, &val))
845 			break;
846 
847 		pid = (pid_t)val;
848 
849 		if (trace_pid_list_set(pid_list, pid) < 0) {
850 			ret = -1;
851 			break;
852 		}
853 		nr_pids++;
854 
855 		trace_parser_clear(&parser);
856 		ret = 0;
857 	}
858 	trace_parser_put(&parser);
859 
860 	if (ret < 0) {
861 		trace_pid_list_free(pid_list);
862 		return ret;
863 	}
864 
865 	if (!nr_pids) {
866 		/* Cleared the list of pids */
867 		trace_pid_list_free(pid_list);
868 		pid_list = NULL;
869 	}
870 
871 	*new_pid_list = pid_list;
872 
873 	return read;
874 }
875 
876 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
877 {
878 	u64 ts;
879 
880 	/* Early boot up does not have a buffer yet */
881 	if (!buf->buffer)
882 		return trace_clock_local();
883 
884 	ts = ring_buffer_time_stamp(buf->buffer);
885 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
886 
887 	return ts;
888 }
889 
890 u64 ftrace_now(int cpu)
891 {
892 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
893 }
894 
895 /**
896  * tracing_is_enabled - Show if global_trace has been enabled
897  *
898  * Shows if the global trace has been enabled or not. It uses the
899  * mirror flag "buffer_disabled" to be used in fast paths such as for
900  * the irqsoff tracer. But it may be inaccurate due to races. If you
901  * need to know the accurate state, use tracing_is_on() which is a little
902  * slower, but accurate.
903  */
904 int tracing_is_enabled(void)
905 {
906 	/*
907 	 * For quick access (irqsoff uses this in fast path), just
908 	 * return the mirror variable of the state of the ring buffer.
909 	 * It's a little racy, but we don't really care.
910 	 */
911 	smp_rmb();
912 	return !global_trace.buffer_disabled;
913 }
914 
915 /*
916  * trace_buf_size is the size in bytes that is allocated
917  * for a buffer. Note, the number of bytes is always rounded
918  * to page size.
919  *
920  * This number is purposely set to a low number of 16384.
921  * If the dump on oops happens, it will be much appreciated
922  * to not have to wait for all that output. Anyway this can be
923  * boot time and run time configurable.
924  */
925 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
926 
927 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
928 
929 /* trace_types holds a link list of available tracers. */
930 static struct tracer		*trace_types __read_mostly;
931 
932 /*
933  * trace_types_lock is used to protect the trace_types list.
934  */
935 DEFINE_MUTEX(trace_types_lock);
936 
937 /*
938  * serialize the access of the ring buffer
939  *
940  * ring buffer serializes readers, but it is low level protection.
941  * The validity of the events (which returns by ring_buffer_peek() ..etc)
942  * are not protected by ring buffer.
943  *
944  * The content of events may become garbage if we allow other process consumes
945  * these events concurrently:
946  *   A) the page of the consumed events may become a normal page
947  *      (not reader page) in ring buffer, and this page will be rewritten
948  *      by events producer.
949  *   B) The page of the consumed events may become a page for splice_read,
950  *      and this page will be returned to system.
951  *
952  * These primitives allow multi process access to different cpu ring buffer
953  * concurrently.
954  *
955  * These primitives don't distinguish read-only and read-consume access.
956  * Multi read-only access are also serialized.
957  */
958 
959 #ifdef CONFIG_SMP
960 static DECLARE_RWSEM(all_cpu_access_lock);
961 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
962 
963 static inline void trace_access_lock(int cpu)
964 {
965 	if (cpu == RING_BUFFER_ALL_CPUS) {
966 		/* gain it for accessing the whole ring buffer. */
967 		down_write(&all_cpu_access_lock);
968 	} else {
969 		/* gain it for accessing a cpu ring buffer. */
970 
971 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
972 		down_read(&all_cpu_access_lock);
973 
974 		/* Secondly block other access to this @cpu ring buffer. */
975 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
976 	}
977 }
978 
979 static inline void trace_access_unlock(int cpu)
980 {
981 	if (cpu == RING_BUFFER_ALL_CPUS) {
982 		up_write(&all_cpu_access_lock);
983 	} else {
984 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
985 		up_read(&all_cpu_access_lock);
986 	}
987 }
988 
989 static inline void trace_access_lock_init(void)
990 {
991 	int cpu;
992 
993 	for_each_possible_cpu(cpu)
994 		mutex_init(&per_cpu(cpu_access_lock, cpu));
995 }
996 
997 #else
998 
999 static DEFINE_MUTEX(access_lock);
1000 
1001 static inline void trace_access_lock(int cpu)
1002 {
1003 	(void)cpu;
1004 	mutex_lock(&access_lock);
1005 }
1006 
1007 static inline void trace_access_unlock(int cpu)
1008 {
1009 	(void)cpu;
1010 	mutex_unlock(&access_lock);
1011 }
1012 
1013 static inline void trace_access_lock_init(void)
1014 {
1015 }
1016 
1017 #endif
1018 
1019 #ifdef CONFIG_STACKTRACE
1020 static void __ftrace_trace_stack(struct trace_array *tr,
1021 				 struct trace_buffer *buffer,
1022 				 unsigned int trace_ctx,
1023 				 int skip, struct pt_regs *regs);
1024 static inline void ftrace_trace_stack(struct trace_array *tr,
1025 				      struct trace_buffer *buffer,
1026 				      unsigned int trace_ctx,
1027 				      int skip, struct pt_regs *regs);
1028 
1029 #else
1030 static inline void __ftrace_trace_stack(struct trace_array *tr,
1031 					struct trace_buffer *buffer,
1032 					unsigned int trace_ctx,
1033 					int skip, struct pt_regs *regs)
1034 {
1035 }
1036 static inline void ftrace_trace_stack(struct trace_array *tr,
1037 				      struct trace_buffer *buffer,
1038 				      unsigned long trace_ctx,
1039 				      int skip, struct pt_regs *regs)
1040 {
1041 }
1042 
1043 #endif
1044 
1045 static __always_inline void
1046 trace_event_setup(struct ring_buffer_event *event,
1047 		  int type, unsigned int trace_ctx)
1048 {
1049 	struct trace_entry *ent = ring_buffer_event_data(event);
1050 
1051 	tracing_generic_entry_update(ent, type, trace_ctx);
1052 }
1053 
1054 static __always_inline struct ring_buffer_event *
1055 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1056 			  int type,
1057 			  unsigned long len,
1058 			  unsigned int trace_ctx)
1059 {
1060 	struct ring_buffer_event *event;
1061 
1062 	event = ring_buffer_lock_reserve(buffer, len);
1063 	if (event != NULL)
1064 		trace_event_setup(event, type, trace_ctx);
1065 
1066 	return event;
1067 }
1068 
1069 void tracer_tracing_on(struct trace_array *tr)
1070 {
1071 	if (tr->array_buffer.buffer)
1072 		ring_buffer_record_on(tr->array_buffer.buffer);
1073 	/*
1074 	 * This flag is looked at when buffers haven't been allocated
1075 	 * yet, or by some tracers (like irqsoff), that just want to
1076 	 * know if the ring buffer has been disabled, but it can handle
1077 	 * races of where it gets disabled but we still do a record.
1078 	 * As the check is in the fast path of the tracers, it is more
1079 	 * important to be fast than accurate.
1080 	 */
1081 	tr->buffer_disabled = 0;
1082 	/* Make the flag seen by readers */
1083 	smp_wmb();
1084 }
1085 
1086 /**
1087  * tracing_on - enable tracing buffers
1088  *
1089  * This function enables tracing buffers that may have been
1090  * disabled with tracing_off.
1091  */
1092 void tracing_on(void)
1093 {
1094 	tracer_tracing_on(&global_trace);
1095 }
1096 EXPORT_SYMBOL_GPL(tracing_on);
1097 
1098 
1099 static __always_inline void
1100 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1101 {
1102 	__this_cpu_write(trace_taskinfo_save, true);
1103 
1104 	/* If this is the temp buffer, we need to commit fully */
1105 	if (this_cpu_read(trace_buffered_event) == event) {
1106 		/* Length is in event->array[0] */
1107 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1108 		/* Release the temp buffer */
1109 		this_cpu_dec(trace_buffered_event_cnt);
1110 		/* ring_buffer_unlock_commit() enables preemption */
1111 		preempt_enable_notrace();
1112 	} else
1113 		ring_buffer_unlock_commit(buffer);
1114 }
1115 
1116 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1117 		       const char *str, int size)
1118 {
1119 	struct ring_buffer_event *event;
1120 	struct trace_buffer *buffer;
1121 	struct print_entry *entry;
1122 	unsigned int trace_ctx;
1123 	int alloc;
1124 
1125 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1126 		return 0;
1127 
1128 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1129 		return 0;
1130 
1131 	if (unlikely(tracing_disabled))
1132 		return 0;
1133 
1134 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1135 
1136 	trace_ctx = tracing_gen_ctx();
1137 	buffer = tr->array_buffer.buffer;
1138 	ring_buffer_nest_start(buffer);
1139 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1140 					    trace_ctx);
1141 	if (!event) {
1142 		size = 0;
1143 		goto out;
1144 	}
1145 
1146 	entry = ring_buffer_event_data(event);
1147 	entry->ip = ip;
1148 
1149 	memcpy(&entry->buf, str, size);
1150 
1151 	/* Add a newline if necessary */
1152 	if (entry->buf[size - 1] != '\n') {
1153 		entry->buf[size] = '\n';
1154 		entry->buf[size + 1] = '\0';
1155 	} else
1156 		entry->buf[size] = '\0';
1157 
1158 	__buffer_unlock_commit(buffer, event);
1159 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1160  out:
1161 	ring_buffer_nest_end(buffer);
1162 	return size;
1163 }
1164 EXPORT_SYMBOL_GPL(__trace_array_puts);
1165 
1166 /**
1167  * __trace_puts - write a constant string into the trace buffer.
1168  * @ip:	   The address of the caller
1169  * @str:   The constant string to write
1170  * @size:  The size of the string.
1171  */
1172 int __trace_puts(unsigned long ip, const char *str, int size)
1173 {
1174 	return __trace_array_puts(printk_trace, ip, str, size);
1175 }
1176 EXPORT_SYMBOL_GPL(__trace_puts);
1177 
1178 /**
1179  * __trace_bputs - write the pointer to a constant string into trace buffer
1180  * @ip:	   The address of the caller
1181  * @str:   The constant string to write to the buffer to
1182  */
1183 int __trace_bputs(unsigned long ip, const char *str)
1184 {
1185 	struct trace_array *tr = READ_ONCE(printk_trace);
1186 	struct ring_buffer_event *event;
1187 	struct trace_buffer *buffer;
1188 	struct bputs_entry *entry;
1189 	unsigned int trace_ctx;
1190 	int size = sizeof(struct bputs_entry);
1191 	int ret = 0;
1192 
1193 	if (!printk_binsafe(tr))
1194 		return __trace_puts(ip, str, strlen(str));
1195 
1196 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1197 		return 0;
1198 
1199 	if (unlikely(tracing_selftest_running || tracing_disabled))
1200 		return 0;
1201 
1202 	trace_ctx = tracing_gen_ctx();
1203 	buffer = tr->array_buffer.buffer;
1204 
1205 	ring_buffer_nest_start(buffer);
1206 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1207 					    trace_ctx);
1208 	if (!event)
1209 		goto out;
1210 
1211 	entry = ring_buffer_event_data(event);
1212 	entry->ip			= ip;
1213 	entry->str			= str;
1214 
1215 	__buffer_unlock_commit(buffer, event);
1216 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1217 
1218 	ret = 1;
1219  out:
1220 	ring_buffer_nest_end(buffer);
1221 	return ret;
1222 }
1223 EXPORT_SYMBOL_GPL(__trace_bputs);
1224 
1225 #ifdef CONFIG_TRACER_SNAPSHOT
1226 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1227 					   void *cond_data)
1228 {
1229 	struct tracer *tracer = tr->current_trace;
1230 	unsigned long flags;
1231 
1232 	if (in_nmi()) {
1233 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1234 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1235 		return;
1236 	}
1237 
1238 	if (!tr->allocated_snapshot) {
1239 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1240 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1241 		tracer_tracing_off(tr);
1242 		return;
1243 	}
1244 
1245 	/* Note, snapshot can not be used when the tracer uses it */
1246 	if (tracer->use_max_tr) {
1247 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1248 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1249 		return;
1250 	}
1251 
1252 	if (tr->mapped) {
1253 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1254 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1255 		return;
1256 	}
1257 
1258 	local_irq_save(flags);
1259 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1260 	local_irq_restore(flags);
1261 }
1262 
1263 void tracing_snapshot_instance(struct trace_array *tr)
1264 {
1265 	tracing_snapshot_instance_cond(tr, NULL);
1266 }
1267 
1268 /**
1269  * tracing_snapshot - take a snapshot of the current buffer.
1270  *
1271  * This causes a swap between the snapshot buffer and the current live
1272  * tracing buffer. You can use this to take snapshots of the live
1273  * trace when some condition is triggered, but continue to trace.
1274  *
1275  * Note, make sure to allocate the snapshot with either
1276  * a tracing_snapshot_alloc(), or by doing it manually
1277  * with: echo 1 > /sys/kernel/tracing/snapshot
1278  *
1279  * If the snapshot buffer is not allocated, it will stop tracing.
1280  * Basically making a permanent snapshot.
1281  */
1282 void tracing_snapshot(void)
1283 {
1284 	struct trace_array *tr = &global_trace;
1285 
1286 	tracing_snapshot_instance(tr);
1287 }
1288 EXPORT_SYMBOL_GPL(tracing_snapshot);
1289 
1290 /**
1291  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1292  * @tr:		The tracing instance to snapshot
1293  * @cond_data:	The data to be tested conditionally, and possibly saved
1294  *
1295  * This is the same as tracing_snapshot() except that the snapshot is
1296  * conditional - the snapshot will only happen if the
1297  * cond_snapshot.update() implementation receiving the cond_data
1298  * returns true, which means that the trace array's cond_snapshot
1299  * update() operation used the cond_data to determine whether the
1300  * snapshot should be taken, and if it was, presumably saved it along
1301  * with the snapshot.
1302  */
1303 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1304 {
1305 	tracing_snapshot_instance_cond(tr, cond_data);
1306 }
1307 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1308 
1309 /**
1310  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1311  * @tr:		The tracing instance
1312  *
1313  * When the user enables a conditional snapshot using
1314  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1315  * with the snapshot.  This accessor is used to retrieve it.
1316  *
1317  * Should not be called from cond_snapshot.update(), since it takes
1318  * the tr->max_lock lock, which the code calling
1319  * cond_snapshot.update() has already done.
1320  *
1321  * Returns the cond_data associated with the trace array's snapshot.
1322  */
1323 void *tracing_cond_snapshot_data(struct trace_array *tr)
1324 {
1325 	void *cond_data = NULL;
1326 
1327 	local_irq_disable();
1328 	arch_spin_lock(&tr->max_lock);
1329 
1330 	if (tr->cond_snapshot)
1331 		cond_data = tr->cond_snapshot->cond_data;
1332 
1333 	arch_spin_unlock(&tr->max_lock);
1334 	local_irq_enable();
1335 
1336 	return cond_data;
1337 }
1338 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1339 
1340 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1341 					struct array_buffer *size_buf, int cpu_id);
1342 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1343 
1344 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1345 {
1346 	int order;
1347 	int ret;
1348 
1349 	if (!tr->allocated_snapshot) {
1350 
1351 		/* Make the snapshot buffer have the same order as main buffer */
1352 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1353 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1354 		if (ret < 0)
1355 			return ret;
1356 
1357 		/* allocate spare buffer */
1358 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1359 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1360 		if (ret < 0)
1361 			return ret;
1362 
1363 		tr->allocated_snapshot = true;
1364 	}
1365 
1366 	return 0;
1367 }
1368 
1369 static void free_snapshot(struct trace_array *tr)
1370 {
1371 	/*
1372 	 * We don't free the ring buffer. instead, resize it because
1373 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1374 	 * we want preserve it.
1375 	 */
1376 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1377 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1378 	set_buffer_entries(&tr->max_buffer, 1);
1379 	tracing_reset_online_cpus(&tr->max_buffer);
1380 	tr->allocated_snapshot = false;
1381 }
1382 
1383 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1384 {
1385 	int ret;
1386 
1387 	lockdep_assert_held(&trace_types_lock);
1388 
1389 	spin_lock(&tr->snapshot_trigger_lock);
1390 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1391 		spin_unlock(&tr->snapshot_trigger_lock);
1392 		return -EBUSY;
1393 	}
1394 
1395 	tr->snapshot++;
1396 	spin_unlock(&tr->snapshot_trigger_lock);
1397 
1398 	ret = tracing_alloc_snapshot_instance(tr);
1399 	if (ret) {
1400 		spin_lock(&tr->snapshot_trigger_lock);
1401 		tr->snapshot--;
1402 		spin_unlock(&tr->snapshot_trigger_lock);
1403 	}
1404 
1405 	return ret;
1406 }
1407 
1408 int tracing_arm_snapshot(struct trace_array *tr)
1409 {
1410 	int ret;
1411 
1412 	mutex_lock(&trace_types_lock);
1413 	ret = tracing_arm_snapshot_locked(tr);
1414 	mutex_unlock(&trace_types_lock);
1415 
1416 	return ret;
1417 }
1418 
1419 void tracing_disarm_snapshot(struct trace_array *tr)
1420 {
1421 	spin_lock(&tr->snapshot_trigger_lock);
1422 	if (!WARN_ON(!tr->snapshot))
1423 		tr->snapshot--;
1424 	spin_unlock(&tr->snapshot_trigger_lock);
1425 }
1426 
1427 /**
1428  * tracing_alloc_snapshot - allocate snapshot buffer.
1429  *
1430  * This only allocates the snapshot buffer if it isn't already
1431  * allocated - it doesn't also take a snapshot.
1432  *
1433  * This is meant to be used in cases where the snapshot buffer needs
1434  * to be set up for events that can't sleep but need to be able to
1435  * trigger a snapshot.
1436  */
1437 int tracing_alloc_snapshot(void)
1438 {
1439 	struct trace_array *tr = &global_trace;
1440 	int ret;
1441 
1442 	ret = tracing_alloc_snapshot_instance(tr);
1443 	WARN_ON(ret < 0);
1444 
1445 	return ret;
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1448 
1449 /**
1450  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1451  *
1452  * This is similar to tracing_snapshot(), but it will allocate the
1453  * snapshot buffer if it isn't already allocated. Use this only
1454  * where it is safe to sleep, as the allocation may sleep.
1455  *
1456  * This causes a swap between the snapshot buffer and the current live
1457  * tracing buffer. You can use this to take snapshots of the live
1458  * trace when some condition is triggered, but continue to trace.
1459  */
1460 void tracing_snapshot_alloc(void)
1461 {
1462 	int ret;
1463 
1464 	ret = tracing_alloc_snapshot();
1465 	if (ret < 0)
1466 		return;
1467 
1468 	tracing_snapshot();
1469 }
1470 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1471 
1472 /**
1473  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1474  * @tr:		The tracing instance
1475  * @cond_data:	User data to associate with the snapshot
1476  * @update:	Implementation of the cond_snapshot update function
1477  *
1478  * Check whether the conditional snapshot for the given instance has
1479  * already been enabled, or if the current tracer is already using a
1480  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1481  * save the cond_data and update function inside.
1482  *
1483  * Returns 0 if successful, error otherwise.
1484  */
1485 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1486 				 cond_update_fn_t update)
1487 {
1488 	struct cond_snapshot *cond_snapshot __free(kfree) =
1489 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1490 	int ret;
1491 
1492 	if (!cond_snapshot)
1493 		return -ENOMEM;
1494 
1495 	cond_snapshot->cond_data = cond_data;
1496 	cond_snapshot->update = update;
1497 
1498 	guard(mutex)(&trace_types_lock);
1499 
1500 	if (tr->current_trace->use_max_tr)
1501 		return -EBUSY;
1502 
1503 	/*
1504 	 * The cond_snapshot can only change to NULL without the
1505 	 * trace_types_lock. We don't care if we race with it going
1506 	 * to NULL, but we want to make sure that it's not set to
1507 	 * something other than NULL when we get here, which we can
1508 	 * do safely with only holding the trace_types_lock and not
1509 	 * having to take the max_lock.
1510 	 */
1511 	if (tr->cond_snapshot)
1512 		return -EBUSY;
1513 
1514 	ret = tracing_arm_snapshot_locked(tr);
1515 	if (ret)
1516 		return ret;
1517 
1518 	local_irq_disable();
1519 	arch_spin_lock(&tr->max_lock);
1520 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1521 	arch_spin_unlock(&tr->max_lock);
1522 	local_irq_enable();
1523 
1524 	return 0;
1525 }
1526 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1527 
1528 /**
1529  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1530  * @tr:		The tracing instance
1531  *
1532  * Check whether the conditional snapshot for the given instance is
1533  * enabled; if so, free the cond_snapshot associated with it,
1534  * otherwise return -EINVAL.
1535  *
1536  * Returns 0 if successful, error otherwise.
1537  */
1538 int tracing_snapshot_cond_disable(struct trace_array *tr)
1539 {
1540 	int ret = 0;
1541 
1542 	local_irq_disable();
1543 	arch_spin_lock(&tr->max_lock);
1544 
1545 	if (!tr->cond_snapshot)
1546 		ret = -EINVAL;
1547 	else {
1548 		kfree(tr->cond_snapshot);
1549 		tr->cond_snapshot = NULL;
1550 	}
1551 
1552 	arch_spin_unlock(&tr->max_lock);
1553 	local_irq_enable();
1554 
1555 	tracing_disarm_snapshot(tr);
1556 
1557 	return ret;
1558 }
1559 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1560 #else
1561 void tracing_snapshot(void)
1562 {
1563 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1564 }
1565 EXPORT_SYMBOL_GPL(tracing_snapshot);
1566 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1567 {
1568 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1569 }
1570 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1571 int tracing_alloc_snapshot(void)
1572 {
1573 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1574 	return -ENODEV;
1575 }
1576 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1577 void tracing_snapshot_alloc(void)
1578 {
1579 	/* Give warning */
1580 	tracing_snapshot();
1581 }
1582 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1583 void *tracing_cond_snapshot_data(struct trace_array *tr)
1584 {
1585 	return NULL;
1586 }
1587 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1588 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1589 {
1590 	return -ENODEV;
1591 }
1592 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1593 int tracing_snapshot_cond_disable(struct trace_array *tr)
1594 {
1595 	return false;
1596 }
1597 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1598 #define free_snapshot(tr)	do { } while (0)
1599 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1600 #endif /* CONFIG_TRACER_SNAPSHOT */
1601 
1602 void tracer_tracing_off(struct trace_array *tr)
1603 {
1604 	if (tr->array_buffer.buffer)
1605 		ring_buffer_record_off(tr->array_buffer.buffer);
1606 	/*
1607 	 * This flag is looked at when buffers haven't been allocated
1608 	 * yet, or by some tracers (like irqsoff), that just want to
1609 	 * know if the ring buffer has been disabled, but it can handle
1610 	 * races of where it gets disabled but we still do a record.
1611 	 * As the check is in the fast path of the tracers, it is more
1612 	 * important to be fast than accurate.
1613 	 */
1614 	tr->buffer_disabled = 1;
1615 	/* Make the flag seen by readers */
1616 	smp_wmb();
1617 }
1618 
1619 /**
1620  * tracing_off - turn off tracing buffers
1621  *
1622  * This function stops the tracing buffers from recording data.
1623  * It does not disable any overhead the tracers themselves may
1624  * be causing. This function simply causes all recording to
1625  * the ring buffers to fail.
1626  */
1627 void tracing_off(void)
1628 {
1629 	tracer_tracing_off(&global_trace);
1630 }
1631 EXPORT_SYMBOL_GPL(tracing_off);
1632 
1633 void disable_trace_on_warning(void)
1634 {
1635 	if (__disable_trace_on_warning) {
1636 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1637 			"Disabling tracing due to warning\n");
1638 		tracing_off();
1639 	}
1640 }
1641 
1642 /**
1643  * tracer_tracing_is_on - show real state of ring buffer enabled
1644  * @tr : the trace array to know if ring buffer is enabled
1645  *
1646  * Shows real state of the ring buffer if it is enabled or not.
1647  */
1648 bool tracer_tracing_is_on(struct trace_array *tr)
1649 {
1650 	if (tr->array_buffer.buffer)
1651 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1652 	return !tr->buffer_disabled;
1653 }
1654 
1655 /**
1656  * tracing_is_on - show state of ring buffers enabled
1657  */
1658 int tracing_is_on(void)
1659 {
1660 	return tracer_tracing_is_on(&global_trace);
1661 }
1662 EXPORT_SYMBOL_GPL(tracing_is_on);
1663 
1664 static int __init set_buf_size(char *str)
1665 {
1666 	unsigned long buf_size;
1667 
1668 	if (!str)
1669 		return 0;
1670 	buf_size = memparse(str, &str);
1671 	/*
1672 	 * nr_entries can not be zero and the startup
1673 	 * tests require some buffer space. Therefore
1674 	 * ensure we have at least 4096 bytes of buffer.
1675 	 */
1676 	trace_buf_size = max(4096UL, buf_size);
1677 	return 1;
1678 }
1679 __setup("trace_buf_size=", set_buf_size);
1680 
1681 static int __init set_tracing_thresh(char *str)
1682 {
1683 	unsigned long threshold;
1684 	int ret;
1685 
1686 	if (!str)
1687 		return 0;
1688 	ret = kstrtoul(str, 0, &threshold);
1689 	if (ret < 0)
1690 		return 0;
1691 	tracing_thresh = threshold * 1000;
1692 	return 1;
1693 }
1694 __setup("tracing_thresh=", set_tracing_thresh);
1695 
1696 unsigned long nsecs_to_usecs(unsigned long nsecs)
1697 {
1698 	return nsecs / 1000;
1699 }
1700 
1701 /*
1702  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1703  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1704  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1705  * of strings in the order that the evals (enum) were defined.
1706  */
1707 #undef C
1708 #define C(a, b) b
1709 
1710 /* These must match the bit positions in trace_iterator_flags */
1711 static const char *trace_options[] = {
1712 	TRACE_FLAGS
1713 	NULL
1714 };
1715 
1716 static struct {
1717 	u64 (*func)(void);
1718 	const char *name;
1719 	int in_ns;		/* is this clock in nanoseconds? */
1720 } trace_clocks[] = {
1721 	{ trace_clock_local,		"local",	1 },
1722 	{ trace_clock_global,		"global",	1 },
1723 	{ trace_clock_counter,		"counter",	0 },
1724 	{ trace_clock_jiffies,		"uptime",	0 },
1725 	{ trace_clock,			"perf",		1 },
1726 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1727 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1728 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1729 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1730 	ARCH_TRACE_CLOCKS
1731 };
1732 
1733 bool trace_clock_in_ns(struct trace_array *tr)
1734 {
1735 	if (trace_clocks[tr->clock_id].in_ns)
1736 		return true;
1737 
1738 	return false;
1739 }
1740 
1741 /*
1742  * trace_parser_get_init - gets the buffer for trace parser
1743  */
1744 int trace_parser_get_init(struct trace_parser *parser, int size)
1745 {
1746 	memset(parser, 0, sizeof(*parser));
1747 
1748 	parser->buffer = kmalloc(size, GFP_KERNEL);
1749 	if (!parser->buffer)
1750 		return 1;
1751 
1752 	parser->size = size;
1753 	return 0;
1754 }
1755 
1756 /*
1757  * trace_parser_put - frees the buffer for trace parser
1758  */
1759 void trace_parser_put(struct trace_parser *parser)
1760 {
1761 	kfree(parser->buffer);
1762 	parser->buffer = NULL;
1763 }
1764 
1765 /*
1766  * trace_get_user - reads the user input string separated by  space
1767  * (matched by isspace(ch))
1768  *
1769  * For each string found the 'struct trace_parser' is updated,
1770  * and the function returns.
1771  *
1772  * Returns number of bytes read.
1773  *
1774  * See kernel/trace/trace.h for 'struct trace_parser' details.
1775  */
1776 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1777 	size_t cnt, loff_t *ppos)
1778 {
1779 	char ch;
1780 	size_t read = 0;
1781 	ssize_t ret;
1782 
1783 	if (!*ppos)
1784 		trace_parser_clear(parser);
1785 
1786 	ret = get_user(ch, ubuf++);
1787 	if (ret)
1788 		goto out;
1789 
1790 	read++;
1791 	cnt--;
1792 
1793 	/*
1794 	 * The parser is not finished with the last write,
1795 	 * continue reading the user input without skipping spaces.
1796 	 */
1797 	if (!parser->cont) {
1798 		/* skip white space */
1799 		while (cnt && isspace(ch)) {
1800 			ret = get_user(ch, ubuf++);
1801 			if (ret)
1802 				goto out;
1803 			read++;
1804 			cnt--;
1805 		}
1806 
1807 		parser->idx = 0;
1808 
1809 		/* only spaces were written */
1810 		if (isspace(ch) || !ch) {
1811 			*ppos += read;
1812 			ret = read;
1813 			goto out;
1814 		}
1815 	}
1816 
1817 	/* read the non-space input */
1818 	while (cnt && !isspace(ch) && ch) {
1819 		if (parser->idx < parser->size - 1)
1820 			parser->buffer[parser->idx++] = ch;
1821 		else {
1822 			ret = -EINVAL;
1823 			goto out;
1824 		}
1825 		ret = get_user(ch, ubuf++);
1826 		if (ret)
1827 			goto out;
1828 		read++;
1829 		cnt--;
1830 	}
1831 
1832 	/* We either got finished input or we have to wait for another call. */
1833 	if (isspace(ch) || !ch) {
1834 		parser->buffer[parser->idx] = 0;
1835 		parser->cont = false;
1836 	} else if (parser->idx < parser->size - 1) {
1837 		parser->cont = true;
1838 		parser->buffer[parser->idx++] = ch;
1839 		/* Make sure the parsed string always terminates with '\0'. */
1840 		parser->buffer[parser->idx] = 0;
1841 	} else {
1842 		ret = -EINVAL;
1843 		goto out;
1844 	}
1845 
1846 	*ppos += read;
1847 	ret = read;
1848 
1849 out:
1850 	return ret;
1851 }
1852 
1853 /* TODO add a seq_buf_to_buffer() */
1854 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1855 {
1856 	int len;
1857 
1858 	if (trace_seq_used(s) <= s->readpos)
1859 		return -EBUSY;
1860 
1861 	len = trace_seq_used(s) - s->readpos;
1862 	if (cnt > len)
1863 		cnt = len;
1864 	memcpy(buf, s->buffer + s->readpos, cnt);
1865 
1866 	s->readpos += cnt;
1867 	return cnt;
1868 }
1869 
1870 unsigned long __read_mostly	tracing_thresh;
1871 
1872 #ifdef CONFIG_TRACER_MAX_TRACE
1873 static const struct file_operations tracing_max_lat_fops;
1874 
1875 #ifdef LATENCY_FS_NOTIFY
1876 
1877 static struct workqueue_struct *fsnotify_wq;
1878 
1879 static void latency_fsnotify_workfn(struct work_struct *work)
1880 {
1881 	struct trace_array *tr = container_of(work, struct trace_array,
1882 					      fsnotify_work);
1883 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1884 }
1885 
1886 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1887 {
1888 	struct trace_array *tr = container_of(iwork, struct trace_array,
1889 					      fsnotify_irqwork);
1890 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1891 }
1892 
1893 static void trace_create_maxlat_file(struct trace_array *tr,
1894 				     struct dentry *d_tracer)
1895 {
1896 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1897 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1898 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1899 					      TRACE_MODE_WRITE,
1900 					      d_tracer, tr,
1901 					      &tracing_max_lat_fops);
1902 }
1903 
1904 __init static int latency_fsnotify_init(void)
1905 {
1906 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1907 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1908 	if (!fsnotify_wq) {
1909 		pr_err("Unable to allocate tr_max_lat_wq\n");
1910 		return -ENOMEM;
1911 	}
1912 	return 0;
1913 }
1914 
1915 late_initcall_sync(latency_fsnotify_init);
1916 
1917 void latency_fsnotify(struct trace_array *tr)
1918 {
1919 	if (!fsnotify_wq)
1920 		return;
1921 	/*
1922 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1923 	 * possible that we are called from __schedule() or do_idle(), which
1924 	 * could cause a deadlock.
1925 	 */
1926 	irq_work_queue(&tr->fsnotify_irqwork);
1927 }
1928 
1929 #else /* !LATENCY_FS_NOTIFY */
1930 
1931 #define trace_create_maxlat_file(tr, d_tracer)				\
1932 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1933 			  d_tracer, tr, &tracing_max_lat_fops)
1934 
1935 #endif
1936 
1937 /*
1938  * Copy the new maximum trace into the separate maximum-trace
1939  * structure. (this way the maximum trace is permanently saved,
1940  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1941  */
1942 static void
1943 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1944 {
1945 	struct array_buffer *trace_buf = &tr->array_buffer;
1946 	struct array_buffer *max_buf = &tr->max_buffer;
1947 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1948 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1949 
1950 	max_buf->cpu = cpu;
1951 	max_buf->time_start = data->preempt_timestamp;
1952 
1953 	max_data->saved_latency = tr->max_latency;
1954 	max_data->critical_start = data->critical_start;
1955 	max_data->critical_end = data->critical_end;
1956 
1957 	strscpy(max_data->comm, tsk->comm);
1958 	max_data->pid = tsk->pid;
1959 	/*
1960 	 * If tsk == current, then use current_uid(), as that does not use
1961 	 * RCU. The irq tracer can be called out of RCU scope.
1962 	 */
1963 	if (tsk == current)
1964 		max_data->uid = current_uid();
1965 	else
1966 		max_data->uid = task_uid(tsk);
1967 
1968 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1969 	max_data->policy = tsk->policy;
1970 	max_data->rt_priority = tsk->rt_priority;
1971 
1972 	/* record this tasks comm */
1973 	tracing_record_cmdline(tsk);
1974 	latency_fsnotify(tr);
1975 }
1976 
1977 /**
1978  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1979  * @tr: tracer
1980  * @tsk: the task with the latency
1981  * @cpu: The cpu that initiated the trace.
1982  * @cond_data: User data associated with a conditional snapshot
1983  *
1984  * Flip the buffers between the @tr and the max_tr and record information
1985  * about which task was the cause of this latency.
1986  */
1987 void
1988 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1989 	      void *cond_data)
1990 {
1991 	if (tr->stop_count)
1992 		return;
1993 
1994 	WARN_ON_ONCE(!irqs_disabled());
1995 
1996 	if (!tr->allocated_snapshot) {
1997 		/* Only the nop tracer should hit this when disabling */
1998 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1999 		return;
2000 	}
2001 
2002 	arch_spin_lock(&tr->max_lock);
2003 
2004 	/* Inherit the recordable setting from array_buffer */
2005 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2006 		ring_buffer_record_on(tr->max_buffer.buffer);
2007 	else
2008 		ring_buffer_record_off(tr->max_buffer.buffer);
2009 
2010 #ifdef CONFIG_TRACER_SNAPSHOT
2011 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2012 		arch_spin_unlock(&tr->max_lock);
2013 		return;
2014 	}
2015 #endif
2016 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2017 
2018 	__update_max_tr(tr, tsk, cpu);
2019 
2020 	arch_spin_unlock(&tr->max_lock);
2021 
2022 	/* Any waiters on the old snapshot buffer need to wake up */
2023 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2024 }
2025 
2026 /**
2027  * update_max_tr_single - only copy one trace over, and reset the rest
2028  * @tr: tracer
2029  * @tsk: task with the latency
2030  * @cpu: the cpu of the buffer to copy.
2031  *
2032  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2033  */
2034 void
2035 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2036 {
2037 	int ret;
2038 
2039 	if (tr->stop_count)
2040 		return;
2041 
2042 	WARN_ON_ONCE(!irqs_disabled());
2043 	if (!tr->allocated_snapshot) {
2044 		/* Only the nop tracer should hit this when disabling */
2045 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2046 		return;
2047 	}
2048 
2049 	arch_spin_lock(&tr->max_lock);
2050 
2051 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2052 
2053 	if (ret == -EBUSY) {
2054 		/*
2055 		 * We failed to swap the buffer due to a commit taking
2056 		 * place on this CPU. We fail to record, but we reset
2057 		 * the max trace buffer (no one writes directly to it)
2058 		 * and flag that it failed.
2059 		 * Another reason is resize is in progress.
2060 		 */
2061 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2062 			"Failed to swap buffers due to commit or resize in progress\n");
2063 	}
2064 
2065 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2066 
2067 	__update_max_tr(tr, tsk, cpu);
2068 	arch_spin_unlock(&tr->max_lock);
2069 }
2070 
2071 #endif /* CONFIG_TRACER_MAX_TRACE */
2072 
2073 struct pipe_wait {
2074 	struct trace_iterator		*iter;
2075 	int				wait_index;
2076 };
2077 
2078 static bool wait_pipe_cond(void *data)
2079 {
2080 	struct pipe_wait *pwait = data;
2081 	struct trace_iterator *iter = pwait->iter;
2082 
2083 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2084 		return true;
2085 
2086 	return iter->closed;
2087 }
2088 
2089 static int wait_on_pipe(struct trace_iterator *iter, int full)
2090 {
2091 	struct pipe_wait pwait;
2092 	int ret;
2093 
2094 	/* Iterators are static, they should be filled or empty */
2095 	if (trace_buffer_iter(iter, iter->cpu_file))
2096 		return 0;
2097 
2098 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2099 	pwait.iter = iter;
2100 
2101 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2102 			       wait_pipe_cond, &pwait);
2103 
2104 #ifdef CONFIG_TRACER_MAX_TRACE
2105 	/*
2106 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2107 	 * to happen, this would now be the main buffer.
2108 	 */
2109 	if (iter->snapshot)
2110 		iter->array_buffer = &iter->tr->max_buffer;
2111 #endif
2112 	return ret;
2113 }
2114 
2115 #ifdef CONFIG_FTRACE_STARTUP_TEST
2116 static bool selftests_can_run;
2117 
2118 struct trace_selftests {
2119 	struct list_head		list;
2120 	struct tracer			*type;
2121 };
2122 
2123 static LIST_HEAD(postponed_selftests);
2124 
2125 static int save_selftest(struct tracer *type)
2126 {
2127 	struct trace_selftests *selftest;
2128 
2129 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2130 	if (!selftest)
2131 		return -ENOMEM;
2132 
2133 	selftest->type = type;
2134 	list_add(&selftest->list, &postponed_selftests);
2135 	return 0;
2136 }
2137 
2138 static int run_tracer_selftest(struct tracer *type)
2139 {
2140 	struct trace_array *tr = &global_trace;
2141 	struct tracer *saved_tracer = tr->current_trace;
2142 	int ret;
2143 
2144 	if (!type->selftest || tracing_selftest_disabled)
2145 		return 0;
2146 
2147 	/*
2148 	 * If a tracer registers early in boot up (before scheduling is
2149 	 * initialized and such), then do not run its selftests yet.
2150 	 * Instead, run it a little later in the boot process.
2151 	 */
2152 	if (!selftests_can_run)
2153 		return save_selftest(type);
2154 
2155 	if (!tracing_is_on()) {
2156 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2157 			type->name);
2158 		return 0;
2159 	}
2160 
2161 	/*
2162 	 * Run a selftest on this tracer.
2163 	 * Here we reset the trace buffer, and set the current
2164 	 * tracer to be this tracer. The tracer can then run some
2165 	 * internal tracing to verify that everything is in order.
2166 	 * If we fail, we do not register this tracer.
2167 	 */
2168 	tracing_reset_online_cpus(&tr->array_buffer);
2169 
2170 	tr->current_trace = type;
2171 
2172 #ifdef CONFIG_TRACER_MAX_TRACE
2173 	if (type->use_max_tr) {
2174 		/* If we expanded the buffers, make sure the max is expanded too */
2175 		if (tr->ring_buffer_expanded)
2176 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2177 					   RING_BUFFER_ALL_CPUS);
2178 		tr->allocated_snapshot = true;
2179 	}
2180 #endif
2181 
2182 	/* the test is responsible for initializing and enabling */
2183 	pr_info("Testing tracer %s: ", type->name);
2184 	ret = type->selftest(type, tr);
2185 	/* the test is responsible for resetting too */
2186 	tr->current_trace = saved_tracer;
2187 	if (ret) {
2188 		printk(KERN_CONT "FAILED!\n");
2189 		/* Add the warning after printing 'FAILED' */
2190 		WARN_ON(1);
2191 		return -1;
2192 	}
2193 	/* Only reset on passing, to avoid touching corrupted buffers */
2194 	tracing_reset_online_cpus(&tr->array_buffer);
2195 
2196 #ifdef CONFIG_TRACER_MAX_TRACE
2197 	if (type->use_max_tr) {
2198 		tr->allocated_snapshot = false;
2199 
2200 		/* Shrink the max buffer again */
2201 		if (tr->ring_buffer_expanded)
2202 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2203 					   RING_BUFFER_ALL_CPUS);
2204 	}
2205 #endif
2206 
2207 	printk(KERN_CONT "PASSED\n");
2208 	return 0;
2209 }
2210 
2211 static int do_run_tracer_selftest(struct tracer *type)
2212 {
2213 	int ret;
2214 
2215 	/*
2216 	 * Tests can take a long time, especially if they are run one after the
2217 	 * other, as does happen during bootup when all the tracers are
2218 	 * registered. This could cause the soft lockup watchdog to trigger.
2219 	 */
2220 	cond_resched();
2221 
2222 	tracing_selftest_running = true;
2223 	ret = run_tracer_selftest(type);
2224 	tracing_selftest_running = false;
2225 
2226 	return ret;
2227 }
2228 
2229 static __init int init_trace_selftests(void)
2230 {
2231 	struct trace_selftests *p, *n;
2232 	struct tracer *t, **last;
2233 	int ret;
2234 
2235 	selftests_can_run = true;
2236 
2237 	guard(mutex)(&trace_types_lock);
2238 
2239 	if (list_empty(&postponed_selftests))
2240 		return 0;
2241 
2242 	pr_info("Running postponed tracer tests:\n");
2243 
2244 	tracing_selftest_running = true;
2245 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2246 		/* This loop can take minutes when sanitizers are enabled, so
2247 		 * lets make sure we allow RCU processing.
2248 		 */
2249 		cond_resched();
2250 		ret = run_tracer_selftest(p->type);
2251 		/* If the test fails, then warn and remove from available_tracers */
2252 		if (ret < 0) {
2253 			WARN(1, "tracer: %s failed selftest, disabling\n",
2254 			     p->type->name);
2255 			last = &trace_types;
2256 			for (t = trace_types; t; t = t->next) {
2257 				if (t == p->type) {
2258 					*last = t->next;
2259 					break;
2260 				}
2261 				last = &t->next;
2262 			}
2263 		}
2264 		list_del(&p->list);
2265 		kfree(p);
2266 	}
2267 	tracing_selftest_running = false;
2268 
2269 	return 0;
2270 }
2271 core_initcall(init_trace_selftests);
2272 #else
2273 static inline int do_run_tracer_selftest(struct tracer *type)
2274 {
2275 	return 0;
2276 }
2277 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2278 
2279 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2280 
2281 static void __init apply_trace_boot_options(void);
2282 
2283 /**
2284  * register_tracer - register a tracer with the ftrace system.
2285  * @type: the plugin for the tracer
2286  *
2287  * Register a new plugin tracer.
2288  */
2289 int __init register_tracer(struct tracer *type)
2290 {
2291 	struct tracer *t;
2292 	int ret = 0;
2293 
2294 	if (!type->name) {
2295 		pr_info("Tracer must have a name\n");
2296 		return -1;
2297 	}
2298 
2299 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2300 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2301 		return -1;
2302 	}
2303 
2304 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2305 		pr_warn("Can not register tracer %s due to lockdown\n",
2306 			   type->name);
2307 		return -EPERM;
2308 	}
2309 
2310 	mutex_lock(&trace_types_lock);
2311 
2312 	for (t = trace_types; t; t = t->next) {
2313 		if (strcmp(type->name, t->name) == 0) {
2314 			/* already found */
2315 			pr_info("Tracer %s already registered\n",
2316 				type->name);
2317 			ret = -1;
2318 			goto out;
2319 		}
2320 	}
2321 
2322 	if (!type->set_flag)
2323 		type->set_flag = &dummy_set_flag;
2324 	if (!type->flags) {
2325 		/*allocate a dummy tracer_flags*/
2326 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2327 		if (!type->flags) {
2328 			ret = -ENOMEM;
2329 			goto out;
2330 		}
2331 		type->flags->val = 0;
2332 		type->flags->opts = dummy_tracer_opt;
2333 	} else
2334 		if (!type->flags->opts)
2335 			type->flags->opts = dummy_tracer_opt;
2336 
2337 	/* store the tracer for __set_tracer_option */
2338 	type->flags->trace = type;
2339 
2340 	ret = do_run_tracer_selftest(type);
2341 	if (ret < 0)
2342 		goto out;
2343 
2344 	type->next = trace_types;
2345 	trace_types = type;
2346 	add_tracer_options(&global_trace, type);
2347 
2348  out:
2349 	mutex_unlock(&trace_types_lock);
2350 
2351 	if (ret || !default_bootup_tracer)
2352 		goto out_unlock;
2353 
2354 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2355 		goto out_unlock;
2356 
2357 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2358 	/* Do we want this tracer to start on bootup? */
2359 	tracing_set_tracer(&global_trace, type->name);
2360 	default_bootup_tracer = NULL;
2361 
2362 	apply_trace_boot_options();
2363 
2364 	/* disable other selftests, since this will break it. */
2365 	disable_tracing_selftest("running a tracer");
2366 
2367  out_unlock:
2368 	return ret;
2369 }
2370 
2371 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2372 {
2373 	struct trace_buffer *buffer = buf->buffer;
2374 
2375 	if (!buffer)
2376 		return;
2377 
2378 	ring_buffer_record_disable(buffer);
2379 
2380 	/* Make sure all commits have finished */
2381 	synchronize_rcu();
2382 	ring_buffer_reset_cpu(buffer, cpu);
2383 
2384 	ring_buffer_record_enable(buffer);
2385 }
2386 
2387 void tracing_reset_online_cpus(struct array_buffer *buf)
2388 {
2389 	struct trace_buffer *buffer = buf->buffer;
2390 
2391 	if (!buffer)
2392 		return;
2393 
2394 	ring_buffer_record_disable(buffer);
2395 
2396 	/* Make sure all commits have finished */
2397 	synchronize_rcu();
2398 
2399 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2400 
2401 	ring_buffer_reset_online_cpus(buffer);
2402 
2403 	ring_buffer_record_enable(buffer);
2404 }
2405 
2406 static void tracing_reset_all_cpus(struct array_buffer *buf)
2407 {
2408 	struct trace_buffer *buffer = buf->buffer;
2409 
2410 	if (!buffer)
2411 		return;
2412 
2413 	ring_buffer_record_disable(buffer);
2414 
2415 	/* Make sure all commits have finished */
2416 	synchronize_rcu();
2417 
2418 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2419 
2420 	ring_buffer_reset(buffer);
2421 
2422 	ring_buffer_record_enable(buffer);
2423 }
2424 
2425 /* Must have trace_types_lock held */
2426 void tracing_reset_all_online_cpus_unlocked(void)
2427 {
2428 	struct trace_array *tr;
2429 
2430 	lockdep_assert_held(&trace_types_lock);
2431 
2432 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2433 		if (!tr->clear_trace)
2434 			continue;
2435 		tr->clear_trace = false;
2436 		tracing_reset_online_cpus(&tr->array_buffer);
2437 #ifdef CONFIG_TRACER_MAX_TRACE
2438 		tracing_reset_online_cpus(&tr->max_buffer);
2439 #endif
2440 	}
2441 }
2442 
2443 void tracing_reset_all_online_cpus(void)
2444 {
2445 	mutex_lock(&trace_types_lock);
2446 	tracing_reset_all_online_cpus_unlocked();
2447 	mutex_unlock(&trace_types_lock);
2448 }
2449 
2450 int is_tracing_stopped(void)
2451 {
2452 	return global_trace.stop_count;
2453 }
2454 
2455 static void tracing_start_tr(struct trace_array *tr)
2456 {
2457 	struct trace_buffer *buffer;
2458 	unsigned long flags;
2459 
2460 	if (tracing_disabled)
2461 		return;
2462 
2463 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2464 	if (--tr->stop_count) {
2465 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2466 			/* Someone screwed up their debugging */
2467 			tr->stop_count = 0;
2468 		}
2469 		goto out;
2470 	}
2471 
2472 	/* Prevent the buffers from switching */
2473 	arch_spin_lock(&tr->max_lock);
2474 
2475 	buffer = tr->array_buffer.buffer;
2476 	if (buffer)
2477 		ring_buffer_record_enable(buffer);
2478 
2479 #ifdef CONFIG_TRACER_MAX_TRACE
2480 	buffer = tr->max_buffer.buffer;
2481 	if (buffer)
2482 		ring_buffer_record_enable(buffer);
2483 #endif
2484 
2485 	arch_spin_unlock(&tr->max_lock);
2486 
2487  out:
2488 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2489 }
2490 
2491 /**
2492  * tracing_start - quick start of the tracer
2493  *
2494  * If tracing is enabled but was stopped by tracing_stop,
2495  * this will start the tracer back up.
2496  */
2497 void tracing_start(void)
2498 
2499 {
2500 	return tracing_start_tr(&global_trace);
2501 }
2502 
2503 static void tracing_stop_tr(struct trace_array *tr)
2504 {
2505 	struct trace_buffer *buffer;
2506 	unsigned long flags;
2507 
2508 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2509 	if (tr->stop_count++)
2510 		goto out;
2511 
2512 	/* Prevent the buffers from switching */
2513 	arch_spin_lock(&tr->max_lock);
2514 
2515 	buffer = tr->array_buffer.buffer;
2516 	if (buffer)
2517 		ring_buffer_record_disable(buffer);
2518 
2519 #ifdef CONFIG_TRACER_MAX_TRACE
2520 	buffer = tr->max_buffer.buffer;
2521 	if (buffer)
2522 		ring_buffer_record_disable(buffer);
2523 #endif
2524 
2525 	arch_spin_unlock(&tr->max_lock);
2526 
2527  out:
2528 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2529 }
2530 
2531 /**
2532  * tracing_stop - quick stop of the tracer
2533  *
2534  * Light weight way to stop tracing. Use in conjunction with
2535  * tracing_start.
2536  */
2537 void tracing_stop(void)
2538 {
2539 	return tracing_stop_tr(&global_trace);
2540 }
2541 
2542 /*
2543  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2544  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2545  * simplifies those functions and keeps them in sync.
2546  */
2547 enum print_line_t trace_handle_return(struct trace_seq *s)
2548 {
2549 	return trace_seq_has_overflowed(s) ?
2550 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2551 }
2552 EXPORT_SYMBOL_GPL(trace_handle_return);
2553 
2554 static unsigned short migration_disable_value(void)
2555 {
2556 #if defined(CONFIG_SMP)
2557 	return current->migration_disabled;
2558 #else
2559 	return 0;
2560 #endif
2561 }
2562 
2563 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2564 {
2565 	unsigned int trace_flags = irqs_status;
2566 	unsigned int pc;
2567 
2568 	pc = preempt_count();
2569 
2570 	if (pc & NMI_MASK)
2571 		trace_flags |= TRACE_FLAG_NMI;
2572 	if (pc & HARDIRQ_MASK)
2573 		trace_flags |= TRACE_FLAG_HARDIRQ;
2574 	if (in_serving_softirq())
2575 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2576 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2577 		trace_flags |= TRACE_FLAG_BH_OFF;
2578 
2579 	if (tif_need_resched())
2580 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2581 	if (test_preempt_need_resched())
2582 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2583 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2584 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2585 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2586 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2587 }
2588 
2589 struct ring_buffer_event *
2590 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2591 			  int type,
2592 			  unsigned long len,
2593 			  unsigned int trace_ctx)
2594 {
2595 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2596 }
2597 
2598 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2599 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2600 static int trace_buffered_event_ref;
2601 
2602 /**
2603  * trace_buffered_event_enable - enable buffering events
2604  *
2605  * When events are being filtered, it is quicker to use a temporary
2606  * buffer to write the event data into if there's a likely chance
2607  * that it will not be committed. The discard of the ring buffer
2608  * is not as fast as committing, and is much slower than copying
2609  * a commit.
2610  *
2611  * When an event is to be filtered, allocate per cpu buffers to
2612  * write the event data into, and if the event is filtered and discarded
2613  * it is simply dropped, otherwise, the entire data is to be committed
2614  * in one shot.
2615  */
2616 void trace_buffered_event_enable(void)
2617 {
2618 	struct ring_buffer_event *event;
2619 	struct page *page;
2620 	int cpu;
2621 
2622 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2623 
2624 	if (trace_buffered_event_ref++)
2625 		return;
2626 
2627 	for_each_tracing_cpu(cpu) {
2628 		page = alloc_pages_node(cpu_to_node(cpu),
2629 					GFP_KERNEL | __GFP_NORETRY, 0);
2630 		/* This is just an optimization and can handle failures */
2631 		if (!page) {
2632 			pr_err("Failed to allocate event buffer\n");
2633 			break;
2634 		}
2635 
2636 		event = page_address(page);
2637 		memset(event, 0, sizeof(*event));
2638 
2639 		per_cpu(trace_buffered_event, cpu) = event;
2640 
2641 		preempt_disable();
2642 		if (cpu == smp_processor_id() &&
2643 		    __this_cpu_read(trace_buffered_event) !=
2644 		    per_cpu(trace_buffered_event, cpu))
2645 			WARN_ON_ONCE(1);
2646 		preempt_enable();
2647 	}
2648 }
2649 
2650 static void enable_trace_buffered_event(void *data)
2651 {
2652 	/* Probably not needed, but do it anyway */
2653 	smp_rmb();
2654 	this_cpu_dec(trace_buffered_event_cnt);
2655 }
2656 
2657 static void disable_trace_buffered_event(void *data)
2658 {
2659 	this_cpu_inc(trace_buffered_event_cnt);
2660 }
2661 
2662 /**
2663  * trace_buffered_event_disable - disable buffering events
2664  *
2665  * When a filter is removed, it is faster to not use the buffered
2666  * events, and to commit directly into the ring buffer. Free up
2667  * the temp buffers when there are no more users. This requires
2668  * special synchronization with current events.
2669  */
2670 void trace_buffered_event_disable(void)
2671 {
2672 	int cpu;
2673 
2674 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2675 
2676 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2677 		return;
2678 
2679 	if (--trace_buffered_event_ref)
2680 		return;
2681 
2682 	/* For each CPU, set the buffer as used. */
2683 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2684 			 NULL, true);
2685 
2686 	/* Wait for all current users to finish */
2687 	synchronize_rcu();
2688 
2689 	for_each_tracing_cpu(cpu) {
2690 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2691 		per_cpu(trace_buffered_event, cpu) = NULL;
2692 	}
2693 
2694 	/*
2695 	 * Wait for all CPUs that potentially started checking if they can use
2696 	 * their event buffer only after the previous synchronize_rcu() call and
2697 	 * they still read a valid pointer from trace_buffered_event. It must be
2698 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2699 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2700 	 */
2701 	synchronize_rcu();
2702 
2703 	/* For each CPU, relinquish the buffer */
2704 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2705 			 true);
2706 }
2707 
2708 static struct trace_buffer *temp_buffer;
2709 
2710 struct ring_buffer_event *
2711 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2712 			  struct trace_event_file *trace_file,
2713 			  int type, unsigned long len,
2714 			  unsigned int trace_ctx)
2715 {
2716 	struct ring_buffer_event *entry;
2717 	struct trace_array *tr = trace_file->tr;
2718 	int val;
2719 
2720 	*current_rb = tr->array_buffer.buffer;
2721 
2722 	if (!tr->no_filter_buffering_ref &&
2723 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2724 		preempt_disable_notrace();
2725 		/*
2726 		 * Filtering is on, so try to use the per cpu buffer first.
2727 		 * This buffer will simulate a ring_buffer_event,
2728 		 * where the type_len is zero and the array[0] will
2729 		 * hold the full length.
2730 		 * (see include/linux/ring-buffer.h for details on
2731 		 *  how the ring_buffer_event is structured).
2732 		 *
2733 		 * Using a temp buffer during filtering and copying it
2734 		 * on a matched filter is quicker than writing directly
2735 		 * into the ring buffer and then discarding it when
2736 		 * it doesn't match. That is because the discard
2737 		 * requires several atomic operations to get right.
2738 		 * Copying on match and doing nothing on a failed match
2739 		 * is still quicker than no copy on match, but having
2740 		 * to discard out of the ring buffer on a failed match.
2741 		 */
2742 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2743 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2744 
2745 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2746 
2747 			/*
2748 			 * Preemption is disabled, but interrupts and NMIs
2749 			 * can still come in now. If that happens after
2750 			 * the above increment, then it will have to go
2751 			 * back to the old method of allocating the event
2752 			 * on the ring buffer, and if the filter fails, it
2753 			 * will have to call ring_buffer_discard_commit()
2754 			 * to remove it.
2755 			 *
2756 			 * Need to also check the unlikely case that the
2757 			 * length is bigger than the temp buffer size.
2758 			 * If that happens, then the reserve is pretty much
2759 			 * guaranteed to fail, as the ring buffer currently
2760 			 * only allows events less than a page. But that may
2761 			 * change in the future, so let the ring buffer reserve
2762 			 * handle the failure in that case.
2763 			 */
2764 			if (val == 1 && likely(len <= max_len)) {
2765 				trace_event_setup(entry, type, trace_ctx);
2766 				entry->array[0] = len;
2767 				/* Return with preemption disabled */
2768 				return entry;
2769 			}
2770 			this_cpu_dec(trace_buffered_event_cnt);
2771 		}
2772 		/* __trace_buffer_lock_reserve() disables preemption */
2773 		preempt_enable_notrace();
2774 	}
2775 
2776 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2777 					    trace_ctx);
2778 	/*
2779 	 * If tracing is off, but we have triggers enabled
2780 	 * we still need to look at the event data. Use the temp_buffer
2781 	 * to store the trace event for the trigger to use. It's recursive
2782 	 * safe and will not be recorded anywhere.
2783 	 */
2784 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2785 		*current_rb = temp_buffer;
2786 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2787 						    trace_ctx);
2788 	}
2789 	return entry;
2790 }
2791 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2792 
2793 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2794 static DEFINE_MUTEX(tracepoint_printk_mutex);
2795 
2796 static void output_printk(struct trace_event_buffer *fbuffer)
2797 {
2798 	struct trace_event_call *event_call;
2799 	struct trace_event_file *file;
2800 	struct trace_event *event;
2801 	unsigned long flags;
2802 	struct trace_iterator *iter = tracepoint_print_iter;
2803 
2804 	/* We should never get here if iter is NULL */
2805 	if (WARN_ON_ONCE(!iter))
2806 		return;
2807 
2808 	event_call = fbuffer->trace_file->event_call;
2809 	if (!event_call || !event_call->event.funcs ||
2810 	    !event_call->event.funcs->trace)
2811 		return;
2812 
2813 	file = fbuffer->trace_file;
2814 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2815 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2816 	     !filter_match_preds(file->filter, fbuffer->entry)))
2817 		return;
2818 
2819 	event = &fbuffer->trace_file->event_call->event;
2820 
2821 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2822 	trace_seq_init(&iter->seq);
2823 	iter->ent = fbuffer->entry;
2824 	event_call->event.funcs->trace(iter, 0, event);
2825 	trace_seq_putc(&iter->seq, 0);
2826 	printk("%s", iter->seq.buffer);
2827 
2828 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2829 }
2830 
2831 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2832 			     void *buffer, size_t *lenp,
2833 			     loff_t *ppos)
2834 {
2835 	int save_tracepoint_printk;
2836 	int ret;
2837 
2838 	guard(mutex)(&tracepoint_printk_mutex);
2839 	save_tracepoint_printk = tracepoint_printk;
2840 
2841 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2842 
2843 	/*
2844 	 * This will force exiting early, as tracepoint_printk
2845 	 * is always zero when tracepoint_printk_iter is not allocated
2846 	 */
2847 	if (!tracepoint_print_iter)
2848 		tracepoint_printk = 0;
2849 
2850 	if (save_tracepoint_printk == tracepoint_printk)
2851 		return ret;
2852 
2853 	if (tracepoint_printk)
2854 		static_key_enable(&tracepoint_printk_key.key);
2855 	else
2856 		static_key_disable(&tracepoint_printk_key.key);
2857 
2858 	return ret;
2859 }
2860 
2861 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2862 {
2863 	enum event_trigger_type tt = ETT_NONE;
2864 	struct trace_event_file *file = fbuffer->trace_file;
2865 
2866 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2867 			fbuffer->entry, &tt))
2868 		goto discard;
2869 
2870 	if (static_key_false(&tracepoint_printk_key.key))
2871 		output_printk(fbuffer);
2872 
2873 	if (static_branch_unlikely(&trace_event_exports_enabled))
2874 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2875 
2876 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2877 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2878 
2879 discard:
2880 	if (tt)
2881 		event_triggers_post_call(file, tt);
2882 
2883 }
2884 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2885 
2886 /*
2887  * Skip 3:
2888  *
2889  *   trace_buffer_unlock_commit_regs()
2890  *   trace_event_buffer_commit()
2891  *   trace_event_raw_event_xxx()
2892  */
2893 # define STACK_SKIP 3
2894 
2895 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2896 				     struct trace_buffer *buffer,
2897 				     struct ring_buffer_event *event,
2898 				     unsigned int trace_ctx,
2899 				     struct pt_regs *regs)
2900 {
2901 	__buffer_unlock_commit(buffer, event);
2902 
2903 	/*
2904 	 * If regs is not set, then skip the necessary functions.
2905 	 * Note, we can still get here via blktrace, wakeup tracer
2906 	 * and mmiotrace, but that's ok if they lose a function or
2907 	 * two. They are not that meaningful.
2908 	 */
2909 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2910 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2911 }
2912 
2913 /*
2914  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2915  */
2916 void
2917 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2918 				   struct ring_buffer_event *event)
2919 {
2920 	__buffer_unlock_commit(buffer, event);
2921 }
2922 
2923 void
2924 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2925 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2926 {
2927 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2928 	struct ring_buffer_event *event;
2929 	struct ftrace_entry *entry;
2930 	int size = sizeof(*entry);
2931 
2932 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2933 
2934 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2935 					    trace_ctx);
2936 	if (!event)
2937 		return;
2938 	entry	= ring_buffer_event_data(event);
2939 	entry->ip			= ip;
2940 	entry->parent_ip		= parent_ip;
2941 
2942 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2943 	if (fregs) {
2944 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2945 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2946 	}
2947 #endif
2948 
2949 	if (static_branch_unlikely(&trace_function_exports_enabled))
2950 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2951 	__buffer_unlock_commit(buffer, event);
2952 }
2953 
2954 #ifdef CONFIG_STACKTRACE
2955 
2956 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2957 #define FTRACE_KSTACK_NESTING	4
2958 
2959 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2960 
2961 struct ftrace_stack {
2962 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2963 };
2964 
2965 
2966 struct ftrace_stacks {
2967 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2968 };
2969 
2970 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2971 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2972 
2973 static void __ftrace_trace_stack(struct trace_array *tr,
2974 				 struct trace_buffer *buffer,
2975 				 unsigned int trace_ctx,
2976 				 int skip, struct pt_regs *regs)
2977 {
2978 	struct ring_buffer_event *event;
2979 	unsigned int size, nr_entries;
2980 	struct ftrace_stack *fstack;
2981 	struct stack_entry *entry;
2982 	int stackidx;
2983 
2984 	/*
2985 	 * Add one, for this function and the call to save_stack_trace()
2986 	 * If regs is set, then these functions will not be in the way.
2987 	 */
2988 #ifndef CONFIG_UNWINDER_ORC
2989 	if (!regs)
2990 		skip++;
2991 #endif
2992 
2993 	preempt_disable_notrace();
2994 
2995 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2996 
2997 	/* This should never happen. If it does, yell once and skip */
2998 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2999 		goto out;
3000 
3001 	/*
3002 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3003 	 * interrupt will either see the value pre increment or post
3004 	 * increment. If the interrupt happens pre increment it will have
3005 	 * restored the counter when it returns.  We just need a barrier to
3006 	 * keep gcc from moving things around.
3007 	 */
3008 	barrier();
3009 
3010 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3011 	size = ARRAY_SIZE(fstack->calls);
3012 
3013 	if (regs) {
3014 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3015 						   size, skip);
3016 	} else {
3017 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3018 	}
3019 
3020 #ifdef CONFIG_DYNAMIC_FTRACE
3021 	/* Mark entry of stack trace as trampoline code */
3022 	if (tr->ops && tr->ops->trampoline) {
3023 		unsigned long tramp_start = tr->ops->trampoline;
3024 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3025 		unsigned long *calls = fstack->calls;
3026 
3027 		for (int i = 0; i < nr_entries; i++) {
3028 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3029 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3030 		}
3031 	}
3032 #endif
3033 
3034 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3035 				    struct_size(entry, caller, nr_entries),
3036 				    trace_ctx);
3037 	if (!event)
3038 		goto out;
3039 	entry = ring_buffer_event_data(event);
3040 
3041 	entry->size = nr_entries;
3042 	memcpy(&entry->caller, fstack->calls,
3043 	       flex_array_size(entry, caller, nr_entries));
3044 
3045 	__buffer_unlock_commit(buffer, event);
3046 
3047  out:
3048 	/* Again, don't let gcc optimize things here */
3049 	barrier();
3050 	__this_cpu_dec(ftrace_stack_reserve);
3051 	preempt_enable_notrace();
3052 
3053 }
3054 
3055 static inline void ftrace_trace_stack(struct trace_array *tr,
3056 				      struct trace_buffer *buffer,
3057 				      unsigned int trace_ctx,
3058 				      int skip, struct pt_regs *regs)
3059 {
3060 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3061 		return;
3062 
3063 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3064 }
3065 
3066 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3067 		   int skip)
3068 {
3069 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3070 
3071 	if (rcu_is_watching()) {
3072 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3073 		return;
3074 	}
3075 
3076 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3077 		return;
3078 
3079 	/*
3080 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3081 	 * but if the above rcu_is_watching() failed, then the NMI
3082 	 * triggered someplace critical, and ct_irq_enter() should
3083 	 * not be called from NMI.
3084 	 */
3085 	if (unlikely(in_nmi()))
3086 		return;
3087 
3088 	ct_irq_enter_irqson();
3089 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3090 	ct_irq_exit_irqson();
3091 }
3092 
3093 /**
3094  * trace_dump_stack - record a stack back trace in the trace buffer
3095  * @skip: Number of functions to skip (helper handlers)
3096  */
3097 void trace_dump_stack(int skip)
3098 {
3099 	if (tracing_disabled || tracing_selftest_running)
3100 		return;
3101 
3102 #ifndef CONFIG_UNWINDER_ORC
3103 	/* Skip 1 to skip this function. */
3104 	skip++;
3105 #endif
3106 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3107 				tracing_gen_ctx(), skip, NULL);
3108 }
3109 EXPORT_SYMBOL_GPL(trace_dump_stack);
3110 
3111 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3112 static DEFINE_PER_CPU(int, user_stack_count);
3113 
3114 static void
3115 ftrace_trace_userstack(struct trace_array *tr,
3116 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3117 {
3118 	struct ring_buffer_event *event;
3119 	struct userstack_entry *entry;
3120 
3121 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3122 		return;
3123 
3124 	/*
3125 	 * NMIs can not handle page faults, even with fix ups.
3126 	 * The save user stack can (and often does) fault.
3127 	 */
3128 	if (unlikely(in_nmi()))
3129 		return;
3130 
3131 	/*
3132 	 * prevent recursion, since the user stack tracing may
3133 	 * trigger other kernel events.
3134 	 */
3135 	preempt_disable();
3136 	if (__this_cpu_read(user_stack_count))
3137 		goto out;
3138 
3139 	__this_cpu_inc(user_stack_count);
3140 
3141 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3142 					    sizeof(*entry), trace_ctx);
3143 	if (!event)
3144 		goto out_drop_count;
3145 	entry	= ring_buffer_event_data(event);
3146 
3147 	entry->tgid		= current->tgid;
3148 	memset(&entry->caller, 0, sizeof(entry->caller));
3149 
3150 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3151 	__buffer_unlock_commit(buffer, event);
3152 
3153  out_drop_count:
3154 	__this_cpu_dec(user_stack_count);
3155  out:
3156 	preempt_enable();
3157 }
3158 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3159 static void ftrace_trace_userstack(struct trace_array *tr,
3160 				   struct trace_buffer *buffer,
3161 				   unsigned int trace_ctx)
3162 {
3163 }
3164 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3165 
3166 #endif /* CONFIG_STACKTRACE */
3167 
3168 static inline void
3169 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3170 			  unsigned long long delta)
3171 {
3172 	entry->bottom_delta_ts = delta & U32_MAX;
3173 	entry->top_delta_ts = (delta >> 32);
3174 }
3175 
3176 void trace_last_func_repeats(struct trace_array *tr,
3177 			     struct trace_func_repeats *last_info,
3178 			     unsigned int trace_ctx)
3179 {
3180 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3181 	struct func_repeats_entry *entry;
3182 	struct ring_buffer_event *event;
3183 	u64 delta;
3184 
3185 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3186 					    sizeof(*entry), trace_ctx);
3187 	if (!event)
3188 		return;
3189 
3190 	delta = ring_buffer_event_time_stamp(buffer, event) -
3191 		last_info->ts_last_call;
3192 
3193 	entry = ring_buffer_event_data(event);
3194 	entry->ip = last_info->ip;
3195 	entry->parent_ip = last_info->parent_ip;
3196 	entry->count = last_info->count;
3197 	func_repeats_set_delta_ts(entry, delta);
3198 
3199 	__buffer_unlock_commit(buffer, event);
3200 }
3201 
3202 /* created for use with alloc_percpu */
3203 struct trace_buffer_struct {
3204 	int nesting;
3205 	char buffer[4][TRACE_BUF_SIZE];
3206 };
3207 
3208 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3209 
3210 /*
3211  * This allows for lockless recording.  If we're nested too deeply, then
3212  * this returns NULL.
3213  */
3214 static char *get_trace_buf(void)
3215 {
3216 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3217 
3218 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3219 		return NULL;
3220 
3221 	buffer->nesting++;
3222 
3223 	/* Interrupts must see nesting incremented before we use the buffer */
3224 	barrier();
3225 	return &buffer->buffer[buffer->nesting - 1][0];
3226 }
3227 
3228 static void put_trace_buf(void)
3229 {
3230 	/* Don't let the decrement of nesting leak before this */
3231 	barrier();
3232 	this_cpu_dec(trace_percpu_buffer->nesting);
3233 }
3234 
3235 static int alloc_percpu_trace_buffer(void)
3236 {
3237 	struct trace_buffer_struct __percpu *buffers;
3238 
3239 	if (trace_percpu_buffer)
3240 		return 0;
3241 
3242 	buffers = alloc_percpu(struct trace_buffer_struct);
3243 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3244 		return -ENOMEM;
3245 
3246 	trace_percpu_buffer = buffers;
3247 	return 0;
3248 }
3249 
3250 static int buffers_allocated;
3251 
3252 void trace_printk_init_buffers(void)
3253 {
3254 	if (buffers_allocated)
3255 		return;
3256 
3257 	if (alloc_percpu_trace_buffer())
3258 		return;
3259 
3260 	/* trace_printk() is for debug use only. Don't use it in production. */
3261 
3262 	pr_warn("\n");
3263 	pr_warn("**********************************************************\n");
3264 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3265 	pr_warn("**                                                      **\n");
3266 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3267 	pr_warn("**                                                      **\n");
3268 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3269 	pr_warn("** unsafe for production use.                           **\n");
3270 	pr_warn("**                                                      **\n");
3271 	pr_warn("** If you see this message and you are not debugging    **\n");
3272 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3273 	pr_warn("**                                                      **\n");
3274 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3275 	pr_warn("**********************************************************\n");
3276 
3277 	/* Expand the buffers to set size */
3278 	tracing_update_buffers(&global_trace);
3279 
3280 	buffers_allocated = 1;
3281 
3282 	/*
3283 	 * trace_printk_init_buffers() can be called by modules.
3284 	 * If that happens, then we need to start cmdline recording
3285 	 * directly here. If the global_trace.buffer is already
3286 	 * allocated here, then this was called by module code.
3287 	 */
3288 	if (global_trace.array_buffer.buffer)
3289 		tracing_start_cmdline_record();
3290 }
3291 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3292 
3293 void trace_printk_start_comm(void)
3294 {
3295 	/* Start tracing comms if trace printk is set */
3296 	if (!buffers_allocated)
3297 		return;
3298 	tracing_start_cmdline_record();
3299 }
3300 
3301 static void trace_printk_start_stop_comm(int enabled)
3302 {
3303 	if (!buffers_allocated)
3304 		return;
3305 
3306 	if (enabled)
3307 		tracing_start_cmdline_record();
3308 	else
3309 		tracing_stop_cmdline_record();
3310 }
3311 
3312 /**
3313  * trace_vbprintk - write binary msg to tracing buffer
3314  * @ip:    The address of the caller
3315  * @fmt:   The string format to write to the buffer
3316  * @args:  Arguments for @fmt
3317  */
3318 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3319 {
3320 	struct ring_buffer_event *event;
3321 	struct trace_buffer *buffer;
3322 	struct trace_array *tr = READ_ONCE(printk_trace);
3323 	struct bprint_entry *entry;
3324 	unsigned int trace_ctx;
3325 	char *tbuffer;
3326 	int len = 0, size;
3327 
3328 	if (!printk_binsafe(tr))
3329 		return trace_vprintk(ip, fmt, args);
3330 
3331 	if (unlikely(tracing_selftest_running || tracing_disabled))
3332 		return 0;
3333 
3334 	/* Don't pollute graph traces with trace_vprintk internals */
3335 	pause_graph_tracing();
3336 
3337 	trace_ctx = tracing_gen_ctx();
3338 	preempt_disable_notrace();
3339 
3340 	tbuffer = get_trace_buf();
3341 	if (!tbuffer) {
3342 		len = 0;
3343 		goto out_nobuffer;
3344 	}
3345 
3346 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3347 
3348 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3349 		goto out_put;
3350 
3351 	size = sizeof(*entry) + sizeof(u32) * len;
3352 	buffer = tr->array_buffer.buffer;
3353 	ring_buffer_nest_start(buffer);
3354 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3355 					    trace_ctx);
3356 	if (!event)
3357 		goto out;
3358 	entry = ring_buffer_event_data(event);
3359 	entry->ip			= ip;
3360 	entry->fmt			= fmt;
3361 
3362 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3363 	__buffer_unlock_commit(buffer, event);
3364 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3365 
3366 out:
3367 	ring_buffer_nest_end(buffer);
3368 out_put:
3369 	put_trace_buf();
3370 
3371 out_nobuffer:
3372 	preempt_enable_notrace();
3373 	unpause_graph_tracing();
3374 
3375 	return len;
3376 }
3377 EXPORT_SYMBOL_GPL(trace_vbprintk);
3378 
3379 static __printf(3, 0)
3380 int __trace_array_vprintk(struct trace_buffer *buffer,
3381 			  unsigned long ip, const char *fmt, va_list args)
3382 {
3383 	struct ring_buffer_event *event;
3384 	int len = 0, size;
3385 	struct print_entry *entry;
3386 	unsigned int trace_ctx;
3387 	char *tbuffer;
3388 
3389 	if (tracing_disabled)
3390 		return 0;
3391 
3392 	/* Don't pollute graph traces with trace_vprintk internals */
3393 	pause_graph_tracing();
3394 
3395 	trace_ctx = tracing_gen_ctx();
3396 	preempt_disable_notrace();
3397 
3398 
3399 	tbuffer = get_trace_buf();
3400 	if (!tbuffer) {
3401 		len = 0;
3402 		goto out_nobuffer;
3403 	}
3404 
3405 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3406 
3407 	size = sizeof(*entry) + len + 1;
3408 	ring_buffer_nest_start(buffer);
3409 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3410 					    trace_ctx);
3411 	if (!event)
3412 		goto out;
3413 	entry = ring_buffer_event_data(event);
3414 	entry->ip = ip;
3415 
3416 	memcpy(&entry->buf, tbuffer, len + 1);
3417 	__buffer_unlock_commit(buffer, event);
3418 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3419 
3420 out:
3421 	ring_buffer_nest_end(buffer);
3422 	put_trace_buf();
3423 
3424 out_nobuffer:
3425 	preempt_enable_notrace();
3426 	unpause_graph_tracing();
3427 
3428 	return len;
3429 }
3430 
3431 int trace_array_vprintk(struct trace_array *tr,
3432 			unsigned long ip, const char *fmt, va_list args)
3433 {
3434 	if (tracing_selftest_running && tr == &global_trace)
3435 		return 0;
3436 
3437 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3438 }
3439 
3440 /**
3441  * trace_array_printk - Print a message to a specific instance
3442  * @tr: The instance trace_array descriptor
3443  * @ip: The instruction pointer that this is called from.
3444  * @fmt: The format to print (printf format)
3445  *
3446  * If a subsystem sets up its own instance, they have the right to
3447  * printk strings into their tracing instance buffer using this
3448  * function. Note, this function will not write into the top level
3449  * buffer (use trace_printk() for that), as writing into the top level
3450  * buffer should only have events that can be individually disabled.
3451  * trace_printk() is only used for debugging a kernel, and should not
3452  * be ever incorporated in normal use.
3453  *
3454  * trace_array_printk() can be used, as it will not add noise to the
3455  * top level tracing buffer.
3456  *
3457  * Note, trace_array_init_printk() must be called on @tr before this
3458  * can be used.
3459  */
3460 int trace_array_printk(struct trace_array *tr,
3461 		       unsigned long ip, const char *fmt, ...)
3462 {
3463 	int ret;
3464 	va_list ap;
3465 
3466 	if (!tr)
3467 		return -ENOENT;
3468 
3469 	/* This is only allowed for created instances */
3470 	if (tr == &global_trace)
3471 		return 0;
3472 
3473 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3474 		return 0;
3475 
3476 	va_start(ap, fmt);
3477 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3478 	va_end(ap);
3479 	return ret;
3480 }
3481 EXPORT_SYMBOL_GPL(trace_array_printk);
3482 
3483 /**
3484  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3485  * @tr: The trace array to initialize the buffers for
3486  *
3487  * As trace_array_printk() only writes into instances, they are OK to
3488  * have in the kernel (unlike trace_printk()). This needs to be called
3489  * before trace_array_printk() can be used on a trace_array.
3490  */
3491 int trace_array_init_printk(struct trace_array *tr)
3492 {
3493 	if (!tr)
3494 		return -ENOENT;
3495 
3496 	/* This is only allowed for created instances */
3497 	if (tr == &global_trace)
3498 		return -EINVAL;
3499 
3500 	return alloc_percpu_trace_buffer();
3501 }
3502 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3503 
3504 int trace_array_printk_buf(struct trace_buffer *buffer,
3505 			   unsigned long ip, const char *fmt, ...)
3506 {
3507 	int ret;
3508 	va_list ap;
3509 
3510 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3511 		return 0;
3512 
3513 	va_start(ap, fmt);
3514 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3515 	va_end(ap);
3516 	return ret;
3517 }
3518 
3519 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3520 {
3521 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3522 }
3523 EXPORT_SYMBOL_GPL(trace_vprintk);
3524 
3525 static void trace_iterator_increment(struct trace_iterator *iter)
3526 {
3527 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3528 
3529 	iter->idx++;
3530 	if (buf_iter)
3531 		ring_buffer_iter_advance(buf_iter);
3532 }
3533 
3534 static struct trace_entry *
3535 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3536 		unsigned long *lost_events)
3537 {
3538 	struct ring_buffer_event *event;
3539 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3540 
3541 	if (buf_iter) {
3542 		event = ring_buffer_iter_peek(buf_iter, ts);
3543 		if (lost_events)
3544 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3545 				(unsigned long)-1 : 0;
3546 	} else {
3547 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3548 					 lost_events);
3549 	}
3550 
3551 	if (event) {
3552 		iter->ent_size = ring_buffer_event_length(event);
3553 		return ring_buffer_event_data(event);
3554 	}
3555 	iter->ent_size = 0;
3556 	return NULL;
3557 }
3558 
3559 static struct trace_entry *
3560 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3561 		  unsigned long *missing_events, u64 *ent_ts)
3562 {
3563 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3564 	struct trace_entry *ent, *next = NULL;
3565 	unsigned long lost_events = 0, next_lost = 0;
3566 	int cpu_file = iter->cpu_file;
3567 	u64 next_ts = 0, ts;
3568 	int next_cpu = -1;
3569 	int next_size = 0;
3570 	int cpu;
3571 
3572 	/*
3573 	 * If we are in a per_cpu trace file, don't bother by iterating over
3574 	 * all cpu and peek directly.
3575 	 */
3576 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3577 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3578 			return NULL;
3579 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3580 		if (ent_cpu)
3581 			*ent_cpu = cpu_file;
3582 
3583 		return ent;
3584 	}
3585 
3586 	for_each_tracing_cpu(cpu) {
3587 
3588 		if (ring_buffer_empty_cpu(buffer, cpu))
3589 			continue;
3590 
3591 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3592 
3593 		/*
3594 		 * Pick the entry with the smallest timestamp:
3595 		 */
3596 		if (ent && (!next || ts < next_ts)) {
3597 			next = ent;
3598 			next_cpu = cpu;
3599 			next_ts = ts;
3600 			next_lost = lost_events;
3601 			next_size = iter->ent_size;
3602 		}
3603 	}
3604 
3605 	iter->ent_size = next_size;
3606 
3607 	if (ent_cpu)
3608 		*ent_cpu = next_cpu;
3609 
3610 	if (ent_ts)
3611 		*ent_ts = next_ts;
3612 
3613 	if (missing_events)
3614 		*missing_events = next_lost;
3615 
3616 	return next;
3617 }
3618 
3619 #define STATIC_FMT_BUF_SIZE	128
3620 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3621 
3622 char *trace_iter_expand_format(struct trace_iterator *iter)
3623 {
3624 	char *tmp;
3625 
3626 	/*
3627 	 * iter->tr is NULL when used with tp_printk, which makes
3628 	 * this get called where it is not safe to call krealloc().
3629 	 */
3630 	if (!iter->tr || iter->fmt == static_fmt_buf)
3631 		return NULL;
3632 
3633 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3634 		       GFP_KERNEL);
3635 	if (tmp) {
3636 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3637 		iter->fmt = tmp;
3638 	}
3639 
3640 	return tmp;
3641 }
3642 
3643 /* Returns true if the string is safe to dereference from an event */
3644 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3645 {
3646 	unsigned long addr = (unsigned long)str;
3647 	struct trace_event *trace_event;
3648 	struct trace_event_call *event;
3649 
3650 	/* OK if part of the event data */
3651 	if ((addr >= (unsigned long)iter->ent) &&
3652 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3653 		return true;
3654 
3655 	/* OK if part of the temp seq buffer */
3656 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3657 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3658 		return true;
3659 
3660 	/* Core rodata can not be freed */
3661 	if (is_kernel_rodata(addr))
3662 		return true;
3663 
3664 	if (trace_is_tracepoint_string(str))
3665 		return true;
3666 
3667 	/*
3668 	 * Now this could be a module event, referencing core module
3669 	 * data, which is OK.
3670 	 */
3671 	if (!iter->ent)
3672 		return false;
3673 
3674 	trace_event = ftrace_find_event(iter->ent->type);
3675 	if (!trace_event)
3676 		return false;
3677 
3678 	event = container_of(trace_event, struct trace_event_call, event);
3679 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3680 		return false;
3681 
3682 	/* Would rather have rodata, but this will suffice */
3683 	if (within_module_core(addr, event->module))
3684 		return true;
3685 
3686 	return false;
3687 }
3688 
3689 /**
3690  * ignore_event - Check dereferenced fields while writing to the seq buffer
3691  * @iter: The iterator that holds the seq buffer and the event being printed
3692  *
3693  * At boot up, test_event_printk() will flag any event that dereferences
3694  * a string with "%s" that does exist in the ring buffer. It may still
3695  * be valid, as the string may point to a static string in the kernel
3696  * rodata that never gets freed. But if the string pointer is pointing
3697  * to something that was allocated, there's a chance that it can be freed
3698  * by the time the user reads the trace. This would cause a bad memory
3699  * access by the kernel and possibly crash the system.
3700  *
3701  * This function will check if the event has any fields flagged as needing
3702  * to be checked at runtime and perform those checks.
3703  *
3704  * If it is found that a field is unsafe, it will write into the @iter->seq
3705  * a message stating what was found to be unsafe.
3706  *
3707  * @return: true if the event is unsafe and should be ignored,
3708  *          false otherwise.
3709  */
3710 bool ignore_event(struct trace_iterator *iter)
3711 {
3712 	struct ftrace_event_field *field;
3713 	struct trace_event *trace_event;
3714 	struct trace_event_call *event;
3715 	struct list_head *head;
3716 	struct trace_seq *seq;
3717 	const void *ptr;
3718 
3719 	trace_event = ftrace_find_event(iter->ent->type);
3720 
3721 	seq = &iter->seq;
3722 
3723 	if (!trace_event) {
3724 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3725 		return true;
3726 	}
3727 
3728 	event = container_of(trace_event, struct trace_event_call, event);
3729 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3730 		return false;
3731 
3732 	head = trace_get_fields(event);
3733 	if (!head) {
3734 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3735 				 trace_event_name(event));
3736 		return true;
3737 	}
3738 
3739 	/* Offsets are from the iter->ent that points to the raw event */
3740 	ptr = iter->ent;
3741 
3742 	list_for_each_entry(field, head, link) {
3743 		const char *str;
3744 		bool good;
3745 
3746 		if (!field->needs_test)
3747 			continue;
3748 
3749 		str = *(const char **)(ptr + field->offset);
3750 
3751 		good = trace_safe_str(iter, str);
3752 
3753 		/*
3754 		 * If you hit this warning, it is likely that the
3755 		 * trace event in question used %s on a string that
3756 		 * was saved at the time of the event, but may not be
3757 		 * around when the trace is read. Use __string(),
3758 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3759 		 * instead. See samples/trace_events/trace-events-sample.h
3760 		 * for reference.
3761 		 */
3762 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3763 			      trace_event_name(event), field->name)) {
3764 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3765 					 trace_event_name(event), field->name);
3766 			return true;
3767 		}
3768 	}
3769 	return false;
3770 }
3771 
3772 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3773 {
3774 	const char *p, *new_fmt;
3775 	char *q;
3776 
3777 	if (WARN_ON_ONCE(!fmt))
3778 		return fmt;
3779 
3780 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3781 		return fmt;
3782 
3783 	p = fmt;
3784 	new_fmt = q = iter->fmt;
3785 	while (*p) {
3786 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3787 			if (!trace_iter_expand_format(iter))
3788 				return fmt;
3789 
3790 			q += iter->fmt - new_fmt;
3791 			new_fmt = iter->fmt;
3792 		}
3793 
3794 		*q++ = *p++;
3795 
3796 		/* Replace %p with %px */
3797 		if (p[-1] == '%') {
3798 			if (p[0] == '%') {
3799 				*q++ = *p++;
3800 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3801 				*q++ = *p++;
3802 				*q++ = 'x';
3803 			}
3804 		}
3805 	}
3806 	*q = '\0';
3807 
3808 	return new_fmt;
3809 }
3810 
3811 #define STATIC_TEMP_BUF_SIZE	128
3812 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3813 
3814 /* Find the next real entry, without updating the iterator itself */
3815 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3816 					  int *ent_cpu, u64 *ent_ts)
3817 {
3818 	/* __find_next_entry will reset ent_size */
3819 	int ent_size = iter->ent_size;
3820 	struct trace_entry *entry;
3821 
3822 	/*
3823 	 * If called from ftrace_dump(), then the iter->temp buffer
3824 	 * will be the static_temp_buf and not created from kmalloc.
3825 	 * If the entry size is greater than the buffer, we can
3826 	 * not save it. Just return NULL in that case. This is only
3827 	 * used to add markers when two consecutive events' time
3828 	 * stamps have a large delta. See trace_print_lat_context()
3829 	 */
3830 	if (iter->temp == static_temp_buf &&
3831 	    STATIC_TEMP_BUF_SIZE < ent_size)
3832 		return NULL;
3833 
3834 	/*
3835 	 * The __find_next_entry() may call peek_next_entry(), which may
3836 	 * call ring_buffer_peek() that may make the contents of iter->ent
3837 	 * undefined. Need to copy iter->ent now.
3838 	 */
3839 	if (iter->ent && iter->ent != iter->temp) {
3840 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3841 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3842 			void *temp;
3843 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3844 			if (!temp)
3845 				return NULL;
3846 			kfree(iter->temp);
3847 			iter->temp = temp;
3848 			iter->temp_size = iter->ent_size;
3849 		}
3850 		memcpy(iter->temp, iter->ent, iter->ent_size);
3851 		iter->ent = iter->temp;
3852 	}
3853 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3854 	/* Put back the original ent_size */
3855 	iter->ent_size = ent_size;
3856 
3857 	return entry;
3858 }
3859 
3860 /* Find the next real entry, and increment the iterator to the next entry */
3861 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3862 {
3863 	iter->ent = __find_next_entry(iter, &iter->cpu,
3864 				      &iter->lost_events, &iter->ts);
3865 
3866 	if (iter->ent)
3867 		trace_iterator_increment(iter);
3868 
3869 	return iter->ent ? iter : NULL;
3870 }
3871 
3872 static void trace_consume(struct trace_iterator *iter)
3873 {
3874 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3875 			    &iter->lost_events);
3876 }
3877 
3878 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3879 {
3880 	struct trace_iterator *iter = m->private;
3881 	int i = (int)*pos;
3882 	void *ent;
3883 
3884 	WARN_ON_ONCE(iter->leftover);
3885 
3886 	(*pos)++;
3887 
3888 	/* can't go backwards */
3889 	if (iter->idx > i)
3890 		return NULL;
3891 
3892 	if (iter->idx < 0)
3893 		ent = trace_find_next_entry_inc(iter);
3894 	else
3895 		ent = iter;
3896 
3897 	while (ent && iter->idx < i)
3898 		ent = trace_find_next_entry_inc(iter);
3899 
3900 	iter->pos = *pos;
3901 
3902 	return ent;
3903 }
3904 
3905 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3906 {
3907 	struct ring_buffer_iter *buf_iter;
3908 	unsigned long entries = 0;
3909 	u64 ts;
3910 
3911 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3912 
3913 	buf_iter = trace_buffer_iter(iter, cpu);
3914 	if (!buf_iter)
3915 		return;
3916 
3917 	ring_buffer_iter_reset(buf_iter);
3918 
3919 	/*
3920 	 * We could have the case with the max latency tracers
3921 	 * that a reset never took place on a cpu. This is evident
3922 	 * by the timestamp being before the start of the buffer.
3923 	 */
3924 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3925 		if (ts >= iter->array_buffer->time_start)
3926 			break;
3927 		entries++;
3928 		ring_buffer_iter_advance(buf_iter);
3929 		/* This could be a big loop */
3930 		cond_resched();
3931 	}
3932 
3933 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3934 }
3935 
3936 /*
3937  * The current tracer is copied to avoid a global locking
3938  * all around.
3939  */
3940 static void *s_start(struct seq_file *m, loff_t *pos)
3941 {
3942 	struct trace_iterator *iter = m->private;
3943 	struct trace_array *tr = iter->tr;
3944 	int cpu_file = iter->cpu_file;
3945 	void *p = NULL;
3946 	loff_t l = 0;
3947 	int cpu;
3948 
3949 	mutex_lock(&trace_types_lock);
3950 	if (unlikely(tr->current_trace != iter->trace)) {
3951 		/* Close iter->trace before switching to the new current tracer */
3952 		if (iter->trace->close)
3953 			iter->trace->close(iter);
3954 		iter->trace = tr->current_trace;
3955 		/* Reopen the new current tracer */
3956 		if (iter->trace->open)
3957 			iter->trace->open(iter);
3958 	}
3959 	mutex_unlock(&trace_types_lock);
3960 
3961 #ifdef CONFIG_TRACER_MAX_TRACE
3962 	if (iter->snapshot && iter->trace->use_max_tr)
3963 		return ERR_PTR(-EBUSY);
3964 #endif
3965 
3966 	if (*pos != iter->pos) {
3967 		iter->ent = NULL;
3968 		iter->cpu = 0;
3969 		iter->idx = -1;
3970 
3971 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3972 			for_each_tracing_cpu(cpu)
3973 				tracing_iter_reset(iter, cpu);
3974 		} else
3975 			tracing_iter_reset(iter, cpu_file);
3976 
3977 		iter->leftover = 0;
3978 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3979 			;
3980 
3981 	} else {
3982 		/*
3983 		 * If we overflowed the seq_file before, then we want
3984 		 * to just reuse the trace_seq buffer again.
3985 		 */
3986 		if (iter->leftover)
3987 			p = iter;
3988 		else {
3989 			l = *pos - 1;
3990 			p = s_next(m, p, &l);
3991 		}
3992 	}
3993 
3994 	trace_event_read_lock();
3995 	trace_access_lock(cpu_file);
3996 	return p;
3997 }
3998 
3999 static void s_stop(struct seq_file *m, void *p)
4000 {
4001 	struct trace_iterator *iter = m->private;
4002 
4003 #ifdef CONFIG_TRACER_MAX_TRACE
4004 	if (iter->snapshot && iter->trace->use_max_tr)
4005 		return;
4006 #endif
4007 
4008 	trace_access_unlock(iter->cpu_file);
4009 	trace_event_read_unlock();
4010 }
4011 
4012 static void
4013 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4014 		      unsigned long *entries, int cpu)
4015 {
4016 	unsigned long count;
4017 
4018 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4019 	/*
4020 	 * If this buffer has skipped entries, then we hold all
4021 	 * entries for the trace and we need to ignore the
4022 	 * ones before the time stamp.
4023 	 */
4024 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4025 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4026 		/* total is the same as the entries */
4027 		*total = count;
4028 	} else
4029 		*total = count +
4030 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4031 	*entries = count;
4032 }
4033 
4034 static void
4035 get_total_entries(struct array_buffer *buf,
4036 		  unsigned long *total, unsigned long *entries)
4037 {
4038 	unsigned long t, e;
4039 	int cpu;
4040 
4041 	*total = 0;
4042 	*entries = 0;
4043 
4044 	for_each_tracing_cpu(cpu) {
4045 		get_total_entries_cpu(buf, &t, &e, cpu);
4046 		*total += t;
4047 		*entries += e;
4048 	}
4049 }
4050 
4051 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4052 {
4053 	unsigned long total, entries;
4054 
4055 	if (!tr)
4056 		tr = &global_trace;
4057 
4058 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4059 
4060 	return entries;
4061 }
4062 
4063 unsigned long trace_total_entries(struct trace_array *tr)
4064 {
4065 	unsigned long total, entries;
4066 
4067 	if (!tr)
4068 		tr = &global_trace;
4069 
4070 	get_total_entries(&tr->array_buffer, &total, &entries);
4071 
4072 	return entries;
4073 }
4074 
4075 static void print_lat_help_header(struct seq_file *m)
4076 {
4077 	seq_puts(m, "#                    _------=> CPU#            \n"
4078 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4079 		    "#                  | / _----=> need-resched    \n"
4080 		    "#                  || / _---=> hardirq/softirq \n"
4081 		    "#                  ||| / _--=> preempt-depth   \n"
4082 		    "#                  |||| / _-=> migrate-disable \n"
4083 		    "#                  ||||| /     delay           \n"
4084 		    "#  cmd     pid     |||||| time  |   caller     \n"
4085 		    "#     \\   /        ||||||  \\    |    /       \n");
4086 }
4087 
4088 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4089 {
4090 	unsigned long total;
4091 	unsigned long entries;
4092 
4093 	get_total_entries(buf, &total, &entries);
4094 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4095 		   entries, total, num_online_cpus());
4096 	seq_puts(m, "#\n");
4097 }
4098 
4099 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4100 				   unsigned int flags)
4101 {
4102 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4103 
4104 	print_event_info(buf, m);
4105 
4106 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4107 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4108 }
4109 
4110 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4111 				       unsigned int flags)
4112 {
4113 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4114 	static const char space[] = "            ";
4115 	int prec = tgid ? 12 : 2;
4116 
4117 	print_event_info(buf, m);
4118 
4119 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4120 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4121 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4122 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4123 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4124 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4125 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4126 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4127 }
4128 
4129 void
4130 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4131 {
4132 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4133 	struct array_buffer *buf = iter->array_buffer;
4134 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4135 	struct tracer *type = iter->trace;
4136 	unsigned long entries;
4137 	unsigned long total;
4138 	const char *name = type->name;
4139 
4140 	get_total_entries(buf, &total, &entries);
4141 
4142 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4143 		   name, init_utsname()->release);
4144 	seq_puts(m, "# -----------------------------------"
4145 		 "---------------------------------\n");
4146 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4147 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4148 		   nsecs_to_usecs(data->saved_latency),
4149 		   entries,
4150 		   total,
4151 		   buf->cpu,
4152 		   preempt_model_str(),
4153 		   /* These are reserved for later use */
4154 		   0, 0, 0, 0);
4155 #ifdef CONFIG_SMP
4156 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4157 #else
4158 	seq_puts(m, ")\n");
4159 #endif
4160 	seq_puts(m, "#    -----------------\n");
4161 	seq_printf(m, "#    | task: %.16s-%d "
4162 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4163 		   data->comm, data->pid,
4164 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4165 		   data->policy, data->rt_priority);
4166 	seq_puts(m, "#    -----------------\n");
4167 
4168 	if (data->critical_start) {
4169 		seq_puts(m, "#  => started at: ");
4170 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4171 		trace_print_seq(m, &iter->seq);
4172 		seq_puts(m, "\n#  => ended at:   ");
4173 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4174 		trace_print_seq(m, &iter->seq);
4175 		seq_puts(m, "\n#\n");
4176 	}
4177 
4178 	seq_puts(m, "#\n");
4179 }
4180 
4181 static void test_cpu_buff_start(struct trace_iterator *iter)
4182 {
4183 	struct trace_seq *s = &iter->seq;
4184 	struct trace_array *tr = iter->tr;
4185 
4186 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4187 		return;
4188 
4189 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4190 		return;
4191 
4192 	if (cpumask_available(iter->started) &&
4193 	    cpumask_test_cpu(iter->cpu, iter->started))
4194 		return;
4195 
4196 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4197 		return;
4198 
4199 	if (cpumask_available(iter->started))
4200 		cpumask_set_cpu(iter->cpu, iter->started);
4201 
4202 	/* Don't print started cpu buffer for the first entry of the trace */
4203 	if (iter->idx > 1)
4204 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4205 				iter->cpu);
4206 }
4207 
4208 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4209 {
4210 	struct trace_array *tr = iter->tr;
4211 	struct trace_seq *s = &iter->seq;
4212 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4213 	struct trace_entry *entry;
4214 	struct trace_event *event;
4215 
4216 	entry = iter->ent;
4217 
4218 	test_cpu_buff_start(iter);
4219 
4220 	event = ftrace_find_event(entry->type);
4221 
4222 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4223 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4224 			trace_print_lat_context(iter);
4225 		else
4226 			trace_print_context(iter);
4227 	}
4228 
4229 	if (trace_seq_has_overflowed(s))
4230 		return TRACE_TYPE_PARTIAL_LINE;
4231 
4232 	if (event) {
4233 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4234 			return print_event_fields(iter, event);
4235 		/*
4236 		 * For TRACE_EVENT() events, the print_fmt is not
4237 		 * safe to use if the array has delta offsets
4238 		 * Force printing via the fields.
4239 		 */
4240 		if ((tr->text_delta) &&
4241 		    event->type > __TRACE_LAST_TYPE)
4242 			return print_event_fields(iter, event);
4243 
4244 		return event->funcs->trace(iter, sym_flags, event);
4245 	}
4246 
4247 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4248 
4249 	return trace_handle_return(s);
4250 }
4251 
4252 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4253 {
4254 	struct trace_array *tr = iter->tr;
4255 	struct trace_seq *s = &iter->seq;
4256 	struct trace_entry *entry;
4257 	struct trace_event *event;
4258 
4259 	entry = iter->ent;
4260 
4261 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4262 		trace_seq_printf(s, "%d %d %llu ",
4263 				 entry->pid, iter->cpu, iter->ts);
4264 
4265 	if (trace_seq_has_overflowed(s))
4266 		return TRACE_TYPE_PARTIAL_LINE;
4267 
4268 	event = ftrace_find_event(entry->type);
4269 	if (event)
4270 		return event->funcs->raw(iter, 0, event);
4271 
4272 	trace_seq_printf(s, "%d ?\n", entry->type);
4273 
4274 	return trace_handle_return(s);
4275 }
4276 
4277 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4278 {
4279 	struct trace_array *tr = iter->tr;
4280 	struct trace_seq *s = &iter->seq;
4281 	unsigned char newline = '\n';
4282 	struct trace_entry *entry;
4283 	struct trace_event *event;
4284 
4285 	entry = iter->ent;
4286 
4287 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4288 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4289 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4290 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4291 		if (trace_seq_has_overflowed(s))
4292 			return TRACE_TYPE_PARTIAL_LINE;
4293 	}
4294 
4295 	event = ftrace_find_event(entry->type);
4296 	if (event) {
4297 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4298 		if (ret != TRACE_TYPE_HANDLED)
4299 			return ret;
4300 	}
4301 
4302 	SEQ_PUT_FIELD(s, newline);
4303 
4304 	return trace_handle_return(s);
4305 }
4306 
4307 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4308 {
4309 	struct trace_array *tr = iter->tr;
4310 	struct trace_seq *s = &iter->seq;
4311 	struct trace_entry *entry;
4312 	struct trace_event *event;
4313 
4314 	entry = iter->ent;
4315 
4316 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4317 		SEQ_PUT_FIELD(s, entry->pid);
4318 		SEQ_PUT_FIELD(s, iter->cpu);
4319 		SEQ_PUT_FIELD(s, iter->ts);
4320 		if (trace_seq_has_overflowed(s))
4321 			return TRACE_TYPE_PARTIAL_LINE;
4322 	}
4323 
4324 	event = ftrace_find_event(entry->type);
4325 	return event ? event->funcs->binary(iter, 0, event) :
4326 		TRACE_TYPE_HANDLED;
4327 }
4328 
4329 int trace_empty(struct trace_iterator *iter)
4330 {
4331 	struct ring_buffer_iter *buf_iter;
4332 	int cpu;
4333 
4334 	/* If we are looking at one CPU buffer, only check that one */
4335 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4336 		cpu = iter->cpu_file;
4337 		buf_iter = trace_buffer_iter(iter, cpu);
4338 		if (buf_iter) {
4339 			if (!ring_buffer_iter_empty(buf_iter))
4340 				return 0;
4341 		} else {
4342 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4343 				return 0;
4344 		}
4345 		return 1;
4346 	}
4347 
4348 	for_each_tracing_cpu(cpu) {
4349 		buf_iter = trace_buffer_iter(iter, cpu);
4350 		if (buf_iter) {
4351 			if (!ring_buffer_iter_empty(buf_iter))
4352 				return 0;
4353 		} else {
4354 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4355 				return 0;
4356 		}
4357 	}
4358 
4359 	return 1;
4360 }
4361 
4362 /*  Called with trace_event_read_lock() held. */
4363 enum print_line_t print_trace_line(struct trace_iterator *iter)
4364 {
4365 	struct trace_array *tr = iter->tr;
4366 	unsigned long trace_flags = tr->trace_flags;
4367 	enum print_line_t ret;
4368 
4369 	if (iter->lost_events) {
4370 		if (iter->lost_events == (unsigned long)-1)
4371 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4372 					 iter->cpu);
4373 		else
4374 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4375 					 iter->cpu, iter->lost_events);
4376 		if (trace_seq_has_overflowed(&iter->seq))
4377 			return TRACE_TYPE_PARTIAL_LINE;
4378 	}
4379 
4380 	if (iter->trace && iter->trace->print_line) {
4381 		ret = iter->trace->print_line(iter);
4382 		if (ret != TRACE_TYPE_UNHANDLED)
4383 			return ret;
4384 	}
4385 
4386 	if (iter->ent->type == TRACE_BPUTS &&
4387 			trace_flags & TRACE_ITER_PRINTK &&
4388 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4389 		return trace_print_bputs_msg_only(iter);
4390 
4391 	if (iter->ent->type == TRACE_BPRINT &&
4392 			trace_flags & TRACE_ITER_PRINTK &&
4393 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4394 		return trace_print_bprintk_msg_only(iter);
4395 
4396 	if (iter->ent->type == TRACE_PRINT &&
4397 			trace_flags & TRACE_ITER_PRINTK &&
4398 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4399 		return trace_print_printk_msg_only(iter);
4400 
4401 	if (trace_flags & TRACE_ITER_BIN)
4402 		return print_bin_fmt(iter);
4403 
4404 	if (trace_flags & TRACE_ITER_HEX)
4405 		return print_hex_fmt(iter);
4406 
4407 	if (trace_flags & TRACE_ITER_RAW)
4408 		return print_raw_fmt(iter);
4409 
4410 	return print_trace_fmt(iter);
4411 }
4412 
4413 void trace_latency_header(struct seq_file *m)
4414 {
4415 	struct trace_iterator *iter = m->private;
4416 	struct trace_array *tr = iter->tr;
4417 
4418 	/* print nothing if the buffers are empty */
4419 	if (trace_empty(iter))
4420 		return;
4421 
4422 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4423 		print_trace_header(m, iter);
4424 
4425 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4426 		print_lat_help_header(m);
4427 }
4428 
4429 void trace_default_header(struct seq_file *m)
4430 {
4431 	struct trace_iterator *iter = m->private;
4432 	struct trace_array *tr = iter->tr;
4433 	unsigned long trace_flags = tr->trace_flags;
4434 
4435 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4436 		return;
4437 
4438 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4439 		/* print nothing if the buffers are empty */
4440 		if (trace_empty(iter))
4441 			return;
4442 		print_trace_header(m, iter);
4443 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4444 			print_lat_help_header(m);
4445 	} else {
4446 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4447 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4448 				print_func_help_header_irq(iter->array_buffer,
4449 							   m, trace_flags);
4450 			else
4451 				print_func_help_header(iter->array_buffer, m,
4452 						       trace_flags);
4453 		}
4454 	}
4455 }
4456 
4457 static void test_ftrace_alive(struct seq_file *m)
4458 {
4459 	if (!ftrace_is_dead())
4460 		return;
4461 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4462 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4463 }
4464 
4465 #ifdef CONFIG_TRACER_MAX_TRACE
4466 static void show_snapshot_main_help(struct seq_file *m)
4467 {
4468 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4469 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4470 		    "#                      Takes a snapshot of the main buffer.\n"
4471 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4472 		    "#                      (Doesn't have to be '2' works with any number that\n"
4473 		    "#                       is not a '0' or '1')\n");
4474 }
4475 
4476 static void show_snapshot_percpu_help(struct seq_file *m)
4477 {
4478 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4479 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4480 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4481 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4482 #else
4483 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4484 		    "#                     Must use main snapshot file to allocate.\n");
4485 #endif
4486 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4487 		    "#                      (Doesn't have to be '2' works with any number that\n"
4488 		    "#                       is not a '0' or '1')\n");
4489 }
4490 
4491 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4492 {
4493 	if (iter->tr->allocated_snapshot)
4494 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4495 	else
4496 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4497 
4498 	seq_puts(m, "# Snapshot commands:\n");
4499 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4500 		show_snapshot_main_help(m);
4501 	else
4502 		show_snapshot_percpu_help(m);
4503 }
4504 #else
4505 /* Should never be called */
4506 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4507 #endif
4508 
4509 static int s_show(struct seq_file *m, void *v)
4510 {
4511 	struct trace_iterator *iter = v;
4512 	int ret;
4513 
4514 	if (iter->ent == NULL) {
4515 		if (iter->tr) {
4516 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4517 			seq_puts(m, "#\n");
4518 			test_ftrace_alive(m);
4519 		}
4520 		if (iter->snapshot && trace_empty(iter))
4521 			print_snapshot_help(m, iter);
4522 		else if (iter->trace && iter->trace->print_header)
4523 			iter->trace->print_header(m);
4524 		else
4525 			trace_default_header(m);
4526 
4527 	} else if (iter->leftover) {
4528 		/*
4529 		 * If we filled the seq_file buffer earlier, we
4530 		 * want to just show it now.
4531 		 */
4532 		ret = trace_print_seq(m, &iter->seq);
4533 
4534 		/* ret should this time be zero, but you never know */
4535 		iter->leftover = ret;
4536 
4537 	} else {
4538 		ret = print_trace_line(iter);
4539 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4540 			iter->seq.full = 0;
4541 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4542 		}
4543 		ret = trace_print_seq(m, &iter->seq);
4544 		/*
4545 		 * If we overflow the seq_file buffer, then it will
4546 		 * ask us for this data again at start up.
4547 		 * Use that instead.
4548 		 *  ret is 0 if seq_file write succeeded.
4549 		 *        -1 otherwise.
4550 		 */
4551 		iter->leftover = ret;
4552 	}
4553 
4554 	return 0;
4555 }
4556 
4557 /*
4558  * Should be used after trace_array_get(), trace_types_lock
4559  * ensures that i_cdev was already initialized.
4560  */
4561 static inline int tracing_get_cpu(struct inode *inode)
4562 {
4563 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4564 		return (long)inode->i_cdev - 1;
4565 	return RING_BUFFER_ALL_CPUS;
4566 }
4567 
4568 static const struct seq_operations tracer_seq_ops = {
4569 	.start		= s_start,
4570 	.next		= s_next,
4571 	.stop		= s_stop,
4572 	.show		= s_show,
4573 };
4574 
4575 /*
4576  * Note, as iter itself can be allocated and freed in different
4577  * ways, this function is only used to free its content, and not
4578  * the iterator itself. The only requirement to all the allocations
4579  * is that it must zero all fields (kzalloc), as freeing works with
4580  * ethier allocated content or NULL.
4581  */
4582 static void free_trace_iter_content(struct trace_iterator *iter)
4583 {
4584 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4585 	if (iter->fmt != static_fmt_buf)
4586 		kfree(iter->fmt);
4587 
4588 	kfree(iter->temp);
4589 	kfree(iter->buffer_iter);
4590 	mutex_destroy(&iter->mutex);
4591 	free_cpumask_var(iter->started);
4592 }
4593 
4594 static struct trace_iterator *
4595 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4596 {
4597 	struct trace_array *tr = inode->i_private;
4598 	struct trace_iterator *iter;
4599 	int cpu;
4600 
4601 	if (tracing_disabled)
4602 		return ERR_PTR(-ENODEV);
4603 
4604 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4605 	if (!iter)
4606 		return ERR_PTR(-ENOMEM);
4607 
4608 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4609 				    GFP_KERNEL);
4610 	if (!iter->buffer_iter)
4611 		goto release;
4612 
4613 	/*
4614 	 * trace_find_next_entry() may need to save off iter->ent.
4615 	 * It will place it into the iter->temp buffer. As most
4616 	 * events are less than 128, allocate a buffer of that size.
4617 	 * If one is greater, then trace_find_next_entry() will
4618 	 * allocate a new buffer to adjust for the bigger iter->ent.
4619 	 * It's not critical if it fails to get allocated here.
4620 	 */
4621 	iter->temp = kmalloc(128, GFP_KERNEL);
4622 	if (iter->temp)
4623 		iter->temp_size = 128;
4624 
4625 	/*
4626 	 * trace_event_printf() may need to modify given format
4627 	 * string to replace %p with %px so that it shows real address
4628 	 * instead of hash value. However, that is only for the event
4629 	 * tracing, other tracer may not need. Defer the allocation
4630 	 * until it is needed.
4631 	 */
4632 	iter->fmt = NULL;
4633 	iter->fmt_size = 0;
4634 
4635 	mutex_lock(&trace_types_lock);
4636 	iter->trace = tr->current_trace;
4637 
4638 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4639 		goto fail;
4640 
4641 	iter->tr = tr;
4642 
4643 #ifdef CONFIG_TRACER_MAX_TRACE
4644 	/* Currently only the top directory has a snapshot */
4645 	if (tr->current_trace->print_max || snapshot)
4646 		iter->array_buffer = &tr->max_buffer;
4647 	else
4648 #endif
4649 		iter->array_buffer = &tr->array_buffer;
4650 	iter->snapshot = snapshot;
4651 	iter->pos = -1;
4652 	iter->cpu_file = tracing_get_cpu(inode);
4653 	mutex_init(&iter->mutex);
4654 
4655 	/* Notify the tracer early; before we stop tracing. */
4656 	if (iter->trace->open)
4657 		iter->trace->open(iter);
4658 
4659 	/* Annotate start of buffers if we had overruns */
4660 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4661 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4662 
4663 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4664 	if (trace_clocks[tr->clock_id].in_ns)
4665 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4666 
4667 	/*
4668 	 * If pause-on-trace is enabled, then stop the trace while
4669 	 * dumping, unless this is the "snapshot" file
4670 	 */
4671 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4672 		tracing_stop_tr(tr);
4673 
4674 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4675 		for_each_tracing_cpu(cpu) {
4676 			iter->buffer_iter[cpu] =
4677 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4678 							 cpu, GFP_KERNEL);
4679 		}
4680 		ring_buffer_read_prepare_sync();
4681 		for_each_tracing_cpu(cpu) {
4682 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4683 			tracing_iter_reset(iter, cpu);
4684 		}
4685 	} else {
4686 		cpu = iter->cpu_file;
4687 		iter->buffer_iter[cpu] =
4688 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4689 						 cpu, GFP_KERNEL);
4690 		ring_buffer_read_prepare_sync();
4691 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4692 		tracing_iter_reset(iter, cpu);
4693 	}
4694 
4695 	mutex_unlock(&trace_types_lock);
4696 
4697 	return iter;
4698 
4699  fail:
4700 	mutex_unlock(&trace_types_lock);
4701 	free_trace_iter_content(iter);
4702 release:
4703 	seq_release_private(inode, file);
4704 	return ERR_PTR(-ENOMEM);
4705 }
4706 
4707 int tracing_open_generic(struct inode *inode, struct file *filp)
4708 {
4709 	int ret;
4710 
4711 	ret = tracing_check_open_get_tr(NULL);
4712 	if (ret)
4713 		return ret;
4714 
4715 	filp->private_data = inode->i_private;
4716 	return 0;
4717 }
4718 
4719 bool tracing_is_disabled(void)
4720 {
4721 	return (tracing_disabled) ? true: false;
4722 }
4723 
4724 /*
4725  * Open and update trace_array ref count.
4726  * Must have the current trace_array passed to it.
4727  */
4728 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4729 {
4730 	struct trace_array *tr = inode->i_private;
4731 	int ret;
4732 
4733 	ret = tracing_check_open_get_tr(tr);
4734 	if (ret)
4735 		return ret;
4736 
4737 	filp->private_data = inode->i_private;
4738 
4739 	return 0;
4740 }
4741 
4742 /*
4743  * The private pointer of the inode is the trace_event_file.
4744  * Update the tr ref count associated to it.
4745  */
4746 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4747 {
4748 	struct trace_event_file *file = inode->i_private;
4749 	int ret;
4750 
4751 	ret = tracing_check_open_get_tr(file->tr);
4752 	if (ret)
4753 		return ret;
4754 
4755 	mutex_lock(&event_mutex);
4756 
4757 	/* Fail if the file is marked for removal */
4758 	if (file->flags & EVENT_FILE_FL_FREED) {
4759 		trace_array_put(file->tr);
4760 		ret = -ENODEV;
4761 	} else {
4762 		event_file_get(file);
4763 	}
4764 
4765 	mutex_unlock(&event_mutex);
4766 	if (ret)
4767 		return ret;
4768 
4769 	filp->private_data = inode->i_private;
4770 
4771 	return 0;
4772 }
4773 
4774 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4775 {
4776 	struct trace_event_file *file = inode->i_private;
4777 
4778 	trace_array_put(file->tr);
4779 	event_file_put(file);
4780 
4781 	return 0;
4782 }
4783 
4784 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4785 {
4786 	tracing_release_file_tr(inode, filp);
4787 	return single_release(inode, filp);
4788 }
4789 
4790 static int tracing_mark_open(struct inode *inode, struct file *filp)
4791 {
4792 	stream_open(inode, filp);
4793 	return tracing_open_generic_tr(inode, filp);
4794 }
4795 
4796 static int tracing_release(struct inode *inode, struct file *file)
4797 {
4798 	struct trace_array *tr = inode->i_private;
4799 	struct seq_file *m = file->private_data;
4800 	struct trace_iterator *iter;
4801 	int cpu;
4802 
4803 	if (!(file->f_mode & FMODE_READ)) {
4804 		trace_array_put(tr);
4805 		return 0;
4806 	}
4807 
4808 	/* Writes do not use seq_file */
4809 	iter = m->private;
4810 	mutex_lock(&trace_types_lock);
4811 
4812 	for_each_tracing_cpu(cpu) {
4813 		if (iter->buffer_iter[cpu])
4814 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4815 	}
4816 
4817 	if (iter->trace && iter->trace->close)
4818 		iter->trace->close(iter);
4819 
4820 	if (!iter->snapshot && tr->stop_count)
4821 		/* reenable tracing if it was previously enabled */
4822 		tracing_start_tr(tr);
4823 
4824 	__trace_array_put(tr);
4825 
4826 	mutex_unlock(&trace_types_lock);
4827 
4828 	free_trace_iter_content(iter);
4829 	seq_release_private(inode, file);
4830 
4831 	return 0;
4832 }
4833 
4834 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4835 {
4836 	struct trace_array *tr = inode->i_private;
4837 
4838 	trace_array_put(tr);
4839 	return 0;
4840 }
4841 
4842 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4843 {
4844 	struct trace_array *tr = inode->i_private;
4845 
4846 	trace_array_put(tr);
4847 
4848 	return single_release(inode, file);
4849 }
4850 
4851 static int tracing_open(struct inode *inode, struct file *file)
4852 {
4853 	struct trace_array *tr = inode->i_private;
4854 	struct trace_iterator *iter;
4855 	int ret;
4856 
4857 	ret = tracing_check_open_get_tr(tr);
4858 	if (ret)
4859 		return ret;
4860 
4861 	/* If this file was open for write, then erase contents */
4862 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4863 		int cpu = tracing_get_cpu(inode);
4864 		struct array_buffer *trace_buf = &tr->array_buffer;
4865 
4866 #ifdef CONFIG_TRACER_MAX_TRACE
4867 		if (tr->current_trace->print_max)
4868 			trace_buf = &tr->max_buffer;
4869 #endif
4870 
4871 		if (cpu == RING_BUFFER_ALL_CPUS)
4872 			tracing_reset_online_cpus(trace_buf);
4873 		else
4874 			tracing_reset_cpu(trace_buf, cpu);
4875 	}
4876 
4877 	if (file->f_mode & FMODE_READ) {
4878 		iter = __tracing_open(inode, file, false);
4879 		if (IS_ERR(iter))
4880 			ret = PTR_ERR(iter);
4881 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4882 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4883 	}
4884 
4885 	if (ret < 0)
4886 		trace_array_put(tr);
4887 
4888 	return ret;
4889 }
4890 
4891 /*
4892  * Some tracers are not suitable for instance buffers.
4893  * A tracer is always available for the global array (toplevel)
4894  * or if it explicitly states that it is.
4895  */
4896 static bool
4897 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4898 {
4899 #ifdef CONFIG_TRACER_SNAPSHOT
4900 	/* arrays with mapped buffer range do not have snapshots */
4901 	if (tr->range_addr_start && t->use_max_tr)
4902 		return false;
4903 #endif
4904 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4905 }
4906 
4907 /* Find the next tracer that this trace array may use */
4908 static struct tracer *
4909 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4910 {
4911 	while (t && !trace_ok_for_array(t, tr))
4912 		t = t->next;
4913 
4914 	return t;
4915 }
4916 
4917 static void *
4918 t_next(struct seq_file *m, void *v, loff_t *pos)
4919 {
4920 	struct trace_array *tr = m->private;
4921 	struct tracer *t = v;
4922 
4923 	(*pos)++;
4924 
4925 	if (t)
4926 		t = get_tracer_for_array(tr, t->next);
4927 
4928 	return t;
4929 }
4930 
4931 static void *t_start(struct seq_file *m, loff_t *pos)
4932 {
4933 	struct trace_array *tr = m->private;
4934 	struct tracer *t;
4935 	loff_t l = 0;
4936 
4937 	mutex_lock(&trace_types_lock);
4938 
4939 	t = get_tracer_for_array(tr, trace_types);
4940 	for (; t && l < *pos; t = t_next(m, t, &l))
4941 			;
4942 
4943 	return t;
4944 }
4945 
4946 static void t_stop(struct seq_file *m, void *p)
4947 {
4948 	mutex_unlock(&trace_types_lock);
4949 }
4950 
4951 static int t_show(struct seq_file *m, void *v)
4952 {
4953 	struct tracer *t = v;
4954 
4955 	if (!t)
4956 		return 0;
4957 
4958 	seq_puts(m, t->name);
4959 	if (t->next)
4960 		seq_putc(m, ' ');
4961 	else
4962 		seq_putc(m, '\n');
4963 
4964 	return 0;
4965 }
4966 
4967 static const struct seq_operations show_traces_seq_ops = {
4968 	.start		= t_start,
4969 	.next		= t_next,
4970 	.stop		= t_stop,
4971 	.show		= t_show,
4972 };
4973 
4974 static int show_traces_open(struct inode *inode, struct file *file)
4975 {
4976 	struct trace_array *tr = inode->i_private;
4977 	struct seq_file *m;
4978 	int ret;
4979 
4980 	ret = tracing_check_open_get_tr(tr);
4981 	if (ret)
4982 		return ret;
4983 
4984 	ret = seq_open(file, &show_traces_seq_ops);
4985 	if (ret) {
4986 		trace_array_put(tr);
4987 		return ret;
4988 	}
4989 
4990 	m = file->private_data;
4991 	m->private = tr;
4992 
4993 	return 0;
4994 }
4995 
4996 static int tracing_seq_release(struct inode *inode, struct file *file)
4997 {
4998 	struct trace_array *tr = inode->i_private;
4999 
5000 	trace_array_put(tr);
5001 	return seq_release(inode, file);
5002 }
5003 
5004 static ssize_t
5005 tracing_write_stub(struct file *filp, const char __user *ubuf,
5006 		   size_t count, loff_t *ppos)
5007 {
5008 	return count;
5009 }
5010 
5011 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5012 {
5013 	int ret;
5014 
5015 	if (file->f_mode & FMODE_READ)
5016 		ret = seq_lseek(file, offset, whence);
5017 	else
5018 		file->f_pos = ret = 0;
5019 
5020 	return ret;
5021 }
5022 
5023 static const struct file_operations tracing_fops = {
5024 	.open		= tracing_open,
5025 	.read		= seq_read,
5026 	.read_iter	= seq_read_iter,
5027 	.splice_read	= copy_splice_read,
5028 	.write		= tracing_write_stub,
5029 	.llseek		= tracing_lseek,
5030 	.release	= tracing_release,
5031 };
5032 
5033 static const struct file_operations show_traces_fops = {
5034 	.open		= show_traces_open,
5035 	.read		= seq_read,
5036 	.llseek		= seq_lseek,
5037 	.release	= tracing_seq_release,
5038 };
5039 
5040 static ssize_t
5041 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5042 		     size_t count, loff_t *ppos)
5043 {
5044 	struct trace_array *tr = file_inode(filp)->i_private;
5045 	char *mask_str;
5046 	int len;
5047 
5048 	len = snprintf(NULL, 0, "%*pb\n",
5049 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5050 	mask_str = kmalloc(len, GFP_KERNEL);
5051 	if (!mask_str)
5052 		return -ENOMEM;
5053 
5054 	len = snprintf(mask_str, len, "%*pb\n",
5055 		       cpumask_pr_args(tr->tracing_cpumask));
5056 	if (len >= count) {
5057 		count = -EINVAL;
5058 		goto out_err;
5059 	}
5060 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5061 
5062 out_err:
5063 	kfree(mask_str);
5064 
5065 	return count;
5066 }
5067 
5068 int tracing_set_cpumask(struct trace_array *tr,
5069 			cpumask_var_t tracing_cpumask_new)
5070 {
5071 	int cpu;
5072 
5073 	if (!tr)
5074 		return -EINVAL;
5075 
5076 	local_irq_disable();
5077 	arch_spin_lock(&tr->max_lock);
5078 	for_each_tracing_cpu(cpu) {
5079 		/*
5080 		 * Increase/decrease the disabled counter if we are
5081 		 * about to flip a bit in the cpumask:
5082 		 */
5083 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5084 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5085 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5086 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5087 #ifdef CONFIG_TRACER_MAX_TRACE
5088 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5089 #endif
5090 		}
5091 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5092 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5093 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5094 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5095 #ifdef CONFIG_TRACER_MAX_TRACE
5096 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5097 #endif
5098 		}
5099 	}
5100 	arch_spin_unlock(&tr->max_lock);
5101 	local_irq_enable();
5102 
5103 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5104 
5105 	return 0;
5106 }
5107 
5108 static ssize_t
5109 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5110 		      size_t count, loff_t *ppos)
5111 {
5112 	struct trace_array *tr = file_inode(filp)->i_private;
5113 	cpumask_var_t tracing_cpumask_new;
5114 	int err;
5115 
5116 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5117 		return -EINVAL;
5118 
5119 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5120 		return -ENOMEM;
5121 
5122 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5123 	if (err)
5124 		goto err_free;
5125 
5126 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5127 	if (err)
5128 		goto err_free;
5129 
5130 	free_cpumask_var(tracing_cpumask_new);
5131 
5132 	return count;
5133 
5134 err_free:
5135 	free_cpumask_var(tracing_cpumask_new);
5136 
5137 	return err;
5138 }
5139 
5140 static const struct file_operations tracing_cpumask_fops = {
5141 	.open		= tracing_open_generic_tr,
5142 	.read		= tracing_cpumask_read,
5143 	.write		= tracing_cpumask_write,
5144 	.release	= tracing_release_generic_tr,
5145 	.llseek		= generic_file_llseek,
5146 };
5147 
5148 static int tracing_trace_options_show(struct seq_file *m, void *v)
5149 {
5150 	struct tracer_opt *trace_opts;
5151 	struct trace_array *tr = m->private;
5152 	u32 tracer_flags;
5153 	int i;
5154 
5155 	guard(mutex)(&trace_types_lock);
5156 
5157 	tracer_flags = tr->current_trace->flags->val;
5158 	trace_opts = tr->current_trace->flags->opts;
5159 
5160 	for (i = 0; trace_options[i]; i++) {
5161 		if (tr->trace_flags & (1 << i))
5162 			seq_printf(m, "%s\n", trace_options[i]);
5163 		else
5164 			seq_printf(m, "no%s\n", trace_options[i]);
5165 	}
5166 
5167 	for (i = 0; trace_opts[i].name; i++) {
5168 		if (tracer_flags & trace_opts[i].bit)
5169 			seq_printf(m, "%s\n", trace_opts[i].name);
5170 		else
5171 			seq_printf(m, "no%s\n", trace_opts[i].name);
5172 	}
5173 
5174 	return 0;
5175 }
5176 
5177 static int __set_tracer_option(struct trace_array *tr,
5178 			       struct tracer_flags *tracer_flags,
5179 			       struct tracer_opt *opts, int neg)
5180 {
5181 	struct tracer *trace = tracer_flags->trace;
5182 	int ret;
5183 
5184 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5185 	if (ret)
5186 		return ret;
5187 
5188 	if (neg)
5189 		tracer_flags->val &= ~opts->bit;
5190 	else
5191 		tracer_flags->val |= opts->bit;
5192 	return 0;
5193 }
5194 
5195 /* Try to assign a tracer specific option */
5196 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5197 {
5198 	struct tracer *trace = tr->current_trace;
5199 	struct tracer_flags *tracer_flags = trace->flags;
5200 	struct tracer_opt *opts = NULL;
5201 	int i;
5202 
5203 	for (i = 0; tracer_flags->opts[i].name; i++) {
5204 		opts = &tracer_flags->opts[i];
5205 
5206 		if (strcmp(cmp, opts->name) == 0)
5207 			return __set_tracer_option(tr, trace->flags, opts, neg);
5208 	}
5209 
5210 	return -EINVAL;
5211 }
5212 
5213 /* Some tracers require overwrite to stay enabled */
5214 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5215 {
5216 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5217 		return -1;
5218 
5219 	return 0;
5220 }
5221 
5222 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5223 {
5224 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5225 	    (mask == TRACE_ITER_RECORD_CMD) ||
5226 	    (mask == TRACE_ITER_TRACE_PRINTK))
5227 		lockdep_assert_held(&event_mutex);
5228 
5229 	/* do nothing if flag is already set */
5230 	if (!!(tr->trace_flags & mask) == !!enabled)
5231 		return 0;
5232 
5233 	/* Give the tracer a chance to approve the change */
5234 	if (tr->current_trace->flag_changed)
5235 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5236 			return -EINVAL;
5237 
5238 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5239 		if (enabled) {
5240 			update_printk_trace(tr);
5241 		} else {
5242 			/*
5243 			 * The global_trace cannot clear this.
5244 			 * It's flag only gets cleared if another instance sets it.
5245 			 */
5246 			if (printk_trace == &global_trace)
5247 				return -EINVAL;
5248 			/*
5249 			 * An instance must always have it set.
5250 			 * by default, that's the global_trace instane.
5251 			 */
5252 			if (printk_trace == tr)
5253 				update_printk_trace(&global_trace);
5254 		}
5255 	}
5256 
5257 	if (enabled)
5258 		tr->trace_flags |= mask;
5259 	else
5260 		tr->trace_flags &= ~mask;
5261 
5262 	if (mask == TRACE_ITER_RECORD_CMD)
5263 		trace_event_enable_cmd_record(enabled);
5264 
5265 	if (mask == TRACE_ITER_RECORD_TGID) {
5266 
5267 		if (trace_alloc_tgid_map() < 0) {
5268 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5269 			return -ENOMEM;
5270 		}
5271 
5272 		trace_event_enable_tgid_record(enabled);
5273 	}
5274 
5275 	if (mask == TRACE_ITER_EVENT_FORK)
5276 		trace_event_follow_fork(tr, enabled);
5277 
5278 	if (mask == TRACE_ITER_FUNC_FORK)
5279 		ftrace_pid_follow_fork(tr, enabled);
5280 
5281 	if (mask == TRACE_ITER_OVERWRITE) {
5282 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5283 #ifdef CONFIG_TRACER_MAX_TRACE
5284 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5285 #endif
5286 	}
5287 
5288 	if (mask == TRACE_ITER_PRINTK) {
5289 		trace_printk_start_stop_comm(enabled);
5290 		trace_printk_control(enabled);
5291 	}
5292 
5293 	return 0;
5294 }
5295 
5296 int trace_set_options(struct trace_array *tr, char *option)
5297 {
5298 	char *cmp;
5299 	int neg = 0;
5300 	int ret;
5301 	size_t orig_len = strlen(option);
5302 	int len;
5303 
5304 	cmp = strstrip(option);
5305 
5306 	len = str_has_prefix(cmp, "no");
5307 	if (len)
5308 		neg = 1;
5309 
5310 	cmp += len;
5311 
5312 	mutex_lock(&event_mutex);
5313 	mutex_lock(&trace_types_lock);
5314 
5315 	ret = match_string(trace_options, -1, cmp);
5316 	/* If no option could be set, test the specific tracer options */
5317 	if (ret < 0)
5318 		ret = set_tracer_option(tr, cmp, neg);
5319 	else
5320 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5321 
5322 	mutex_unlock(&trace_types_lock);
5323 	mutex_unlock(&event_mutex);
5324 
5325 	/*
5326 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5327 	 * turn it back into a space.
5328 	 */
5329 	if (orig_len > strlen(option))
5330 		option[strlen(option)] = ' ';
5331 
5332 	return ret;
5333 }
5334 
5335 static void __init apply_trace_boot_options(void)
5336 {
5337 	char *buf = trace_boot_options_buf;
5338 	char *option;
5339 
5340 	while (true) {
5341 		option = strsep(&buf, ",");
5342 
5343 		if (!option)
5344 			break;
5345 
5346 		if (*option)
5347 			trace_set_options(&global_trace, option);
5348 
5349 		/* Put back the comma to allow this to be called again */
5350 		if (buf)
5351 			*(buf - 1) = ',';
5352 	}
5353 }
5354 
5355 static ssize_t
5356 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5357 			size_t cnt, loff_t *ppos)
5358 {
5359 	struct seq_file *m = filp->private_data;
5360 	struct trace_array *tr = m->private;
5361 	char buf[64];
5362 	int ret;
5363 
5364 	if (cnt >= sizeof(buf))
5365 		return -EINVAL;
5366 
5367 	if (copy_from_user(buf, ubuf, cnt))
5368 		return -EFAULT;
5369 
5370 	buf[cnt] = 0;
5371 
5372 	ret = trace_set_options(tr, buf);
5373 	if (ret < 0)
5374 		return ret;
5375 
5376 	*ppos += cnt;
5377 
5378 	return cnt;
5379 }
5380 
5381 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5382 {
5383 	struct trace_array *tr = inode->i_private;
5384 	int ret;
5385 
5386 	ret = tracing_check_open_get_tr(tr);
5387 	if (ret)
5388 		return ret;
5389 
5390 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5391 	if (ret < 0)
5392 		trace_array_put(tr);
5393 
5394 	return ret;
5395 }
5396 
5397 static const struct file_operations tracing_iter_fops = {
5398 	.open		= tracing_trace_options_open,
5399 	.read		= seq_read,
5400 	.llseek		= seq_lseek,
5401 	.release	= tracing_single_release_tr,
5402 	.write		= tracing_trace_options_write,
5403 };
5404 
5405 static const char readme_msg[] =
5406 	"tracing mini-HOWTO:\n\n"
5407 	"By default tracefs removes all OTH file permission bits.\n"
5408 	"When mounting tracefs an optional group id can be specified\n"
5409 	"which adds the group to every directory and file in tracefs:\n\n"
5410 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5411 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5412 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5413 	" Important files:\n"
5414 	"  trace\t\t\t- The static contents of the buffer\n"
5415 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5416 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5417 	"  current_tracer\t- function and latency tracers\n"
5418 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5419 	"  error_log\t- error log for failed commands (that support it)\n"
5420 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5421 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5422 	"  trace_clock\t\t- change the clock used to order events\n"
5423 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5424 	"      global:   Synced across CPUs but slows tracing down.\n"
5425 	"     counter:   Not a clock, but just an increment\n"
5426 	"      uptime:   Jiffy counter from time of boot\n"
5427 	"        perf:   Same clock that perf events use\n"
5428 #ifdef CONFIG_X86_64
5429 	"     x86-tsc:   TSC cycle counter\n"
5430 #endif
5431 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5432 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5433 	"    absolute:   Absolute (standalone) timestamp\n"
5434 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5435 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5436 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5437 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5438 	"\t\t\t  Remove sub-buffer with rmdir\n"
5439 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5440 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5441 	"\t\t\t  option name\n"
5442 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5443 #ifdef CONFIG_DYNAMIC_FTRACE
5444 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5445 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5446 	"\t\t\t  functions\n"
5447 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5448 	"\t     modules: Can select a group via module\n"
5449 	"\t      Format: :mod:<module-name>\n"
5450 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5451 	"\t    triggers: a command to perform when function is hit\n"
5452 	"\t      Format: <function>:<trigger>[:count]\n"
5453 	"\t     trigger: traceon, traceoff\n"
5454 	"\t\t      enable_event:<system>:<event>\n"
5455 	"\t\t      disable_event:<system>:<event>\n"
5456 #ifdef CONFIG_STACKTRACE
5457 	"\t\t      stacktrace\n"
5458 #endif
5459 #ifdef CONFIG_TRACER_SNAPSHOT
5460 	"\t\t      snapshot\n"
5461 #endif
5462 	"\t\t      dump\n"
5463 	"\t\t      cpudump\n"
5464 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5465 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5466 	"\t     The first one will disable tracing every time do_fault is hit\n"
5467 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5468 	"\t       The first time do trap is hit and it disables tracing, the\n"
5469 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5470 	"\t       the counter will not decrement. It only decrements when the\n"
5471 	"\t       trigger did work\n"
5472 	"\t     To remove trigger without count:\n"
5473 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5474 	"\t     To remove trigger with a count:\n"
5475 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5476 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5477 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5478 	"\t    modules: Can select a group via module command :mod:\n"
5479 	"\t    Does not accept triggers\n"
5480 #endif /* CONFIG_DYNAMIC_FTRACE */
5481 #ifdef CONFIG_FUNCTION_TRACER
5482 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5483 	"\t\t    (function)\n"
5484 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5485 	"\t\t    (function)\n"
5486 #endif
5487 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5488 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5489 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5490 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5491 #endif
5492 #ifdef CONFIG_TRACER_SNAPSHOT
5493 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5494 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5495 	"\t\t\t  information\n"
5496 #endif
5497 #ifdef CONFIG_STACK_TRACER
5498 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5499 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5500 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5501 	"\t\t\t  new trace)\n"
5502 #ifdef CONFIG_DYNAMIC_FTRACE
5503 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5504 	"\t\t\t  traces\n"
5505 #endif
5506 #endif /* CONFIG_STACK_TRACER */
5507 #ifdef CONFIG_DYNAMIC_EVENTS
5508 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5509 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5510 #endif
5511 #ifdef CONFIG_KPROBE_EVENTS
5512 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5513 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5514 #endif
5515 #ifdef CONFIG_UPROBE_EVENTS
5516 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5517 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5518 #endif
5519 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5520     defined(CONFIG_FPROBE_EVENTS)
5521 	"\t  accepts: event-definitions (one definition per line)\n"
5522 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5523 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5524 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5525 #endif
5526 #ifdef CONFIG_FPROBE_EVENTS
5527 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5528 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5529 #endif
5530 #ifdef CONFIG_HIST_TRIGGERS
5531 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5532 #endif
5533 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5534 	"\t           -:[<group>/][<event>]\n"
5535 #ifdef CONFIG_KPROBE_EVENTS
5536 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5537   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5538 #endif
5539 #ifdef CONFIG_UPROBE_EVENTS
5540   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5541 #endif
5542 	"\t     args: <name>=fetcharg[:type]\n"
5543 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5544 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5545 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5546 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5547 	"\t           <argname>[->field[->field|.field...]],\n"
5548 #endif
5549 #else
5550 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5551 #endif
5552 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5553 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5554 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5555 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5556 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5557 #ifdef CONFIG_HIST_TRIGGERS
5558 	"\t    field: <stype> <name>;\n"
5559 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5560 	"\t           [unsigned] char/int/long\n"
5561 #endif
5562 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5563 	"\t            of the <attached-group>/<attached-event>.\n"
5564 #endif
5565 	"  set_event\t\t- Enables events by name written into it\n"
5566 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5567 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5568 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5569 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5570 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5571 	"\t\t\t  events\n"
5572 	"      filter\t\t- If set, only events passing filter are traced\n"
5573 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5574 	"\t\t\t  <event>:\n"
5575 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5576 	"      filter\t\t- If set, only events passing filter are traced\n"
5577 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5578 	"\t    Format: <trigger>[:count][if <filter>]\n"
5579 	"\t   trigger: traceon, traceoff\n"
5580 	"\t            enable_event:<system>:<event>\n"
5581 	"\t            disable_event:<system>:<event>\n"
5582 #ifdef CONFIG_HIST_TRIGGERS
5583 	"\t            enable_hist:<system>:<event>\n"
5584 	"\t            disable_hist:<system>:<event>\n"
5585 #endif
5586 #ifdef CONFIG_STACKTRACE
5587 	"\t\t    stacktrace\n"
5588 #endif
5589 #ifdef CONFIG_TRACER_SNAPSHOT
5590 	"\t\t    snapshot\n"
5591 #endif
5592 #ifdef CONFIG_HIST_TRIGGERS
5593 	"\t\t    hist (see below)\n"
5594 #endif
5595 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5596 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5597 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5598 	"\t                  events/block/block_unplug/trigger\n"
5599 	"\t   The first disables tracing every time block_unplug is hit.\n"
5600 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5601 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5602 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5603 	"\t   Like function triggers, the counter is only decremented if it\n"
5604 	"\t    enabled or disabled tracing.\n"
5605 	"\t   To remove a trigger without a count:\n"
5606 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5607 	"\t   To remove a trigger with a count:\n"
5608 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5609 	"\t   Filters can be ignored when removing a trigger.\n"
5610 #ifdef CONFIG_HIST_TRIGGERS
5611 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5612 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5613 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5614 	"\t            [:values=<field1[,field2,...]>]\n"
5615 	"\t            [:sort=<field1[,field2,...]>]\n"
5616 	"\t            [:size=#entries]\n"
5617 	"\t            [:pause][:continue][:clear]\n"
5618 	"\t            [:name=histname1]\n"
5619 	"\t            [:nohitcount]\n"
5620 	"\t            [:<handler>.<action>]\n"
5621 	"\t            [if <filter>]\n\n"
5622 	"\t    Note, special fields can be used as well:\n"
5623 	"\t            common_timestamp - to record current timestamp\n"
5624 	"\t            common_cpu - to record the CPU the event happened on\n"
5625 	"\n"
5626 	"\t    A hist trigger variable can be:\n"
5627 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5628 	"\t        - a reference to another variable e.g. y=$x,\n"
5629 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5630 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5631 	"\n"
5632 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5633 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5634 	"\t    variable reference, field or numeric literal.\n"
5635 	"\n"
5636 	"\t    When a matching event is hit, an entry is added to a hash\n"
5637 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5638 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5639 	"\t    correspond to fields in the event's format description.  Keys\n"
5640 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5641 	"\t    Compound keys consisting of up to two fields can be specified\n"
5642 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5643 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5644 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5645 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5646 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5647 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5648 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5649 	"\t    its histogram data will be shared with other triggers of the\n"
5650 	"\t    same name, and trigger hits will update this common data.\n\n"
5651 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5652 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5653 	"\t    triggers attached to an event, there will be a table for each\n"
5654 	"\t    trigger in the output.  The table displayed for a named\n"
5655 	"\t    trigger will be the same as any other instance having the\n"
5656 	"\t    same name.  The default format used to display a given field\n"
5657 	"\t    can be modified by appending any of the following modifiers\n"
5658 	"\t    to the field name, as applicable:\n\n"
5659 	"\t            .hex        display a number as a hex value\n"
5660 	"\t            .sym        display an address as a symbol\n"
5661 	"\t            .sym-offset display an address as a symbol and offset\n"
5662 	"\t            .execname   display a common_pid as a program name\n"
5663 	"\t            .syscall    display a syscall id as a syscall name\n"
5664 	"\t            .log2       display log2 value rather than raw number\n"
5665 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5666 	"\t            .usecs      display a common_timestamp in microseconds\n"
5667 	"\t            .percent    display a number of percentage value\n"
5668 	"\t            .graph      display a bar-graph of a value\n\n"
5669 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5670 	"\t    trigger or to start a hist trigger but not log any events\n"
5671 	"\t    until told to do so.  'continue' can be used to start or\n"
5672 	"\t    restart a paused hist trigger.\n\n"
5673 	"\t    The 'clear' parameter will clear the contents of a running\n"
5674 	"\t    hist trigger and leave its current paused/active state\n"
5675 	"\t    unchanged.\n\n"
5676 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5677 	"\t    raw hitcount in the histogram.\n\n"
5678 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5679 	"\t    have one event conditionally start and stop another event's\n"
5680 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5681 	"\t    the enable_event and disable_event triggers.\n\n"
5682 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5683 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5684 	"\t        <handler>.<action>\n\n"
5685 	"\t    The available handlers are:\n\n"
5686 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5687 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5688 	"\t        onchange(var)            - invoke action if var changes\n\n"
5689 	"\t    The available actions are:\n\n"
5690 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5691 	"\t        save(field,...)                      - save current event fields\n"
5692 #ifdef CONFIG_TRACER_SNAPSHOT
5693 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5694 #endif
5695 #ifdef CONFIG_SYNTH_EVENTS
5696 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5697 	"\t  Write into this file to define/undefine new synthetic events.\n"
5698 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5699 #endif
5700 #endif
5701 ;
5702 
5703 static ssize_t
5704 tracing_readme_read(struct file *filp, char __user *ubuf,
5705 		       size_t cnt, loff_t *ppos)
5706 {
5707 	return simple_read_from_buffer(ubuf, cnt, ppos,
5708 					readme_msg, strlen(readme_msg));
5709 }
5710 
5711 static const struct file_operations tracing_readme_fops = {
5712 	.open		= tracing_open_generic,
5713 	.read		= tracing_readme_read,
5714 	.llseek		= generic_file_llseek,
5715 };
5716 
5717 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5718 static union trace_eval_map_item *
5719 update_eval_map(union trace_eval_map_item *ptr)
5720 {
5721 	if (!ptr->map.eval_string) {
5722 		if (ptr->tail.next) {
5723 			ptr = ptr->tail.next;
5724 			/* Set ptr to the next real item (skip head) */
5725 			ptr++;
5726 		} else
5727 			return NULL;
5728 	}
5729 	return ptr;
5730 }
5731 
5732 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5733 {
5734 	union trace_eval_map_item *ptr = v;
5735 
5736 	/*
5737 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5738 	 * This really should never happen.
5739 	 */
5740 	(*pos)++;
5741 	ptr = update_eval_map(ptr);
5742 	if (WARN_ON_ONCE(!ptr))
5743 		return NULL;
5744 
5745 	ptr++;
5746 	ptr = update_eval_map(ptr);
5747 
5748 	return ptr;
5749 }
5750 
5751 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5752 {
5753 	union trace_eval_map_item *v;
5754 	loff_t l = 0;
5755 
5756 	mutex_lock(&trace_eval_mutex);
5757 
5758 	v = trace_eval_maps;
5759 	if (v)
5760 		v++;
5761 
5762 	while (v && l < *pos) {
5763 		v = eval_map_next(m, v, &l);
5764 	}
5765 
5766 	return v;
5767 }
5768 
5769 static void eval_map_stop(struct seq_file *m, void *v)
5770 {
5771 	mutex_unlock(&trace_eval_mutex);
5772 }
5773 
5774 static int eval_map_show(struct seq_file *m, void *v)
5775 {
5776 	union trace_eval_map_item *ptr = v;
5777 
5778 	seq_printf(m, "%s %ld (%s)\n",
5779 		   ptr->map.eval_string, ptr->map.eval_value,
5780 		   ptr->map.system);
5781 
5782 	return 0;
5783 }
5784 
5785 static const struct seq_operations tracing_eval_map_seq_ops = {
5786 	.start		= eval_map_start,
5787 	.next		= eval_map_next,
5788 	.stop		= eval_map_stop,
5789 	.show		= eval_map_show,
5790 };
5791 
5792 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5793 {
5794 	int ret;
5795 
5796 	ret = tracing_check_open_get_tr(NULL);
5797 	if (ret)
5798 		return ret;
5799 
5800 	return seq_open(filp, &tracing_eval_map_seq_ops);
5801 }
5802 
5803 static const struct file_operations tracing_eval_map_fops = {
5804 	.open		= tracing_eval_map_open,
5805 	.read		= seq_read,
5806 	.llseek		= seq_lseek,
5807 	.release	= seq_release,
5808 };
5809 
5810 static inline union trace_eval_map_item *
5811 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5812 {
5813 	/* Return tail of array given the head */
5814 	return ptr + ptr->head.length + 1;
5815 }
5816 
5817 static void
5818 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5819 			   int len)
5820 {
5821 	struct trace_eval_map **stop;
5822 	struct trace_eval_map **map;
5823 	union trace_eval_map_item *map_array;
5824 	union trace_eval_map_item *ptr;
5825 
5826 	stop = start + len;
5827 
5828 	/*
5829 	 * The trace_eval_maps contains the map plus a head and tail item,
5830 	 * where the head holds the module and length of array, and the
5831 	 * tail holds a pointer to the next list.
5832 	 */
5833 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5834 	if (!map_array) {
5835 		pr_warn("Unable to allocate trace eval mapping\n");
5836 		return;
5837 	}
5838 
5839 	guard(mutex)(&trace_eval_mutex);
5840 
5841 	if (!trace_eval_maps)
5842 		trace_eval_maps = map_array;
5843 	else {
5844 		ptr = trace_eval_maps;
5845 		for (;;) {
5846 			ptr = trace_eval_jmp_to_tail(ptr);
5847 			if (!ptr->tail.next)
5848 				break;
5849 			ptr = ptr->tail.next;
5850 
5851 		}
5852 		ptr->tail.next = map_array;
5853 	}
5854 	map_array->head.mod = mod;
5855 	map_array->head.length = len;
5856 	map_array++;
5857 
5858 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5859 		map_array->map = **map;
5860 		map_array++;
5861 	}
5862 	memset(map_array, 0, sizeof(*map_array));
5863 }
5864 
5865 static void trace_create_eval_file(struct dentry *d_tracer)
5866 {
5867 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5868 			  NULL, &tracing_eval_map_fops);
5869 }
5870 
5871 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5872 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5873 static inline void trace_insert_eval_map_file(struct module *mod,
5874 			      struct trace_eval_map **start, int len) { }
5875 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5876 
5877 static void trace_insert_eval_map(struct module *mod,
5878 				  struct trace_eval_map **start, int len)
5879 {
5880 	struct trace_eval_map **map;
5881 
5882 	if (len <= 0)
5883 		return;
5884 
5885 	map = start;
5886 
5887 	trace_event_eval_update(map, len);
5888 
5889 	trace_insert_eval_map_file(mod, start, len);
5890 }
5891 
5892 static ssize_t
5893 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5894 		       size_t cnt, loff_t *ppos)
5895 {
5896 	struct trace_array *tr = filp->private_data;
5897 	char buf[MAX_TRACER_SIZE+2];
5898 	int r;
5899 
5900 	mutex_lock(&trace_types_lock);
5901 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5902 	mutex_unlock(&trace_types_lock);
5903 
5904 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5905 }
5906 
5907 int tracer_init(struct tracer *t, struct trace_array *tr)
5908 {
5909 	tracing_reset_online_cpus(&tr->array_buffer);
5910 	return t->init(tr);
5911 }
5912 
5913 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5914 {
5915 	int cpu;
5916 
5917 	for_each_tracing_cpu(cpu)
5918 		per_cpu_ptr(buf->data, cpu)->entries = val;
5919 }
5920 
5921 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5922 {
5923 	if (cpu == RING_BUFFER_ALL_CPUS) {
5924 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5925 	} else {
5926 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5927 	}
5928 }
5929 
5930 #ifdef CONFIG_TRACER_MAX_TRACE
5931 /* resize @tr's buffer to the size of @size_tr's entries */
5932 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5933 					struct array_buffer *size_buf, int cpu_id)
5934 {
5935 	int cpu, ret = 0;
5936 
5937 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5938 		for_each_tracing_cpu(cpu) {
5939 			ret = ring_buffer_resize(trace_buf->buffer,
5940 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5941 			if (ret < 0)
5942 				break;
5943 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5944 				per_cpu_ptr(size_buf->data, cpu)->entries;
5945 		}
5946 	} else {
5947 		ret = ring_buffer_resize(trace_buf->buffer,
5948 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5949 		if (ret == 0)
5950 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5951 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5952 	}
5953 
5954 	return ret;
5955 }
5956 #endif /* CONFIG_TRACER_MAX_TRACE */
5957 
5958 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5959 					unsigned long size, int cpu)
5960 {
5961 	int ret;
5962 
5963 	/*
5964 	 * If kernel or user changes the size of the ring buffer
5965 	 * we use the size that was given, and we can forget about
5966 	 * expanding it later.
5967 	 */
5968 	trace_set_ring_buffer_expanded(tr);
5969 
5970 	/* May be called before buffers are initialized */
5971 	if (!tr->array_buffer.buffer)
5972 		return 0;
5973 
5974 	/* Do not allow tracing while resizing ring buffer */
5975 	tracing_stop_tr(tr);
5976 
5977 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5978 	if (ret < 0)
5979 		goto out_start;
5980 
5981 #ifdef CONFIG_TRACER_MAX_TRACE
5982 	if (!tr->allocated_snapshot)
5983 		goto out;
5984 
5985 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5986 	if (ret < 0) {
5987 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5988 						     &tr->array_buffer, cpu);
5989 		if (r < 0) {
5990 			/*
5991 			 * AARGH! We are left with different
5992 			 * size max buffer!!!!
5993 			 * The max buffer is our "snapshot" buffer.
5994 			 * When a tracer needs a snapshot (one of the
5995 			 * latency tracers), it swaps the max buffer
5996 			 * with the saved snap shot. We succeeded to
5997 			 * update the size of the main buffer, but failed to
5998 			 * update the size of the max buffer. But when we tried
5999 			 * to reset the main buffer to the original size, we
6000 			 * failed there too. This is very unlikely to
6001 			 * happen, but if it does, warn and kill all
6002 			 * tracing.
6003 			 */
6004 			WARN_ON(1);
6005 			tracing_disabled = 1;
6006 		}
6007 		goto out_start;
6008 	}
6009 
6010 	update_buffer_entries(&tr->max_buffer, cpu);
6011 
6012  out:
6013 #endif /* CONFIG_TRACER_MAX_TRACE */
6014 
6015 	update_buffer_entries(&tr->array_buffer, cpu);
6016  out_start:
6017 	tracing_start_tr(tr);
6018 	return ret;
6019 }
6020 
6021 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6022 				  unsigned long size, int cpu_id)
6023 {
6024 	guard(mutex)(&trace_types_lock);
6025 
6026 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6027 		/* make sure, this cpu is enabled in the mask */
6028 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6029 			return -EINVAL;
6030 	}
6031 
6032 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6033 }
6034 
6035 struct trace_mod_entry {
6036 	unsigned long	mod_addr;
6037 	char		mod_name[MODULE_NAME_LEN];
6038 };
6039 
6040 struct trace_scratch {
6041 	unsigned long		text_addr;
6042 	unsigned long		nr_entries;
6043 	struct trace_mod_entry	entries[];
6044 };
6045 
6046 static DEFINE_MUTEX(scratch_mutex);
6047 
6048 static int cmp_mod_entry(const void *key, const void *pivot)
6049 {
6050 	unsigned long addr = (unsigned long)key;
6051 	const struct trace_mod_entry *ent = pivot;
6052 
6053 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6054 		return 0;
6055 	else
6056 		return addr - ent->mod_addr;
6057 }
6058 
6059 /**
6060  * trace_adjust_address() - Adjust prev boot address to current address.
6061  * @tr: Persistent ring buffer's trace_array.
6062  * @addr: Address in @tr which is adjusted.
6063  */
6064 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6065 {
6066 	struct trace_module_delta *module_delta;
6067 	struct trace_scratch *tscratch;
6068 	struct trace_mod_entry *entry;
6069 	int idx = 0, nr_entries;
6070 
6071 	/* If we don't have last boot delta, return the address */
6072 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6073 		return addr;
6074 
6075 	/* tr->module_delta must be protected by rcu. */
6076 	guard(rcu)();
6077 	tscratch = tr->scratch;
6078 	/* if there is no tscrach, module_delta must be NULL. */
6079 	module_delta = READ_ONCE(tr->module_delta);
6080 	if (!module_delta || !tscratch->nr_entries ||
6081 	    tscratch->entries[0].mod_addr > addr) {
6082 		return addr + tr->text_delta;
6083 	}
6084 
6085 	/* Note that entries must be sorted. */
6086 	nr_entries = tscratch->nr_entries;
6087 	if (nr_entries == 1 ||
6088 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6089 		idx = nr_entries - 1;
6090 	else {
6091 		entry = __inline_bsearch((void *)addr,
6092 				tscratch->entries,
6093 				nr_entries - 1,
6094 				sizeof(tscratch->entries[0]),
6095 				cmp_mod_entry);
6096 		if (entry)
6097 			idx = entry - tscratch->entries;
6098 	}
6099 
6100 	return addr + module_delta->delta[idx];
6101 }
6102 
6103 #ifdef CONFIG_MODULES
6104 static int save_mod(struct module *mod, void *data)
6105 {
6106 	struct trace_array *tr = data;
6107 	struct trace_scratch *tscratch;
6108 	struct trace_mod_entry *entry;
6109 	unsigned int size;
6110 
6111 	tscratch = tr->scratch;
6112 	if (!tscratch)
6113 		return -1;
6114 	size = tr->scratch_size;
6115 
6116 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6117 		return -1;
6118 
6119 	entry = &tscratch->entries[tscratch->nr_entries];
6120 
6121 	tscratch->nr_entries++;
6122 
6123 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6124 	strscpy(entry->mod_name, mod->name);
6125 
6126 	return 0;
6127 }
6128 #else
6129 static int save_mod(struct module *mod, void *data)
6130 {
6131 	return 0;
6132 }
6133 #endif
6134 
6135 static void update_last_data(struct trace_array *tr)
6136 {
6137 	struct trace_module_delta *module_delta;
6138 	struct trace_scratch *tscratch;
6139 
6140 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6141 		return;
6142 
6143 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6144 		return;
6145 
6146 	/* Only if the buffer has previous boot data clear and update it. */
6147 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6148 
6149 	/* Reset the module list and reload them */
6150 	if (tr->scratch) {
6151 		struct trace_scratch *tscratch = tr->scratch;
6152 
6153 		memset(tscratch->entries, 0,
6154 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6155 		tscratch->nr_entries = 0;
6156 
6157 		guard(mutex)(&scratch_mutex);
6158 		module_for_each_mod(save_mod, tr);
6159 	}
6160 
6161 	/*
6162 	 * Need to clear all CPU buffers as there cannot be events
6163 	 * from the previous boot mixed with events with this boot
6164 	 * as that will cause a confusing trace. Need to clear all
6165 	 * CPU buffers, even for those that may currently be offline.
6166 	 */
6167 	tracing_reset_all_cpus(&tr->array_buffer);
6168 
6169 	/* Using current data now */
6170 	tr->text_delta = 0;
6171 
6172 	if (!tr->scratch)
6173 		return;
6174 
6175 	tscratch = tr->scratch;
6176 	module_delta = READ_ONCE(tr->module_delta);
6177 	WRITE_ONCE(tr->module_delta, NULL);
6178 	kfree_rcu(module_delta, rcu);
6179 
6180 	/* Set the persistent ring buffer meta data to this address */
6181 	tscratch->text_addr = (unsigned long)_text;
6182 }
6183 
6184 /**
6185  * tracing_update_buffers - used by tracing facility to expand ring buffers
6186  * @tr: The tracing instance
6187  *
6188  * To save on memory when the tracing is never used on a system with it
6189  * configured in. The ring buffers are set to a minimum size. But once
6190  * a user starts to use the tracing facility, then they need to grow
6191  * to their default size.
6192  *
6193  * This function is to be called when a tracer is about to be used.
6194  */
6195 int tracing_update_buffers(struct trace_array *tr)
6196 {
6197 	int ret = 0;
6198 
6199 	mutex_lock(&trace_types_lock);
6200 
6201 	update_last_data(tr);
6202 
6203 	if (!tr->ring_buffer_expanded)
6204 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6205 						RING_BUFFER_ALL_CPUS);
6206 	mutex_unlock(&trace_types_lock);
6207 
6208 	return ret;
6209 }
6210 
6211 struct trace_option_dentry;
6212 
6213 static void
6214 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6215 
6216 /*
6217  * Used to clear out the tracer before deletion of an instance.
6218  * Must have trace_types_lock held.
6219  */
6220 static void tracing_set_nop(struct trace_array *tr)
6221 {
6222 	if (tr->current_trace == &nop_trace)
6223 		return;
6224 
6225 	tr->current_trace->enabled--;
6226 
6227 	if (tr->current_trace->reset)
6228 		tr->current_trace->reset(tr);
6229 
6230 	tr->current_trace = &nop_trace;
6231 }
6232 
6233 static bool tracer_options_updated;
6234 
6235 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6236 {
6237 	/* Only enable if the directory has been created already. */
6238 	if (!tr->dir)
6239 		return;
6240 
6241 	/* Only create trace option files after update_tracer_options finish */
6242 	if (!tracer_options_updated)
6243 		return;
6244 
6245 	create_trace_option_files(tr, t);
6246 }
6247 
6248 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6249 {
6250 	struct tracer *t;
6251 #ifdef CONFIG_TRACER_MAX_TRACE
6252 	bool had_max_tr;
6253 #endif
6254 	int ret;
6255 
6256 	guard(mutex)(&trace_types_lock);
6257 
6258 	update_last_data(tr);
6259 
6260 	if (!tr->ring_buffer_expanded) {
6261 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6262 						RING_BUFFER_ALL_CPUS);
6263 		if (ret < 0)
6264 			return ret;
6265 		ret = 0;
6266 	}
6267 
6268 	for (t = trace_types; t; t = t->next) {
6269 		if (strcmp(t->name, buf) == 0)
6270 			break;
6271 	}
6272 	if (!t)
6273 		return -EINVAL;
6274 
6275 	if (t == tr->current_trace)
6276 		return 0;
6277 
6278 #ifdef CONFIG_TRACER_SNAPSHOT
6279 	if (t->use_max_tr) {
6280 		local_irq_disable();
6281 		arch_spin_lock(&tr->max_lock);
6282 		ret = tr->cond_snapshot ? -EBUSY : 0;
6283 		arch_spin_unlock(&tr->max_lock);
6284 		local_irq_enable();
6285 		if (ret)
6286 			return ret;
6287 	}
6288 #endif
6289 	/* Some tracers won't work on kernel command line */
6290 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6291 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6292 			t->name);
6293 		return -EINVAL;
6294 	}
6295 
6296 	/* Some tracers are only allowed for the top level buffer */
6297 	if (!trace_ok_for_array(t, tr))
6298 		return -EINVAL;
6299 
6300 	/* If trace pipe files are being read, we can't change the tracer */
6301 	if (tr->trace_ref)
6302 		return -EBUSY;
6303 
6304 	trace_branch_disable();
6305 
6306 	tr->current_trace->enabled--;
6307 
6308 	if (tr->current_trace->reset)
6309 		tr->current_trace->reset(tr);
6310 
6311 #ifdef CONFIG_TRACER_MAX_TRACE
6312 	had_max_tr = tr->current_trace->use_max_tr;
6313 
6314 	/* Current trace needs to be nop_trace before synchronize_rcu */
6315 	tr->current_trace = &nop_trace;
6316 
6317 	if (had_max_tr && !t->use_max_tr) {
6318 		/*
6319 		 * We need to make sure that the update_max_tr sees that
6320 		 * current_trace changed to nop_trace to keep it from
6321 		 * swapping the buffers after we resize it.
6322 		 * The update_max_tr is called from interrupts disabled
6323 		 * so a synchronized_sched() is sufficient.
6324 		 */
6325 		synchronize_rcu();
6326 		free_snapshot(tr);
6327 		tracing_disarm_snapshot(tr);
6328 	}
6329 
6330 	if (!had_max_tr && t->use_max_tr) {
6331 		ret = tracing_arm_snapshot_locked(tr);
6332 		if (ret)
6333 			return ret;
6334 	}
6335 #else
6336 	tr->current_trace = &nop_trace;
6337 #endif
6338 
6339 	if (t->init) {
6340 		ret = tracer_init(t, tr);
6341 		if (ret) {
6342 #ifdef CONFIG_TRACER_MAX_TRACE
6343 			if (t->use_max_tr)
6344 				tracing_disarm_snapshot(tr);
6345 #endif
6346 			return ret;
6347 		}
6348 	}
6349 
6350 	tr->current_trace = t;
6351 	tr->current_trace->enabled++;
6352 	trace_branch_enable(tr);
6353 
6354 	return 0;
6355 }
6356 
6357 static ssize_t
6358 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6359 			size_t cnt, loff_t *ppos)
6360 {
6361 	struct trace_array *tr = filp->private_data;
6362 	char buf[MAX_TRACER_SIZE+1];
6363 	char *name;
6364 	size_t ret;
6365 	int err;
6366 
6367 	ret = cnt;
6368 
6369 	if (cnt > MAX_TRACER_SIZE)
6370 		cnt = MAX_TRACER_SIZE;
6371 
6372 	if (copy_from_user(buf, ubuf, cnt))
6373 		return -EFAULT;
6374 
6375 	buf[cnt] = 0;
6376 
6377 	name = strim(buf);
6378 
6379 	err = tracing_set_tracer(tr, name);
6380 	if (err)
6381 		return err;
6382 
6383 	*ppos += ret;
6384 
6385 	return ret;
6386 }
6387 
6388 static ssize_t
6389 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6390 		   size_t cnt, loff_t *ppos)
6391 {
6392 	char buf[64];
6393 	int r;
6394 
6395 	r = snprintf(buf, sizeof(buf), "%ld\n",
6396 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6397 	if (r > sizeof(buf))
6398 		r = sizeof(buf);
6399 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6400 }
6401 
6402 static ssize_t
6403 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6404 		    size_t cnt, loff_t *ppos)
6405 {
6406 	unsigned long val;
6407 	int ret;
6408 
6409 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6410 	if (ret)
6411 		return ret;
6412 
6413 	*ptr = val * 1000;
6414 
6415 	return cnt;
6416 }
6417 
6418 static ssize_t
6419 tracing_thresh_read(struct file *filp, char __user *ubuf,
6420 		    size_t cnt, loff_t *ppos)
6421 {
6422 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6423 }
6424 
6425 static ssize_t
6426 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6427 		     size_t cnt, loff_t *ppos)
6428 {
6429 	struct trace_array *tr = filp->private_data;
6430 	int ret;
6431 
6432 	guard(mutex)(&trace_types_lock);
6433 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6434 	if (ret < 0)
6435 		return ret;
6436 
6437 	if (tr->current_trace->update_thresh) {
6438 		ret = tr->current_trace->update_thresh(tr);
6439 		if (ret < 0)
6440 			return ret;
6441 	}
6442 
6443 	return cnt;
6444 }
6445 
6446 #ifdef CONFIG_TRACER_MAX_TRACE
6447 
6448 static ssize_t
6449 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6450 		     size_t cnt, loff_t *ppos)
6451 {
6452 	struct trace_array *tr = filp->private_data;
6453 
6454 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6455 }
6456 
6457 static ssize_t
6458 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6459 		      size_t cnt, loff_t *ppos)
6460 {
6461 	struct trace_array *tr = filp->private_data;
6462 
6463 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6464 }
6465 
6466 #endif
6467 
6468 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6469 {
6470 	if (cpu == RING_BUFFER_ALL_CPUS) {
6471 		if (cpumask_empty(tr->pipe_cpumask)) {
6472 			cpumask_setall(tr->pipe_cpumask);
6473 			return 0;
6474 		}
6475 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6476 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6477 		return 0;
6478 	}
6479 	return -EBUSY;
6480 }
6481 
6482 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6483 {
6484 	if (cpu == RING_BUFFER_ALL_CPUS) {
6485 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6486 		cpumask_clear(tr->pipe_cpumask);
6487 	} else {
6488 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6489 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6490 	}
6491 }
6492 
6493 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6494 {
6495 	struct trace_array *tr = inode->i_private;
6496 	struct trace_iterator *iter;
6497 	int cpu;
6498 	int ret;
6499 
6500 	ret = tracing_check_open_get_tr(tr);
6501 	if (ret)
6502 		return ret;
6503 
6504 	mutex_lock(&trace_types_lock);
6505 	cpu = tracing_get_cpu(inode);
6506 	ret = open_pipe_on_cpu(tr, cpu);
6507 	if (ret)
6508 		goto fail_pipe_on_cpu;
6509 
6510 	/* create a buffer to store the information to pass to userspace */
6511 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6512 	if (!iter) {
6513 		ret = -ENOMEM;
6514 		goto fail_alloc_iter;
6515 	}
6516 
6517 	trace_seq_init(&iter->seq);
6518 	iter->trace = tr->current_trace;
6519 
6520 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6521 		ret = -ENOMEM;
6522 		goto fail;
6523 	}
6524 
6525 	/* trace pipe does not show start of buffer */
6526 	cpumask_setall(iter->started);
6527 
6528 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6529 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6530 
6531 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6532 	if (trace_clocks[tr->clock_id].in_ns)
6533 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6534 
6535 	iter->tr = tr;
6536 	iter->array_buffer = &tr->array_buffer;
6537 	iter->cpu_file = cpu;
6538 	mutex_init(&iter->mutex);
6539 	filp->private_data = iter;
6540 
6541 	if (iter->trace->pipe_open)
6542 		iter->trace->pipe_open(iter);
6543 
6544 	nonseekable_open(inode, filp);
6545 
6546 	tr->trace_ref++;
6547 
6548 	mutex_unlock(&trace_types_lock);
6549 	return ret;
6550 
6551 fail:
6552 	kfree(iter);
6553 fail_alloc_iter:
6554 	close_pipe_on_cpu(tr, cpu);
6555 fail_pipe_on_cpu:
6556 	__trace_array_put(tr);
6557 	mutex_unlock(&trace_types_lock);
6558 	return ret;
6559 }
6560 
6561 static int tracing_release_pipe(struct inode *inode, struct file *file)
6562 {
6563 	struct trace_iterator *iter = file->private_data;
6564 	struct trace_array *tr = inode->i_private;
6565 
6566 	mutex_lock(&trace_types_lock);
6567 
6568 	tr->trace_ref--;
6569 
6570 	if (iter->trace->pipe_close)
6571 		iter->trace->pipe_close(iter);
6572 	close_pipe_on_cpu(tr, iter->cpu_file);
6573 	mutex_unlock(&trace_types_lock);
6574 
6575 	free_trace_iter_content(iter);
6576 	kfree(iter);
6577 
6578 	trace_array_put(tr);
6579 
6580 	return 0;
6581 }
6582 
6583 static __poll_t
6584 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6585 {
6586 	struct trace_array *tr = iter->tr;
6587 
6588 	/* Iterators are static, they should be filled or empty */
6589 	if (trace_buffer_iter(iter, iter->cpu_file))
6590 		return EPOLLIN | EPOLLRDNORM;
6591 
6592 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6593 		/*
6594 		 * Always select as readable when in blocking mode
6595 		 */
6596 		return EPOLLIN | EPOLLRDNORM;
6597 	else
6598 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6599 					     filp, poll_table, iter->tr->buffer_percent);
6600 }
6601 
6602 static __poll_t
6603 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6604 {
6605 	struct trace_iterator *iter = filp->private_data;
6606 
6607 	return trace_poll(iter, filp, poll_table);
6608 }
6609 
6610 /* Must be called with iter->mutex held. */
6611 static int tracing_wait_pipe(struct file *filp)
6612 {
6613 	struct trace_iterator *iter = filp->private_data;
6614 	int ret;
6615 
6616 	while (trace_empty(iter)) {
6617 
6618 		if ((filp->f_flags & O_NONBLOCK)) {
6619 			return -EAGAIN;
6620 		}
6621 
6622 		/*
6623 		 * We block until we read something and tracing is disabled.
6624 		 * We still block if tracing is disabled, but we have never
6625 		 * read anything. This allows a user to cat this file, and
6626 		 * then enable tracing. But after we have read something,
6627 		 * we give an EOF when tracing is again disabled.
6628 		 *
6629 		 * iter->pos will be 0 if we haven't read anything.
6630 		 */
6631 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6632 			break;
6633 
6634 		mutex_unlock(&iter->mutex);
6635 
6636 		ret = wait_on_pipe(iter, 0);
6637 
6638 		mutex_lock(&iter->mutex);
6639 
6640 		if (ret)
6641 			return ret;
6642 	}
6643 
6644 	return 1;
6645 }
6646 
6647 /*
6648  * Consumer reader.
6649  */
6650 static ssize_t
6651 tracing_read_pipe(struct file *filp, char __user *ubuf,
6652 		  size_t cnt, loff_t *ppos)
6653 {
6654 	struct trace_iterator *iter = filp->private_data;
6655 	ssize_t sret;
6656 
6657 	/*
6658 	 * Avoid more than one consumer on a single file descriptor
6659 	 * This is just a matter of traces coherency, the ring buffer itself
6660 	 * is protected.
6661 	 */
6662 	guard(mutex)(&iter->mutex);
6663 
6664 	/* return any leftover data */
6665 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6666 	if (sret != -EBUSY)
6667 		return sret;
6668 
6669 	trace_seq_init(&iter->seq);
6670 
6671 	if (iter->trace->read) {
6672 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6673 		if (sret)
6674 			return sret;
6675 	}
6676 
6677 waitagain:
6678 	sret = tracing_wait_pipe(filp);
6679 	if (sret <= 0)
6680 		return sret;
6681 
6682 	/* stop when tracing is finished */
6683 	if (trace_empty(iter))
6684 		return 0;
6685 
6686 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6687 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6688 
6689 	/* reset all but tr, trace, and overruns */
6690 	trace_iterator_reset(iter);
6691 	cpumask_clear(iter->started);
6692 	trace_seq_init(&iter->seq);
6693 
6694 	trace_event_read_lock();
6695 	trace_access_lock(iter->cpu_file);
6696 	while (trace_find_next_entry_inc(iter) != NULL) {
6697 		enum print_line_t ret;
6698 		int save_len = iter->seq.seq.len;
6699 
6700 		ret = print_trace_line(iter);
6701 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6702 			/*
6703 			 * If one print_trace_line() fills entire trace_seq in one shot,
6704 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6705 			 * In this case, we need to consume it, otherwise, loop will peek
6706 			 * this event next time, resulting in an infinite loop.
6707 			 */
6708 			if (save_len == 0) {
6709 				iter->seq.full = 0;
6710 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6711 				trace_consume(iter);
6712 				break;
6713 			}
6714 
6715 			/* In other cases, don't print partial lines */
6716 			iter->seq.seq.len = save_len;
6717 			break;
6718 		}
6719 		if (ret != TRACE_TYPE_NO_CONSUME)
6720 			trace_consume(iter);
6721 
6722 		if (trace_seq_used(&iter->seq) >= cnt)
6723 			break;
6724 
6725 		/*
6726 		 * Setting the full flag means we reached the trace_seq buffer
6727 		 * size and we should leave by partial output condition above.
6728 		 * One of the trace_seq_* functions is not used properly.
6729 		 */
6730 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6731 			  iter->ent->type);
6732 	}
6733 	trace_access_unlock(iter->cpu_file);
6734 	trace_event_read_unlock();
6735 
6736 	/* Now copy what we have to the user */
6737 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6738 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6739 		trace_seq_init(&iter->seq);
6740 
6741 	/*
6742 	 * If there was nothing to send to user, in spite of consuming trace
6743 	 * entries, go back to wait for more entries.
6744 	 */
6745 	if (sret == -EBUSY)
6746 		goto waitagain;
6747 
6748 	return sret;
6749 }
6750 
6751 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6752 				     unsigned int idx)
6753 {
6754 	__free_page(spd->pages[idx]);
6755 }
6756 
6757 static size_t
6758 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6759 {
6760 	size_t count;
6761 	int save_len;
6762 	int ret;
6763 
6764 	/* Seq buffer is page-sized, exactly what we need. */
6765 	for (;;) {
6766 		save_len = iter->seq.seq.len;
6767 		ret = print_trace_line(iter);
6768 
6769 		if (trace_seq_has_overflowed(&iter->seq)) {
6770 			iter->seq.seq.len = save_len;
6771 			break;
6772 		}
6773 
6774 		/*
6775 		 * This should not be hit, because it should only
6776 		 * be set if the iter->seq overflowed. But check it
6777 		 * anyway to be safe.
6778 		 */
6779 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6780 			iter->seq.seq.len = save_len;
6781 			break;
6782 		}
6783 
6784 		count = trace_seq_used(&iter->seq) - save_len;
6785 		if (rem < count) {
6786 			rem = 0;
6787 			iter->seq.seq.len = save_len;
6788 			break;
6789 		}
6790 
6791 		if (ret != TRACE_TYPE_NO_CONSUME)
6792 			trace_consume(iter);
6793 		rem -= count;
6794 		if (!trace_find_next_entry_inc(iter))	{
6795 			rem = 0;
6796 			iter->ent = NULL;
6797 			break;
6798 		}
6799 	}
6800 
6801 	return rem;
6802 }
6803 
6804 static ssize_t tracing_splice_read_pipe(struct file *filp,
6805 					loff_t *ppos,
6806 					struct pipe_inode_info *pipe,
6807 					size_t len,
6808 					unsigned int flags)
6809 {
6810 	struct page *pages_def[PIPE_DEF_BUFFERS];
6811 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6812 	struct trace_iterator *iter = filp->private_data;
6813 	struct splice_pipe_desc spd = {
6814 		.pages		= pages_def,
6815 		.partial	= partial_def,
6816 		.nr_pages	= 0, /* This gets updated below. */
6817 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6818 		.ops		= &default_pipe_buf_ops,
6819 		.spd_release	= tracing_spd_release_pipe,
6820 	};
6821 	ssize_t ret;
6822 	size_t rem;
6823 	unsigned int i;
6824 
6825 	if (splice_grow_spd(pipe, &spd))
6826 		return -ENOMEM;
6827 
6828 	mutex_lock(&iter->mutex);
6829 
6830 	if (iter->trace->splice_read) {
6831 		ret = iter->trace->splice_read(iter, filp,
6832 					       ppos, pipe, len, flags);
6833 		if (ret)
6834 			goto out_err;
6835 	}
6836 
6837 	ret = tracing_wait_pipe(filp);
6838 	if (ret <= 0)
6839 		goto out_err;
6840 
6841 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6842 		ret = -EFAULT;
6843 		goto out_err;
6844 	}
6845 
6846 	trace_event_read_lock();
6847 	trace_access_lock(iter->cpu_file);
6848 
6849 	/* Fill as many pages as possible. */
6850 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6851 		spd.pages[i] = alloc_page(GFP_KERNEL);
6852 		if (!spd.pages[i])
6853 			break;
6854 
6855 		rem = tracing_fill_pipe_page(rem, iter);
6856 
6857 		/* Copy the data into the page, so we can start over. */
6858 		ret = trace_seq_to_buffer(&iter->seq,
6859 					  page_address(spd.pages[i]),
6860 					  min((size_t)trace_seq_used(&iter->seq),
6861 						  PAGE_SIZE));
6862 		if (ret < 0) {
6863 			__free_page(spd.pages[i]);
6864 			break;
6865 		}
6866 		spd.partial[i].offset = 0;
6867 		spd.partial[i].len = ret;
6868 
6869 		trace_seq_init(&iter->seq);
6870 	}
6871 
6872 	trace_access_unlock(iter->cpu_file);
6873 	trace_event_read_unlock();
6874 	mutex_unlock(&iter->mutex);
6875 
6876 	spd.nr_pages = i;
6877 
6878 	if (i)
6879 		ret = splice_to_pipe(pipe, &spd);
6880 	else
6881 		ret = 0;
6882 out:
6883 	splice_shrink_spd(&spd);
6884 	return ret;
6885 
6886 out_err:
6887 	mutex_unlock(&iter->mutex);
6888 	goto out;
6889 }
6890 
6891 static ssize_t
6892 tracing_entries_read(struct file *filp, char __user *ubuf,
6893 		     size_t cnt, loff_t *ppos)
6894 {
6895 	struct inode *inode = file_inode(filp);
6896 	struct trace_array *tr = inode->i_private;
6897 	int cpu = tracing_get_cpu(inode);
6898 	char buf[64];
6899 	int r = 0;
6900 	ssize_t ret;
6901 
6902 	mutex_lock(&trace_types_lock);
6903 
6904 	if (cpu == RING_BUFFER_ALL_CPUS) {
6905 		int cpu, buf_size_same;
6906 		unsigned long size;
6907 
6908 		size = 0;
6909 		buf_size_same = 1;
6910 		/* check if all cpu sizes are same */
6911 		for_each_tracing_cpu(cpu) {
6912 			/* fill in the size from first enabled cpu */
6913 			if (size == 0)
6914 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6915 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6916 				buf_size_same = 0;
6917 				break;
6918 			}
6919 		}
6920 
6921 		if (buf_size_same) {
6922 			if (!tr->ring_buffer_expanded)
6923 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6924 					    size >> 10,
6925 					    trace_buf_size >> 10);
6926 			else
6927 				r = sprintf(buf, "%lu\n", size >> 10);
6928 		} else
6929 			r = sprintf(buf, "X\n");
6930 	} else
6931 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6932 
6933 	mutex_unlock(&trace_types_lock);
6934 
6935 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6936 	return ret;
6937 }
6938 
6939 static ssize_t
6940 tracing_entries_write(struct file *filp, const char __user *ubuf,
6941 		      size_t cnt, loff_t *ppos)
6942 {
6943 	struct inode *inode = file_inode(filp);
6944 	struct trace_array *tr = inode->i_private;
6945 	unsigned long val;
6946 	int ret;
6947 
6948 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6949 	if (ret)
6950 		return ret;
6951 
6952 	/* must have at least 1 entry */
6953 	if (!val)
6954 		return -EINVAL;
6955 
6956 	/* value is in KB */
6957 	val <<= 10;
6958 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6959 	if (ret < 0)
6960 		return ret;
6961 
6962 	*ppos += cnt;
6963 
6964 	return cnt;
6965 }
6966 
6967 static ssize_t
6968 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6969 				size_t cnt, loff_t *ppos)
6970 {
6971 	struct trace_array *tr = filp->private_data;
6972 	char buf[64];
6973 	int r, cpu;
6974 	unsigned long size = 0, expanded_size = 0;
6975 
6976 	mutex_lock(&trace_types_lock);
6977 	for_each_tracing_cpu(cpu) {
6978 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6979 		if (!tr->ring_buffer_expanded)
6980 			expanded_size += trace_buf_size >> 10;
6981 	}
6982 	if (tr->ring_buffer_expanded)
6983 		r = sprintf(buf, "%lu\n", size);
6984 	else
6985 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6986 	mutex_unlock(&trace_types_lock);
6987 
6988 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6989 }
6990 
6991 #define LAST_BOOT_HEADER ((void *)1)
6992 
6993 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
6994 {
6995 	struct trace_array *tr = m->private;
6996 	struct trace_scratch *tscratch = tr->scratch;
6997 	unsigned int index = *pos;
6998 
6999 	(*pos)++;
7000 
7001 	if (*pos == 1)
7002 		return LAST_BOOT_HEADER;
7003 
7004 	/* Only show offsets of the last boot data */
7005 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7006 		return NULL;
7007 
7008 	/* *pos 0 is for the header, 1 is for the first module */
7009 	index--;
7010 
7011 	if (index >= tscratch->nr_entries)
7012 		return NULL;
7013 
7014 	return &tscratch->entries[index];
7015 }
7016 
7017 static void *l_start(struct seq_file *m, loff_t *pos)
7018 {
7019 	mutex_lock(&scratch_mutex);
7020 
7021 	return l_next(m, NULL, pos);
7022 }
7023 
7024 static void l_stop(struct seq_file *m, void *p)
7025 {
7026 	mutex_unlock(&scratch_mutex);
7027 }
7028 
7029 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7030 {
7031 	struct trace_scratch *tscratch = tr->scratch;
7032 
7033 	/*
7034 	 * Do not leak KASLR address. This only shows the KASLR address of
7035 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7036 	 * flag gets cleared, and this should only report "current".
7037 	 * Otherwise it shows the KASLR address from the previous boot which
7038 	 * should not be the same as the current boot.
7039 	 */
7040 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7041 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7042 	else
7043 		seq_puts(m, "# Current\n");
7044 }
7045 
7046 static int l_show(struct seq_file *m, void *v)
7047 {
7048 	struct trace_array *tr = m->private;
7049 	struct trace_mod_entry *entry = v;
7050 
7051 	if (v == LAST_BOOT_HEADER) {
7052 		show_last_boot_header(m, tr);
7053 		return 0;
7054 	}
7055 
7056 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7057 	return 0;
7058 }
7059 
7060 static const struct seq_operations last_boot_seq_ops = {
7061 	.start		= l_start,
7062 	.next		= l_next,
7063 	.stop		= l_stop,
7064 	.show		= l_show,
7065 };
7066 
7067 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7068 {
7069 	struct trace_array *tr = inode->i_private;
7070 	struct seq_file *m;
7071 	int ret;
7072 
7073 	ret = tracing_check_open_get_tr(tr);
7074 	if (ret)
7075 		return ret;
7076 
7077 	ret = seq_open(file, &last_boot_seq_ops);
7078 	if (ret) {
7079 		trace_array_put(tr);
7080 		return ret;
7081 	}
7082 
7083 	m = file->private_data;
7084 	m->private = tr;
7085 
7086 	return 0;
7087 }
7088 
7089 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7090 {
7091 	struct trace_array *tr = inode->i_private;
7092 	int cpu = tracing_get_cpu(inode);
7093 	int ret;
7094 
7095 	ret = tracing_check_open_get_tr(tr);
7096 	if (ret)
7097 		return ret;
7098 
7099 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7100 	if (ret < 0)
7101 		__trace_array_put(tr);
7102 	return ret;
7103 }
7104 
7105 static ssize_t
7106 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7107 			  size_t cnt, loff_t *ppos)
7108 {
7109 	/*
7110 	 * There is no need to read what the user has written, this function
7111 	 * is just to make sure that there is no error when "echo" is used
7112 	 */
7113 
7114 	*ppos += cnt;
7115 
7116 	return cnt;
7117 }
7118 
7119 static int
7120 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7121 {
7122 	struct trace_array *tr = inode->i_private;
7123 
7124 	/* disable tracing ? */
7125 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7126 		tracer_tracing_off(tr);
7127 	/* resize the ring buffer to 0 */
7128 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7129 
7130 	trace_array_put(tr);
7131 
7132 	return 0;
7133 }
7134 
7135 #define TRACE_MARKER_MAX_SIZE		4096
7136 
7137 static ssize_t
7138 tracing_mark_write(struct file *filp, const char __user *ubuf,
7139 					size_t cnt, loff_t *fpos)
7140 {
7141 	struct trace_array *tr = filp->private_data;
7142 	struct ring_buffer_event *event;
7143 	enum event_trigger_type tt = ETT_NONE;
7144 	struct trace_buffer *buffer;
7145 	struct print_entry *entry;
7146 	int meta_size;
7147 	ssize_t written;
7148 	size_t size;
7149 	int len;
7150 
7151 /* Used in tracing_mark_raw_write() as well */
7152 #define FAULTED_STR "<faulted>"
7153 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7154 
7155 	if (tracing_disabled)
7156 		return -EINVAL;
7157 
7158 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7159 		return -EINVAL;
7160 
7161 	if ((ssize_t)cnt < 0)
7162 		return -EINVAL;
7163 
7164 	if (cnt > TRACE_MARKER_MAX_SIZE)
7165 		cnt = TRACE_MARKER_MAX_SIZE;
7166 
7167 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7168  again:
7169 	size = cnt + meta_size;
7170 
7171 	/* If less than "<faulted>", then make sure we can still add that */
7172 	if (cnt < FAULTED_SIZE)
7173 		size += FAULTED_SIZE - cnt;
7174 
7175 	buffer = tr->array_buffer.buffer;
7176 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7177 					    tracing_gen_ctx());
7178 	if (unlikely(!event)) {
7179 		/*
7180 		 * If the size was greater than what was allowed, then
7181 		 * make it smaller and try again.
7182 		 */
7183 		if (size > ring_buffer_max_event_size(buffer)) {
7184 			/* cnt < FAULTED size should never be bigger than max */
7185 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7186 				return -EBADF;
7187 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7188 			/* The above should only happen once */
7189 			if (WARN_ON_ONCE(cnt + meta_size == size))
7190 				return -EBADF;
7191 			goto again;
7192 		}
7193 
7194 		/* Ring buffer disabled, return as if not open for write */
7195 		return -EBADF;
7196 	}
7197 
7198 	entry = ring_buffer_event_data(event);
7199 	entry->ip = _THIS_IP_;
7200 
7201 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7202 	if (len) {
7203 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7204 		cnt = FAULTED_SIZE;
7205 		written = -EFAULT;
7206 	} else
7207 		written = cnt;
7208 
7209 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7210 		/* do not add \n before testing triggers, but add \0 */
7211 		entry->buf[cnt] = '\0';
7212 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7213 	}
7214 
7215 	if (entry->buf[cnt - 1] != '\n') {
7216 		entry->buf[cnt] = '\n';
7217 		entry->buf[cnt + 1] = '\0';
7218 	} else
7219 		entry->buf[cnt] = '\0';
7220 
7221 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7222 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7223 	__buffer_unlock_commit(buffer, event);
7224 
7225 	if (tt)
7226 		event_triggers_post_call(tr->trace_marker_file, tt);
7227 
7228 	return written;
7229 }
7230 
7231 static ssize_t
7232 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7233 					size_t cnt, loff_t *fpos)
7234 {
7235 	struct trace_array *tr = filp->private_data;
7236 	struct ring_buffer_event *event;
7237 	struct trace_buffer *buffer;
7238 	struct raw_data_entry *entry;
7239 	ssize_t written;
7240 	int size;
7241 	int len;
7242 
7243 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7244 
7245 	if (tracing_disabled)
7246 		return -EINVAL;
7247 
7248 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7249 		return -EINVAL;
7250 
7251 	/* The marker must at least have a tag id */
7252 	if (cnt < sizeof(unsigned int))
7253 		return -EINVAL;
7254 
7255 	size = sizeof(*entry) + cnt;
7256 	if (cnt < FAULT_SIZE_ID)
7257 		size += FAULT_SIZE_ID - cnt;
7258 
7259 	buffer = tr->array_buffer.buffer;
7260 
7261 	if (size > ring_buffer_max_event_size(buffer))
7262 		return -EINVAL;
7263 
7264 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7265 					    tracing_gen_ctx());
7266 	if (!event)
7267 		/* Ring buffer disabled, return as if not open for write */
7268 		return -EBADF;
7269 
7270 	entry = ring_buffer_event_data(event);
7271 
7272 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7273 	if (len) {
7274 		entry->id = -1;
7275 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7276 		written = -EFAULT;
7277 	} else
7278 		written = cnt;
7279 
7280 	__buffer_unlock_commit(buffer, event);
7281 
7282 	return written;
7283 }
7284 
7285 static int tracing_clock_show(struct seq_file *m, void *v)
7286 {
7287 	struct trace_array *tr = m->private;
7288 	int i;
7289 
7290 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7291 		seq_printf(m,
7292 			"%s%s%s%s", i ? " " : "",
7293 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7294 			i == tr->clock_id ? "]" : "");
7295 	seq_putc(m, '\n');
7296 
7297 	return 0;
7298 }
7299 
7300 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7301 {
7302 	int i;
7303 
7304 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7305 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7306 			break;
7307 	}
7308 	if (i == ARRAY_SIZE(trace_clocks))
7309 		return -EINVAL;
7310 
7311 	mutex_lock(&trace_types_lock);
7312 
7313 	tr->clock_id = i;
7314 
7315 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7316 
7317 	/*
7318 	 * New clock may not be consistent with the previous clock.
7319 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7320 	 */
7321 	tracing_reset_online_cpus(&tr->array_buffer);
7322 
7323 #ifdef CONFIG_TRACER_MAX_TRACE
7324 	if (tr->max_buffer.buffer)
7325 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7326 	tracing_reset_online_cpus(&tr->max_buffer);
7327 #endif
7328 
7329 	mutex_unlock(&trace_types_lock);
7330 
7331 	return 0;
7332 }
7333 
7334 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7335 				   size_t cnt, loff_t *fpos)
7336 {
7337 	struct seq_file *m = filp->private_data;
7338 	struct trace_array *tr = m->private;
7339 	char buf[64];
7340 	const char *clockstr;
7341 	int ret;
7342 
7343 	if (cnt >= sizeof(buf))
7344 		return -EINVAL;
7345 
7346 	if (copy_from_user(buf, ubuf, cnt))
7347 		return -EFAULT;
7348 
7349 	buf[cnt] = 0;
7350 
7351 	clockstr = strstrip(buf);
7352 
7353 	ret = tracing_set_clock(tr, clockstr);
7354 	if (ret)
7355 		return ret;
7356 
7357 	*fpos += cnt;
7358 
7359 	return cnt;
7360 }
7361 
7362 static int tracing_clock_open(struct inode *inode, struct file *file)
7363 {
7364 	struct trace_array *tr = inode->i_private;
7365 	int ret;
7366 
7367 	ret = tracing_check_open_get_tr(tr);
7368 	if (ret)
7369 		return ret;
7370 
7371 	ret = single_open(file, tracing_clock_show, inode->i_private);
7372 	if (ret < 0)
7373 		trace_array_put(tr);
7374 
7375 	return ret;
7376 }
7377 
7378 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7379 {
7380 	struct trace_array *tr = m->private;
7381 
7382 	mutex_lock(&trace_types_lock);
7383 
7384 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7385 		seq_puts(m, "delta [absolute]\n");
7386 	else
7387 		seq_puts(m, "[delta] absolute\n");
7388 
7389 	mutex_unlock(&trace_types_lock);
7390 
7391 	return 0;
7392 }
7393 
7394 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7395 {
7396 	struct trace_array *tr = inode->i_private;
7397 	int ret;
7398 
7399 	ret = tracing_check_open_get_tr(tr);
7400 	if (ret)
7401 		return ret;
7402 
7403 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7404 	if (ret < 0)
7405 		trace_array_put(tr);
7406 
7407 	return ret;
7408 }
7409 
7410 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7411 {
7412 	if (rbe == this_cpu_read(trace_buffered_event))
7413 		return ring_buffer_time_stamp(buffer);
7414 
7415 	return ring_buffer_event_time_stamp(buffer, rbe);
7416 }
7417 
7418 /*
7419  * Set or disable using the per CPU trace_buffer_event when possible.
7420  */
7421 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7422 {
7423 	guard(mutex)(&trace_types_lock);
7424 
7425 	if (set && tr->no_filter_buffering_ref++)
7426 		return 0;
7427 
7428 	if (!set) {
7429 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7430 			return -EINVAL;
7431 
7432 		--tr->no_filter_buffering_ref;
7433 	}
7434 
7435 	return 0;
7436 }
7437 
7438 struct ftrace_buffer_info {
7439 	struct trace_iterator	iter;
7440 	void			*spare;
7441 	unsigned int		spare_cpu;
7442 	unsigned int		spare_size;
7443 	unsigned int		read;
7444 };
7445 
7446 #ifdef CONFIG_TRACER_SNAPSHOT
7447 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7448 {
7449 	struct trace_array *tr = inode->i_private;
7450 	struct trace_iterator *iter;
7451 	struct seq_file *m;
7452 	int ret;
7453 
7454 	ret = tracing_check_open_get_tr(tr);
7455 	if (ret)
7456 		return ret;
7457 
7458 	if (file->f_mode & FMODE_READ) {
7459 		iter = __tracing_open(inode, file, true);
7460 		if (IS_ERR(iter))
7461 			ret = PTR_ERR(iter);
7462 	} else {
7463 		/* Writes still need the seq_file to hold the private data */
7464 		ret = -ENOMEM;
7465 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7466 		if (!m)
7467 			goto out;
7468 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7469 		if (!iter) {
7470 			kfree(m);
7471 			goto out;
7472 		}
7473 		ret = 0;
7474 
7475 		iter->tr = tr;
7476 		iter->array_buffer = &tr->max_buffer;
7477 		iter->cpu_file = tracing_get_cpu(inode);
7478 		m->private = iter;
7479 		file->private_data = m;
7480 	}
7481 out:
7482 	if (ret < 0)
7483 		trace_array_put(tr);
7484 
7485 	return ret;
7486 }
7487 
7488 static void tracing_swap_cpu_buffer(void *tr)
7489 {
7490 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7491 }
7492 
7493 static ssize_t
7494 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7495 		       loff_t *ppos)
7496 {
7497 	struct seq_file *m = filp->private_data;
7498 	struct trace_iterator *iter = m->private;
7499 	struct trace_array *tr = iter->tr;
7500 	unsigned long val;
7501 	int ret;
7502 
7503 	ret = tracing_update_buffers(tr);
7504 	if (ret < 0)
7505 		return ret;
7506 
7507 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7508 	if (ret)
7509 		return ret;
7510 
7511 	guard(mutex)(&trace_types_lock);
7512 
7513 	if (tr->current_trace->use_max_tr)
7514 		return -EBUSY;
7515 
7516 	local_irq_disable();
7517 	arch_spin_lock(&tr->max_lock);
7518 	if (tr->cond_snapshot)
7519 		ret = -EBUSY;
7520 	arch_spin_unlock(&tr->max_lock);
7521 	local_irq_enable();
7522 	if (ret)
7523 		return ret;
7524 
7525 	switch (val) {
7526 	case 0:
7527 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7528 			return -EINVAL;
7529 		if (tr->allocated_snapshot)
7530 			free_snapshot(tr);
7531 		break;
7532 	case 1:
7533 /* Only allow per-cpu swap if the ring buffer supports it */
7534 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7535 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7536 			return -EINVAL;
7537 #endif
7538 		if (tr->allocated_snapshot)
7539 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7540 					&tr->array_buffer, iter->cpu_file);
7541 
7542 		ret = tracing_arm_snapshot_locked(tr);
7543 		if (ret)
7544 			return ret;
7545 
7546 		/* Now, we're going to swap */
7547 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7548 			local_irq_disable();
7549 			update_max_tr(tr, current, smp_processor_id(), NULL);
7550 			local_irq_enable();
7551 		} else {
7552 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7553 						 (void *)tr, 1);
7554 		}
7555 		tracing_disarm_snapshot(tr);
7556 		break;
7557 	default:
7558 		if (tr->allocated_snapshot) {
7559 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7560 				tracing_reset_online_cpus(&tr->max_buffer);
7561 			else
7562 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7563 		}
7564 		break;
7565 	}
7566 
7567 	if (ret >= 0) {
7568 		*ppos += cnt;
7569 		ret = cnt;
7570 	}
7571 
7572 	return ret;
7573 }
7574 
7575 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7576 {
7577 	struct seq_file *m = file->private_data;
7578 	int ret;
7579 
7580 	ret = tracing_release(inode, file);
7581 
7582 	if (file->f_mode & FMODE_READ)
7583 		return ret;
7584 
7585 	/* If write only, the seq_file is just a stub */
7586 	if (m)
7587 		kfree(m->private);
7588 	kfree(m);
7589 
7590 	return 0;
7591 }
7592 
7593 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7594 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7595 				    size_t count, loff_t *ppos);
7596 static int tracing_buffers_release(struct inode *inode, struct file *file);
7597 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7598 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7599 
7600 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7601 {
7602 	struct ftrace_buffer_info *info;
7603 	int ret;
7604 
7605 	/* The following checks for tracefs lockdown */
7606 	ret = tracing_buffers_open(inode, filp);
7607 	if (ret < 0)
7608 		return ret;
7609 
7610 	info = filp->private_data;
7611 
7612 	if (info->iter.trace->use_max_tr) {
7613 		tracing_buffers_release(inode, filp);
7614 		return -EBUSY;
7615 	}
7616 
7617 	info->iter.snapshot = true;
7618 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7619 
7620 	return ret;
7621 }
7622 
7623 #endif /* CONFIG_TRACER_SNAPSHOT */
7624 
7625 
7626 static const struct file_operations tracing_thresh_fops = {
7627 	.open		= tracing_open_generic,
7628 	.read		= tracing_thresh_read,
7629 	.write		= tracing_thresh_write,
7630 	.llseek		= generic_file_llseek,
7631 };
7632 
7633 #ifdef CONFIG_TRACER_MAX_TRACE
7634 static const struct file_operations tracing_max_lat_fops = {
7635 	.open		= tracing_open_generic_tr,
7636 	.read		= tracing_max_lat_read,
7637 	.write		= tracing_max_lat_write,
7638 	.llseek		= generic_file_llseek,
7639 	.release	= tracing_release_generic_tr,
7640 };
7641 #endif
7642 
7643 static const struct file_operations set_tracer_fops = {
7644 	.open		= tracing_open_generic_tr,
7645 	.read		= tracing_set_trace_read,
7646 	.write		= tracing_set_trace_write,
7647 	.llseek		= generic_file_llseek,
7648 	.release	= tracing_release_generic_tr,
7649 };
7650 
7651 static const struct file_operations tracing_pipe_fops = {
7652 	.open		= tracing_open_pipe,
7653 	.poll		= tracing_poll_pipe,
7654 	.read		= tracing_read_pipe,
7655 	.splice_read	= tracing_splice_read_pipe,
7656 	.release	= tracing_release_pipe,
7657 };
7658 
7659 static const struct file_operations tracing_entries_fops = {
7660 	.open		= tracing_open_generic_tr,
7661 	.read		= tracing_entries_read,
7662 	.write		= tracing_entries_write,
7663 	.llseek		= generic_file_llseek,
7664 	.release	= tracing_release_generic_tr,
7665 };
7666 
7667 static const struct file_operations tracing_buffer_meta_fops = {
7668 	.open		= tracing_buffer_meta_open,
7669 	.read		= seq_read,
7670 	.llseek		= seq_lseek,
7671 	.release	= tracing_seq_release,
7672 };
7673 
7674 static const struct file_operations tracing_total_entries_fops = {
7675 	.open		= tracing_open_generic_tr,
7676 	.read		= tracing_total_entries_read,
7677 	.llseek		= generic_file_llseek,
7678 	.release	= tracing_release_generic_tr,
7679 };
7680 
7681 static const struct file_operations tracing_free_buffer_fops = {
7682 	.open		= tracing_open_generic_tr,
7683 	.write		= tracing_free_buffer_write,
7684 	.release	= tracing_free_buffer_release,
7685 };
7686 
7687 static const struct file_operations tracing_mark_fops = {
7688 	.open		= tracing_mark_open,
7689 	.write		= tracing_mark_write,
7690 	.release	= tracing_release_generic_tr,
7691 };
7692 
7693 static const struct file_operations tracing_mark_raw_fops = {
7694 	.open		= tracing_mark_open,
7695 	.write		= tracing_mark_raw_write,
7696 	.release	= tracing_release_generic_tr,
7697 };
7698 
7699 static const struct file_operations trace_clock_fops = {
7700 	.open		= tracing_clock_open,
7701 	.read		= seq_read,
7702 	.llseek		= seq_lseek,
7703 	.release	= tracing_single_release_tr,
7704 	.write		= tracing_clock_write,
7705 };
7706 
7707 static const struct file_operations trace_time_stamp_mode_fops = {
7708 	.open		= tracing_time_stamp_mode_open,
7709 	.read		= seq_read,
7710 	.llseek		= seq_lseek,
7711 	.release	= tracing_single_release_tr,
7712 };
7713 
7714 static const struct file_operations last_boot_fops = {
7715 	.open		= tracing_last_boot_open,
7716 	.read		= seq_read,
7717 	.llseek		= seq_lseek,
7718 	.release	= tracing_seq_release,
7719 };
7720 
7721 #ifdef CONFIG_TRACER_SNAPSHOT
7722 static const struct file_operations snapshot_fops = {
7723 	.open		= tracing_snapshot_open,
7724 	.read		= seq_read,
7725 	.write		= tracing_snapshot_write,
7726 	.llseek		= tracing_lseek,
7727 	.release	= tracing_snapshot_release,
7728 };
7729 
7730 static const struct file_operations snapshot_raw_fops = {
7731 	.open		= snapshot_raw_open,
7732 	.read		= tracing_buffers_read,
7733 	.release	= tracing_buffers_release,
7734 	.splice_read	= tracing_buffers_splice_read,
7735 };
7736 
7737 #endif /* CONFIG_TRACER_SNAPSHOT */
7738 
7739 /*
7740  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7741  * @filp: The active open file structure
7742  * @ubuf: The userspace provided buffer to read value into
7743  * @cnt: The maximum number of bytes to read
7744  * @ppos: The current "file" position
7745  *
7746  * This function implements the write interface for a struct trace_min_max_param.
7747  * The filp->private_data must point to a trace_min_max_param structure that
7748  * defines where to write the value, the min and the max acceptable values,
7749  * and a lock to protect the write.
7750  */
7751 static ssize_t
7752 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7753 {
7754 	struct trace_min_max_param *param = filp->private_data;
7755 	u64 val;
7756 	int err;
7757 
7758 	if (!param)
7759 		return -EFAULT;
7760 
7761 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7762 	if (err)
7763 		return err;
7764 
7765 	if (param->lock)
7766 		mutex_lock(param->lock);
7767 
7768 	if (param->min && val < *param->min)
7769 		err = -EINVAL;
7770 
7771 	if (param->max && val > *param->max)
7772 		err = -EINVAL;
7773 
7774 	if (!err)
7775 		*param->val = val;
7776 
7777 	if (param->lock)
7778 		mutex_unlock(param->lock);
7779 
7780 	if (err)
7781 		return err;
7782 
7783 	return cnt;
7784 }
7785 
7786 /*
7787  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7788  * @filp: The active open file structure
7789  * @ubuf: The userspace provided buffer to read value into
7790  * @cnt: The maximum number of bytes to read
7791  * @ppos: The current "file" position
7792  *
7793  * This function implements the read interface for a struct trace_min_max_param.
7794  * The filp->private_data must point to a trace_min_max_param struct with valid
7795  * data.
7796  */
7797 static ssize_t
7798 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7799 {
7800 	struct trace_min_max_param *param = filp->private_data;
7801 	char buf[U64_STR_SIZE];
7802 	int len;
7803 	u64 val;
7804 
7805 	if (!param)
7806 		return -EFAULT;
7807 
7808 	val = *param->val;
7809 
7810 	if (cnt > sizeof(buf))
7811 		cnt = sizeof(buf);
7812 
7813 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7814 
7815 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7816 }
7817 
7818 const struct file_operations trace_min_max_fops = {
7819 	.open		= tracing_open_generic,
7820 	.read		= trace_min_max_read,
7821 	.write		= trace_min_max_write,
7822 };
7823 
7824 #define TRACING_LOG_ERRS_MAX	8
7825 #define TRACING_LOG_LOC_MAX	128
7826 
7827 #define CMD_PREFIX "  Command: "
7828 
7829 struct err_info {
7830 	const char	**errs;	/* ptr to loc-specific array of err strings */
7831 	u8		type;	/* index into errs -> specific err string */
7832 	u16		pos;	/* caret position */
7833 	u64		ts;
7834 };
7835 
7836 struct tracing_log_err {
7837 	struct list_head	list;
7838 	struct err_info		info;
7839 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7840 	char			*cmd;                     /* what caused err */
7841 };
7842 
7843 static DEFINE_MUTEX(tracing_err_log_lock);
7844 
7845 static struct tracing_log_err *alloc_tracing_log_err(int len)
7846 {
7847 	struct tracing_log_err *err;
7848 
7849 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7850 	if (!err)
7851 		return ERR_PTR(-ENOMEM);
7852 
7853 	err->cmd = kzalloc(len, GFP_KERNEL);
7854 	if (!err->cmd) {
7855 		kfree(err);
7856 		return ERR_PTR(-ENOMEM);
7857 	}
7858 
7859 	return err;
7860 }
7861 
7862 static void free_tracing_log_err(struct tracing_log_err *err)
7863 {
7864 	kfree(err->cmd);
7865 	kfree(err);
7866 }
7867 
7868 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7869 						   int len)
7870 {
7871 	struct tracing_log_err *err;
7872 	char *cmd;
7873 
7874 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7875 		err = alloc_tracing_log_err(len);
7876 		if (PTR_ERR(err) != -ENOMEM)
7877 			tr->n_err_log_entries++;
7878 
7879 		return err;
7880 	}
7881 	cmd = kzalloc(len, GFP_KERNEL);
7882 	if (!cmd)
7883 		return ERR_PTR(-ENOMEM);
7884 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7885 	kfree(err->cmd);
7886 	err->cmd = cmd;
7887 	list_del(&err->list);
7888 
7889 	return err;
7890 }
7891 
7892 /**
7893  * err_pos - find the position of a string within a command for error careting
7894  * @cmd: The tracing command that caused the error
7895  * @str: The string to position the caret at within @cmd
7896  *
7897  * Finds the position of the first occurrence of @str within @cmd.  The
7898  * return value can be passed to tracing_log_err() for caret placement
7899  * within @cmd.
7900  *
7901  * Returns the index within @cmd of the first occurrence of @str or 0
7902  * if @str was not found.
7903  */
7904 unsigned int err_pos(char *cmd, const char *str)
7905 {
7906 	char *found;
7907 
7908 	if (WARN_ON(!strlen(cmd)))
7909 		return 0;
7910 
7911 	found = strstr(cmd, str);
7912 	if (found)
7913 		return found - cmd;
7914 
7915 	return 0;
7916 }
7917 
7918 /**
7919  * tracing_log_err - write an error to the tracing error log
7920  * @tr: The associated trace array for the error (NULL for top level array)
7921  * @loc: A string describing where the error occurred
7922  * @cmd: The tracing command that caused the error
7923  * @errs: The array of loc-specific static error strings
7924  * @type: The index into errs[], which produces the specific static err string
7925  * @pos: The position the caret should be placed in the cmd
7926  *
7927  * Writes an error into tracing/error_log of the form:
7928  *
7929  * <loc>: error: <text>
7930  *   Command: <cmd>
7931  *              ^
7932  *
7933  * tracing/error_log is a small log file containing the last
7934  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7935  * unless there has been a tracing error, and the error log can be
7936  * cleared and have its memory freed by writing the empty string in
7937  * truncation mode to it i.e. echo > tracing/error_log.
7938  *
7939  * NOTE: the @errs array along with the @type param are used to
7940  * produce a static error string - this string is not copied and saved
7941  * when the error is logged - only a pointer to it is saved.  See
7942  * existing callers for examples of how static strings are typically
7943  * defined for use with tracing_log_err().
7944  */
7945 void tracing_log_err(struct trace_array *tr,
7946 		     const char *loc, const char *cmd,
7947 		     const char **errs, u8 type, u16 pos)
7948 {
7949 	struct tracing_log_err *err;
7950 	int len = 0;
7951 
7952 	if (!tr)
7953 		tr = &global_trace;
7954 
7955 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7956 
7957 	guard(mutex)(&tracing_err_log_lock);
7958 
7959 	err = get_tracing_log_err(tr, len);
7960 	if (PTR_ERR(err) == -ENOMEM)
7961 		return;
7962 
7963 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7964 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7965 
7966 	err->info.errs = errs;
7967 	err->info.type = type;
7968 	err->info.pos = pos;
7969 	err->info.ts = local_clock();
7970 
7971 	list_add_tail(&err->list, &tr->err_log);
7972 }
7973 
7974 static void clear_tracing_err_log(struct trace_array *tr)
7975 {
7976 	struct tracing_log_err *err, *next;
7977 
7978 	mutex_lock(&tracing_err_log_lock);
7979 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7980 		list_del(&err->list);
7981 		free_tracing_log_err(err);
7982 	}
7983 
7984 	tr->n_err_log_entries = 0;
7985 	mutex_unlock(&tracing_err_log_lock);
7986 }
7987 
7988 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7989 {
7990 	struct trace_array *tr = m->private;
7991 
7992 	mutex_lock(&tracing_err_log_lock);
7993 
7994 	return seq_list_start(&tr->err_log, *pos);
7995 }
7996 
7997 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7998 {
7999 	struct trace_array *tr = m->private;
8000 
8001 	return seq_list_next(v, &tr->err_log, pos);
8002 }
8003 
8004 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8005 {
8006 	mutex_unlock(&tracing_err_log_lock);
8007 }
8008 
8009 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8010 {
8011 	u16 i;
8012 
8013 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8014 		seq_putc(m, ' ');
8015 	for (i = 0; i < pos; i++)
8016 		seq_putc(m, ' ');
8017 	seq_puts(m, "^\n");
8018 }
8019 
8020 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8021 {
8022 	struct tracing_log_err *err = v;
8023 
8024 	if (err) {
8025 		const char *err_text = err->info.errs[err->info.type];
8026 		u64 sec = err->info.ts;
8027 		u32 nsec;
8028 
8029 		nsec = do_div(sec, NSEC_PER_SEC);
8030 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8031 			   err->loc, err_text);
8032 		seq_printf(m, "%s", err->cmd);
8033 		tracing_err_log_show_pos(m, err->info.pos);
8034 	}
8035 
8036 	return 0;
8037 }
8038 
8039 static const struct seq_operations tracing_err_log_seq_ops = {
8040 	.start  = tracing_err_log_seq_start,
8041 	.next   = tracing_err_log_seq_next,
8042 	.stop   = tracing_err_log_seq_stop,
8043 	.show   = tracing_err_log_seq_show
8044 };
8045 
8046 static int tracing_err_log_open(struct inode *inode, struct file *file)
8047 {
8048 	struct trace_array *tr = inode->i_private;
8049 	int ret = 0;
8050 
8051 	ret = tracing_check_open_get_tr(tr);
8052 	if (ret)
8053 		return ret;
8054 
8055 	/* If this file was opened for write, then erase contents */
8056 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8057 		clear_tracing_err_log(tr);
8058 
8059 	if (file->f_mode & FMODE_READ) {
8060 		ret = seq_open(file, &tracing_err_log_seq_ops);
8061 		if (!ret) {
8062 			struct seq_file *m = file->private_data;
8063 			m->private = tr;
8064 		} else {
8065 			trace_array_put(tr);
8066 		}
8067 	}
8068 	return ret;
8069 }
8070 
8071 static ssize_t tracing_err_log_write(struct file *file,
8072 				     const char __user *buffer,
8073 				     size_t count, loff_t *ppos)
8074 {
8075 	return count;
8076 }
8077 
8078 static int tracing_err_log_release(struct inode *inode, struct file *file)
8079 {
8080 	struct trace_array *tr = inode->i_private;
8081 
8082 	trace_array_put(tr);
8083 
8084 	if (file->f_mode & FMODE_READ)
8085 		seq_release(inode, file);
8086 
8087 	return 0;
8088 }
8089 
8090 static const struct file_operations tracing_err_log_fops = {
8091 	.open           = tracing_err_log_open,
8092 	.write		= tracing_err_log_write,
8093 	.read           = seq_read,
8094 	.llseek         = tracing_lseek,
8095 	.release        = tracing_err_log_release,
8096 };
8097 
8098 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8099 {
8100 	struct trace_array *tr = inode->i_private;
8101 	struct ftrace_buffer_info *info;
8102 	int ret;
8103 
8104 	ret = tracing_check_open_get_tr(tr);
8105 	if (ret)
8106 		return ret;
8107 
8108 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8109 	if (!info) {
8110 		trace_array_put(tr);
8111 		return -ENOMEM;
8112 	}
8113 
8114 	mutex_lock(&trace_types_lock);
8115 
8116 	info->iter.tr		= tr;
8117 	info->iter.cpu_file	= tracing_get_cpu(inode);
8118 	info->iter.trace	= tr->current_trace;
8119 	info->iter.array_buffer = &tr->array_buffer;
8120 	info->spare		= NULL;
8121 	/* Force reading ring buffer for first read */
8122 	info->read		= (unsigned int)-1;
8123 
8124 	filp->private_data = info;
8125 
8126 	tr->trace_ref++;
8127 
8128 	mutex_unlock(&trace_types_lock);
8129 
8130 	ret = nonseekable_open(inode, filp);
8131 	if (ret < 0)
8132 		trace_array_put(tr);
8133 
8134 	return ret;
8135 }
8136 
8137 static __poll_t
8138 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8139 {
8140 	struct ftrace_buffer_info *info = filp->private_data;
8141 	struct trace_iterator *iter = &info->iter;
8142 
8143 	return trace_poll(iter, filp, poll_table);
8144 }
8145 
8146 static ssize_t
8147 tracing_buffers_read(struct file *filp, char __user *ubuf,
8148 		     size_t count, loff_t *ppos)
8149 {
8150 	struct ftrace_buffer_info *info = filp->private_data;
8151 	struct trace_iterator *iter = &info->iter;
8152 	void *trace_data;
8153 	int page_size;
8154 	ssize_t ret = 0;
8155 	ssize_t size;
8156 
8157 	if (!count)
8158 		return 0;
8159 
8160 #ifdef CONFIG_TRACER_MAX_TRACE
8161 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8162 		return -EBUSY;
8163 #endif
8164 
8165 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8166 
8167 	/* Make sure the spare matches the current sub buffer size */
8168 	if (info->spare) {
8169 		if (page_size != info->spare_size) {
8170 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8171 						   info->spare_cpu, info->spare);
8172 			info->spare = NULL;
8173 		}
8174 	}
8175 
8176 	if (!info->spare) {
8177 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8178 							  iter->cpu_file);
8179 		if (IS_ERR(info->spare)) {
8180 			ret = PTR_ERR(info->spare);
8181 			info->spare = NULL;
8182 		} else {
8183 			info->spare_cpu = iter->cpu_file;
8184 			info->spare_size = page_size;
8185 		}
8186 	}
8187 	if (!info->spare)
8188 		return ret;
8189 
8190 	/* Do we have previous read data to read? */
8191 	if (info->read < page_size)
8192 		goto read;
8193 
8194  again:
8195 	trace_access_lock(iter->cpu_file);
8196 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8197 				    info->spare,
8198 				    count,
8199 				    iter->cpu_file, 0);
8200 	trace_access_unlock(iter->cpu_file);
8201 
8202 	if (ret < 0) {
8203 		if (trace_empty(iter) && !iter->closed) {
8204 			if ((filp->f_flags & O_NONBLOCK))
8205 				return -EAGAIN;
8206 
8207 			ret = wait_on_pipe(iter, 0);
8208 			if (ret)
8209 				return ret;
8210 
8211 			goto again;
8212 		}
8213 		return 0;
8214 	}
8215 
8216 	info->read = 0;
8217  read:
8218 	size = page_size - info->read;
8219 	if (size > count)
8220 		size = count;
8221 	trace_data = ring_buffer_read_page_data(info->spare);
8222 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8223 	if (ret == size)
8224 		return -EFAULT;
8225 
8226 	size -= ret;
8227 
8228 	*ppos += size;
8229 	info->read += size;
8230 
8231 	return size;
8232 }
8233 
8234 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8235 {
8236 	struct ftrace_buffer_info *info = file->private_data;
8237 	struct trace_iterator *iter = &info->iter;
8238 
8239 	iter->closed = true;
8240 	/* Make sure the waiters see the new wait_index */
8241 	(void)atomic_fetch_inc_release(&iter->wait_index);
8242 
8243 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8244 
8245 	return 0;
8246 }
8247 
8248 static int tracing_buffers_release(struct inode *inode, struct file *file)
8249 {
8250 	struct ftrace_buffer_info *info = file->private_data;
8251 	struct trace_iterator *iter = &info->iter;
8252 
8253 	mutex_lock(&trace_types_lock);
8254 
8255 	iter->tr->trace_ref--;
8256 
8257 	__trace_array_put(iter->tr);
8258 
8259 	if (info->spare)
8260 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8261 					   info->spare_cpu, info->spare);
8262 	kvfree(info);
8263 
8264 	mutex_unlock(&trace_types_lock);
8265 
8266 	return 0;
8267 }
8268 
8269 struct buffer_ref {
8270 	struct trace_buffer	*buffer;
8271 	void			*page;
8272 	int			cpu;
8273 	refcount_t		refcount;
8274 };
8275 
8276 static void buffer_ref_release(struct buffer_ref *ref)
8277 {
8278 	if (!refcount_dec_and_test(&ref->refcount))
8279 		return;
8280 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8281 	kfree(ref);
8282 }
8283 
8284 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8285 				    struct pipe_buffer *buf)
8286 {
8287 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8288 
8289 	buffer_ref_release(ref);
8290 	buf->private = 0;
8291 }
8292 
8293 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8294 				struct pipe_buffer *buf)
8295 {
8296 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8297 
8298 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8299 		return false;
8300 
8301 	refcount_inc(&ref->refcount);
8302 	return true;
8303 }
8304 
8305 /* Pipe buffer operations for a buffer. */
8306 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8307 	.release		= buffer_pipe_buf_release,
8308 	.get			= buffer_pipe_buf_get,
8309 };
8310 
8311 /*
8312  * Callback from splice_to_pipe(), if we need to release some pages
8313  * at the end of the spd in case we error'ed out in filling the pipe.
8314  */
8315 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8316 {
8317 	struct buffer_ref *ref =
8318 		(struct buffer_ref *)spd->partial[i].private;
8319 
8320 	buffer_ref_release(ref);
8321 	spd->partial[i].private = 0;
8322 }
8323 
8324 static ssize_t
8325 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8326 			    struct pipe_inode_info *pipe, size_t len,
8327 			    unsigned int flags)
8328 {
8329 	struct ftrace_buffer_info *info = file->private_data;
8330 	struct trace_iterator *iter = &info->iter;
8331 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8332 	struct page *pages_def[PIPE_DEF_BUFFERS];
8333 	struct splice_pipe_desc spd = {
8334 		.pages		= pages_def,
8335 		.partial	= partial_def,
8336 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8337 		.ops		= &buffer_pipe_buf_ops,
8338 		.spd_release	= buffer_spd_release,
8339 	};
8340 	struct buffer_ref *ref;
8341 	bool woken = false;
8342 	int page_size;
8343 	int entries, i;
8344 	ssize_t ret = 0;
8345 
8346 #ifdef CONFIG_TRACER_MAX_TRACE
8347 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8348 		return -EBUSY;
8349 #endif
8350 
8351 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8352 	if (*ppos & (page_size - 1))
8353 		return -EINVAL;
8354 
8355 	if (len & (page_size - 1)) {
8356 		if (len < page_size)
8357 			return -EINVAL;
8358 		len &= (~(page_size - 1));
8359 	}
8360 
8361 	if (splice_grow_spd(pipe, &spd))
8362 		return -ENOMEM;
8363 
8364  again:
8365 	trace_access_lock(iter->cpu_file);
8366 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8367 
8368 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8369 		struct page *page;
8370 		int r;
8371 
8372 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8373 		if (!ref) {
8374 			ret = -ENOMEM;
8375 			break;
8376 		}
8377 
8378 		refcount_set(&ref->refcount, 1);
8379 		ref->buffer = iter->array_buffer->buffer;
8380 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8381 		if (IS_ERR(ref->page)) {
8382 			ret = PTR_ERR(ref->page);
8383 			ref->page = NULL;
8384 			kfree(ref);
8385 			break;
8386 		}
8387 		ref->cpu = iter->cpu_file;
8388 
8389 		r = ring_buffer_read_page(ref->buffer, ref->page,
8390 					  len, iter->cpu_file, 1);
8391 		if (r < 0) {
8392 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8393 						   ref->page);
8394 			kfree(ref);
8395 			break;
8396 		}
8397 
8398 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8399 
8400 		spd.pages[i] = page;
8401 		spd.partial[i].len = page_size;
8402 		spd.partial[i].offset = 0;
8403 		spd.partial[i].private = (unsigned long)ref;
8404 		spd.nr_pages++;
8405 		*ppos += page_size;
8406 
8407 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8408 	}
8409 
8410 	trace_access_unlock(iter->cpu_file);
8411 	spd.nr_pages = i;
8412 
8413 	/* did we read anything? */
8414 	if (!spd.nr_pages) {
8415 
8416 		if (ret)
8417 			goto out;
8418 
8419 		if (woken)
8420 			goto out;
8421 
8422 		ret = -EAGAIN;
8423 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8424 			goto out;
8425 
8426 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8427 		if (ret)
8428 			goto out;
8429 
8430 		/* No need to wait after waking up when tracing is off */
8431 		if (!tracer_tracing_is_on(iter->tr))
8432 			goto out;
8433 
8434 		/* Iterate one more time to collect any new data then exit */
8435 		woken = true;
8436 
8437 		goto again;
8438 	}
8439 
8440 	ret = splice_to_pipe(pipe, &spd);
8441 out:
8442 	splice_shrink_spd(&spd);
8443 
8444 	return ret;
8445 }
8446 
8447 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8448 {
8449 	struct ftrace_buffer_info *info = file->private_data;
8450 	struct trace_iterator *iter = &info->iter;
8451 	int err;
8452 
8453 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8454 		if (!(file->f_flags & O_NONBLOCK)) {
8455 			err = ring_buffer_wait(iter->array_buffer->buffer,
8456 					       iter->cpu_file,
8457 					       iter->tr->buffer_percent,
8458 					       NULL, NULL);
8459 			if (err)
8460 				return err;
8461 		}
8462 
8463 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8464 						  iter->cpu_file);
8465 	} else if (cmd) {
8466 		return -ENOTTY;
8467 	}
8468 
8469 	/*
8470 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8471 	 * waiters
8472 	 */
8473 	mutex_lock(&trace_types_lock);
8474 
8475 	/* Make sure the waiters see the new wait_index */
8476 	(void)atomic_fetch_inc_release(&iter->wait_index);
8477 
8478 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8479 
8480 	mutex_unlock(&trace_types_lock);
8481 	return 0;
8482 }
8483 
8484 #ifdef CONFIG_TRACER_MAX_TRACE
8485 static int get_snapshot_map(struct trace_array *tr)
8486 {
8487 	int err = 0;
8488 
8489 	/*
8490 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8491 	 * take trace_types_lock. Instead use the specific
8492 	 * snapshot_trigger_lock.
8493 	 */
8494 	spin_lock(&tr->snapshot_trigger_lock);
8495 
8496 	if (tr->snapshot || tr->mapped == UINT_MAX)
8497 		err = -EBUSY;
8498 	else
8499 		tr->mapped++;
8500 
8501 	spin_unlock(&tr->snapshot_trigger_lock);
8502 
8503 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8504 	if (tr->mapped == 1)
8505 		synchronize_rcu();
8506 
8507 	return err;
8508 
8509 }
8510 static void put_snapshot_map(struct trace_array *tr)
8511 {
8512 	spin_lock(&tr->snapshot_trigger_lock);
8513 	if (!WARN_ON(!tr->mapped))
8514 		tr->mapped--;
8515 	spin_unlock(&tr->snapshot_trigger_lock);
8516 }
8517 #else
8518 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8519 static inline void put_snapshot_map(struct trace_array *tr) { }
8520 #endif
8521 
8522 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8523 {
8524 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8525 	struct trace_iterator *iter = &info->iter;
8526 
8527 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8528 	put_snapshot_map(iter->tr);
8529 }
8530 
8531 static const struct vm_operations_struct tracing_buffers_vmops = {
8532 	.close		= tracing_buffers_mmap_close,
8533 };
8534 
8535 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8536 {
8537 	struct ftrace_buffer_info *info = filp->private_data;
8538 	struct trace_iterator *iter = &info->iter;
8539 	int ret = 0;
8540 
8541 	/* A memmap'ed buffer is not supported for user space mmap */
8542 	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8543 		return -ENODEV;
8544 
8545 	/* Currently the boot mapped buffer is not supported for mmap */
8546 	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
8547 		return -ENODEV;
8548 
8549 	ret = get_snapshot_map(iter->tr);
8550 	if (ret)
8551 		return ret;
8552 
8553 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8554 	if (ret)
8555 		put_snapshot_map(iter->tr);
8556 
8557 	vma->vm_ops = &tracing_buffers_vmops;
8558 
8559 	return ret;
8560 }
8561 
8562 static const struct file_operations tracing_buffers_fops = {
8563 	.open		= tracing_buffers_open,
8564 	.read		= tracing_buffers_read,
8565 	.poll		= tracing_buffers_poll,
8566 	.release	= tracing_buffers_release,
8567 	.flush		= tracing_buffers_flush,
8568 	.splice_read	= tracing_buffers_splice_read,
8569 	.unlocked_ioctl = tracing_buffers_ioctl,
8570 	.mmap		= tracing_buffers_mmap,
8571 };
8572 
8573 static ssize_t
8574 tracing_stats_read(struct file *filp, char __user *ubuf,
8575 		   size_t count, loff_t *ppos)
8576 {
8577 	struct inode *inode = file_inode(filp);
8578 	struct trace_array *tr = inode->i_private;
8579 	struct array_buffer *trace_buf = &tr->array_buffer;
8580 	int cpu = tracing_get_cpu(inode);
8581 	struct trace_seq *s;
8582 	unsigned long cnt;
8583 	unsigned long long t;
8584 	unsigned long usec_rem;
8585 
8586 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8587 	if (!s)
8588 		return -ENOMEM;
8589 
8590 	trace_seq_init(s);
8591 
8592 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8593 	trace_seq_printf(s, "entries: %ld\n", cnt);
8594 
8595 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8596 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8597 
8598 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8599 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8600 
8601 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8602 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8603 
8604 	if (trace_clocks[tr->clock_id].in_ns) {
8605 		/* local or global for trace_clock */
8606 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8607 		usec_rem = do_div(t, USEC_PER_SEC);
8608 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8609 								t, usec_rem);
8610 
8611 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8612 		usec_rem = do_div(t, USEC_PER_SEC);
8613 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8614 	} else {
8615 		/* counter or tsc mode for trace_clock */
8616 		trace_seq_printf(s, "oldest event ts: %llu\n",
8617 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8618 
8619 		trace_seq_printf(s, "now ts: %llu\n",
8620 				ring_buffer_time_stamp(trace_buf->buffer));
8621 	}
8622 
8623 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8624 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8625 
8626 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8627 	trace_seq_printf(s, "read events: %ld\n", cnt);
8628 
8629 	count = simple_read_from_buffer(ubuf, count, ppos,
8630 					s->buffer, trace_seq_used(s));
8631 
8632 	kfree(s);
8633 
8634 	return count;
8635 }
8636 
8637 static const struct file_operations tracing_stats_fops = {
8638 	.open		= tracing_open_generic_tr,
8639 	.read		= tracing_stats_read,
8640 	.llseek		= generic_file_llseek,
8641 	.release	= tracing_release_generic_tr,
8642 };
8643 
8644 #ifdef CONFIG_DYNAMIC_FTRACE
8645 
8646 static ssize_t
8647 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8648 		  size_t cnt, loff_t *ppos)
8649 {
8650 	ssize_t ret;
8651 	char *buf;
8652 	int r;
8653 
8654 	/* 512 should be plenty to hold the amount needed */
8655 #define DYN_INFO_BUF_SIZE	512
8656 
8657 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8658 	if (!buf)
8659 		return -ENOMEM;
8660 
8661 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8662 		      "%ld pages:%ld groups: %ld\n"
8663 		      "ftrace boot update time = %llu (ns)\n"
8664 		      "ftrace module total update time = %llu (ns)\n",
8665 		      ftrace_update_tot_cnt,
8666 		      ftrace_number_of_pages,
8667 		      ftrace_number_of_groups,
8668 		      ftrace_update_time,
8669 		      ftrace_total_mod_time);
8670 
8671 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8672 	kfree(buf);
8673 	return ret;
8674 }
8675 
8676 static const struct file_operations tracing_dyn_info_fops = {
8677 	.open		= tracing_open_generic,
8678 	.read		= tracing_read_dyn_info,
8679 	.llseek		= generic_file_llseek,
8680 };
8681 #endif /* CONFIG_DYNAMIC_FTRACE */
8682 
8683 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8684 static void
8685 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8686 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8687 		void *data)
8688 {
8689 	tracing_snapshot_instance(tr);
8690 }
8691 
8692 static void
8693 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8694 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8695 		      void *data)
8696 {
8697 	struct ftrace_func_mapper *mapper = data;
8698 	long *count = NULL;
8699 
8700 	if (mapper)
8701 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8702 
8703 	if (count) {
8704 
8705 		if (*count <= 0)
8706 			return;
8707 
8708 		(*count)--;
8709 	}
8710 
8711 	tracing_snapshot_instance(tr);
8712 }
8713 
8714 static int
8715 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8716 		      struct ftrace_probe_ops *ops, void *data)
8717 {
8718 	struct ftrace_func_mapper *mapper = data;
8719 	long *count = NULL;
8720 
8721 	seq_printf(m, "%ps:", (void *)ip);
8722 
8723 	seq_puts(m, "snapshot");
8724 
8725 	if (mapper)
8726 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8727 
8728 	if (count)
8729 		seq_printf(m, ":count=%ld\n", *count);
8730 	else
8731 		seq_puts(m, ":unlimited\n");
8732 
8733 	return 0;
8734 }
8735 
8736 static int
8737 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8738 		     unsigned long ip, void *init_data, void **data)
8739 {
8740 	struct ftrace_func_mapper *mapper = *data;
8741 
8742 	if (!mapper) {
8743 		mapper = allocate_ftrace_func_mapper();
8744 		if (!mapper)
8745 			return -ENOMEM;
8746 		*data = mapper;
8747 	}
8748 
8749 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8750 }
8751 
8752 static void
8753 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8754 		     unsigned long ip, void *data)
8755 {
8756 	struct ftrace_func_mapper *mapper = data;
8757 
8758 	if (!ip) {
8759 		if (!mapper)
8760 			return;
8761 		free_ftrace_func_mapper(mapper, NULL);
8762 		return;
8763 	}
8764 
8765 	ftrace_func_mapper_remove_ip(mapper, ip);
8766 }
8767 
8768 static struct ftrace_probe_ops snapshot_probe_ops = {
8769 	.func			= ftrace_snapshot,
8770 	.print			= ftrace_snapshot_print,
8771 };
8772 
8773 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8774 	.func			= ftrace_count_snapshot,
8775 	.print			= ftrace_snapshot_print,
8776 	.init			= ftrace_snapshot_init,
8777 	.free			= ftrace_snapshot_free,
8778 };
8779 
8780 static int
8781 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8782 			       char *glob, char *cmd, char *param, int enable)
8783 {
8784 	struct ftrace_probe_ops *ops;
8785 	void *count = (void *)-1;
8786 	char *number;
8787 	int ret;
8788 
8789 	if (!tr)
8790 		return -ENODEV;
8791 
8792 	/* hash funcs only work with set_ftrace_filter */
8793 	if (!enable)
8794 		return -EINVAL;
8795 
8796 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8797 
8798 	if (glob[0] == '!') {
8799 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8800 		if (!ret)
8801 			tracing_disarm_snapshot(tr);
8802 
8803 		return ret;
8804 	}
8805 
8806 	if (!param)
8807 		goto out_reg;
8808 
8809 	number = strsep(&param, ":");
8810 
8811 	if (!strlen(number))
8812 		goto out_reg;
8813 
8814 	/*
8815 	 * We use the callback data field (which is a pointer)
8816 	 * as our counter.
8817 	 */
8818 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8819 	if (ret)
8820 		return ret;
8821 
8822  out_reg:
8823 	ret = tracing_arm_snapshot(tr);
8824 	if (ret < 0)
8825 		goto out;
8826 
8827 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8828 	if (ret < 0)
8829 		tracing_disarm_snapshot(tr);
8830  out:
8831 	return ret < 0 ? ret : 0;
8832 }
8833 
8834 static struct ftrace_func_command ftrace_snapshot_cmd = {
8835 	.name			= "snapshot",
8836 	.func			= ftrace_trace_snapshot_callback,
8837 };
8838 
8839 static __init int register_snapshot_cmd(void)
8840 {
8841 	return register_ftrace_command(&ftrace_snapshot_cmd);
8842 }
8843 #else
8844 static inline __init int register_snapshot_cmd(void) { return 0; }
8845 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8846 
8847 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8848 {
8849 	if (WARN_ON(!tr->dir))
8850 		return ERR_PTR(-ENODEV);
8851 
8852 	/* Top directory uses NULL as the parent */
8853 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8854 		return NULL;
8855 
8856 	/* All sub buffers have a descriptor */
8857 	return tr->dir;
8858 }
8859 
8860 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8861 {
8862 	struct dentry *d_tracer;
8863 
8864 	if (tr->percpu_dir)
8865 		return tr->percpu_dir;
8866 
8867 	d_tracer = tracing_get_dentry(tr);
8868 	if (IS_ERR(d_tracer))
8869 		return NULL;
8870 
8871 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8872 
8873 	MEM_FAIL(!tr->percpu_dir,
8874 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8875 
8876 	return tr->percpu_dir;
8877 }
8878 
8879 static struct dentry *
8880 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8881 		      void *data, long cpu, const struct file_operations *fops)
8882 {
8883 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8884 
8885 	if (ret) /* See tracing_get_cpu() */
8886 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8887 	return ret;
8888 }
8889 
8890 static void
8891 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8892 {
8893 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8894 	struct dentry *d_cpu;
8895 	char cpu_dir[30]; /* 30 characters should be more than enough */
8896 
8897 	if (!d_percpu)
8898 		return;
8899 
8900 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8901 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8902 	if (!d_cpu) {
8903 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8904 		return;
8905 	}
8906 
8907 	/* per cpu trace_pipe */
8908 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8909 				tr, cpu, &tracing_pipe_fops);
8910 
8911 	/* per cpu trace */
8912 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8913 				tr, cpu, &tracing_fops);
8914 
8915 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8916 				tr, cpu, &tracing_buffers_fops);
8917 
8918 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8919 				tr, cpu, &tracing_stats_fops);
8920 
8921 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8922 				tr, cpu, &tracing_entries_fops);
8923 
8924 	if (tr->range_addr_start)
8925 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8926 				      tr, cpu, &tracing_buffer_meta_fops);
8927 #ifdef CONFIG_TRACER_SNAPSHOT
8928 	if (!tr->range_addr_start) {
8929 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8930 				      tr, cpu, &snapshot_fops);
8931 
8932 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8933 				      tr, cpu, &snapshot_raw_fops);
8934 	}
8935 #endif
8936 }
8937 
8938 #ifdef CONFIG_FTRACE_SELFTEST
8939 /* Let selftest have access to static functions in this file */
8940 #include "trace_selftest.c"
8941 #endif
8942 
8943 static ssize_t
8944 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8945 			loff_t *ppos)
8946 {
8947 	struct trace_option_dentry *topt = filp->private_data;
8948 	char *buf;
8949 
8950 	if (topt->flags->val & topt->opt->bit)
8951 		buf = "1\n";
8952 	else
8953 		buf = "0\n";
8954 
8955 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8956 }
8957 
8958 static ssize_t
8959 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8960 			 loff_t *ppos)
8961 {
8962 	struct trace_option_dentry *topt = filp->private_data;
8963 	unsigned long val;
8964 	int ret;
8965 
8966 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8967 	if (ret)
8968 		return ret;
8969 
8970 	if (val != 0 && val != 1)
8971 		return -EINVAL;
8972 
8973 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8974 		mutex_lock(&trace_types_lock);
8975 		ret = __set_tracer_option(topt->tr, topt->flags,
8976 					  topt->opt, !val);
8977 		mutex_unlock(&trace_types_lock);
8978 		if (ret)
8979 			return ret;
8980 	}
8981 
8982 	*ppos += cnt;
8983 
8984 	return cnt;
8985 }
8986 
8987 static int tracing_open_options(struct inode *inode, struct file *filp)
8988 {
8989 	struct trace_option_dentry *topt = inode->i_private;
8990 	int ret;
8991 
8992 	ret = tracing_check_open_get_tr(topt->tr);
8993 	if (ret)
8994 		return ret;
8995 
8996 	filp->private_data = inode->i_private;
8997 	return 0;
8998 }
8999 
9000 static int tracing_release_options(struct inode *inode, struct file *file)
9001 {
9002 	struct trace_option_dentry *topt = file->private_data;
9003 
9004 	trace_array_put(topt->tr);
9005 	return 0;
9006 }
9007 
9008 static const struct file_operations trace_options_fops = {
9009 	.open = tracing_open_options,
9010 	.read = trace_options_read,
9011 	.write = trace_options_write,
9012 	.llseek	= generic_file_llseek,
9013 	.release = tracing_release_options,
9014 };
9015 
9016 /*
9017  * In order to pass in both the trace_array descriptor as well as the index
9018  * to the flag that the trace option file represents, the trace_array
9019  * has a character array of trace_flags_index[], which holds the index
9020  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9021  * The address of this character array is passed to the flag option file
9022  * read/write callbacks.
9023  *
9024  * In order to extract both the index and the trace_array descriptor,
9025  * get_tr_index() uses the following algorithm.
9026  *
9027  *   idx = *ptr;
9028  *
9029  * As the pointer itself contains the address of the index (remember
9030  * index[1] == 1).
9031  *
9032  * Then to get the trace_array descriptor, by subtracting that index
9033  * from the ptr, we get to the start of the index itself.
9034  *
9035  *   ptr - idx == &index[0]
9036  *
9037  * Then a simple container_of() from that pointer gets us to the
9038  * trace_array descriptor.
9039  */
9040 static void get_tr_index(void *data, struct trace_array **ptr,
9041 			 unsigned int *pindex)
9042 {
9043 	*pindex = *(unsigned char *)data;
9044 
9045 	*ptr = container_of(data - *pindex, struct trace_array,
9046 			    trace_flags_index);
9047 }
9048 
9049 static ssize_t
9050 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9051 			loff_t *ppos)
9052 {
9053 	void *tr_index = filp->private_data;
9054 	struct trace_array *tr;
9055 	unsigned int index;
9056 	char *buf;
9057 
9058 	get_tr_index(tr_index, &tr, &index);
9059 
9060 	if (tr->trace_flags & (1 << index))
9061 		buf = "1\n";
9062 	else
9063 		buf = "0\n";
9064 
9065 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9066 }
9067 
9068 static ssize_t
9069 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9070 			 loff_t *ppos)
9071 {
9072 	void *tr_index = filp->private_data;
9073 	struct trace_array *tr;
9074 	unsigned int index;
9075 	unsigned long val;
9076 	int ret;
9077 
9078 	get_tr_index(tr_index, &tr, &index);
9079 
9080 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9081 	if (ret)
9082 		return ret;
9083 
9084 	if (val != 0 && val != 1)
9085 		return -EINVAL;
9086 
9087 	mutex_lock(&event_mutex);
9088 	mutex_lock(&trace_types_lock);
9089 	ret = set_tracer_flag(tr, 1 << index, val);
9090 	mutex_unlock(&trace_types_lock);
9091 	mutex_unlock(&event_mutex);
9092 
9093 	if (ret < 0)
9094 		return ret;
9095 
9096 	*ppos += cnt;
9097 
9098 	return cnt;
9099 }
9100 
9101 static const struct file_operations trace_options_core_fops = {
9102 	.open = tracing_open_generic,
9103 	.read = trace_options_core_read,
9104 	.write = trace_options_core_write,
9105 	.llseek = generic_file_llseek,
9106 };
9107 
9108 struct dentry *trace_create_file(const char *name,
9109 				 umode_t mode,
9110 				 struct dentry *parent,
9111 				 void *data,
9112 				 const struct file_operations *fops)
9113 {
9114 	struct dentry *ret;
9115 
9116 	ret = tracefs_create_file(name, mode, parent, data, fops);
9117 	if (!ret)
9118 		pr_warn("Could not create tracefs '%s' entry\n", name);
9119 
9120 	return ret;
9121 }
9122 
9123 
9124 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9125 {
9126 	struct dentry *d_tracer;
9127 
9128 	if (tr->options)
9129 		return tr->options;
9130 
9131 	d_tracer = tracing_get_dentry(tr);
9132 	if (IS_ERR(d_tracer))
9133 		return NULL;
9134 
9135 	tr->options = tracefs_create_dir("options", d_tracer);
9136 	if (!tr->options) {
9137 		pr_warn("Could not create tracefs directory 'options'\n");
9138 		return NULL;
9139 	}
9140 
9141 	return tr->options;
9142 }
9143 
9144 static void
9145 create_trace_option_file(struct trace_array *tr,
9146 			 struct trace_option_dentry *topt,
9147 			 struct tracer_flags *flags,
9148 			 struct tracer_opt *opt)
9149 {
9150 	struct dentry *t_options;
9151 
9152 	t_options = trace_options_init_dentry(tr);
9153 	if (!t_options)
9154 		return;
9155 
9156 	topt->flags = flags;
9157 	topt->opt = opt;
9158 	topt->tr = tr;
9159 
9160 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9161 					t_options, topt, &trace_options_fops);
9162 
9163 }
9164 
9165 static void
9166 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9167 {
9168 	struct trace_option_dentry *topts;
9169 	struct trace_options *tr_topts;
9170 	struct tracer_flags *flags;
9171 	struct tracer_opt *opts;
9172 	int cnt;
9173 	int i;
9174 
9175 	if (!tracer)
9176 		return;
9177 
9178 	flags = tracer->flags;
9179 
9180 	if (!flags || !flags->opts)
9181 		return;
9182 
9183 	/*
9184 	 * If this is an instance, only create flags for tracers
9185 	 * the instance may have.
9186 	 */
9187 	if (!trace_ok_for_array(tracer, tr))
9188 		return;
9189 
9190 	for (i = 0; i < tr->nr_topts; i++) {
9191 		/* Make sure there's no duplicate flags. */
9192 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9193 			return;
9194 	}
9195 
9196 	opts = flags->opts;
9197 
9198 	for (cnt = 0; opts[cnt].name; cnt++)
9199 		;
9200 
9201 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9202 	if (!topts)
9203 		return;
9204 
9205 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9206 			    GFP_KERNEL);
9207 	if (!tr_topts) {
9208 		kfree(topts);
9209 		return;
9210 	}
9211 
9212 	tr->topts = tr_topts;
9213 	tr->topts[tr->nr_topts].tracer = tracer;
9214 	tr->topts[tr->nr_topts].topts = topts;
9215 	tr->nr_topts++;
9216 
9217 	for (cnt = 0; opts[cnt].name; cnt++) {
9218 		create_trace_option_file(tr, &topts[cnt], flags,
9219 					 &opts[cnt]);
9220 		MEM_FAIL(topts[cnt].entry == NULL,
9221 			  "Failed to create trace option: %s",
9222 			  opts[cnt].name);
9223 	}
9224 }
9225 
9226 static struct dentry *
9227 create_trace_option_core_file(struct trace_array *tr,
9228 			      const char *option, long index)
9229 {
9230 	struct dentry *t_options;
9231 
9232 	t_options = trace_options_init_dentry(tr);
9233 	if (!t_options)
9234 		return NULL;
9235 
9236 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9237 				 (void *)&tr->trace_flags_index[index],
9238 				 &trace_options_core_fops);
9239 }
9240 
9241 static void create_trace_options_dir(struct trace_array *tr)
9242 {
9243 	struct dentry *t_options;
9244 	bool top_level = tr == &global_trace;
9245 	int i;
9246 
9247 	t_options = trace_options_init_dentry(tr);
9248 	if (!t_options)
9249 		return;
9250 
9251 	for (i = 0; trace_options[i]; i++) {
9252 		if (top_level ||
9253 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9254 			create_trace_option_core_file(tr, trace_options[i], i);
9255 	}
9256 }
9257 
9258 static ssize_t
9259 rb_simple_read(struct file *filp, char __user *ubuf,
9260 	       size_t cnt, loff_t *ppos)
9261 {
9262 	struct trace_array *tr = filp->private_data;
9263 	char buf[64];
9264 	int r;
9265 
9266 	r = tracer_tracing_is_on(tr);
9267 	r = sprintf(buf, "%d\n", r);
9268 
9269 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9270 }
9271 
9272 static ssize_t
9273 rb_simple_write(struct file *filp, const char __user *ubuf,
9274 		size_t cnt, loff_t *ppos)
9275 {
9276 	struct trace_array *tr = filp->private_data;
9277 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9278 	unsigned long val;
9279 	int ret;
9280 
9281 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9282 	if (ret)
9283 		return ret;
9284 
9285 	if (buffer) {
9286 		mutex_lock(&trace_types_lock);
9287 		if (!!val == tracer_tracing_is_on(tr)) {
9288 			val = 0; /* do nothing */
9289 		} else if (val) {
9290 			tracer_tracing_on(tr);
9291 			if (tr->current_trace->start)
9292 				tr->current_trace->start(tr);
9293 		} else {
9294 			tracer_tracing_off(tr);
9295 			if (tr->current_trace->stop)
9296 				tr->current_trace->stop(tr);
9297 			/* Wake up any waiters */
9298 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9299 		}
9300 		mutex_unlock(&trace_types_lock);
9301 	}
9302 
9303 	(*ppos)++;
9304 
9305 	return cnt;
9306 }
9307 
9308 static const struct file_operations rb_simple_fops = {
9309 	.open		= tracing_open_generic_tr,
9310 	.read		= rb_simple_read,
9311 	.write		= rb_simple_write,
9312 	.release	= tracing_release_generic_tr,
9313 	.llseek		= default_llseek,
9314 };
9315 
9316 static ssize_t
9317 buffer_percent_read(struct file *filp, char __user *ubuf,
9318 		    size_t cnt, loff_t *ppos)
9319 {
9320 	struct trace_array *tr = filp->private_data;
9321 	char buf[64];
9322 	int r;
9323 
9324 	r = tr->buffer_percent;
9325 	r = sprintf(buf, "%d\n", r);
9326 
9327 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9328 }
9329 
9330 static ssize_t
9331 buffer_percent_write(struct file *filp, const char __user *ubuf,
9332 		     size_t cnt, loff_t *ppos)
9333 {
9334 	struct trace_array *tr = filp->private_data;
9335 	unsigned long val;
9336 	int ret;
9337 
9338 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9339 	if (ret)
9340 		return ret;
9341 
9342 	if (val > 100)
9343 		return -EINVAL;
9344 
9345 	tr->buffer_percent = val;
9346 
9347 	(*ppos)++;
9348 
9349 	return cnt;
9350 }
9351 
9352 static const struct file_operations buffer_percent_fops = {
9353 	.open		= tracing_open_generic_tr,
9354 	.read		= buffer_percent_read,
9355 	.write		= buffer_percent_write,
9356 	.release	= tracing_release_generic_tr,
9357 	.llseek		= default_llseek,
9358 };
9359 
9360 static ssize_t
9361 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9362 {
9363 	struct trace_array *tr = filp->private_data;
9364 	size_t size;
9365 	char buf[64];
9366 	int order;
9367 	int r;
9368 
9369 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9370 	size = (PAGE_SIZE << order) / 1024;
9371 
9372 	r = sprintf(buf, "%zd\n", size);
9373 
9374 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9375 }
9376 
9377 static ssize_t
9378 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9379 			 size_t cnt, loff_t *ppos)
9380 {
9381 	struct trace_array *tr = filp->private_data;
9382 	unsigned long val;
9383 	int old_order;
9384 	int order;
9385 	int pages;
9386 	int ret;
9387 
9388 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9389 	if (ret)
9390 		return ret;
9391 
9392 	val *= 1024; /* value passed in is in KB */
9393 
9394 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9395 	order = fls(pages - 1);
9396 
9397 	/* limit between 1 and 128 system pages */
9398 	if (order < 0 || order > 7)
9399 		return -EINVAL;
9400 
9401 	/* Do not allow tracing while changing the order of the ring buffer */
9402 	tracing_stop_tr(tr);
9403 
9404 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9405 	if (old_order == order)
9406 		goto out;
9407 
9408 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9409 	if (ret)
9410 		goto out;
9411 
9412 #ifdef CONFIG_TRACER_MAX_TRACE
9413 
9414 	if (!tr->allocated_snapshot)
9415 		goto out_max;
9416 
9417 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9418 	if (ret) {
9419 		/* Put back the old order */
9420 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9421 		if (WARN_ON_ONCE(cnt)) {
9422 			/*
9423 			 * AARGH! We are left with different orders!
9424 			 * The max buffer is our "snapshot" buffer.
9425 			 * When a tracer needs a snapshot (one of the
9426 			 * latency tracers), it swaps the max buffer
9427 			 * with the saved snap shot. We succeeded to
9428 			 * update the order of the main buffer, but failed to
9429 			 * update the order of the max buffer. But when we tried
9430 			 * to reset the main buffer to the original size, we
9431 			 * failed there too. This is very unlikely to
9432 			 * happen, but if it does, warn and kill all
9433 			 * tracing.
9434 			 */
9435 			tracing_disabled = 1;
9436 		}
9437 		goto out;
9438 	}
9439  out_max:
9440 #endif
9441 	(*ppos)++;
9442  out:
9443 	if (ret)
9444 		cnt = ret;
9445 	tracing_start_tr(tr);
9446 	return cnt;
9447 }
9448 
9449 static const struct file_operations buffer_subbuf_size_fops = {
9450 	.open		= tracing_open_generic_tr,
9451 	.read		= buffer_subbuf_size_read,
9452 	.write		= buffer_subbuf_size_write,
9453 	.release	= tracing_release_generic_tr,
9454 	.llseek		= default_llseek,
9455 };
9456 
9457 static struct dentry *trace_instance_dir;
9458 
9459 static void
9460 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9461 
9462 #ifdef CONFIG_MODULES
9463 static int make_mod_delta(struct module *mod, void *data)
9464 {
9465 	struct trace_module_delta *module_delta;
9466 	struct trace_scratch *tscratch;
9467 	struct trace_mod_entry *entry;
9468 	struct trace_array *tr = data;
9469 	int i;
9470 
9471 	tscratch = tr->scratch;
9472 	module_delta = READ_ONCE(tr->module_delta);
9473 	for (i = 0; i < tscratch->nr_entries; i++) {
9474 		entry = &tscratch->entries[i];
9475 		if (strcmp(mod->name, entry->mod_name))
9476 			continue;
9477 		if (mod->state == MODULE_STATE_GOING)
9478 			module_delta->delta[i] = 0;
9479 		else
9480 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9481 						 - entry->mod_addr;
9482 		break;
9483 	}
9484 	return 0;
9485 }
9486 #else
9487 static int make_mod_delta(struct module *mod, void *data)
9488 {
9489 	return 0;
9490 }
9491 #endif
9492 
9493 static int mod_addr_comp(const void *a, const void *b, const void *data)
9494 {
9495 	const struct trace_mod_entry *e1 = a;
9496 	const struct trace_mod_entry *e2 = b;
9497 
9498 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9499 }
9500 
9501 static void setup_trace_scratch(struct trace_array *tr,
9502 				struct trace_scratch *tscratch, unsigned int size)
9503 {
9504 	struct trace_module_delta *module_delta;
9505 	struct trace_mod_entry *entry;
9506 	int i, nr_entries;
9507 
9508 	if (!tscratch)
9509 		return;
9510 
9511 	tr->scratch = tscratch;
9512 	tr->scratch_size = size;
9513 
9514 	if (tscratch->text_addr)
9515 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9516 
9517 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9518 		goto reset;
9519 
9520 	/* Check if each module name is a valid string */
9521 	for (i = 0; i < tscratch->nr_entries; i++) {
9522 		int n;
9523 
9524 		entry = &tscratch->entries[i];
9525 
9526 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9527 			if (entry->mod_name[n] == '\0')
9528 				break;
9529 			if (!isprint(entry->mod_name[n]))
9530 				goto reset;
9531 		}
9532 		if (n == MODULE_NAME_LEN)
9533 			goto reset;
9534 	}
9535 
9536 	/* Sort the entries so that we can find appropriate module from address. */
9537 	nr_entries = tscratch->nr_entries;
9538 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9539 	       mod_addr_comp, NULL, NULL);
9540 
9541 	if (IS_ENABLED(CONFIG_MODULES)) {
9542 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9543 		if (!module_delta) {
9544 			pr_info("module_delta allocation failed. Not able to decode module address.");
9545 			goto reset;
9546 		}
9547 		init_rcu_head(&module_delta->rcu);
9548 	} else
9549 		module_delta = NULL;
9550 	WRITE_ONCE(tr->module_delta, module_delta);
9551 
9552 	/* Scan modules to make text delta for modules. */
9553 	module_for_each_mod(make_mod_delta, tr);
9554 	return;
9555  reset:
9556 	/* Invalid trace modules */
9557 	memset(tscratch, 0, size);
9558 }
9559 
9560 static int
9561 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9562 {
9563 	enum ring_buffer_flags rb_flags;
9564 	struct trace_scratch *tscratch;
9565 	unsigned int scratch_size = 0;
9566 
9567 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9568 
9569 	buf->tr = tr;
9570 
9571 	if (tr->range_addr_start && tr->range_addr_size) {
9572 		/* Add scratch buffer to handle 128 modules */
9573 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9574 						      tr->range_addr_start,
9575 						      tr->range_addr_size,
9576 						      struct_size(tscratch, entries, 128));
9577 
9578 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9579 		setup_trace_scratch(tr, tscratch, scratch_size);
9580 
9581 		/*
9582 		 * This is basically the same as a mapped buffer,
9583 		 * with the same restrictions.
9584 		 */
9585 		tr->mapped++;
9586 	} else {
9587 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9588 	}
9589 	if (!buf->buffer)
9590 		return -ENOMEM;
9591 
9592 	buf->data = alloc_percpu(struct trace_array_cpu);
9593 	if (!buf->data) {
9594 		ring_buffer_free(buf->buffer);
9595 		buf->buffer = NULL;
9596 		return -ENOMEM;
9597 	}
9598 
9599 	/* Allocate the first page for all buffers */
9600 	set_buffer_entries(&tr->array_buffer,
9601 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9602 
9603 	return 0;
9604 }
9605 
9606 static void free_trace_buffer(struct array_buffer *buf)
9607 {
9608 	if (buf->buffer) {
9609 		ring_buffer_free(buf->buffer);
9610 		buf->buffer = NULL;
9611 		free_percpu(buf->data);
9612 		buf->data = NULL;
9613 	}
9614 }
9615 
9616 static int allocate_trace_buffers(struct trace_array *tr, int size)
9617 {
9618 	int ret;
9619 
9620 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9621 	if (ret)
9622 		return ret;
9623 
9624 #ifdef CONFIG_TRACER_MAX_TRACE
9625 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9626 	if (tr->range_addr_start)
9627 		return 0;
9628 
9629 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9630 				    allocate_snapshot ? size : 1);
9631 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9632 		free_trace_buffer(&tr->array_buffer);
9633 		return -ENOMEM;
9634 	}
9635 	tr->allocated_snapshot = allocate_snapshot;
9636 
9637 	allocate_snapshot = false;
9638 #endif
9639 
9640 	return 0;
9641 }
9642 
9643 static void free_trace_buffers(struct trace_array *tr)
9644 {
9645 	if (!tr)
9646 		return;
9647 
9648 	free_trace_buffer(&tr->array_buffer);
9649 	kfree(tr->module_delta);
9650 
9651 #ifdef CONFIG_TRACER_MAX_TRACE
9652 	free_trace_buffer(&tr->max_buffer);
9653 #endif
9654 }
9655 
9656 static void init_trace_flags_index(struct trace_array *tr)
9657 {
9658 	int i;
9659 
9660 	/* Used by the trace options files */
9661 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9662 		tr->trace_flags_index[i] = i;
9663 }
9664 
9665 static void __update_tracer_options(struct trace_array *tr)
9666 {
9667 	struct tracer *t;
9668 
9669 	for (t = trace_types; t; t = t->next)
9670 		add_tracer_options(tr, t);
9671 }
9672 
9673 static void update_tracer_options(struct trace_array *tr)
9674 {
9675 	mutex_lock(&trace_types_lock);
9676 	tracer_options_updated = true;
9677 	__update_tracer_options(tr);
9678 	mutex_unlock(&trace_types_lock);
9679 }
9680 
9681 /* Must have trace_types_lock held */
9682 struct trace_array *trace_array_find(const char *instance)
9683 {
9684 	struct trace_array *tr, *found = NULL;
9685 
9686 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9687 		if (tr->name && strcmp(tr->name, instance) == 0) {
9688 			found = tr;
9689 			break;
9690 		}
9691 	}
9692 
9693 	return found;
9694 }
9695 
9696 struct trace_array *trace_array_find_get(const char *instance)
9697 {
9698 	struct trace_array *tr;
9699 
9700 	mutex_lock(&trace_types_lock);
9701 	tr = trace_array_find(instance);
9702 	if (tr)
9703 		tr->ref++;
9704 	mutex_unlock(&trace_types_lock);
9705 
9706 	return tr;
9707 }
9708 
9709 static int trace_array_create_dir(struct trace_array *tr)
9710 {
9711 	int ret;
9712 
9713 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9714 	if (!tr->dir)
9715 		return -EINVAL;
9716 
9717 	ret = event_trace_add_tracer(tr->dir, tr);
9718 	if (ret) {
9719 		tracefs_remove(tr->dir);
9720 		return ret;
9721 	}
9722 
9723 	init_tracer_tracefs(tr, tr->dir);
9724 	__update_tracer_options(tr);
9725 
9726 	return ret;
9727 }
9728 
9729 static struct trace_array *
9730 trace_array_create_systems(const char *name, const char *systems,
9731 			   unsigned long range_addr_start,
9732 			   unsigned long range_addr_size)
9733 {
9734 	struct trace_array *tr;
9735 	int ret;
9736 
9737 	ret = -ENOMEM;
9738 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9739 	if (!tr)
9740 		return ERR_PTR(ret);
9741 
9742 	tr->name = kstrdup(name, GFP_KERNEL);
9743 	if (!tr->name)
9744 		goto out_free_tr;
9745 
9746 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9747 		goto out_free_tr;
9748 
9749 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9750 		goto out_free_tr;
9751 
9752 	if (systems) {
9753 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9754 		if (!tr->system_names)
9755 			goto out_free_tr;
9756 	}
9757 
9758 	/* Only for boot up memory mapped ring buffers */
9759 	tr->range_addr_start = range_addr_start;
9760 	tr->range_addr_size = range_addr_size;
9761 
9762 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9763 
9764 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9765 
9766 	raw_spin_lock_init(&tr->start_lock);
9767 
9768 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9769 #ifdef CONFIG_TRACER_MAX_TRACE
9770 	spin_lock_init(&tr->snapshot_trigger_lock);
9771 #endif
9772 	tr->current_trace = &nop_trace;
9773 
9774 	INIT_LIST_HEAD(&tr->systems);
9775 	INIT_LIST_HEAD(&tr->events);
9776 	INIT_LIST_HEAD(&tr->hist_vars);
9777 	INIT_LIST_HEAD(&tr->err_log);
9778 
9779 #ifdef CONFIG_MODULES
9780 	INIT_LIST_HEAD(&tr->mod_events);
9781 #endif
9782 
9783 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9784 		goto out_free_tr;
9785 
9786 	/* The ring buffer is defaultly expanded */
9787 	trace_set_ring_buffer_expanded(tr);
9788 
9789 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9790 		goto out_free_tr;
9791 
9792 	ftrace_init_trace_array(tr);
9793 
9794 	init_trace_flags_index(tr);
9795 
9796 	if (trace_instance_dir) {
9797 		ret = trace_array_create_dir(tr);
9798 		if (ret)
9799 			goto out_free_tr;
9800 	} else
9801 		__trace_early_add_events(tr);
9802 
9803 	list_add(&tr->list, &ftrace_trace_arrays);
9804 
9805 	tr->ref++;
9806 
9807 	return tr;
9808 
9809  out_free_tr:
9810 	ftrace_free_ftrace_ops(tr);
9811 	free_trace_buffers(tr);
9812 	free_cpumask_var(tr->pipe_cpumask);
9813 	free_cpumask_var(tr->tracing_cpumask);
9814 	kfree_const(tr->system_names);
9815 	kfree(tr->range_name);
9816 	kfree(tr->name);
9817 	kfree(tr);
9818 
9819 	return ERR_PTR(ret);
9820 }
9821 
9822 static struct trace_array *trace_array_create(const char *name)
9823 {
9824 	return trace_array_create_systems(name, NULL, 0, 0);
9825 }
9826 
9827 static int instance_mkdir(const char *name)
9828 {
9829 	struct trace_array *tr;
9830 	int ret;
9831 
9832 	guard(mutex)(&event_mutex);
9833 	guard(mutex)(&trace_types_lock);
9834 
9835 	ret = -EEXIST;
9836 	if (trace_array_find(name))
9837 		return -EEXIST;
9838 
9839 	tr = trace_array_create(name);
9840 
9841 	ret = PTR_ERR_OR_ZERO(tr);
9842 
9843 	return ret;
9844 }
9845 
9846 #ifdef CONFIG_MMU
9847 static u64 map_pages(unsigned long start, unsigned long size)
9848 {
9849 	unsigned long vmap_start, vmap_end;
9850 	struct vm_struct *area;
9851 	int ret;
9852 
9853 	area = get_vm_area(size, VM_IOREMAP);
9854 	if (!area)
9855 		return 0;
9856 
9857 	vmap_start = (unsigned long) area->addr;
9858 	vmap_end = vmap_start + size;
9859 
9860 	ret = vmap_page_range(vmap_start, vmap_end,
9861 			      start, pgprot_nx(PAGE_KERNEL));
9862 	if (ret < 0) {
9863 		free_vm_area(area);
9864 		return 0;
9865 	}
9866 
9867 	return (u64)vmap_start;
9868 }
9869 #else
9870 static inline u64 map_pages(unsigned long start, unsigned long size)
9871 {
9872 	return 0;
9873 }
9874 #endif
9875 
9876 /**
9877  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9878  * @name: The name of the trace array to be looked up/created.
9879  * @systems: A list of systems to create event directories for (NULL for all)
9880  *
9881  * Returns pointer to trace array with given name.
9882  * NULL, if it cannot be created.
9883  *
9884  * NOTE: This function increments the reference counter associated with the
9885  * trace array returned. This makes sure it cannot be freed while in use.
9886  * Use trace_array_put() once the trace array is no longer needed.
9887  * If the trace_array is to be freed, trace_array_destroy() needs to
9888  * be called after the trace_array_put(), or simply let user space delete
9889  * it from the tracefs instances directory. But until the
9890  * trace_array_put() is called, user space can not delete it.
9891  *
9892  */
9893 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9894 {
9895 	struct trace_array *tr;
9896 
9897 	guard(mutex)(&event_mutex);
9898 	guard(mutex)(&trace_types_lock);
9899 
9900 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9901 		if (tr->name && strcmp(tr->name, name) == 0) {
9902 			tr->ref++;
9903 			return tr;
9904 		}
9905 	}
9906 
9907 	tr = trace_array_create_systems(name, systems, 0, 0);
9908 
9909 	if (IS_ERR(tr))
9910 		tr = NULL;
9911 	else
9912 		tr->ref++;
9913 
9914 	return tr;
9915 }
9916 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9917 
9918 static int __remove_instance(struct trace_array *tr)
9919 {
9920 	int i;
9921 
9922 	/* Reference counter for a newly created trace array = 1. */
9923 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9924 		return -EBUSY;
9925 
9926 	list_del(&tr->list);
9927 
9928 	/* Disable all the flags that were enabled coming in */
9929 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9930 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9931 			set_tracer_flag(tr, 1 << i, 0);
9932 	}
9933 
9934 	if (printk_trace == tr)
9935 		update_printk_trace(&global_trace);
9936 
9937 	tracing_set_nop(tr);
9938 	clear_ftrace_function_probes(tr);
9939 	event_trace_del_tracer(tr);
9940 	ftrace_clear_pids(tr);
9941 	ftrace_destroy_function_files(tr);
9942 	tracefs_remove(tr->dir);
9943 	free_percpu(tr->last_func_repeats);
9944 	free_trace_buffers(tr);
9945 	clear_tracing_err_log(tr);
9946 
9947 	if (tr->range_name) {
9948 		reserve_mem_release_by_name(tr->range_name);
9949 		kfree(tr->range_name);
9950 	}
9951 
9952 	for (i = 0; i < tr->nr_topts; i++) {
9953 		kfree(tr->topts[i].topts);
9954 	}
9955 	kfree(tr->topts);
9956 
9957 	free_cpumask_var(tr->pipe_cpumask);
9958 	free_cpumask_var(tr->tracing_cpumask);
9959 	kfree_const(tr->system_names);
9960 	kfree(tr->name);
9961 	kfree(tr);
9962 
9963 	return 0;
9964 }
9965 
9966 int trace_array_destroy(struct trace_array *this_tr)
9967 {
9968 	struct trace_array *tr;
9969 
9970 	if (!this_tr)
9971 		return -EINVAL;
9972 
9973 	guard(mutex)(&event_mutex);
9974 	guard(mutex)(&trace_types_lock);
9975 
9976 
9977 	/* Making sure trace array exists before destroying it. */
9978 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9979 		if (tr == this_tr)
9980 			return __remove_instance(tr);
9981 	}
9982 
9983 	return -ENODEV;
9984 }
9985 EXPORT_SYMBOL_GPL(trace_array_destroy);
9986 
9987 static int instance_rmdir(const char *name)
9988 {
9989 	struct trace_array *tr;
9990 
9991 	guard(mutex)(&event_mutex);
9992 	guard(mutex)(&trace_types_lock);
9993 
9994 	tr = trace_array_find(name);
9995 	if (!tr)
9996 		return -ENODEV;
9997 
9998 	return __remove_instance(tr);
9999 }
10000 
10001 static __init void create_trace_instances(struct dentry *d_tracer)
10002 {
10003 	struct trace_array *tr;
10004 
10005 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10006 							 instance_mkdir,
10007 							 instance_rmdir);
10008 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10009 		return;
10010 
10011 	guard(mutex)(&event_mutex);
10012 	guard(mutex)(&trace_types_lock);
10013 
10014 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10015 		if (!tr->name)
10016 			continue;
10017 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10018 			     "Failed to create instance directory\n"))
10019 			return;
10020 	}
10021 }
10022 
10023 static void
10024 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10025 {
10026 	int cpu;
10027 
10028 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10029 			tr, &show_traces_fops);
10030 
10031 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10032 			tr, &set_tracer_fops);
10033 
10034 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10035 			  tr, &tracing_cpumask_fops);
10036 
10037 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10038 			  tr, &tracing_iter_fops);
10039 
10040 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10041 			  tr, &tracing_fops);
10042 
10043 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10044 			  tr, &tracing_pipe_fops);
10045 
10046 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10047 			  tr, &tracing_entries_fops);
10048 
10049 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10050 			  tr, &tracing_total_entries_fops);
10051 
10052 	trace_create_file("free_buffer", 0200, d_tracer,
10053 			  tr, &tracing_free_buffer_fops);
10054 
10055 	trace_create_file("trace_marker", 0220, d_tracer,
10056 			  tr, &tracing_mark_fops);
10057 
10058 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10059 
10060 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10061 			  tr, &tracing_mark_raw_fops);
10062 
10063 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10064 			  &trace_clock_fops);
10065 
10066 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10067 			  tr, &rb_simple_fops);
10068 
10069 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10070 			  &trace_time_stamp_mode_fops);
10071 
10072 	tr->buffer_percent = 50;
10073 
10074 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10075 			tr, &buffer_percent_fops);
10076 
10077 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10078 			  tr, &buffer_subbuf_size_fops);
10079 
10080 	create_trace_options_dir(tr);
10081 
10082 #ifdef CONFIG_TRACER_MAX_TRACE
10083 	trace_create_maxlat_file(tr, d_tracer);
10084 #endif
10085 
10086 	if (ftrace_create_function_files(tr, d_tracer))
10087 		MEM_FAIL(1, "Could not allocate function filter files");
10088 
10089 	if (tr->range_addr_start) {
10090 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10091 				  tr, &last_boot_fops);
10092 #ifdef CONFIG_TRACER_SNAPSHOT
10093 	} else {
10094 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10095 				  tr, &snapshot_fops);
10096 #endif
10097 	}
10098 
10099 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10100 			  tr, &tracing_err_log_fops);
10101 
10102 	for_each_tracing_cpu(cpu)
10103 		tracing_init_tracefs_percpu(tr, cpu);
10104 
10105 	ftrace_init_tracefs(tr, d_tracer);
10106 }
10107 
10108 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
10109 {
10110 	struct vfsmount *mnt;
10111 	struct file_system_type *type;
10112 
10113 	/*
10114 	 * To maintain backward compatibility for tools that mount
10115 	 * debugfs to get to the tracing facility, tracefs is automatically
10116 	 * mounted to the debugfs/tracing directory.
10117 	 */
10118 	type = get_fs_type("tracefs");
10119 	if (!type)
10120 		return NULL;
10121 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10122 	put_filesystem(type);
10123 	if (IS_ERR(mnt))
10124 		return NULL;
10125 	mntget(mnt);
10126 
10127 	return mnt;
10128 }
10129 
10130 /**
10131  * tracing_init_dentry - initialize top level trace array
10132  *
10133  * This is called when creating files or directories in the tracing
10134  * directory. It is called via fs_initcall() by any of the boot up code
10135  * and expects to return the dentry of the top level tracing directory.
10136  */
10137 int tracing_init_dentry(void)
10138 {
10139 	struct trace_array *tr = &global_trace;
10140 
10141 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10142 		pr_warn("Tracing disabled due to lockdown\n");
10143 		return -EPERM;
10144 	}
10145 
10146 	/* The top level trace array uses  NULL as parent */
10147 	if (tr->dir)
10148 		return 0;
10149 
10150 	if (WARN_ON(!tracefs_initialized()))
10151 		return -ENODEV;
10152 
10153 	/*
10154 	 * As there may still be users that expect the tracing
10155 	 * files to exist in debugfs/tracing, we must automount
10156 	 * the tracefs file system there, so older tools still
10157 	 * work with the newer kernel.
10158 	 */
10159 	tr->dir = debugfs_create_automount("tracing", NULL,
10160 					   trace_automount, NULL);
10161 
10162 	return 0;
10163 }
10164 
10165 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10166 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10167 
10168 static struct workqueue_struct *eval_map_wq __initdata;
10169 static struct work_struct eval_map_work __initdata;
10170 static struct work_struct tracerfs_init_work __initdata;
10171 
10172 static void __init eval_map_work_func(struct work_struct *work)
10173 {
10174 	int len;
10175 
10176 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10177 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10178 }
10179 
10180 static int __init trace_eval_init(void)
10181 {
10182 	INIT_WORK(&eval_map_work, eval_map_work_func);
10183 
10184 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10185 	if (!eval_map_wq) {
10186 		pr_err("Unable to allocate eval_map_wq\n");
10187 		/* Do work here */
10188 		eval_map_work_func(&eval_map_work);
10189 		return -ENOMEM;
10190 	}
10191 
10192 	queue_work(eval_map_wq, &eval_map_work);
10193 	return 0;
10194 }
10195 
10196 subsys_initcall(trace_eval_init);
10197 
10198 static int __init trace_eval_sync(void)
10199 {
10200 	/* Make sure the eval map updates are finished */
10201 	if (eval_map_wq)
10202 		destroy_workqueue(eval_map_wq);
10203 	return 0;
10204 }
10205 
10206 late_initcall_sync(trace_eval_sync);
10207 
10208 
10209 #ifdef CONFIG_MODULES
10210 
10211 bool module_exists(const char *module)
10212 {
10213 	/* All modules have the symbol __this_module */
10214 	static const char this_mod[] = "__this_module";
10215 	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
10216 	unsigned long val;
10217 	int n;
10218 
10219 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10220 
10221 	if (n > sizeof(modname) - 1)
10222 		return false;
10223 
10224 	val = module_kallsyms_lookup_name(modname);
10225 	return val != 0;
10226 }
10227 
10228 static void trace_module_add_evals(struct module *mod)
10229 {
10230 	if (!mod->num_trace_evals)
10231 		return;
10232 
10233 	/*
10234 	 * Modules with bad taint do not have events created, do
10235 	 * not bother with enums either.
10236 	 */
10237 	if (trace_module_has_bad_taint(mod))
10238 		return;
10239 
10240 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10241 }
10242 
10243 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10244 static void trace_module_remove_evals(struct module *mod)
10245 {
10246 	union trace_eval_map_item *map;
10247 	union trace_eval_map_item **last = &trace_eval_maps;
10248 
10249 	if (!mod->num_trace_evals)
10250 		return;
10251 
10252 	guard(mutex)(&trace_eval_mutex);
10253 
10254 	map = trace_eval_maps;
10255 
10256 	while (map) {
10257 		if (map->head.mod == mod)
10258 			break;
10259 		map = trace_eval_jmp_to_tail(map);
10260 		last = &map->tail.next;
10261 		map = map->tail.next;
10262 	}
10263 	if (!map)
10264 		return;
10265 
10266 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10267 	kfree(map);
10268 }
10269 #else
10270 static inline void trace_module_remove_evals(struct module *mod) { }
10271 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10272 
10273 static void trace_module_record(struct module *mod, bool add)
10274 {
10275 	struct trace_array *tr;
10276 	unsigned long flags;
10277 
10278 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10279 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10280 		/* Update any persistent trace array that has already been started */
10281 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10282 			guard(mutex)(&scratch_mutex);
10283 			save_mod(mod, tr);
10284 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10285 			/* Update delta if the module loaded in previous boot */
10286 			make_mod_delta(mod, tr);
10287 		}
10288 	}
10289 }
10290 
10291 static int trace_module_notify(struct notifier_block *self,
10292 			       unsigned long val, void *data)
10293 {
10294 	struct module *mod = data;
10295 
10296 	switch (val) {
10297 	case MODULE_STATE_COMING:
10298 		trace_module_add_evals(mod);
10299 		trace_module_record(mod, true);
10300 		break;
10301 	case MODULE_STATE_GOING:
10302 		trace_module_remove_evals(mod);
10303 		trace_module_record(mod, false);
10304 		break;
10305 	}
10306 
10307 	return NOTIFY_OK;
10308 }
10309 
10310 static struct notifier_block trace_module_nb = {
10311 	.notifier_call = trace_module_notify,
10312 	.priority = 0,
10313 };
10314 #endif /* CONFIG_MODULES */
10315 
10316 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10317 {
10318 
10319 	event_trace_init();
10320 
10321 	init_tracer_tracefs(&global_trace, NULL);
10322 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10323 
10324 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10325 			&global_trace, &tracing_thresh_fops);
10326 
10327 	trace_create_file("README", TRACE_MODE_READ, NULL,
10328 			NULL, &tracing_readme_fops);
10329 
10330 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10331 			NULL, &tracing_saved_cmdlines_fops);
10332 
10333 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10334 			  NULL, &tracing_saved_cmdlines_size_fops);
10335 
10336 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10337 			NULL, &tracing_saved_tgids_fops);
10338 
10339 	trace_create_eval_file(NULL);
10340 
10341 #ifdef CONFIG_MODULES
10342 	register_module_notifier(&trace_module_nb);
10343 #endif
10344 
10345 #ifdef CONFIG_DYNAMIC_FTRACE
10346 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10347 			NULL, &tracing_dyn_info_fops);
10348 #endif
10349 
10350 	create_trace_instances(NULL);
10351 
10352 	update_tracer_options(&global_trace);
10353 }
10354 
10355 static __init int tracer_init_tracefs(void)
10356 {
10357 	int ret;
10358 
10359 	trace_access_lock_init();
10360 
10361 	ret = tracing_init_dentry();
10362 	if (ret)
10363 		return 0;
10364 
10365 	if (eval_map_wq) {
10366 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10367 		queue_work(eval_map_wq, &tracerfs_init_work);
10368 	} else {
10369 		tracer_init_tracefs_work_func(NULL);
10370 	}
10371 
10372 	rv_init_interface();
10373 
10374 	return 0;
10375 }
10376 
10377 fs_initcall(tracer_init_tracefs);
10378 
10379 static int trace_die_panic_handler(struct notifier_block *self,
10380 				unsigned long ev, void *unused);
10381 
10382 static struct notifier_block trace_panic_notifier = {
10383 	.notifier_call = trace_die_panic_handler,
10384 	.priority = INT_MAX - 1,
10385 };
10386 
10387 static struct notifier_block trace_die_notifier = {
10388 	.notifier_call = trace_die_panic_handler,
10389 	.priority = INT_MAX - 1,
10390 };
10391 
10392 /*
10393  * The idea is to execute the following die/panic callback early, in order
10394  * to avoid showing irrelevant information in the trace (like other panic
10395  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10396  * warnings get disabled (to prevent potential log flooding).
10397  */
10398 static int trace_die_panic_handler(struct notifier_block *self,
10399 				unsigned long ev, void *unused)
10400 {
10401 	if (!ftrace_dump_on_oops_enabled())
10402 		return NOTIFY_DONE;
10403 
10404 	/* The die notifier requires DIE_OOPS to trigger */
10405 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10406 		return NOTIFY_DONE;
10407 
10408 	ftrace_dump(DUMP_PARAM);
10409 
10410 	return NOTIFY_DONE;
10411 }
10412 
10413 /*
10414  * printk is set to max of 1024, we really don't need it that big.
10415  * Nothing should be printing 1000 characters anyway.
10416  */
10417 #define TRACE_MAX_PRINT		1000
10418 
10419 /*
10420  * Define here KERN_TRACE so that we have one place to modify
10421  * it if we decide to change what log level the ftrace dump
10422  * should be at.
10423  */
10424 #define KERN_TRACE		KERN_EMERG
10425 
10426 void
10427 trace_printk_seq(struct trace_seq *s)
10428 {
10429 	/* Probably should print a warning here. */
10430 	if (s->seq.len >= TRACE_MAX_PRINT)
10431 		s->seq.len = TRACE_MAX_PRINT;
10432 
10433 	/*
10434 	 * More paranoid code. Although the buffer size is set to
10435 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10436 	 * an extra layer of protection.
10437 	 */
10438 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10439 		s->seq.len = s->seq.size - 1;
10440 
10441 	/* should be zero ended, but we are paranoid. */
10442 	s->buffer[s->seq.len] = 0;
10443 
10444 	printk(KERN_TRACE "%s", s->buffer);
10445 
10446 	trace_seq_init(s);
10447 }
10448 
10449 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10450 {
10451 	iter->tr = tr;
10452 	iter->trace = iter->tr->current_trace;
10453 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10454 	iter->array_buffer = &tr->array_buffer;
10455 
10456 	if (iter->trace && iter->trace->open)
10457 		iter->trace->open(iter);
10458 
10459 	/* Annotate start of buffers if we had overruns */
10460 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10461 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10462 
10463 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10464 	if (trace_clocks[iter->tr->clock_id].in_ns)
10465 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10466 
10467 	/* Can not use kmalloc for iter.temp and iter.fmt */
10468 	iter->temp = static_temp_buf;
10469 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10470 	iter->fmt = static_fmt_buf;
10471 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10472 }
10473 
10474 void trace_init_global_iter(struct trace_iterator *iter)
10475 {
10476 	trace_init_iter(iter, &global_trace);
10477 }
10478 
10479 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10480 {
10481 	/* use static because iter can be a bit big for the stack */
10482 	static struct trace_iterator iter;
10483 	unsigned int old_userobj;
10484 	unsigned long flags;
10485 	int cnt = 0, cpu;
10486 
10487 	/*
10488 	 * Always turn off tracing when we dump.
10489 	 * We don't need to show trace output of what happens
10490 	 * between multiple crashes.
10491 	 *
10492 	 * If the user does a sysrq-z, then they can re-enable
10493 	 * tracing with echo 1 > tracing_on.
10494 	 */
10495 	tracer_tracing_off(tr);
10496 
10497 	local_irq_save(flags);
10498 
10499 	/* Simulate the iterator */
10500 	trace_init_iter(&iter, tr);
10501 
10502 	for_each_tracing_cpu(cpu) {
10503 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10504 	}
10505 
10506 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10507 
10508 	/* don't look at user memory in panic mode */
10509 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10510 
10511 	if (dump_mode == DUMP_ORIG)
10512 		iter.cpu_file = raw_smp_processor_id();
10513 	else
10514 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10515 
10516 	if (tr == &global_trace)
10517 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10518 	else
10519 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10520 
10521 	/* Did function tracer already get disabled? */
10522 	if (ftrace_is_dead()) {
10523 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10524 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10525 	}
10526 
10527 	/*
10528 	 * We need to stop all tracing on all CPUS to read
10529 	 * the next buffer. This is a bit expensive, but is
10530 	 * not done often. We fill all what we can read,
10531 	 * and then release the locks again.
10532 	 */
10533 
10534 	while (!trace_empty(&iter)) {
10535 
10536 		if (!cnt)
10537 			printk(KERN_TRACE "---------------------------------\n");
10538 
10539 		cnt++;
10540 
10541 		trace_iterator_reset(&iter);
10542 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10543 
10544 		if (trace_find_next_entry_inc(&iter) != NULL) {
10545 			int ret;
10546 
10547 			ret = print_trace_line(&iter);
10548 			if (ret != TRACE_TYPE_NO_CONSUME)
10549 				trace_consume(&iter);
10550 		}
10551 		touch_nmi_watchdog();
10552 
10553 		trace_printk_seq(&iter.seq);
10554 	}
10555 
10556 	if (!cnt)
10557 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10558 	else
10559 		printk(KERN_TRACE "---------------------------------\n");
10560 
10561 	tr->trace_flags |= old_userobj;
10562 
10563 	for_each_tracing_cpu(cpu) {
10564 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10565 	}
10566 	local_irq_restore(flags);
10567 }
10568 
10569 static void ftrace_dump_by_param(void)
10570 {
10571 	bool first_param = true;
10572 	char dump_param[MAX_TRACER_SIZE];
10573 	char *buf, *token, *inst_name;
10574 	struct trace_array *tr;
10575 
10576 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10577 	buf = dump_param;
10578 
10579 	while ((token = strsep(&buf, ",")) != NULL) {
10580 		if (first_param) {
10581 			first_param = false;
10582 			if (!strcmp("0", token))
10583 				continue;
10584 			else if (!strcmp("1", token)) {
10585 				ftrace_dump_one(&global_trace, DUMP_ALL);
10586 				continue;
10587 			}
10588 			else if (!strcmp("2", token) ||
10589 			  !strcmp("orig_cpu", token)) {
10590 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10591 				continue;
10592 			}
10593 		}
10594 
10595 		inst_name = strsep(&token, "=");
10596 		tr = trace_array_find(inst_name);
10597 		if (!tr) {
10598 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10599 			continue;
10600 		}
10601 
10602 		if (token && (!strcmp("2", token) ||
10603 			  !strcmp("orig_cpu", token)))
10604 			ftrace_dump_one(tr, DUMP_ORIG);
10605 		else
10606 			ftrace_dump_one(tr, DUMP_ALL);
10607 	}
10608 }
10609 
10610 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10611 {
10612 	static atomic_t dump_running;
10613 
10614 	/* Only allow one dump user at a time. */
10615 	if (atomic_inc_return(&dump_running) != 1) {
10616 		atomic_dec(&dump_running);
10617 		return;
10618 	}
10619 
10620 	switch (oops_dump_mode) {
10621 	case DUMP_ALL:
10622 		ftrace_dump_one(&global_trace, DUMP_ALL);
10623 		break;
10624 	case DUMP_ORIG:
10625 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10626 		break;
10627 	case DUMP_PARAM:
10628 		ftrace_dump_by_param();
10629 		break;
10630 	case DUMP_NONE:
10631 		break;
10632 	default:
10633 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10634 		ftrace_dump_one(&global_trace, DUMP_ALL);
10635 	}
10636 
10637 	atomic_dec(&dump_running);
10638 }
10639 EXPORT_SYMBOL_GPL(ftrace_dump);
10640 
10641 #define WRITE_BUFSIZE  4096
10642 
10643 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10644 				size_t count, loff_t *ppos,
10645 				int (*createfn)(const char *))
10646 {
10647 	char *kbuf, *buf, *tmp;
10648 	int ret = 0;
10649 	size_t done = 0;
10650 	size_t size;
10651 
10652 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10653 	if (!kbuf)
10654 		return -ENOMEM;
10655 
10656 	while (done < count) {
10657 		size = count - done;
10658 
10659 		if (size >= WRITE_BUFSIZE)
10660 			size = WRITE_BUFSIZE - 1;
10661 
10662 		if (copy_from_user(kbuf, buffer + done, size)) {
10663 			ret = -EFAULT;
10664 			goto out;
10665 		}
10666 		kbuf[size] = '\0';
10667 		buf = kbuf;
10668 		do {
10669 			tmp = strchr(buf, '\n');
10670 			if (tmp) {
10671 				*tmp = '\0';
10672 				size = tmp - buf + 1;
10673 			} else {
10674 				size = strlen(buf);
10675 				if (done + size < count) {
10676 					if (buf != kbuf)
10677 						break;
10678 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10679 					pr_warn("Line length is too long: Should be less than %d\n",
10680 						WRITE_BUFSIZE - 2);
10681 					ret = -EINVAL;
10682 					goto out;
10683 				}
10684 			}
10685 			done += size;
10686 
10687 			/* Remove comments */
10688 			tmp = strchr(buf, '#');
10689 
10690 			if (tmp)
10691 				*tmp = '\0';
10692 
10693 			ret = createfn(buf);
10694 			if (ret)
10695 				goto out;
10696 			buf += size;
10697 
10698 		} while (done < count);
10699 	}
10700 	ret = done;
10701 
10702 out:
10703 	kfree(kbuf);
10704 
10705 	return ret;
10706 }
10707 
10708 #ifdef CONFIG_TRACER_MAX_TRACE
10709 __init static bool tr_needs_alloc_snapshot(const char *name)
10710 {
10711 	char *test;
10712 	int len = strlen(name);
10713 	bool ret;
10714 
10715 	if (!boot_snapshot_index)
10716 		return false;
10717 
10718 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10719 	    boot_snapshot_info[len] == '\t')
10720 		return true;
10721 
10722 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10723 	if (!test)
10724 		return false;
10725 
10726 	sprintf(test, "\t%s\t", name);
10727 	ret = strstr(boot_snapshot_info, test) == NULL;
10728 	kfree(test);
10729 	return ret;
10730 }
10731 
10732 __init static void do_allocate_snapshot(const char *name)
10733 {
10734 	if (!tr_needs_alloc_snapshot(name))
10735 		return;
10736 
10737 	/*
10738 	 * When allocate_snapshot is set, the next call to
10739 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10740 	 * will allocate the snapshot buffer. That will alse clear
10741 	 * this flag.
10742 	 */
10743 	allocate_snapshot = true;
10744 }
10745 #else
10746 static inline void do_allocate_snapshot(const char *name) { }
10747 #endif
10748 
10749 __init static void enable_instances(void)
10750 {
10751 	struct trace_array *tr;
10752 	bool memmap_area = false;
10753 	char *curr_str;
10754 	char *name;
10755 	char *str;
10756 	char *tok;
10757 
10758 	/* A tab is always appended */
10759 	boot_instance_info[boot_instance_index - 1] = '\0';
10760 	str = boot_instance_info;
10761 
10762 	while ((curr_str = strsep(&str, "\t"))) {
10763 		phys_addr_t start = 0;
10764 		phys_addr_t size = 0;
10765 		unsigned long addr = 0;
10766 		bool traceprintk = false;
10767 		bool traceoff = false;
10768 		char *flag_delim;
10769 		char *addr_delim;
10770 		char *rname __free(kfree) = NULL;
10771 
10772 		tok = strsep(&curr_str, ",");
10773 
10774 		flag_delim = strchr(tok, '^');
10775 		addr_delim = strchr(tok, '@');
10776 
10777 		if (addr_delim)
10778 			*addr_delim++ = '\0';
10779 
10780 		if (flag_delim)
10781 			*flag_delim++ = '\0';
10782 
10783 		name = tok;
10784 
10785 		if (flag_delim) {
10786 			char *flag;
10787 
10788 			while ((flag = strsep(&flag_delim, "^"))) {
10789 				if (strcmp(flag, "traceoff") == 0) {
10790 					traceoff = true;
10791 				} else if ((strcmp(flag, "printk") == 0) ||
10792 					   (strcmp(flag, "traceprintk") == 0) ||
10793 					   (strcmp(flag, "trace_printk") == 0)) {
10794 					traceprintk = true;
10795 				} else {
10796 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10797 						flag, name);
10798 				}
10799 			}
10800 		}
10801 
10802 		tok = addr_delim;
10803 		if (tok && isdigit(*tok)) {
10804 			start = memparse(tok, &tok);
10805 			if (!start) {
10806 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10807 					name);
10808 				continue;
10809 			}
10810 			if (*tok != ':') {
10811 				pr_warn("Tracing: No size specified for instance %s\n", name);
10812 				continue;
10813 			}
10814 			tok++;
10815 			size = memparse(tok, &tok);
10816 			if (!size) {
10817 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10818 					name);
10819 				continue;
10820 			}
10821 			memmap_area = true;
10822 		} else if (tok) {
10823 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10824 				start = 0;
10825 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10826 				continue;
10827 			}
10828 			rname = kstrdup(tok, GFP_KERNEL);
10829 		}
10830 
10831 		if (start) {
10832 			/* Start and size must be page aligned */
10833 			if (start & ~PAGE_MASK) {
10834 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10835 				continue;
10836 			}
10837 			if (size & ~PAGE_MASK) {
10838 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10839 				continue;
10840 			}
10841 
10842 			if (memmap_area)
10843 				addr = map_pages(start, size);
10844 			else
10845 				addr = (unsigned long)phys_to_virt(start);
10846 			if (addr) {
10847 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10848 					name, &start, (unsigned long)size);
10849 			} else {
10850 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10851 				continue;
10852 			}
10853 		} else {
10854 			/* Only non mapped buffers have snapshot buffers */
10855 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10856 				do_allocate_snapshot(name);
10857 		}
10858 
10859 		tr = trace_array_create_systems(name, NULL, addr, size);
10860 		if (IS_ERR(tr)) {
10861 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10862 			continue;
10863 		}
10864 
10865 		if (traceoff)
10866 			tracer_tracing_off(tr);
10867 
10868 		if (traceprintk)
10869 			update_printk_trace(tr);
10870 
10871 		/*
10872 		 * memmap'd buffers can not be freed.
10873 		 */
10874 		if (memmap_area) {
10875 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
10876 			tr->ref++;
10877 		}
10878 
10879 		if (start) {
10880 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10881 			tr->range_name = no_free_ptr(rname);
10882 		}
10883 
10884 		while ((tok = strsep(&curr_str, ","))) {
10885 			early_enable_events(tr, tok, true);
10886 		}
10887 	}
10888 }
10889 
10890 __init static int tracer_alloc_buffers(void)
10891 {
10892 	int ring_buf_size;
10893 	int ret = -ENOMEM;
10894 
10895 
10896 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10897 		pr_warn("Tracing disabled due to lockdown\n");
10898 		return -EPERM;
10899 	}
10900 
10901 	/*
10902 	 * Make sure we don't accidentally add more trace options
10903 	 * than we have bits for.
10904 	 */
10905 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10906 
10907 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10908 		goto out;
10909 
10910 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10911 		goto out_free_buffer_mask;
10912 
10913 	/* Only allocate trace_printk buffers if a trace_printk exists */
10914 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10915 		/* Must be called before global_trace.buffer is allocated */
10916 		trace_printk_init_buffers();
10917 
10918 	/* To save memory, keep the ring buffer size to its minimum */
10919 	if (global_trace.ring_buffer_expanded)
10920 		ring_buf_size = trace_buf_size;
10921 	else
10922 		ring_buf_size = 1;
10923 
10924 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10925 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10926 
10927 	raw_spin_lock_init(&global_trace.start_lock);
10928 
10929 	/*
10930 	 * The prepare callbacks allocates some memory for the ring buffer. We
10931 	 * don't free the buffer if the CPU goes down. If we were to free
10932 	 * the buffer, then the user would lose any trace that was in the
10933 	 * buffer. The memory will be removed once the "instance" is removed.
10934 	 */
10935 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10936 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10937 				      NULL);
10938 	if (ret < 0)
10939 		goto out_free_cpumask;
10940 	/* Used for event triggers */
10941 	ret = -ENOMEM;
10942 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10943 	if (!temp_buffer)
10944 		goto out_rm_hp_state;
10945 
10946 	if (trace_create_savedcmd() < 0)
10947 		goto out_free_temp_buffer;
10948 
10949 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10950 		goto out_free_savedcmd;
10951 
10952 	/* TODO: make the number of buffers hot pluggable with CPUS */
10953 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10954 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10955 		goto out_free_pipe_cpumask;
10956 	}
10957 	if (global_trace.buffer_disabled)
10958 		tracing_off();
10959 
10960 	if (trace_boot_clock) {
10961 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10962 		if (ret < 0)
10963 			pr_warn("Trace clock %s not defined, going back to default\n",
10964 				trace_boot_clock);
10965 	}
10966 
10967 	/*
10968 	 * register_tracer() might reference current_trace, so it
10969 	 * needs to be set before we register anything. This is
10970 	 * just a bootstrap of current_trace anyway.
10971 	 */
10972 	global_trace.current_trace = &nop_trace;
10973 
10974 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10975 #ifdef CONFIG_TRACER_MAX_TRACE
10976 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10977 #endif
10978 	ftrace_init_global_array_ops(&global_trace);
10979 
10980 #ifdef CONFIG_MODULES
10981 	INIT_LIST_HEAD(&global_trace.mod_events);
10982 #endif
10983 
10984 	init_trace_flags_index(&global_trace);
10985 
10986 	register_tracer(&nop_trace);
10987 
10988 	/* Function tracing may start here (via kernel command line) */
10989 	init_function_trace();
10990 
10991 	/* All seems OK, enable tracing */
10992 	tracing_disabled = 0;
10993 
10994 	atomic_notifier_chain_register(&panic_notifier_list,
10995 				       &trace_panic_notifier);
10996 
10997 	register_die_notifier(&trace_die_notifier);
10998 
10999 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11000 
11001 	INIT_LIST_HEAD(&global_trace.systems);
11002 	INIT_LIST_HEAD(&global_trace.events);
11003 	INIT_LIST_HEAD(&global_trace.hist_vars);
11004 	INIT_LIST_HEAD(&global_trace.err_log);
11005 	list_add(&global_trace.list, &ftrace_trace_arrays);
11006 
11007 	apply_trace_boot_options();
11008 
11009 	register_snapshot_cmd();
11010 
11011 	return 0;
11012 
11013 out_free_pipe_cpumask:
11014 	free_cpumask_var(global_trace.pipe_cpumask);
11015 out_free_savedcmd:
11016 	trace_free_saved_cmdlines_buffer();
11017 out_free_temp_buffer:
11018 	ring_buffer_free(temp_buffer);
11019 out_rm_hp_state:
11020 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11021 out_free_cpumask:
11022 	free_cpumask_var(global_trace.tracing_cpumask);
11023 out_free_buffer_mask:
11024 	free_cpumask_var(tracing_buffer_mask);
11025 out:
11026 	return ret;
11027 }
11028 
11029 #ifdef CONFIG_FUNCTION_TRACER
11030 /* Used to set module cached ftrace filtering at boot up */
11031 __init struct trace_array *trace_get_global_array(void)
11032 {
11033 	return &global_trace;
11034 }
11035 #endif
11036 
11037 void __init ftrace_boot_snapshot(void)
11038 {
11039 #ifdef CONFIG_TRACER_MAX_TRACE
11040 	struct trace_array *tr;
11041 
11042 	if (!snapshot_at_boot)
11043 		return;
11044 
11045 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11046 		if (!tr->allocated_snapshot)
11047 			continue;
11048 
11049 		tracing_snapshot_instance(tr);
11050 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11051 	}
11052 #endif
11053 }
11054 
11055 void __init early_trace_init(void)
11056 {
11057 	if (tracepoint_printk) {
11058 		tracepoint_print_iter =
11059 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11060 		if (MEM_FAIL(!tracepoint_print_iter,
11061 			     "Failed to allocate trace iterator\n"))
11062 			tracepoint_printk = 0;
11063 		else
11064 			static_key_enable(&tracepoint_printk_key.key);
11065 	}
11066 	tracer_alloc_buffers();
11067 
11068 	init_events();
11069 }
11070 
11071 void __init trace_init(void)
11072 {
11073 	trace_event_init();
11074 
11075 	if (boot_instance_index)
11076 		enable_instances();
11077 }
11078 
11079 __init static void clear_boot_tracer(void)
11080 {
11081 	/*
11082 	 * The default tracer at boot buffer is an init section.
11083 	 * This function is called in lateinit. If we did not
11084 	 * find the boot tracer, then clear it out, to prevent
11085 	 * later registration from accessing the buffer that is
11086 	 * about to be freed.
11087 	 */
11088 	if (!default_bootup_tracer)
11089 		return;
11090 
11091 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11092 	       default_bootup_tracer);
11093 	default_bootup_tracer = NULL;
11094 }
11095 
11096 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11097 __init static void tracing_set_default_clock(void)
11098 {
11099 	/* sched_clock_stable() is determined in late_initcall */
11100 	if (!trace_boot_clock && !sched_clock_stable()) {
11101 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11102 			pr_warn("Can not set tracing clock due to lockdown\n");
11103 			return;
11104 		}
11105 
11106 		printk(KERN_WARNING
11107 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11108 		       "If you want to keep using the local clock, then add:\n"
11109 		       "  \"trace_clock=local\"\n"
11110 		       "on the kernel command line\n");
11111 		tracing_set_clock(&global_trace, "global");
11112 	}
11113 }
11114 #else
11115 static inline void tracing_set_default_clock(void) { }
11116 #endif
11117 
11118 __init static int late_trace_init(void)
11119 {
11120 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11121 		static_key_disable(&tracepoint_printk_key.key);
11122 		tracepoint_printk = 0;
11123 	}
11124 
11125 	if (traceoff_after_boot)
11126 		tracing_off();
11127 
11128 	tracing_set_default_clock();
11129 	clear_boot_tracer();
11130 	return 0;
11131 }
11132 
11133 late_initcall_sync(late_trace_init);
11134