xref: /linux/kernel/trace/trace.c (revision 260f6f4fda93c8485c8037865c941b42b9cba5d2)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55 
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57 
58 #include "trace.h"
59 #include "trace_output.h"
60 
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing (including tracers/events set up via the kernel
73  * cmdline) is running, we do not want to run the selftests.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #else
85 #define tracing_selftest_running	0
86 #define tracing_selftest_disabled	0
87 #endif
88 
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95 
96 /* For tracers that don't implement custom flags */
97 static struct tracer_opt dummy_tracer_opt[] = {
98 	{ }
99 };
100 
101 static int
102 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 {
104 	return 0;
105 }
106 
107 /*
108  * To prevent the comm cache from being overwritten when no
109  * tracing is active, only save the comm when a trace event
110  * occurred.
111  */
112 DEFINE_PER_CPU(bool, trace_taskinfo_save);
113 
114 /*
115  * Kill all tracing for good (never come back).
116  * It is initialized to 1 but will turn to zero if the initialization
117  * of the tracer is successful. But that is the only place that sets
118  * this back to zero.
119  */
120 static int tracing_disabled = 1;
121 
122 cpumask_var_t __read_mostly	tracing_buffer_mask;
123 
124 #define MAX_TRACER_SIZE		100
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
129  * is set, then ftrace_dump() is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputting them to a
132  * serial console.
133  *
134  * It is off by default, but can be enabled either by specifying
135  * "ftrace_dump_on_oops" on the kernel command line or by setting
136  * /proc/sys/kernel/ftrace_dump_on_oops.
137  * Set it to 1 to dump the buffers of all CPUs.
138  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
139  * Set it to an instance name to dump that specific trace instance.
140  * Dumping multiple instances is also supported; instances are separated
141  * by commas.
142  */
143 /* Disabled by default (set to the string "0") */
144 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
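
/*
 * A few illustrative settings, derived from the description above (the
 * authoritative syntax is in Documentation/admin-guide/kernel-parameters.txt;
 * the instance names below are arbitrary examples):
 *
 *	ftrace_dump_on_oops=1		dump the buffers of all CPUs
 *	ftrace_dump_on_oops=2		dump only the buffer of the oopsing CPU
 *	ftrace_dump_on_oops=foo,bar	dump the "foo" and "bar" instances
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */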
145 
146 /* When set, tracing will stop when a WARN*() is hit */
147 static int __disable_trace_on_warning;
148 
149 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
150 			     void *buffer, size_t *lenp, loff_t *ppos);
151 static const struct ctl_table trace_sysctl_table[] = {
152 	{
153 		.procname	= "ftrace_dump_on_oops",
154 		.data		= &ftrace_dump_on_oops,
155 		.maxlen		= MAX_TRACER_SIZE,
156 		.mode		= 0644,
157 		.proc_handler	= proc_dostring,
158 	},
159 	{
160 		.procname	= "traceoff_on_warning",
161 		.data		= &__disable_trace_on_warning,
162 		.maxlen		= sizeof(__disable_trace_on_warning),
163 		.mode		= 0644,
164 		.proc_handler	= proc_dointvec,
165 	},
166 	{
167 		.procname	= "tracepoint_printk",
168 		.data		= &tracepoint_printk,
169 		.maxlen		= sizeof(tracepoint_printk),
170 		.mode		= 0644,
171 		.proc_handler	= tracepoint_printk_sysctl,
172 	},
173 };
174 
175 static int __init init_trace_sysctls(void)
176 {
177 	register_sysctl_init("kernel", trace_sysctl_table);
178 	return 0;
179 }
180 subsys_initcall(init_trace_sysctls);
181 
182 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
183 /* Map of enums to their values, for "eval_map" file */
184 struct trace_eval_map_head {
185 	struct module			*mod;
186 	unsigned long			length;
187 };
188 
189 union trace_eval_map_item;
190 
191 struct trace_eval_map_tail {
192 	/*
193 	 * "end" is first and points to NULL, as it must be different
194 	 * from "mod" or "eval_string".
195 	 */
196 	union trace_eval_map_item	*next;
197 	const char			*end;	/* points to NULL */
198 };
199 
200 static DEFINE_MUTEX(trace_eval_mutex);
201 
202 /*
203  * The trace_eval_maps are saved in an array with two extra elements,
204  * one at the beginning, and one at the end. The beginning item contains
205  * the count of the saved maps (head.length), and the module they
206  * belong to if not built in (head.mod). The ending item contains a
207  * pointer to the next array of saved eval_map items.
208  */
209 union trace_eval_map_item {
210 	struct trace_eval_map		map;
211 	struct trace_eval_map_head	head;
212 	struct trace_eval_map_tail	tail;
213 };
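
/*
 * Illustrative layout of one saved array, as described above (a sketch,
 * not taken from real data; the map fields are those of struct trace_eval_map):
 *
 *	[0]      head: { .mod = owning module or NULL, .length = N }
 *	[1..N]   map:  { .system, .eval_string, .eval_value }
 *	[N+1]    tail: { .next = next saved array or NULL, .end = NULL }
 */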
214 
215 static union trace_eval_map_item *trace_eval_maps;
216 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
217 
218 int tracing_set_tracer(struct trace_array *tr, const char *buf);
219 static void ftrace_trace_userstack(struct trace_array *tr,
220 				   struct trace_buffer *buffer,
221 				   unsigned int trace_ctx);
222 
223 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
224 static char *default_bootup_tracer;
225 
226 static bool allocate_snapshot;
227 static bool snapshot_at_boot;
228 
229 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_instance_index;
231 
232 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
233 static int boot_snapshot_index;
234 
235 static int __init set_cmdline_ftrace(char *str)
236 {
237 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
238 	default_bootup_tracer = bootup_tracer_buf;
239 	/* We are using ftrace early, expand it */
240 	trace_set_ring_buffer_expanded(NULL);
241 	return 1;
242 }
243 __setup("ftrace=", set_cmdline_ftrace);
244 
245 int ftrace_dump_on_oops_enabled(void)
246 {
247 	if (!strcmp("0", ftrace_dump_on_oops))
248 		return 0;
249 	else
250 		return 1;
251 }
252 
253 static int __init set_ftrace_dump_on_oops(char *str)
254 {
255 	if (!*str) {
256 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
257 		return 1;
258 	}
259 
260 	if (*str == ',') {
261 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
262 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
263 		return 1;
264 	}
265 
266 	if (*str++ == '=') {
267 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
268 		return 1;
269 	}
270 
271 	return 0;
272 }
273 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
274 
275 static int __init stop_trace_on_warning(char *str)
276 {
277 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
278 		__disable_trace_on_warning = 1;
279 	return 1;
280 }
281 __setup("traceoff_on_warning", stop_trace_on_warning);
282 
283 static int __init boot_alloc_snapshot(char *str)
284 {
285 	char *slot = boot_snapshot_info + boot_snapshot_index;
286 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
287 	int ret;
288 
289 	if (str[0] == '=') {
290 		str++;
291 		if (strlen(str) >= left)
292 			return -1;
293 
294 		ret = snprintf(slot, left, "%s\t", str);
295 		boot_snapshot_index += ret;
296 	} else {
297 		allocate_snapshot = true;
298 		/* We also need the main ring buffer expanded */
299 		trace_set_ring_buffer_expanded(NULL);
300 	}
301 	return 1;
302 }
303 __setup("alloc_snapshot", boot_alloc_snapshot);
304 
305 
306 static int __init boot_snapshot(char *str)
307 {
308 	snapshot_at_boot = true;
309 	boot_alloc_snapshot(str);
310 	return 1;
311 }
312 __setup("ftrace_boot_snapshot", boot_snapshot);
313 
314 
315 static int __init boot_instance(char *str)
316 {
317 	char *slot = boot_instance_info + boot_instance_index;
318 	int left = sizeof(boot_instance_info) - boot_instance_index;
319 	int ret;
320 
321 	if (strlen(str) >= left)
322 		return -1;
323 
324 	ret = snprintf(slot, left, "%s\t", str);
325 	boot_instance_index += ret;
326 
327 	return 1;
328 }
329 __setup("trace_instance=", boot_instance);
330 
331 
332 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
333 
334 static int __init set_trace_boot_options(char *str)
335 {
336 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
337 	return 1;
338 }
339 __setup("trace_options=", set_trace_boot_options);
340 
341 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
342 static char *trace_boot_clock __initdata;
343 
344 static int __init set_trace_boot_clock(char *str)
345 {
346 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
347 	trace_boot_clock = trace_boot_clock_buf;
348 	return 1;
349 }
350 __setup("trace_clock=", set_trace_boot_clock);
351 
352 static int __init set_tracepoint_printk(char *str)
353 {
354 	/* Ignore the "tp_printk_stop_on_boot" param */
355 	if (*str == '_')
356 		return 0;
357 
358 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
359 		tracepoint_printk = 1;
360 	return 1;
361 }
362 __setup("tp_printk", set_tracepoint_printk);
363 
364 static int __init set_tracepoint_printk_stop(char *str)
365 {
366 	tracepoint_printk_stop_on_boot = true;
367 	return 1;
368 }
369 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
370 
371 static int __init set_traceoff_after_boot(char *str)
372 {
373 	traceoff_after_boot = true;
374 	return 1;
375 }
376 __setup("traceoff_after_boot", set_traceoff_after_boot);
377 
378 unsigned long long ns2usecs(u64 nsec)
379 {
380 	nsec += 500;
381 	do_div(nsec, 1000);
382 	return nsec;
383 }
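
/*
 * Note that this rounds to the nearest microsecond rather than truncating,
 * e.g. ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */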
384 
385 static void
386 trace_process_export(struct trace_export *export,
387 	       struct ring_buffer_event *event, int flag)
388 {
389 	struct trace_entry *entry;
390 	unsigned int size = 0;
391 
392 	if (export->flags & flag) {
393 		entry = ring_buffer_event_data(event);
394 		size = ring_buffer_event_length(event);
395 		export->write(export, entry, size);
396 	}
397 }
398 
399 static DEFINE_MUTEX(ftrace_export_lock);
400 
401 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
402 
403 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
405 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
406 
407 static inline void ftrace_exports_enable(struct trace_export *export)
408 {
409 	if (export->flags & TRACE_EXPORT_FUNCTION)
410 		static_branch_inc(&trace_function_exports_enabled);
411 
412 	if (export->flags & TRACE_EXPORT_EVENT)
413 		static_branch_inc(&trace_event_exports_enabled);
414 
415 	if (export->flags & TRACE_EXPORT_MARKER)
416 		static_branch_inc(&trace_marker_exports_enabled);
417 }
418 
419 static inline void ftrace_exports_disable(struct trace_export *export)
420 {
421 	if (export->flags & TRACE_EXPORT_FUNCTION)
422 		static_branch_dec(&trace_function_exports_enabled);
423 
424 	if (export->flags & TRACE_EXPORT_EVENT)
425 		static_branch_dec(&trace_event_exports_enabled);
426 
427 	if (export->flags & TRACE_EXPORT_MARKER)
428 		static_branch_dec(&trace_marker_exports_enabled);
429 }
430 
431 static void ftrace_exports(struct ring_buffer_event *event, int flag)
432 {
433 	struct trace_export *export;
434 
435 	preempt_disable_notrace();
436 
437 	export = rcu_dereference_raw_check(ftrace_exports_list);
438 	while (export) {
439 		trace_process_export(export, event, flag);
440 		export = rcu_dereference_raw_check(export->next);
441 	}
442 
443 	preempt_enable_notrace();
444 }
445 
446 static inline void
447 add_trace_export(struct trace_export **list, struct trace_export *export)
448 {
449 	rcu_assign_pointer(export->next, *list);
450 	/*
451 	 * We are adding the export to the list, but another
452 	 * CPU might be walking that list. We need to make sure
453 	 * the export->next pointer is valid before another CPU sees
454 	 * the export pointer inserted into the list.
455 	 */
456 	rcu_assign_pointer(*list, export);
457 }
458 
459 static inline int
460 rm_trace_export(struct trace_export **list, struct trace_export *export)
461 {
462 	struct trace_export **p;
463 
464 	for (p = list; *p != NULL; p = &(*p)->next)
465 		if (*p == export)
466 			break;
467 
468 	if (*p != export)
469 		return -1;
470 
471 	rcu_assign_pointer(*p, (*p)->next);
472 
473 	return 0;
474 }
475 
476 static inline void
477 add_ftrace_export(struct trace_export **list, struct trace_export *export)
478 {
479 	ftrace_exports_enable(export);
480 
481 	add_trace_export(list, export);
482 }
483 
484 static inline int
485 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
486 {
487 	int ret;
488 
489 	ret = rm_trace_export(list, export);
490 	ftrace_exports_disable(export);
491 
492 	return ret;
493 }
494 
495 int register_ftrace_export(struct trace_export *export)
496 {
497 	if (WARN_ON_ONCE(!export->write))
498 		return -1;
499 
500 	mutex_lock(&ftrace_export_lock);
501 
502 	add_ftrace_export(&ftrace_exports_list, export);
503 
504 	mutex_unlock(&ftrace_export_lock);
505 
506 	return 0;
507 }
508 EXPORT_SYMBOL_GPL(register_ftrace_export);
509 
510 int unregister_ftrace_export(struct trace_export *export)
511 {
512 	int ret;
513 
514 	mutex_lock(&ftrace_export_lock);
515 
516 	ret = rm_ftrace_export(&ftrace_exports_list, export);
517 
518 	mutex_unlock(&ftrace_export_lock);
519 
520 	return ret;
521 }
522 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
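
/*
 * Illustrative usage sketch (hypothetical module code, not part of this
 * file).  The ->write() signature follows trace_process_export() above,
 * which passes the raw entry and its length:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_info("exported %u bytes\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */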
523 
524 /* trace_flags holds trace_options default values */
525 #define TRACE_DEFAULT_FLAGS						\
526 	(FUNCTION_DEFAULT_FLAGS |					\
527 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
528 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
529 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
530 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
531 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK |		\
532 	 TRACE_ITER_COPY_MARKER)
533 
534 /* trace_options that are only supported by global_trace */
535 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
536 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
537 
538 /* trace_flags that are default zero for instances */
539 #define ZEROED_TRACE_FLAGS \
540 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
541 	 TRACE_ITER_COPY_MARKER)
542 
543 /*
544  * The global_trace is the descriptor that holds the top-level tracing
545  * buffers for the live tracing.
546  */
547 static struct trace_array global_trace = {
548 	.trace_flags = TRACE_DEFAULT_FLAGS,
549 };
550 
551 static struct trace_array *printk_trace = &global_trace;
552 
553 /* List of trace_arrays interested in the top level trace_marker */
554 static LIST_HEAD(marker_copies);
555 
556 static __always_inline bool printk_binsafe(struct trace_array *tr)
557 {
558 	/*
559 	 * The binary format of trace_printk() can cause a crash if used
560 	 * by a buffer from another boot. Force the use of the
561 	 * non-binary version of trace_printk() if the trace_printk
562 	 * buffer is a boot-mapped ring buffer.
563 	 */
564 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
565 }
566 
567 static void update_printk_trace(struct trace_array *tr)
568 {
569 	if (printk_trace == tr)
570 		return;
571 
572 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
573 	printk_trace = tr;
574 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
575 }
576 
577 /* Returns true if the status of tr changed */
578 static bool update_marker_trace(struct trace_array *tr, int enabled)
579 {
580 	lockdep_assert_held(&event_mutex);
581 
582 	if (enabled) {
583 		if (!list_empty(&tr->marker_list))
584 			return false;
585 
586 		list_add_rcu(&tr->marker_list, &marker_copies);
587 		tr->trace_flags |= TRACE_ITER_COPY_MARKER;
588 		return true;
589 	}
590 
591 	if (list_empty(&tr->marker_list))
592 		return false;
593 
594 	list_del_init(&tr->marker_list);
595 	tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
596 	return true;
597 }
598 
599 void trace_set_ring_buffer_expanded(struct trace_array *tr)
600 {
601 	if (!tr)
602 		tr = &global_trace;
603 	tr->ring_buffer_expanded = true;
604 }
605 
606 LIST_HEAD(ftrace_trace_arrays);
607 
608 int trace_array_get(struct trace_array *this_tr)
609 {
610 	struct trace_array *tr;
611 
612 	guard(mutex)(&trace_types_lock);
613 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
614 		if (tr == this_tr) {
615 			tr->ref++;
616 			return 0;
617 		}
618 	}
619 
620 	return -ENODEV;
621 }
622 
623 static void __trace_array_put(struct trace_array *this_tr)
624 {
625 	WARN_ON(!this_tr->ref);
626 	this_tr->ref--;
627 }
628 
629 /**
630  * trace_array_put - Decrement the reference counter for this trace array.
631  * @this_tr : pointer to the trace array
632  *
633  * NOTE: Use this when we no longer need the trace array returned by
634  * trace_array_get_by_name(). This ensures the trace array can be later
635  * destroyed.
636  *
637  */
638 void trace_array_put(struct trace_array *this_tr)
639 {
640 	if (!this_tr)
641 		return;
642 
643 	mutex_lock(&trace_types_lock);
644 	__trace_array_put(this_tr);
645 	mutex_unlock(&trace_types_lock);
646 }
647 EXPORT_SYMBOL_GPL(trace_array_put);
648 
649 int tracing_check_open_get_tr(struct trace_array *tr)
650 {
651 	int ret;
652 
653 	ret = security_locked_down(LOCKDOWN_TRACEFS);
654 	if (ret)
655 		return ret;
656 
657 	if (tracing_disabled)
658 		return -ENODEV;
659 
660 	if (tr && trace_array_get(tr) < 0)
661 		return -ENODEV;
662 
663 	return 0;
664 }
665 
666 /**
667  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
668  * @filtered_pids: The list of pids to check
669  * @search_pid: The PID to find in @filtered_pids
670  *
671  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
672  */
673 bool
674 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
675 {
676 	return trace_pid_list_is_set(filtered_pids, search_pid);
677 }
678 
679 /**
680  * trace_ignore_this_task - should a task be ignored for tracing
681  * @filtered_pids: The list of pids to check
682  * @filtered_no_pids: The list of pids not to be traced
683  * @task: The task that should be ignored if not filtered
684  *
685  * Checks if @task should be traced or not from @filtered_pids.
686  * Returns true if @task should *NOT* be traced.
687  * Returns false if @task should be traced.
688  */
689 bool
690 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
691 		       struct trace_pid_list *filtered_no_pids,
692 		       struct task_struct *task)
693 {
694 	/*
695 	 * If filtered_no_pids is not empty, and the task's pid is listed
696 	 * in filtered_no_pids, then return true.
697 	 * Otherwise, if filtered_pids is empty, that means we can
698 	 * trace all tasks. If it has content, then only trace pids
699 	 * within filtered_pids.
700 	 */
701 
702 	return (filtered_pids &&
703 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
704 		(filtered_no_pids &&
705 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
706 }
707 
708 /**
709  * trace_filter_add_remove_task - Add or remove a task from a pid_list
710  * @pid_list: The list to modify
711  * @self: The current task for fork or NULL for exit
712  * @task: The task to add or remove
713  *
714  * If adding a task, if @self is defined, the task is only added if @self
715  * is also included in @pid_list. This happens on fork and tasks should
716  * only be added when the parent is listed. If @self is NULL, then the
717  * @task pid will be removed from the list, which would happen on exit
718  * of a task.
719  */
720 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
721 				  struct task_struct *self,
722 				  struct task_struct *task)
723 {
724 	if (!pid_list)
725 		return;
726 
727 	/* For forks, we only add if the forking task is listed */
728 	if (self) {
729 		if (!trace_find_filtered_pid(pid_list, self->pid))
730 			return;
731 	}
732 
733 	/* "self" is set for forks, and NULL for exits */
734 	if (self)
735 		trace_pid_list_set(pid_list, task->pid);
736 	else
737 		trace_pid_list_clear(pid_list, task->pid);
738 }
739 
740 /**
741  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
742  * @pid_list: The pid list to show
743  * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
744  * @pos: The position of the file
745  *
746  * This is used by the seq_file "next" operation to iterate the pids
747  * listed in a trace_pid_list structure.
748  *
749  * Returns the pid+1 as we want to display pid of zero, but NULL would
750  * stop the iteration.
751  */
752 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
753 {
754 	long pid = (unsigned long)v;
755 	unsigned int next;
756 
757 	(*pos)++;
758 
759 	/* pid already is +1 of the actual previous bit */
760 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
761 		return NULL;
762 
763 	pid = next;
764 
765 	/* Return pid + 1 to allow zero to be represented */
766 	return (void *)(pid + 1);
767 }
768 
769 /**
770  * trace_pid_start - Used for seq_file to start reading pid lists
771  * @pid_list: The pid list to show
772  * @pos: The position of the file
773  *
774  * This is used by seq_file "start" operation to start the iteration
775  * of listing pids.
776  *
777  * Returns the pid+1 as we want to display pid of zero, but NULL would
778  * stop the iteration.
779  */
780 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
781 {
782 	unsigned long pid;
783 	unsigned int first;
784 	loff_t l = 0;
785 
786 	if (trace_pid_list_first(pid_list, &first) < 0)
787 		return NULL;
788 
789 	pid = first;
790 
791 	/* Return pid + 1 so that zero can be the exit value */
792 	for (pid++; pid && l < *pos;
793 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
794 		;
795 	return (void *)pid;
796 }
797 
798 /**
799  * trace_pid_show - show the current pid in seq_file processing
800  * @m: The seq_file structure to write into
801  * @v: A void pointer of the pid (+1) value to display
802  *
803  * Can be directly used by seq_file operations to display the current
804  * pid value.
805  */
806 int trace_pid_show(struct seq_file *m, void *v)
807 {
808 	unsigned long pid = (unsigned long)v - 1;
809 
810 	seq_printf(m, "%lu\n", pid);
811 	return 0;
812 }
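
/*
 * For example, a stored pid of 0 is passed around by trace_pid_start()
 * and trace_pid_next() as the pointer value 1, and trace_pid_show()
 * subtracts 1 again before printing "0".
 */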
813 
814 /* 128 should be much more than enough */
815 #define PID_BUF_SIZE		127
816 
817 int trace_pid_write(struct trace_pid_list *filtered_pids,
818 		    struct trace_pid_list **new_pid_list,
819 		    const char __user *ubuf, size_t cnt)
820 {
821 	struct trace_pid_list *pid_list;
822 	struct trace_parser parser;
823 	unsigned long val;
824 	int nr_pids = 0;
825 	ssize_t read = 0;
826 	ssize_t ret;
827 	loff_t pos;
828 	pid_t pid;
829 
830 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
831 		return -ENOMEM;
832 
833 	/*
834 	 * Always work on a new list. The write is an all-or-nothing
835 	 * operation: a new list is built whenever the user adds pids, so
836 	 * that if the operation fails, the current list is left
837 	 * unmodified.
838 	 */
839 	pid_list = trace_pid_list_alloc();
840 	if (!pid_list) {
841 		trace_parser_put(&parser);
842 		return -ENOMEM;
843 	}
844 
845 	if (filtered_pids) {
846 		/* copy the current bits to the new max */
847 		ret = trace_pid_list_first(filtered_pids, &pid);
848 		while (!ret) {
849 			trace_pid_list_set(pid_list, pid);
850 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
851 			nr_pids++;
852 		}
853 	}
854 
855 	ret = 0;
856 	while (cnt > 0) {
857 
858 		pos = 0;
859 
860 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
861 		if (ret < 0)
862 			break;
863 
864 		read += ret;
865 		ubuf += ret;
866 		cnt -= ret;
867 
868 		if (!trace_parser_loaded(&parser))
869 			break;
870 
871 		ret = -EINVAL;
872 		if (kstrtoul(parser.buffer, 0, &val))
873 			break;
874 
875 		pid = (pid_t)val;
876 
877 		if (trace_pid_list_set(pid_list, pid) < 0) {
878 			ret = -1;
879 			break;
880 		}
881 		nr_pids++;
882 
883 		trace_parser_clear(&parser);
884 		ret = 0;
885 	}
886 	trace_parser_put(&parser);
887 
888 	if (ret < 0) {
889 		trace_pid_list_free(pid_list);
890 		return ret;
891 	}
892 
893 	if (!nr_pids) {
894 		/* Cleared the list of pids */
895 		trace_pid_list_free(pid_list);
896 		pid_list = NULL;
897 	}
898 
899 	*new_pid_list = pid_list;
900 
901 	return read;
902 }
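
/*
 * This helper backs the pid-filtering tracefs files (for example
 * set_event_pid); a write such as
 *
 *	echo 123 456 > /sys/kernel/tracing/set_event_pid
 *
 * ends up here with the pids parsed one whitespace-separated token at a
 * time.  (Illustrative example; the file semantics themselves are
 * documented under Documentation/trace/.)
 */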
903 
904 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
905 {
906 	u64 ts;
907 
908 	/* Early boot up does not have a buffer yet */
909 	if (!buf->buffer)
910 		return trace_clock_local();
911 
912 	ts = ring_buffer_time_stamp(buf->buffer);
913 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
914 
915 	return ts;
916 }
917 
918 u64 ftrace_now(int cpu)
919 {
920 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
921 }
922 
923 /**
924  * tracing_is_enabled - Show if global_trace has been enabled
925  *
926  * Shows if the global trace has been enabled or not. It uses the
927  * mirror flag "buffer_disabled", which is meant for fast paths such as
928  * the irqsoff tracer. But it may be inaccurate due to races. If you
929  * need to know the accurate state, use tracing_is_on(), which is a
930  * little slower but accurate.
931  */
932 int tracing_is_enabled(void)
933 {
934 	/*
935 	 * For quick access (irqsoff uses this in fast path), just
936 	 * return the mirror variable of the state of the ring buffer.
937 	 * It's a little racy, but we don't really care.
938 	 */
939 	smp_rmb();
940 	return !global_trace.buffer_disabled;
941 }
942 
943 /*
944  * trace_buf_size is the size in bytes that is allocated
945  * for a buffer. Note, the number of bytes is always rounded
946  * to page size.
947  *
948  * This number is purposely set to a low number of 16384.
949  * If a dump on oops happens, it is much appreciated not to have
950  * to wait for all that output. In any case, this is configurable
951  * at both boot time and run time.
952  */
953 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
954 
955 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
956 
957 /* trace_types holds a linked list of available tracers. */
958 static struct tracer		*trace_types __read_mostly;
959 
960 /*
961  * trace_types_lock is used to protect the trace_types list.
962  */
963 DEFINE_MUTEX(trace_types_lock);
964 
965 /*
966  * Serialize access to the ring buffer.
967  *
968  * The ring buffer serializes readers, but that is only low-level protection.
969  * The validity of events (as returned by ring_buffer_peek(), etc.)
970  * is not protected by the ring buffer.
971  *
972  * The content of events may become garbage if we allow other processes
973  * to consume these events concurrently:
974  *   A) the page of the consumed events may become a normal page
975  *      (not a reader page) in the ring buffer, and this page will be
976  *      rewritten by the event producer.
977  *   B) the page of the consumed events may become a page for splice_read,
978  *      and this page will be returned to the system.
979  *
980  * These primitives allow multiple processes to access different CPU
981  * ring buffers concurrently.
982  *
983  * These primitives don't distinguish read-only and read-consume access.
984  * Multiple read-only accesses are also serialized.
985  */
986 
987 #ifdef CONFIG_SMP
988 static DECLARE_RWSEM(all_cpu_access_lock);
989 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
990 
991 static inline void trace_access_lock(int cpu)
992 {
993 	if (cpu == RING_BUFFER_ALL_CPUS) {
994 		/* gain it for accessing the whole ring buffer. */
995 		down_write(&all_cpu_access_lock);
996 	} else {
997 		/* gain it for accessing a cpu ring buffer. */
998 
999 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
1000 		down_read(&all_cpu_access_lock);
1001 
1002 		/* Secondly block other access to this @cpu ring buffer. */
1003 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
1004 	}
1005 }
1006 
1007 static inline void trace_access_unlock(int cpu)
1008 {
1009 	if (cpu == RING_BUFFER_ALL_CPUS) {
1010 		up_write(&all_cpu_access_lock);
1011 	} else {
1012 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1013 		up_read(&all_cpu_access_lock);
1014 	}
1015 }
1016 
1017 static inline void trace_access_lock_init(void)
1018 {
1019 	int cpu;
1020 
1021 	for_each_possible_cpu(cpu)
1022 		mutex_init(&per_cpu(cpu_access_lock, cpu));
1023 }
1024 
1025 #else
1026 
1027 static DEFINE_MUTEX(access_lock);
1028 
1029 static inline void trace_access_lock(int cpu)
1030 {
1031 	(void)cpu;
1032 	mutex_lock(&access_lock);
1033 }
1034 
1035 static inline void trace_access_unlock(int cpu)
1036 {
1037 	(void)cpu;
1038 	mutex_unlock(&access_lock);
1039 }
1040 
1041 static inline void trace_access_lock_init(void)
1042 {
1043 }
1044 
1045 #endif
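
/*
 * Typical usage pattern, as seen in the trace readers later in this file:
 *
 *	trace_access_lock(iter->cpu_file);
 *	... peek at or consume events of that cpu (or all cpus) ...
 *	trace_access_unlock(iter->cpu_file);
 */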
1046 
1047 #ifdef CONFIG_STACKTRACE
1048 static void __ftrace_trace_stack(struct trace_array *tr,
1049 				 struct trace_buffer *buffer,
1050 				 unsigned int trace_ctx,
1051 				 int skip, struct pt_regs *regs);
1052 static inline void ftrace_trace_stack(struct trace_array *tr,
1053 				      struct trace_buffer *buffer,
1054 				      unsigned int trace_ctx,
1055 				      int skip, struct pt_regs *regs);
1056 
1057 #else
1058 static inline void __ftrace_trace_stack(struct trace_array *tr,
1059 					struct trace_buffer *buffer,
1060 					unsigned int trace_ctx,
1061 					int skip, struct pt_regs *regs)
1062 {
1063 }
1064 static inline void ftrace_trace_stack(struct trace_array *tr,
1065 				      struct trace_buffer *buffer,
1066 				      unsigned long trace_ctx,
1067 				      int skip, struct pt_regs *regs)
1068 {
1069 }
1070 
1071 #endif
1072 
1073 static __always_inline void
1074 trace_event_setup(struct ring_buffer_event *event,
1075 		  int type, unsigned int trace_ctx)
1076 {
1077 	struct trace_entry *ent = ring_buffer_event_data(event);
1078 
1079 	tracing_generic_entry_update(ent, type, trace_ctx);
1080 }
1081 
1082 static __always_inline struct ring_buffer_event *
1083 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1084 			  int type,
1085 			  unsigned long len,
1086 			  unsigned int trace_ctx)
1087 {
1088 	struct ring_buffer_event *event;
1089 
1090 	event = ring_buffer_lock_reserve(buffer, len);
1091 	if (event != NULL)
1092 		trace_event_setup(event, type, trace_ctx);
1093 
1094 	return event;
1095 }
1096 
1097 void tracer_tracing_on(struct trace_array *tr)
1098 {
1099 	if (tr->array_buffer.buffer)
1100 		ring_buffer_record_on(tr->array_buffer.buffer);
1101 	/*
1102 	 * This flag is looked at when buffers haven't been allocated
1103 	 * yet, or by some tracers (like irqsoff) that just want to
1104 	 * know if the ring buffer has been disabled, but can handle
1105 	 * races where it gets disabled while we still do a record.
1106 	 * As the check is in the fast path of the tracers, it is more
1107 	 * important to be fast than accurate.
1108 	 */
1109 	tr->buffer_disabled = 0;
1110 	/* Make the flag seen by readers */
1111 	smp_wmb();
1112 }
1113 
1114 /**
1115  * tracing_on - enable tracing buffers
1116  *
1117  * This function enables tracing buffers that may have been
1118  * disabled with tracing_off.
1119  */
1120 void tracing_on(void)
1121 {
1122 	tracer_tracing_on(&global_trace);
1123 }
1124 EXPORT_SYMBOL_GPL(tracing_on);
1125 
1126 
1127 static __always_inline void
1128 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1129 {
1130 	__this_cpu_write(trace_taskinfo_save, true);
1131 
1132 	/* If this is the temp buffer, we need to commit fully */
1133 	if (this_cpu_read(trace_buffered_event) == event) {
1134 		/* Length is in event->array[0] */
1135 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1136 		/* Release the temp buffer */
1137 		this_cpu_dec(trace_buffered_event_cnt);
1138 		/* ring_buffer_unlock_commit() enables preemption */
1139 		preempt_enable_notrace();
1140 	} else
1141 		ring_buffer_unlock_commit(buffer);
1142 }
1143 
1144 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1145 		       const char *str, int size)
1146 {
1147 	struct ring_buffer_event *event;
1148 	struct trace_buffer *buffer;
1149 	struct print_entry *entry;
1150 	unsigned int trace_ctx;
1151 	int alloc;
1152 
1153 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1154 		return 0;
1155 
1156 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1157 		return 0;
1158 
1159 	if (unlikely(tracing_disabled))
1160 		return 0;
1161 
1162 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1163 
1164 	trace_ctx = tracing_gen_ctx();
1165 	buffer = tr->array_buffer.buffer;
1166 	ring_buffer_nest_start(buffer);
1167 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1168 					    trace_ctx);
1169 	if (!event) {
1170 		size = 0;
1171 		goto out;
1172 	}
1173 
1174 	entry = ring_buffer_event_data(event);
1175 	entry->ip = ip;
1176 
1177 	memcpy(&entry->buf, str, size);
1178 
1179 	/* Add a newline if necessary */
1180 	if (entry->buf[size - 1] != '\n') {
1181 		entry->buf[size] = '\n';
1182 		entry->buf[size + 1] = '\0';
1183 	} else
1184 		entry->buf[size] = '\0';
1185 
1186 	__buffer_unlock_commit(buffer, event);
1187 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1188  out:
1189 	ring_buffer_nest_end(buffer);
1190 	return size;
1191 }
1192 EXPORT_SYMBOL_GPL(__trace_array_puts);
1193 
1194 /**
1195  * __trace_puts - write a constant string into the trace buffer.
1196  * @ip:	   The address of the caller
1197  * @str:   The constant string to write
1198  * @size:  The size of the string.
1199  */
1200 int __trace_puts(unsigned long ip, const char *str, int size)
1201 {
1202 	return __trace_array_puts(printk_trace, ip, str, size);
1203 }
1204 EXPORT_SYMBOL_GPL(__trace_puts);
1205 
1206 /**
1207  * __trace_bputs - write the pointer to a constant string into trace buffer
1208  * @ip:	   The address of the caller
1209  * @str:   The constant string whose pointer is written into the buffer
1210  */
1211 int __trace_bputs(unsigned long ip, const char *str)
1212 {
1213 	struct trace_array *tr = READ_ONCE(printk_trace);
1214 	struct ring_buffer_event *event;
1215 	struct trace_buffer *buffer;
1216 	struct bputs_entry *entry;
1217 	unsigned int trace_ctx;
1218 	int size = sizeof(struct bputs_entry);
1219 	int ret = 0;
1220 
1221 	if (!printk_binsafe(tr))
1222 		return __trace_puts(ip, str, strlen(str));
1223 
1224 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1225 		return 0;
1226 
1227 	if (unlikely(tracing_selftest_running || tracing_disabled))
1228 		return 0;
1229 
1230 	trace_ctx = tracing_gen_ctx();
1231 	buffer = tr->array_buffer.buffer;
1232 
1233 	ring_buffer_nest_start(buffer);
1234 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1235 					    trace_ctx);
1236 	if (!event)
1237 		goto out;
1238 
1239 	entry = ring_buffer_event_data(event);
1240 	entry->ip			= ip;
1241 	entry->str			= str;
1242 
1243 	__buffer_unlock_commit(buffer, event);
1244 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1245 
1246 	ret = 1;
1247  out:
1248 	ring_buffer_nest_end(buffer);
1249 	return ret;
1250 }
1251 EXPORT_SYMBOL_GPL(__trace_bputs);
1252 
1253 #ifdef CONFIG_TRACER_SNAPSHOT
1254 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1255 					   void *cond_data)
1256 {
1257 	struct tracer *tracer = tr->current_trace;
1258 	unsigned long flags;
1259 
1260 	if (in_nmi()) {
1261 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1262 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1263 		return;
1264 	}
1265 
1266 	if (!tr->allocated_snapshot) {
1267 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1268 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1269 		tracer_tracing_off(tr);
1270 		return;
1271 	}
1272 
1273 	/* Note, the snapshot cannot be used when the tracer itself uses it */
1274 	if (tracer->use_max_tr) {
1275 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1276 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1277 		return;
1278 	}
1279 
1280 	if (tr->mapped) {
1281 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1282 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1283 		return;
1284 	}
1285 
1286 	local_irq_save(flags);
1287 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1288 	local_irq_restore(flags);
1289 }
1290 
1291 void tracing_snapshot_instance(struct trace_array *tr)
1292 {
1293 	tracing_snapshot_instance_cond(tr, NULL);
1294 }
1295 
1296 /**
1297  * tracing_snapshot - take a snapshot of the current buffer.
1298  *
1299  * This causes a swap between the snapshot buffer and the current live
1300  * tracing buffer. You can use this to take snapshots of the live
1301  * trace when some condition is triggered, but continue to trace.
1302  *
1303  * Note, make sure to allocate the snapshot with either
1304  * a tracing_snapshot_alloc(), or by doing it manually
1305  * with: echo 1 > /sys/kernel/tracing/snapshot
1306  *
1307  * If the snapshot buffer is not allocated, it will stop tracing.
1308  * Basically, this makes a permanent snapshot.
1309  */
1310 void tracing_snapshot(void)
1311 {
1312 	struct trace_array *tr = &global_trace;
1313 
1314 	tracing_snapshot_instance(tr);
1315 }
1316 EXPORT_SYMBOL_GPL(tracing_snapshot);
1317 
1318 /**
1319  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1320  * @tr:		The tracing instance to snapshot
1321  * @cond_data:	The data to be tested conditionally, and possibly saved
1322  *
1323  * This is the same as tracing_snapshot() except that the snapshot is
1324  * conditional - the snapshot will only happen if the
1325  * cond_snapshot.update() implementation receiving the cond_data
1326  * returns true, which means that the trace array's cond_snapshot
1327  * update() operation used the cond_data to determine whether the
1328  * snapshot should be taken, and if it was, presumably saved it along
1329  * with the snapshot.
1330  */
1331 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1332 {
1333 	tracing_snapshot_instance_cond(tr, cond_data);
1334 }
1335 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1336 
1337 /**
1338  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1339  * @tr:		The tracing instance
1340  *
1341  * When the user enables a conditional snapshot using
1342  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1343  * with the snapshot.  This accessor is used to retrieve it.
1344  *
1345  * Should not be called from cond_snapshot.update(), since it takes
1346  * the tr->max_lock lock, which the code calling
1347  * cond_snapshot.update() has already done.
1348  *
1349  * Returns the cond_data associated with the trace array's snapshot.
1350  */
1351 void *tracing_cond_snapshot_data(struct trace_array *tr)
1352 {
1353 	void *cond_data = NULL;
1354 
1355 	local_irq_disable();
1356 	arch_spin_lock(&tr->max_lock);
1357 
1358 	if (tr->cond_snapshot)
1359 		cond_data = tr->cond_snapshot->cond_data;
1360 
1361 	arch_spin_unlock(&tr->max_lock);
1362 	local_irq_enable();
1363 
1364 	return cond_data;
1365 }
1366 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1367 
1368 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1369 					struct array_buffer *size_buf, int cpu_id);
1370 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1371 
1372 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1373 {
1374 	int order;
1375 	int ret;
1376 
1377 	if (!tr->allocated_snapshot) {
1378 
1379 		/* Make the snapshot buffer have the same order as main buffer */
1380 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1381 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1382 		if (ret < 0)
1383 			return ret;
1384 
1385 		/* allocate spare buffer */
1386 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1387 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1388 		if (ret < 0)
1389 			return ret;
1390 
1391 		tr->allocated_snapshot = true;
1392 	}
1393 
1394 	return 0;
1395 }
1396 
1397 static void free_snapshot(struct trace_array *tr)
1398 {
1399 	/*
1400 	 * We don't free the ring buffer; instead, we resize it because
1401 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1402 	 * we want to preserve it.
1403 	 */
1404 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1405 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1406 	set_buffer_entries(&tr->max_buffer, 1);
1407 	tracing_reset_online_cpus(&tr->max_buffer);
1408 	tr->allocated_snapshot = false;
1409 }
1410 
1411 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1412 {
1413 	int ret;
1414 
1415 	lockdep_assert_held(&trace_types_lock);
1416 
1417 	spin_lock(&tr->snapshot_trigger_lock);
1418 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1419 		spin_unlock(&tr->snapshot_trigger_lock);
1420 		return -EBUSY;
1421 	}
1422 
1423 	tr->snapshot++;
1424 	spin_unlock(&tr->snapshot_trigger_lock);
1425 
1426 	ret = tracing_alloc_snapshot_instance(tr);
1427 	if (ret) {
1428 		spin_lock(&tr->snapshot_trigger_lock);
1429 		tr->snapshot--;
1430 		spin_unlock(&tr->snapshot_trigger_lock);
1431 	}
1432 
1433 	return ret;
1434 }
1435 
1436 int tracing_arm_snapshot(struct trace_array *tr)
1437 {
1438 	int ret;
1439 
1440 	mutex_lock(&trace_types_lock);
1441 	ret = tracing_arm_snapshot_locked(tr);
1442 	mutex_unlock(&trace_types_lock);
1443 
1444 	return ret;
1445 }
1446 
1447 void tracing_disarm_snapshot(struct trace_array *tr)
1448 {
1449 	spin_lock(&tr->snapshot_trigger_lock);
1450 	if (!WARN_ON(!tr->snapshot))
1451 		tr->snapshot--;
1452 	spin_unlock(&tr->snapshot_trigger_lock);
1453 }
1454 
1455 /**
1456  * tracing_alloc_snapshot - allocate snapshot buffer.
1457  *
1458  * This only allocates the snapshot buffer if it isn't already
1459  * allocated - it doesn't also take a snapshot.
1460  *
1461  * This is meant to be used in cases where the snapshot buffer needs
1462  * to be set up for events that can't sleep but need to be able to
1463  * trigger a snapshot.
1464  */
1465 int tracing_alloc_snapshot(void)
1466 {
1467 	struct trace_array *tr = &global_trace;
1468 	int ret;
1469 
1470 	ret = tracing_alloc_snapshot_instance(tr);
1471 	WARN_ON(ret < 0);
1472 
1473 	return ret;
1474 }
1475 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1476 
1477 /**
1478  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1479  *
1480  * This is similar to tracing_snapshot(), but it will allocate the
1481  * snapshot buffer if it isn't already allocated. Use this only
1482  * where it is safe to sleep, as the allocation may sleep.
1483  *
1484  * This causes a swap between the snapshot buffer and the current live
1485  * tracing buffer. You can use this to take snapshots of the live
1486  * trace when some condition is triggered, but continue to trace.
1487  */
1488 void tracing_snapshot_alloc(void)
1489 {
1490 	int ret;
1491 
1492 	ret = tracing_alloc_snapshot();
1493 	if (ret < 0)
1494 		return;
1495 
1496 	tracing_snapshot();
1497 }
1498 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1499 
1500 /**
1501  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1502  * @tr:		The tracing instance
1503  * @cond_data:	User data to associate with the snapshot
1504  * @update:	Implementation of the cond_snapshot update function
1505  *
1506  * Check whether the conditional snapshot for the given instance has
1507  * already been enabled, or if the current tracer is already using a
1508  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1509  * save the cond_data and update function inside.
1510  *
1511  * Returns 0 if successful, error otherwise.
1512  */
1513 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1514 				 cond_update_fn_t update)
1515 {
1516 	struct cond_snapshot *cond_snapshot __free(kfree) =
1517 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1518 	int ret;
1519 
1520 	if (!cond_snapshot)
1521 		return -ENOMEM;
1522 
1523 	cond_snapshot->cond_data = cond_data;
1524 	cond_snapshot->update = update;
1525 
1526 	guard(mutex)(&trace_types_lock);
1527 
1528 	if (tr->current_trace->use_max_tr)
1529 		return -EBUSY;
1530 
1531 	/*
1532 	 * The cond_snapshot can only change to NULL without the
1533 	 * trace_types_lock. We don't care if we race with it going
1534 	 * to NULL, but we want to make sure that it's not set to
1535 	 * something other than NULL when we get here, which we can
1536 	 * do safely with only holding the trace_types_lock and not
1537 	 * having to take the max_lock.
1538 	 */
1539 	if (tr->cond_snapshot)
1540 		return -EBUSY;
1541 
1542 	ret = tracing_arm_snapshot_locked(tr);
1543 	if (ret)
1544 		return ret;
1545 
1546 	local_irq_disable();
1547 	arch_spin_lock(&tr->max_lock);
1548 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1549 	arch_spin_unlock(&tr->max_lock);
1550 	local_irq_enable();
1551 
1552 	return 0;
1553 }
1554 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1555 
1556 /**
1557  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1558  * @tr:		The tracing instance
1559  *
1560  * Check whether the conditional snapshot for the given instance is
1561  * enabled; if so, free the cond_snapshot associated with it,
1562  * otherwise return -EINVAL.
1563  *
1564  * Returns 0 if successful, error otherwise.
1565  */
1566 int tracing_snapshot_cond_disable(struct trace_array *tr)
1567 {
1568 	int ret = 0;
1569 
1570 	local_irq_disable();
1571 	arch_spin_lock(&tr->max_lock);
1572 
1573 	if (!tr->cond_snapshot)
1574 		ret = -EINVAL;
1575 	else {
1576 		kfree(tr->cond_snapshot);
1577 		tr->cond_snapshot = NULL;
1578 	}
1579 
1580 	arch_spin_unlock(&tr->max_lock);
1581 	local_irq_enable();
1582 
1583 	tracing_disarm_snapshot(tr);
1584 
1585 	return ret;
1586 }
1587 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1588 #else
1589 void tracing_snapshot(void)
1590 {
1591 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1592 }
1593 EXPORT_SYMBOL_GPL(tracing_snapshot);
1594 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1595 {
1596 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1597 }
1598 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1599 int tracing_alloc_snapshot(void)
1600 {
1601 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1602 	return -ENODEV;
1603 }
1604 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1605 void tracing_snapshot_alloc(void)
1606 {
1607 	/* Give warning */
1608 	tracing_snapshot();
1609 }
1610 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1611 void *tracing_cond_snapshot_data(struct trace_array *tr)
1612 {
1613 	return NULL;
1614 }
1615 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1616 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1617 {
1618 	return -ENODEV;
1619 }
1620 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1621 int tracing_snapshot_cond_disable(struct trace_array *tr)
1622 {
1623 	return false;
1624 }
1625 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1626 #define free_snapshot(tr)	do { } while (0)
1627 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1628 #endif /* CONFIG_TRACER_SNAPSHOT */
1629 
1630 void tracer_tracing_off(struct trace_array *tr)
1631 {
1632 	if (tr->array_buffer.buffer)
1633 		ring_buffer_record_off(tr->array_buffer.buffer);
1634 	/*
1635 	 * This flag is looked at when buffers haven't been allocated
1636 	 * yet, or by some tracers (like irqsoff) that just want to
1637 	 * know if the ring buffer has been disabled, but can handle
1638 	 * races where it gets disabled while we still do a record.
1639 	 * As the check is in the fast path of the tracers, it is more
1640 	 * important to be fast than accurate.
1641 	 */
1642 	tr->buffer_disabled = 1;
1643 	/* Make the flag seen by readers */
1644 	smp_wmb();
1645 }
1646 
1647 /**
1648  * tracer_tracing_disable() - temporarily disable writes to the buffer
1649  * @tr: The trace array whose buffer is to be disabled
1650  *
1651  * Expects tracer_tracing_enable() to re-enable tracing.
1652  * The difference between this and tracer_tracing_off() is that this
1653  * is a counter and can nest, whereas tracer_tracing_off() can
1654  * be called multiple times and a single tracer_tracing_on() will
1655  * enable it.
1656  */
1657 void tracer_tracing_disable(struct trace_array *tr)
1658 {
1659 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1660 		return;
1661 
1662 	ring_buffer_record_disable(tr->array_buffer.buffer);
1663 }
1664 
1665 /**
1666  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1667  * @tr: The trace array that had tracer_tracing_disable() called on it
1668  *
1669  * This is called after tracer_tracing_disable() has been called on @tr,
1670  * when it's safe to re-enable tracing.
1671  */
1672 void tracer_tracing_enable(struct trace_array *tr)
1673 {
1674 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1675 		return;
1676 
1677 	ring_buffer_record_enable(tr->array_buffer.buffer);
1678 }
1679 
1680 /**
1681  * tracing_off - turn off tracing buffers
1682  *
1683  * This function stops the tracing buffers from recording data.
1684  * It does not disable any overhead the tracers themselves may
1685  * be causing. This function simply causes all recording to
1686  * the ring buffers to fail.
1687  */
1688 void tracing_off(void)
1689 {
1690 	tracer_tracing_off(&global_trace);
1691 }
1692 EXPORT_SYMBOL_GPL(tracing_off);
1693 
1694 void disable_trace_on_warning(void)
1695 {
1696 	if (__disable_trace_on_warning) {
1697 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1698 			"Disabling tracing due to warning\n");
1699 		tracing_off();
1700 	}
1701 }
1702 
1703 /**
1704  * tracer_tracing_is_on - show the real state of the ring buffer
1705  * @tr: the trace array whose ring buffer state to report
1706  *
1707  * Shows the real state of the ring buffer, i.e. whether it is enabled or not.
1708  */
1709 bool tracer_tracing_is_on(struct trace_array *tr)
1710 {
1711 	if (tr->array_buffer.buffer)
1712 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1713 	return !tr->buffer_disabled;
1714 }
1715 
1716 /**
1717  * tracing_is_on - show state of ring buffers enabled
1718  */
1719 int tracing_is_on(void)
1720 {
1721 	return tracer_tracing_is_on(&global_trace);
1722 }
1723 EXPORT_SYMBOL_GPL(tracing_is_on);
1724 
1725 static int __init set_buf_size(char *str)
1726 {
1727 	unsigned long buf_size;
1728 
1729 	if (!str)
1730 		return 0;
1731 	buf_size = memparse(str, &str);
1732 	/*
1733 	 * nr_entries can not be zero and the startup
1734 	 * tests require some buffer space. Therefore
1735 	 * ensure we have at least 4096 bytes of buffer.
1736 	 */
1737 	trace_buf_size = max(4096UL, buf_size);
1738 	return 1;
1739 }
1740 __setup("trace_buf_size=", set_buf_size);
1741 
1742 static int __init set_tracing_thresh(char *str)
1743 {
1744 	unsigned long threshold;
1745 	int ret;
1746 
1747 	if (!str)
1748 		return 0;
1749 	ret = kstrtoul(str, 0, &threshold);
1750 	if (ret < 0)
1751 		return 0;
1752 	tracing_thresh = threshold * 1000;
1753 	return 1;
1754 }
1755 __setup("tracing_thresh=", set_tracing_thresh);
1756 
1757 unsigned long nsecs_to_usecs(unsigned long nsecs)
1758 {
1759 	return nsecs / 1000;
1760 }
1761 
1762 /*
1763  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1764  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1765  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1766  * of strings in the order that the evals (enum) were defined.
1767  */
1768 #undef C
1769 #define C(a, b) b
1770 
1771 /* These must match the bit positions in trace_iterator_flags */
1772 static const char *trace_options[] = {
1773 	TRACE_FLAGS
1774 	NULL
1775 };
1776 
1777 static struct {
1778 	u64 (*func)(void);
1779 	const char *name;
1780 	int in_ns;		/* is this clock in nanoseconds? */
1781 } trace_clocks[] = {
1782 	{ trace_clock_local,		"local",	1 },
1783 	{ trace_clock_global,		"global",	1 },
1784 	{ trace_clock_counter,		"counter",	0 },
1785 	{ trace_clock_jiffies,		"uptime",	0 },
1786 	{ trace_clock,			"perf",		1 },
1787 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1788 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1789 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1790 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1791 	ARCH_TRACE_CLOCKS
1792 };
1793 
1794 bool trace_clock_in_ns(struct trace_array *tr)
1795 {
1796 	if (trace_clocks[tr->clock_id].in_ns)
1797 		return true;
1798 
1799 	return false;
1800 }
1801 
1802 /*
1803  * trace_parser_get_init - gets the buffer for trace parser
1804  */
1805 int trace_parser_get_init(struct trace_parser *parser, int size)
1806 {
1807 	memset(parser, 0, sizeof(*parser));
1808 
1809 	parser->buffer = kmalloc(size, GFP_KERNEL);
1810 	if (!parser->buffer)
1811 		return 1;
1812 
1813 	parser->size = size;
1814 	return 0;
1815 }
1816 
1817 /*
1818  * trace_parser_put - frees the buffer for trace parser
1819  */
1820 void trace_parser_put(struct trace_parser *parser)
1821 {
1822 	kfree(parser->buffer);
1823 	parser->buffer = NULL;
1824 }
1825 
1826 /*
1827  * trace_get_user - reads the user input string separated by space
1828  * (matched by isspace(ch))
1829  *
1830  * For each string found the 'struct trace_parser' is updated,
1831  * and the function returns.
1832  *
1833  * Returns number of bytes read.
1834  *
1835  * See kernel/trace/trace.h for 'struct trace_parser' details.
1836  */
1837 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1838 	size_t cnt, loff_t *ppos)
1839 {
1840 	char ch;
1841 	size_t read = 0;
1842 	ssize_t ret;
1843 
1844 	if (!*ppos)
1845 		trace_parser_clear(parser);
1846 
1847 	ret = get_user(ch, ubuf++);
1848 	if (ret)
1849 		goto out;
1850 
1851 	read++;
1852 	cnt--;
1853 
1854 	/*
1855 	 * The parser is not finished with the last write,
1856 	 * continue reading the user input without skipping spaces.
1857 	 */
1858 	if (!parser->cont) {
1859 		/* skip white space */
1860 		while (cnt && isspace(ch)) {
1861 			ret = get_user(ch, ubuf++);
1862 			if (ret)
1863 				goto out;
1864 			read++;
1865 			cnt--;
1866 		}
1867 
1868 		parser->idx = 0;
1869 
1870 		/* only spaces were written */
1871 		if (isspace(ch) || !ch) {
1872 			*ppos += read;
1873 			ret = read;
1874 			goto out;
1875 		}
1876 	}
1877 
1878 	/* read the non-space input */
1879 	while (cnt && !isspace(ch) && ch) {
1880 		if (parser->idx < parser->size - 1)
1881 			parser->buffer[parser->idx++] = ch;
1882 		else {
1883 			ret = -EINVAL;
1884 			goto out;
1885 		}
1886 		ret = get_user(ch, ubuf++);
1887 		if (ret)
1888 			goto out;
1889 		read++;
1890 		cnt--;
1891 	}
1892 
1893 	/* We either got finished input or we have to wait for another call. */
1894 	if (isspace(ch) || !ch) {
1895 		parser->buffer[parser->idx] = 0;
1896 		parser->cont = false;
1897 	} else if (parser->idx < parser->size - 1) {
1898 		parser->cont = true;
1899 		parser->buffer[parser->idx++] = ch;
1900 		/* Make sure the parsed string always terminates with '\0'. */
1901 		parser->buffer[parser->idx] = 0;
1902 	} else {
1903 		ret = -EINVAL;
1904 		goto out;
1905 	}
1906 
1907 	*ppos += read;
1908 	ret = read;
1909 
1910 out:
1911 	return ret;
1912 }
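
/*
 * Illustrative sketch: a typical tracefs write() handler parses one
 * space-separated token per call with the trace_parser helpers above.
 * The function name example_write and the buffer size of 64 below are
 * hypothetical, not part of the tracing core.
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, 64))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read >= 0 && trace_parser_loaded(&parser))
 *			pr_info("parsed token: %s\n", parser.buffer);
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */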
1913 
1914 /* TODO add a seq_buf_to_buffer() */
1915 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1916 {
1917 	int len;
1918 
1919 	if (trace_seq_used(s) <= s->readpos)
1920 		return -EBUSY;
1921 
1922 	len = trace_seq_used(s) - s->readpos;
1923 	if (cnt > len)
1924 		cnt = len;
1925 	memcpy(buf, s->buffer + s->readpos, cnt);
1926 
1927 	s->readpos += cnt;
1928 	return cnt;
1929 }
1930 
1931 unsigned long __read_mostly	tracing_thresh;
1932 
1933 #ifdef CONFIG_TRACER_MAX_TRACE
1934 static const struct file_operations tracing_max_lat_fops;
1935 
1936 #ifdef LATENCY_FS_NOTIFY
1937 
1938 static struct workqueue_struct *fsnotify_wq;
1939 
1940 static void latency_fsnotify_workfn(struct work_struct *work)
1941 {
1942 	struct trace_array *tr = container_of(work, struct trace_array,
1943 					      fsnotify_work);
1944 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1945 }
1946 
1947 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1948 {
1949 	struct trace_array *tr = container_of(iwork, struct trace_array,
1950 					      fsnotify_irqwork);
1951 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1952 }
1953 
1954 static void trace_create_maxlat_file(struct trace_array *tr,
1955 				     struct dentry *d_tracer)
1956 {
1957 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1958 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1959 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1960 					      TRACE_MODE_WRITE,
1961 					      d_tracer, tr,
1962 					      &tracing_max_lat_fops);
1963 }
1964 
1965 __init static int latency_fsnotify_init(void)
1966 {
1967 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1968 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1969 	if (!fsnotify_wq) {
1970 		pr_err("Unable to allocate tr_max_lat_wq\n");
1971 		return -ENOMEM;
1972 	}
1973 	return 0;
1974 }
1975 
1976 late_initcall_sync(latency_fsnotify_init);
1977 
1978 void latency_fsnotify(struct trace_array *tr)
1979 {
1980 	if (!fsnotify_wq)
1981 		return;
1982 	/*
1983 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1984 	 * possible that we are called from __schedule() or do_idle(), which
1985 	 * could cause a deadlock.
1986 	 */
1987 	irq_work_queue(&tr->fsnotify_irqwork);
1988 }
1989 
1990 #else /* !LATENCY_FS_NOTIFY */
1991 
1992 #define trace_create_maxlat_file(tr, d_tracer)				\
1993 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1994 			  d_tracer, tr, &tracing_max_lat_fops)
1995 
1996 #endif
1997 
1998 /*
1999  * Copy the new maximum trace into the separate maximum-trace
2000  * structure. (this way the maximum trace is permanently saved,
2001  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
2002  */
2003 static void
2004 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
2005 {
2006 	struct array_buffer *trace_buf = &tr->array_buffer;
2007 	struct array_buffer *max_buf = &tr->max_buffer;
2008 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
2009 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
2010 
2011 	max_buf->cpu = cpu;
2012 	max_buf->time_start = data->preempt_timestamp;
2013 
2014 	max_data->saved_latency = tr->max_latency;
2015 	max_data->critical_start = data->critical_start;
2016 	max_data->critical_end = data->critical_end;
2017 
2018 	strscpy(max_data->comm, tsk->comm);
2019 	max_data->pid = tsk->pid;
2020 	/*
2021 	 * If tsk == current, then use current_uid(), as that does not use
2022 	 * RCU. The irq tracer can be called out of RCU scope.
2023 	 */
2024 	if (tsk == current)
2025 		max_data->uid = current_uid();
2026 	else
2027 		max_data->uid = task_uid(tsk);
2028 
2029 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2030 	max_data->policy = tsk->policy;
2031 	max_data->rt_priority = tsk->rt_priority;
2032 
2033 	/* record this task's comm */
2034 	tracing_record_cmdline(tsk);
2035 	latency_fsnotify(tr);
2036 }
2037 
2038 /**
2039  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2040  * @tr: tracer
2041  * @tsk: the task with the latency
2042  * @cpu: The cpu that initiated the trace.
2043  * @cond_data: User data associated with a conditional snapshot
2044  *
2045  * Flip the buffers between the @tr and the max_tr and record information
2046  * about which task was the cause of this latency.
2047  */
2048 void
2049 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2050 	      void *cond_data)
2051 {
2052 	if (tr->stop_count)
2053 		return;
2054 
2055 	WARN_ON_ONCE(!irqs_disabled());
2056 
2057 	if (!tr->allocated_snapshot) {
2058 		/* Only the nop tracer should hit this when disabling */
2059 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2060 		return;
2061 	}
2062 
2063 	arch_spin_lock(&tr->max_lock);
2064 
2065 	/* Inherit the recordable setting from array_buffer */
2066 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2067 		ring_buffer_record_on(tr->max_buffer.buffer);
2068 	else
2069 		ring_buffer_record_off(tr->max_buffer.buffer);
2070 
2071 #ifdef CONFIG_TRACER_SNAPSHOT
2072 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2073 		arch_spin_unlock(&tr->max_lock);
2074 		return;
2075 	}
2076 #endif
2077 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2078 
2079 	__update_max_tr(tr, tsk, cpu);
2080 
2081 	arch_spin_unlock(&tr->max_lock);
2082 
2083 	/* Any waiters on the old snapshot buffer need to wake up */
2084 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2085 }
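
/*
 * Illustrative sketch: a latency tracer typically calls update_max_tr()
 * only after deciding, with interrupts disabled, that a newly measured
 * delta beats the currently saved maximum (see the wakeup tracer for a
 * real caller). The function name example_report_latency is
 * hypothetical.
 *
 *	static void example_report_latency(struct trace_array *tr, u64 delta)
 *	{
 *		if (delta <= tr->max_latency)	// not a new maximum
 *			return;
 *
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, raw_smp_processor_id(), NULL);
 *	}
 */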
2086 
2087 /**
2088  * update_max_tr_single - only copy one trace over, and reset the rest
2089  * @tr: tracer
2090  * @tsk: task with the latency
2091  * @cpu: the cpu of the buffer to copy.
2092  *
2093  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2094  */
2095 void
2096 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2097 {
2098 	int ret;
2099 
2100 	if (tr->stop_count)
2101 		return;
2102 
2103 	WARN_ON_ONCE(!irqs_disabled());
2104 	if (!tr->allocated_snapshot) {
2105 		/* Only the nop tracer should hit this when disabling */
2106 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2107 		return;
2108 	}
2109 
2110 	arch_spin_lock(&tr->max_lock);
2111 
2112 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2113 
2114 	if (ret == -EBUSY) {
2115 		/*
2116 		 * We failed to swap the buffer due to a commit taking
2117 		 * place on this CPU. We fail to record, but we reset
2118 		 * the max trace buffer (no one writes directly to it)
2119 		 * and flag that it failed.
2120 		 * Another possible reason is that a resize is in progress.
2121 		 */
2122 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2123 			"Failed to swap buffers due to commit or resize in progress\n");
2124 	}
2125 
2126 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2127 
2128 	__update_max_tr(tr, tsk, cpu);
2129 	arch_spin_unlock(&tr->max_lock);
2130 }
2131 
2132 #endif /* CONFIG_TRACER_MAX_TRACE */
2133 
2134 struct pipe_wait {
2135 	struct trace_iterator		*iter;
2136 	int				wait_index;
2137 };
2138 
2139 static bool wait_pipe_cond(void *data)
2140 {
2141 	struct pipe_wait *pwait = data;
2142 	struct trace_iterator *iter = pwait->iter;
2143 
2144 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2145 		return true;
2146 
2147 	return iter->closed;
2148 }
2149 
2150 static int wait_on_pipe(struct trace_iterator *iter, int full)
2151 {
2152 	struct pipe_wait pwait;
2153 	int ret;
2154 
2155 	/* Iterators are static, they should be filled or empty */
2156 	if (trace_buffer_iter(iter, iter->cpu_file))
2157 		return 0;
2158 
2159 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2160 	pwait.iter = iter;
2161 
2162 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2163 			       wait_pipe_cond, &pwait);
2164 
2165 #ifdef CONFIG_TRACER_MAX_TRACE
2166 	/*
2167 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2168 	 * to happen, this would now be the main buffer.
2169 	 */
2170 	if (iter->snapshot)
2171 		iter->array_buffer = &iter->tr->max_buffer;
2172 #endif
2173 	return ret;
2174 }
2175 
2176 #ifdef CONFIG_FTRACE_STARTUP_TEST
2177 static bool selftests_can_run;
2178 
2179 struct trace_selftests {
2180 	struct list_head		list;
2181 	struct tracer			*type;
2182 };
2183 
2184 static LIST_HEAD(postponed_selftests);
2185 
2186 static int save_selftest(struct tracer *type)
2187 {
2188 	struct trace_selftests *selftest;
2189 
2190 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2191 	if (!selftest)
2192 		return -ENOMEM;
2193 
2194 	selftest->type = type;
2195 	list_add(&selftest->list, &postponed_selftests);
2196 	return 0;
2197 }
2198 
2199 static int run_tracer_selftest(struct tracer *type)
2200 {
2201 	struct trace_array *tr = &global_trace;
2202 	struct tracer *saved_tracer = tr->current_trace;
2203 	int ret;
2204 
2205 	if (!type->selftest || tracing_selftest_disabled)
2206 		return 0;
2207 
2208 	/*
2209 	 * If a tracer registers early in boot up (before scheduling is
2210 	 * initialized and such), then do not run its selftests yet.
2211 	 * Instead, run it a little later in the boot process.
2212 	 */
2213 	if (!selftests_can_run)
2214 		return save_selftest(type);
2215 
2216 	if (!tracing_is_on()) {
2217 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2218 			type->name);
2219 		return 0;
2220 	}
2221 
2222 	/*
2223 	 * Run a selftest on this tracer.
2224 	 * Here we reset the trace buffer, and set the current
2225 	 * tracer to be this tracer. The tracer can then run some
2226 	 * internal tracing to verify that everything is in order.
2227 	 * If we fail, we do not register this tracer.
2228 	 */
2229 	tracing_reset_online_cpus(&tr->array_buffer);
2230 
2231 	tr->current_trace = type;
2232 
2233 #ifdef CONFIG_TRACER_MAX_TRACE
2234 	if (type->use_max_tr) {
2235 		/* If we expanded the buffers, make sure the max is expanded too */
2236 		if (tr->ring_buffer_expanded)
2237 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2238 					   RING_BUFFER_ALL_CPUS);
2239 		tr->allocated_snapshot = true;
2240 	}
2241 #endif
2242 
2243 	/* the test is responsible for initializing and enabling */
2244 	pr_info("Testing tracer %s: ", type->name);
2245 	ret = type->selftest(type, tr);
2246 	/* the test is responsible for resetting too */
2247 	tr->current_trace = saved_tracer;
2248 	if (ret) {
2249 		printk(KERN_CONT "FAILED!\n");
2250 		/* Add the warning after printing 'FAILED' */
2251 		WARN_ON(1);
2252 		return -1;
2253 	}
2254 	/* Only reset on passing, to avoid touching corrupted buffers */
2255 	tracing_reset_online_cpus(&tr->array_buffer);
2256 
2257 #ifdef CONFIG_TRACER_MAX_TRACE
2258 	if (type->use_max_tr) {
2259 		tr->allocated_snapshot = false;
2260 
2261 		/* Shrink the max buffer again */
2262 		if (tr->ring_buffer_expanded)
2263 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2264 					   RING_BUFFER_ALL_CPUS);
2265 	}
2266 #endif
2267 
2268 	printk(KERN_CONT "PASSED\n");
2269 	return 0;
2270 }
2271 
2272 static int do_run_tracer_selftest(struct tracer *type)
2273 {
2274 	int ret;
2275 
2276 	/*
2277 	 * Tests can take a long time, especially if they are run one after the
2278 	 * other, as does happen during bootup when all the tracers are
2279 	 * registered. This could cause the soft lockup watchdog to trigger.
2280 	 */
2281 	cond_resched();
2282 
2283 	tracing_selftest_running = true;
2284 	ret = run_tracer_selftest(type);
2285 	tracing_selftest_running = false;
2286 
2287 	return ret;
2288 }
2289 
2290 static __init int init_trace_selftests(void)
2291 {
2292 	struct trace_selftests *p, *n;
2293 	struct tracer *t, **last;
2294 	int ret;
2295 
2296 	selftests_can_run = true;
2297 
2298 	guard(mutex)(&trace_types_lock);
2299 
2300 	if (list_empty(&postponed_selftests))
2301 		return 0;
2302 
2303 	pr_info("Running postponed tracer tests:\n");
2304 
2305 	tracing_selftest_running = true;
2306 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2307 		/* This loop can take minutes when sanitizers are enabled, so
2308 		 * let's make sure we allow RCU processing.
2309 		 */
2310 		cond_resched();
2311 		ret = run_tracer_selftest(p->type);
2312 		/* If the test fails, then warn and remove from available_tracers */
2313 		if (ret < 0) {
2314 			WARN(1, "tracer: %s failed selftest, disabling\n",
2315 			     p->type->name);
2316 			last = &trace_types;
2317 			for (t = trace_types; t; t = t->next) {
2318 				if (t == p->type) {
2319 					*last = t->next;
2320 					break;
2321 				}
2322 				last = &t->next;
2323 			}
2324 		}
2325 		list_del(&p->list);
2326 		kfree(p);
2327 	}
2328 	tracing_selftest_running = false;
2329 
2330 	return 0;
2331 }
2332 core_initcall(init_trace_selftests);
2333 #else
2334 static inline int do_run_tracer_selftest(struct tracer *type)
2335 {
2336 	return 0;
2337 }
2338 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2339 
2340 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2341 
2342 static void __init apply_trace_boot_options(void);
2343 
2344 /**
2345  * register_tracer - register a tracer with the ftrace system.
2346  * @type: the plugin for the tracer
2347  *
2348  * Register a new plugin tracer.
2349  */
2350 int __init register_tracer(struct tracer *type)
2351 {
2352 	struct tracer *t;
2353 	int ret = 0;
2354 
2355 	if (!type->name) {
2356 		pr_info("Tracer must have a name\n");
2357 		return -1;
2358 	}
2359 
2360 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2361 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2362 		return -1;
2363 	}
2364 
2365 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2366 		pr_warn("Can not register tracer %s due to lockdown\n",
2367 			   type->name);
2368 		return -EPERM;
2369 	}
2370 
2371 	mutex_lock(&trace_types_lock);
2372 
2373 	for (t = trace_types; t; t = t->next) {
2374 		if (strcmp(type->name, t->name) == 0) {
2375 			/* already found */
2376 			pr_info("Tracer %s already registered\n",
2377 				type->name);
2378 			ret = -1;
2379 			goto out;
2380 		}
2381 	}
2382 
2383 	if (!type->set_flag)
2384 		type->set_flag = &dummy_set_flag;
2385 	if (!type->flags) {
2386 		/* allocate a dummy tracer_flags */
2387 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2388 		if (!type->flags) {
2389 			ret = -ENOMEM;
2390 			goto out;
2391 		}
2392 		type->flags->val = 0;
2393 		type->flags->opts = dummy_tracer_opt;
2394 	} else
2395 		if (!type->flags->opts)
2396 			type->flags->opts = dummy_tracer_opt;
2397 
2398 	/* store the tracer for __set_tracer_option */
2399 	type->flags->trace = type;
2400 
2401 	ret = do_run_tracer_selftest(type);
2402 	if (ret < 0)
2403 		goto out;
2404 
2405 	type->next = trace_types;
2406 	trace_types = type;
2407 	add_tracer_options(&global_trace, type);
2408 
2409  out:
2410 	mutex_unlock(&trace_types_lock);
2411 
2412 	if (ret || !default_bootup_tracer)
2413 		goto out_unlock;
2414 
2415 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2416 		goto out_unlock;
2417 
2418 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2419 	/* Do we want this tracer to start on bootup? */
2420 	tracing_set_tracer(&global_trace, type->name);
2421 	default_bootup_tracer = NULL;
2422 
2423 	apply_trace_boot_options();
2424 
2425 	/* Disable other selftests, since this will break them. */
2426 	disable_tracing_selftest("running a tracer");
2427 
2428  out_unlock:
2429 	return ret;
2430 }
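
/*
 * Illustrative sketch: a minimal tracer plugin fills in a struct tracer
 * and registers it from an initcall; register_tracer() is __init, so
 * this must happen during boot. The "example" tracer and its init/reset
 * callbacks are hypothetical; see the nop tracer for a real minimal
 * implementation.
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,		// hypothetical callbacks
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */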
2431 
2432 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2433 {
2434 	struct trace_buffer *buffer = buf->buffer;
2435 
2436 	if (!buffer)
2437 		return;
2438 
2439 	ring_buffer_record_disable(buffer);
2440 
2441 	/* Make sure all commits have finished */
2442 	synchronize_rcu();
2443 	ring_buffer_reset_cpu(buffer, cpu);
2444 
2445 	ring_buffer_record_enable(buffer);
2446 }
2447 
2448 void tracing_reset_online_cpus(struct array_buffer *buf)
2449 {
2450 	struct trace_buffer *buffer = buf->buffer;
2451 
2452 	if (!buffer)
2453 		return;
2454 
2455 	ring_buffer_record_disable(buffer);
2456 
2457 	/* Make sure all commits have finished */
2458 	synchronize_rcu();
2459 
2460 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2461 
2462 	ring_buffer_reset_online_cpus(buffer);
2463 
2464 	ring_buffer_record_enable(buffer);
2465 }
2466 
2467 static void tracing_reset_all_cpus(struct array_buffer *buf)
2468 {
2469 	struct trace_buffer *buffer = buf->buffer;
2470 
2471 	if (!buffer)
2472 		return;
2473 
2474 	ring_buffer_record_disable(buffer);
2475 
2476 	/* Make sure all commits have finished */
2477 	synchronize_rcu();
2478 
2479 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2480 
2481 	ring_buffer_reset(buffer);
2482 
2483 	ring_buffer_record_enable(buffer);
2484 }
2485 
2486 /* Must have trace_types_lock held */
2487 void tracing_reset_all_online_cpus_unlocked(void)
2488 {
2489 	struct trace_array *tr;
2490 
2491 	lockdep_assert_held(&trace_types_lock);
2492 
2493 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2494 		if (!tr->clear_trace)
2495 			continue;
2496 		tr->clear_trace = false;
2497 		tracing_reset_online_cpus(&tr->array_buffer);
2498 #ifdef CONFIG_TRACER_MAX_TRACE
2499 		tracing_reset_online_cpus(&tr->max_buffer);
2500 #endif
2501 	}
2502 }
2503 
2504 void tracing_reset_all_online_cpus(void)
2505 {
2506 	mutex_lock(&trace_types_lock);
2507 	tracing_reset_all_online_cpus_unlocked();
2508 	mutex_unlock(&trace_types_lock);
2509 }
2510 
2511 int is_tracing_stopped(void)
2512 {
2513 	return global_trace.stop_count;
2514 }
2515 
2516 static void tracing_start_tr(struct trace_array *tr)
2517 {
2518 	struct trace_buffer *buffer;
2519 	unsigned long flags;
2520 
2521 	if (tracing_disabled)
2522 		return;
2523 
2524 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2525 	if (--tr->stop_count) {
2526 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2527 			/* Someone screwed up their debugging */
2528 			tr->stop_count = 0;
2529 		}
2530 		goto out;
2531 	}
2532 
2533 	/* Prevent the buffers from switching */
2534 	arch_spin_lock(&tr->max_lock);
2535 
2536 	buffer = tr->array_buffer.buffer;
2537 	if (buffer)
2538 		ring_buffer_record_enable(buffer);
2539 
2540 #ifdef CONFIG_TRACER_MAX_TRACE
2541 	buffer = tr->max_buffer.buffer;
2542 	if (buffer)
2543 		ring_buffer_record_enable(buffer);
2544 #endif
2545 
2546 	arch_spin_unlock(&tr->max_lock);
2547 
2548  out:
2549 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2550 }
2551 
2552 /**
2553  * tracing_start - quick start of the tracer
2554  *
2555  * If tracing is enabled but was stopped by tracing_stop,
2556  * this will start the tracer back up.
2557  */
2558 void tracing_start(void)
2560 {
2561 	return tracing_start_tr(&global_trace);
2562 }
2563 
2564 static void tracing_stop_tr(struct trace_array *tr)
2565 {
2566 	struct trace_buffer *buffer;
2567 	unsigned long flags;
2568 
2569 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2570 	if (tr->stop_count++)
2571 		goto out;
2572 
2573 	/* Prevent the buffers from switching */
2574 	arch_spin_lock(&tr->max_lock);
2575 
2576 	buffer = tr->array_buffer.buffer;
2577 	if (buffer)
2578 		ring_buffer_record_disable(buffer);
2579 
2580 #ifdef CONFIG_TRACER_MAX_TRACE
2581 	buffer = tr->max_buffer.buffer;
2582 	if (buffer)
2583 		ring_buffer_record_disable(buffer);
2584 #endif
2585 
2586 	arch_spin_unlock(&tr->max_lock);
2587 
2588  out:
2589 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2590 }
2591 
2592 /**
2593  * tracing_stop - quick stop of the tracer
2594  *
2595  * Light weight way to stop tracing. Use in conjunction with
2596  * tracing_start.
2597  */
2598 void tracing_stop(void)
2599 {
2600 	return tracing_stop_tr(&global_trace);
2601 }
2602 
2603 /*
2604  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2605  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2606  * simplifies those functions and keeps them in sync.
2607  */
2608 enum print_line_t trace_handle_return(struct trace_seq *s)
2609 {
2610 	return trace_seq_has_overflowed(s) ?
2611 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2612 }
2613 EXPORT_SYMBOL_GPL(trace_handle_return);
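
/*
 * Illustrative sketch: an event output callback builds its line in
 * iter->seq and ends with trace_handle_return(), so a trace_seq
 * overflow is reported as TRACE_TYPE_PARTIAL_LINE. The function name
 * example_print and its output are hypothetical.
 *
 *	static enum print_line_t example_print(struct trace_iterator *iter,
 *					       int flags, struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_puts(s, "example event\n");
 *		return trace_handle_return(s);
 *	}
 */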
2614 
2615 static unsigned short migration_disable_value(void)
2616 {
2617 #if defined(CONFIG_SMP)
2618 	return current->migration_disabled;
2619 #else
2620 	return 0;
2621 #endif
2622 }
2623 
2624 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2625 {
2626 	unsigned int trace_flags = irqs_status;
2627 	unsigned int pc;
2628 
2629 	pc = preempt_count();
2630 
2631 	if (pc & NMI_MASK)
2632 		trace_flags |= TRACE_FLAG_NMI;
2633 	if (pc & HARDIRQ_MASK)
2634 		trace_flags |= TRACE_FLAG_HARDIRQ;
2635 	if (in_serving_softirq())
2636 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2637 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2638 		trace_flags |= TRACE_FLAG_BH_OFF;
2639 
2640 	if (tif_need_resched())
2641 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2642 	if (test_preempt_need_resched())
2643 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2644 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2645 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2646 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2647 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2648 }
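
/*
 * Reading aid, derived from the shifts above: the packed value returned
 * by tracing_gen_ctx_irq_test() lays out as
 *
 *	bits  0- 3:	preemption count (clamped to 0xf)
 *	bits  4- 7:	migration-disable count (clamped to 0xf)
 *	bits 16+  :	TRACE_FLAG_* bits
 */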
2649 
2650 struct ring_buffer_event *
2651 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2652 			  int type,
2653 			  unsigned long len,
2654 			  unsigned int trace_ctx)
2655 {
2656 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2657 }
2658 
2659 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2660 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2661 static int trace_buffered_event_ref;
2662 
2663 /**
2664  * trace_buffered_event_enable - enable buffering events
2665  *
2666  * When events are being filtered, it is quicker to use a temporary
2667  * buffer to write the event data into if there's a likely chance
2668  * that it will not be committed. Discarding an event from the ring
2669  * buffer is not as fast as committing, and is much slower than
2670  * copying the data and then committing it.
2671  *
2672  * When an event is to be filtered, allocate per cpu buffers to
2673  * write the event data into, and if the event is filtered and discarded
2674  * it is simply dropped, otherwise, the entire data is to be committed
2675  * in one shot.
2676  */
2677 void trace_buffered_event_enable(void)
2678 {
2679 	struct ring_buffer_event *event;
2680 	struct page *page;
2681 	int cpu;
2682 
2683 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2684 
2685 	if (trace_buffered_event_ref++)
2686 		return;
2687 
2688 	for_each_tracing_cpu(cpu) {
2689 		page = alloc_pages_node(cpu_to_node(cpu),
2690 					GFP_KERNEL | __GFP_NORETRY, 0);
2691 		/* This is just an optimization and can handle failures */
2692 		if (!page) {
2693 			pr_err("Failed to allocate event buffer\n");
2694 			break;
2695 		}
2696 
2697 		event = page_address(page);
2698 		memset(event, 0, sizeof(*event));
2699 
2700 		per_cpu(trace_buffered_event, cpu) = event;
2701 
2702 		preempt_disable();
2703 		if (cpu == smp_processor_id() &&
2704 		    __this_cpu_read(trace_buffered_event) !=
2705 		    per_cpu(trace_buffered_event, cpu))
2706 			WARN_ON_ONCE(1);
2707 		preempt_enable();
2708 	}
2709 }
2710 
2711 static void enable_trace_buffered_event(void *data)
2712 {
2713 	/* Probably not needed, but do it anyway */
2714 	smp_rmb();
2715 	this_cpu_dec(trace_buffered_event_cnt);
2716 }
2717 
2718 static void disable_trace_buffered_event(void *data)
2719 {
2720 	this_cpu_inc(trace_buffered_event_cnt);
2721 }
2722 
2723 /**
2724  * trace_buffered_event_disable - disable buffering events
2725  *
2726  * When a filter is removed, it is faster to not use the buffered
2727  * events, and to commit directly into the ring buffer. Free up
2728  * the temp buffers when there are no more users. This requires
2729  * special synchronization with current events.
2730  */
2731 void trace_buffered_event_disable(void)
2732 {
2733 	int cpu;
2734 
2735 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2736 
2737 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2738 		return;
2739 
2740 	if (--trace_buffered_event_ref)
2741 		return;
2742 
2743 	/* For each CPU, set the buffer as used. */
2744 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2745 			 NULL, true);
2746 
2747 	/* Wait for all current users to finish */
2748 	synchronize_rcu();
2749 
2750 	for_each_tracing_cpu(cpu) {
2751 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2752 		per_cpu(trace_buffered_event, cpu) = NULL;
2753 	}
2754 
2755 	/*
2756 	 * Wait for all CPUs that potentially started checking if they can use
2757 	 * their event buffer only after the previous synchronize_rcu() call and
2758 	 * they still read a valid pointer from trace_buffered_event. It must be
2759 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2760 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2761 	 */
2762 	synchronize_rcu();
2763 
2764 	/* For each CPU, relinquish the buffer */
2765 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2766 			 true);
2767 }
2768 
2769 static struct trace_buffer *temp_buffer;
2770 
2771 struct ring_buffer_event *
2772 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2773 			  struct trace_event_file *trace_file,
2774 			  int type, unsigned long len,
2775 			  unsigned int trace_ctx)
2776 {
2777 	struct ring_buffer_event *entry;
2778 	struct trace_array *tr = trace_file->tr;
2779 	int val;
2780 
2781 	*current_rb = tr->array_buffer.buffer;
2782 
2783 	if (!tr->no_filter_buffering_ref &&
2784 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2785 		preempt_disable_notrace();
2786 		/*
2787 		 * Filtering is on, so try to use the per cpu buffer first.
2788 		 * This buffer will simulate a ring_buffer_event,
2789 		 * where the type_len is zero and the array[0] will
2790 		 * hold the full length.
2791 		 * (see include/linux/ring_buffer.h for details on
2792 		 *  how the ring_buffer_event is structured).
2793 		 *
2794 		 * Using a temp buffer during filtering and copying it
2795 		 * on a matched filter is quicker than writing directly
2796 		 * into the ring buffer and then discarding it when
2797 		 * it doesn't match. That is because the discard
2798 		 * requires several atomic operations to get right.
2799 		 * Copying on match and doing nothing on a failed match
2800 		 * is still quicker than no copy on match, but having
2801 		 * to discard out of the ring buffer on a failed match.
2802 		 */
2803 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2804 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2805 
2806 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2807 
2808 			/*
2809 			 * Preemption is disabled, but interrupts and NMIs
2810 			 * can still come in now. If that happens after
2811 			 * the above increment, then it will have to go
2812 			 * back to the old method of allocating the event
2813 			 * on the ring buffer, and if the filter fails, it
2814 			 * will have to call ring_buffer_discard_commit()
2815 			 * to remove it.
2816 			 *
2817 			 * Need to also check the unlikely case that the
2818 			 * length is bigger than the temp buffer size.
2819 			 * If that happens, then the reserve is pretty much
2820 			 * guaranteed to fail, as the ring buffer currently
2821 			 * only allows events less than a page. But that may
2822 			 * change in the future, so let the ring buffer reserve
2823 			 * handle the failure in that case.
2824 			 */
2825 			if (val == 1 && likely(len <= max_len)) {
2826 				trace_event_setup(entry, type, trace_ctx);
2827 				entry->array[0] = len;
2828 				/* Return with preemption disabled */
2829 				return entry;
2830 			}
2831 			this_cpu_dec(trace_buffered_event_cnt);
2832 		}
2833 		/* __trace_buffer_lock_reserve() disables preemption */
2834 		preempt_enable_notrace();
2835 	}
2836 
2837 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2838 					    trace_ctx);
2839 	/*
2840 	 * If tracing is off, but we have triggers enabled
2841 	 * we still need to look at the event data. Use the temp_buffer
2842 	 * to store the trace event for the trigger to use. It's recursion
2843 	 * safe and will not be recorded anywhere.
2844 	 */
2845 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2846 		*current_rb = temp_buffer;
2847 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2848 						    trace_ctx);
2849 	}
2850 	return entry;
2851 }
2852 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2853 
2854 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2855 static DEFINE_MUTEX(tracepoint_printk_mutex);
2856 
2857 static void output_printk(struct trace_event_buffer *fbuffer)
2858 {
2859 	struct trace_event_call *event_call;
2860 	struct trace_event_file *file;
2861 	struct trace_event *event;
2862 	unsigned long flags;
2863 	struct trace_iterator *iter = tracepoint_print_iter;
2864 
2865 	/* We should never get here if iter is NULL */
2866 	if (WARN_ON_ONCE(!iter))
2867 		return;
2868 
2869 	event_call = fbuffer->trace_file->event_call;
2870 	if (!event_call || !event_call->event.funcs ||
2871 	    !event_call->event.funcs->trace)
2872 		return;
2873 
2874 	file = fbuffer->trace_file;
2875 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2876 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2877 	     !filter_match_preds(file->filter, fbuffer->entry)))
2878 		return;
2879 
2880 	event = &fbuffer->trace_file->event_call->event;
2881 
2882 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2883 	trace_seq_init(&iter->seq);
2884 	iter->ent = fbuffer->entry;
2885 	event_call->event.funcs->trace(iter, 0, event);
2886 	trace_seq_putc(&iter->seq, 0);
2887 	printk("%s", iter->seq.buffer);
2888 
2889 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2890 }
2891 
2892 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2893 			     void *buffer, size_t *lenp,
2894 			     loff_t *ppos)
2895 {
2896 	int save_tracepoint_printk;
2897 	int ret;
2898 
2899 	guard(mutex)(&tracepoint_printk_mutex);
2900 	save_tracepoint_printk = tracepoint_printk;
2901 
2902 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2903 
2904 	/*
2905 	 * This will force exiting early, as tracepoint_printk
2906 	 * is always zero when tracepoint_print_iter is not allocated.
2907 	 */
2908 	if (!tracepoint_print_iter)
2909 		tracepoint_printk = 0;
2910 
2911 	if (save_tracepoint_printk == tracepoint_printk)
2912 		return ret;
2913 
2914 	if (tracepoint_printk)
2915 		static_key_enable(&tracepoint_printk_key.key);
2916 	else
2917 		static_key_disable(&tracepoint_printk_key.key);
2918 
2919 	return ret;
2920 }
2921 
2922 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2923 {
2924 	enum event_trigger_type tt = ETT_NONE;
2925 	struct trace_event_file *file = fbuffer->trace_file;
2926 
2927 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2928 			fbuffer->entry, &tt))
2929 		goto discard;
2930 
2931 	if (static_key_false(&tracepoint_printk_key.key))
2932 		output_printk(fbuffer);
2933 
2934 	if (static_branch_unlikely(&trace_event_exports_enabled))
2935 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2936 
2937 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2938 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2939 
2940 discard:
2941 	if (tt)
2942 		event_triggers_post_call(file, tt);
2943 
2944 }
2945 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2946 
2947 /*
2948  * Skip 3:
2949  *
2950  *   trace_buffer_unlock_commit_regs()
2951  *   trace_event_buffer_commit()
2952  *   trace_event_raw_event_xxx()
2953  */
2954 # define STACK_SKIP 3
2955 
2956 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2957 				     struct trace_buffer *buffer,
2958 				     struct ring_buffer_event *event,
2959 				     unsigned int trace_ctx,
2960 				     struct pt_regs *regs)
2961 {
2962 	__buffer_unlock_commit(buffer, event);
2963 
2964 	/*
2965 	 * If regs is not set, then skip the necessary functions.
2966 	 * Note, we can still get here via blktrace, wakeup tracer
2967 	 * and mmiotrace, but that's ok if they lose a function or
2968 	 * two. They are not that meaningful.
2969 	 */
2970 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2971 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2972 }
2973 
2974 /*
2975  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2976  */
2977 void
2978 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2979 				   struct ring_buffer_event *event)
2980 {
2981 	__buffer_unlock_commit(buffer, event);
2982 }
2983 
2984 void
2985 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2986 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2987 {
2988 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2989 	struct ring_buffer_event *event;
2990 	struct ftrace_entry *entry;
2991 	int size = sizeof(*entry);
2992 
2993 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2994 
2995 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2996 					    trace_ctx);
2997 	if (!event)
2998 		return;
2999 	entry	= ring_buffer_event_data(event);
3000 	entry->ip			= ip;
3001 	entry->parent_ip		= parent_ip;
3002 
3003 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
3004 	if (fregs) {
3005 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
3006 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
3007 	}
3008 #endif
3009 
3010 	if (static_branch_unlikely(&trace_function_exports_enabled))
3011 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3012 	__buffer_unlock_commit(buffer, event);
3013 }
3014 
3015 #ifdef CONFIG_STACKTRACE
3016 
3017 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3018 #define FTRACE_KSTACK_NESTING	4
3019 
3020 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
3021 
3022 struct ftrace_stack {
3023 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3024 };
3025 
3026 
3027 struct ftrace_stacks {
3028 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3029 };
3030 
3031 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3032 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3033 
3034 static void __ftrace_trace_stack(struct trace_array *tr,
3035 				 struct trace_buffer *buffer,
3036 				 unsigned int trace_ctx,
3037 				 int skip, struct pt_regs *regs)
3038 {
3039 	struct ring_buffer_event *event;
3040 	unsigned int size, nr_entries;
3041 	struct ftrace_stack *fstack;
3042 	struct stack_entry *entry;
3043 	int stackidx;
3044 
3045 	/*
3046 	 * Add one, for this function and the call to stack_trace_save().
3047 	 * If regs is set, then these functions will not be in the way.
3048 	 */
3049 #ifndef CONFIG_UNWINDER_ORC
3050 	if (!regs)
3051 		skip++;
3052 #endif
3053 
3054 	preempt_disable_notrace();
3055 
3056 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3057 
3058 	/* This should never happen. If it does, yell once and skip */
3059 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3060 		goto out;
3061 
3062 	/*
3063 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3064 	 * interrupt will either see the value pre increment or post
3065 	 * increment. If the interrupt happens pre increment it will have
3066 	 * restored the counter when it returns.  We just need a barrier to
3067 	 * keep gcc from moving things around.
3068 	 */
3069 	barrier();
3070 
3071 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3072 	size = ARRAY_SIZE(fstack->calls);
3073 
3074 	if (regs) {
3075 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3076 						   size, skip);
3077 	} else {
3078 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3079 	}
3080 
3081 #ifdef CONFIG_DYNAMIC_FTRACE
3082 	/* Mark stack trace entries that are in the trampoline code */
3083 	if (tr->ops && tr->ops->trampoline) {
3084 		unsigned long tramp_start = tr->ops->trampoline;
3085 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3086 		unsigned long *calls = fstack->calls;
3087 
3088 		for (int i = 0; i < nr_entries; i++) {
3089 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3090 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3091 		}
3092 	}
3093 #endif
3094 
3095 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3096 				    struct_size(entry, caller, nr_entries),
3097 				    trace_ctx);
3098 	if (!event)
3099 		goto out;
3100 	entry = ring_buffer_event_data(event);
3101 
3102 	entry->size = nr_entries;
3103 	memcpy(&entry->caller, fstack->calls,
3104 	       flex_array_size(entry, caller, nr_entries));
3105 
3106 	__buffer_unlock_commit(buffer, event);
3107 
3108  out:
3109 	/* Again, don't let gcc optimize things here */
3110 	barrier();
3111 	__this_cpu_dec(ftrace_stack_reserve);
3112 	preempt_enable_notrace();
3113 
3114 }
3115 
3116 static inline void ftrace_trace_stack(struct trace_array *tr,
3117 				      struct trace_buffer *buffer,
3118 				      unsigned int trace_ctx,
3119 				      int skip, struct pt_regs *regs)
3120 {
3121 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3122 		return;
3123 
3124 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3125 }
3126 
3127 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3128 		   int skip)
3129 {
3130 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3131 
3132 	if (rcu_is_watching()) {
3133 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3134 		return;
3135 	}
3136 
3137 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3138 		return;
3139 
3140 	/*
3141 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3142 	 * but if the above rcu_is_watching() failed, then the NMI
3143 	 * triggered someplace critical, and ct_irq_enter() should
3144 	 * not be called from NMI.
3145 	 */
3146 	if (unlikely(in_nmi()))
3147 		return;
3148 
3149 	ct_irq_enter_irqson();
3150 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3151 	ct_irq_exit_irqson();
3152 }
3153 
3154 /**
3155  * trace_dump_stack - record a stack back trace in the trace buffer
3156  * @skip: Number of functions to skip (helper handlers)
3157  */
3158 void trace_dump_stack(int skip)
3159 {
3160 	if (tracing_disabled || tracing_selftest_running)
3161 		return;
3162 
3163 #ifndef CONFIG_UNWINDER_ORC
3164 	/* Skip 1 to skip this function. */
3165 	skip++;
3166 #endif
3167 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3168 				tracing_gen_ctx(), skip, NULL);
3169 }
3170 EXPORT_SYMBOL_GPL(trace_dump_stack);
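
/*
 * Illustrative sketch: debugging code can record the current kernel
 * stack into the trace buffer instead of printing it to the console.
 * The condition broken_state is hypothetical.
 *
 *	// broken_state is a hypothetical debug flag
 *	if (unlikely(broken_state))
 *		trace_dump_stack(0);
 */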
3171 
3172 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3173 static DEFINE_PER_CPU(int, user_stack_count);
3174 
3175 static void
3176 ftrace_trace_userstack(struct trace_array *tr,
3177 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3178 {
3179 	struct ring_buffer_event *event;
3180 	struct userstack_entry *entry;
3181 
3182 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3183 		return;
3184 
3185 	/*
3186 	 * NMIs cannot handle page faults, even with fixups.
3187 	 * Saving the user stack can (and often does) fault.
3188 	 */
3189 	if (unlikely(in_nmi()))
3190 		return;
3191 
3192 	/*
3193 	 * prevent recursion, since the user stack tracing may
3194 	 * trigger other kernel events.
3195 	 */
3196 	preempt_disable();
3197 	if (__this_cpu_read(user_stack_count))
3198 		goto out;
3199 
3200 	__this_cpu_inc(user_stack_count);
3201 
3202 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3203 					    sizeof(*entry), trace_ctx);
3204 	if (!event)
3205 		goto out_drop_count;
3206 	entry	= ring_buffer_event_data(event);
3207 
3208 	entry->tgid		= current->tgid;
3209 	memset(&entry->caller, 0, sizeof(entry->caller));
3210 
3211 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3212 	__buffer_unlock_commit(buffer, event);
3213 
3214  out_drop_count:
3215 	__this_cpu_dec(user_stack_count);
3216  out:
3217 	preempt_enable();
3218 }
3219 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3220 static void ftrace_trace_userstack(struct trace_array *tr,
3221 				   struct trace_buffer *buffer,
3222 				   unsigned int trace_ctx)
3223 {
3224 }
3225 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3226 
3227 #endif /* CONFIG_STACKTRACE */
3228 
3229 static inline void
3230 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3231 			  unsigned long long delta)
3232 {
3233 	entry->bottom_delta_ts = delta & U32_MAX;
3234 	entry->top_delta_ts = (delta >> 32);
3235 }
3236 
3237 void trace_last_func_repeats(struct trace_array *tr,
3238 			     struct trace_func_repeats *last_info,
3239 			     unsigned int trace_ctx)
3240 {
3241 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3242 	struct func_repeats_entry *entry;
3243 	struct ring_buffer_event *event;
3244 	u64 delta;
3245 
3246 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3247 					    sizeof(*entry), trace_ctx);
3248 	if (!event)
3249 		return;
3250 
3251 	delta = ring_buffer_event_time_stamp(buffer, event) -
3252 		last_info->ts_last_call;
3253 
3254 	entry = ring_buffer_event_data(event);
3255 	entry->ip = last_info->ip;
3256 	entry->parent_ip = last_info->parent_ip;
3257 	entry->count = last_info->count;
3258 	func_repeats_set_delta_ts(entry, delta);
3259 
3260 	__buffer_unlock_commit(buffer, event);
3261 }
3262 
3263 /* created for use with alloc_percpu */
3264 struct trace_buffer_struct {
3265 	int nesting;
3266 	char buffer[4][TRACE_BUF_SIZE];
3267 };
3268 
3269 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3270 
3271 /*
3272  * This allows for lockless recording.  If we're nested too deeply, then
3273  * this returns NULL.
3274  */
3275 static char *get_trace_buf(void)
3276 {
3277 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3278 
3279 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3280 		return NULL;
3281 
3282 	buffer->nesting++;
3283 
3284 	/* Interrupts must see nesting incremented before we use the buffer */
3285 	barrier();
3286 	return &buffer->buffer[buffer->nesting - 1][0];
3287 }
3288 
3289 static void put_trace_buf(void)
3290 {
3291 	/* Don't let the decrement of nesting leak before this */
3292 	barrier();
3293 	this_cpu_dec(trace_percpu_buffer->nesting);
3294 }
3295 
3296 static int alloc_percpu_trace_buffer(void)
3297 {
3298 	struct trace_buffer_struct __percpu *buffers;
3299 
3300 	if (trace_percpu_buffer)
3301 		return 0;
3302 
3303 	buffers = alloc_percpu(struct trace_buffer_struct);
3304 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3305 		return -ENOMEM;
3306 
3307 	trace_percpu_buffer = buffers;
3308 	return 0;
3309 }
3310 
3311 static int buffers_allocated;
3312 
3313 void trace_printk_init_buffers(void)
3314 {
3315 	if (buffers_allocated)
3316 		return;
3317 
3318 	if (alloc_percpu_trace_buffer())
3319 		return;
3320 
3321 	/* trace_printk() is for debug use only. Don't use it in production. */
3322 
3323 	pr_warn("\n");
3324 	pr_warn("**********************************************************\n");
3325 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3326 	pr_warn("**                                                      **\n");
3327 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3328 	pr_warn("**                                                      **\n");
3329 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3330 	pr_warn("** unsafe for production use.                           **\n");
3331 	pr_warn("**                                                      **\n");
3332 	pr_warn("** If you see this message and you are not debugging    **\n");
3333 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3334 	pr_warn("**                                                      **\n");
3335 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3336 	pr_warn("**********************************************************\n");
3337 
3338 	/* Expand the buffers to set size */
3339 	tracing_update_buffers(&global_trace);
3340 
3341 	buffers_allocated = 1;
3342 
3343 	/*
3344 	 * trace_printk_init_buffers() can be called by modules.
3345 	 * If that happens, then we need to start cmdline recording
3346 	 * directly here. If the global_trace.buffer is already
3347 	 * allocated here, then this was called by module code.
3348 	 */
3349 	if (global_trace.array_buffer.buffer)
3350 		tracing_start_cmdline_record();
3351 }
3352 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3353 
3354 void trace_printk_start_comm(void)
3355 {
3356 	/* Start tracing comms if trace printk is set */
3357 	if (!buffers_allocated)
3358 		return;
3359 	tracing_start_cmdline_record();
3360 }
3361 
3362 static void trace_printk_start_stop_comm(int enabled)
3363 {
3364 	if (!buffers_allocated)
3365 		return;
3366 
3367 	if (enabled)
3368 		tracing_start_cmdline_record();
3369 	else
3370 		tracing_stop_cmdline_record();
3371 }
3372 
3373 /**
3374  * trace_vbprintk - write binary msg to tracing buffer
3375  * @ip:    The address of the caller
3376  * @fmt:   The string format to write to the buffer
3377  * @args:  Arguments for @fmt
3378  */
3379 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3380 {
3381 	struct ring_buffer_event *event;
3382 	struct trace_buffer *buffer;
3383 	struct trace_array *tr = READ_ONCE(printk_trace);
3384 	struct bprint_entry *entry;
3385 	unsigned int trace_ctx;
3386 	char *tbuffer;
3387 	int len = 0, size;
3388 
3389 	if (!printk_binsafe(tr))
3390 		return trace_vprintk(ip, fmt, args);
3391 
3392 	if (unlikely(tracing_selftest_running || tracing_disabled))
3393 		return 0;
3394 
3395 	/* Don't pollute graph traces with trace_vprintk internals */
3396 	pause_graph_tracing();
3397 
3398 	trace_ctx = tracing_gen_ctx();
3399 	preempt_disable_notrace();
3400 
3401 	tbuffer = get_trace_buf();
3402 	if (!tbuffer) {
3403 		len = 0;
3404 		goto out_nobuffer;
3405 	}
3406 
3407 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3408 
3409 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3410 		goto out_put;
3411 
3412 	size = sizeof(*entry) + sizeof(u32) * len;
3413 	buffer = tr->array_buffer.buffer;
3414 	ring_buffer_nest_start(buffer);
3415 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3416 					    trace_ctx);
3417 	if (!event)
3418 		goto out;
3419 	entry = ring_buffer_event_data(event);
3420 	entry->ip			= ip;
3421 	entry->fmt			= fmt;
3422 
3423 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3424 	__buffer_unlock_commit(buffer, event);
3425 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3426 
3427 out:
3428 	ring_buffer_nest_end(buffer);
3429 out_put:
3430 	put_trace_buf();
3431 
3432 out_nobuffer:
3433 	preempt_enable_notrace();
3434 	unpause_graph_tracing();
3435 
3436 	return len;
3437 }
3438 EXPORT_SYMBOL_GPL(trace_vbprintk);
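
/*
 * Illustrative note: trace_printk() calls with a constant format string
 * normally end up here, recording only the format pointer plus the
 * binary arguments, e.g.
 *
 *	trace_printk("queue %d has depth %u\n", qid, depth);	// qid, depth: hypothetical
 */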
3439 
3440 static __printf(3, 0)
3441 int __trace_array_vprintk(struct trace_buffer *buffer,
3442 			  unsigned long ip, const char *fmt, va_list args)
3443 {
3444 	struct ring_buffer_event *event;
3445 	int len = 0, size;
3446 	struct print_entry *entry;
3447 	unsigned int trace_ctx;
3448 	char *tbuffer;
3449 
3450 	if (tracing_disabled)
3451 		return 0;
3452 
3453 	/* Don't pollute graph traces with trace_vprintk internals */
3454 	pause_graph_tracing();
3455 
3456 	trace_ctx = tracing_gen_ctx();
3457 	preempt_disable_notrace();
3458 
3459 
3460 	tbuffer = get_trace_buf();
3461 	if (!tbuffer) {
3462 		len = 0;
3463 		goto out_nobuffer;
3464 	}
3465 
3466 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3467 
3468 	size = sizeof(*entry) + len + 1;
3469 	ring_buffer_nest_start(buffer);
3470 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3471 					    trace_ctx);
3472 	if (!event)
3473 		goto out;
3474 	entry = ring_buffer_event_data(event);
3475 	entry->ip = ip;
3476 
3477 	memcpy(&entry->buf, tbuffer, len + 1);
3478 	__buffer_unlock_commit(buffer, event);
3479 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3480 
3481 out:
3482 	ring_buffer_nest_end(buffer);
3483 	put_trace_buf();
3484 
3485 out_nobuffer:
3486 	preempt_enable_notrace();
3487 	unpause_graph_tracing();
3488 
3489 	return len;
3490 }
3491 
3492 int trace_array_vprintk(struct trace_array *tr,
3493 			unsigned long ip, const char *fmt, va_list args)
3494 {
3495 	if (tracing_selftest_running && tr == &global_trace)
3496 		return 0;
3497 
3498 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3499 }
3500 
3501 /**
3502  * trace_array_printk - Print a message to a specific instance
3503  * @tr: The instance trace_array descriptor
3504  * @ip: The instruction pointer that this is called from.
3505  * @fmt: The format to print (printf format)
3506  *
3507  * If a subsystem sets up its own instance, they have the right to
3508  * printk strings into their tracing instance buffer using this
3509  * function. Note, this function will not write into the top level
3510  * buffer (use trace_printk() for that), as writing into the top level
3511  * buffer should only have events that can be individually disabled.
3512  * trace_printk() is only used for debugging a kernel, and should not
3513  * be ever incorporated in normal use.
3514  *
3515  * trace_array_printk() can be used, as it will not add noise to the
3516  * top level tracing buffer.
3517  *
3518  * Note, trace_array_init_printk() must be called on @tr before this
3519  * can be used.
3520  */
3521 int trace_array_printk(struct trace_array *tr,
3522 		       unsigned long ip, const char *fmt, ...)
3523 {
3524 	int ret;
3525 	va_list ap;
3526 
3527 	if (!tr)
3528 		return -ENOENT;
3529 
3530 	/* This is only allowed for created instances */
3531 	if (tr == &global_trace)
3532 		return 0;
3533 
3534 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3535 		return 0;
3536 
3537 	va_start(ap, fmt);
3538 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3539 	va_end(ap);
3540 	return ret;
3541 }
3542 EXPORT_SYMBOL_GPL(trace_array_printk);
3543 
3544 /**
3545  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3546  * @tr: The trace array to initialize the buffers for
3547  *
3548  * As trace_array_printk() only writes into instances, they are OK to
3549  * have in the kernel (unlike trace_printk()). This needs to be called
3550  * before trace_array_printk() can be used on a trace_array.
3551  */
3552 int trace_array_init_printk(struct trace_array *tr)
3553 {
3554 	if (!tr)
3555 		return -ENOENT;
3556 
3557 	/* This is only allowed for created instances */
3558 	if (tr == &global_trace)
3559 		return -EINVAL;
3560 
3561 	return alloc_percpu_trace_buffer();
3562 }
3563 EXPORT_SYMBOL_GPL(trace_array_init_printk);
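
/*
 * Illustrative sketch: a subsystem with its own instance initializes the
 * percpu printk buffers once and can then log into that instance. The
 * instance name "example" and the helper name are hypothetical, and the
 * two-argument trace_array_get_by_name() signature is assumed.
 *
 *	static int example_setup_instance(void)
 *	{
 *		struct trace_array *tr;
 *		int ret;
 *
 *		tr = trace_array_get_by_name("example", NULL);
 *		if (!tr)
 *			return -ENOMEM;
 *
 *		ret = trace_array_init_printk(tr);
 *		if (ret)
 *			return ret;
 *
 *		trace_array_printk(tr, _THIS_IP_, "hello from %s\n", "example");
 *		return 0;
 *	}
 */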
3564 
3565 int trace_array_printk_buf(struct trace_buffer *buffer,
3566 			   unsigned long ip, const char *fmt, ...)
3567 {
3568 	int ret;
3569 	va_list ap;
3570 
3571 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3572 		return 0;
3573 
3574 	va_start(ap, fmt);
3575 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3576 	va_end(ap);
3577 	return ret;
3578 }
3579 
3580 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3581 {
3582 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3583 }
3584 EXPORT_SYMBOL_GPL(trace_vprintk);
3585 
3586 static void trace_iterator_increment(struct trace_iterator *iter)
3587 {
3588 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3589 
3590 	iter->idx++;
3591 	if (buf_iter)
3592 		ring_buffer_iter_advance(buf_iter);
3593 }
3594 
3595 static struct trace_entry *
3596 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3597 		unsigned long *lost_events)
3598 {
3599 	struct ring_buffer_event *event;
3600 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3601 
3602 	if (buf_iter) {
3603 		event = ring_buffer_iter_peek(buf_iter, ts);
3604 		if (lost_events)
3605 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3606 				(unsigned long)-1 : 0;
3607 	} else {
3608 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3609 					 lost_events);
3610 	}
3611 
3612 	if (event) {
3613 		iter->ent_size = ring_buffer_event_length(event);
3614 		return ring_buffer_event_data(event);
3615 	}
3616 	iter->ent_size = 0;
3617 	return NULL;
3618 }
3619 
3620 static struct trace_entry *
3621 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3622 		  unsigned long *missing_events, u64 *ent_ts)
3623 {
3624 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3625 	struct trace_entry *ent, *next = NULL;
3626 	unsigned long lost_events = 0, next_lost = 0;
3627 	int cpu_file = iter->cpu_file;
3628 	u64 next_ts = 0, ts;
3629 	int next_cpu = -1;
3630 	int next_size = 0;
3631 	int cpu;
3632 
3633 	/*
3634 	 * If we are in a per_cpu trace file, don't bother iterating over
3635 	 * all CPUs; peek at that CPU directly.
3636 	 */
3637 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3638 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3639 			return NULL;
3640 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3641 		if (ent_cpu)
3642 			*ent_cpu = cpu_file;
3643 
3644 		return ent;
3645 	}
3646 
3647 	for_each_tracing_cpu(cpu) {
3648 
3649 		if (ring_buffer_empty_cpu(buffer, cpu))
3650 			continue;
3651 
3652 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3653 
3654 		/*
3655 		 * Pick the entry with the smallest timestamp:
3656 		 */
3657 		if (ent && (!next || ts < next_ts)) {
3658 			next = ent;
3659 			next_cpu = cpu;
3660 			next_ts = ts;
3661 			next_lost = lost_events;
3662 			next_size = iter->ent_size;
3663 		}
3664 	}
3665 
3666 	iter->ent_size = next_size;
3667 
3668 	if (ent_cpu)
3669 		*ent_cpu = next_cpu;
3670 
3671 	if (ent_ts)
3672 		*ent_ts = next_ts;
3673 
3674 	if (missing_events)
3675 		*missing_events = next_lost;
3676 
3677 	return next;
3678 }
3679 
3680 #define STATIC_FMT_BUF_SIZE	128
3681 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3682 
3683 char *trace_iter_expand_format(struct trace_iterator *iter)
3684 {
3685 	char *tmp;
3686 
3687 	/*
3688 	 * iter->tr is NULL when used with tp_printk, which makes
3689 	 * this get called where it is not safe to call krealloc().
3690 	 */
3691 	if (!iter->tr || iter->fmt == static_fmt_buf)
3692 		return NULL;
3693 
3694 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3695 		       GFP_KERNEL);
3696 	if (tmp) {
3697 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3698 		iter->fmt = tmp;
3699 	}
3700 
3701 	return tmp;
3702 }
3703 
3704 /* Returns true if the string is safe to dereference from an event */
3705 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3706 {
3707 	unsigned long addr = (unsigned long)str;
3708 	struct trace_event *trace_event;
3709 	struct trace_event_call *event;
3710 
3711 	/* OK if part of the event data */
3712 	if ((addr >= (unsigned long)iter->ent) &&
3713 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3714 		return true;
3715 
3716 	/* OK if part of the temp seq buffer */
3717 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3718 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3719 		return true;
3720 
3721 	/* Core rodata can not be freed */
3722 	if (is_kernel_rodata(addr))
3723 		return true;
3724 
3725 	if (trace_is_tracepoint_string(str))
3726 		return true;
3727 
3728 	/*
3729 	 * Now this could be a module event, referencing core module
3730 	 * data, which is OK.
3731 	 */
3732 	if (!iter->ent)
3733 		return false;
3734 
3735 	trace_event = ftrace_find_event(iter->ent->type);
3736 	if (!trace_event)
3737 		return false;
3738 
3739 	event = container_of(trace_event, struct trace_event_call, event);
3740 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3741 		return false;
3742 
3743 	/* Would rather have rodata, but this will suffice */
3744 	if (within_module_core(addr, event->module))
3745 		return true;
3746 
3747 	return false;
3748 }
3749 
3750 /**
3751  * ignore_event - Check dereferenced fields while writing to the seq buffer
3752  * @iter: The iterator that holds the seq buffer and the event being printed
3753  *
3754  * At boot up, test_event_printk() will flag any event that dereferences
3755  * a string with "%s" that does exist in the ring buffer. It may still
3756  * a string with "%s" that does not exist in the ring buffer. It may still
3757  * rodata that never gets freed. But if the string pointer is pointing
3758  * to something that was allocated, there's a chance that it can be freed
3759  * by the time the user reads the trace. This would cause a bad memory
3760  * access by the kernel and possibly crash the system.
3761  *
3762  * This function will check if the event has any fields flagged as needing
3763  * to be checked at runtime and perform those checks.
3764  *
3765  * If it is found that a field is unsafe, it will write into the @iter->seq
3766  * a message stating what was found to be unsafe.
3767  *
3768  * Return: true if the event is unsafe and should be ignored,
3769  *         false otherwise.
3770  */
3771 bool ignore_event(struct trace_iterator *iter)
3772 {
3773 	struct ftrace_event_field *field;
3774 	struct trace_event *trace_event;
3775 	struct trace_event_call *event;
3776 	struct list_head *head;
3777 	struct trace_seq *seq;
3778 	const void *ptr;
3779 
3780 	trace_event = ftrace_find_event(iter->ent->type);
3781 
3782 	seq = &iter->seq;
3783 
3784 	if (!trace_event) {
3785 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3786 		return true;
3787 	}
3788 
3789 	event = container_of(trace_event, struct trace_event_call, event);
3790 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3791 		return false;
3792 
3793 	head = trace_get_fields(event);
3794 	if (!head) {
3795 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3796 				 trace_event_name(event));
3797 		return true;
3798 	}
3799 
3800 	/* Offsets are from the iter->ent that points to the raw event */
3801 	ptr = iter->ent;
3802 
3803 	list_for_each_entry(field, head, link) {
3804 		const char *str;
3805 		bool good;
3806 
3807 		if (!field->needs_test)
3808 			continue;
3809 
3810 		str = *(const char **)(ptr + field->offset);
3811 
3812 		good = trace_safe_str(iter, str);
3813 
3814 		/*
3815 		 * If you hit this warning, it is likely that the
3816 		 * trace event in question used %s on a string that
3817 		 * was saved at the time of the event, but may not be
3818 		 * around when the trace is read. Use __string(),
3819 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3820 		 * instead. See samples/trace_events/trace-events-sample.h
3821 		 * for reference.
3822 		 */
3823 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3824 			      trace_event_name(event), field->name)) {
3825 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3826 					 trace_event_name(event), field->name);
3827 			return true;
3828 		}
3829 	}
3830 	return false;
3831 }
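
/*
 * A minimal sketch of the safe pattern referred to above; the event and
 * field names here are illustrative only (see
 * samples/trace_events/trace-events-sample.h for the authoritative
 * example). __string()/__assign_str() copy the string contents into the
 * ring buffer when the event fires, so nothing external is dereferenced
 * at read time:
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */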
3832 
3833 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3834 {
3835 	const char *p, *new_fmt;
3836 	char *q;
3837 
3838 	if (WARN_ON_ONCE(!fmt))
3839 		return fmt;
3840 
3841 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3842 		return fmt;
3843 
3844 	p = fmt;
3845 	new_fmt = q = iter->fmt;
3846 	while (*p) {
3847 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3848 			if (!trace_iter_expand_format(iter))
3849 				return fmt;
3850 
3851 			q += iter->fmt - new_fmt;
3852 			new_fmt = iter->fmt;
3853 		}
3854 
3855 		*q++ = *p++;
3856 
3857 		/* Replace %p with %px */
3858 		if (p[-1] == '%') {
3859 			if (p[0] == '%') {
3860 				*q++ = *p++;
3861 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3862 				*q++ = *p++;
3863 				*q++ = 'x';
3864 			}
3865 		}
3866 	}
3867 	*q = '\0';
3868 
3869 	return new_fmt;
3870 }
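
/*
 * For example, with the hash-ptr trace option cleared, a format string of
 * "ptr=%p flags=%%p sym=%pS" is rewritten into iter->fmt as
 * "ptr=%px flags=%%p sym=%pS": a bare "%p" gains an 'x', while "%%" and
 * pointer extensions such as "%pS" are copied through unchanged.
 */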
3871 
3872 #define STATIC_TEMP_BUF_SIZE	128
3873 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3874 
3875 /* Find the next real entry, without updating the iterator itself */
3876 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3877 					  int *ent_cpu, u64 *ent_ts)
3878 {
3879 	/* __find_next_entry will reset ent_size */
3880 	int ent_size = iter->ent_size;
3881 	struct trace_entry *entry;
3882 
3883 	/*
3884 	 * If called from ftrace_dump(), then the iter->temp buffer
3885 	 * will be the static_temp_buf and not created from kmalloc.
3886 	 * If the entry size is greater than the buffer, we cannot
3887 	 * save it. Just return NULL in that case. This is only
3888 	 * used to add markers when two consecutive events'
3889 	 * timestamps have a large delta. See trace_print_lat_context().
3890 	 */
3891 	if (iter->temp == static_temp_buf &&
3892 	    STATIC_TEMP_BUF_SIZE < ent_size)
3893 		return NULL;
3894 
3895 	/*
3896 	 * The __find_next_entry() may call peek_next_entry(), which may
3897 	 * call ring_buffer_peek() that may make the contents of iter->ent
3898 	 * undefined. Need to copy iter->ent now.
3899 	 */
3900 	if (iter->ent && iter->ent != iter->temp) {
3901 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3902 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3903 			void *temp;
3904 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3905 			if (!temp)
3906 				return NULL;
3907 			kfree(iter->temp);
3908 			iter->temp = temp;
3909 			iter->temp_size = iter->ent_size;
3910 		}
3911 		memcpy(iter->temp, iter->ent, iter->ent_size);
3912 		iter->ent = iter->temp;
3913 	}
3914 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3915 	/* Put back the original ent_size */
3916 	iter->ent_size = ent_size;
3917 
3918 	return entry;
3919 }
3920 
3921 /* Find the next real entry, and increment the iterator to the next entry */
3922 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3923 {
3924 	iter->ent = __find_next_entry(iter, &iter->cpu,
3925 				      &iter->lost_events, &iter->ts);
3926 
3927 	if (iter->ent)
3928 		trace_iterator_increment(iter);
3929 
3930 	return iter->ent ? iter : NULL;
3931 }
3932 
3933 static void trace_consume(struct trace_iterator *iter)
3934 {
3935 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3936 			    &iter->lost_events);
3937 }
3938 
3939 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3940 {
3941 	struct trace_iterator *iter = m->private;
3942 	int i = (int)*pos;
3943 	void *ent;
3944 
3945 	WARN_ON_ONCE(iter->leftover);
3946 
3947 	(*pos)++;
3948 
3949 	/* can't go backwards */
3950 	if (iter->idx > i)
3951 		return NULL;
3952 
3953 	if (iter->idx < 0)
3954 		ent = trace_find_next_entry_inc(iter);
3955 	else
3956 		ent = iter;
3957 
3958 	while (ent && iter->idx < i)
3959 		ent = trace_find_next_entry_inc(iter);
3960 
3961 	iter->pos = *pos;
3962 
3963 	return ent;
3964 }
3965 
3966 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3967 {
3968 	struct ring_buffer_iter *buf_iter;
3969 	unsigned long entries = 0;
3970 	u64 ts;
3971 
3972 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3973 
3974 	buf_iter = trace_buffer_iter(iter, cpu);
3975 	if (!buf_iter)
3976 		return;
3977 
3978 	ring_buffer_iter_reset(buf_iter);
3979 
3980 	/*
3981 	 * With the max latency tracers, it is possible that a reset
3982 	 * never took place on a CPU. This is evident when a timestamp
3983 	 * is before the start of the buffer.
3984 	 */
3985 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3986 		if (ts >= iter->array_buffer->time_start)
3987 			break;
3988 		entries++;
3989 		ring_buffer_iter_advance(buf_iter);
3990 		/* This could be a big loop */
3991 		cond_resched();
3992 	}
3993 
3994 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3995 }
3996 
3997 /*
3998  * The current tracer is copied into the iterator to avoid holding
3999  * a global lock for the entire read.
4000  */
4001 static void *s_start(struct seq_file *m, loff_t *pos)
4002 {
4003 	struct trace_iterator *iter = m->private;
4004 	struct trace_array *tr = iter->tr;
4005 	int cpu_file = iter->cpu_file;
4006 	void *p = NULL;
4007 	loff_t l = 0;
4008 	int cpu;
4009 
4010 	mutex_lock(&trace_types_lock);
4011 	if (unlikely(tr->current_trace != iter->trace)) {
4012 		/* Close iter->trace before switching to the new current tracer */
4013 		if (iter->trace->close)
4014 			iter->trace->close(iter);
4015 		iter->trace = tr->current_trace;
4016 		/* Reopen the new current tracer */
4017 		if (iter->trace->open)
4018 			iter->trace->open(iter);
4019 	}
4020 	mutex_unlock(&trace_types_lock);
4021 
4022 #ifdef CONFIG_TRACER_MAX_TRACE
4023 	if (iter->snapshot && iter->trace->use_max_tr)
4024 		return ERR_PTR(-EBUSY);
4025 #endif
4026 
4027 	if (*pos != iter->pos) {
4028 		iter->ent = NULL;
4029 		iter->cpu = 0;
4030 		iter->idx = -1;
4031 
4032 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4033 			for_each_tracing_cpu(cpu)
4034 				tracing_iter_reset(iter, cpu);
4035 		} else
4036 			tracing_iter_reset(iter, cpu_file);
4037 
4038 		iter->leftover = 0;
4039 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4040 			;
4041 
4042 	} else {
4043 		/*
4044 		 * If we overflowed the seq_file before, then we want
4045 		 * to just reuse the trace_seq buffer.
4046 		 */
4047 		if (iter->leftover)
4048 			p = iter;
4049 		else {
4050 			l = *pos - 1;
4051 			p = s_next(m, p, &l);
4052 		}
4053 	}
4054 
4055 	trace_event_read_lock();
4056 	trace_access_lock(cpu_file);
4057 	return p;
4058 }
4059 
4060 static void s_stop(struct seq_file *m, void *p)
4061 {
4062 	struct trace_iterator *iter = m->private;
4063 
4064 #ifdef CONFIG_TRACER_MAX_TRACE
4065 	if (iter->snapshot && iter->trace->use_max_tr)
4066 		return;
4067 #endif
4068 
4069 	trace_access_unlock(iter->cpu_file);
4070 	trace_event_read_unlock();
4071 }
4072 
4073 static void
4074 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4075 		      unsigned long *entries, int cpu)
4076 {
4077 	unsigned long count;
4078 
4079 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4080 	/*
4081 	 * If this buffer has skipped entries, then we hold all
4082 	 * entries for the trace and we need to ignore the
4083 	 * ones before the buffer's start timestamp.
4084 	 */
4085 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4086 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4087 		/* total is the same as the entries */
4088 		*total = count;
4089 	} else
4090 		*total = count +
4091 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4092 	*entries = count;
4093 }
4094 
4095 static void
4096 get_total_entries(struct array_buffer *buf,
4097 		  unsigned long *total, unsigned long *entries)
4098 {
4099 	unsigned long t, e;
4100 	int cpu;
4101 
4102 	*total = 0;
4103 	*entries = 0;
4104 
4105 	for_each_tracing_cpu(cpu) {
4106 		get_total_entries_cpu(buf, &t, &e, cpu);
4107 		*total += t;
4108 		*entries += e;
4109 	}
4110 }
4111 
4112 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4113 {
4114 	unsigned long total, entries;
4115 
4116 	if (!tr)
4117 		tr = &global_trace;
4118 
4119 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4120 
4121 	return entries;
4122 }
4123 
4124 unsigned long trace_total_entries(struct trace_array *tr)
4125 {
4126 	unsigned long total, entries;
4127 
4128 	if (!tr)
4129 		tr = &global_trace;
4130 
4131 	get_total_entries(&tr->array_buffer, &total, &entries);
4132 
4133 	return entries;
4134 }
4135 
4136 static void print_lat_help_header(struct seq_file *m)
4137 {
4138 	seq_puts(m, "#                    _------=> CPU#            \n"
4139 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4140 		    "#                  | / _----=> need-resched    \n"
4141 		    "#                  || / _---=> hardirq/softirq \n"
4142 		    "#                  ||| / _--=> preempt-depth   \n"
4143 		    "#                  |||| / _-=> migrate-disable \n"
4144 		    "#                  ||||| /     delay           \n"
4145 		    "#  cmd     pid     |||||| time  |   caller     \n"
4146 		    "#     \\   /        ||||||  \\    |    /       \n");
4147 }
4148 
4149 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4150 {
4151 	unsigned long total;
4152 	unsigned long entries;
4153 
4154 	get_total_entries(buf, &total, &entries);
4155 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4156 		   entries, total, num_online_cpus());
4157 	seq_puts(m, "#\n");
4158 }
4159 
4160 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4161 				   unsigned int flags)
4162 {
4163 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4164 
4165 	print_event_info(buf, m);
4166 
4167 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4168 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4169 }
4170 
4171 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4172 				       unsigned int flags)
4173 {
4174 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4175 	static const char space[] = "            ";
4176 	int prec = tgid ? 12 : 2;
4177 
4178 	print_event_info(buf, m);
4179 
4180 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4181 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4182 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4183 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4184 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4185 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4186 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4187 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4188 }
4189 
4190 void
4191 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4192 {
4193 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4194 	struct array_buffer *buf = iter->array_buffer;
4195 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4196 	struct tracer *type = iter->trace;
4197 	unsigned long entries;
4198 	unsigned long total;
4199 	const char *name = type->name;
4200 
4201 	get_total_entries(buf, &total, &entries);
4202 
4203 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4204 		   name, init_utsname()->release);
4205 	seq_puts(m, "# -----------------------------------"
4206 		 "---------------------------------\n");
4207 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4208 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4209 		   nsecs_to_usecs(data->saved_latency),
4210 		   entries,
4211 		   total,
4212 		   buf->cpu,
4213 		   preempt_model_str(),
4214 		   /* These are reserved for later use */
4215 		   0, 0, 0, 0);
4216 #ifdef CONFIG_SMP
4217 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4218 #else
4219 	seq_puts(m, ")\n");
4220 #endif
4221 	seq_puts(m, "#    -----------------\n");
4222 	seq_printf(m, "#    | task: %.16s-%d "
4223 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4224 		   data->comm, data->pid,
4225 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4226 		   data->policy, data->rt_priority);
4227 	seq_puts(m, "#    -----------------\n");
4228 
4229 	if (data->critical_start) {
4230 		seq_puts(m, "#  => started at: ");
4231 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4232 		trace_print_seq(m, &iter->seq);
4233 		seq_puts(m, "\n#  => ended at:   ");
4234 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4235 		trace_print_seq(m, &iter->seq);
4236 		seq_puts(m, "\n#\n");
4237 	}
4238 
4239 	seq_puts(m, "#\n");
4240 }
4241 
4242 static void test_cpu_buff_start(struct trace_iterator *iter)
4243 {
4244 	struct trace_seq *s = &iter->seq;
4245 	struct trace_array *tr = iter->tr;
4246 
4247 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4248 		return;
4249 
4250 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4251 		return;
4252 
4253 	if (cpumask_available(iter->started) &&
4254 	    cpumask_test_cpu(iter->cpu, iter->started))
4255 		return;
4256 
4257 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4258 		return;
4259 
4260 	if (cpumask_available(iter->started))
4261 		cpumask_set_cpu(iter->cpu, iter->started);
4262 
4263 	/* Don't print started cpu buffer for the first entry of the trace */
4264 	if (iter->idx > 1)
4265 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4266 				iter->cpu);
4267 }
4268 
4269 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4270 {
4271 	struct trace_array *tr = iter->tr;
4272 	struct trace_seq *s = &iter->seq;
4273 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4274 	struct trace_entry *entry;
4275 	struct trace_event *event;
4276 
4277 	entry = iter->ent;
4278 
4279 	test_cpu_buff_start(iter);
4280 
4281 	event = ftrace_find_event(entry->type);
4282 
4283 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4284 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4285 			trace_print_lat_context(iter);
4286 		else
4287 			trace_print_context(iter);
4288 	}
4289 
4290 	if (trace_seq_has_overflowed(s))
4291 		return TRACE_TYPE_PARTIAL_LINE;
4292 
4293 	if (event) {
4294 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4295 			return print_event_fields(iter, event);
4296 		/*
4297 		 * For TRACE_EVENT() events, the print_fmt is not
4298 		 * safe to use if the array has delta offsets.
4299 		 * Force printing via the fields.
4300 		 */
4301 		if ((tr->text_delta) &&
4302 		    event->type > __TRACE_LAST_TYPE)
4303 			return print_event_fields(iter, event);
4304 
4305 		return event->funcs->trace(iter, sym_flags, event);
4306 	}
4307 
4308 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4309 
4310 	return trace_handle_return(s);
4311 }
4312 
4313 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4314 {
4315 	struct trace_array *tr = iter->tr;
4316 	struct trace_seq *s = &iter->seq;
4317 	struct trace_entry *entry;
4318 	struct trace_event *event;
4319 
4320 	entry = iter->ent;
4321 
4322 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4323 		trace_seq_printf(s, "%d %d %llu ",
4324 				 entry->pid, iter->cpu, iter->ts);
4325 
4326 	if (trace_seq_has_overflowed(s))
4327 		return TRACE_TYPE_PARTIAL_LINE;
4328 
4329 	event = ftrace_find_event(entry->type);
4330 	if (event)
4331 		return event->funcs->raw(iter, 0, event);
4332 
4333 	trace_seq_printf(s, "%d ?\n", entry->type);
4334 
4335 	return trace_handle_return(s);
4336 }
4337 
4338 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4339 {
4340 	struct trace_array *tr = iter->tr;
4341 	struct trace_seq *s = &iter->seq;
4342 	unsigned char newline = '\n';
4343 	struct trace_entry *entry;
4344 	struct trace_event *event;
4345 
4346 	entry = iter->ent;
4347 
4348 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4349 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4350 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4351 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4352 		if (trace_seq_has_overflowed(s))
4353 			return TRACE_TYPE_PARTIAL_LINE;
4354 	}
4355 
4356 	event = ftrace_find_event(entry->type);
4357 	if (event) {
4358 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4359 		if (ret != TRACE_TYPE_HANDLED)
4360 			return ret;
4361 	}
4362 
4363 	SEQ_PUT_FIELD(s, newline);
4364 
4365 	return trace_handle_return(s);
4366 }
4367 
4368 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4369 {
4370 	struct trace_array *tr = iter->tr;
4371 	struct trace_seq *s = &iter->seq;
4372 	struct trace_entry *entry;
4373 	struct trace_event *event;
4374 
4375 	entry = iter->ent;
4376 
4377 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4378 		SEQ_PUT_FIELD(s, entry->pid);
4379 		SEQ_PUT_FIELD(s, iter->cpu);
4380 		SEQ_PUT_FIELD(s, iter->ts);
4381 		if (trace_seq_has_overflowed(s))
4382 			return TRACE_TYPE_PARTIAL_LINE;
4383 	}
4384 
4385 	event = ftrace_find_event(entry->type);
4386 	return event ? event->funcs->binary(iter, 0, event) :
4387 		TRACE_TYPE_HANDLED;
4388 }
4389 
4390 int trace_empty(struct trace_iterator *iter)
4391 {
4392 	struct ring_buffer_iter *buf_iter;
4393 	int cpu;
4394 
4395 	/* If we are looking at one CPU buffer, only check that one */
4396 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4397 		cpu = iter->cpu_file;
4398 		buf_iter = trace_buffer_iter(iter, cpu);
4399 		if (buf_iter) {
4400 			if (!ring_buffer_iter_empty(buf_iter))
4401 				return 0;
4402 		} else {
4403 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4404 				return 0;
4405 		}
4406 		return 1;
4407 	}
4408 
4409 	for_each_tracing_cpu(cpu) {
4410 		buf_iter = trace_buffer_iter(iter, cpu);
4411 		if (buf_iter) {
4412 			if (!ring_buffer_iter_empty(buf_iter))
4413 				return 0;
4414 		} else {
4415 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4416 				return 0;
4417 		}
4418 	}
4419 
4420 	return 1;
4421 }
4422 
4423 /*  Called with trace_event_read_lock() held. */
4424 enum print_line_t print_trace_line(struct trace_iterator *iter)
4425 {
4426 	struct trace_array *tr = iter->tr;
4427 	unsigned long trace_flags = tr->trace_flags;
4428 	enum print_line_t ret;
4429 
4430 	if (iter->lost_events) {
4431 		if (iter->lost_events == (unsigned long)-1)
4432 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4433 					 iter->cpu);
4434 		else
4435 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4436 					 iter->cpu, iter->lost_events);
4437 		if (trace_seq_has_overflowed(&iter->seq))
4438 			return TRACE_TYPE_PARTIAL_LINE;
4439 	}
4440 
4441 	if (iter->trace && iter->trace->print_line) {
4442 		ret = iter->trace->print_line(iter);
4443 		if (ret != TRACE_TYPE_UNHANDLED)
4444 			return ret;
4445 	}
4446 
4447 	if (iter->ent->type == TRACE_BPUTS &&
4448 			trace_flags & TRACE_ITER_PRINTK &&
4449 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4450 		return trace_print_bputs_msg_only(iter);
4451 
4452 	if (iter->ent->type == TRACE_BPRINT &&
4453 			trace_flags & TRACE_ITER_PRINTK &&
4454 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4455 		return trace_print_bprintk_msg_only(iter);
4456 
4457 	if (iter->ent->type == TRACE_PRINT &&
4458 			trace_flags & TRACE_ITER_PRINTK &&
4459 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4460 		return trace_print_printk_msg_only(iter);
4461 
4462 	if (trace_flags & TRACE_ITER_BIN)
4463 		return print_bin_fmt(iter);
4464 
4465 	if (trace_flags & TRACE_ITER_HEX)
4466 		return print_hex_fmt(iter);
4467 
4468 	if (trace_flags & TRACE_ITER_RAW)
4469 		return print_raw_fmt(iter);
4470 
4471 	return print_trace_fmt(iter);
4472 }
4473 
4474 void trace_latency_header(struct seq_file *m)
4475 {
4476 	struct trace_iterator *iter = m->private;
4477 	struct trace_array *tr = iter->tr;
4478 
4479 	/* print nothing if the buffers are empty */
4480 	if (trace_empty(iter))
4481 		return;
4482 
4483 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4484 		print_trace_header(m, iter);
4485 
4486 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4487 		print_lat_help_header(m);
4488 }
4489 
4490 void trace_default_header(struct seq_file *m)
4491 {
4492 	struct trace_iterator *iter = m->private;
4493 	struct trace_array *tr = iter->tr;
4494 	unsigned long trace_flags = tr->trace_flags;
4495 
4496 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4497 		return;
4498 
4499 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4500 		/* print nothing if the buffers are empty */
4501 		if (trace_empty(iter))
4502 			return;
4503 		print_trace_header(m, iter);
4504 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4505 			print_lat_help_header(m);
4506 	} else {
4507 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4508 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4509 				print_func_help_header_irq(iter->array_buffer,
4510 							   m, trace_flags);
4511 			else
4512 				print_func_help_header(iter->array_buffer, m,
4513 						       trace_flags);
4514 		}
4515 	}
4516 }
4517 
4518 static void test_ftrace_alive(struct seq_file *m)
4519 {
4520 	if (!ftrace_is_dead())
4521 		return;
4522 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4523 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4524 }
4525 
4526 #ifdef CONFIG_TRACER_MAX_TRACE
4527 static void show_snapshot_main_help(struct seq_file *m)
4528 {
4529 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4530 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4531 		    "#                      Takes a snapshot of the main buffer.\n"
4532 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4533 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4534 		    "#                       is not a '0' or '1')\n");
4535 }
4536 
4537 static void show_snapshot_percpu_help(struct seq_file *m)
4538 {
4539 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4540 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4541 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4542 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4543 #else
4544 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4545 		    "#                     Must use main snapshot file to allocate.\n");
4546 #endif
4547 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4548 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4549 		    "#                       is not a '0' or '1')\n");
4550 }
4551 
4552 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4553 {
4554 	if (iter->tr->allocated_snapshot)
4555 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4556 	else
4557 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4558 
4559 	seq_puts(m, "# Snapshot commands:\n");
4560 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4561 		show_snapshot_main_help(m);
4562 	else
4563 		show_snapshot_percpu_help(m);
4564 }
4565 #else
4566 /* Should never be called */
4567 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4568 #endif
4569 
4570 static int s_show(struct seq_file *m, void *v)
4571 {
4572 	struct trace_iterator *iter = v;
4573 	int ret;
4574 
4575 	if (iter->ent == NULL) {
4576 		if (iter->tr) {
4577 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4578 			seq_puts(m, "#\n");
4579 			test_ftrace_alive(m);
4580 		}
4581 		if (iter->snapshot && trace_empty(iter))
4582 			print_snapshot_help(m, iter);
4583 		else if (iter->trace && iter->trace->print_header)
4584 			iter->trace->print_header(m);
4585 		else
4586 			trace_default_header(m);
4587 
4588 	} else if (iter->leftover) {
4589 		/*
4590 		 * If we filled the seq_file buffer earlier, we
4591 		 * want to just show it now.
4592 		 */
4593 		ret = trace_print_seq(m, &iter->seq);
4594 
4595 		/* ret should this time be zero, but you never know */
4596 		iter->leftover = ret;
4597 
4598 	} else {
4599 		ret = print_trace_line(iter);
4600 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4601 			iter->seq.full = 0;
4602 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4603 		}
4604 		ret = trace_print_seq(m, &iter->seq);
4605 		/*
4606 		 * If we overflow the seq_file buffer, then it will
4607 		 * ask us for this data again at start up.
4608 		 * Use that instead.
4609 		 *  ret is 0 if seq_file write succeeded.
4610 		 *        -1 otherwise.
4611 		 */
4612 		iter->leftover = ret;
4613 	}
4614 
4615 	return 0;
4616 }
4617 
4618 /*
4619  * Should be used after trace_array_get(); trace_types_lock
4620  * ensures that i_cdev was already initialized.
4621  */
4622 static inline int tracing_get_cpu(struct inode *inode)
4623 {
4624 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4625 		return (long)inode->i_cdev - 1;
4626 	return RING_BUFFER_ALL_CPUS;
4627 }
4628 
4629 static const struct seq_operations tracer_seq_ops = {
4630 	.start		= s_start,
4631 	.next		= s_next,
4632 	.stop		= s_stop,
4633 	.show		= s_show,
4634 };
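
/*
 * The seq_file core drives this iterator: it calls s_start() with the
 * saved position, alternates s_show() and s_next() until its page fills
 * up, then calls s_stop(). s_show() records any overflow in
 * iter->leftover so the unread line is emitted again on the next read.
 */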
4635 
4636 /*
4637  * Note, as iter itself can be allocated and freed in different
4638  * ways, this function is only used to free its content, and not
4639  * the iterator itself. The only requirement for all the allocations
4640  * is that they must zero all fields (kzalloc), as freeing works with
4641  * either allocated content or NULL.
4642  */
4643 static void free_trace_iter_content(struct trace_iterator *iter)
4644 {
4645 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4646 	if (iter->fmt != static_fmt_buf)
4647 		kfree(iter->fmt);
4648 
4649 	kfree(iter->temp);
4650 	kfree(iter->buffer_iter);
4651 	mutex_destroy(&iter->mutex);
4652 	free_cpumask_var(iter->started);
4653 }
4654 
4655 static struct trace_iterator *
4656 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4657 {
4658 	struct trace_array *tr = inode->i_private;
4659 	struct trace_iterator *iter;
4660 	int cpu;
4661 
4662 	if (tracing_disabled)
4663 		return ERR_PTR(-ENODEV);
4664 
4665 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4666 	if (!iter)
4667 		return ERR_PTR(-ENOMEM);
4668 
4669 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4670 				    GFP_KERNEL);
4671 	if (!iter->buffer_iter)
4672 		goto release;
4673 
4674 	/*
4675 	 * trace_find_next_entry() may need to save off iter->ent.
4676 	 * It will place it into the iter->temp buffer. As most
4677 	 * events are smaller than 128 bytes, allocate a buffer of that size.
4678 	 * If one is greater, then trace_find_next_entry() will
4679 	 * allocate a new buffer to adjust for the bigger iter->ent.
4680 	 * It's not critical if it fails to get allocated here.
4681 	 */
4682 	iter->temp = kmalloc(128, GFP_KERNEL);
4683 	if (iter->temp)
4684 		iter->temp_size = 128;
4685 
4686 	/*
4687 	 * trace_event_printf() may need to modify the given format
4688 	 * string to replace %p with %px so that it shows the real address
4689 	 * instead of a hash value. However, that is only needed for event
4690 	 * tracing; other tracers may not need it. Defer the allocation
4691 	 * until it is needed.
4692 	 */
4693 	iter->fmt = NULL;
4694 	iter->fmt_size = 0;
4695 
4696 	mutex_lock(&trace_types_lock);
4697 	iter->trace = tr->current_trace;
4698 
4699 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4700 		goto fail;
4701 
4702 	iter->tr = tr;
4703 
4704 #ifdef CONFIG_TRACER_MAX_TRACE
4705 	/* Currently only the top directory has a snapshot */
4706 	if (tr->current_trace->print_max || snapshot)
4707 		iter->array_buffer = &tr->max_buffer;
4708 	else
4709 #endif
4710 		iter->array_buffer = &tr->array_buffer;
4711 	iter->snapshot = snapshot;
4712 	iter->pos = -1;
4713 	iter->cpu_file = tracing_get_cpu(inode);
4714 	mutex_init(&iter->mutex);
4715 
4716 	/* Notify the tracer early; before we stop tracing. */
4717 	if (iter->trace->open)
4718 		iter->trace->open(iter);
4719 
4720 	/* Annotate start of buffers if we had overruns */
4721 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4722 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4723 
4724 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4725 	if (trace_clocks[tr->clock_id].in_ns)
4726 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4727 
4728 	/*
4729 	 * If pause-on-trace is enabled, then stop the trace while
4730 	 * dumping, unless this is the "snapshot" file.
4731 	 */
4732 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4733 		tracing_stop_tr(tr);
4734 
4735 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4736 		for_each_tracing_cpu(cpu) {
4737 			iter->buffer_iter[cpu] =
4738 				ring_buffer_read_start(iter->array_buffer->buffer,
4739 						       cpu, GFP_KERNEL);
4740 			tracing_iter_reset(iter, cpu);
4741 		}
4742 	} else {
4743 		cpu = iter->cpu_file;
4744 		iter->buffer_iter[cpu] =
4745 			ring_buffer_read_start(iter->array_buffer->buffer,
4746 					       cpu, GFP_KERNEL);
4747 		tracing_iter_reset(iter, cpu);
4748 	}
4749 
4750 	mutex_unlock(&trace_types_lock);
4751 
4752 	return iter;
4753 
4754  fail:
4755 	mutex_unlock(&trace_types_lock);
4756 	free_trace_iter_content(iter);
4757 release:
4758 	seq_release_private(inode, file);
4759 	return ERR_PTR(-ENOMEM);
4760 }
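
/*
 * __tracing_open() is paired with tracing_release() below, which finishes
 * the per-CPU ring buffer iterators, invokes the tracer's ->close()
 * callback and restarts tracing if it was stopped here.
 */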
4761 
4762 int tracing_open_generic(struct inode *inode, struct file *filp)
4763 {
4764 	int ret;
4765 
4766 	ret = tracing_check_open_get_tr(NULL);
4767 	if (ret)
4768 		return ret;
4769 
4770 	filp->private_data = inode->i_private;
4771 	return 0;
4772 }
4773 
4774 bool tracing_is_disabled(void)
4775 {
4776 	return tracing_disabled;
4777 }
4778 
4779 /*
4780  * Open and update trace_array ref count.
4781  * Must have the current trace_array passed to it.
4782  */
4783 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4784 {
4785 	struct trace_array *tr = inode->i_private;
4786 	int ret;
4787 
4788 	ret = tracing_check_open_get_tr(tr);
4789 	if (ret)
4790 		return ret;
4791 
4792 	filp->private_data = inode->i_private;
4793 
4794 	return 0;
4795 }
4796 
4797 /*
4798  * The private pointer of the inode is the trace_event_file.
4799  * Update the tr ref count associated to it.
4800  */
4801 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4802 {
4803 	struct trace_event_file *file = inode->i_private;
4804 	int ret;
4805 
4806 	ret = tracing_check_open_get_tr(file->tr);
4807 	if (ret)
4808 		return ret;
4809 
4810 	mutex_lock(&event_mutex);
4811 
4812 	/* Fail if the file is marked for removal */
4813 	if (file->flags & EVENT_FILE_FL_FREED) {
4814 		trace_array_put(file->tr);
4815 		ret = -ENODEV;
4816 	} else {
4817 		event_file_get(file);
4818 	}
4819 
4820 	mutex_unlock(&event_mutex);
4821 	if (ret)
4822 		return ret;
4823 
4824 	filp->private_data = inode->i_private;
4825 
4826 	return 0;
4827 }
4828 
4829 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4830 {
4831 	struct trace_event_file *file = inode->i_private;
4832 
4833 	trace_array_put(file->tr);
4834 	event_file_put(file);
4835 
4836 	return 0;
4837 }
4838 
4839 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4840 {
4841 	tracing_release_file_tr(inode, filp);
4842 	return single_release(inode, filp);
4843 }
4844 
4845 static int tracing_mark_open(struct inode *inode, struct file *filp)
4846 {
4847 	stream_open(inode, filp);
4848 	return tracing_open_generic_tr(inode, filp);
4849 }
4850 
4851 static int tracing_release(struct inode *inode, struct file *file)
4852 {
4853 	struct trace_array *tr = inode->i_private;
4854 	struct seq_file *m = file->private_data;
4855 	struct trace_iterator *iter;
4856 	int cpu;
4857 
4858 	if (!(file->f_mode & FMODE_READ)) {
4859 		trace_array_put(tr);
4860 		return 0;
4861 	}
4862 
4863 	/* Writes do not use seq_file */
4864 	iter = m->private;
4865 	mutex_lock(&trace_types_lock);
4866 
4867 	for_each_tracing_cpu(cpu) {
4868 		if (iter->buffer_iter[cpu])
4869 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4870 	}
4871 
4872 	if (iter->trace && iter->trace->close)
4873 		iter->trace->close(iter);
4874 
4875 	if (!iter->snapshot && tr->stop_count)
4876 		/* reenable tracing if it was previously enabled */
4877 		tracing_start_tr(tr);
4878 
4879 	__trace_array_put(tr);
4880 
4881 	mutex_unlock(&trace_types_lock);
4882 
4883 	free_trace_iter_content(iter);
4884 	seq_release_private(inode, file);
4885 
4886 	return 0;
4887 }
4888 
4889 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4890 {
4891 	struct trace_array *tr = inode->i_private;
4892 
4893 	trace_array_put(tr);
4894 	return 0;
4895 }
4896 
4897 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4898 {
4899 	struct trace_array *tr = inode->i_private;
4900 
4901 	trace_array_put(tr);
4902 
4903 	return single_release(inode, file);
4904 }
4905 
4906 static int tracing_open(struct inode *inode, struct file *file)
4907 {
4908 	struct trace_array *tr = inode->i_private;
4909 	struct trace_iterator *iter;
4910 	int ret;
4911 
4912 	ret = tracing_check_open_get_tr(tr);
4913 	if (ret)
4914 		return ret;
4915 
4916 	/* If this file was open for write, then erase contents */
4917 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4918 		int cpu = tracing_get_cpu(inode);
4919 		struct array_buffer *trace_buf = &tr->array_buffer;
4920 
4921 #ifdef CONFIG_TRACER_MAX_TRACE
4922 		if (tr->current_trace->print_max)
4923 			trace_buf = &tr->max_buffer;
4924 #endif
4925 
4926 		if (cpu == RING_BUFFER_ALL_CPUS)
4927 			tracing_reset_online_cpus(trace_buf);
4928 		else
4929 			tracing_reset_cpu(trace_buf, cpu);
4930 	}
4931 
4932 	if (file->f_mode & FMODE_READ) {
4933 		iter = __tracing_open(inode, file, false);
4934 		if (IS_ERR(iter))
4935 			ret = PTR_ERR(iter);
4936 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4937 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4938 	}
4939 
4940 	if (ret < 0)
4941 		trace_array_put(tr);
4942 
4943 	return ret;
4944 }
4945 
4946 /*
4947  * Some tracers are not suitable for instance buffers.
4948  * A tracer is always available for the global array (toplevel)
4949  * or if it explicitly states that it is.
4950  */
4951 static bool
4952 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4953 {
4954 #ifdef CONFIG_TRACER_SNAPSHOT
4955 	/* arrays with mapped buffer range do not have snapshots */
4956 	if (tr->range_addr_start && t->use_max_tr)
4957 		return false;
4958 #endif
4959 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4960 }
4961 
4962 /* Find the next tracer that this trace array may use */
4963 static struct tracer *
4964 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4965 {
4966 	while (t && !trace_ok_for_array(t, tr))
4967 		t = t->next;
4968 
4969 	return t;
4970 }
4971 
4972 static void *
4973 t_next(struct seq_file *m, void *v, loff_t *pos)
4974 {
4975 	struct trace_array *tr = m->private;
4976 	struct tracer *t = v;
4977 
4978 	(*pos)++;
4979 
4980 	if (t)
4981 		t = get_tracer_for_array(tr, t->next);
4982 
4983 	return t;
4984 }
4985 
4986 static void *t_start(struct seq_file *m, loff_t *pos)
4987 {
4988 	struct trace_array *tr = m->private;
4989 	struct tracer *t;
4990 	loff_t l = 0;
4991 
4992 	mutex_lock(&trace_types_lock);
4993 
4994 	t = get_tracer_for_array(tr, trace_types);
4995 	for (; t && l < *pos; t = t_next(m, t, &l))
4996 			;
4997 
4998 	return t;
4999 }
5000 
5001 static void t_stop(struct seq_file *m, void *p)
5002 {
5003 	mutex_unlock(&trace_types_lock);
5004 }
5005 
5006 static int t_show(struct seq_file *m, void *v)
5007 {
5008 	struct tracer *t = v;
5009 
5010 	if (!t)
5011 		return 0;
5012 
5013 	seq_puts(m, t->name);
5014 	if (t->next)
5015 		seq_putc(m, ' ');
5016 	else
5017 		seq_putc(m, '\n');
5018 
5019 	return 0;
5020 }
5021 
5022 static const struct seq_operations show_traces_seq_ops = {
5023 	.start		= t_start,
5024 	.next		= t_next,
5025 	.stop		= t_stop,
5026 	.show		= t_show,
5027 };
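
/*
 * Reading "available_tracers" walks the registered tracers that are
 * usable by this instance and prints them space-separated on one line,
 * for example "function_graph function nop"; the exact set depends on
 * the kernel configuration.
 */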
5028 
5029 static int show_traces_open(struct inode *inode, struct file *file)
5030 {
5031 	struct trace_array *tr = inode->i_private;
5032 	struct seq_file *m;
5033 	int ret;
5034 
5035 	ret = tracing_check_open_get_tr(tr);
5036 	if (ret)
5037 		return ret;
5038 
5039 	ret = seq_open(file, &show_traces_seq_ops);
5040 	if (ret) {
5041 		trace_array_put(tr);
5042 		return ret;
5043 	}
5044 
5045 	m = file->private_data;
5046 	m->private = tr;
5047 
5048 	return 0;
5049 }
5050 
5051 static int tracing_seq_release(struct inode *inode, struct file *file)
5052 {
5053 	struct trace_array *tr = inode->i_private;
5054 
5055 	trace_array_put(tr);
5056 	return seq_release(inode, file);
5057 }
5058 
5059 static ssize_t
5060 tracing_write_stub(struct file *filp, const char __user *ubuf,
5061 		   size_t count, loff_t *ppos)
5062 {
5063 	return count;
5064 }
5065 
5066 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5067 {
5068 	int ret;
5069 
5070 	if (file->f_mode & FMODE_READ)
5071 		ret = seq_lseek(file, offset, whence);
5072 	else
5073 		file->f_pos = ret = 0;
5074 
5075 	return ret;
5076 }
5077 
5078 static const struct file_operations tracing_fops = {
5079 	.open		= tracing_open,
5080 	.read		= seq_read,
5081 	.read_iter	= seq_read_iter,
5082 	.splice_read	= copy_splice_read,
5083 	.write		= tracing_write_stub,
5084 	.llseek		= tracing_lseek,
5085 	.release	= tracing_release,
5086 };
5087 
5088 static const struct file_operations show_traces_fops = {
5089 	.open		= show_traces_open,
5090 	.read		= seq_read,
5091 	.llseek		= seq_lseek,
5092 	.release	= tracing_seq_release,
5093 };
5094 
5095 static ssize_t
5096 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5097 		     size_t count, loff_t *ppos)
5098 {
5099 	struct trace_array *tr = file_inode(filp)->i_private;
5100 	char *mask_str;
5101 	int len;
5102 
5103 	len = snprintf(NULL, 0, "%*pb\n",
5104 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5105 	mask_str = kmalloc(len, GFP_KERNEL);
5106 	if (!mask_str)
5107 		return -ENOMEM;
5108 
5109 	len = snprintf(mask_str, len, "%*pb\n",
5110 		       cpumask_pr_args(tr->tracing_cpumask));
5111 	if (len >= count) {
5112 		count = -EINVAL;
5113 		goto out_err;
5114 	}
5115 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5116 
5117 out_err:
5118 	kfree(mask_str);
5119 
5120 	return count;
5121 }
5122 
5123 int tracing_set_cpumask(struct trace_array *tr,
5124 			cpumask_var_t tracing_cpumask_new)
5125 {
5126 	int cpu;
5127 
5128 	if (!tr)
5129 		return -EINVAL;
5130 
5131 	local_irq_disable();
5132 	arch_spin_lock(&tr->max_lock);
5133 	for_each_tracing_cpu(cpu) {
5134 		/*
5135 		 * Increase/decrease the disabled counter if we are
5136 		 * about to flip a bit in the cpumask:
5137 		 */
5138 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5139 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5140 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5141 #ifdef CONFIG_TRACER_MAX_TRACE
5142 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5143 #endif
5144 		}
5145 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5146 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5147 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5148 #ifdef CONFIG_TRACER_MAX_TRACE
5149 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5150 #endif
5151 		}
5152 	}
5153 	arch_spin_unlock(&tr->max_lock);
5154 	local_irq_enable();
5155 
5156 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5157 
5158 	return 0;
5159 }
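
/*
 * A typical use via the tracing_cpumask file (the mask is read and
 * written as a hex cpumask), e.g.:
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * restricts recording to CPUs 0 and 1: CPUs leaving the mask have their
 * ring buffers disabled above, and CPUs entering it are re-enabled.
 */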
5160 
5161 static ssize_t
5162 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5163 		      size_t count, loff_t *ppos)
5164 {
5165 	struct trace_array *tr = file_inode(filp)->i_private;
5166 	cpumask_var_t tracing_cpumask_new;
5167 	int err;
5168 
5169 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5170 		return -EINVAL;
5171 
5172 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5173 		return -ENOMEM;
5174 
5175 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5176 	if (err)
5177 		goto err_free;
5178 
5179 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5180 	if (err)
5181 		goto err_free;
5182 
5183 	free_cpumask_var(tracing_cpumask_new);
5184 
5185 	return count;
5186 
5187 err_free:
5188 	free_cpumask_var(tracing_cpumask_new);
5189 
5190 	return err;
5191 }
5192 
5193 static const struct file_operations tracing_cpumask_fops = {
5194 	.open		= tracing_open_generic_tr,
5195 	.read		= tracing_cpumask_read,
5196 	.write		= tracing_cpumask_write,
5197 	.release	= tracing_release_generic_tr,
5198 	.llseek		= generic_file_llseek,
5199 };
5200 
5201 static int tracing_trace_options_show(struct seq_file *m, void *v)
5202 {
5203 	struct tracer_opt *trace_opts;
5204 	struct trace_array *tr = m->private;
5205 	u32 tracer_flags;
5206 	int i;
5207 
5208 	guard(mutex)(&trace_types_lock);
5209 
5210 	tracer_flags = tr->current_trace->flags->val;
5211 	trace_opts = tr->current_trace->flags->opts;
5212 
5213 	for (i = 0; trace_options[i]; i++) {
5214 		if (tr->trace_flags & (1 << i))
5215 			seq_printf(m, "%s\n", trace_options[i]);
5216 		else
5217 			seq_printf(m, "no%s\n", trace_options[i]);
5218 	}
5219 
5220 	for (i = 0; trace_opts[i].name; i++) {
5221 		if (tracer_flags & trace_opts[i].bit)
5222 			seq_printf(m, "%s\n", trace_opts[i].name);
5223 		else
5224 			seq_printf(m, "no%s\n", trace_opts[i].name);
5225 	}
5226 
5227 	return 0;
5228 }
5229 
5230 static int __set_tracer_option(struct trace_array *tr,
5231 			       struct tracer_flags *tracer_flags,
5232 			       struct tracer_opt *opts, int neg)
5233 {
5234 	struct tracer *trace = tracer_flags->trace;
5235 	int ret;
5236 
5237 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5238 	if (ret)
5239 		return ret;
5240 
5241 	if (neg)
5242 		tracer_flags->val &= ~opts->bit;
5243 	else
5244 		tracer_flags->val |= opts->bit;
5245 	return 0;
5246 }
5247 
5248 /* Try to assign a tracer specific option */
5249 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5250 {
5251 	struct tracer *trace = tr->current_trace;
5252 	struct tracer_flags *tracer_flags = trace->flags;
5253 	struct tracer_opt *opts = NULL;
5254 	int i;
5255 
5256 	for (i = 0; tracer_flags->opts[i].name; i++) {
5257 		opts = &tracer_flags->opts[i];
5258 
5259 		if (strcmp(cmp, opts->name) == 0)
5260 			return __set_tracer_option(tr, trace->flags, opts, neg);
5261 	}
5262 
5263 	return -EINVAL;
5264 }
5265 
5266 /* Some tracers require overwrite to stay enabled */
5267 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5268 {
5269 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5270 		return -1;
5271 
5272 	return 0;
5273 }
5274 
5275 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5276 {
5277 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5278 	    (mask == TRACE_ITER_RECORD_CMD) ||
5279 	    (mask == TRACE_ITER_TRACE_PRINTK) ||
5280 	    (mask == TRACE_ITER_COPY_MARKER))
5281 		lockdep_assert_held(&event_mutex);
5282 
5283 	/* do nothing if flag is already set */
5284 	if (!!(tr->trace_flags & mask) == !!enabled)
5285 		return 0;
5286 
5287 	/* Give the tracer a chance to approve the change */
5288 	if (tr->current_trace->flag_changed)
5289 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5290 			return -EINVAL;
5291 
5292 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5293 		if (enabled) {
5294 			update_printk_trace(tr);
5295 		} else {
5296 			/*
5297 			 * The global_trace cannot clear this.
5298 			 * Its flag only gets cleared if another instance sets it.
5299 			 */
5300 			if (printk_trace == &global_trace)
5301 				return -EINVAL;
5302 			/*
5303 			 * An instance must always have it set;
5304 			 * by default, that's the global_trace instance.
5305 			 */
5306 			if (printk_trace == tr)
5307 				update_printk_trace(&global_trace);
5308 		}
5309 	}
5310 
5311 	if (mask == TRACE_ITER_COPY_MARKER)
5312 		update_marker_trace(tr, enabled);
5313 
5314 	if (enabled)
5315 		tr->trace_flags |= mask;
5316 	else
5317 		tr->trace_flags &= ~mask;
5318 
5319 	if (mask == TRACE_ITER_RECORD_CMD)
5320 		trace_event_enable_cmd_record(enabled);
5321 
5322 	if (mask == TRACE_ITER_RECORD_TGID) {
5323 
5324 		if (trace_alloc_tgid_map() < 0) {
5325 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5326 			return -ENOMEM;
5327 		}
5328 
5329 		trace_event_enable_tgid_record(enabled);
5330 	}
5331 
5332 	if (mask == TRACE_ITER_EVENT_FORK)
5333 		trace_event_follow_fork(tr, enabled);
5334 
5335 	if (mask == TRACE_ITER_FUNC_FORK)
5336 		ftrace_pid_follow_fork(tr, enabled);
5337 
5338 	if (mask == TRACE_ITER_OVERWRITE) {
5339 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5340 #ifdef CONFIG_TRACER_MAX_TRACE
5341 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5342 #endif
5343 	}
5344 
5345 	if (mask == TRACE_ITER_PRINTK) {
5346 		trace_printk_start_stop_comm(enabled);
5347 		trace_printk_control(enabled);
5348 	}
5349 
5350 	return 0;
5351 }
5352 
5353 int trace_set_options(struct trace_array *tr, char *option)
5354 {
5355 	char *cmp;
5356 	int neg = 0;
5357 	int ret;
5358 	size_t orig_len = strlen(option);
5359 	int len;
5360 
5361 	cmp = strstrip(option);
5362 
5363 	len = str_has_prefix(cmp, "no");
5364 	if (len)
5365 		neg = 1;
5366 
5367 	cmp += len;
5368 
5369 	mutex_lock(&event_mutex);
5370 	mutex_lock(&trace_types_lock);
5371 
5372 	ret = match_string(trace_options, -1, cmp);
5373 	/* If no option could be set, test the specific tracer options */
5374 	if (ret < 0)
5375 		ret = set_tracer_option(tr, cmp, neg);
5376 	else
5377 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5378 
5379 	mutex_unlock(&trace_types_lock);
5380 	mutex_unlock(&event_mutex);
5381 
5382 	/*
5383 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5384 	 * turn it back into a space.
5385 	 */
5386 	if (orig_len > strlen(option))
5387 		option[strlen(option)] = ' ';
5388 
5389 	return ret;
5390 }
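
/*
 * For example, "echo sym-offset > trace_options" enables a core trace
 * flag via set_tracer_flag() and "echo nosym-offset > trace_options"
 * clears it again, while an option not in trace_options[] (such as a
 * tracer-specific flag of the current tracer) falls through to
 * set_tracer_option().
 */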
5391 
5392 static void __init apply_trace_boot_options(void)
5393 {
5394 	char *buf = trace_boot_options_buf;
5395 	char *option;
5396 
5397 	while (true) {
5398 		option = strsep(&buf, ",");
5399 
5400 		if (!option)
5401 			break;
5402 
5403 		if (*option)
5404 			trace_set_options(&global_trace, option);
5405 
5406 		/* Put back the comma to allow this to be called again */
5407 		if (buf)
5408 			*(buf - 1) = ',';
5409 	}
5410 }
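
/*
 * As a sketch of the boot-time path: a command line such as
 * "trace_options=sym-offset,noirq-info" is captured into
 * trace_boot_options_buf by the corresponding __setup() handler elsewhere
 * in this file, and the loop above applies each comma-separated token
 * through trace_set_options() once tracing is initialized.
 */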
5411 
5412 static ssize_t
5413 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5414 			size_t cnt, loff_t *ppos)
5415 {
5416 	struct seq_file *m = filp->private_data;
5417 	struct trace_array *tr = m->private;
5418 	char buf[64];
5419 	int ret;
5420 
5421 	if (cnt >= sizeof(buf))
5422 		return -EINVAL;
5423 
5424 	if (copy_from_user(buf, ubuf, cnt))
5425 		return -EFAULT;
5426 
5427 	buf[cnt] = 0;
5428 
5429 	ret = trace_set_options(tr, buf);
5430 	if (ret < 0)
5431 		return ret;
5432 
5433 	*ppos += cnt;
5434 
5435 	return cnt;
5436 }
5437 
5438 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5439 {
5440 	struct trace_array *tr = inode->i_private;
5441 	int ret;
5442 
5443 	ret = tracing_check_open_get_tr(tr);
5444 	if (ret)
5445 		return ret;
5446 
5447 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5448 	if (ret < 0)
5449 		trace_array_put(tr);
5450 
5451 	return ret;
5452 }
5453 
5454 static const struct file_operations tracing_iter_fops = {
5455 	.open		= tracing_trace_options_open,
5456 	.read		= seq_read,
5457 	.llseek		= seq_lseek,
5458 	.release	= tracing_single_release_tr,
5459 	.write		= tracing_trace_options_write,
5460 };
5461 
5462 static const char readme_msg[] =
5463 	"tracing mini-HOWTO:\n\n"
5464 	"By default tracefs removes all OTH file permission bits.\n"
5465 	"When mounting tracefs an optional group id can be specified\n"
5466 	"which adds the group to every directory and file in tracefs:\n\n"
5467 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5468 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5469 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5470 	" Important files:\n"
5471 	"  trace\t\t\t- The static contents of the buffer\n"
5472 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5473 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5474 	"  current_tracer\t- function and latency tracers\n"
5475 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5476 	"  error_log\t- error log for failed commands (that support it)\n"
5477 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5478 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5479 	"  trace_clock\t\t- change the clock used to order events\n"
5480 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5481 	"      global:   Synced across CPUs but slows tracing down.\n"
5482 	"     counter:   Not a clock, but just an increment\n"
5483 	"      uptime:   Jiffy counter from time of boot\n"
5484 	"        perf:   Same clock that perf events use\n"
5485 #ifdef CONFIG_X86_64
5486 	"     x86-tsc:   TSC cycle counter\n"
5487 #endif
5488 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5489 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5490 	"    absolute:   Absolute (standalone) timestamp\n"
5491 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5492 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5493 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5494 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5495 	"\t\t\t  Remove sub-buffer with rmdir\n"
5496 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5497 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5498 	"\t\t\t  option name\n"
5499 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5500 #ifdef CONFIG_DYNAMIC_FTRACE
5501 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5502 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5503 	"\t\t\t  functions\n"
5504 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5505 	"\t     modules: Can select a group via module\n"
5506 	"\t      Format: :mod:<module-name>\n"
5507 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5508 	"\t    triggers: a command to perform when function is hit\n"
5509 	"\t      Format: <function>:<trigger>[:count]\n"
5510 	"\t     trigger: traceon, traceoff\n"
5511 	"\t\t      enable_event:<system>:<event>\n"
5512 	"\t\t      disable_event:<system>:<event>\n"
5513 #ifdef CONFIG_STACKTRACE
5514 	"\t\t      stacktrace\n"
5515 #endif
5516 #ifdef CONFIG_TRACER_SNAPSHOT
5517 	"\t\t      snapshot\n"
5518 #endif
5519 	"\t\t      dump\n"
5520 	"\t\t      cpudump\n"
5521 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5522 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5523 	"\t     The first one will disable tracing every time do_fault is hit\n"
5524 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5525 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5526 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5527 	"\t       the counter will not decrement. It only decrements when the\n"
5528 	"\t       trigger did work\n"
5529 	"\t     To remove trigger without count:\n"
5530 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5531 	"\t     To remove trigger with a count:\n"
5532 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5533 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5534 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5535 	"\t    modules: Can select a group via module command :mod:\n"
5536 	"\t    Does not accept triggers\n"
5537 #endif /* CONFIG_DYNAMIC_FTRACE */
5538 #ifdef CONFIG_FUNCTION_TRACER
5539 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5540 	"\t\t    (function)\n"
5541 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5542 	"\t\t    (function)\n"
5543 #endif
5544 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5545 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5546 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5547 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5548 #endif
5549 #ifdef CONFIG_TRACER_SNAPSHOT
5550 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5551 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5552 	"\t\t\t  information\n"
5553 #endif
5554 #ifdef CONFIG_STACK_TRACER
5555 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5556 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5557 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5558 	"\t\t\t  new trace)\n"
5559 #ifdef CONFIG_DYNAMIC_FTRACE
5560 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5561 	"\t\t\t  traces\n"
5562 #endif
5563 #endif /* CONFIG_STACK_TRACER */
5564 #ifdef CONFIG_DYNAMIC_EVENTS
5565 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5566 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5567 #endif
5568 #ifdef CONFIG_KPROBE_EVENTS
5569 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5570 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5571 #endif
5572 #ifdef CONFIG_UPROBE_EVENTS
5573 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5574 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5575 #endif
5576 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5577     defined(CONFIG_FPROBE_EVENTS)
5578 	"\t  accepts: event-definitions (one definition per line)\n"
5579 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5580 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5581 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5582 #endif
5583 #ifdef CONFIG_FPROBE_EVENTS
5584 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5585 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5586 #endif
5587 #ifdef CONFIG_HIST_TRIGGERS
5588 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5589 #endif
5590 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5591 	"\t           -:[<group>/][<event>]\n"
5592 #ifdef CONFIG_KPROBE_EVENTS
5593 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5594   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5595 #endif
5596 #ifdef CONFIG_UPROBE_EVENTS
5597   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5598 #endif
5599 	"\t     args: <name>=fetcharg[:type]\n"
5600 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5601 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5602 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5603 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5604 	"\t           <argname>[->field[->field|.field...]],\n"
5605 #endif
5606 #else
5607 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5608 #endif
5609 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5610 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5611 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5612 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5613 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5614 #ifdef CONFIG_HIST_TRIGGERS
5615 	"\t    field: <stype> <name>;\n"
5616 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5617 	"\t           [unsigned] char/int/long\n"
5618 #endif
5619 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5620 	"\t            of the <attached-group>/<attached-event>.\n"
5621 #endif
5622 	"  set_event\t\t- Enables events by name written into it\n"
5623 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5624 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5625 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5626 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5627 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5628 	"\t\t\t  events\n"
5629 	"      filter\t\t- If set, only events passing filter are traced\n"
5630 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5631 	"\t\t\t  <event>:\n"
5632 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5633 	"      filter\t\t- If set, only events passing filter are traced\n"
5634 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5635 	"\t    Format: <trigger>[:count][if <filter>]\n"
5636 	"\t   trigger: traceon, traceoff\n"
5637 	"\t            enable_event:<system>:<event>\n"
5638 	"\t            disable_event:<system>:<event>\n"
5639 #ifdef CONFIG_HIST_TRIGGERS
5640 	"\t            enable_hist:<system>:<event>\n"
5641 	"\t            disable_hist:<system>:<event>\n"
5642 #endif
5643 #ifdef CONFIG_STACKTRACE
5644 	"\t\t    stacktrace\n"
5645 #endif
5646 #ifdef CONFIG_TRACER_SNAPSHOT
5647 	"\t\t    snapshot\n"
5648 #endif
5649 #ifdef CONFIG_HIST_TRIGGERS
5650 	"\t\t    hist (see below)\n"
5651 #endif
5652 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5653 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5654 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5655 	"\t                  events/block/block_unplug/trigger\n"
5656 	"\t   The first disables tracing every time block_unplug is hit.\n"
5657 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5658 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5659 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5660 	"\t   Like function triggers, the counter is only decremented if it\n"
5661 	"\t    enabled or disabled tracing.\n"
5662 	"\t   To remove a trigger without a count:\n"
5663 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5664 	"\t   To remove a trigger with a count:\n"
5665 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5666 	"\t   Filters can be ignored when removing a trigger.\n"
5667 #ifdef CONFIG_HIST_TRIGGERS
5668 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5669 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5670 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5671 	"\t            [:values=<field1[,field2,...]>]\n"
5672 	"\t            [:sort=<field1[,field2,...]>]\n"
5673 	"\t            [:size=#entries]\n"
5674 	"\t            [:pause][:continue][:clear]\n"
5675 	"\t            [:name=histname1]\n"
5676 	"\t            [:nohitcount]\n"
5677 	"\t            [:<handler>.<action>]\n"
5678 	"\t            [if <filter>]\n\n"
5679 	"\t    Note, special fields can be used as well:\n"
5680 	"\t            common_timestamp - to record current timestamp\n"
5681 	"\t            common_cpu - to record the CPU the event happened on\n"
5682 	"\n"
5683 	"\t    A hist trigger variable can be:\n"
5684 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5685 	"\t        - a reference to another variable e.g. y=$x,\n"
5686 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5687 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5688 	"\n"
5689 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5690 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5691 	"\t    variable reference, field or numeric literal.\n"
5692 	"\n"
5693 	"\t    When a matching event is hit, an entry is added to a hash\n"
5694 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5695 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5696 	"\t    correspond to fields in the event's format description.  Keys\n"
5697 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5698 	"\t    Compound keys consisting of up to two fields can be specified\n"
5699 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5700 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5701 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5702 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5703 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5704 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5705 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5706 	"\t    its histogram data will be shared with other triggers of the\n"
5707 	"\t    same name, and trigger hits will update this common data.\n\n"
5708 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5709 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5710 	"\t    triggers attached to an event, there will be a table for each\n"
5711 	"\t    trigger in the output.  The table displayed for a named\n"
5712 	"\t    trigger will be the same as any other instance having the\n"
5713 	"\t    same name.  The default format used to display a given field\n"
5714 	"\t    can be modified by appending any of the following modifiers\n"
5715 	"\t    to the field name, as applicable:\n\n"
5716 	"\t            .hex        display a number as a hex value\n"
5717 	"\t            .sym        display an address as a symbol\n"
5718 	"\t            .sym-offset display an address as a symbol and offset\n"
5719 	"\t            .execname   display a common_pid as a program name\n"
5720 	"\t            .syscall    display a syscall id as a syscall name\n"
5721 	"\t            .log2       display log2 value rather than raw number\n"
5722 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5723 	"\t            .usecs      display a common_timestamp in microseconds\n"
5724 	"\t            .percent    display a number as a percentage value\n"
5725 	"\t            .graph      display a bar-graph of a value\n\n"
5726 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5727 	"\t    trigger or to start a hist trigger but not log any events\n"
5728 	"\t    until told to do so.  'continue' can be used to start or\n"
5729 	"\t    restart a paused hist trigger.\n\n"
5730 	"\t    The 'clear' parameter will clear the contents of a running\n"
5731 	"\t    hist trigger and leave its current paused/active state\n"
5732 	"\t    unchanged.\n\n"
5733 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5734 	"\t    raw hitcount in the histogram.\n\n"
5735 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5736 	"\t    have one event conditionally start and stop another event's\n"
5737 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5738 	"\t    the enable_event and disable_event triggers.\n\n"
5739 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5740 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5741 	"\t        <handler>.<action>\n\n"
5742 	"\t    The available handlers are:\n\n"
5743 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5744 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5745 	"\t        onchange(var)            - invoke action if var changes\n\n"
5746 	"\t    The available actions are:\n\n"
5747 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5748 	"\t        save(field,...)                      - save current event fields\n"
5749 #ifdef CONFIG_TRACER_SNAPSHOT
5750 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5751 #endif
5752 #ifdef CONFIG_SYNTH_EVENTS
5753 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5754 	"\t  Write into this file to define/undefine new synthetic events.\n"
5755 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5756 #endif
5757 #endif
5758 ;
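/*
 * Illustrative sketch (not part of the help text above) of the hist trigger
 * syntax that readme_msg documents, assuming tracefs is mounted at
 * /sys/kernel/tracing and reusing the block_unplug event from the examples:
 *
 *   echo 'hist:keys=common_pid:sort=hitcount' > \
 *           /sys/kernel/tracing/events/block/block_unplug/trigger
 *   cat /sys/kernel/tracing/events/block/block_unplug/hist
 *
 * Any field from the event's format file may be used for keys/values; the
 * choice of event and key here is only an example.
 */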
5759 
5760 static ssize_t
5761 tracing_readme_read(struct file *filp, char __user *ubuf,
5762 		       size_t cnt, loff_t *ppos)
5763 {
5764 	return simple_read_from_buffer(ubuf, cnt, ppos,
5765 					readme_msg, strlen(readme_msg));
5766 }
5767 
5768 static const struct file_operations tracing_readme_fops = {
5769 	.open		= tracing_open_generic,
5770 	.read		= tracing_readme_read,
5771 	.llseek		= generic_file_llseek,
5772 };
5773 
5774 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5775 static union trace_eval_map_item *
5776 update_eval_map(union trace_eval_map_item *ptr)
5777 {
5778 	if (!ptr->map.eval_string) {
5779 		if (ptr->tail.next) {
5780 			ptr = ptr->tail.next;
5781 			/* Set ptr to the next real item (skip head) */
5782 			ptr++;
5783 		} else
5784 			return NULL;
5785 	}
5786 	return ptr;
5787 }
5788 
5789 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5790 {
5791 	union trace_eval_map_item *ptr = v;
5792 
5793 	/*
5794 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5795 	 * This really should never happen.
5796 	 */
5797 	(*pos)++;
5798 	ptr = update_eval_map(ptr);
5799 	if (WARN_ON_ONCE(!ptr))
5800 		return NULL;
5801 
5802 	ptr++;
5803 	ptr = update_eval_map(ptr);
5804 
5805 	return ptr;
5806 }
5807 
5808 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5809 {
5810 	union trace_eval_map_item *v;
5811 	loff_t l = 0;
5812 
5813 	mutex_lock(&trace_eval_mutex);
5814 
5815 	v = trace_eval_maps;
5816 	if (v)
5817 		v++;
5818 
5819 	while (v && l < *pos) {
5820 		v = eval_map_next(m, v, &l);
5821 	}
5822 
5823 	return v;
5824 }
5825 
5826 static void eval_map_stop(struct seq_file *m, void *v)
5827 {
5828 	mutex_unlock(&trace_eval_mutex);
5829 }
5830 
5831 static int eval_map_show(struct seq_file *m, void *v)
5832 {
5833 	union trace_eval_map_item *ptr = v;
5834 
5835 	seq_printf(m, "%s %ld (%s)\n",
5836 		   ptr->map.eval_string, ptr->map.eval_value,
5837 		   ptr->map.system);
5838 
5839 	return 0;
5840 }
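/*
 * Each line emitted by eval_map_show() has the form
 * "<eval_string> <eval_value> (<system>)"; for instance a (purely
 * illustrative) entry could read "HI_SOFTIRQ 0 (irq)".
 */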
5841 
5842 static const struct seq_operations tracing_eval_map_seq_ops = {
5843 	.start		= eval_map_start,
5844 	.next		= eval_map_next,
5845 	.stop		= eval_map_stop,
5846 	.show		= eval_map_show,
5847 };
5848 
5849 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5850 {
5851 	int ret;
5852 
5853 	ret = tracing_check_open_get_tr(NULL);
5854 	if (ret)
5855 		return ret;
5856 
5857 	return seq_open(filp, &tracing_eval_map_seq_ops);
5858 }
5859 
5860 static const struct file_operations tracing_eval_map_fops = {
5861 	.open		= tracing_eval_map_open,
5862 	.read		= seq_read,
5863 	.llseek		= seq_lseek,
5864 	.release	= seq_release,
5865 };
5866 
5867 static inline union trace_eval_map_item *
5868 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5869 {
5870 	/* Return tail of array given the head */
5871 	return ptr + ptr->head.length + 1;
5872 }
5873 
5874 static void
5875 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5876 			   int len)
5877 {
5878 	struct trace_eval_map **stop;
5879 	struct trace_eval_map **map;
5880 	union trace_eval_map_item *map_array;
5881 	union trace_eval_map_item *ptr;
5882 
5883 	stop = start + len;
5884 
5885 	/*
5886 	 * The trace_eval_maps contains the map plus a head and tail item,
5887 	 * where the head holds the module and length of array, and the
5888 	 * tail holds a pointer to the next list.
5889 	 */
5890 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5891 	if (!map_array) {
5892 		pr_warn("Unable to allocate trace eval mapping\n");
5893 		return;
5894 	}
5895 
5896 	guard(mutex)(&trace_eval_mutex);
5897 
5898 	if (!trace_eval_maps)
5899 		trace_eval_maps = map_array;
5900 	else {
5901 		ptr = trace_eval_maps;
5902 		for (;;) {
5903 			ptr = trace_eval_jmp_to_tail(ptr);
5904 			if (!ptr->tail.next)
5905 				break;
5906 			ptr = ptr->tail.next;
5907 
5908 		}
5909 		ptr->tail.next = map_array;
5910 	}
5911 	map_array->head.mod = mod;
5912 	map_array->head.length = len;
5913 	map_array++;
5914 
5915 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5916 		map_array->map = **map;
5917 		map_array++;
5918 	}
5919 	memset(map_array, 0, sizeof(*map_array));
5920 }
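/*
 * Rough sketch of the list built above, inferred from the head/tail
 * handling in this function (illustrative, not a formal description):
 *
 *   trace_eval_maps --> [ head: mod, length = len ]
 *                       [ map entry 0             ]
 *                       [ ...                     ]
 *                       [ map entry len - 1       ]
 *                       [ tail: next --> next head (zeroed until linked) ]
 *
 * eval_map_start()/eval_map_next() skip the head items and follow
 * tail.next so that every appended array is walked in order.
 */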
5921 
5922 static void trace_create_eval_file(struct dentry *d_tracer)
5923 {
5924 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5925 			  NULL, &tracing_eval_map_fops);
5926 }
5927 
5928 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5929 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5930 static inline void trace_insert_eval_map_file(struct module *mod,
5931 			      struct trace_eval_map **start, int len) { }
5932 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5933 
5934 static void trace_insert_eval_map(struct module *mod,
5935 				  struct trace_eval_map **start, int len)
5936 {
5937 	struct trace_eval_map **map;
5938 
5939 	if (len <= 0)
5940 		return;
5941 
5942 	map = start;
5943 
5944 	trace_event_eval_update(map, len);
5945 
5946 	trace_insert_eval_map_file(mod, start, len);
5947 }
5948 
5949 static ssize_t
5950 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5951 		       size_t cnt, loff_t *ppos)
5952 {
5953 	struct trace_array *tr = filp->private_data;
5954 	char buf[MAX_TRACER_SIZE+2];
5955 	int r;
5956 
5957 	mutex_lock(&trace_types_lock);
5958 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5959 	mutex_unlock(&trace_types_lock);
5960 
5961 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5962 }
5963 
5964 int tracer_init(struct tracer *t, struct trace_array *tr)
5965 {
5966 	tracing_reset_online_cpus(&tr->array_buffer);
5967 	return t->init(tr);
5968 }
5969 
5970 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5971 {
5972 	int cpu;
5973 
5974 	for_each_tracing_cpu(cpu)
5975 		per_cpu_ptr(buf->data, cpu)->entries = val;
5976 }
5977 
5978 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5979 {
5980 	if (cpu == RING_BUFFER_ALL_CPUS) {
5981 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5982 	} else {
5983 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5984 	}
5985 }
5986 
5987 #ifdef CONFIG_TRACER_MAX_TRACE
5988 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5989 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5990 					struct array_buffer *size_buf, int cpu_id)
5991 {
5992 	int cpu, ret = 0;
5993 
5994 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5995 		for_each_tracing_cpu(cpu) {
5996 			ret = ring_buffer_resize(trace_buf->buffer,
5997 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5998 			if (ret < 0)
5999 				break;
6000 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6001 				per_cpu_ptr(size_buf->data, cpu)->entries;
6002 		}
6003 	} else {
6004 		ret = ring_buffer_resize(trace_buf->buffer,
6005 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6006 		if (ret == 0)
6007 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6008 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6009 	}
6010 
6011 	return ret;
6012 }
6013 #endif /* CONFIG_TRACER_MAX_TRACE */
6014 
6015 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6016 					unsigned long size, int cpu)
6017 {
6018 	int ret;
6019 
6020 	/*
6021 	 * If kernel or user changes the size of the ring buffer
6022 	 * we use the size that was given, and we can forget about
6023 	 * expanding it later.
6024 	 */
6025 	trace_set_ring_buffer_expanded(tr);
6026 
6027 	/* May be called before buffers are initialized */
6028 	if (!tr->array_buffer.buffer)
6029 		return 0;
6030 
6031 	/* Do not allow tracing while resizing ring buffer */
6032 	tracing_stop_tr(tr);
6033 
6034 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6035 	if (ret < 0)
6036 		goto out_start;
6037 
6038 #ifdef CONFIG_TRACER_MAX_TRACE
6039 	if (!tr->allocated_snapshot)
6040 		goto out;
6041 
6042 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6043 	if (ret < 0) {
6044 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6045 						     &tr->array_buffer, cpu);
6046 		if (r < 0) {
6047 			/*
6048 			 * AARGH! We are left with different
6049 			 * size max buffer!!!!
6050 			 * The max buffer is our "snapshot" buffer.
6051 			 * When a tracer needs a snapshot (one of the
6052 			 * latency tracers), it swaps the max buffer
6053 			 * with the saved snapshot. We succeeded in
6054 			 * updating the size of the main buffer, but failed to
6055 			 * update the size of the max buffer. But when we tried
6056 			 * to reset the main buffer to the original size, we
6057 			 * failed there too. This is very unlikely to
6058 			 * happen, but if it does, warn and kill all
6059 			 * tracing.
6060 			 */
6061 			WARN_ON(1);
6062 			tracing_disabled = 1;
6063 		}
6064 		goto out_start;
6065 	}
6066 
6067 	update_buffer_entries(&tr->max_buffer, cpu);
6068 
6069  out:
6070 #endif /* CONFIG_TRACER_MAX_TRACE */
6071 
6072 	update_buffer_entries(&tr->array_buffer, cpu);
6073  out_start:
6074 	tracing_start_tr(tr);
6075 	return ret;
6076 }
6077 
6078 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6079 				  unsigned long size, int cpu_id)
6080 {
6081 	guard(mutex)(&trace_types_lock);
6082 
6083 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6084 		/* make sure this CPU is enabled in the mask */
6085 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6086 			return -EINVAL;
6087 	}
6088 
6089 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6090 }
6091 
6092 struct trace_mod_entry {
6093 	unsigned long	mod_addr;
6094 	char		mod_name[MODULE_NAME_LEN];
6095 };
6096 
6097 struct trace_scratch {
6098 	unsigned int		clock_id;
6099 	unsigned long		text_addr;
6100 	unsigned long		nr_entries;
6101 	struct trace_mod_entry	entries[];
6102 };
6103 
6104 static DEFINE_MUTEX(scratch_mutex);
6105 
6106 static int cmp_mod_entry(const void *key, const void *pivot)
6107 {
6108 	unsigned long addr = (unsigned long)key;
6109 	const struct trace_mod_entry *ent = pivot;
6110 
6111 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6112 		return 0;
6113 	else
6114 		return addr - ent->mod_addr;
6115 }
6116 
6117 /**
6118  * trace_adjust_address() - Adjust prev boot address to current address.
6119  * @tr: Persistent ring buffer's trace_array.
6120  * @addr: Address in @tr which is adjusted.
6121  */
6122 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6123 {
6124 	struct trace_module_delta *module_delta;
6125 	struct trace_scratch *tscratch;
6126 	struct trace_mod_entry *entry;
6127 	unsigned long raddr;
6128 	int idx = 0, nr_entries;
6129 
6130 	/* If we don't have last boot delta, return the address */
6131 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6132 		return addr;
6133 
6134 	/* tr->module_delta must be protected by rcu. */
6135 	guard(rcu)();
6136 	tscratch = tr->scratch;
6137 	/* if there is no tscratch, module_delta must be NULL. */
6138 	module_delta = READ_ONCE(tr->module_delta);
6139 	if (!module_delta || !tscratch->nr_entries ||
6140 	    tscratch->entries[0].mod_addr > addr) {
6141 		raddr = addr + tr->text_delta;
6142 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6143 			is_kernel_rodata(raddr) ? raddr : addr;
6144 	}
6145 
6146 	/* Note that entries must be sorted. */
6147 	nr_entries = tscratch->nr_entries;
6148 	if (nr_entries == 1 ||
6149 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6150 		idx = nr_entries - 1;
6151 	else {
6152 		entry = __inline_bsearch((void *)addr,
6153 				tscratch->entries,
6154 				nr_entries - 1,
6155 				sizeof(tscratch->entries[0]),
6156 				cmp_mod_entry);
6157 		if (entry)
6158 			idx = entry - tscratch->entries;
6159 	}
6160 
6161 	return addr + module_delta->delta[idx];
6162 }
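/*
 * Worked example for the adjustment above (all numbers are made up):
 * if the previous boot stored entries[1].mod_addr = 0xffffffffc0100000 for
 * module "foo" and module_delta->delta[1] holds the difference between
 * foo's text base then and now, an address 0xffffffffc0100040 read from the
 * persistent buffer is reported as 0xffffffffc0100040 + delta[1], i.e. the
 * same offset into foo's text in the current boot.
 */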
6163 
6164 #ifdef CONFIG_MODULES
6165 static int save_mod(struct module *mod, void *data)
6166 {
6167 	struct trace_array *tr = data;
6168 	struct trace_scratch *tscratch;
6169 	struct trace_mod_entry *entry;
6170 	unsigned int size;
6171 
6172 	tscratch = tr->scratch;
6173 	if (!tscratch)
6174 		return -1;
6175 	size = tr->scratch_size;
6176 
6177 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6178 		return -1;
6179 
6180 	entry = &tscratch->entries[tscratch->nr_entries];
6181 
6182 	tscratch->nr_entries++;
6183 
6184 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6185 	strscpy(entry->mod_name, mod->name);
6186 
6187 	return 0;
6188 }
6189 #else
6190 static int save_mod(struct module *mod, void *data)
6191 {
6192 	return 0;
6193 }
6194 #endif
6195 
6196 static void update_last_data(struct trace_array *tr)
6197 {
6198 	struct trace_module_delta *module_delta;
6199 	struct trace_scratch *tscratch;
6200 
6201 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6202 		return;
6203 
6204 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6205 		return;
6206 
6207 	/* Only if the buffer has previous boot data, clear and update it. */
6208 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6209 
6210 	/* Reset the module list and reload them */
6211 	if (tr->scratch) {
6212 		struct trace_scratch *tscratch = tr->scratch;
6213 
6214 		tscratch->clock_id = tr->clock_id;
6215 		memset(tscratch->entries, 0,
6216 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6217 		tscratch->nr_entries = 0;
6218 
6219 		guard(mutex)(&scratch_mutex);
6220 		module_for_each_mod(save_mod, tr);
6221 	}
6222 
6223 	/*
6224 	 * Need to clear all CPU buffers as there cannot be events
6225 	 * from the previous boot mixed with events from this boot
6226 	 * as that will cause a confusing trace. Need to clear all
6227 	 * CPU buffers, even for those that may currently be offline.
6228 	 */
6229 	tracing_reset_all_cpus(&tr->array_buffer);
6230 
6231 	/* Using current data now */
6232 	tr->text_delta = 0;
6233 
6234 	if (!tr->scratch)
6235 		return;
6236 
6237 	tscratch = tr->scratch;
6238 	module_delta = READ_ONCE(tr->module_delta);
6239 	WRITE_ONCE(tr->module_delta, NULL);
6240 	kfree_rcu(module_delta, rcu);
6241 
6242 	/* Set the persistent ring buffer meta data to this address */
6243 	tscratch->text_addr = (unsigned long)_text;
6244 }
6245 
6246 /**
6247  * tracing_update_buffers - used by tracing facility to expand ring buffers
6248  * @tr: The tracing instance
6249  *
6250  * To save memory when tracing is never used on a system that has it
6251  * configured in, the ring buffers are set to a minimum size. But once
6252  * a user starts to use the tracing facility, they need to grow
6253  * to their default size.
6254  *
6255  * This function is to be called when a tracer is about to be used.
6256  */
6257 int tracing_update_buffers(struct trace_array *tr)
6258 {
6259 	int ret = 0;
6260 
6261 	mutex_lock(&trace_types_lock);
6262 
6263 	update_last_data(tr);
6264 
6265 	if (!tr->ring_buffer_expanded)
6266 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6267 						RING_BUFFER_ALL_CPUS);
6268 	mutex_unlock(&trace_types_lock);
6269 
6270 	return ret;
6271 }
6272 
6273 struct trace_option_dentry;
6274 
6275 static void
6276 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6277 
6278 /*
6279  * Used to clear out the tracer before deletion of an instance.
6280  * Must have trace_types_lock held.
6281  */
6282 static void tracing_set_nop(struct trace_array *tr)
6283 {
6284 	if (tr->current_trace == &nop_trace)
6285 		return;
6286 
6287 	tr->current_trace->enabled--;
6288 
6289 	if (tr->current_trace->reset)
6290 		tr->current_trace->reset(tr);
6291 
6292 	tr->current_trace = &nop_trace;
6293 }
6294 
6295 static bool tracer_options_updated;
6296 
6297 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6298 {
6299 	/* Only enable if the directory has been created already. */
6300 	if (!tr->dir)
6301 		return;
6302 
6303 	/* Only create trace option files after update_tracer_options finish */
6304 	if (!tracer_options_updated)
6305 		return;
6306 
6307 	create_trace_option_files(tr, t);
6308 }
6309 
6310 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6311 {
6312 	struct tracer *t;
6313 #ifdef CONFIG_TRACER_MAX_TRACE
6314 	bool had_max_tr;
6315 #endif
6316 	int ret;
6317 
6318 	guard(mutex)(&trace_types_lock);
6319 
6320 	update_last_data(tr);
6321 
6322 	if (!tr->ring_buffer_expanded) {
6323 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6324 						RING_BUFFER_ALL_CPUS);
6325 		if (ret < 0)
6326 			return ret;
6327 		ret = 0;
6328 	}
6329 
6330 	for (t = trace_types; t; t = t->next) {
6331 		if (strcmp(t->name, buf) == 0)
6332 			break;
6333 	}
6334 	if (!t)
6335 		return -EINVAL;
6336 
6337 	if (t == tr->current_trace)
6338 		return 0;
6339 
6340 #ifdef CONFIG_TRACER_SNAPSHOT
6341 	if (t->use_max_tr) {
6342 		local_irq_disable();
6343 		arch_spin_lock(&tr->max_lock);
6344 		ret = tr->cond_snapshot ? -EBUSY : 0;
6345 		arch_spin_unlock(&tr->max_lock);
6346 		local_irq_enable();
6347 		if (ret)
6348 			return ret;
6349 	}
6350 #endif
6351 	/* Some tracers won't work on kernel command line */
6352 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6353 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6354 			t->name);
6355 		return -EINVAL;
6356 	}
6357 
6358 	/* Some tracers are only allowed for the top level buffer */
6359 	if (!trace_ok_for_array(t, tr))
6360 		return -EINVAL;
6361 
6362 	/* If trace pipe files are being read, we can't change the tracer */
6363 	if (tr->trace_ref)
6364 		return -EBUSY;
6365 
6366 	trace_branch_disable();
6367 
6368 	tr->current_trace->enabled--;
6369 
6370 	if (tr->current_trace->reset)
6371 		tr->current_trace->reset(tr);
6372 
6373 #ifdef CONFIG_TRACER_MAX_TRACE
6374 	had_max_tr = tr->current_trace->use_max_tr;
6375 
6376 	/* Current trace needs to be nop_trace before synchronize_rcu */
6377 	tr->current_trace = &nop_trace;
6378 
6379 	if (had_max_tr && !t->use_max_tr) {
6380 		/*
6381 		 * We need to make sure that the update_max_tr sees that
6382 		 * current_trace changed to nop_trace to keep it from
6383 		 * swapping the buffers after we resize it.
6384 		 * update_max_tr() is called with interrupts disabled,
6385 		 * so a synchronize_rcu() is sufficient.
6386 		 */
6387 		synchronize_rcu();
6388 		free_snapshot(tr);
6389 		tracing_disarm_snapshot(tr);
6390 	}
6391 
6392 	if (!had_max_tr && t->use_max_tr) {
6393 		ret = tracing_arm_snapshot_locked(tr);
6394 		if (ret)
6395 			return ret;
6396 	}
6397 #else
6398 	tr->current_trace = &nop_trace;
6399 #endif
6400 
6401 	if (t->init) {
6402 		ret = tracer_init(t, tr);
6403 		if (ret) {
6404 #ifdef CONFIG_TRACER_MAX_TRACE
6405 			if (t->use_max_tr)
6406 				tracing_disarm_snapshot(tr);
6407 #endif
6408 			return ret;
6409 		}
6410 	}
6411 
6412 	tr->current_trace = t;
6413 	tr->current_trace->enabled++;
6414 	trace_branch_enable(tr);
6415 
6416 	return 0;
6417 }
6418 
6419 static ssize_t
6420 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6421 			size_t cnt, loff_t *ppos)
6422 {
6423 	struct trace_array *tr = filp->private_data;
6424 	char buf[MAX_TRACER_SIZE+1];
6425 	char *name;
6426 	size_t ret;
6427 	int err;
6428 
6429 	ret = cnt;
6430 
6431 	if (cnt > MAX_TRACER_SIZE)
6432 		cnt = MAX_TRACER_SIZE;
6433 
6434 	if (copy_from_user(buf, ubuf, cnt))
6435 		return -EFAULT;
6436 
6437 	buf[cnt] = 0;
6438 
6439 	name = strim(buf);
6440 
6441 	err = tracing_set_tracer(tr, name);
6442 	if (err)
6443 		return err;
6444 
6445 	*ppos += ret;
6446 
6447 	return ret;
6448 }
6449 
6450 static ssize_t
6451 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6452 		   size_t cnt, loff_t *ppos)
6453 {
6454 	char buf[64];
6455 	int r;
6456 
6457 	r = snprintf(buf, sizeof(buf), "%ld\n",
6458 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6459 	if (r > sizeof(buf))
6460 		r = sizeof(buf);
6461 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6462 }
6463 
6464 static ssize_t
6465 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6466 		    size_t cnt, loff_t *ppos)
6467 {
6468 	unsigned long val;
6469 	int ret;
6470 
6471 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6472 	if (ret)
6473 		return ret;
6474 
6475 	*ptr = val * 1000;
6476 
6477 	return cnt;
6478 }
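/*
 * Units note for the two helpers above: the backing variables are kept in
 * nanoseconds, but the files are read and written in microseconds. For
 * example, "echo 50 > tracing_thresh" stores 50000 (ns) and reading the
 * file prints "50" again (a stored value of (unsigned long)-1 is shown
 * as -1).
 */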
6479 
6480 static ssize_t
6481 tracing_thresh_read(struct file *filp, char __user *ubuf,
6482 		    size_t cnt, loff_t *ppos)
6483 {
6484 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6485 }
6486 
6487 static ssize_t
6488 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6489 		     size_t cnt, loff_t *ppos)
6490 {
6491 	struct trace_array *tr = filp->private_data;
6492 	int ret;
6493 
6494 	guard(mutex)(&trace_types_lock);
6495 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6496 	if (ret < 0)
6497 		return ret;
6498 
6499 	if (tr->current_trace->update_thresh) {
6500 		ret = tr->current_trace->update_thresh(tr);
6501 		if (ret < 0)
6502 			return ret;
6503 	}
6504 
6505 	return cnt;
6506 }
6507 
6508 #ifdef CONFIG_TRACER_MAX_TRACE
6509 
6510 static ssize_t
6511 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6512 		     size_t cnt, loff_t *ppos)
6513 {
6514 	struct trace_array *tr = filp->private_data;
6515 
6516 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6517 }
6518 
6519 static ssize_t
6520 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6521 		      size_t cnt, loff_t *ppos)
6522 {
6523 	struct trace_array *tr = filp->private_data;
6524 
6525 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6526 }
6527 
6528 #endif
6529 
6530 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6531 {
6532 	if (cpu == RING_BUFFER_ALL_CPUS) {
6533 		if (cpumask_empty(tr->pipe_cpumask)) {
6534 			cpumask_setall(tr->pipe_cpumask);
6535 			return 0;
6536 		}
6537 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6538 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6539 		return 0;
6540 	}
6541 	return -EBUSY;
6542 }
6543 
6544 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6545 {
6546 	if (cpu == RING_BUFFER_ALL_CPUS) {
6547 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6548 		cpumask_clear(tr->pipe_cpumask);
6549 	} else {
6550 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6551 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6552 	}
6553 }
6554 
6555 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6556 {
6557 	struct trace_array *tr = inode->i_private;
6558 	struct trace_iterator *iter;
6559 	int cpu;
6560 	int ret;
6561 
6562 	ret = tracing_check_open_get_tr(tr);
6563 	if (ret)
6564 		return ret;
6565 
6566 	mutex_lock(&trace_types_lock);
6567 	cpu = tracing_get_cpu(inode);
6568 	ret = open_pipe_on_cpu(tr, cpu);
6569 	if (ret)
6570 		goto fail_pipe_on_cpu;
6571 
6572 	/* create a buffer to store the information to pass to userspace */
6573 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6574 	if (!iter) {
6575 		ret = -ENOMEM;
6576 		goto fail_alloc_iter;
6577 	}
6578 
6579 	trace_seq_init(&iter->seq);
6580 	iter->trace = tr->current_trace;
6581 
6582 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6583 		ret = -ENOMEM;
6584 		goto fail;
6585 	}
6586 
6587 	/* trace pipe does not show start of buffer */
6588 	cpumask_setall(iter->started);
6589 
6590 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6591 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6592 
6593 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6594 	if (trace_clocks[tr->clock_id].in_ns)
6595 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6596 
6597 	iter->tr = tr;
6598 	iter->array_buffer = &tr->array_buffer;
6599 	iter->cpu_file = cpu;
6600 	mutex_init(&iter->mutex);
6601 	filp->private_data = iter;
6602 
6603 	if (iter->trace->pipe_open)
6604 		iter->trace->pipe_open(iter);
6605 
6606 	nonseekable_open(inode, filp);
6607 
6608 	tr->trace_ref++;
6609 
6610 	mutex_unlock(&trace_types_lock);
6611 	return ret;
6612 
6613 fail:
6614 	kfree(iter);
6615 fail_alloc_iter:
6616 	close_pipe_on_cpu(tr, cpu);
6617 fail_pipe_on_cpu:
6618 	__trace_array_put(tr);
6619 	mutex_unlock(&trace_types_lock);
6620 	return ret;
6621 }
6622 
6623 static int tracing_release_pipe(struct inode *inode, struct file *file)
6624 {
6625 	struct trace_iterator *iter = file->private_data;
6626 	struct trace_array *tr = inode->i_private;
6627 
6628 	mutex_lock(&trace_types_lock);
6629 
6630 	tr->trace_ref--;
6631 
6632 	if (iter->trace->pipe_close)
6633 		iter->trace->pipe_close(iter);
6634 	close_pipe_on_cpu(tr, iter->cpu_file);
6635 	mutex_unlock(&trace_types_lock);
6636 
6637 	free_trace_iter_content(iter);
6638 	kfree(iter);
6639 
6640 	trace_array_put(tr);
6641 
6642 	return 0;
6643 }
6644 
6645 static __poll_t
6646 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6647 {
6648 	struct trace_array *tr = iter->tr;
6649 
6650 	/* Iterators are static, they should be filled or empty */
6651 	if (trace_buffer_iter(iter, iter->cpu_file))
6652 		return EPOLLIN | EPOLLRDNORM;
6653 
6654 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6655 		/*
6656 		 * Always select as readable when in blocking mode
6657 		 */
6658 		return EPOLLIN | EPOLLRDNORM;
6659 	else
6660 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6661 					     filp, poll_table, iter->tr->buffer_percent);
6662 }
6663 
6664 static __poll_t
6665 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6666 {
6667 	struct trace_iterator *iter = filp->private_data;
6668 
6669 	return trace_poll(iter, filp, poll_table);
6670 }
6671 
6672 /* Must be called with iter->mutex held. */
6673 static int tracing_wait_pipe(struct file *filp)
6674 {
6675 	struct trace_iterator *iter = filp->private_data;
6676 	int ret;
6677 
6678 	while (trace_empty(iter)) {
6679 
6680 		if ((filp->f_flags & O_NONBLOCK)) {
6681 			return -EAGAIN;
6682 		}
6683 
6684 		/*
6685 		 * We block until we read something and tracing is disabled.
6686 		 * We still block if tracing is disabled but we have never
6687 		 * read anything. This allows a user to cat this file, and
6688 		 * then enable tracing. But after we have read something,
6689 		 * we give an EOF when tracing is again disabled.
6690 		 *
6691 		 * iter->pos will be 0 if we haven't read anything.
6692 		 */
6693 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6694 			break;
6695 
6696 		mutex_unlock(&iter->mutex);
6697 
6698 		ret = wait_on_pipe(iter, 0);
6699 
6700 		mutex_lock(&iter->mutex);
6701 
6702 		if (ret)
6703 			return ret;
6704 	}
6705 
6706 	return 1;
6707 }
6708 
6709 static bool update_last_data_if_empty(struct trace_array *tr)
6710 {
6711 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6712 		return false;
6713 
6714 	if (!ring_buffer_empty(tr->array_buffer.buffer))
6715 		return false;
6716 
6717 	/*
6718 	 * If the buffer contains the last boot data and all per-cpu
6719 	 * buffers are empty, reset it from the kernel side.
6720 	 */
6721 	update_last_data(tr);
6722 	return true;
6723 }
6724 
6725 /*
6726  * Consumer reader.
6727  */
6728 static ssize_t
6729 tracing_read_pipe(struct file *filp, char __user *ubuf,
6730 		  size_t cnt, loff_t *ppos)
6731 {
6732 	struct trace_iterator *iter = filp->private_data;
6733 	ssize_t sret;
6734 
6735 	/*
6736 	 * Avoid more than one consumer on a single file descriptor.
6737 	 * This is just a matter of trace coherency; the ring buffer itself
6738 	 * is protected.
6739 	 */
6740 	guard(mutex)(&iter->mutex);
6741 
6742 	/* return any leftover data */
6743 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6744 	if (sret != -EBUSY)
6745 		return sret;
6746 
6747 	trace_seq_init(&iter->seq);
6748 
6749 	if (iter->trace->read) {
6750 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6751 		if (sret)
6752 			return sret;
6753 	}
6754 
6755 waitagain:
6756 	if (update_last_data_if_empty(iter->tr))
6757 		return 0;
6758 
6759 	sret = tracing_wait_pipe(filp);
6760 	if (sret <= 0)
6761 		return sret;
6762 
6763 	/* stop when tracing is finished */
6764 	if (trace_empty(iter))
6765 		return 0;
6766 
6767 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6768 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6769 
6770 	/* reset all but tr, trace, and overruns */
6771 	trace_iterator_reset(iter);
6772 	cpumask_clear(iter->started);
6773 	trace_seq_init(&iter->seq);
6774 
6775 	trace_event_read_lock();
6776 	trace_access_lock(iter->cpu_file);
6777 	while (trace_find_next_entry_inc(iter) != NULL) {
6778 		enum print_line_t ret;
6779 		int save_len = iter->seq.seq.len;
6780 
6781 		ret = print_trace_line(iter);
6782 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6783 			/*
6784 			 * If one print_trace_line() fills entire trace_seq in one shot,
6785 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6786 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6787 			 * In this case, we need to consume it; otherwise the loop will peek
6788 			 */
6789 			if (save_len == 0) {
6790 				iter->seq.full = 0;
6791 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6792 				trace_consume(iter);
6793 				break;
6794 			}
6795 
6796 			/* In other cases, don't print partial lines */
6797 			iter->seq.seq.len = save_len;
6798 			break;
6799 		}
6800 		if (ret != TRACE_TYPE_NO_CONSUME)
6801 			trace_consume(iter);
6802 
6803 		if (trace_seq_used(&iter->seq) >= cnt)
6804 			break;
6805 
6806 		/*
6807 		 * Setting the full flag means we reached the trace_seq buffer
6808 		 * size and we should have left via the partial output condition above.
6809 		 * One of the trace_seq_* functions is not being used properly.
6810 		 */
6811 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6812 			  iter->ent->type);
6813 	}
6814 	trace_access_unlock(iter->cpu_file);
6815 	trace_event_read_unlock();
6816 
6817 	/* Now copy what we have to the user */
6818 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6819 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6820 		trace_seq_init(&iter->seq);
6821 
6822 	/*
6823 	 * If there was nothing to send to user, in spite of consuming trace
6824 	 * entries, go back to wait for more entries.
6825 	 */
6826 	if (sret == -EBUSY)
6827 		goto waitagain;
6828 
6829 	return sret;
6830 }
6831 
6832 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6833 				     unsigned int idx)
6834 {
6835 	__free_page(spd->pages[idx]);
6836 }
6837 
6838 static size_t
6839 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6840 {
6841 	size_t count;
6842 	int save_len;
6843 	int ret;
6844 
6845 	/* Seq buffer is page-sized, exactly what we need. */
6846 	for (;;) {
6847 		save_len = iter->seq.seq.len;
6848 		ret = print_trace_line(iter);
6849 
6850 		if (trace_seq_has_overflowed(&iter->seq)) {
6851 			iter->seq.seq.len = save_len;
6852 			break;
6853 		}
6854 
6855 		/*
6856 		 * This should not be hit, because it should only
6857 		 * be set if the iter->seq overflowed. But check it
6858 		 * anyway to be safe.
6859 		 */
6860 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6861 			iter->seq.seq.len = save_len;
6862 			break;
6863 		}
6864 
6865 		count = trace_seq_used(&iter->seq) - save_len;
6866 		if (rem < count) {
6867 			rem = 0;
6868 			iter->seq.seq.len = save_len;
6869 			break;
6870 		}
6871 
6872 		if (ret != TRACE_TYPE_NO_CONSUME)
6873 			trace_consume(iter);
6874 		rem -= count;
6875 		if (!trace_find_next_entry_inc(iter))	{
6876 			rem = 0;
6877 			iter->ent = NULL;
6878 			break;
6879 		}
6880 	}
6881 
6882 	return rem;
6883 }
6884 
6885 static ssize_t tracing_splice_read_pipe(struct file *filp,
6886 					loff_t *ppos,
6887 					struct pipe_inode_info *pipe,
6888 					size_t len,
6889 					unsigned int flags)
6890 {
6891 	struct page *pages_def[PIPE_DEF_BUFFERS];
6892 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6893 	struct trace_iterator *iter = filp->private_data;
6894 	struct splice_pipe_desc spd = {
6895 		.pages		= pages_def,
6896 		.partial	= partial_def,
6897 		.nr_pages	= 0, /* This gets updated below. */
6898 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6899 		.ops		= &default_pipe_buf_ops,
6900 		.spd_release	= tracing_spd_release_pipe,
6901 	};
6902 	ssize_t ret;
6903 	size_t rem;
6904 	unsigned int i;
6905 
6906 	if (splice_grow_spd(pipe, &spd))
6907 		return -ENOMEM;
6908 
6909 	mutex_lock(&iter->mutex);
6910 
6911 	if (iter->trace->splice_read) {
6912 		ret = iter->trace->splice_read(iter, filp,
6913 					       ppos, pipe, len, flags);
6914 		if (ret)
6915 			goto out_err;
6916 	}
6917 
6918 	ret = tracing_wait_pipe(filp);
6919 	if (ret <= 0)
6920 		goto out_err;
6921 
6922 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6923 		ret = -EFAULT;
6924 		goto out_err;
6925 	}
6926 
6927 	trace_event_read_lock();
6928 	trace_access_lock(iter->cpu_file);
6929 
6930 	/* Fill as many pages as possible. */
6931 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6932 		spd.pages[i] = alloc_page(GFP_KERNEL);
6933 		if (!spd.pages[i])
6934 			break;
6935 
6936 		rem = tracing_fill_pipe_page(rem, iter);
6937 
6938 		/* Copy the data into the page, so we can start over. */
6939 		ret = trace_seq_to_buffer(&iter->seq,
6940 					  page_address(spd.pages[i]),
6941 					  min((size_t)trace_seq_used(&iter->seq),
6942 						  (size_t)PAGE_SIZE));
6943 		if (ret < 0) {
6944 			__free_page(spd.pages[i]);
6945 			break;
6946 		}
6947 		spd.partial[i].offset = 0;
6948 		spd.partial[i].len = ret;
6949 
6950 		trace_seq_init(&iter->seq);
6951 	}
6952 
6953 	trace_access_unlock(iter->cpu_file);
6954 	trace_event_read_unlock();
6955 	mutex_unlock(&iter->mutex);
6956 
6957 	spd.nr_pages = i;
6958 
6959 	if (i)
6960 		ret = splice_to_pipe(pipe, &spd);
6961 	else
6962 		ret = 0;
6963 out:
6964 	splice_shrink_spd(&spd);
6965 	return ret;
6966 
6967 out_err:
6968 	mutex_unlock(&iter->mutex);
6969 	goto out;
6970 }
6971 
6972 static ssize_t
6973 tracing_entries_read(struct file *filp, char __user *ubuf,
6974 		     size_t cnt, loff_t *ppos)
6975 {
6976 	struct inode *inode = file_inode(filp);
6977 	struct trace_array *tr = inode->i_private;
6978 	int cpu = tracing_get_cpu(inode);
6979 	char buf[64];
6980 	int r = 0;
6981 	ssize_t ret;
6982 
6983 	mutex_lock(&trace_types_lock);
6984 
6985 	if (cpu == RING_BUFFER_ALL_CPUS) {
6986 		int cpu, buf_size_same;
6987 		unsigned long size;
6988 
6989 		size = 0;
6990 		buf_size_same = 1;
6991 		/* check if all cpu sizes are same */
6992 		for_each_tracing_cpu(cpu) {
6993 			/* fill in the size from first enabled cpu */
6994 			if (size == 0)
6995 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6996 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6997 				buf_size_same = 0;
6998 				break;
6999 			}
7000 		}
7001 
7002 		if (buf_size_same) {
7003 			if (!tr->ring_buffer_expanded)
7004 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7005 					    size >> 10,
7006 					    trace_buf_size >> 10);
7007 			else
7008 				r = sprintf(buf, "%lu\n", size >> 10);
7009 		} else
7010 			r = sprintf(buf, "X\n");
7011 	} else
7012 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7013 
7014 	mutex_unlock(&trace_types_lock);
7015 
7016 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7017 	return ret;
7018 }
7019 
7020 static ssize_t
7021 tracing_entries_write(struct file *filp, const char __user *ubuf,
7022 		      size_t cnt, loff_t *ppos)
7023 {
7024 	struct inode *inode = file_inode(filp);
7025 	struct trace_array *tr = inode->i_private;
7026 	unsigned long val;
7027 	int ret;
7028 
7029 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7030 	if (ret)
7031 		return ret;
7032 
7033 	/* must have at least 1 entry */
7034 	if (!val)
7035 		return -EINVAL;
7036 
7037 	/* value is in KB */
7038 	val <<= 10;
7039 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7040 	if (ret < 0)
7041 		return ret;
7042 
7043 	*ppos += cnt;
7044 
7045 	return cnt;
7046 }
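/*
 * Usage sketch for the write handler above (hooked up to buffer_size_kb
 * elsewhere in this file): the value written is in KB per CPU, so
 *
 *   echo 1024 > /sys/kernel/tracing/buffer_size_kb
 *
 * requests roughly 1 MiB of ring buffer per CPU (val <<= 10 above), while
 * writing the per_cpu/cpuN/buffer_size_kb file resizes only that CPU.
 */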
7047 
7048 static ssize_t
7049 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7050 				size_t cnt, loff_t *ppos)
7051 {
7052 	struct trace_array *tr = filp->private_data;
7053 	char buf[64];
7054 	int r, cpu;
7055 	unsigned long size = 0, expanded_size = 0;
7056 
7057 	mutex_lock(&trace_types_lock);
7058 	for_each_tracing_cpu(cpu) {
7059 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7060 		if (!tr->ring_buffer_expanded)
7061 			expanded_size += trace_buf_size >> 10;
7062 	}
7063 	if (tr->ring_buffer_expanded)
7064 		r = sprintf(buf, "%lu\n", size);
7065 	else
7066 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7067 	mutex_unlock(&trace_types_lock);
7068 
7069 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7070 }
7071 
7072 #define LAST_BOOT_HEADER ((void *)1)
7073 
7074 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7075 {
7076 	struct trace_array *tr = m->private;
7077 	struct trace_scratch *tscratch = tr->scratch;
7078 	unsigned int index = *pos;
7079 
7080 	(*pos)++;
7081 
7082 	if (*pos == 1)
7083 		return LAST_BOOT_HEADER;
7084 
7085 	/* Only show offsets of the last boot data */
7086 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7087 		return NULL;
7088 
7089 	/* *pos 0 is for the header, 1 is for the first module */
7090 	index--;
7091 
7092 	if (index >= tscratch->nr_entries)
7093 		return NULL;
7094 
7095 	return &tscratch->entries[index];
7096 }
7097 
7098 static void *l_start(struct seq_file *m, loff_t *pos)
7099 {
7100 	mutex_lock(&scratch_mutex);
7101 
7102 	return l_next(m, NULL, pos);
7103 }
7104 
7105 static void l_stop(struct seq_file *m, void *p)
7106 {
7107 	mutex_unlock(&scratch_mutex);
7108 }
7109 
7110 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7111 {
7112 	struct trace_scratch *tscratch = tr->scratch;
7113 
7114 	/*
7115 	 * Do not leak KASLR address. This only shows the KASLR address of
7116 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7117 	 * flag gets cleared, and this should only report "current".
7118 	 * Otherwise it shows the KASLR address from the previous boot which
7119 	 * should not be the same as the current boot.
7120 	 */
7121 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7122 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7123 	else
7124 		seq_puts(m, "# Current\n");
7125 }
7126 
7127 static int l_show(struct seq_file *m, void *v)
7128 {
7129 	struct trace_array *tr = m->private;
7130 	struct trace_mod_entry *entry = v;
7131 
7132 	if (v == LAST_BOOT_HEADER) {
7133 		show_last_boot_header(m, tr);
7134 		return 0;
7135 	}
7136 
7137 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7138 	return 0;
7139 }
7140 
7141 static const struct seq_operations last_boot_seq_ops = {
7142 	.start		= l_start,
7143 	.next		= l_next,
7144 	.stop		= l_stop,
7145 	.show		= l_show,
7146 };
7147 
7148 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7149 {
7150 	struct trace_array *tr = inode->i_private;
7151 	struct seq_file *m;
7152 	int ret;
7153 
7154 	ret = tracing_check_open_get_tr(tr);
7155 	if (ret)
7156 		return ret;
7157 
7158 	ret = seq_open(file, &last_boot_seq_ops);
7159 	if (ret) {
7160 		trace_array_put(tr);
7161 		return ret;
7162 	}
7163 
7164 	m = file->private_data;
7165 	m->private = tr;
7166 
7167 	return 0;
7168 }
7169 
7170 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7171 {
7172 	struct trace_array *tr = inode->i_private;
7173 	int cpu = tracing_get_cpu(inode);
7174 	int ret;
7175 
7176 	ret = tracing_check_open_get_tr(tr);
7177 	if (ret)
7178 		return ret;
7179 
7180 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7181 	if (ret < 0)
7182 		__trace_array_put(tr);
7183 	return ret;
7184 }
7185 
7186 static ssize_t
7187 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7188 			  size_t cnt, loff_t *ppos)
7189 {
7190 	/*
7191 	 * There is no need to read what the user has written; this function
7192 	 * just makes sure that there is no error when "echo" is used.
7193 	 */
7194 
7195 	*ppos += cnt;
7196 
7197 	return cnt;
7198 }
7199 
7200 static int
7201 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7202 {
7203 	struct trace_array *tr = inode->i_private;
7204 
7205 	/* disable tracing ? */
7206 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7207 		tracer_tracing_off(tr);
7208 	/* resize the ring buffer to 0 */
7209 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7210 
7211 	trace_array_put(tr);
7212 
7213 	return 0;
7214 }
7215 
7216 #define TRACE_MARKER_MAX_SIZE		4096
7217 
7218 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
7219 				      size_t cnt, unsigned long ip)
7220 {
7221 	struct ring_buffer_event *event;
7222 	enum event_trigger_type tt = ETT_NONE;
7223 	struct trace_buffer *buffer;
7224 	struct print_entry *entry;
7225 	int meta_size;
7226 	ssize_t written;
7227 	size_t size;
7228 	int len;
7229 
7230 /* Also used by the trace_marker_raw write path (write_raw_marker_to_buffer()) */
7231 #define FAULTED_STR "<faulted>"
7232 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7233 
7234 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7235  again:
7236 	size = cnt + meta_size;
7237 
7238 	/* If less than "<faulted>", then make sure we can still add that */
7239 	if (cnt < FAULTED_SIZE)
7240 		size += FAULTED_SIZE - cnt;
7241 
7242 	buffer = tr->array_buffer.buffer;
7243 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7244 					    tracing_gen_ctx());
7245 	if (unlikely(!event)) {
7246 		/*
7247 		 * If the size was greater than what was allowed, then
7248 		 * make it smaller and try again.
7249 		 */
7250 		if (size > ring_buffer_max_event_size(buffer)) {
7251 			/* a cnt smaller than FAULTED_SIZE should never make size bigger than max */
7252 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7253 				return -EBADF;
7254 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7255 			/* The above should only happen once */
7256 			if (WARN_ON_ONCE(cnt + meta_size == size))
7257 				return -EBADF;
7258 			goto again;
7259 		}
7260 
7261 		/* Ring buffer disabled, return as if not open for write */
7262 		return -EBADF;
7263 	}
7264 
7265 	entry = ring_buffer_event_data(event);
7266 	entry->ip = ip;
7267 
7268 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7269 	if (len) {
7270 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7271 		cnt = FAULTED_SIZE;
7272 		written = -EFAULT;
7273 	} else
7274 		written = cnt;
7275 
7276 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7277 		/* do not add \n before testing triggers, but add \0 */
7278 		entry->buf[cnt] = '\0';
7279 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7280 	}
7281 
7282 	if (entry->buf[cnt - 1] != '\n') {
7283 		entry->buf[cnt] = '\n';
7284 		entry->buf[cnt + 1] = '\0';
7285 	} else
7286 		entry->buf[cnt] = '\0';
7287 
7288 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7289 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7290 	__buffer_unlock_commit(buffer, event);
7291 
7292 	if (tt)
7293 		event_triggers_post_call(tr->trace_marker_file, tt);
7294 
7295 	return written;
7296 }
7297 
7298 static ssize_t
7299 tracing_mark_write(struct file *filp, const char __user *ubuf,
7300 					size_t cnt, loff_t *fpos)
7301 {
7302 	struct trace_array *tr = filp->private_data;
7303 	ssize_t written = -ENODEV;
7304 	unsigned long ip;
7305 
7306 	if (tracing_disabled)
7307 		return -EINVAL;
7308 
7309 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7310 		return -EINVAL;
7311 
7312 	if ((ssize_t)cnt < 0)
7313 		return -EINVAL;
7314 
7315 	if (cnt > TRACE_MARKER_MAX_SIZE)
7316 		cnt = TRACE_MARKER_MAX_SIZE;
7317 
7318 	/* The selftests expect this function to be the IP address */
7319 	ip = _THIS_IP_;
7320 
7321 	/* The global trace_marker can go to multiple instances */
7322 	if (tr == &global_trace) {
7323 		guard(rcu)();
7324 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7325 			written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7326 			if (written < 0)
7327 				break;
7328 		}
7329 	} else {
7330 		written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7331 	}
7332 
7333 	return written;
7334 }
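/*
 * Typical user-space usage of the path above (tracing_mark_write() is what
 * backs the trace_marker file):
 *
 *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * The string is recorded as a TRACE_PRINT event; writes larger than
 * TRACE_MARKER_MAX_SIZE (4096 bytes) are truncated to that size.
 */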
7335 
7336 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7337 					  const char __user *ubuf, size_t cnt)
7338 {
7339 	struct ring_buffer_event *event;
7340 	struct trace_buffer *buffer;
7341 	struct raw_data_entry *entry;
7342 	ssize_t written;
7343 	int size;
7344 	int len;
7345 
7346 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7347 
7348 	size = sizeof(*entry) + cnt;
7349 	if (cnt < FAULT_SIZE_ID)
7350 		size += FAULT_SIZE_ID - cnt;
7351 
7352 	buffer = tr->array_buffer.buffer;
7353 
7354 	if (size > ring_buffer_max_event_size(buffer))
7355 		return -EINVAL;
7356 
7357 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7358 					    tracing_gen_ctx());
7359 	if (!event)
7360 		/* Ring buffer disabled, return as if not open for write */
7361 		return -EBADF;
7362 
7363 	entry = ring_buffer_event_data(event);
7364 
7365 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7366 	if (len) {
7367 		entry->id = -1;
7368 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7369 		written = -EFAULT;
7370 	} else
7371 		written = cnt;
7372 
7373 	__buffer_unlock_commit(buffer, event);
7374 
7375 	return written;
7376 }
7377 
7378 static ssize_t
7379 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7380 					size_t cnt, loff_t *fpos)
7381 {
7382 	struct trace_array *tr = filp->private_data;
7383 	ssize_t written = -ENODEV;
7384 
7385 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7386 
7387 	if (tracing_disabled)
7388 		return -EINVAL;
7389 
7390 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7391 		return -EINVAL;
7392 
7393 	/* The marker must at least have a tag id */
7394 	if (cnt < sizeof(unsigned int))
7395 		return -EINVAL;
7396 
7397 	/* The global trace_marker_raw can go to multiple instances */
7398 	if (tr == &global_trace) {
7399 		guard(rcu)();
7400 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7401 			written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7402 			if (written < 0)
7403 				break;
7404 		}
7405 	} else {
7406 		written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7407 	}
7408 
7409 	return written;
7410 }
7411 
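/*
 * The "trace_clock" file lists the available clocks with the active one
 * in brackets (e.g. "[local] global counter ..."). Writing a clock name
 * switches the ring buffer clock and resets the buffers, since time
 * stamps taken with different clocks are not comparable.
 */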
7412 static int tracing_clock_show(struct seq_file *m, void *v)
7413 {
7414 	struct trace_array *tr = m->private;
7415 	int i;
7416 
7417 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7418 		seq_printf(m,
7419 			"%s%s%s%s", i ? " " : "",
7420 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7421 			i == tr->clock_id ? "]" : "");
7422 	seq_putc(m, '\n');
7423 
7424 	return 0;
7425 }
7426 
7427 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7428 {
7429 	int i;
7430 
7431 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7432 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7433 			break;
7434 	}
7435 	if (i == ARRAY_SIZE(trace_clocks))
7436 		return -EINVAL;
7437 
7438 	mutex_lock(&trace_types_lock);
7439 
7440 	tr->clock_id = i;
7441 
7442 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7443 
7444 	/*
7445 	 * New clock may not be consistent with the previous clock.
7446 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7447 	 */
7448 	tracing_reset_online_cpus(&tr->array_buffer);
7449 
7450 #ifdef CONFIG_TRACER_MAX_TRACE
7451 	if (tr->max_buffer.buffer)
7452 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7453 	tracing_reset_online_cpus(&tr->max_buffer);
7454 #endif
7455 
7456 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7457 		struct trace_scratch *tscratch = tr->scratch;
7458 
7459 		tscratch->clock_id = i;
7460 	}
7461 
7462 	mutex_unlock(&trace_types_lock);
7463 
7464 	return 0;
7465 }
7466 
7467 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7468 				   size_t cnt, loff_t *fpos)
7469 {
7470 	struct seq_file *m = filp->private_data;
7471 	struct trace_array *tr = m->private;
7472 	char buf[64];
7473 	const char *clockstr;
7474 	int ret;
7475 
7476 	if (cnt >= sizeof(buf))
7477 		return -EINVAL;
7478 
7479 	if (copy_from_user(buf, ubuf, cnt))
7480 		return -EFAULT;
7481 
7482 	buf[cnt] = 0;
7483 
7484 	clockstr = strstrip(buf);
7485 
7486 	ret = tracing_set_clock(tr, clockstr);
7487 	if (ret)
7488 		return ret;
7489 
7490 	*fpos += cnt;
7491 
7492 	return cnt;
7493 }
7494 
7495 static int tracing_clock_open(struct inode *inode, struct file *file)
7496 {
7497 	struct trace_array *tr = inode->i_private;
7498 	int ret;
7499 
7500 	ret = tracing_check_open_get_tr(tr);
7501 	if (ret)
7502 		return ret;
7503 
7504 	ret = single_open(file, tracing_clock_show, inode->i_private);
7505 	if (ret < 0)
7506 		trace_array_put(tr);
7507 
7508 	return ret;
7509 }
7510 
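/*
 * "timestamp_mode" reports whether ring buffer time stamps are deltas
 * from the previous event or absolute values; the active mode is shown
 * in brackets.
 */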
7511 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7512 {
7513 	struct trace_array *tr = m->private;
7514 
7515 	mutex_lock(&trace_types_lock);
7516 
7517 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7518 		seq_puts(m, "delta [absolute]\n");
7519 	else
7520 		seq_puts(m, "[delta] absolute\n");
7521 
7522 	mutex_unlock(&trace_types_lock);
7523 
7524 	return 0;
7525 }
7526 
7527 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7528 {
7529 	struct trace_array *tr = inode->i_private;
7530 	int ret;
7531 
7532 	ret = tracing_check_open_get_tr(tr);
7533 	if (ret)
7534 		return ret;
7535 
7536 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7537 	if (ret < 0)
7538 		trace_array_put(tr);
7539 
7540 	return ret;
7541 }
7542 
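/*
 * If the event is the per CPU trace_buffered_event placeholder it carries
 * no time stamp of its own, so report the current ring buffer time stamp;
 * otherwise use the time stamp embedded in the event.
 */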
7543 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7544 {
7545 	if (rbe == this_cpu_read(trace_buffered_event))
7546 		return ring_buffer_time_stamp(buffer);
7547 
7548 	return ring_buffer_event_time_stamp(buffer, rbe);
7549 }
7550 
7551 /*
7552  * Enable or disable using the per CPU trace_buffered_event when possible.
7553  */
7554 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7555 {
7556 	guard(mutex)(&trace_types_lock);
7557 
7558 	if (set && tr->no_filter_buffering_ref++)
7559 		return 0;
7560 
7561 	if (!set) {
7562 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7563 			return -EINVAL;
7564 
7565 		--tr->no_filter_buffering_ref;
7566 	}
7567 
7568 	return 0;
7569 }
7570 
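/* Per-open state for the trace_pipe_raw and snapshot_raw buffer files */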
7571 struct ftrace_buffer_info {
7572 	struct trace_iterator	iter;
7573 	void			*spare;
7574 	unsigned int		spare_cpu;
7575 	unsigned int		spare_size;
7576 	unsigned int		read;
7577 };
7578 
7579 #ifdef CONFIG_TRACER_SNAPSHOT
7580 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7581 {
7582 	struct trace_array *tr = inode->i_private;
7583 	struct trace_iterator *iter;
7584 	struct seq_file *m;
7585 	int ret;
7586 
7587 	ret = tracing_check_open_get_tr(tr);
7588 	if (ret)
7589 		return ret;
7590 
7591 	if (file->f_mode & FMODE_READ) {
7592 		iter = __tracing_open(inode, file, true);
7593 		if (IS_ERR(iter))
7594 			ret = PTR_ERR(iter);
7595 	} else {
7596 		/* Writes still need the seq_file to hold the private data */
7597 		ret = -ENOMEM;
7598 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7599 		if (!m)
7600 			goto out;
7601 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7602 		if (!iter) {
7603 			kfree(m);
7604 			goto out;
7605 		}
7606 		ret = 0;
7607 
7608 		iter->tr = tr;
7609 		iter->array_buffer = &tr->max_buffer;
7610 		iter->cpu_file = tracing_get_cpu(inode);
7611 		m->private = iter;
7612 		file->private_data = m;
7613 	}
7614 out:
7615 	if (ret < 0)
7616 		trace_array_put(tr);
7617 
7618 	return ret;
7619 }
7620 
7621 static void tracing_swap_cpu_buffer(void *tr)
7622 {
7623 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7624 }
7625 
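/*
 * Writing to the "snapshot" file: 0 frees the snapshot buffer, 1 takes a
 * snapshot (allocating the max buffer if needed and swapping it with the
 * live buffer), and any other value clears the snapshot buffer without
 * freeing it. The per CPU snapshot files limit the swap or clear to that
 * CPU.
 */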
7626 static ssize_t
7627 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7628 		       loff_t *ppos)
7629 {
7630 	struct seq_file *m = filp->private_data;
7631 	struct trace_iterator *iter = m->private;
7632 	struct trace_array *tr = iter->tr;
7633 	unsigned long val;
7634 	int ret;
7635 
7636 	ret = tracing_update_buffers(tr);
7637 	if (ret < 0)
7638 		return ret;
7639 
7640 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7641 	if (ret)
7642 		return ret;
7643 
7644 	guard(mutex)(&trace_types_lock);
7645 
7646 	if (tr->current_trace->use_max_tr)
7647 		return -EBUSY;
7648 
7649 	local_irq_disable();
7650 	arch_spin_lock(&tr->max_lock);
7651 	if (tr->cond_snapshot)
7652 		ret = -EBUSY;
7653 	arch_spin_unlock(&tr->max_lock);
7654 	local_irq_enable();
7655 	if (ret)
7656 		return ret;
7657 
7658 	switch (val) {
7659 	case 0:
7660 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7661 			return -EINVAL;
7662 		if (tr->allocated_snapshot)
7663 			free_snapshot(tr);
7664 		break;
7665 	case 1:
7666 /* Only allow per-cpu swap if the ring buffer supports it */
7667 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7668 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7669 			return -EINVAL;
7670 #endif
7671 		if (tr->allocated_snapshot)
7672 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7673 					&tr->array_buffer, iter->cpu_file);
7674 
7675 		ret = tracing_arm_snapshot_locked(tr);
7676 		if (ret)
7677 			return ret;
7678 
7679 		/* Now, we're going to swap */
7680 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7681 			local_irq_disable();
7682 			update_max_tr(tr, current, smp_processor_id(), NULL);
7683 			local_irq_enable();
7684 		} else {
7685 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7686 						 (void *)tr, 1);
7687 		}
7688 		tracing_disarm_snapshot(tr);
7689 		break;
7690 	default:
7691 		if (tr->allocated_snapshot) {
7692 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7693 				tracing_reset_online_cpus(&tr->max_buffer);
7694 			else
7695 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7696 		}
7697 		break;
7698 	}
7699 
7700 	if (ret >= 0) {
7701 		*ppos += cnt;
7702 		ret = cnt;
7703 	}
7704 
7705 	return ret;
7706 }
7707 
7708 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7709 {
7710 	struct seq_file *m = file->private_data;
7711 	int ret;
7712 
7713 	ret = tracing_release(inode, file);
7714 
7715 	if (file->f_mode & FMODE_READ)
7716 		return ret;
7717 
7718 	/* If write only, the seq_file is just a stub */
7719 	if (m)
7720 		kfree(m->private);
7721 	kfree(m);
7722 
7723 	return 0;
7724 }
7725 
7726 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7727 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7728 				    size_t count, loff_t *ppos);
7729 static int tracing_buffers_release(struct inode *inode, struct file *file);
7730 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7731 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7732 
7733 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7734 {
7735 	struct ftrace_buffer_info *info;
7736 	int ret;
7737 
7738 	/* The following checks for tracefs lockdown */
7739 	ret = tracing_buffers_open(inode, filp);
7740 	if (ret < 0)
7741 		return ret;
7742 
7743 	info = filp->private_data;
7744 
7745 	if (info->iter.trace->use_max_tr) {
7746 		tracing_buffers_release(inode, filp);
7747 		return -EBUSY;
7748 	}
7749 
7750 	info->iter.snapshot = true;
7751 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7752 
7753 	return ret;
7754 }
7755 
7756 #endif /* CONFIG_TRACER_SNAPSHOT */
7757 
7758 
7759 static const struct file_operations tracing_thresh_fops = {
7760 	.open		= tracing_open_generic,
7761 	.read		= tracing_thresh_read,
7762 	.write		= tracing_thresh_write,
7763 	.llseek		= generic_file_llseek,
7764 };
7765 
7766 #ifdef CONFIG_TRACER_MAX_TRACE
7767 static const struct file_operations tracing_max_lat_fops = {
7768 	.open		= tracing_open_generic_tr,
7769 	.read		= tracing_max_lat_read,
7770 	.write		= tracing_max_lat_write,
7771 	.llseek		= generic_file_llseek,
7772 	.release	= tracing_release_generic_tr,
7773 };
7774 #endif
7775 
7776 static const struct file_operations set_tracer_fops = {
7777 	.open		= tracing_open_generic_tr,
7778 	.read		= tracing_set_trace_read,
7779 	.write		= tracing_set_trace_write,
7780 	.llseek		= generic_file_llseek,
7781 	.release	= tracing_release_generic_tr,
7782 };
7783 
7784 static const struct file_operations tracing_pipe_fops = {
7785 	.open		= tracing_open_pipe,
7786 	.poll		= tracing_poll_pipe,
7787 	.read		= tracing_read_pipe,
7788 	.splice_read	= tracing_splice_read_pipe,
7789 	.release	= tracing_release_pipe,
7790 };
7791 
7792 static const struct file_operations tracing_entries_fops = {
7793 	.open		= tracing_open_generic_tr,
7794 	.read		= tracing_entries_read,
7795 	.write		= tracing_entries_write,
7796 	.llseek		= generic_file_llseek,
7797 	.release	= tracing_release_generic_tr,
7798 };
7799 
7800 static const struct file_operations tracing_buffer_meta_fops = {
7801 	.open		= tracing_buffer_meta_open,
7802 	.read		= seq_read,
7803 	.llseek		= seq_lseek,
7804 	.release	= tracing_seq_release,
7805 };
7806 
7807 static const struct file_operations tracing_total_entries_fops = {
7808 	.open		= tracing_open_generic_tr,
7809 	.read		= tracing_total_entries_read,
7810 	.llseek		= generic_file_llseek,
7811 	.release	= tracing_release_generic_tr,
7812 };
7813 
7814 static const struct file_operations tracing_free_buffer_fops = {
7815 	.open		= tracing_open_generic_tr,
7816 	.write		= tracing_free_buffer_write,
7817 	.release	= tracing_free_buffer_release,
7818 };
7819 
7820 static const struct file_operations tracing_mark_fops = {
7821 	.open		= tracing_mark_open,
7822 	.write		= tracing_mark_write,
7823 	.release	= tracing_release_generic_tr,
7824 };
7825 
7826 static const struct file_operations tracing_mark_raw_fops = {
7827 	.open		= tracing_mark_open,
7828 	.write		= tracing_mark_raw_write,
7829 	.release	= tracing_release_generic_tr,
7830 };
7831 
7832 static const struct file_operations trace_clock_fops = {
7833 	.open		= tracing_clock_open,
7834 	.read		= seq_read,
7835 	.llseek		= seq_lseek,
7836 	.release	= tracing_single_release_tr,
7837 	.write		= tracing_clock_write,
7838 };
7839 
7840 static const struct file_operations trace_time_stamp_mode_fops = {
7841 	.open		= tracing_time_stamp_mode_open,
7842 	.read		= seq_read,
7843 	.llseek		= seq_lseek,
7844 	.release	= tracing_single_release_tr,
7845 };
7846 
7847 static const struct file_operations last_boot_fops = {
7848 	.open		= tracing_last_boot_open,
7849 	.read		= seq_read,
7850 	.llseek		= seq_lseek,
7851 	.release	= tracing_seq_release,
7852 };
7853 
7854 #ifdef CONFIG_TRACER_SNAPSHOT
7855 static const struct file_operations snapshot_fops = {
7856 	.open		= tracing_snapshot_open,
7857 	.read		= seq_read,
7858 	.write		= tracing_snapshot_write,
7859 	.llseek		= tracing_lseek,
7860 	.release	= tracing_snapshot_release,
7861 };
7862 
7863 static const struct file_operations snapshot_raw_fops = {
7864 	.open		= snapshot_raw_open,
7865 	.read		= tracing_buffers_read,
7866 	.release	= tracing_buffers_release,
7867 	.splice_read	= tracing_buffers_splice_read,
7868 };
7869 
7870 #endif /* CONFIG_TRACER_SNAPSHOT */
7871 
7872 /*
7873  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7874  * @filp: The active open file structure
7875  * @ubuf: The user space buffer containing the value to be written
7876  * @cnt: The number of bytes to read from @ubuf
7877  * @ppos: The current "file" position
7878  *
7879  * This function implements the write interface for a struct trace_min_max_param.
7880  * The filp->private_data must point to a trace_min_max_param structure that
7881  * defines where to write the value, the min and the max acceptable values,
7882  * and a lock to protect the write.
7883  */
7884 static ssize_t
7885 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7886 {
7887 	struct trace_min_max_param *param = filp->private_data;
7888 	u64 val;
7889 	int err;
7890 
7891 	if (!param)
7892 		return -EFAULT;
7893 
7894 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7895 	if (err)
7896 		return err;
7897 
7898 	if (param->lock)
7899 		mutex_lock(param->lock);
7900 
7901 	if (param->min && val < *param->min)
7902 		err = -EINVAL;
7903 
7904 	if (param->max && val > *param->max)
7905 		err = -EINVAL;
7906 
7907 	if (!err)
7908 		*param->val = val;
7909 
7910 	if (param->lock)
7911 		mutex_unlock(param->lock);
7912 
7913 	if (err)
7914 		return err;
7915 
7916 	return cnt;
7917 }
7918 
7919 /*
7920  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7921  * @filp: The active open file structure
7922  * @ubuf: The userspace provided buffer to read value into
7923  * @cnt: The maximum number of bytes to read
7924  * @ppos: The current "file" position
7925  *
7926  * This function implements the read interface for a struct trace_min_max_param.
7927  * The filp->private_data must point to a trace_min_max_param struct with valid
7928  * data.
7929  */
7930 static ssize_t
7931 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7932 {
7933 	struct trace_min_max_param *param = filp->private_data;
7934 	char buf[U64_STR_SIZE];
7935 	int len;
7936 	u64 val;
7937 
7938 	if (!param)
7939 		return -EFAULT;
7940 
7941 	val = *param->val;
7942 
7943 	if (cnt > sizeof(buf))
7944 		cnt = sizeof(buf);
7945 
7946 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7947 
7948 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7949 }
7950 
7951 const struct file_operations trace_min_max_fops = {
7952 	.open		= tracing_open_generic,
7953 	.read		= trace_min_max_read,
7954 	.write		= trace_min_max_write,
7955 };
7956 
7957 #define TRACING_LOG_ERRS_MAX	8
7958 #define TRACING_LOG_LOC_MAX	128
7959 
7960 #define CMD_PREFIX "  Command: "
7961 
7962 struct err_info {
7963 	const char	**errs;	/* ptr to loc-specific array of err strings */
7964 	u8		type;	/* index into errs -> specific err string */
7965 	u16		pos;	/* caret position */
7966 	u64		ts;
7967 };
7968 
7969 struct tracing_log_err {
7970 	struct list_head	list;
7971 	struct err_info		info;
7972 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7973 	char			*cmd;                     /* what caused err */
7974 };
7975 
7976 static DEFINE_MUTEX(tracing_err_log_lock);
7977 
7978 static struct tracing_log_err *alloc_tracing_log_err(int len)
7979 {
7980 	struct tracing_log_err *err;
7981 
7982 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7983 	if (!err)
7984 		return ERR_PTR(-ENOMEM);
7985 
7986 	err->cmd = kzalloc(len, GFP_KERNEL);
7987 	if (!err->cmd) {
7988 		kfree(err);
7989 		return ERR_PTR(-ENOMEM);
7990 	}
7991 
7992 	return err;
7993 }
7994 
7995 static void free_tracing_log_err(struct tracing_log_err *err)
7996 {
7997 	kfree(err->cmd);
7998 	kfree(err);
7999 }
8000 
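/*
 * Get an error log entry to fill in: allocate new entries until
 * TRACING_LOG_ERRS_MAX exist, then recycle the oldest entry on the list,
 * replacing its cmd buffer with one sized for the new command.
 */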
8001 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8002 						   int len)
8003 {
8004 	struct tracing_log_err *err;
8005 	char *cmd;
8006 
8007 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8008 		err = alloc_tracing_log_err(len);
8009 		if (PTR_ERR(err) != -ENOMEM)
8010 			tr->n_err_log_entries++;
8011 
8012 		return err;
8013 	}
8014 	cmd = kzalloc(len, GFP_KERNEL);
8015 	if (!cmd)
8016 		return ERR_PTR(-ENOMEM);
8017 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8018 	kfree(err->cmd);
8019 	err->cmd = cmd;
8020 	list_del(&err->list);
8021 
8022 	return err;
8023 }
8024 
8025 /**
8026  * err_pos - find the position of a string within a command for error careting
8027  * @cmd: The tracing command that caused the error
8028  * @str: The string to position the caret at within @cmd
8029  *
8030  * Finds the position of the first occurrence of @str within @cmd.  The
8031  * return value can be passed to tracing_log_err() for caret placement
8032  * within @cmd.
8033  *
8034  * Returns the index within @cmd of the first occurrence of @str or 0
8035  * if @str was not found.
8036  */
8037 unsigned int err_pos(char *cmd, const char *str)
8038 {
8039 	char *found;
8040 
8041 	if (WARN_ON(!strlen(cmd)))
8042 		return 0;
8043 
8044 	found = strstr(cmd, str);
8045 	if (found)
8046 		return found - cmd;
8047 
8048 	return 0;
8049 }
8050 
8051 /**
8052  * tracing_log_err - write an error to the tracing error log
8053  * @tr: The associated trace array for the error (NULL for top level array)
8054  * @loc: A string describing where the error occurred
8055  * @cmd: The tracing command that caused the error
8056  * @errs: The array of loc-specific static error strings
8057  * @type: The index into errs[], which produces the specific static err string
8058  * @pos: The position the caret should be placed in the cmd
8059  *
8060  * Writes an error into tracing/error_log of the form:
8061  *
8062  * <loc>: error: <text>
8063  *   Command: <cmd>
8064  *              ^
8065  *
8066  * tracing/error_log is a small log file containing the last
8067  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8068  * unless there has been a tracing error; the error log can be
8069  * cleared and its memory freed by writing the empty string to it in
8070  * truncation mode, i.e. echo > tracing/error_log.
8071  *
8072  * NOTE: the @errs array along with the @type param are used to
8073  * produce a static error string - this string is not copied and saved
8074  * when the error is logged - only a pointer to it is saved.  See
8075  * existing callers for examples of how static strings are typically
8076  * defined for use with tracing_log_err().
8077  */
8078 void tracing_log_err(struct trace_array *tr,
8079 		     const char *loc, const char *cmd,
8080 		     const char **errs, u8 type, u16 pos)
8081 {
8082 	struct tracing_log_err *err;
8083 	int len = 0;
8084 
8085 	if (!tr)
8086 		tr = &global_trace;
8087 
8088 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8089 
8090 	guard(mutex)(&tracing_err_log_lock);
8091 
8092 	err = get_tracing_log_err(tr, len);
8093 	if (PTR_ERR(err) == -ENOMEM)
8094 		return;
8095 
8096 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8097 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8098 
8099 	err->info.errs = errs;
8100 	err->info.type = type;
8101 	err->info.pos = pos;
8102 	err->info.ts = local_clock();
8103 
8104 	list_add_tail(&err->list, &tr->err_log);
8105 }
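
/*
 * Illustrative sketch of a caller (not one that exists in this file):
 *
 *	static const char *errs[] = { "Duplicate name", "Invalid field" };
 *
 *	tracing_log_err(tr, "hist", cmd, errs, 0, err_pos(cmd, field));
 *
 * logs "hist: error: Duplicate name", echoes cmd on the Command: line,
 * and places the caret under the first occurrence of field within cmd.
 */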
8106 
8107 static void clear_tracing_err_log(struct trace_array *tr)
8108 {
8109 	struct tracing_log_err *err, *next;
8110 
8111 	mutex_lock(&tracing_err_log_lock);
8112 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8113 		list_del(&err->list);
8114 		free_tracing_log_err(err);
8115 	}
8116 
8117 	tr->n_err_log_entries = 0;
8118 	mutex_unlock(&tracing_err_log_lock);
8119 }
8120 
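/*
 * The error_log seq_file iterator holds tracing_err_log_lock across the
 * whole traversal: it is taken in ->start() and dropped in ->stop().
 */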
8121 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8122 {
8123 	struct trace_array *tr = m->private;
8124 
8125 	mutex_lock(&tracing_err_log_lock);
8126 
8127 	return seq_list_start(&tr->err_log, *pos);
8128 }
8129 
8130 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8131 {
8132 	struct trace_array *tr = m->private;
8133 
8134 	return seq_list_next(v, &tr->err_log, pos);
8135 }
8136 
8137 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8138 {
8139 	mutex_unlock(&tracing_err_log_lock);
8140 }
8141 
8142 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8143 {
8144 	u16 i;
8145 
8146 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8147 		seq_putc(m, ' ');
8148 	for (i = 0; i < pos; i++)
8149 		seq_putc(m, ' ');
8150 	seq_puts(m, "^\n");
8151 }
8152 
8153 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8154 {
8155 	struct tracing_log_err *err = v;
8156 
8157 	if (err) {
8158 		const char *err_text = err->info.errs[err->info.type];
8159 		u64 sec = err->info.ts;
8160 		u32 nsec;
8161 
8162 		nsec = do_div(sec, NSEC_PER_SEC);
8163 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8164 			   err->loc, err_text);
8165 		seq_printf(m, "%s", err->cmd);
8166 		tracing_err_log_show_pos(m, err->info.pos);
8167 	}
8168 
8169 	return 0;
8170 }
8171 
8172 static const struct seq_operations tracing_err_log_seq_ops = {
8173 	.start  = tracing_err_log_seq_start,
8174 	.next   = tracing_err_log_seq_next,
8175 	.stop   = tracing_err_log_seq_stop,
8176 	.show   = tracing_err_log_seq_show
8177 };
8178 
8179 static int tracing_err_log_open(struct inode *inode, struct file *file)
8180 {
8181 	struct trace_array *tr = inode->i_private;
8182 	int ret = 0;
8183 
8184 	ret = tracing_check_open_get_tr(tr);
8185 	if (ret)
8186 		return ret;
8187 
8188 	/* If this file was opened for write, then erase contents */
8189 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8190 		clear_tracing_err_log(tr);
8191 
8192 	if (file->f_mode & FMODE_READ) {
8193 		ret = seq_open(file, &tracing_err_log_seq_ops);
8194 		if (!ret) {
8195 			struct seq_file *m = file->private_data;
8196 			m->private = tr;
8197 		} else {
8198 			trace_array_put(tr);
8199 		}
8200 	}
8201 	return ret;
8202 }
8203 
8204 static ssize_t tracing_err_log_write(struct file *file,
8205 				     const char __user *buffer,
8206 				     size_t count, loff_t *ppos)
8207 {
8208 	return count;
8209 }
8210 
8211 static int tracing_err_log_release(struct inode *inode, struct file *file)
8212 {
8213 	struct trace_array *tr = inode->i_private;
8214 
8215 	trace_array_put(tr);
8216 
8217 	if (file->f_mode & FMODE_READ)
8218 		seq_release(inode, file);
8219 
8220 	return 0;
8221 }
8222 
8223 static const struct file_operations tracing_err_log_fops = {
8224 	.open           = tracing_err_log_open,
8225 	.write		= tracing_err_log_write,
8226 	.read           = seq_read,
8227 	.llseek         = tracing_lseek,
8228 	.release        = tracing_err_log_release,
8229 };
8230 
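/*
 * Open handler for the per CPU "trace_pipe_raw" binary buffer files
 * (also reused by snapshot_raw_open() above).
 */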
8231 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8232 {
8233 	struct trace_array *tr = inode->i_private;
8234 	struct ftrace_buffer_info *info;
8235 	int ret;
8236 
8237 	ret = tracing_check_open_get_tr(tr);
8238 	if (ret)
8239 		return ret;
8240 
8241 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8242 	if (!info) {
8243 		trace_array_put(tr);
8244 		return -ENOMEM;
8245 	}
8246 
8247 	mutex_lock(&trace_types_lock);
8248 
8249 	info->iter.tr		= tr;
8250 	info->iter.cpu_file	= tracing_get_cpu(inode);
8251 	info->iter.trace	= tr->current_trace;
8252 	info->iter.array_buffer = &tr->array_buffer;
8253 	info->spare		= NULL;
8254 	/* Force reading ring buffer for first read */
8255 	info->read		= (unsigned int)-1;
8256 
8257 	filp->private_data = info;
8258 
8259 	tr->trace_ref++;
8260 
8261 	mutex_unlock(&trace_types_lock);
8262 
8263 	ret = nonseekable_open(inode, filp);
8264 	if (ret < 0)
8265 		trace_array_put(tr);
8266 
8267 	return ret;
8268 }
8269 
8270 static __poll_t
8271 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8272 {
8273 	struct ftrace_buffer_info *info = filp->private_data;
8274 	struct trace_iterator *iter = &info->iter;
8275 
8276 	return trace_poll(iter, filp, poll_table);
8277 }
8278 
8279 static ssize_t
8280 tracing_buffers_read(struct file *filp, char __user *ubuf,
8281 		     size_t count, loff_t *ppos)
8282 {
8283 	struct ftrace_buffer_info *info = filp->private_data;
8284 	struct trace_iterator *iter = &info->iter;
8285 	void *trace_data;
8286 	int page_size;
8287 	ssize_t ret = 0;
8288 	ssize_t size;
8289 
8290 	if (!count)
8291 		return 0;
8292 
8293 #ifdef CONFIG_TRACER_MAX_TRACE
8294 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8295 		return -EBUSY;
8296 #endif
8297 
8298 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8299 
8300 	/* Make sure the spare matches the current sub buffer size */
8301 	if (info->spare) {
8302 		if (page_size != info->spare_size) {
8303 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8304 						   info->spare_cpu, info->spare);
8305 			info->spare = NULL;
8306 		}
8307 	}
8308 
8309 	if (!info->spare) {
8310 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8311 							  iter->cpu_file);
8312 		if (IS_ERR(info->spare)) {
8313 			ret = PTR_ERR(info->spare);
8314 			info->spare = NULL;
8315 		} else {
8316 			info->spare_cpu = iter->cpu_file;
8317 			info->spare_size = page_size;
8318 		}
8319 	}
8320 	if (!info->spare)
8321 		return ret;
8322 
8323 	/* Do we have previous read data to read? */
8324 	if (info->read < page_size)
8325 		goto read;
8326 
8327  again:
8328 	trace_access_lock(iter->cpu_file);
8329 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8330 				    info->spare,
8331 				    count,
8332 				    iter->cpu_file, 0);
8333 	trace_access_unlock(iter->cpu_file);
8334 
8335 	if (ret < 0) {
8336 		if (trace_empty(iter) && !iter->closed) {
8337 			if (update_last_data_if_empty(iter->tr))
8338 				return 0;
8339 
8340 			if ((filp->f_flags & O_NONBLOCK))
8341 				return -EAGAIN;
8342 
8343 			ret = wait_on_pipe(iter, 0);
8344 			if (ret)
8345 				return ret;
8346 
8347 			goto again;
8348 		}
8349 		return 0;
8350 	}
8351 
8352 	info->read = 0;
8353  read:
8354 	size = page_size - info->read;
8355 	if (size > count)
8356 		size = count;
8357 	trace_data = ring_buffer_read_page_data(info->spare);
8358 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8359 	if (ret == size)
8360 		return -EFAULT;
8361 
8362 	size -= ret;
8363 
8364 	*ppos += size;
8365 	info->read += size;
8366 
8367 	return size;
8368 }
8369 
8370 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8371 {
8372 	struct ftrace_buffer_info *info = file->private_data;
8373 	struct trace_iterator *iter = &info->iter;
8374 
8375 	iter->closed = true;
8376 	/* Make sure the waiters see the new wait_index */
8377 	(void)atomic_fetch_inc_release(&iter->wait_index);
8378 
8379 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8380 
8381 	return 0;
8382 }
8383 
8384 static int tracing_buffers_release(struct inode *inode, struct file *file)
8385 {
8386 	struct ftrace_buffer_info *info = file->private_data;
8387 	struct trace_iterator *iter = &info->iter;
8388 
8389 	mutex_lock(&trace_types_lock);
8390 
8391 	iter->tr->trace_ref--;
8392 
8393 	__trace_array_put(iter->tr);
8394 
8395 	if (info->spare)
8396 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8397 					   info->spare_cpu, info->spare);
8398 	kvfree(info);
8399 
8400 	mutex_unlock(&trace_types_lock);
8401 
8402 	return 0;
8403 }
8404 
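/*
 * A counted reference on a ring buffer read page that has been handed to
 * a pipe; the page goes back to the ring buffer only when the last pipe
 * buffer holding it is released.
 */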
8405 struct buffer_ref {
8406 	struct trace_buffer	*buffer;
8407 	void			*page;
8408 	int			cpu;
8409 	refcount_t		refcount;
8410 };
8411 
8412 static void buffer_ref_release(struct buffer_ref *ref)
8413 {
8414 	if (!refcount_dec_and_test(&ref->refcount))
8415 		return;
8416 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8417 	kfree(ref);
8418 }
8419 
8420 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8421 				    struct pipe_buffer *buf)
8422 {
8423 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8424 
8425 	buffer_ref_release(ref);
8426 	buf->private = 0;
8427 }
8428 
8429 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8430 				struct pipe_buffer *buf)
8431 {
8432 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8433 
8434 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8435 		return false;
8436 
8437 	refcount_inc(&ref->refcount);
8438 	return true;
8439 }
8440 
8441 /* Pipe buffer operations for a buffer. */
8442 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8443 	.release		= buffer_pipe_buf_release,
8444 	.get			= buffer_pipe_buf_get,
8445 };
8446 
8447 /*
8448  * Callback from splice_to_pipe(), if we need to release some pages
8449  * at the end of the spd in case we errored out while filling the pipe.
8450  */
8451 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8452 {
8453 	struct buffer_ref *ref =
8454 		(struct buffer_ref *)spd->partial[i].private;
8455 
8456 	buffer_ref_release(ref);
8457 	spd->partial[i].private = 0;
8458 }
8459 
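/*
 * Splice whole sub-buffers out of the ring buffer into a pipe. The file
 * position must be sub-buffer aligned and the length is rounded down to
 * whole sub-buffers; each spliced page carries a buffer_ref so the ring
 * buffer page is recycled only when the pipe side is done with it.
 */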
8460 static ssize_t
8461 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8462 			    struct pipe_inode_info *pipe, size_t len,
8463 			    unsigned int flags)
8464 {
8465 	struct ftrace_buffer_info *info = file->private_data;
8466 	struct trace_iterator *iter = &info->iter;
8467 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8468 	struct page *pages_def[PIPE_DEF_BUFFERS];
8469 	struct splice_pipe_desc spd = {
8470 		.pages		= pages_def,
8471 		.partial	= partial_def,
8472 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8473 		.ops		= &buffer_pipe_buf_ops,
8474 		.spd_release	= buffer_spd_release,
8475 	};
8476 	struct buffer_ref *ref;
8477 	bool woken = false;
8478 	int page_size;
8479 	int entries, i;
8480 	ssize_t ret = 0;
8481 
8482 #ifdef CONFIG_TRACER_MAX_TRACE
8483 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8484 		return -EBUSY;
8485 #endif
8486 
8487 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8488 	if (*ppos & (page_size - 1))
8489 		return -EINVAL;
8490 
8491 	if (len & (page_size - 1)) {
8492 		if (len < page_size)
8493 			return -EINVAL;
8494 		len &= (~(page_size - 1));
8495 	}
8496 
8497 	if (splice_grow_spd(pipe, &spd))
8498 		return -ENOMEM;
8499 
8500  again:
8501 	trace_access_lock(iter->cpu_file);
8502 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8503 
8504 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8505 		struct page *page;
8506 		int r;
8507 
8508 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8509 		if (!ref) {
8510 			ret = -ENOMEM;
8511 			break;
8512 		}
8513 
8514 		refcount_set(&ref->refcount, 1);
8515 		ref->buffer = iter->array_buffer->buffer;
8516 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8517 		if (IS_ERR(ref->page)) {
8518 			ret = PTR_ERR(ref->page);
8519 			ref->page = NULL;
8520 			kfree(ref);
8521 			break;
8522 		}
8523 		ref->cpu = iter->cpu_file;
8524 
8525 		r = ring_buffer_read_page(ref->buffer, ref->page,
8526 					  len, iter->cpu_file, 1);
8527 		if (r < 0) {
8528 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8529 						   ref->page);
8530 			kfree(ref);
8531 			break;
8532 		}
8533 
8534 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8535 
8536 		spd.pages[i] = page;
8537 		spd.partial[i].len = page_size;
8538 		spd.partial[i].offset = 0;
8539 		spd.partial[i].private = (unsigned long)ref;
8540 		spd.nr_pages++;
8541 		*ppos += page_size;
8542 
8543 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8544 	}
8545 
8546 	trace_access_unlock(iter->cpu_file);
8547 	spd.nr_pages = i;
8548 
8549 	/* did we read anything? */
8550 	if (!spd.nr_pages) {
8551 
8552 		if (ret)
8553 			goto out;
8554 
8555 		if (woken)
8556 			goto out;
8557 
8558 		ret = -EAGAIN;
8559 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8560 			goto out;
8561 
8562 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8563 		if (ret)
8564 			goto out;
8565 
8566 		/* No need to wait after waking up when tracing is off */
8567 		if (!tracer_tracing_is_on(iter->tr))
8568 			goto out;
8569 
8570 		/* Iterate one more time to collect any new data then exit */
8571 		woken = true;
8572 
8573 		goto again;
8574 	}
8575 
8576 	ret = splice_to_pipe(pipe, &spd);
8577 out:
8578 	splice_shrink_spd(&spd);
8579 
8580 	return ret;
8581 }
8582 
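/*
 * TRACE_MMAP_IOCTL_GET_READER advances the reader page of a memory-mapped
 * ring buffer, first waiting (unless O_NONBLOCK) for buffer_percent worth
 * of data. Any other non-zero cmd is rejected; cmd 0 only wakes waiters.
 */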
8583 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8584 {
8585 	struct ftrace_buffer_info *info = file->private_data;
8586 	struct trace_iterator *iter = &info->iter;
8587 	int err;
8588 
8589 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8590 		if (!(file->f_flags & O_NONBLOCK)) {
8591 			err = ring_buffer_wait(iter->array_buffer->buffer,
8592 					       iter->cpu_file,
8593 					       iter->tr->buffer_percent,
8594 					       NULL, NULL);
8595 			if (err)
8596 				return err;
8597 		}
8598 
8599 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8600 						  iter->cpu_file);
8601 	} else if (cmd) {
8602 		return -ENOTTY;
8603 	}
8604 
8605 	/*
8606 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8607 	 * waiters
8608 	 */
8609 	mutex_lock(&trace_types_lock);
8610 
8611 	/* Make sure the waiters see the new wait_index */
8612 	(void)atomic_fetch_inc_release(&iter->wait_index);
8613 
8614 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8615 
8616 	mutex_unlock(&trace_types_lock);
8617 	return 0;
8618 }
8619 
8620 #ifdef CONFIG_TRACER_MAX_TRACE
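/*
 * Count user space mappings of the ring buffer so that the snapshot code
 * can refuse to swap out a mapped buffer; the map itself is refused while
 * a snapshot user is active.
 */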
8621 static int get_snapshot_map(struct trace_array *tr)
8622 {
8623 	int err = 0;
8624 
8625 	/*
8626 	 * Called with mmap_lock held. lockdep would be unhappy if we took
8627 	 * trace_types_lock here, so use the more specific
8628 	 * snapshot_trigger_lock instead.
8629 	 */
8630 	spin_lock(&tr->snapshot_trigger_lock);
8631 
8632 	if (tr->snapshot || tr->mapped == UINT_MAX)
8633 		err = -EBUSY;
8634 	else
8635 		tr->mapped++;
8636 
8637 	spin_unlock(&tr->snapshot_trigger_lock);
8638 
8639 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8640 	if (tr->mapped == 1)
8641 		synchronize_rcu();
8642 
8643 	return err;
8644 }
8645
8646 static void put_snapshot_map(struct trace_array *tr)
8647 {
8648 	spin_lock(&tr->snapshot_trigger_lock);
8649 	if (!WARN_ON(!tr->mapped))
8650 		tr->mapped--;
8651 	spin_unlock(&tr->snapshot_trigger_lock);
8652 }
8653 #else
8654 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8655 static inline void put_snapshot_map(struct trace_array *tr) { }
8656 #endif
8657 
8658 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8659 {
8660 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8661 	struct trace_iterator *iter = &info->iter;
8662 
8663 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8664 	put_snapshot_map(iter->tr);
8665 }
8666 
8667 static const struct vm_operations_struct tracing_buffers_vmops = {
8668 	.close		= tracing_buffers_mmap_close,
8669 };
8670 
8671 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8672 {
8673 	struct ftrace_buffer_info *info = filp->private_data;
8674 	struct trace_iterator *iter = &info->iter;
8675 	int ret = 0;
8676 
8677 	/* A memmap'ed buffer is not supported for user space mmap */
8678 	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8679 		return -ENODEV;
8680 
8681 	ret = get_snapshot_map(iter->tr);
8682 	if (ret)
8683 		return ret;
8684 
8685 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8686 	if (ret)
8687 		put_snapshot_map(iter->tr);
8688 
8689 	vma->vm_ops = &tracing_buffers_vmops;
8690 
8691 	return ret;
8692 }
8693 
8694 static const struct file_operations tracing_buffers_fops = {
8695 	.open		= tracing_buffers_open,
8696 	.read		= tracing_buffers_read,
8697 	.poll		= tracing_buffers_poll,
8698 	.release	= tracing_buffers_release,
8699 	.flush		= tracing_buffers_flush,
8700 	.splice_read	= tracing_buffers_splice_read,
8701 	.unlocked_ioctl = tracing_buffers_ioctl,
8702 	.mmap		= tracing_buffers_mmap,
8703 };
8704 
8705 static ssize_t
8706 tracing_stats_read(struct file *filp, char __user *ubuf,
8707 		   size_t count, loff_t *ppos)
8708 {
8709 	struct inode *inode = file_inode(filp);
8710 	struct trace_array *tr = inode->i_private;
8711 	struct array_buffer *trace_buf = &tr->array_buffer;
8712 	int cpu = tracing_get_cpu(inode);
8713 	struct trace_seq *s;
8714 	unsigned long cnt;
8715 	unsigned long long t;
8716 	unsigned long usec_rem;
8717 
8718 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8719 	if (!s)
8720 		return -ENOMEM;
8721 
8722 	trace_seq_init(s);
8723 
8724 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8725 	trace_seq_printf(s, "entries: %ld\n", cnt);
8726 
8727 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8728 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8729 
8730 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8731 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8732 
8733 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8734 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8735 
8736 	if (trace_clocks[tr->clock_id].in_ns) {
8737 		/* local or global for trace_clock */
8738 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8739 		usec_rem = do_div(t, USEC_PER_SEC);
8740 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8741 								t, usec_rem);
8742 
8743 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8744 		usec_rem = do_div(t, USEC_PER_SEC);
8745 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8746 	} else {
8747 		/* counter or tsc mode for trace_clock */
8748 		trace_seq_printf(s, "oldest event ts: %llu\n",
8749 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8750 
8751 		trace_seq_printf(s, "now ts: %llu\n",
8752 				ring_buffer_time_stamp(trace_buf->buffer));
8753 	}
8754 
8755 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8756 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8757 
8758 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8759 	trace_seq_printf(s, "read events: %ld\n", cnt);
8760 
8761 	count = simple_read_from_buffer(ubuf, count, ppos,
8762 					s->buffer, trace_seq_used(s));
8763 
8764 	kfree(s);
8765 
8766 	return count;
8767 }
8768 
8769 static const struct file_operations tracing_stats_fops = {
8770 	.open		= tracing_open_generic_tr,
8771 	.read		= tracing_stats_read,
8772 	.llseek		= generic_file_llseek,
8773 	.release	= tracing_release_generic_tr,
8774 };
8775 
8776 #ifdef CONFIG_DYNAMIC_FTRACE
8777 
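/*
 * "dyn_ftrace_total_info": number of functions converted to dynamic
 * trace calls, the pages and groups holding their records, and how long
 * the boot time and module updates took.
 */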
8778 static ssize_t
8779 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8780 		  size_t cnt, loff_t *ppos)
8781 {
8782 	ssize_t ret;
8783 	char *buf;
8784 	int r;
8785 
8786 	/* 512 should be plenty to hold the amount needed */
8787 #define DYN_INFO_BUF_SIZE	512
8788 
8789 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8790 	if (!buf)
8791 		return -ENOMEM;
8792 
8793 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8794 		      "%ld pages:%ld groups: %ld\n"
8795 		      "ftrace boot update time = %llu (ns)\n"
8796 		      "ftrace module total update time = %llu (ns)\n",
8797 		      ftrace_update_tot_cnt,
8798 		      ftrace_number_of_pages,
8799 		      ftrace_number_of_groups,
8800 		      ftrace_update_time,
8801 		      ftrace_total_mod_time);
8802 
8803 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8804 	kfree(buf);
8805 	return ret;
8806 }
8807 
8808 static const struct file_operations tracing_dyn_info_fops = {
8809 	.open		= tracing_open_generic,
8810 	.read		= tracing_read_dyn_info,
8811 	.llseek		= generic_file_llseek,
8812 };
8813 #endif /* CONFIG_DYNAMIC_FTRACE */
8814 
8815 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
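/*
 * Implementation of the "snapshot" command for set_ftrace_filter, e.g.
 * echo 'func:snapshot' or 'func:snapshot:N' takes a snapshot every time
 * (or the first N times) func is hit; a leading '!' removes the probe.
 */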
8816 static void
8817 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8818 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8819 		void *data)
8820 {
8821 	tracing_snapshot_instance(tr);
8822 }
8823 
8824 static void
8825 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8826 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8827 		      void *data)
8828 {
8829 	struct ftrace_func_mapper *mapper = data;
8830 	long *count = NULL;
8831 
8832 	if (mapper)
8833 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8834 
8835 	if (count) {
8836 
8837 		if (*count <= 0)
8838 			return;
8839 
8840 		(*count)--;
8841 	}
8842 
8843 	tracing_snapshot_instance(tr);
8844 }
8845 
8846 static int
8847 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8848 		      struct ftrace_probe_ops *ops, void *data)
8849 {
8850 	struct ftrace_func_mapper *mapper = data;
8851 	long *count = NULL;
8852 
8853 	seq_printf(m, "%ps:", (void *)ip);
8854 
8855 	seq_puts(m, "snapshot");
8856 
8857 	if (mapper)
8858 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8859 
8860 	if (count)
8861 		seq_printf(m, ":count=%ld\n", *count);
8862 	else
8863 		seq_puts(m, ":unlimited\n");
8864 
8865 	return 0;
8866 }
8867 
8868 static int
8869 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8870 		     unsigned long ip, void *init_data, void **data)
8871 {
8872 	struct ftrace_func_mapper *mapper = *data;
8873 
8874 	if (!mapper) {
8875 		mapper = allocate_ftrace_func_mapper();
8876 		if (!mapper)
8877 			return -ENOMEM;
8878 		*data = mapper;
8879 	}
8880 
8881 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8882 }
8883 
8884 static void
8885 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8886 		     unsigned long ip, void *data)
8887 {
8888 	struct ftrace_func_mapper *mapper = data;
8889 
8890 	if (!ip) {
8891 		if (!mapper)
8892 			return;
8893 		free_ftrace_func_mapper(mapper, NULL);
8894 		return;
8895 	}
8896 
8897 	ftrace_func_mapper_remove_ip(mapper, ip);
8898 }
8899 
8900 static struct ftrace_probe_ops snapshot_probe_ops = {
8901 	.func			= ftrace_snapshot,
8902 	.print			= ftrace_snapshot_print,
8903 };
8904 
8905 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8906 	.func			= ftrace_count_snapshot,
8907 	.print			= ftrace_snapshot_print,
8908 	.init			= ftrace_snapshot_init,
8909 	.free			= ftrace_snapshot_free,
8910 };
8911 
8912 static int
8913 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8914 			       char *glob, char *cmd, char *param, int enable)
8915 {
8916 	struct ftrace_probe_ops *ops;
8917 	void *count = (void *)-1;
8918 	char *number;
8919 	int ret;
8920 
8921 	if (!tr)
8922 		return -ENODEV;
8923 
8924 	/* hash funcs only work with set_ftrace_filter */
8925 	if (!enable)
8926 		return -EINVAL;
8927 
8928 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8929 
8930 	if (glob[0] == '!') {
8931 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8932 		if (!ret)
8933 			tracing_disarm_snapshot(tr);
8934 
8935 		return ret;
8936 	}
8937 
8938 	if (!param)
8939 		goto out_reg;
8940 
8941 	number = strsep(&param, ":");
8942 
8943 	if (!strlen(number))
8944 		goto out_reg;
8945 
8946 	/*
8947 	 * We use the callback data field (which is a pointer)
8948 	 * as our counter.
8949 	 */
8950 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8951 	if (ret)
8952 		return ret;
8953 
8954  out_reg:
8955 	ret = tracing_arm_snapshot(tr);
8956 	if (ret < 0)
8957 		goto out;
8958 
8959 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8960 	if (ret < 0)
8961 		tracing_disarm_snapshot(tr);
8962  out:
8963 	return ret < 0 ? ret : 0;
8964 }
8965 
8966 static struct ftrace_func_command ftrace_snapshot_cmd = {
8967 	.name			= "snapshot",
8968 	.func			= ftrace_trace_snapshot_callback,
8969 };
8970 
8971 static __init int register_snapshot_cmd(void)
8972 {
8973 	return register_ftrace_command(&ftrace_snapshot_cmd);
8974 }
8975 #else
8976 static inline __init int register_snapshot_cmd(void) { return 0; }
8977 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8978 
8979 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8980 {
8981 	if (WARN_ON(!tr->dir))
8982 		return ERR_PTR(-ENODEV);
8983 
8984 	/* Top directory uses NULL as the parent */
8985 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8986 		return NULL;
8987 
8988 	/* All sub buffers have a descriptor */
8989 	return tr->dir;
8990 }
8991 
8992 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8993 {
8994 	struct dentry *d_tracer;
8995 
8996 	if (tr->percpu_dir)
8997 		return tr->percpu_dir;
8998 
8999 	d_tracer = tracing_get_dentry(tr);
9000 	if (IS_ERR(d_tracer))
9001 		return NULL;
9002 
9003 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9004 
9005 	MEM_FAIL(!tr->percpu_dir,
9006 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9007 
9008 	return tr->percpu_dir;
9009 }
9010 
9011 static struct dentry *
9012 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9013 		      void *data, long cpu, const struct file_operations *fops)
9014 {
9015 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9016 
9017 	if (ret) /* See tracing_get_cpu() */
9018 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
9019 	return ret;
9020 }
9021 
9022 static void
9023 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9024 {
9025 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9026 	struct dentry *d_cpu;
9027 	char cpu_dir[30]; /* 30 characters should be more than enough */
9028 
9029 	if (!d_percpu)
9030 		return;
9031 
9032 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
9033 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9034 	if (!d_cpu) {
9035 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9036 		return;
9037 	}
9038 
9039 	/* per cpu trace_pipe */
9040 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9041 				tr, cpu, &tracing_pipe_fops);
9042 
9043 	/* per cpu trace */
9044 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9045 				tr, cpu, &tracing_fops);
9046 
9047 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9048 				tr, cpu, &tracing_buffers_fops);
9049 
9050 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9051 				tr, cpu, &tracing_stats_fops);
9052 
9053 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9054 				tr, cpu, &tracing_entries_fops);
9055 
9056 	if (tr->range_addr_start)
9057 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9058 				      tr, cpu, &tracing_buffer_meta_fops);
9059 #ifdef CONFIG_TRACER_SNAPSHOT
9060 	if (!tr->range_addr_start) {
9061 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9062 				      tr, cpu, &snapshot_fops);
9063 
9064 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9065 				      tr, cpu, &snapshot_raw_fops);
9066 	}
9067 #endif
9068 }
9069 
9070 #ifdef CONFIG_FTRACE_SELFTEST
9071 /* Let selftest have access to static functions in this file */
9072 #include "trace_selftest.c"
9073 #endif
9074 
9075 static ssize_t
9076 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9077 			loff_t *ppos)
9078 {
9079 	struct trace_option_dentry *topt = filp->private_data;
9080 	char *buf;
9081 
9082 	if (topt->flags->val & topt->opt->bit)
9083 		buf = "1\n";
9084 	else
9085 		buf = "0\n";
9086 
9087 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9088 }
9089 
9090 static ssize_t
9091 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9092 			 loff_t *ppos)
9093 {
9094 	struct trace_option_dentry *topt = filp->private_data;
9095 	unsigned long val;
9096 	int ret;
9097 
9098 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9099 	if (ret)
9100 		return ret;
9101 
9102 	if (val != 0 && val != 1)
9103 		return -EINVAL;
9104 
9105 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9106 		mutex_lock(&trace_types_lock);
9107 		ret = __set_tracer_option(topt->tr, topt->flags,
9108 					  topt->opt, !val);
9109 		mutex_unlock(&trace_types_lock);
9110 		if (ret)
9111 			return ret;
9112 	}
9113 
9114 	*ppos += cnt;
9115 
9116 	return cnt;
9117 }
9118 
9119 static int tracing_open_options(struct inode *inode, struct file *filp)
9120 {
9121 	struct trace_option_dentry *topt = inode->i_private;
9122 	int ret;
9123 
9124 	ret = tracing_check_open_get_tr(topt->tr);
9125 	if (ret)
9126 		return ret;
9127 
9128 	filp->private_data = inode->i_private;
9129 	return 0;
9130 }
9131 
9132 static int tracing_release_options(struct inode *inode, struct file *file)
9133 {
9134 	struct trace_option_dentry *topt = file->private_data;
9135 
9136 	trace_array_put(topt->tr);
9137 	return 0;
9138 }
9139 
9140 static const struct file_operations trace_options_fops = {
9141 	.open = tracing_open_options,
9142 	.read = trace_options_read,
9143 	.write = trace_options_write,
9144 	.llseek	= generic_file_llseek,
9145 	.release = tracing_release_options,
9146 };
9147 
9148 /*
9149  * In order to pass in both the trace_array descriptor as well as the index
9150  * to the flag that the trace option file represents, the trace_array
9151  * has a character array of trace_flags_index[], which holds the index
9152  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9153  * The address of this character array is passed to the flag option file
9154  * read/write callbacks.
9155  *
9156  * In order to extract both the index and the trace_array descriptor,
9157  * get_tr_index() uses the following algorithm.
9158  *
9159  *   idx = *ptr;
9160  *
9161  * As the pointer points at an element of the index array, dereferencing
9162  * it yields the index value itself (remember index[1] == 1).
9163  *
9164  * Then to get the trace_array descriptor, by subtracting that index
9165  * from the ptr, we get to the start of the index itself.
9166  *
9167  *   ptr - idx == &index[0]
9168  *
9169  * Then a simple container_of() from that pointer gets us to the
9170  * trace_array descriptor.
9171  */
9172 static void get_tr_index(void *data, struct trace_array **ptr,
9173 			 unsigned int *pindex)
9174 {
9175 	*pindex = *(unsigned char *)data;
9176 
9177 	*ptr = container_of(data - *pindex, struct trace_array,
9178 			    trace_flags_index);
9179 }
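
/*
 * For example, if data points at trace_flags_index[3], then *data == 3,
 * data - 3 == &trace_flags_index[0], and container_of() on that address
 * recovers the enclosing trace_array.
 */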
9180 
9181 static ssize_t
9182 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9183 			loff_t *ppos)
9184 {
9185 	void *tr_index = filp->private_data;
9186 	struct trace_array *tr;
9187 	unsigned int index;
9188 	char *buf;
9189 
9190 	get_tr_index(tr_index, &tr, &index);
9191 
9192 	if (tr->trace_flags & (1 << index))
9193 		buf = "1\n";
9194 	else
9195 		buf = "0\n";
9196 
9197 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9198 }
9199 
9200 static ssize_t
9201 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9202 			 loff_t *ppos)
9203 {
9204 	void *tr_index = filp->private_data;
9205 	struct trace_array *tr;
9206 	unsigned int index;
9207 	unsigned long val;
9208 	int ret;
9209 
9210 	get_tr_index(tr_index, &tr, &index);
9211 
9212 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9213 	if (ret)
9214 		return ret;
9215 
9216 	if (val != 0 && val != 1)
9217 		return -EINVAL;
9218 
9219 	mutex_lock(&event_mutex);
9220 	mutex_lock(&trace_types_lock);
9221 	ret = set_tracer_flag(tr, 1 << index, val);
9222 	mutex_unlock(&trace_types_lock);
9223 	mutex_unlock(&event_mutex);
9224 
9225 	if (ret < 0)
9226 		return ret;
9227 
9228 	*ppos += cnt;
9229 
9230 	return cnt;
9231 }
9232 
9233 static const struct file_operations trace_options_core_fops = {
9234 	.open = tracing_open_generic,
9235 	.read = trace_options_core_read,
9236 	.write = trace_options_core_write,
9237 	.llseek = generic_file_llseek,
9238 };
9239 
9240 struct dentry *trace_create_file(const char *name,
9241 				 umode_t mode,
9242 				 struct dentry *parent,
9243 				 void *data,
9244 				 const struct file_operations *fops)
9245 {
9246 	struct dentry *ret;
9247 
9248 	ret = tracefs_create_file(name, mode, parent, data, fops);
9249 	if (!ret)
9250 		pr_warn("Could not create tracefs '%s' entry\n", name);
9251 
9252 	return ret;
9253 }
9254 
9255 
9256 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9257 {
9258 	struct dentry *d_tracer;
9259 
9260 	if (tr->options)
9261 		return tr->options;
9262 
9263 	d_tracer = tracing_get_dentry(tr);
9264 	if (IS_ERR(d_tracer))
9265 		return NULL;
9266 
9267 	tr->options = tracefs_create_dir("options", d_tracer);
9268 	if (!tr->options) {
9269 		pr_warn("Could not create tracefs directory 'options'\n");
9270 		return NULL;
9271 	}
9272 
9273 	return tr->options;
9274 }
9275 
9276 static void
9277 create_trace_option_file(struct trace_array *tr,
9278 			 struct trace_option_dentry *topt,
9279 			 struct tracer_flags *flags,
9280 			 struct tracer_opt *opt)
9281 {
9282 	struct dentry *t_options;
9283 
9284 	t_options = trace_options_init_dentry(tr);
9285 	if (!t_options)
9286 		return;
9287 
9288 	topt->flags = flags;
9289 	topt->opt = opt;
9290 	topt->tr = tr;
9291 
9292 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9293 					t_options, topt, &trace_options_fops);
9294 
9295 }
9296 
9297 static void
9298 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9299 {
9300 	struct trace_option_dentry *topts;
9301 	struct trace_options *tr_topts;
9302 	struct tracer_flags *flags;
9303 	struct tracer_opt *opts;
9304 	int cnt;
9305 	int i;
9306 
9307 	if (!tracer)
9308 		return;
9309 
9310 	flags = tracer->flags;
9311 
9312 	if (!flags || !flags->opts)
9313 		return;
9314 
9315 	/*
9316 	 * If this is an instance, only create flags for tracers
9317 	 * the instance may have.
9318 	 */
9319 	if (!trace_ok_for_array(tracer, tr))
9320 		return;
9321 
9322 	for (i = 0; i < tr->nr_topts; i++) {
9323 		/* Make sure there are no duplicate flags. */
9324 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9325 			return;
9326 	}
9327 
9328 	opts = flags->opts;
9329 
9330 	for (cnt = 0; opts[cnt].name; cnt++)
9331 		;
9332 
9333 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9334 	if (!topts)
9335 		return;
9336 
9337 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9338 			    GFP_KERNEL);
9339 	if (!tr_topts) {
9340 		kfree(topts);
9341 		return;
9342 	}
9343 
9344 	tr->topts = tr_topts;
9345 	tr->topts[tr->nr_topts].tracer = tracer;
9346 	tr->topts[tr->nr_topts].topts = topts;
9347 	tr->nr_topts++;
9348 
9349 	for (cnt = 0; opts[cnt].name; cnt++) {
9350 		create_trace_option_file(tr, &topts[cnt], flags,
9351 					 &opts[cnt]);
9352 		MEM_FAIL(topts[cnt].entry == NULL,
9353 			  "Failed to create trace option: %s",
9354 			  opts[cnt].name);
9355 	}
9356 }
9357 
9358 static struct dentry *
9359 create_trace_option_core_file(struct trace_array *tr,
9360 			      const char *option, long index)
9361 {
9362 	struct dentry *t_options;
9363 
9364 	t_options = trace_options_init_dentry(tr);
9365 	if (!t_options)
9366 		return NULL;
9367 
9368 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9369 				 (void *)&tr->trace_flags_index[index],
9370 				 &trace_options_core_fops);
9371 }
9372 
9373 static void create_trace_options_dir(struct trace_array *tr)
9374 {
9375 	struct dentry *t_options;
9376 	bool top_level = tr == &global_trace;
9377 	int i;
9378 
9379 	t_options = trace_options_init_dentry(tr);
9380 	if (!t_options)
9381 		return;
9382 
9383 	for (i = 0; trace_options[i]; i++) {
9384 		if (top_level ||
9385 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9386 			create_trace_option_core_file(tr, trace_options[i], i);
9387 	}
9388 }
9389 
9390 static ssize_t
9391 rb_simple_read(struct file *filp, char __user *ubuf,
9392 	       size_t cnt, loff_t *ppos)
9393 {
9394 	struct trace_array *tr = filp->private_data;
9395 	char buf[64];
9396 	int r;
9397 
9398 	r = tracer_tracing_is_on(tr);
9399 	r = sprintf(buf, "%d\n", r);
9400 
9401 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9402 }
9403 
9404 static ssize_t
9405 rb_simple_write(struct file *filp, const char __user *ubuf,
9406 		size_t cnt, loff_t *ppos)
9407 {
9408 	struct trace_array *tr = filp->private_data;
9409 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9410 	unsigned long val;
9411 	int ret;
9412 
9413 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9414 	if (ret)
9415 		return ret;
9416 
9417 	if (buffer) {
9418 		mutex_lock(&trace_types_lock);
9419 		if (!!val == tracer_tracing_is_on(tr)) {
9420 			val = 0; /* do nothing */
9421 		} else if (val) {
9422 			tracer_tracing_on(tr);
9423 			if (tr->current_trace->start)
9424 				tr->current_trace->start(tr);
9425 		} else {
9426 			tracer_tracing_off(tr);
9427 			if (tr->current_trace->stop)
9428 				tr->current_trace->stop(tr);
9429 			/* Wake up any waiters */
9430 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9431 		}
9432 		mutex_unlock(&trace_types_lock);
9433 	}
9434 
9435 	(*ppos)++;
9436 
9437 	return cnt;
9438 }
9439 
9440 static const struct file_operations rb_simple_fops = {
9441 	.open		= tracing_open_generic_tr,
9442 	.read		= rb_simple_read,
9443 	.write		= rb_simple_write,
9444 	.release	= tracing_release_generic_tr,
9445 	.llseek		= default_llseek,
9446 };
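
/*
 * rb_simple_fops backs the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below. An illustrative usage sketch only
 * (paths assume the usual tracefs mount point):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on    # stop recording
 *   echo 1 > /sys/kernel/tracing/tracing_on    # resume recording
 *
 * Reading the file returns the current state ("0" or "1"), which is
 * what rb_simple_read() formats above.
 */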
9447 
9448 static ssize_t
9449 buffer_percent_read(struct file *filp, char __user *ubuf,
9450 		    size_t cnt, loff_t *ppos)
9451 {
9452 	struct trace_array *tr = filp->private_data;
9453 	char buf[64];
9454 	int r;
9455 
9456 	r = tr->buffer_percent;
9457 	r = sprintf(buf, "%d\n", r);
9458 
9459 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9460 }
9461 
9462 static ssize_t
9463 buffer_percent_write(struct file *filp, const char __user *ubuf,
9464 		     size_t cnt, loff_t *ppos)
9465 {
9466 	struct trace_array *tr = filp->private_data;
9467 	unsigned long val;
9468 	int ret;
9469 
9470 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9471 	if (ret)
9472 		return ret;
9473 
9474 	if (val > 100)
9475 		return -EINVAL;
9476 
9477 	tr->buffer_percent = val;
9478 
9479 	(*ppos)++;
9480 
9481 	return cnt;
9482 }
9483 
9484 static const struct file_operations buffer_percent_fops = {
9485 	.open		= tracing_open_generic_tr,
9486 	.read		= buffer_percent_read,
9487 	.write		= buffer_percent_write,
9488 	.release	= tracing_release_generic_tr,
9489 	.llseek		= default_llseek,
9490 };
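
/*
 * buffer_percent_fops backs the "buffer_percent" file (default 50, set
 * in init_tracer_tracefs() below). Usage sketch, assuming the usual
 * tracefs mount point:
 *
 *   echo 0   > /sys/kernel/tracing/buffer_percent   # wake waiters on any data
 *   echo 100 > /sys/kernel/tracing/buffer_percent   # wake only when full
 *
 * buffer_percent_write() rejects values above 100 with -EINVAL.
 */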
9491 
9492 static ssize_t
9493 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9494 {
9495 	struct trace_array *tr = filp->private_data;
9496 	size_t size;
9497 	char buf[64];
9498 	int order;
9499 	int r;
9500 
9501 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9502 	size = (PAGE_SIZE << order) / 1024;
9503 
9504 	r = sprintf(buf, "%zd\n", size);
9505 
9506 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9507 }
9508 
9509 static ssize_t
9510 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9511 			 size_t cnt, loff_t *ppos)
9512 {
9513 	struct trace_array *tr = filp->private_data;
9514 	unsigned long val;
9515 	int old_order;
9516 	int order;
9517 	int pages;
9518 	int ret;
9519 
9520 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9521 	if (ret)
9522 		return ret;
9523 
9524 	val *= 1024; /* value passed in is in KB */
9525 
9526 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9527 	order = fls(pages - 1);
9528 
9529 	/* limit between 1 and 128 system pages */
9530 	if (order < 0 || order > 7)
9531 		return -EINVAL;
9532 
9533 	/* Do not allow tracing while changing the order of the ring buffer */
9534 	tracing_stop_tr(tr);
9535 
9536 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9537 	if (old_order == order)
9538 		goto out;
9539 
9540 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9541 	if (ret)
9542 		goto out;
9543 
9544 #ifdef CONFIG_TRACER_MAX_TRACE
9545 
9546 	if (!tr->allocated_snapshot)
9547 		goto out_max;
9548 
9549 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9550 	if (ret) {
9551 		/* Put back the old order */
9552 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9553 		if (WARN_ON_ONCE(cnt)) {
9554 			/*
9555 			 * AARGH! We are left with different orders!
9556 			 * The max buffer is our "snapshot" buffer.
9557 			 * When a tracer needs a snapshot (one of the
9558 			 * latency tracers), it swaps the max buffer
9559 			 * with the saved snapshot. We succeeded in
9560 			 * updating the order of the main buffer, but failed
9561 			 * to update the order of the max buffer. And when we
9562 			 * tried to reset the main buffer to its original
9563 			 * order, we failed there too. This is very unlikely
9564 			 * to happen, but if it does, warn and kill all
9565 			 * tracing.
9566 			 */
9567 			tracing_disabled = 1;
9568 		}
9569 		goto out;
9570 	}
9571  out_max:
9572 #endif
9573 	(*ppos)++;
9574  out:
9575 	if (ret)
9576 		cnt = ret;
9577 	tracing_start_tr(tr);
9578 	return cnt;
9579 }
9580 
9581 static const struct file_operations buffer_subbuf_size_fops = {
9582 	.open		= tracing_open_generic_tr,
9583 	.read		= buffer_subbuf_size_read,
9584 	.write		= buffer_subbuf_size_write,
9585 	.release	= tracing_release_generic_tr,
9586 	.llseek		= default_llseek,
9587 };
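
/*
 * buffer_subbuf_size_fops backs the "buffer_subbuf_size_kb" file. The
 * written value is in KB and is rounded up to a power-of-two number of
 * pages. A worked example, assuming 4K pages:
 *
 *   echo 8 > buffer_subbuf_size_kb
 *     val = 8 * 1024 = 8192, pages = DIV_ROUND_UP(8192, 4096) = 2,
 *     order = fls(2 - 1) = 1  ->  sub-buffers of 2 pages (8 KB)
 *
 * Orders outside 0..7 (1 to 128 pages) are rejected with -EINVAL.
 */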
9588 
9589 static struct dentry *trace_instance_dir;
9590 
9591 static void
9592 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9593 
9594 #ifdef CONFIG_MODULES
9595 static int make_mod_delta(struct module *mod, void *data)
9596 {
9597 	struct trace_module_delta *module_delta;
9598 	struct trace_scratch *tscratch;
9599 	struct trace_mod_entry *entry;
9600 	struct trace_array *tr = data;
9601 	int i;
9602 
9603 	tscratch = tr->scratch;
9604 	module_delta = READ_ONCE(tr->module_delta);
9605 	for (i = 0; i < tscratch->nr_entries; i++) {
9606 		entry = &tscratch->entries[i];
9607 		if (strcmp(mod->name, entry->mod_name))
9608 			continue;
9609 		if (mod->state == MODULE_STATE_GOING)
9610 			module_delta->delta[i] = 0;
9611 		else
9612 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9613 						 - entry->mod_addr;
9614 		break;
9615 	}
9616 	return 0;
9617 }
9618 #else
9619 static int make_mod_delta(struct module *mod, void *data)
9620 {
9621 	return 0;
9622 }
9623 #endif
9624 
9625 static int mod_addr_comp(const void *a, const void *b, const void *data)
9626 {
9627 	const struct trace_mod_entry *e1 = a;
9628 	const struct trace_mod_entry *e2 = b;
9629 
9630 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9631 }
9632 
9633 static void setup_trace_scratch(struct trace_array *tr,
9634 				struct trace_scratch *tscratch, unsigned int size)
9635 {
9636 	struct trace_module_delta *module_delta;
9637 	struct trace_mod_entry *entry;
9638 	int i, nr_entries;
9639 
9640 	if (!tscratch)
9641 		return;
9642 
9643 	tr->scratch = tscratch;
9644 	tr->scratch_size = size;
9645 
9646 	if (tscratch->text_addr)
9647 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9648 
9649 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9650 		goto reset;
9651 
9652 	/* Check if each module name is a valid string */
9653 	for (i = 0; i < tscratch->nr_entries; i++) {
9654 		int n;
9655 
9656 		entry = &tscratch->entries[i];
9657 
9658 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9659 			if (entry->mod_name[n] == '\0')
9660 				break;
9661 			if (!isprint(entry->mod_name[n]))
9662 				goto reset;
9663 		}
9664 		if (n == MODULE_NAME_LEN)
9665 			goto reset;
9666 	}
9667 
9668 	/* Sort the entries so that we can find the appropriate module from an address. */
9669 	nr_entries = tscratch->nr_entries;
9670 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9671 	       mod_addr_comp, NULL, NULL);
9672 
9673 	if (IS_ENABLED(CONFIG_MODULES)) {
9674 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9675 		if (!module_delta) {
9676 			pr_info("module_delta allocation failed. Not able to decode module address.");
9677 			goto reset;
9678 		}
9679 		init_rcu_head(&module_delta->rcu);
9680 	} else
9681 		module_delta = NULL;
9682 	WRITE_ONCE(tr->module_delta, module_delta);
9683 
9684 	/* Scan modules to make text delta for modules. */
9685 	module_for_each_mod(make_mod_delta, tr);
9686 
9687 	/* Set trace_clock to the same as in the previous boot. */
9688 	if (tscratch->clock_id != tr->clock_id) {
9689 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9690 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9691 			pr_info("the previous trace_clock info is not valid.");
9692 			goto reset;
9693 		}
9694 	}
9695 	return;
9696  reset:
9697 	/* Invalid trace modules */
9698 	memset(tscratch, 0, size);
9699 }
9700 
9701 static int
9702 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9703 {
9704 	enum ring_buffer_flags rb_flags;
9705 	struct trace_scratch *tscratch;
9706 	unsigned int scratch_size = 0;
9707 
9708 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9709 
9710 	buf->tr = tr;
9711 
9712 	if (tr->range_addr_start && tr->range_addr_size) {
9713 		/* Add scratch buffer to handle 128 modules */
9714 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9715 						      tr->range_addr_start,
9716 						      tr->range_addr_size,
9717 						      struct_size(tscratch, entries, 128));
9718 
9719 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9720 		setup_trace_scratch(tr, tscratch, scratch_size);
9721 
9722 		/*
9723 		 * This is basically the same as a mapped buffer,
9724 		 * with the same restrictions.
9725 		 */
9726 		tr->mapped++;
9727 	} else {
9728 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9729 	}
9730 	if (!buf->buffer)
9731 		return -ENOMEM;
9732 
9733 	buf->data = alloc_percpu(struct trace_array_cpu);
9734 	if (!buf->data) {
9735 		ring_buffer_free(buf->buffer);
9736 		buf->buffer = NULL;
9737 		return -ENOMEM;
9738 	}
9739 
9740 	/* Allocate the first page for all buffers */
9741 	set_buffer_entries(&tr->array_buffer,
9742 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9743 
9744 	return 0;
9745 }
9746 
9747 static void free_trace_buffer(struct array_buffer *buf)
9748 {
9749 	if (buf->buffer) {
9750 		ring_buffer_free(buf->buffer);
9751 		buf->buffer = NULL;
9752 		free_percpu(buf->data);
9753 		buf->data = NULL;
9754 	}
9755 }
9756 
9757 static int allocate_trace_buffers(struct trace_array *tr, int size)
9758 {
9759 	int ret;
9760 
9761 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9762 	if (ret)
9763 		return ret;
9764 
9765 #ifdef CONFIG_TRACER_MAX_TRACE
9766 	/* Fixed memory mapped buffer trace arrays do not have snapshot buffers */
9767 	if (tr->range_addr_start)
9768 		return 0;
9769 
9770 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9771 				    allocate_snapshot ? size : 1);
9772 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9773 		free_trace_buffer(&tr->array_buffer);
9774 		return -ENOMEM;
9775 	}
9776 	tr->allocated_snapshot = allocate_snapshot;
9777 
9778 	allocate_snapshot = false;
9779 #endif
9780 
9781 	return 0;
9782 }
9783 
9784 static void free_trace_buffers(struct trace_array *tr)
9785 {
9786 	if (!tr)
9787 		return;
9788 
9789 	free_trace_buffer(&tr->array_buffer);
9790 	kfree(tr->module_delta);
9791 
9792 #ifdef CONFIG_TRACER_MAX_TRACE
9793 	free_trace_buffer(&tr->max_buffer);
9794 #endif
9795 }
9796 
9797 static void init_trace_flags_index(struct trace_array *tr)
9798 {
9799 	int i;
9800 
9801 	/* Used by the trace options files */
9802 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9803 		tr->trace_flags_index[i] = i;
9804 }
9805 
9806 static void __update_tracer_options(struct trace_array *tr)
9807 {
9808 	struct tracer *t;
9809 
9810 	for (t = trace_types; t; t = t->next)
9811 		add_tracer_options(tr, t);
9812 }
9813 
9814 static void update_tracer_options(struct trace_array *tr)
9815 {
9816 	mutex_lock(&trace_types_lock);
9817 	tracer_options_updated = true;
9818 	__update_tracer_options(tr);
9819 	mutex_unlock(&trace_types_lock);
9820 }
9821 
9822 /* Must have trace_types_lock held */
9823 struct trace_array *trace_array_find(const char *instance)
9824 {
9825 	struct trace_array *tr, *found = NULL;
9826 
9827 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9828 		if (tr->name && strcmp(tr->name, instance) == 0) {
9829 			found = tr;
9830 			break;
9831 		}
9832 	}
9833 
9834 	return found;
9835 }
9836 
9837 struct trace_array *trace_array_find_get(const char *instance)
9838 {
9839 	struct trace_array *tr;
9840 
9841 	mutex_lock(&trace_types_lock);
9842 	tr = trace_array_find(instance);
9843 	if (tr)
9844 		tr->ref++;
9845 	mutex_unlock(&trace_types_lock);
9846 
9847 	return tr;
9848 }
9849 
9850 static int trace_array_create_dir(struct trace_array *tr)
9851 {
9852 	int ret;
9853 
9854 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9855 	if (!tr->dir)
9856 		return -EINVAL;
9857 
9858 	ret = event_trace_add_tracer(tr->dir, tr);
9859 	if (ret) {
9860 		tracefs_remove(tr->dir);
9861 		return ret;
9862 	}
9863 
9864 	init_tracer_tracefs(tr, tr->dir);
9865 	__update_tracer_options(tr);
9866 
9867 	return ret;
9868 }
9869 
9870 static struct trace_array *
9871 trace_array_create_systems(const char *name, const char *systems,
9872 			   unsigned long range_addr_start,
9873 			   unsigned long range_addr_size)
9874 {
9875 	struct trace_array *tr;
9876 	int ret;
9877 
9878 	ret = -ENOMEM;
9879 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9880 	if (!tr)
9881 		return ERR_PTR(ret);
9882 
9883 	tr->name = kstrdup(name, GFP_KERNEL);
9884 	if (!tr->name)
9885 		goto out_free_tr;
9886 
9887 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9888 		goto out_free_tr;
9889 
9890 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9891 		goto out_free_tr;
9892 
9893 	if (systems) {
9894 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9895 		if (!tr->system_names)
9896 			goto out_free_tr;
9897 	}
9898 
9899 	/* Only for boot up memory mapped ring buffers */
9900 	tr->range_addr_start = range_addr_start;
9901 	tr->range_addr_size = range_addr_size;
9902 
9903 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9904 
9905 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9906 
9907 	raw_spin_lock_init(&tr->start_lock);
9908 
9909 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9910 #ifdef CONFIG_TRACER_MAX_TRACE
9911 	spin_lock_init(&tr->snapshot_trigger_lock);
9912 #endif
9913 	tr->current_trace = &nop_trace;
9914 
9915 	INIT_LIST_HEAD(&tr->systems);
9916 	INIT_LIST_HEAD(&tr->events);
9917 	INIT_LIST_HEAD(&tr->hist_vars);
9918 	INIT_LIST_HEAD(&tr->err_log);
9919 	INIT_LIST_HEAD(&tr->marker_list);
9920 
9921 #ifdef CONFIG_MODULES
9922 	INIT_LIST_HEAD(&tr->mod_events);
9923 #endif
9924 
9925 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9926 		goto out_free_tr;
9927 
9928 	/* The ring buffer is expanded by default */
9929 	trace_set_ring_buffer_expanded(tr);
9930 
9931 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9932 		goto out_free_tr;
9933 
9934 	ftrace_init_trace_array(tr);
9935 
9936 	init_trace_flags_index(tr);
9937 
9938 	if (trace_instance_dir) {
9939 		ret = trace_array_create_dir(tr);
9940 		if (ret)
9941 			goto out_free_tr;
9942 	} else
9943 		__trace_early_add_events(tr);
9944 
9945 	list_add(&tr->list, &ftrace_trace_arrays);
9946 
9947 	tr->ref++;
9948 
9949 	return tr;
9950 
9951  out_free_tr:
9952 	ftrace_free_ftrace_ops(tr);
9953 	free_trace_buffers(tr);
9954 	free_cpumask_var(tr->pipe_cpumask);
9955 	free_cpumask_var(tr->tracing_cpumask);
9956 	kfree_const(tr->system_names);
9957 	kfree(tr->range_name);
9958 	kfree(tr->name);
9959 	kfree(tr);
9960 
9961 	return ERR_PTR(ret);
9962 }
9963 
9964 static struct trace_array *trace_array_create(const char *name)
9965 {
9966 	return trace_array_create_systems(name, NULL, 0, 0);
9967 }
9968 
9969 static int instance_mkdir(const char *name)
9970 {
9971 	struct trace_array *tr;
9972 	int ret;
9973 
9974 	guard(mutex)(&event_mutex);
9975 	guard(mutex)(&trace_types_lock);
9976 
9977 	ret = -EEXIST;
9978 	if (trace_array_find(name))
9979 		return -EEXIST;
9980 
9981 	tr = trace_array_create(name);
9982 
9983 	ret = PTR_ERR_OR_ZERO(tr);
9984 
9985 	return ret;
9986 }
9987 
9988 #ifdef CONFIG_MMU
9989 static u64 map_pages(unsigned long start, unsigned long size)
9990 {
9991 	unsigned long vmap_start, vmap_end;
9992 	struct vm_struct *area;
9993 	int ret;
9994 
9995 	area = get_vm_area(size, VM_IOREMAP);
9996 	if (!area)
9997 		return 0;
9998 
9999 	vmap_start = (unsigned long) area->addr;
10000 	vmap_end = vmap_start + size;
10001 
10002 	ret = vmap_page_range(vmap_start, vmap_end,
10003 			      start, pgprot_nx(PAGE_KERNEL));
10004 	if (ret < 0) {
10005 		free_vm_area(area);
10006 		return 0;
10007 	}
10008 
10009 	return (u64)vmap_start;
10010 }
10011 #else
10012 static inline u64 map_pages(unsigned long start, unsigned long size)
10013 {
10014 	return 0;
10015 }
10016 #endif
10017 
10018 /**
10019  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10020  * @name: The name of the trace array to be looked up/created.
10021  * @systems: A list of systems to create event directories for (NULL for all)
10022  *
10023  * Returns pointer to trace array with given name.
10024  * NULL, if it cannot be created.
10025  *
10026  * NOTE: This function increments the reference counter associated with the
10027  * trace array returned. This makes sure it cannot be freed while in use.
10028  * Use trace_array_put() once the trace array is no longer needed.
10029  * If the trace_array is to be freed, trace_array_destroy() needs to
10030  * be called after the trace_array_put(), or simply let user space delete
10031  * it from the tracefs instances directory. But until the
10032  * trace_array_put() is called, user space can not delete it.
10033  *
10034  */
10035 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10036 {
10037 	struct trace_array *tr;
10038 
10039 	guard(mutex)(&event_mutex);
10040 	guard(mutex)(&trace_types_lock);
10041 
10042 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10043 		if (tr->name && strcmp(tr->name, name) == 0) {
10044 			tr->ref++;
10045 			return tr;
10046 		}
10047 	}
10048 
10049 	tr = trace_array_create_systems(name, systems, 0, 0);
10050 
10051 	if (IS_ERR(tr))
10052 		tr = NULL;
10053 	else
10054 		tr->ref++;
10055 
10056 	return tr;
10057 }
10058 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
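
/*
 * A minimal usage sketch for kernel modules (the instance name and the
 * message below are illustrative, not taken from this file):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (tr) {
 *		trace_array_printk(tr, _THIS_IP_, "hello\n");
 *		trace_array_put(tr);
 *	}
 *
 * trace_array_put() only drops the reference taken here; call
 * trace_array_destroy() afterwards if the instance should also be
 * removed.
 */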
10059 
10060 static int __remove_instance(struct trace_array *tr)
10061 {
10062 	int i;
10063 
10064 	/* Reference counter for a newly created trace array = 1. */
10065 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10066 		return -EBUSY;
10067 
10068 	list_del(&tr->list);
10069 
10070 	/* Disable all the flags that were enabled coming in */
10071 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10072 		if ((1 << i) & ZEROED_TRACE_FLAGS)
10073 			set_tracer_flag(tr, 1 << i, 0);
10074 	}
10075 
10076 	if (printk_trace == tr)
10077 		update_printk_trace(&global_trace);
10078 
10079 	if (update_marker_trace(tr, 0))
10080 		synchronize_rcu();
10081 
10082 	tracing_set_nop(tr);
10083 	clear_ftrace_function_probes(tr);
10084 	event_trace_del_tracer(tr);
10085 	ftrace_clear_pids(tr);
10086 	ftrace_destroy_function_files(tr);
10087 	tracefs_remove(tr->dir);
10088 	free_percpu(tr->last_func_repeats);
10089 	free_trace_buffers(tr);
10090 	clear_tracing_err_log(tr);
10091 
10092 	if (tr->range_name) {
10093 		reserve_mem_release_by_name(tr->range_name);
10094 		kfree(tr->range_name);
10095 	}
10096 
10097 	for (i = 0; i < tr->nr_topts; i++) {
10098 		kfree(tr->topts[i].topts);
10099 	}
10100 	kfree(tr->topts);
10101 
10102 	free_cpumask_var(tr->pipe_cpumask);
10103 	free_cpumask_var(tr->tracing_cpumask);
10104 	kfree_const(tr->system_names);
10105 	kfree(tr->name);
10106 	kfree(tr);
10107 
10108 	return 0;
10109 }
10110 
10111 int trace_array_destroy(struct trace_array *this_tr)
10112 {
10113 	struct trace_array *tr;
10114 
10115 	if (!this_tr)
10116 		return -EINVAL;
10117 
10118 	guard(mutex)(&event_mutex);
10119 	guard(mutex)(&trace_types_lock);
10120 
10121 
10122 	/* Make sure the trace array exists before destroying it. */
10123 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10124 		if (tr == this_tr)
10125 			return __remove_instance(tr);
10126 	}
10127 
10128 	return -ENODEV;
10129 }
10130 EXPORT_SYMBOL_GPL(trace_array_destroy);
10131 
10132 static int instance_rmdir(const char *name)
10133 {
10134 	struct trace_array *tr;
10135 
10136 	guard(mutex)(&event_mutex);
10137 	guard(mutex)(&trace_types_lock);
10138 
10139 	tr = trace_array_find(name);
10140 	if (!tr)
10141 		return -ENODEV;
10142 
10143 	return __remove_instance(tr);
10144 }
10145 
10146 static __init void create_trace_instances(struct dentry *d_tracer)
10147 {
10148 	struct trace_array *tr;
10149 
10150 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10151 							 instance_mkdir,
10152 							 instance_rmdir);
10153 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10154 		return;
10155 
10156 	guard(mutex)(&event_mutex);
10157 	guard(mutex)(&trace_types_lock);
10158 
10159 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10160 		if (!tr->name)
10161 			continue;
10162 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10163 			     "Failed to create instance directory\n"))
10164 			return;
10165 	}
10166 }
10167 
10168 static void
10169 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10170 {
10171 	int cpu;
10172 
10173 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10174 			tr, &show_traces_fops);
10175 
10176 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10177 			tr, &set_tracer_fops);
10178 
10179 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10180 			  tr, &tracing_cpumask_fops);
10181 
10182 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10183 			  tr, &tracing_iter_fops);
10184 
10185 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10186 			  tr, &tracing_fops);
10187 
10188 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10189 			  tr, &tracing_pipe_fops);
10190 
10191 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10192 			  tr, &tracing_entries_fops);
10193 
10194 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10195 			  tr, &tracing_total_entries_fops);
10196 
10197 	trace_create_file("free_buffer", 0200, d_tracer,
10198 			  tr, &tracing_free_buffer_fops);
10199 
10200 	trace_create_file("trace_marker", 0220, d_tracer,
10201 			  tr, &tracing_mark_fops);
10202 
10203 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10204 
10205 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10206 			  tr, &tracing_mark_raw_fops);
10207 
10208 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10209 			  &trace_clock_fops);
10210 
10211 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10212 			  tr, &rb_simple_fops);
10213 
10214 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10215 			  &trace_time_stamp_mode_fops);
10216 
10217 	tr->buffer_percent = 50;
10218 
10219 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10220 			tr, &buffer_percent_fops);
10221 
10222 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10223 			  tr, &buffer_subbuf_size_fops);
10224 
10225 	create_trace_options_dir(tr);
10226 
10227 #ifdef CONFIG_TRACER_MAX_TRACE
10228 	trace_create_maxlat_file(tr, d_tracer);
10229 #endif
10230 
10231 	if (ftrace_create_function_files(tr, d_tracer))
10232 		MEM_FAIL(1, "Could not allocate function filter files");
10233 
10234 	if (tr->range_addr_start) {
10235 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10236 				  tr, &last_boot_fops);
10237 #ifdef CONFIG_TRACER_SNAPSHOT
10238 	} else {
10239 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10240 				  tr, &snapshot_fops);
10241 #endif
10242 	}
10243 
10244 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10245 			  tr, &tracing_err_log_fops);
10246 
10247 	for_each_tracing_cpu(cpu)
10248 		tracing_init_tracefs_percpu(tr, cpu);
10249 
10250 	ftrace_init_tracefs(tr, d_tracer);
10251 }
10252 
10253 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10254 {
10255 	struct vfsmount *mnt;
10256 	struct file_system_type *type;
10257 	struct fs_context *fc;
10258 	int ret;
10259 
10260 	/*
10261 	 * To maintain backward compatibility for tools that mount
10262 	 * debugfs to get to the tracing facility, tracefs is automatically
10263 	 * mounted to the debugfs/tracing directory.
10264 	 */
10265 	type = get_fs_type("tracefs");
10266 	if (!type)
10267 		return NULL;
10268 
10269 	fc = fs_context_for_submount(type, mntpt);
10270 	put_filesystem(type);
10271 	if (IS_ERR(fc))
10272 		return ERR_CAST(fc);
10273 
10274 	ret = vfs_parse_fs_string(fc, "source",
10275 				  "tracefs", strlen("tracefs"));
10276 	if (!ret)
10277 		mnt = fc_mount(fc);
10278 	else
10279 		mnt = ERR_PTR(ret);
10280 
10281 	put_fs_context(fc);
10282 	return mnt;
10283 }
10284 
10285 /**
10286  * tracing_init_dentry - initialize top level trace array
10287  *
10288  * This is called when creating files or directories in the tracing
10289  * directory. It is called via fs_initcall() by any of the boot up code
10290  * and expects to return the dentry of the top level tracing directory.
10291  */
10292 int tracing_init_dentry(void)
10293 {
10294 	struct trace_array *tr = &global_trace;
10295 
10296 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10297 		pr_warn("Tracing disabled due to lockdown\n");
10298 		return -EPERM;
10299 	}
10300 
10301 	/* The top level trace array uses NULL as parent */
10302 	if (tr->dir)
10303 		return 0;
10304 
10305 	if (WARN_ON(!tracefs_initialized()))
10306 		return -ENODEV;
10307 
10308 	/*
10309 	 * As there may still be users that expect the tracing
10310 	 * files to exist in debugfs/tracing, we must automount
10311 	 * the tracefs file system there, so older tools still
10312 	 * work with the newer kernel.
10313 	 */
10314 	tr->dir = debugfs_create_automount("tracing", NULL,
10315 					   trace_automount, NULL);
10316 
10317 	return 0;
10318 }
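
/*
 * With the automount set up above, both of the following reach the
 * same files (a sketch of the expected behaviour, not something
 * enforced here):
 *
 *   mount -t tracefs nodev /sys/kernel/tracing
 *   ls /sys/kernel/debug/tracing      # automounts tracefs for old tools
 */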
10319 
10320 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10321 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10322 
10323 static struct workqueue_struct *eval_map_wq __initdata;
10324 static struct work_struct eval_map_work __initdata;
10325 static struct work_struct tracerfs_init_work __initdata;
10326 
10327 static void __init eval_map_work_func(struct work_struct *work)
10328 {
10329 	int len;
10330 
10331 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10332 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10333 }
10334 
10335 static int __init trace_eval_init(void)
10336 {
10337 	INIT_WORK(&eval_map_work, eval_map_work_func);
10338 
10339 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10340 	if (!eval_map_wq) {
10341 		pr_err("Unable to allocate eval_map_wq\n");
10342 		/* Do work here */
10343 		eval_map_work_func(&eval_map_work);
10344 		return -ENOMEM;
10345 	}
10346 
10347 	queue_work(eval_map_wq, &eval_map_work);
10348 	return 0;
10349 }
10350 
10351 subsys_initcall(trace_eval_init);
10352 
10353 static int __init trace_eval_sync(void)
10354 {
10355 	/* Make sure the eval map updates are finished */
10356 	if (eval_map_wq)
10357 		destroy_workqueue(eval_map_wq);
10358 	return 0;
10359 }
10360 
10361 late_initcall_sync(trace_eval_sync);
10362 
10363 
10364 #ifdef CONFIG_MODULES
10365 
10366 bool module_exists(const char *module)
10367 {
10368 	/* All modules have the symbol __this_module */
10369 	static const char this_mod[] = "__this_module";
10370 	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
10371 	unsigned long val;
10372 	int n;
10373 
10374 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10375 
10376 	if (n > sizeof(modname) - 1)
10377 		return false;
10378 
10379 	val = module_kallsyms_lookup_name(modname);
10380 	return val != 0;
10381 }
10382 
10383 static void trace_module_add_evals(struct module *mod)
10384 {
10385 	if (!mod->num_trace_evals)
10386 		return;
10387 
10388 	/*
10389 	 * Modules with bad taint do not have events created, do
10390 	 * not bother with enums either.
10391 	 */
10392 	if (trace_module_has_bad_taint(mod))
10393 		return;
10394 
10395 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10396 }
10397 
10398 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10399 static void trace_module_remove_evals(struct module *mod)
10400 {
10401 	union trace_eval_map_item *map;
10402 	union trace_eval_map_item **last = &trace_eval_maps;
10403 
10404 	if (!mod->num_trace_evals)
10405 		return;
10406 
10407 	guard(mutex)(&trace_eval_mutex);
10408 
10409 	map = trace_eval_maps;
10410 
10411 	while (map) {
10412 		if (map->head.mod == mod)
10413 			break;
10414 		map = trace_eval_jmp_to_tail(map);
10415 		last = &map->tail.next;
10416 		map = map->tail.next;
10417 	}
10418 	if (!map)
10419 		return;
10420 
10421 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10422 	kfree(map);
10423 }
10424 #else
10425 static inline void trace_module_remove_evals(struct module *mod) { }
10426 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10427 
10428 static void trace_module_record(struct module *mod, bool add)
10429 {
10430 	struct trace_array *tr;
10431 	unsigned long flags;
10432 
10433 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10434 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10435 		/* Update any persistent trace array that has already been started */
10436 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10437 			guard(mutex)(&scratch_mutex);
10438 			save_mod(mod, tr);
10439 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10440 			/* Update delta if the module was loaded in the previous boot */
10441 			make_mod_delta(mod, tr);
10442 		}
10443 	}
10444 }
10445 
10446 static int trace_module_notify(struct notifier_block *self,
10447 			       unsigned long val, void *data)
10448 {
10449 	struct module *mod = data;
10450 
10451 	switch (val) {
10452 	case MODULE_STATE_COMING:
10453 		trace_module_add_evals(mod);
10454 		trace_module_record(mod, true);
10455 		break;
10456 	case MODULE_STATE_GOING:
10457 		trace_module_remove_evals(mod);
10458 		trace_module_record(mod, false);
10459 		break;
10460 	}
10461 
10462 	return NOTIFY_OK;
10463 }
10464 
10465 static struct notifier_block trace_module_nb = {
10466 	.notifier_call = trace_module_notify,
10467 	.priority = 0,
10468 };
10469 #endif /* CONFIG_MODULES */
10470 
10471 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10472 {
10473 
10474 	event_trace_init();
10475 
10476 	init_tracer_tracefs(&global_trace, NULL);
10477 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10478 
10479 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10480 			&global_trace, &tracing_thresh_fops);
10481 
10482 	trace_create_file("README", TRACE_MODE_READ, NULL,
10483 			NULL, &tracing_readme_fops);
10484 
10485 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10486 			NULL, &tracing_saved_cmdlines_fops);
10487 
10488 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10489 			  NULL, &tracing_saved_cmdlines_size_fops);
10490 
10491 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10492 			NULL, &tracing_saved_tgids_fops);
10493 
10494 	trace_create_eval_file(NULL);
10495 
10496 #ifdef CONFIG_MODULES
10497 	register_module_notifier(&trace_module_nb);
10498 #endif
10499 
10500 #ifdef CONFIG_DYNAMIC_FTRACE
10501 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10502 			NULL, &tracing_dyn_info_fops);
10503 #endif
10504 
10505 	create_trace_instances(NULL);
10506 
10507 	update_tracer_options(&global_trace);
10508 }
10509 
10510 static __init int tracer_init_tracefs(void)
10511 {
10512 	int ret;
10513 
10514 	trace_access_lock_init();
10515 
10516 	ret = tracing_init_dentry();
10517 	if (ret)
10518 		return 0;
10519 
10520 	if (eval_map_wq) {
10521 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10522 		queue_work(eval_map_wq, &tracerfs_init_work);
10523 	} else {
10524 		tracer_init_tracefs_work_func(NULL);
10525 	}
10526 
10527 	rv_init_interface();
10528 
10529 	return 0;
10530 }
10531 
10532 fs_initcall(tracer_init_tracefs);
10533 
10534 static int trace_die_panic_handler(struct notifier_block *self,
10535 				unsigned long ev, void *unused);
10536 
10537 static struct notifier_block trace_panic_notifier = {
10538 	.notifier_call = trace_die_panic_handler,
10539 	.priority = INT_MAX - 1,
10540 };
10541 
10542 static struct notifier_block trace_die_notifier = {
10543 	.notifier_call = trace_die_panic_handler,
10544 	.priority = INT_MAX - 1,
10545 };
10546 
10547 /*
10548  * The idea is to execute the following die/panic callback early, in order
10549  * to avoid showing irrelevant information in the trace (like other panic
10550  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10551  * warnings get disabled (to prevent potential log flooding).
10552  */
10553 static int trace_die_panic_handler(struct notifier_block *self,
10554 				unsigned long ev, void *unused)
10555 {
10556 	if (!ftrace_dump_on_oops_enabled())
10557 		return NOTIFY_DONE;
10558 
10559 	/* The die notifier requires DIE_OOPS to trigger */
10560 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10561 		return NOTIFY_DONE;
10562 
10563 	ftrace_dump(DUMP_PARAM);
10564 
10565 	return NOTIFY_DONE;
10566 }
10567 
10568 /*
10569  * printk is set to max of 1024, we really don't need it that big.
10570  * Nothing should be printing 1000 characters anyway.
10571  */
10572 #define TRACE_MAX_PRINT		1000
10573 
10574 /*
10575  * Define here KERN_TRACE so that we have one place to modify
10576  * it if we decide to change what log level the ftrace dump
10577  * should be at.
10578  */
10579 #define KERN_TRACE		KERN_EMERG
10580 
10581 void
10582 trace_printk_seq(struct trace_seq *s)
10583 {
10584 	/* Probably should print a warning here. */
10585 	if (s->seq.len >= TRACE_MAX_PRINT)
10586 		s->seq.len = TRACE_MAX_PRINT;
10587 
10588 	/*
10589 	 * More paranoid code. Although the buffer size is set to
10590 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10591 	 * an extra layer of protection.
10592 	 */
10593 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10594 		s->seq.len = s->seq.size - 1;
10595 
10596 	/* Should be zero terminated, but we are paranoid. */
10597 	s->buffer[s->seq.len] = 0;
10598 
10599 	printk(KERN_TRACE "%s", s->buffer);
10600 
10601 	trace_seq_init(s);
10602 }
10603 
10604 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10605 {
10606 	iter->tr = tr;
10607 	iter->trace = iter->tr->current_trace;
10608 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10609 	iter->array_buffer = &tr->array_buffer;
10610 
10611 	if (iter->trace && iter->trace->open)
10612 		iter->trace->open(iter);
10613 
10614 	/* Annotate start of buffers if we had overruns */
10615 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10616 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10617 
10618 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10619 	if (trace_clocks[iter->tr->clock_id].in_ns)
10620 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10621 
10622 	/* Cannot use kmalloc for iter.temp and iter.fmt */
10623 	iter->temp = static_temp_buf;
10624 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10625 	iter->fmt = static_fmt_buf;
10626 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10627 }
10628 
10629 void trace_init_global_iter(struct trace_iterator *iter)
10630 {
10631 	trace_init_iter(iter, &global_trace);
10632 }
10633 
10634 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10635 {
10636 	/* use static because iter can be a bit big for the stack */
10637 	static struct trace_iterator iter;
10638 	unsigned int old_userobj;
10639 	unsigned long flags;
10640 	int cnt = 0;
10641 
10642 	/*
10643 	 * Always turn off tracing when we dump.
10644 	 * We don't need to show trace output of what happens
10645 	 * between multiple crashes.
10646 	 *
10647 	 * If the user does a sysrq-z, then they can re-enable
10648 	 * tracing with echo 1 > tracing_on.
10649 	 */
10650 	tracer_tracing_off(tr);
10651 
10652 	local_irq_save(flags);
10653 
10654 	/* Simulate the iterator */
10655 	trace_init_iter(&iter, tr);
10656 
10657 	/* While dumping, do not allow the buffer to be enabled */
10658 	tracer_tracing_disable(tr);
10659 
10660 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10661 
10662 	/* don't look at user memory in panic mode */
10663 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10664 
10665 	if (dump_mode == DUMP_ORIG)
10666 		iter.cpu_file = raw_smp_processor_id();
10667 	else
10668 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10669 
10670 	if (tr == &global_trace)
10671 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10672 	else
10673 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10674 
10675 	/* Did function tracer already get disabled? */
10676 	if (ftrace_is_dead()) {
10677 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10678 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10679 	}
10680 
10681 	/*
10682 	 * We need to stop all tracing on all CPUS to read
10683 	 * the next buffer. This is a bit expensive, but is
10684 	 * not done often. We read everything we can,
10685 	 * and then release the locks again.
10686 	 */
10687 
10688 	while (!trace_empty(&iter)) {
10689 
10690 		if (!cnt)
10691 			printk(KERN_TRACE "---------------------------------\n");
10692 
10693 		cnt++;
10694 
10695 		trace_iterator_reset(&iter);
10696 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10697 
10698 		if (trace_find_next_entry_inc(&iter) != NULL) {
10699 			int ret;
10700 
10701 			ret = print_trace_line(&iter);
10702 			if (ret != TRACE_TYPE_NO_CONSUME)
10703 				trace_consume(&iter);
10704 		}
10705 		touch_nmi_watchdog();
10706 
10707 		trace_printk_seq(&iter.seq);
10708 	}
10709 
10710 	if (!cnt)
10711 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10712 	else
10713 		printk(KERN_TRACE "---------------------------------\n");
10714 
10715 	tr->trace_flags |= old_userobj;
10716 
10717 	tracer_tracing_enable(tr);
10718 	local_irq_restore(flags);
10719 }
10720 
10721 static void ftrace_dump_by_param(void)
10722 {
10723 	bool first_param = true;
10724 	char dump_param[MAX_TRACER_SIZE];
10725 	char *buf, *token, *inst_name;
10726 	struct trace_array *tr;
10727 
10728 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10729 	buf = dump_param;
10730 
10731 	while ((token = strsep(&buf, ",")) != NULL) {
10732 		if (first_param) {
10733 			first_param = false;
10734 			if (!strcmp("0", token))
10735 				continue;
10736 			else if (!strcmp("1", token)) {
10737 				ftrace_dump_one(&global_trace, DUMP_ALL);
10738 				continue;
10739 			}
10740 			else if (!strcmp("2", token) ||
10741 			  !strcmp("orig_cpu", token)) {
10742 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10743 				continue;
10744 			}
10745 		}
10746 
10747 		inst_name = strsep(&token, "=");
10748 		tr = trace_array_find(inst_name);
10749 		if (!tr) {
10750 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10751 			continue;
10752 		}
10753 
10754 		if (token && (!strcmp("2", token) ||
10755 			  !strcmp("orig_cpu", token)))
10756 			ftrace_dump_one(tr, DUMP_ORIG);
10757 		else
10758 			ftrace_dump_one(tr, DUMP_ALL);
10759 	}
10760 }
10761 
10762 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10763 {
10764 	static atomic_t dump_running;
10765 
10766 	/* Only allow one dump user at a time. */
10767 	if (atomic_inc_return(&dump_running) != 1) {
10768 		atomic_dec(&dump_running);
10769 		return;
10770 	}
10771 
10772 	switch (oops_dump_mode) {
10773 	case DUMP_ALL:
10774 		ftrace_dump_one(&global_trace, DUMP_ALL);
10775 		break;
10776 	case DUMP_ORIG:
10777 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10778 		break;
10779 	case DUMP_PARAM:
10780 		ftrace_dump_by_param();
10781 		break;
10782 	case DUMP_NONE:
10783 		break;
10784 	default:
10785 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10786 		ftrace_dump_one(&global_trace, DUMP_ALL);
10787 	}
10788 
10789 	atomic_dec(&dump_running);
10790 }
10791 EXPORT_SYMBOL_GPL(ftrace_dump);
10792 
10793 #define WRITE_BUFSIZE  4096
10794 
10795 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10796 				size_t count, loff_t *ppos,
10797 				int (*createfn)(const char *))
10798 {
10799 	char *kbuf, *buf, *tmp;
10800 	int ret = 0;
10801 	size_t done = 0;
10802 	size_t size;
10803 
10804 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10805 	if (!kbuf)
10806 		return -ENOMEM;
10807 
10808 	while (done < count) {
10809 		size = count - done;
10810 
10811 		if (size >= WRITE_BUFSIZE)
10812 			size = WRITE_BUFSIZE - 1;
10813 
10814 		if (copy_from_user(kbuf, buffer + done, size)) {
10815 			ret = -EFAULT;
10816 			goto out;
10817 		}
10818 		kbuf[size] = '\0';
10819 		buf = kbuf;
10820 		do {
10821 			tmp = strchr(buf, '\n');
10822 			if (tmp) {
10823 				*tmp = '\0';
10824 				size = tmp - buf + 1;
10825 			} else {
10826 				size = strlen(buf);
10827 				if (done + size < count) {
10828 					if (buf != kbuf)
10829 						break;
10830 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10831 					pr_warn("Line length is too long: Should be less than %d\n",
10832 						WRITE_BUFSIZE - 2);
10833 					ret = -EINVAL;
10834 					goto out;
10835 				}
10836 			}
10837 			done += size;
10838 
10839 			/* Remove comments */
10840 			tmp = strchr(buf, '#');
10841 
10842 			if (tmp)
10843 				*tmp = '\0';
10844 
10845 			ret = createfn(buf);
10846 			if (ret)
10847 				goto out;
10848 			buf += size;
10849 
10850 		} while (done < count);
10851 	}
10852 	ret = done;
10853 
10854 out:
10855 	kfree(kbuf);
10856 
10857 	return ret;
10858 }
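
/*
 * trace_parse_run_command() splits a (possibly multi-line) write into
 * '\n'-terminated commands, strips '#' comments and hands each line to
 * @createfn. A usage sketch from a caller such as the dynamic event
 * interface (the probe name is illustrative):
 *
 *   echo 'p:myprobe do_sys_open' >> dynamic_events
 *   echo '-:myprobe'             >> dynamic_events
 *
 * where each written line results in one createfn() invocation.
 */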
10859 
10860 #ifdef CONFIG_TRACER_MAX_TRACE
10861 __init static bool tr_needs_alloc_snapshot(const char *name)
10862 {
10863 	char *test;
10864 	int len = strlen(name);
10865 	bool ret;
10866 
10867 	if (!boot_snapshot_index)
10868 		return false;
10869 
10870 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10871 	    boot_snapshot_info[len] == '\t')
10872 		return true;
10873 
10874 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10875 	if (!test)
10876 		return false;
10877 
10878 	sprintf(test, "\t%s\t", name);
10879 	ret = strstr(boot_snapshot_info, test) == NULL;
10880 	kfree(test);
10881 	return ret;
10882 }
10883 
10884 __init static void do_allocate_snapshot(const char *name)
10885 {
10886 	if (!tr_needs_alloc_snapshot(name))
10887 		return;
10888 
10889 	/*
10890 	 * When allocate_snapshot is set, the next call to
10891 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10892 	 * will allocate the snapshot buffer. That will also clear
10893 	 * this flag.
10894 	 */
10895 	allocate_snapshot = true;
10896 }
10897 #else
10898 static inline void do_allocate_snapshot(const char *name) { }
10899 #endif
10900 
10901 __init static void enable_instances(void)
10902 {
10903 	struct trace_array *tr;
10904 	bool memmap_area = false;
10905 	char *curr_str;
10906 	char *name;
10907 	char *str;
10908 	char *tok;
10909 
10910 	/* A tab is always appended */
10911 	boot_instance_info[boot_instance_index - 1] = '\0';
10912 	str = boot_instance_info;
10913 
10914 	while ((curr_str = strsep(&str, "\t"))) {
10915 		phys_addr_t start = 0;
10916 		phys_addr_t size = 0;
10917 		unsigned long addr = 0;
10918 		bool traceprintk = false;
10919 		bool traceoff = false;
10920 		char *flag_delim;
10921 		char *addr_delim;
10922 		char *rname __free(kfree) = NULL;
10923 
10924 		tok = strsep(&curr_str, ",");
10925 
10926 		flag_delim = strchr(tok, '^');
10927 		addr_delim = strchr(tok, '@');
10928 
10929 		if (addr_delim)
10930 			*addr_delim++ = '\0';
10931 
10932 		if (flag_delim)
10933 			*flag_delim++ = '\0';
10934 
10935 		name = tok;
10936 
10937 		if (flag_delim) {
10938 			char *flag;
10939 
10940 			while ((flag = strsep(&flag_delim, "^"))) {
10941 				if (strcmp(flag, "traceoff") == 0) {
10942 					traceoff = true;
10943 				} else if ((strcmp(flag, "printk") == 0) ||
10944 					   (strcmp(flag, "traceprintk") == 0) ||
10945 					   (strcmp(flag, "trace_printk") == 0)) {
10946 					traceprintk = true;
10947 				} else {
10948 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10949 						flag, name);
10950 				}
10951 			}
10952 		}
10953 
10954 		tok = addr_delim;
10955 		if (tok && isdigit(*tok)) {
10956 			start = memparse(tok, &tok);
10957 			if (!start) {
10958 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10959 					name);
10960 				continue;
10961 			}
10962 			if (*tok != ':') {
10963 				pr_warn("Tracing: No size specified for instance %s\n", name);
10964 				continue;
10965 			}
10966 			tok++;
10967 			size = memparse(tok, &tok);
10968 			if (!size) {
10969 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10970 					name);
10971 				continue;
10972 			}
10973 			memmap_area = true;
10974 		} else if (tok) {
10975 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10976 				start = 0;
10977 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10978 				continue;
10979 			}
10980 			rname = kstrdup(tok, GFP_KERNEL);
10981 		}
10982 
10983 		if (start) {
10984 			/* Start and size must be page aligned */
10985 			if (start & ~PAGE_MASK) {
10986 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10987 				continue;
10988 			}
10989 			if (size & ~PAGE_MASK) {
10990 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10991 				continue;
10992 			}
10993 
10994 			if (memmap_area)
10995 				addr = map_pages(start, size);
10996 			else
10997 				addr = (unsigned long)phys_to_virt(start);
10998 			if (addr) {
10999 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11000 					name, &start, (unsigned long)size);
11001 			} else {
11002 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
11003 				continue;
11004 			}
11005 		} else {
11006 			/* Only non-mapped buffers have snapshot buffers */
11007 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
11008 				do_allocate_snapshot(name);
11009 		}
11010 
11011 		tr = trace_array_create_systems(name, NULL, addr, size);
11012 		if (IS_ERR(tr)) {
11013 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
11014 			continue;
11015 		}
11016 
11017 		if (traceoff)
11018 			tracer_tracing_off(tr);
11019 
11020 		if (traceprintk)
11021 			update_printk_trace(tr);
11022 
11023 		/*
11024 		 * memmap'd buffers can not be freed.
11025 		 */
11026 		if (memmap_area) {
11027 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11028 			tr->ref++;
11029 		}
11030 
11031 		if (start) {
11032 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11033 			tr->range_name = no_free_ptr(rname);
11034 		}
11035 
11036 		while ((tok = strsep(&curr_str, ","))) {
11037 			early_enable_events(tr, tok, true);
11038 		}
11039 	}
11040 }
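
/*
 * enable_instances() consumes the boot_instance_info string built from
 * the "trace_instance=" kernel command line option. Based on the
 * parsing above, each tab-separated entry looks like (values here are
 * illustrative):
 *
 *   trace_instance=foo^traceoff^traceprintk@0x1000000:1M,sched:sched_switch
 *   trace_instance=bar@myreserve,irq
 *
 * i.e. a name, optional "^flag" modifiers, an optional "@start:size"
 * physical address or "@name" of a reserve_mem region, and a comma
 * separated list of events/systems to enable early.
 */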
11041 
11042 __init static int tracer_alloc_buffers(void)
11043 {
11044 	int ring_buf_size;
11045 	int ret = -ENOMEM;
11046 
11047 
11048 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
11049 		pr_warn("Tracing disabled due to lockdown\n");
11050 		return -EPERM;
11051 	}
11052 
11053 	/*
11054 	 * Make sure we don't accidentally add more trace options
11055 	 * than we have bits for.
11056 	 */
11057 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11058 
11059 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11060 		goto out;
11061 
11062 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11063 		goto out_free_buffer_mask;
11064 
11065 	/* Only allocate trace_printk buffers if a trace_printk exists */
11066 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11067 		/* Must be called before global_trace.buffer is allocated */
11068 		trace_printk_init_buffers();
11069 
11070 	/* To save memory, keep the ring buffer size to its minimum */
11071 	if (global_trace.ring_buffer_expanded)
11072 		ring_buf_size = trace_buf_size;
11073 	else
11074 		ring_buf_size = 1;
11075 
11076 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11077 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11078 
11079 	raw_spin_lock_init(&global_trace.start_lock);
11080 
11081 	/*
11082 	 * The prepare callback allocates some memory for the ring buffer. We
11083 	 * don't free the buffer if the CPU goes down. If we were to free
11084 	 * the buffer, then the user would lose any trace that was in the
11085 	 * buffer. The memory will be removed once the "instance" is removed.
11086 	 */
11087 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11088 				      "trace/RB:prepare", trace_rb_cpu_prepare,
11089 				      NULL);
11090 	if (ret < 0)
11091 		goto out_free_cpumask;
11092 	/* Used for event triggers */
11093 	ret = -ENOMEM;
11094 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11095 	if (!temp_buffer)
11096 		goto out_rm_hp_state;
11097 
11098 	if (trace_create_savedcmd() < 0)
11099 		goto out_free_temp_buffer;
11100 
11101 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11102 		goto out_free_savedcmd;
11103 
11104 	/* TODO: make the number of buffers hot pluggable with CPUS */
11105 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11106 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11107 		goto out_free_pipe_cpumask;
11108 	}
11109 	if (global_trace.buffer_disabled)
11110 		tracing_off();
11111 
11112 	if (trace_boot_clock) {
11113 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
11114 		if (ret < 0)
11115 			pr_warn("Trace clock %s not defined, going back to default\n",
11116 				trace_boot_clock);
11117 	}
11118 
11119 	/*
11120 	 * register_tracer() might reference current_trace, so it
11121 	 * needs to be set before we register anything. This is
11122 	 * just a bootstrap of current_trace anyway.
11123 	 */
11124 	global_trace.current_trace = &nop_trace;
11125 
11126 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11127 #ifdef CONFIG_TRACER_MAX_TRACE
11128 	spin_lock_init(&global_trace.snapshot_trigger_lock);
11129 #endif
11130 	ftrace_init_global_array_ops(&global_trace);
11131 
11132 #ifdef CONFIG_MODULES
11133 	INIT_LIST_HEAD(&global_trace.mod_events);
11134 #endif
11135 
11136 	init_trace_flags_index(&global_trace);
11137 
11138 	register_tracer(&nop_trace);
11139 
11140 	/* Function tracing may start here (via kernel command line) */
11141 	init_function_trace();
11142 
11143 	/* All seems OK, enable tracing */
11144 	tracing_disabled = 0;
11145 
11146 	atomic_notifier_chain_register(&panic_notifier_list,
11147 				       &trace_panic_notifier);
11148 
11149 	register_die_notifier(&trace_die_notifier);
11150 
11151 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11152 
11153 	INIT_LIST_HEAD(&global_trace.systems);
11154 	INIT_LIST_HEAD(&global_trace.events);
11155 	INIT_LIST_HEAD(&global_trace.hist_vars);
11156 	INIT_LIST_HEAD(&global_trace.err_log);
11157 	list_add(&global_trace.marker_list, &marker_copies);
11158 	list_add(&global_trace.list, &ftrace_trace_arrays);
11159 
11160 	apply_trace_boot_options();
11161 
11162 	register_snapshot_cmd();
11163 
11164 	return 0;
11165 
11166 out_free_pipe_cpumask:
11167 	free_cpumask_var(global_trace.pipe_cpumask);
11168 out_free_savedcmd:
11169 	trace_free_saved_cmdlines_buffer();
11170 out_free_temp_buffer:
11171 	ring_buffer_free(temp_buffer);
11172 out_rm_hp_state:
11173 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11174 out_free_cpumask:
11175 	free_cpumask_var(global_trace.tracing_cpumask);
11176 out_free_buffer_mask:
11177 	free_cpumask_var(tracing_buffer_mask);
11178 out:
11179 	return ret;
11180 }
11181 
11182 #ifdef CONFIG_FUNCTION_TRACER
11183 /* Used to set module cached ftrace filtering at boot up */
11184 __init struct trace_array *trace_get_global_array(void)
11185 {
11186 	return &global_trace;
11187 }
11188 #endif
11189 
11190 void __init ftrace_boot_snapshot(void)
11191 {
11192 #ifdef CONFIG_TRACER_MAX_TRACE
11193 	struct trace_array *tr;
11194 
11195 	if (!snapshot_at_boot)
11196 		return;
11197 
11198 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11199 		if (!tr->allocated_snapshot)
11200 			continue;
11201 
11202 		tracing_snapshot_instance(tr);
11203 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11204 	}
11205 #endif
11206 }
11207 
11208 void __init early_trace_init(void)
11209 {
11210 	if (tracepoint_printk) {
11211 		tracepoint_print_iter =
11212 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11213 		if (MEM_FAIL(!tracepoint_print_iter,
11214 			     "Failed to allocate trace iterator\n"))
11215 			tracepoint_printk = 0;
11216 		else
11217 			static_key_enable(&tracepoint_printk_key.key);
11218 	}
11219 	tracer_alloc_buffers();
11220 
11221 	init_events();
11222 }
11223 
11224 void __init trace_init(void)
11225 {
11226 	trace_event_init();
11227 
11228 	if (boot_instance_index)
11229 		enable_instances();
11230 }
11231 
11232 __init static void clear_boot_tracer(void)
11233 {
11234 	/*
11235 	 * The default bootup tracer name points into an init section
11236 	 * that is about to be freed. This function is called at
11237 	 * late_initcall time; if the boot tracer was not registered
11238 	 * by now, clear it out so that a later registration does not
11239 	 * access the freed buffer.
11240 	 */
11241 	if (!default_bootup_tracer)
11242 		return;
11243 
11244 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11245 	       default_bootup_tracer);
11246 	default_bootup_tracer = NULL;
11247 }
11248 
11249 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11250 __init static void tracing_set_default_clock(void)
11251 {
11252 	/* sched_clock_stable() is determined in late_initcall */
11253 	if (!trace_boot_clock && !sched_clock_stable()) {
11254 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11255 			pr_warn("Can not set tracing clock due to lockdown\n");
11256 			return;
11257 		}
11258 
11259 		printk(KERN_WARNING
11260 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11261 		       "If you want to keep using the local clock, then add:\n"
11262 		       "  \"trace_clock=local\"\n"
11263 		       "on the kernel command line\n");
11264 		tracing_set_clock(&global_trace, "global");
11265 	}
11266 }
11267 #else
11268 static inline void tracing_set_default_clock(void) { }
11269 #endif
11270 
11271 __init static int late_trace_init(void)
11272 {
11273 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11274 		static_key_disable(&tracepoint_printk_key.key);
11275 		tracepoint_printk = 0;
11276 	}
11277 
11278 	if (traceoff_after_boot)
11279 		tracing_off();
11280 
11281 	tracing_set_default_clock();
11282 	clear_boot_tracer();
11283 	return 0;
11284 }
11285 
11286 late_initcall_sync(late_trace_init);
11287