xref: /linux/kernel/trace/trace.c (revision a9fc2304972b1db28b88af8203dffef23e1e92ba)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54 
55 #include "trace.h"
56 #include "trace_output.h"
57 
58 #ifdef CONFIG_FTRACE_STARTUP_TEST
59 /*
60  * We need to change this state when a selftest is running.
61  * A selftest will lurk into the ring-buffer to count the
62  * entries inserted during the selftest although some concurrent
63  * insertions into the ring-buffer such as trace_printk could occurred
64  * at the same time, giving false positive or negative results.
65  */
66 static bool __read_mostly tracing_selftest_running;
67 
68 /*
69  * If boot-time tracing including tracers/events via kernel cmdline
70  * is running, we do not want to run SELFTEST.
71  */
72 bool __read_mostly tracing_selftest_disabled;
73 
74 void __init disable_tracing_selftest(const char *reason)
75 {
76 	if (!tracing_selftest_disabled) {
77 		tracing_selftest_disabled = true;
78 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
79 	}
80 }
81 #else
82 #define tracing_selftest_running	0
83 #define tracing_selftest_disabled	0
84 #endif
85 
86 /* Pipe tracepoints to printk */
87 static struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91 
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94 	{ }
95 };
96 
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100 	return 0;
101 }
102 
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 DEFINE_PER_CPU(bool, trace_taskinfo_save);
109 
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 static int tracing_disabled = 1;
117 
118 cpumask_var_t __read_mostly	tracing_buffer_mask;
119 
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputing it to a
127  * serial console.
128  *
129  * It is default off, but you can enable it with either specifying
130  * "ftrace_dump_on_oops" in the kernel command line, or setting
131  * /proc/sys/kernel/ftrace_dump_on_oops
132  * Set 1 if you want to dump buffers of all CPUs
133  * Set 2 if you want to dump the buffer of the CPU that triggered oops
134  * Set instance name if you want to dump the specific trace instance
135  * Multiple instance dump is also supported, and instances are seperated
136  * by commas.
137  */
138 /* Set to string format zero to disable by default */
139 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
140 
141 /* When set, tracing will stop when a WARN*() is hit */
142 int __disable_trace_on_warning;
143 
144 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
145 /* Map of enums to their values, for "eval_map" file */
146 struct trace_eval_map_head {
147 	struct module			*mod;
148 	unsigned long			length;
149 };
150 
151 union trace_eval_map_item;
152 
153 struct trace_eval_map_tail {
154 	/*
155 	 * "end" is first and points to NULL as it must be different
156 	 * than "mod" or "eval_string"
157 	 */
158 	union trace_eval_map_item	*next;
159 	const char			*end;	/* points to NULL */
160 };
161 
162 static DEFINE_MUTEX(trace_eval_mutex);
163 
164 /*
165  * The trace_eval_maps are saved in an array with two extra elements,
166  * one at the beginning, and one at the end. The beginning item contains
167  * the count of the saved maps (head.length), and the module they
168  * belong to if not built in (head.mod). The ending item contains a
169  * pointer to the next array of saved eval_map items.
170  */
171 union trace_eval_map_item {
172 	struct trace_eval_map		map;
173 	struct trace_eval_map_head	head;
174 	struct trace_eval_map_tail	tail;
175 };
176 
177 static union trace_eval_map_item *trace_eval_maps;
178 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
179 
180 int tracing_set_tracer(struct trace_array *tr, const char *buf);
181 static void ftrace_trace_userstack(struct trace_array *tr,
182 				   struct trace_buffer *buffer,
183 				   unsigned int trace_ctx);
184 
185 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
186 static char *default_bootup_tracer;
187 
188 static bool allocate_snapshot;
189 static bool snapshot_at_boot;
190 
191 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_instance_index;
193 
194 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_snapshot_index;
196 
197 static int __init set_cmdline_ftrace(char *str)
198 {
199 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
200 	default_bootup_tracer = bootup_tracer_buf;
201 	/* We are using ftrace early, expand it */
202 	trace_set_ring_buffer_expanded(NULL);
203 	return 1;
204 }
205 __setup("ftrace=", set_cmdline_ftrace);
206 
207 int ftrace_dump_on_oops_enabled(void)
208 {
209 	if (!strcmp("0", ftrace_dump_on_oops))
210 		return 0;
211 	else
212 		return 1;
213 }
214 
215 static int __init set_ftrace_dump_on_oops(char *str)
216 {
217 	if (!*str) {
218 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
219 		return 1;
220 	}
221 
222 	if (*str == ',') {
223 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
224 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
225 		return 1;
226 	}
227 
228 	if (*str++ == '=') {
229 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
230 		return 1;
231 	}
232 
233 	return 0;
234 }
235 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
236 
237 static int __init stop_trace_on_warning(char *str)
238 {
239 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
240 		__disable_trace_on_warning = 1;
241 	return 1;
242 }
243 __setup("traceoff_on_warning", stop_trace_on_warning);
244 
245 static int __init boot_alloc_snapshot(char *str)
246 {
247 	char *slot = boot_snapshot_info + boot_snapshot_index;
248 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
249 	int ret;
250 
251 	if (str[0] == '=') {
252 		str++;
253 		if (strlen(str) >= left)
254 			return -1;
255 
256 		ret = snprintf(slot, left, "%s\t", str);
257 		boot_snapshot_index += ret;
258 	} else {
259 		allocate_snapshot = true;
260 		/* We also need the main ring buffer expanded */
261 		trace_set_ring_buffer_expanded(NULL);
262 	}
263 	return 1;
264 }
265 __setup("alloc_snapshot", boot_alloc_snapshot);
266 
267 
268 static int __init boot_snapshot(char *str)
269 {
270 	snapshot_at_boot = true;
271 	boot_alloc_snapshot(str);
272 	return 1;
273 }
274 __setup("ftrace_boot_snapshot", boot_snapshot);
275 
276 
277 static int __init boot_instance(char *str)
278 {
279 	char *slot = boot_instance_info + boot_instance_index;
280 	int left = sizeof(boot_instance_info) - boot_instance_index;
281 	int ret;
282 
283 	if (strlen(str) >= left)
284 		return -1;
285 
286 	ret = snprintf(slot, left, "%s\t", str);
287 	boot_instance_index += ret;
288 
289 	return 1;
290 }
291 __setup("trace_instance=", boot_instance);
292 
293 
294 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
295 
296 static int __init set_trace_boot_options(char *str)
297 {
298 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
299 	return 1;
300 }
301 __setup("trace_options=", set_trace_boot_options);
302 
303 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
304 static char *trace_boot_clock __initdata;
305 
306 static int __init set_trace_boot_clock(char *str)
307 {
308 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
309 	trace_boot_clock = trace_boot_clock_buf;
310 	return 1;
311 }
312 __setup("trace_clock=", set_trace_boot_clock);
313 
314 static int __init set_tracepoint_printk(char *str)
315 {
316 	/* Ignore the "tp_printk_stop_on_boot" param */
317 	if (*str == '_')
318 		return 0;
319 
320 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
321 		tracepoint_printk = 1;
322 	return 1;
323 }
324 __setup("tp_printk", set_tracepoint_printk);
325 
326 static int __init set_tracepoint_printk_stop(char *str)
327 {
328 	tracepoint_printk_stop_on_boot = true;
329 	return 1;
330 }
331 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
332 
333 unsigned long long ns2usecs(u64 nsec)
334 {
335 	nsec += 500;
336 	do_div(nsec, 1000);
337 	return nsec;
338 }
339 
340 static void
341 trace_process_export(struct trace_export *export,
342 	       struct ring_buffer_event *event, int flag)
343 {
344 	struct trace_entry *entry;
345 	unsigned int size = 0;
346 
347 	if (export->flags & flag) {
348 		entry = ring_buffer_event_data(event);
349 		size = ring_buffer_event_length(event);
350 		export->write(export, entry, size);
351 	}
352 }
353 
354 static DEFINE_MUTEX(ftrace_export_lock);
355 
356 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
357 
358 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
359 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
360 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
361 
362 static inline void ftrace_exports_enable(struct trace_export *export)
363 {
364 	if (export->flags & TRACE_EXPORT_FUNCTION)
365 		static_branch_inc(&trace_function_exports_enabled);
366 
367 	if (export->flags & TRACE_EXPORT_EVENT)
368 		static_branch_inc(&trace_event_exports_enabled);
369 
370 	if (export->flags & TRACE_EXPORT_MARKER)
371 		static_branch_inc(&trace_marker_exports_enabled);
372 }
373 
374 static inline void ftrace_exports_disable(struct trace_export *export)
375 {
376 	if (export->flags & TRACE_EXPORT_FUNCTION)
377 		static_branch_dec(&trace_function_exports_enabled);
378 
379 	if (export->flags & TRACE_EXPORT_EVENT)
380 		static_branch_dec(&trace_event_exports_enabled);
381 
382 	if (export->flags & TRACE_EXPORT_MARKER)
383 		static_branch_dec(&trace_marker_exports_enabled);
384 }
385 
386 static void ftrace_exports(struct ring_buffer_event *event, int flag)
387 {
388 	struct trace_export *export;
389 
390 	preempt_disable_notrace();
391 
392 	export = rcu_dereference_raw_check(ftrace_exports_list);
393 	while (export) {
394 		trace_process_export(export, event, flag);
395 		export = rcu_dereference_raw_check(export->next);
396 	}
397 
398 	preempt_enable_notrace();
399 }
400 
401 static inline void
402 add_trace_export(struct trace_export **list, struct trace_export *export)
403 {
404 	rcu_assign_pointer(export->next, *list);
405 	/*
406 	 * We are entering export into the list but another
407 	 * CPU might be walking that list. We need to make sure
408 	 * the export->next pointer is valid before another CPU sees
409 	 * the export pointer included into the list.
410 	 */
411 	rcu_assign_pointer(*list, export);
412 }
413 
414 static inline int
415 rm_trace_export(struct trace_export **list, struct trace_export *export)
416 {
417 	struct trace_export **p;
418 
419 	for (p = list; *p != NULL; p = &(*p)->next)
420 		if (*p == export)
421 			break;
422 
423 	if (*p != export)
424 		return -1;
425 
426 	rcu_assign_pointer(*p, (*p)->next);
427 
428 	return 0;
429 }
430 
431 static inline void
432 add_ftrace_export(struct trace_export **list, struct trace_export *export)
433 {
434 	ftrace_exports_enable(export);
435 
436 	add_trace_export(list, export);
437 }
438 
439 static inline int
440 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
441 {
442 	int ret;
443 
444 	ret = rm_trace_export(list, export);
445 	ftrace_exports_disable(export);
446 
447 	return ret;
448 }
449 
450 int register_ftrace_export(struct trace_export *export)
451 {
452 	if (WARN_ON_ONCE(!export->write))
453 		return -1;
454 
455 	mutex_lock(&ftrace_export_lock);
456 
457 	add_ftrace_export(&ftrace_exports_list, export);
458 
459 	mutex_unlock(&ftrace_export_lock);
460 
461 	return 0;
462 }
463 EXPORT_SYMBOL_GPL(register_ftrace_export);
464 
465 int unregister_ftrace_export(struct trace_export *export)
466 {
467 	int ret;
468 
469 	mutex_lock(&ftrace_export_lock);
470 
471 	ret = rm_ftrace_export(&ftrace_exports_list, export);
472 
473 	mutex_unlock(&ftrace_export_lock);
474 
475 	return ret;
476 }
477 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
478 
479 /* trace_flags holds trace_options default values */
480 #define TRACE_DEFAULT_FLAGS						\
481 	(FUNCTION_DEFAULT_FLAGS |					\
482 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
483 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
484 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
485 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
486 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
487 
488 /* trace_options that are only supported by global_trace */
489 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
490 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
491 
492 /* trace_flags that are default zero for instances */
493 #define ZEROED_TRACE_FLAGS \
494 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
495 
496 /*
497  * The global_trace is the descriptor that holds the top-level tracing
498  * buffers for the live tracing.
499  */
500 static struct trace_array global_trace = {
501 	.trace_flags = TRACE_DEFAULT_FLAGS,
502 };
503 
504 static struct trace_array *printk_trace = &global_trace;
505 
506 static __always_inline bool printk_binsafe(struct trace_array *tr)
507 {
508 	/*
509 	 * The binary format of traceprintk can cause a crash if used
510 	 * by a buffer from another boot. Force the use of the
511 	 * non binary version of trace_printk if the trace_printk
512 	 * buffer is a boot mapped ring buffer.
513 	 */
514 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
515 }
516 
517 static void update_printk_trace(struct trace_array *tr)
518 {
519 	if (printk_trace == tr)
520 		return;
521 
522 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
523 	printk_trace = tr;
524 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
525 }
526 
527 void trace_set_ring_buffer_expanded(struct trace_array *tr)
528 {
529 	if (!tr)
530 		tr = &global_trace;
531 	tr->ring_buffer_expanded = true;
532 }
533 
534 LIST_HEAD(ftrace_trace_arrays);
535 
536 int trace_array_get(struct trace_array *this_tr)
537 {
538 	struct trace_array *tr;
539 
540 	guard(mutex)(&trace_types_lock);
541 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
542 		if (tr == this_tr) {
543 			tr->ref++;
544 			return 0;
545 		}
546 	}
547 
548 	return -ENODEV;
549 }
550 
551 static void __trace_array_put(struct trace_array *this_tr)
552 {
553 	WARN_ON(!this_tr->ref);
554 	this_tr->ref--;
555 }
556 
557 /**
558  * trace_array_put - Decrement the reference counter for this trace array.
559  * @this_tr : pointer to the trace array
560  *
561  * NOTE: Use this when we no longer need the trace array returned by
562  * trace_array_get_by_name(). This ensures the trace array can be later
563  * destroyed.
564  *
565  */
566 void trace_array_put(struct trace_array *this_tr)
567 {
568 	if (!this_tr)
569 		return;
570 
571 	mutex_lock(&trace_types_lock);
572 	__trace_array_put(this_tr);
573 	mutex_unlock(&trace_types_lock);
574 }
575 EXPORT_SYMBOL_GPL(trace_array_put);
576 
577 int tracing_check_open_get_tr(struct trace_array *tr)
578 {
579 	int ret;
580 
581 	ret = security_locked_down(LOCKDOWN_TRACEFS);
582 	if (ret)
583 		return ret;
584 
585 	if (tracing_disabled)
586 		return -ENODEV;
587 
588 	if (tr && trace_array_get(tr) < 0)
589 		return -ENODEV;
590 
591 	return 0;
592 }
593 
594 /**
595  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
596  * @filtered_pids: The list of pids to check
597  * @search_pid: The PID to find in @filtered_pids
598  *
599  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
600  */
601 bool
602 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
603 {
604 	return trace_pid_list_is_set(filtered_pids, search_pid);
605 }
606 
607 /**
608  * trace_ignore_this_task - should a task be ignored for tracing
609  * @filtered_pids: The list of pids to check
610  * @filtered_no_pids: The list of pids not to be traced
611  * @task: The task that should be ignored if not filtered
612  *
613  * Checks if @task should be traced or not from @filtered_pids.
614  * Returns true if @task should *NOT* be traced.
615  * Returns false if @task should be traced.
616  */
617 bool
618 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
619 		       struct trace_pid_list *filtered_no_pids,
620 		       struct task_struct *task)
621 {
622 	/*
623 	 * If filtered_no_pids is not empty, and the task's pid is listed
624 	 * in filtered_no_pids, then return true.
625 	 * Otherwise, if filtered_pids is empty, that means we can
626 	 * trace all tasks. If it has content, then only trace pids
627 	 * within filtered_pids.
628 	 */
629 
630 	return (filtered_pids &&
631 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
632 		(filtered_no_pids &&
633 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
634 }
635 
636 /**
637  * trace_filter_add_remove_task - Add or remove a task from a pid_list
638  * @pid_list: The list to modify
639  * @self: The current task for fork or NULL for exit
640  * @task: The task to add or remove
641  *
642  * If adding a task, if @self is defined, the task is only added if @self
643  * is also included in @pid_list. This happens on fork and tasks should
644  * only be added when the parent is listed. If @self is NULL, then the
645  * @task pid will be removed from the list, which would happen on exit
646  * of a task.
647  */
648 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
649 				  struct task_struct *self,
650 				  struct task_struct *task)
651 {
652 	if (!pid_list)
653 		return;
654 
655 	/* For forks, we only add if the forking task is listed */
656 	if (self) {
657 		if (!trace_find_filtered_pid(pid_list, self->pid))
658 			return;
659 	}
660 
661 	/* "self" is set for forks, and NULL for exits */
662 	if (self)
663 		trace_pid_list_set(pid_list, task->pid);
664 	else
665 		trace_pid_list_clear(pid_list, task->pid);
666 }
667 
668 /**
669  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
670  * @pid_list: The pid list to show
671  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
672  * @pos: The position of the file
673  *
674  * This is used by the seq_file "next" operation to iterate the pids
675  * listed in a trace_pid_list structure.
676  *
677  * Returns the pid+1 as we want to display pid of zero, but NULL would
678  * stop the iteration.
679  */
680 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
681 {
682 	long pid = (unsigned long)v;
683 	unsigned int next;
684 
685 	(*pos)++;
686 
687 	/* pid already is +1 of the actual previous bit */
688 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
689 		return NULL;
690 
691 	pid = next;
692 
693 	/* Return pid + 1 to allow zero to be represented */
694 	return (void *)(pid + 1);
695 }
696 
697 /**
698  * trace_pid_start - Used for seq_file to start reading pid lists
699  * @pid_list: The pid list to show
700  * @pos: The position of the file
701  *
702  * This is used by seq_file "start" operation to start the iteration
703  * of listing pids.
704  *
705  * Returns the pid+1 as we want to display pid of zero, but NULL would
706  * stop the iteration.
707  */
708 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
709 {
710 	unsigned long pid;
711 	unsigned int first;
712 	loff_t l = 0;
713 
714 	if (trace_pid_list_first(pid_list, &first) < 0)
715 		return NULL;
716 
717 	pid = first;
718 
719 	/* Return pid + 1 so that zero can be the exit value */
720 	for (pid++; pid && l < *pos;
721 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
722 		;
723 	return (void *)pid;
724 }
725 
726 /**
727  * trace_pid_show - show the current pid in seq_file processing
728  * @m: The seq_file structure to write into
729  * @v: A void pointer of the pid (+1) value to display
730  *
731  * Can be directly used by seq_file operations to display the current
732  * pid value.
733  */
734 int trace_pid_show(struct seq_file *m, void *v)
735 {
736 	unsigned long pid = (unsigned long)v - 1;
737 
738 	seq_printf(m, "%lu\n", pid);
739 	return 0;
740 }
741 
742 /* 128 should be much more than enough */
743 #define PID_BUF_SIZE		127
744 
745 int trace_pid_write(struct trace_pid_list *filtered_pids,
746 		    struct trace_pid_list **new_pid_list,
747 		    const char __user *ubuf, size_t cnt)
748 {
749 	struct trace_pid_list *pid_list;
750 	struct trace_parser parser;
751 	unsigned long val;
752 	int nr_pids = 0;
753 	ssize_t read = 0;
754 	ssize_t ret;
755 	loff_t pos;
756 	pid_t pid;
757 
758 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
759 		return -ENOMEM;
760 
761 	/*
762 	 * Always recreate a new array. The write is an all or nothing
763 	 * operation. Always create a new array when adding new pids by
764 	 * the user. If the operation fails, then the current list is
765 	 * not modified.
766 	 */
767 	pid_list = trace_pid_list_alloc();
768 	if (!pid_list) {
769 		trace_parser_put(&parser);
770 		return -ENOMEM;
771 	}
772 
773 	if (filtered_pids) {
774 		/* copy the current bits to the new max */
775 		ret = trace_pid_list_first(filtered_pids, &pid);
776 		while (!ret) {
777 			trace_pid_list_set(pid_list, pid);
778 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
779 			nr_pids++;
780 		}
781 	}
782 
783 	ret = 0;
784 	while (cnt > 0) {
785 
786 		pos = 0;
787 
788 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
789 		if (ret < 0)
790 			break;
791 
792 		read += ret;
793 		ubuf += ret;
794 		cnt -= ret;
795 
796 		if (!trace_parser_loaded(&parser))
797 			break;
798 
799 		ret = -EINVAL;
800 		if (kstrtoul(parser.buffer, 0, &val))
801 			break;
802 
803 		pid = (pid_t)val;
804 
805 		if (trace_pid_list_set(pid_list, pid) < 0) {
806 			ret = -1;
807 			break;
808 		}
809 		nr_pids++;
810 
811 		trace_parser_clear(&parser);
812 		ret = 0;
813 	}
814 	trace_parser_put(&parser);
815 
816 	if (ret < 0) {
817 		trace_pid_list_free(pid_list);
818 		return ret;
819 	}
820 
821 	if (!nr_pids) {
822 		/* Cleared the list of pids */
823 		trace_pid_list_free(pid_list);
824 		pid_list = NULL;
825 	}
826 
827 	*new_pid_list = pid_list;
828 
829 	return read;
830 }
831 
832 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
833 {
834 	u64 ts;
835 
836 	/* Early boot up does not have a buffer yet */
837 	if (!buf->buffer)
838 		return trace_clock_local();
839 
840 	ts = ring_buffer_time_stamp(buf->buffer);
841 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
842 
843 	return ts;
844 }
845 
846 u64 ftrace_now(int cpu)
847 {
848 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
849 }
850 
851 /**
852  * tracing_is_enabled - Show if global_trace has been enabled
853  *
854  * Shows if the global trace has been enabled or not. It uses the
855  * mirror flag "buffer_disabled" to be used in fast paths such as for
856  * the irqsoff tracer. But it may be inaccurate due to races. If you
857  * need to know the accurate state, use tracing_is_on() which is a little
858  * slower, but accurate.
859  */
860 int tracing_is_enabled(void)
861 {
862 	/*
863 	 * For quick access (irqsoff uses this in fast path), just
864 	 * return the mirror variable of the state of the ring buffer.
865 	 * It's a little racy, but we don't really care.
866 	 */
867 	smp_rmb();
868 	return !global_trace.buffer_disabled;
869 }
870 
871 /*
872  * trace_buf_size is the size in bytes that is allocated
873  * for a buffer. Note, the number of bytes is always rounded
874  * to page size.
875  *
876  * This number is purposely set to a low number of 16384.
877  * If the dump on oops happens, it will be much appreciated
878  * to not have to wait for all that output. Anyway this can be
879  * boot time and run time configurable.
880  */
881 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
882 
883 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
884 
885 /* trace_types holds a link list of available tracers. */
886 static struct tracer		*trace_types __read_mostly;
887 
888 /*
889  * trace_types_lock is used to protect the trace_types list.
890  */
891 DEFINE_MUTEX(trace_types_lock);
892 
893 /*
894  * serialize the access of the ring buffer
895  *
896  * ring buffer serializes readers, but it is low level protection.
897  * The validity of the events (which returns by ring_buffer_peek() ..etc)
898  * are not protected by ring buffer.
899  *
900  * The content of events may become garbage if we allow other process consumes
901  * these events concurrently:
902  *   A) the page of the consumed events may become a normal page
903  *      (not reader page) in ring buffer, and this page will be rewritten
904  *      by events producer.
905  *   B) The page of the consumed events may become a page for splice_read,
906  *      and this page will be returned to system.
907  *
908  * These primitives allow multi process access to different cpu ring buffer
909  * concurrently.
910  *
911  * These primitives don't distinguish read-only and read-consume access.
912  * Multi read-only access are also serialized.
913  */
914 
915 #ifdef CONFIG_SMP
916 static DECLARE_RWSEM(all_cpu_access_lock);
917 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
918 
919 static inline void trace_access_lock(int cpu)
920 {
921 	if (cpu == RING_BUFFER_ALL_CPUS) {
922 		/* gain it for accessing the whole ring buffer. */
923 		down_write(&all_cpu_access_lock);
924 	} else {
925 		/* gain it for accessing a cpu ring buffer. */
926 
927 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
928 		down_read(&all_cpu_access_lock);
929 
930 		/* Secondly block other access to this @cpu ring buffer. */
931 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
932 	}
933 }
934 
935 static inline void trace_access_unlock(int cpu)
936 {
937 	if (cpu == RING_BUFFER_ALL_CPUS) {
938 		up_write(&all_cpu_access_lock);
939 	} else {
940 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
941 		up_read(&all_cpu_access_lock);
942 	}
943 }
944 
945 static inline void trace_access_lock_init(void)
946 {
947 	int cpu;
948 
949 	for_each_possible_cpu(cpu)
950 		mutex_init(&per_cpu(cpu_access_lock, cpu));
951 }
952 
953 #else
954 
955 static DEFINE_MUTEX(access_lock);
956 
957 static inline void trace_access_lock(int cpu)
958 {
959 	(void)cpu;
960 	mutex_lock(&access_lock);
961 }
962 
963 static inline void trace_access_unlock(int cpu)
964 {
965 	(void)cpu;
966 	mutex_unlock(&access_lock);
967 }
968 
969 static inline void trace_access_lock_init(void)
970 {
971 }
972 
973 #endif
974 
975 #ifdef CONFIG_STACKTRACE
976 static void __ftrace_trace_stack(struct trace_array *tr,
977 				 struct trace_buffer *buffer,
978 				 unsigned int trace_ctx,
979 				 int skip, struct pt_regs *regs);
980 static inline void ftrace_trace_stack(struct trace_array *tr,
981 				      struct trace_buffer *buffer,
982 				      unsigned int trace_ctx,
983 				      int skip, struct pt_regs *regs);
984 
985 #else
986 static inline void __ftrace_trace_stack(struct trace_array *tr,
987 					struct trace_buffer *buffer,
988 					unsigned int trace_ctx,
989 					int skip, struct pt_regs *regs)
990 {
991 }
992 static inline void ftrace_trace_stack(struct trace_array *tr,
993 				      struct trace_buffer *buffer,
994 				      unsigned long trace_ctx,
995 				      int skip, struct pt_regs *regs)
996 {
997 }
998 
999 #endif
1000 
1001 static __always_inline void
1002 trace_event_setup(struct ring_buffer_event *event,
1003 		  int type, unsigned int trace_ctx)
1004 {
1005 	struct trace_entry *ent = ring_buffer_event_data(event);
1006 
1007 	tracing_generic_entry_update(ent, type, trace_ctx);
1008 }
1009 
1010 static __always_inline struct ring_buffer_event *
1011 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1012 			  int type,
1013 			  unsigned long len,
1014 			  unsigned int trace_ctx)
1015 {
1016 	struct ring_buffer_event *event;
1017 
1018 	event = ring_buffer_lock_reserve(buffer, len);
1019 	if (event != NULL)
1020 		trace_event_setup(event, type, trace_ctx);
1021 
1022 	return event;
1023 }
1024 
1025 void tracer_tracing_on(struct trace_array *tr)
1026 {
1027 	if (tr->array_buffer.buffer)
1028 		ring_buffer_record_on(tr->array_buffer.buffer);
1029 	/*
1030 	 * This flag is looked at when buffers haven't been allocated
1031 	 * yet, or by some tracers (like irqsoff), that just want to
1032 	 * know if the ring buffer has been disabled, but it can handle
1033 	 * races of where it gets disabled but we still do a record.
1034 	 * As the check is in the fast path of the tracers, it is more
1035 	 * important to be fast than accurate.
1036 	 */
1037 	tr->buffer_disabled = 0;
1038 	/* Make the flag seen by readers */
1039 	smp_wmb();
1040 }
1041 
1042 /**
1043  * tracing_on - enable tracing buffers
1044  *
1045  * This function enables tracing buffers that may have been
1046  * disabled with tracing_off.
1047  */
1048 void tracing_on(void)
1049 {
1050 	tracer_tracing_on(&global_trace);
1051 }
1052 EXPORT_SYMBOL_GPL(tracing_on);
1053 
1054 
1055 static __always_inline void
1056 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1057 {
1058 	__this_cpu_write(trace_taskinfo_save, true);
1059 
1060 	/* If this is the temp buffer, we need to commit fully */
1061 	if (this_cpu_read(trace_buffered_event) == event) {
1062 		/* Length is in event->array[0] */
1063 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1064 		/* Release the temp buffer */
1065 		this_cpu_dec(trace_buffered_event_cnt);
1066 		/* ring_buffer_unlock_commit() enables preemption */
1067 		preempt_enable_notrace();
1068 	} else
1069 		ring_buffer_unlock_commit(buffer);
1070 }
1071 
1072 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1073 		       const char *str, int size)
1074 {
1075 	struct ring_buffer_event *event;
1076 	struct trace_buffer *buffer;
1077 	struct print_entry *entry;
1078 	unsigned int trace_ctx;
1079 	int alloc;
1080 
1081 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1082 		return 0;
1083 
1084 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1085 		return 0;
1086 
1087 	if (unlikely(tracing_disabled))
1088 		return 0;
1089 
1090 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1091 
1092 	trace_ctx = tracing_gen_ctx();
1093 	buffer = tr->array_buffer.buffer;
1094 	ring_buffer_nest_start(buffer);
1095 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1096 					    trace_ctx);
1097 	if (!event) {
1098 		size = 0;
1099 		goto out;
1100 	}
1101 
1102 	entry = ring_buffer_event_data(event);
1103 	entry->ip = ip;
1104 
1105 	memcpy(&entry->buf, str, size);
1106 
1107 	/* Add a newline if necessary */
1108 	if (entry->buf[size - 1] != '\n') {
1109 		entry->buf[size] = '\n';
1110 		entry->buf[size + 1] = '\0';
1111 	} else
1112 		entry->buf[size] = '\0';
1113 
1114 	__buffer_unlock_commit(buffer, event);
1115 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1116  out:
1117 	ring_buffer_nest_end(buffer);
1118 	return size;
1119 }
1120 EXPORT_SYMBOL_GPL(__trace_array_puts);
1121 
1122 /**
1123  * __trace_puts - write a constant string into the trace buffer.
1124  * @ip:	   The address of the caller
1125  * @str:   The constant string to write
1126  * @size:  The size of the string.
1127  */
1128 int __trace_puts(unsigned long ip, const char *str, int size)
1129 {
1130 	return __trace_array_puts(printk_trace, ip, str, size);
1131 }
1132 EXPORT_SYMBOL_GPL(__trace_puts);
1133 
1134 /**
1135  * __trace_bputs - write the pointer to a constant string into trace buffer
1136  * @ip:	   The address of the caller
1137  * @str:   The constant string to write to the buffer to
1138  */
1139 int __trace_bputs(unsigned long ip, const char *str)
1140 {
1141 	struct trace_array *tr = READ_ONCE(printk_trace);
1142 	struct ring_buffer_event *event;
1143 	struct trace_buffer *buffer;
1144 	struct bputs_entry *entry;
1145 	unsigned int trace_ctx;
1146 	int size = sizeof(struct bputs_entry);
1147 	int ret = 0;
1148 
1149 	if (!printk_binsafe(tr))
1150 		return __trace_puts(ip, str, strlen(str));
1151 
1152 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1153 		return 0;
1154 
1155 	if (unlikely(tracing_selftest_running || tracing_disabled))
1156 		return 0;
1157 
1158 	trace_ctx = tracing_gen_ctx();
1159 	buffer = tr->array_buffer.buffer;
1160 
1161 	ring_buffer_nest_start(buffer);
1162 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1163 					    trace_ctx);
1164 	if (!event)
1165 		goto out;
1166 
1167 	entry = ring_buffer_event_data(event);
1168 	entry->ip			= ip;
1169 	entry->str			= str;
1170 
1171 	__buffer_unlock_commit(buffer, event);
1172 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1173 
1174 	ret = 1;
1175  out:
1176 	ring_buffer_nest_end(buffer);
1177 	return ret;
1178 }
1179 EXPORT_SYMBOL_GPL(__trace_bputs);
1180 
1181 #ifdef CONFIG_TRACER_SNAPSHOT
1182 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1183 					   void *cond_data)
1184 {
1185 	struct tracer *tracer = tr->current_trace;
1186 	unsigned long flags;
1187 
1188 	if (in_nmi()) {
1189 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1190 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1191 		return;
1192 	}
1193 
1194 	if (!tr->allocated_snapshot) {
1195 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1196 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1197 		tracer_tracing_off(tr);
1198 		return;
1199 	}
1200 
1201 	/* Note, snapshot can not be used when the tracer uses it */
1202 	if (tracer->use_max_tr) {
1203 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1204 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1205 		return;
1206 	}
1207 
1208 	if (tr->mapped) {
1209 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1210 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1211 		return;
1212 	}
1213 
1214 	local_irq_save(flags);
1215 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1216 	local_irq_restore(flags);
1217 }
1218 
1219 void tracing_snapshot_instance(struct trace_array *tr)
1220 {
1221 	tracing_snapshot_instance_cond(tr, NULL);
1222 }
1223 
1224 /**
1225  * tracing_snapshot - take a snapshot of the current buffer.
1226  *
1227  * This causes a swap between the snapshot buffer and the current live
1228  * tracing buffer. You can use this to take snapshots of the live
1229  * trace when some condition is triggered, but continue to trace.
1230  *
1231  * Note, make sure to allocate the snapshot with either
1232  * a tracing_snapshot_alloc(), or by doing it manually
1233  * with: echo 1 > /sys/kernel/tracing/snapshot
1234  *
1235  * If the snapshot buffer is not allocated, it will stop tracing.
1236  * Basically making a permanent snapshot.
1237  */
1238 void tracing_snapshot(void)
1239 {
1240 	struct trace_array *tr = &global_trace;
1241 
1242 	tracing_snapshot_instance(tr);
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_snapshot);
1245 
1246 /**
1247  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1248  * @tr:		The tracing instance to snapshot
1249  * @cond_data:	The data to be tested conditionally, and possibly saved
1250  *
1251  * This is the same as tracing_snapshot() except that the snapshot is
1252  * conditional - the snapshot will only happen if the
1253  * cond_snapshot.update() implementation receiving the cond_data
1254  * returns true, which means that the trace array's cond_snapshot
1255  * update() operation used the cond_data to determine whether the
1256  * snapshot should be taken, and if it was, presumably saved it along
1257  * with the snapshot.
1258  */
1259 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1260 {
1261 	tracing_snapshot_instance_cond(tr, cond_data);
1262 }
1263 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1264 
1265 /**
1266  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1267  * @tr:		The tracing instance
1268  *
1269  * When the user enables a conditional snapshot using
1270  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1271  * with the snapshot.  This accessor is used to retrieve it.
1272  *
1273  * Should not be called from cond_snapshot.update(), since it takes
1274  * the tr->max_lock lock, which the code calling
1275  * cond_snapshot.update() has already done.
1276  *
1277  * Returns the cond_data associated with the trace array's snapshot.
1278  */
1279 void *tracing_cond_snapshot_data(struct trace_array *tr)
1280 {
1281 	void *cond_data = NULL;
1282 
1283 	local_irq_disable();
1284 	arch_spin_lock(&tr->max_lock);
1285 
1286 	if (tr->cond_snapshot)
1287 		cond_data = tr->cond_snapshot->cond_data;
1288 
1289 	arch_spin_unlock(&tr->max_lock);
1290 	local_irq_enable();
1291 
1292 	return cond_data;
1293 }
1294 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1295 
1296 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1297 					struct array_buffer *size_buf, int cpu_id);
1298 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1299 
1300 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1301 {
1302 	int order;
1303 	int ret;
1304 
1305 	if (!tr->allocated_snapshot) {
1306 
1307 		/* Make the snapshot buffer have the same order as main buffer */
1308 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1309 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1310 		if (ret < 0)
1311 			return ret;
1312 
1313 		/* allocate spare buffer */
1314 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1315 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1316 		if (ret < 0)
1317 			return ret;
1318 
1319 		tr->allocated_snapshot = true;
1320 	}
1321 
1322 	return 0;
1323 }
1324 
1325 static void free_snapshot(struct trace_array *tr)
1326 {
1327 	/*
1328 	 * We don't free the ring buffer. instead, resize it because
1329 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1330 	 * we want preserve it.
1331 	 */
1332 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1333 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1334 	set_buffer_entries(&tr->max_buffer, 1);
1335 	tracing_reset_online_cpus(&tr->max_buffer);
1336 	tr->allocated_snapshot = false;
1337 }
1338 
1339 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1340 {
1341 	int ret;
1342 
1343 	lockdep_assert_held(&trace_types_lock);
1344 
1345 	spin_lock(&tr->snapshot_trigger_lock);
1346 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1347 		spin_unlock(&tr->snapshot_trigger_lock);
1348 		return -EBUSY;
1349 	}
1350 
1351 	tr->snapshot++;
1352 	spin_unlock(&tr->snapshot_trigger_lock);
1353 
1354 	ret = tracing_alloc_snapshot_instance(tr);
1355 	if (ret) {
1356 		spin_lock(&tr->snapshot_trigger_lock);
1357 		tr->snapshot--;
1358 		spin_unlock(&tr->snapshot_trigger_lock);
1359 	}
1360 
1361 	return ret;
1362 }
1363 
1364 int tracing_arm_snapshot(struct trace_array *tr)
1365 {
1366 	int ret;
1367 
1368 	mutex_lock(&trace_types_lock);
1369 	ret = tracing_arm_snapshot_locked(tr);
1370 	mutex_unlock(&trace_types_lock);
1371 
1372 	return ret;
1373 }
1374 
1375 void tracing_disarm_snapshot(struct trace_array *tr)
1376 {
1377 	spin_lock(&tr->snapshot_trigger_lock);
1378 	if (!WARN_ON(!tr->snapshot))
1379 		tr->snapshot--;
1380 	spin_unlock(&tr->snapshot_trigger_lock);
1381 }
1382 
1383 /**
1384  * tracing_alloc_snapshot - allocate snapshot buffer.
1385  *
1386  * This only allocates the snapshot buffer if it isn't already
1387  * allocated - it doesn't also take a snapshot.
1388  *
1389  * This is meant to be used in cases where the snapshot buffer needs
1390  * to be set up for events that can't sleep but need to be able to
1391  * trigger a snapshot.
1392  */
1393 int tracing_alloc_snapshot(void)
1394 {
1395 	struct trace_array *tr = &global_trace;
1396 	int ret;
1397 
1398 	ret = tracing_alloc_snapshot_instance(tr);
1399 	WARN_ON(ret < 0);
1400 
1401 	return ret;
1402 }
1403 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1404 
1405 /**
1406  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1407  *
1408  * This is similar to tracing_snapshot(), but it will allocate the
1409  * snapshot buffer if it isn't already allocated. Use this only
1410  * where it is safe to sleep, as the allocation may sleep.
1411  *
1412  * This causes a swap between the snapshot buffer and the current live
1413  * tracing buffer. You can use this to take snapshots of the live
1414  * trace when some condition is triggered, but continue to trace.
1415  */
1416 void tracing_snapshot_alloc(void)
1417 {
1418 	int ret;
1419 
1420 	ret = tracing_alloc_snapshot();
1421 	if (ret < 0)
1422 		return;
1423 
1424 	tracing_snapshot();
1425 }
1426 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1427 
1428 /**
1429  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1430  * @tr:		The tracing instance
1431  * @cond_data:	User data to associate with the snapshot
1432  * @update:	Implementation of the cond_snapshot update function
1433  *
1434  * Check whether the conditional snapshot for the given instance has
1435  * already been enabled, or if the current tracer is already using a
1436  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1437  * save the cond_data and update function inside.
1438  *
1439  * Returns 0 if successful, error otherwise.
1440  */
1441 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1442 				 cond_update_fn_t update)
1443 {
1444 	struct cond_snapshot *cond_snapshot __free(kfree) =
1445 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1446 	int ret;
1447 
1448 	if (!cond_snapshot)
1449 		return -ENOMEM;
1450 
1451 	cond_snapshot->cond_data = cond_data;
1452 	cond_snapshot->update = update;
1453 
1454 	guard(mutex)(&trace_types_lock);
1455 
1456 	if (tr->current_trace->use_max_tr)
1457 		return -EBUSY;
1458 
1459 	/*
1460 	 * The cond_snapshot can only change to NULL without the
1461 	 * trace_types_lock. We don't care if we race with it going
1462 	 * to NULL, but we want to make sure that it's not set to
1463 	 * something other than NULL when we get here, which we can
1464 	 * do safely with only holding the trace_types_lock and not
1465 	 * having to take the max_lock.
1466 	 */
1467 	if (tr->cond_snapshot)
1468 		return -EBUSY;
1469 
1470 	ret = tracing_arm_snapshot_locked(tr);
1471 	if (ret)
1472 		return ret;
1473 
1474 	local_irq_disable();
1475 	arch_spin_lock(&tr->max_lock);
1476 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1477 	arch_spin_unlock(&tr->max_lock);
1478 	local_irq_enable();
1479 
1480 	return 0;
1481 }
1482 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1483 
1484 /**
1485  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1486  * @tr:		The tracing instance
1487  *
1488  * Check whether the conditional snapshot for the given instance is
1489  * enabled; if so, free the cond_snapshot associated with it,
1490  * otherwise return -EINVAL.
1491  *
1492  * Returns 0 if successful, error otherwise.
1493  */
1494 int tracing_snapshot_cond_disable(struct trace_array *tr)
1495 {
1496 	int ret = 0;
1497 
1498 	local_irq_disable();
1499 	arch_spin_lock(&tr->max_lock);
1500 
1501 	if (!tr->cond_snapshot)
1502 		ret = -EINVAL;
1503 	else {
1504 		kfree(tr->cond_snapshot);
1505 		tr->cond_snapshot = NULL;
1506 	}
1507 
1508 	arch_spin_unlock(&tr->max_lock);
1509 	local_irq_enable();
1510 
1511 	tracing_disarm_snapshot(tr);
1512 
1513 	return ret;
1514 }
1515 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1516 #else
1517 void tracing_snapshot(void)
1518 {
1519 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1520 }
1521 EXPORT_SYMBOL_GPL(tracing_snapshot);
1522 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1523 {
1524 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1525 }
1526 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1527 int tracing_alloc_snapshot(void)
1528 {
1529 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1530 	return -ENODEV;
1531 }
1532 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1533 void tracing_snapshot_alloc(void)
1534 {
1535 	/* Give warning */
1536 	tracing_snapshot();
1537 }
1538 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1539 void *tracing_cond_snapshot_data(struct trace_array *tr)
1540 {
1541 	return NULL;
1542 }
1543 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1544 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1545 {
1546 	return -ENODEV;
1547 }
1548 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1549 int tracing_snapshot_cond_disable(struct trace_array *tr)
1550 {
1551 	return false;
1552 }
1553 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1554 #define free_snapshot(tr)	do { } while (0)
1555 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1556 #endif /* CONFIG_TRACER_SNAPSHOT */
1557 
1558 void tracer_tracing_off(struct trace_array *tr)
1559 {
1560 	if (tr->array_buffer.buffer)
1561 		ring_buffer_record_off(tr->array_buffer.buffer);
1562 	/*
1563 	 * This flag is looked at when buffers haven't been allocated
1564 	 * yet, or by some tracers (like irqsoff), that just want to
1565 	 * know if the ring buffer has been disabled, but it can handle
1566 	 * races of where it gets disabled but we still do a record.
1567 	 * As the check is in the fast path of the tracers, it is more
1568 	 * important to be fast than accurate.
1569 	 */
1570 	tr->buffer_disabled = 1;
1571 	/* Make the flag seen by readers */
1572 	smp_wmb();
1573 }
1574 
1575 /**
1576  * tracing_off - turn off tracing buffers
1577  *
1578  * This function stops the tracing buffers from recording data.
1579  * It does not disable any overhead the tracers themselves may
1580  * be causing. This function simply causes all recording to
1581  * the ring buffers to fail.
1582  */
1583 void tracing_off(void)
1584 {
1585 	tracer_tracing_off(&global_trace);
1586 }
1587 EXPORT_SYMBOL_GPL(tracing_off);
1588 
1589 void disable_trace_on_warning(void)
1590 {
1591 	if (__disable_trace_on_warning) {
1592 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1593 			"Disabling tracing due to warning\n");
1594 		tracing_off();
1595 	}
1596 }
1597 
1598 /**
1599  * tracer_tracing_is_on - show real state of ring buffer enabled
1600  * @tr : the trace array to know if ring buffer is enabled
1601  *
1602  * Shows real state of the ring buffer if it is enabled or not.
1603  */
1604 bool tracer_tracing_is_on(struct trace_array *tr)
1605 {
1606 	if (tr->array_buffer.buffer)
1607 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1608 	return !tr->buffer_disabled;
1609 }
1610 
1611 /**
1612  * tracing_is_on - show state of ring buffers enabled
1613  */
1614 int tracing_is_on(void)
1615 {
1616 	return tracer_tracing_is_on(&global_trace);
1617 }
1618 EXPORT_SYMBOL_GPL(tracing_is_on);
1619 
1620 static int __init set_buf_size(char *str)
1621 {
1622 	unsigned long buf_size;
1623 
1624 	if (!str)
1625 		return 0;
1626 	buf_size = memparse(str, &str);
1627 	/*
1628 	 * nr_entries can not be zero and the startup
1629 	 * tests require some buffer space. Therefore
1630 	 * ensure we have at least 4096 bytes of buffer.
1631 	 */
1632 	trace_buf_size = max(4096UL, buf_size);
1633 	return 1;
1634 }
1635 __setup("trace_buf_size=", set_buf_size);
1636 
1637 static int __init set_tracing_thresh(char *str)
1638 {
1639 	unsigned long threshold;
1640 	int ret;
1641 
1642 	if (!str)
1643 		return 0;
1644 	ret = kstrtoul(str, 0, &threshold);
1645 	if (ret < 0)
1646 		return 0;
1647 	tracing_thresh = threshold * 1000;
1648 	return 1;
1649 }
1650 __setup("tracing_thresh=", set_tracing_thresh);
1651 
1652 unsigned long nsecs_to_usecs(unsigned long nsecs)
1653 {
1654 	return nsecs / 1000;
1655 }
1656 
1657 /*
1658  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1659  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1660  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1661  * of strings in the order that the evals (enum) were defined.
1662  */
1663 #undef C
1664 #define C(a, b) b
1665 
1666 /* These must match the bit positions in trace_iterator_flags */
1667 static const char *trace_options[] = {
1668 	TRACE_FLAGS
1669 	NULL
1670 };
1671 
1672 static struct {
1673 	u64 (*func)(void);
1674 	const char *name;
1675 	int in_ns;		/* is this clock in nanoseconds? */
1676 } trace_clocks[] = {
1677 	{ trace_clock_local,		"local",	1 },
1678 	{ trace_clock_global,		"global",	1 },
1679 	{ trace_clock_counter,		"counter",	0 },
1680 	{ trace_clock_jiffies,		"uptime",	0 },
1681 	{ trace_clock,			"perf",		1 },
1682 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1683 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1684 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1685 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1686 	ARCH_TRACE_CLOCKS
1687 };
1688 
1689 bool trace_clock_in_ns(struct trace_array *tr)
1690 {
1691 	if (trace_clocks[tr->clock_id].in_ns)
1692 		return true;
1693 
1694 	return false;
1695 }
1696 
1697 /*
1698  * trace_parser_get_init - gets the buffer for trace parser
1699  */
1700 int trace_parser_get_init(struct trace_parser *parser, int size)
1701 {
1702 	memset(parser, 0, sizeof(*parser));
1703 
1704 	parser->buffer = kmalloc(size, GFP_KERNEL);
1705 	if (!parser->buffer)
1706 		return 1;
1707 
1708 	parser->size = size;
1709 	return 0;
1710 }
1711 
1712 /*
1713  * trace_parser_put - frees the buffer for trace parser
1714  */
1715 void trace_parser_put(struct trace_parser *parser)
1716 {
1717 	kfree(parser->buffer);
1718 	parser->buffer = NULL;
1719 }
1720 
1721 /*
1722  * trace_get_user - reads the user input string separated by  space
1723  * (matched by isspace(ch))
1724  *
1725  * For each string found the 'struct trace_parser' is updated,
1726  * and the function returns.
1727  *
1728  * Returns number of bytes read.
1729  *
1730  * See kernel/trace/trace.h for 'struct trace_parser' details.
1731  */
1732 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1733 	size_t cnt, loff_t *ppos)
1734 {
1735 	char ch;
1736 	size_t read = 0;
1737 	ssize_t ret;
1738 
1739 	if (!*ppos)
1740 		trace_parser_clear(parser);
1741 
1742 	ret = get_user(ch, ubuf++);
1743 	if (ret)
1744 		goto out;
1745 
1746 	read++;
1747 	cnt--;
1748 
1749 	/*
1750 	 * The parser is not finished with the last write,
1751 	 * continue reading the user input without skipping spaces.
1752 	 */
1753 	if (!parser->cont) {
1754 		/* skip white space */
1755 		while (cnt && isspace(ch)) {
1756 			ret = get_user(ch, ubuf++);
1757 			if (ret)
1758 				goto out;
1759 			read++;
1760 			cnt--;
1761 		}
1762 
1763 		parser->idx = 0;
1764 
1765 		/* only spaces were written */
1766 		if (isspace(ch) || !ch) {
1767 			*ppos += read;
1768 			ret = read;
1769 			goto out;
1770 		}
1771 	}
1772 
1773 	/* read the non-space input */
1774 	while (cnt && !isspace(ch) && ch) {
1775 		if (parser->idx < parser->size - 1)
1776 			parser->buffer[parser->idx++] = ch;
1777 		else {
1778 			ret = -EINVAL;
1779 			goto out;
1780 		}
1781 		ret = get_user(ch, ubuf++);
1782 		if (ret)
1783 			goto out;
1784 		read++;
1785 		cnt--;
1786 	}
1787 
1788 	/* We either got finished input or we have to wait for another call. */
1789 	if (isspace(ch) || !ch) {
1790 		parser->buffer[parser->idx] = 0;
1791 		parser->cont = false;
1792 	} else if (parser->idx < parser->size - 1) {
1793 		parser->cont = true;
1794 		parser->buffer[parser->idx++] = ch;
1795 		/* Make sure the parsed string always terminates with '\0'. */
1796 		parser->buffer[parser->idx] = 0;
1797 	} else {
1798 		ret = -EINVAL;
1799 		goto out;
1800 	}
1801 
1802 	*ppos += read;
1803 	ret = read;
1804 
1805 out:
1806 	return ret;
1807 }
1808 
1809 /* TODO add a seq_buf_to_buffer() */
1810 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1811 {
1812 	int len;
1813 
1814 	if (trace_seq_used(s) <= s->readpos)
1815 		return -EBUSY;
1816 
1817 	len = trace_seq_used(s) - s->readpos;
1818 	if (cnt > len)
1819 		cnt = len;
1820 	memcpy(buf, s->buffer + s->readpos, cnt);
1821 
1822 	s->readpos += cnt;
1823 	return cnt;
1824 }
1825 
1826 unsigned long __read_mostly	tracing_thresh;
1827 
1828 #ifdef CONFIG_TRACER_MAX_TRACE
1829 static const struct file_operations tracing_max_lat_fops;
1830 
1831 #ifdef LATENCY_FS_NOTIFY
1832 
1833 static struct workqueue_struct *fsnotify_wq;
1834 
1835 static void latency_fsnotify_workfn(struct work_struct *work)
1836 {
1837 	struct trace_array *tr = container_of(work, struct trace_array,
1838 					      fsnotify_work);
1839 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1840 }
1841 
1842 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1843 {
1844 	struct trace_array *tr = container_of(iwork, struct trace_array,
1845 					      fsnotify_irqwork);
1846 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1847 }
1848 
1849 static void trace_create_maxlat_file(struct trace_array *tr,
1850 				     struct dentry *d_tracer)
1851 {
1852 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1853 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1854 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1855 					      TRACE_MODE_WRITE,
1856 					      d_tracer, tr,
1857 					      &tracing_max_lat_fops);
1858 }
1859 
1860 __init static int latency_fsnotify_init(void)
1861 {
1862 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1863 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1864 	if (!fsnotify_wq) {
1865 		pr_err("Unable to allocate tr_max_lat_wq\n");
1866 		return -ENOMEM;
1867 	}
1868 	return 0;
1869 }
1870 
1871 late_initcall_sync(latency_fsnotify_init);
1872 
1873 void latency_fsnotify(struct trace_array *tr)
1874 {
1875 	if (!fsnotify_wq)
1876 		return;
1877 	/*
1878 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1879 	 * possible that we are called from __schedule() or do_idle(), which
1880 	 * could cause a deadlock.
1881 	 */
1882 	irq_work_queue(&tr->fsnotify_irqwork);
1883 }
1884 
1885 #else /* !LATENCY_FS_NOTIFY */
1886 
1887 #define trace_create_maxlat_file(tr, d_tracer)				\
1888 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1889 			  d_tracer, tr, &tracing_max_lat_fops)
1890 
1891 #endif
1892 
1893 /*
1894  * Copy the new maximum trace into the separate maximum-trace
1895  * structure. (this way the maximum trace is permanently saved,
1896  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1897  */
1898 static void
1899 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1900 {
1901 	struct array_buffer *trace_buf = &tr->array_buffer;
1902 	struct array_buffer *max_buf = &tr->max_buffer;
1903 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1904 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1905 
1906 	max_buf->cpu = cpu;
1907 	max_buf->time_start = data->preempt_timestamp;
1908 
1909 	max_data->saved_latency = tr->max_latency;
1910 	max_data->critical_start = data->critical_start;
1911 	max_data->critical_end = data->critical_end;
1912 
1913 	strscpy(max_data->comm, tsk->comm);
1914 	max_data->pid = tsk->pid;
1915 	/*
1916 	 * If tsk == current, then use current_uid(), as that does not use
1917 	 * RCU. The irq tracer can be called out of RCU scope.
1918 	 */
1919 	if (tsk == current)
1920 		max_data->uid = current_uid();
1921 	else
1922 		max_data->uid = task_uid(tsk);
1923 
1924 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1925 	max_data->policy = tsk->policy;
1926 	max_data->rt_priority = tsk->rt_priority;
1927 
1928 	/* record this tasks comm */
1929 	tracing_record_cmdline(tsk);
1930 	latency_fsnotify(tr);
1931 }
1932 
1933 /**
1934  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1935  * @tr: tracer
1936  * @tsk: the task with the latency
1937  * @cpu: The cpu that initiated the trace.
1938  * @cond_data: User data associated with a conditional snapshot
1939  *
1940  * Flip the buffers between the @tr and the max_tr and record information
1941  * about which task was the cause of this latency.
1942  */
1943 void
1944 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1945 	      void *cond_data)
1946 {
1947 	if (tr->stop_count)
1948 		return;
1949 
1950 	WARN_ON_ONCE(!irqs_disabled());
1951 
1952 	if (!tr->allocated_snapshot) {
1953 		/* Only the nop tracer should hit this when disabling */
1954 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1955 		return;
1956 	}
1957 
1958 	arch_spin_lock(&tr->max_lock);
1959 
1960 	/* Inherit the recordable setting from array_buffer */
1961 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1962 		ring_buffer_record_on(tr->max_buffer.buffer);
1963 	else
1964 		ring_buffer_record_off(tr->max_buffer.buffer);
1965 
1966 #ifdef CONFIG_TRACER_SNAPSHOT
1967 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1968 		arch_spin_unlock(&tr->max_lock);
1969 		return;
1970 	}
1971 #endif
1972 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1973 
1974 	__update_max_tr(tr, tsk, cpu);
1975 
1976 	arch_spin_unlock(&tr->max_lock);
1977 
1978 	/* Any waiters on the old snapshot buffer need to wake up */
1979 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1980 }
1981 
1982 /**
1983  * update_max_tr_single - only copy one trace over, and reset the rest
1984  * @tr: tracer
1985  * @tsk: task with the latency
1986  * @cpu: the cpu of the buffer to copy.
1987  *
1988  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1989  */
1990 void
1991 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1992 {
1993 	int ret;
1994 
1995 	if (tr->stop_count)
1996 		return;
1997 
1998 	WARN_ON_ONCE(!irqs_disabled());
1999 	if (!tr->allocated_snapshot) {
2000 		/* Only the nop tracer should hit this when disabling */
2001 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2002 		return;
2003 	}
2004 
2005 	arch_spin_lock(&tr->max_lock);
2006 
2007 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2008 
2009 	if (ret == -EBUSY) {
2010 		/*
2011 		 * We failed to swap the buffer due to a commit taking
2012 		 * place on this CPU. We fail to record, but we reset
2013 		 * the max trace buffer (no one writes directly to it)
2014 		 * and flag that it failed.
2015 		 * Another reason is resize is in progress.
2016 		 */
2017 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2018 			"Failed to swap buffers due to commit or resize in progress\n");
2019 	}
2020 
2021 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2022 
2023 	__update_max_tr(tr, tsk, cpu);
2024 	arch_spin_unlock(&tr->max_lock);
2025 }
2026 
2027 #endif /* CONFIG_TRACER_MAX_TRACE */
2028 
2029 struct pipe_wait {
2030 	struct trace_iterator		*iter;
2031 	int				wait_index;
2032 };
2033 
2034 static bool wait_pipe_cond(void *data)
2035 {
2036 	struct pipe_wait *pwait = data;
2037 	struct trace_iterator *iter = pwait->iter;
2038 
2039 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2040 		return true;
2041 
2042 	return iter->closed;
2043 }
2044 
2045 static int wait_on_pipe(struct trace_iterator *iter, int full)
2046 {
2047 	struct pipe_wait pwait;
2048 	int ret;
2049 
2050 	/* Iterators are static, they should be filled or empty */
2051 	if (trace_buffer_iter(iter, iter->cpu_file))
2052 		return 0;
2053 
2054 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2055 	pwait.iter = iter;
2056 
2057 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2058 			       wait_pipe_cond, &pwait);
2059 
2060 #ifdef CONFIG_TRACER_MAX_TRACE
2061 	/*
2062 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2063 	 * to happen, this would now be the main buffer.
2064 	 */
2065 	if (iter->snapshot)
2066 		iter->array_buffer = &iter->tr->max_buffer;
2067 #endif
2068 	return ret;
2069 }
2070 
2071 #ifdef CONFIG_FTRACE_STARTUP_TEST
2072 static bool selftests_can_run;
2073 
2074 struct trace_selftests {
2075 	struct list_head		list;
2076 	struct tracer			*type;
2077 };
2078 
2079 static LIST_HEAD(postponed_selftests);
2080 
2081 static int save_selftest(struct tracer *type)
2082 {
2083 	struct trace_selftests *selftest;
2084 
2085 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2086 	if (!selftest)
2087 		return -ENOMEM;
2088 
2089 	selftest->type = type;
2090 	list_add(&selftest->list, &postponed_selftests);
2091 	return 0;
2092 }
2093 
2094 static int run_tracer_selftest(struct tracer *type)
2095 {
2096 	struct trace_array *tr = &global_trace;
2097 	struct tracer *saved_tracer = tr->current_trace;
2098 	int ret;
2099 
2100 	if (!type->selftest || tracing_selftest_disabled)
2101 		return 0;
2102 
2103 	/*
2104 	 * If a tracer registers early in boot up (before scheduling is
2105 	 * initialized and such), then do not run its selftests yet.
2106 	 * Instead, run it a little later in the boot process.
2107 	 */
2108 	if (!selftests_can_run)
2109 		return save_selftest(type);
2110 
2111 	if (!tracing_is_on()) {
2112 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2113 			type->name);
2114 		return 0;
2115 	}
2116 
2117 	/*
2118 	 * Run a selftest on this tracer.
2119 	 * Here we reset the trace buffer, and set the current
2120 	 * tracer to be this tracer. The tracer can then run some
2121 	 * internal tracing to verify that everything is in order.
2122 	 * If we fail, we do not register this tracer.
2123 	 */
2124 	tracing_reset_online_cpus(&tr->array_buffer);
2125 
2126 	tr->current_trace = type;
2127 
2128 #ifdef CONFIG_TRACER_MAX_TRACE
2129 	if (type->use_max_tr) {
2130 		/* If we expanded the buffers, make sure the max is expanded too */
2131 		if (tr->ring_buffer_expanded)
2132 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2133 					   RING_BUFFER_ALL_CPUS);
2134 		tr->allocated_snapshot = true;
2135 	}
2136 #endif
2137 
2138 	/* the test is responsible for initializing and enabling */
2139 	pr_info("Testing tracer %s: ", type->name);
2140 	ret = type->selftest(type, tr);
2141 	/* the test is responsible for resetting too */
2142 	tr->current_trace = saved_tracer;
2143 	if (ret) {
2144 		printk(KERN_CONT "FAILED!\n");
2145 		/* Add the warning after printing 'FAILED' */
2146 		WARN_ON(1);
2147 		return -1;
2148 	}
2149 	/* Only reset on passing, to avoid touching corrupted buffers */
2150 	tracing_reset_online_cpus(&tr->array_buffer);
2151 
2152 #ifdef CONFIG_TRACER_MAX_TRACE
2153 	if (type->use_max_tr) {
2154 		tr->allocated_snapshot = false;
2155 
2156 		/* Shrink the max buffer again */
2157 		if (tr->ring_buffer_expanded)
2158 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2159 					   RING_BUFFER_ALL_CPUS);
2160 	}
2161 #endif
2162 
2163 	printk(KERN_CONT "PASSED\n");
2164 	return 0;
2165 }
2166 
2167 static int do_run_tracer_selftest(struct tracer *type)
2168 {
2169 	int ret;
2170 
2171 	/*
2172 	 * Tests can take a long time, especially if they are run one after the
2173 	 * other, as does happen during bootup when all the tracers are
2174 	 * registered. This could cause the soft lockup watchdog to trigger.
2175 	 */
2176 	cond_resched();
2177 
2178 	tracing_selftest_running = true;
2179 	ret = run_tracer_selftest(type);
2180 	tracing_selftest_running = false;
2181 
2182 	return ret;
2183 }
2184 
2185 static __init int init_trace_selftests(void)
2186 {
2187 	struct trace_selftests *p, *n;
2188 	struct tracer *t, **last;
2189 	int ret;
2190 
2191 	selftests_can_run = true;
2192 
2193 	guard(mutex)(&trace_types_lock);
2194 
2195 	if (list_empty(&postponed_selftests))
2196 		return 0;
2197 
2198 	pr_info("Running postponed tracer tests:\n");
2199 
2200 	tracing_selftest_running = true;
2201 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2202 		/* This loop can take minutes when sanitizers are enabled, so
2203 		 * lets make sure we allow RCU processing.
2204 		 */
2205 		cond_resched();
2206 		ret = run_tracer_selftest(p->type);
2207 		/* If the test fails, then warn and remove from available_tracers */
2208 		if (ret < 0) {
2209 			WARN(1, "tracer: %s failed selftest, disabling\n",
2210 			     p->type->name);
2211 			last = &trace_types;
2212 			for (t = trace_types; t; t = t->next) {
2213 				if (t == p->type) {
2214 					*last = t->next;
2215 					break;
2216 				}
2217 				last = &t->next;
2218 			}
2219 		}
2220 		list_del(&p->list);
2221 		kfree(p);
2222 	}
2223 	tracing_selftest_running = false;
2224 
2225 	return 0;
2226 }
2227 core_initcall(init_trace_selftests);
2228 #else
2229 static inline int do_run_tracer_selftest(struct tracer *type)
2230 {
2231 	return 0;
2232 }
2233 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2234 
2235 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2236 
2237 static void __init apply_trace_boot_options(void);
2238 
2239 /**
2240  * register_tracer - register a tracer with the ftrace system.
2241  * @type: the plugin for the tracer
2242  *
2243  * Register a new plugin tracer.
2244  */
2245 int __init register_tracer(struct tracer *type)
2246 {
2247 	struct tracer *t;
2248 	int ret = 0;
2249 
2250 	if (!type->name) {
2251 		pr_info("Tracer must have a name\n");
2252 		return -1;
2253 	}
2254 
2255 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2256 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2257 		return -1;
2258 	}
2259 
2260 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2261 		pr_warn("Can not register tracer %s due to lockdown\n",
2262 			   type->name);
2263 		return -EPERM;
2264 	}
2265 
2266 	mutex_lock(&trace_types_lock);
2267 
2268 	for (t = trace_types; t; t = t->next) {
2269 		if (strcmp(type->name, t->name) == 0) {
2270 			/* already found */
2271 			pr_info("Tracer %s already registered\n",
2272 				type->name);
2273 			ret = -1;
2274 			goto out;
2275 		}
2276 	}
2277 
2278 	if (!type->set_flag)
2279 		type->set_flag = &dummy_set_flag;
2280 	if (!type->flags) {
2281 		/*allocate a dummy tracer_flags*/
2282 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2283 		if (!type->flags) {
2284 			ret = -ENOMEM;
2285 			goto out;
2286 		}
2287 		type->flags->val = 0;
2288 		type->flags->opts = dummy_tracer_opt;
2289 	} else
2290 		if (!type->flags->opts)
2291 			type->flags->opts = dummy_tracer_opt;
2292 
2293 	/* store the tracer for __set_tracer_option */
2294 	type->flags->trace = type;
2295 
2296 	ret = do_run_tracer_selftest(type);
2297 	if (ret < 0)
2298 		goto out;
2299 
2300 	type->next = trace_types;
2301 	trace_types = type;
2302 	add_tracer_options(&global_trace, type);
2303 
2304  out:
2305 	mutex_unlock(&trace_types_lock);
2306 
2307 	if (ret || !default_bootup_tracer)
2308 		goto out_unlock;
2309 
2310 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2311 		goto out_unlock;
2312 
2313 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2314 	/* Do we want this tracer to start on bootup? */
2315 	tracing_set_tracer(&global_trace, type->name);
2316 	default_bootup_tracer = NULL;
2317 
2318 	apply_trace_boot_options();
2319 
2320 	/* disable other selftests, since this will break it. */
2321 	disable_tracing_selftest("running a tracer");
2322 
2323  out_unlock:
2324 	return ret;
2325 }
2326 
2327 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2328 {
2329 	struct trace_buffer *buffer = buf->buffer;
2330 
2331 	if (!buffer)
2332 		return;
2333 
2334 	ring_buffer_record_disable(buffer);
2335 
2336 	/* Make sure all commits have finished */
2337 	synchronize_rcu();
2338 	ring_buffer_reset_cpu(buffer, cpu);
2339 
2340 	ring_buffer_record_enable(buffer);
2341 }
2342 
2343 void tracing_reset_online_cpus(struct array_buffer *buf)
2344 {
2345 	struct trace_buffer *buffer = buf->buffer;
2346 
2347 	if (!buffer)
2348 		return;
2349 
2350 	ring_buffer_record_disable(buffer);
2351 
2352 	/* Make sure all commits have finished */
2353 	synchronize_rcu();
2354 
2355 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2356 
2357 	ring_buffer_reset_online_cpus(buffer);
2358 
2359 	ring_buffer_record_enable(buffer);
2360 }
2361 
2362 static void tracing_reset_all_cpus(struct array_buffer *buf)
2363 {
2364 	struct trace_buffer *buffer = buf->buffer;
2365 
2366 	if (!buffer)
2367 		return;
2368 
2369 	ring_buffer_record_disable(buffer);
2370 
2371 	/* Make sure all commits have finished */
2372 	synchronize_rcu();
2373 
2374 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2375 
2376 	ring_buffer_reset(buffer);
2377 
2378 	ring_buffer_record_enable(buffer);
2379 }
2380 
2381 /* Must have trace_types_lock held */
2382 void tracing_reset_all_online_cpus_unlocked(void)
2383 {
2384 	struct trace_array *tr;
2385 
2386 	lockdep_assert_held(&trace_types_lock);
2387 
2388 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2389 		if (!tr->clear_trace)
2390 			continue;
2391 		tr->clear_trace = false;
2392 		tracing_reset_online_cpus(&tr->array_buffer);
2393 #ifdef CONFIG_TRACER_MAX_TRACE
2394 		tracing_reset_online_cpus(&tr->max_buffer);
2395 #endif
2396 	}
2397 }
2398 
2399 void tracing_reset_all_online_cpus(void)
2400 {
2401 	mutex_lock(&trace_types_lock);
2402 	tracing_reset_all_online_cpus_unlocked();
2403 	mutex_unlock(&trace_types_lock);
2404 }
2405 
2406 int is_tracing_stopped(void)
2407 {
2408 	return global_trace.stop_count;
2409 }
2410 
2411 static void tracing_start_tr(struct trace_array *tr)
2412 {
2413 	struct trace_buffer *buffer;
2414 	unsigned long flags;
2415 
2416 	if (tracing_disabled)
2417 		return;
2418 
2419 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2420 	if (--tr->stop_count) {
2421 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2422 			/* Someone screwed up their debugging */
2423 			tr->stop_count = 0;
2424 		}
2425 		goto out;
2426 	}
2427 
2428 	/* Prevent the buffers from switching */
2429 	arch_spin_lock(&tr->max_lock);
2430 
2431 	buffer = tr->array_buffer.buffer;
2432 	if (buffer)
2433 		ring_buffer_record_enable(buffer);
2434 
2435 #ifdef CONFIG_TRACER_MAX_TRACE
2436 	buffer = tr->max_buffer.buffer;
2437 	if (buffer)
2438 		ring_buffer_record_enable(buffer);
2439 #endif
2440 
2441 	arch_spin_unlock(&tr->max_lock);
2442 
2443  out:
2444 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2445 }
2446 
2447 /**
2448  * tracing_start - quick start of the tracer
2449  *
2450  * If tracing is enabled but was stopped by tracing_stop,
2451  * this will start the tracer back up.
2452  */
2453 void tracing_start(void)
2454 
2455 {
2456 	return tracing_start_tr(&global_trace);
2457 }
2458 
2459 static void tracing_stop_tr(struct trace_array *tr)
2460 {
2461 	struct trace_buffer *buffer;
2462 	unsigned long flags;
2463 
2464 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2465 	if (tr->stop_count++)
2466 		goto out;
2467 
2468 	/* Prevent the buffers from switching */
2469 	arch_spin_lock(&tr->max_lock);
2470 
2471 	buffer = tr->array_buffer.buffer;
2472 	if (buffer)
2473 		ring_buffer_record_disable(buffer);
2474 
2475 #ifdef CONFIG_TRACER_MAX_TRACE
2476 	buffer = tr->max_buffer.buffer;
2477 	if (buffer)
2478 		ring_buffer_record_disable(buffer);
2479 #endif
2480 
2481 	arch_spin_unlock(&tr->max_lock);
2482 
2483  out:
2484 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2485 }
2486 
2487 /**
2488  * tracing_stop - quick stop of the tracer
2489  *
2490  * Light weight way to stop tracing. Use in conjunction with
2491  * tracing_start.
2492  */
2493 void tracing_stop(void)
2494 {
2495 	return tracing_stop_tr(&global_trace);
2496 }
2497 
2498 /*
2499  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2500  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2501  * simplifies those functions and keeps them in sync.
2502  */
2503 enum print_line_t trace_handle_return(struct trace_seq *s)
2504 {
2505 	return trace_seq_has_overflowed(s) ?
2506 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2507 }
2508 EXPORT_SYMBOL_GPL(trace_handle_return);
2509 
2510 static unsigned short migration_disable_value(void)
2511 {
2512 #if defined(CONFIG_SMP)
2513 	return current->migration_disabled;
2514 #else
2515 	return 0;
2516 #endif
2517 }
2518 
2519 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2520 {
2521 	unsigned int trace_flags = irqs_status;
2522 	unsigned int pc;
2523 
2524 	pc = preempt_count();
2525 
2526 	if (pc & NMI_MASK)
2527 		trace_flags |= TRACE_FLAG_NMI;
2528 	if (pc & HARDIRQ_MASK)
2529 		trace_flags |= TRACE_FLAG_HARDIRQ;
2530 	if (in_serving_softirq())
2531 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2532 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2533 		trace_flags |= TRACE_FLAG_BH_OFF;
2534 
2535 	if (tif_need_resched())
2536 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2537 	if (test_preempt_need_resched())
2538 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2539 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2540 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2541 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2542 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2543 }
2544 
2545 struct ring_buffer_event *
2546 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2547 			  int type,
2548 			  unsigned long len,
2549 			  unsigned int trace_ctx)
2550 {
2551 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2552 }
2553 
2554 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2555 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2556 static int trace_buffered_event_ref;
2557 
2558 /**
2559  * trace_buffered_event_enable - enable buffering events
2560  *
2561  * When events are being filtered, it is quicker to use a temporary
2562  * buffer to write the event data into if there's a likely chance
2563  * that it will not be committed. The discard of the ring buffer
2564  * is not as fast as committing, and is much slower than copying
2565  * a commit.
2566  *
2567  * When an event is to be filtered, allocate per cpu buffers to
2568  * write the event data into, and if the event is filtered and discarded
2569  * it is simply dropped, otherwise, the entire data is to be committed
2570  * in one shot.
2571  */
2572 void trace_buffered_event_enable(void)
2573 {
2574 	struct ring_buffer_event *event;
2575 	struct page *page;
2576 	int cpu;
2577 
2578 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2579 
2580 	if (trace_buffered_event_ref++)
2581 		return;
2582 
2583 	for_each_tracing_cpu(cpu) {
2584 		page = alloc_pages_node(cpu_to_node(cpu),
2585 					GFP_KERNEL | __GFP_NORETRY, 0);
2586 		/* This is just an optimization and can handle failures */
2587 		if (!page) {
2588 			pr_err("Failed to allocate event buffer\n");
2589 			break;
2590 		}
2591 
2592 		event = page_address(page);
2593 		memset(event, 0, sizeof(*event));
2594 
2595 		per_cpu(trace_buffered_event, cpu) = event;
2596 
2597 		preempt_disable();
2598 		if (cpu == smp_processor_id() &&
2599 		    __this_cpu_read(trace_buffered_event) !=
2600 		    per_cpu(trace_buffered_event, cpu))
2601 			WARN_ON_ONCE(1);
2602 		preempt_enable();
2603 	}
2604 }
2605 
2606 static void enable_trace_buffered_event(void *data)
2607 {
2608 	/* Probably not needed, but do it anyway */
2609 	smp_rmb();
2610 	this_cpu_dec(trace_buffered_event_cnt);
2611 }
2612 
2613 static void disable_trace_buffered_event(void *data)
2614 {
2615 	this_cpu_inc(trace_buffered_event_cnt);
2616 }
2617 
2618 /**
2619  * trace_buffered_event_disable - disable buffering events
2620  *
2621  * When a filter is removed, it is faster to not use the buffered
2622  * events, and to commit directly into the ring buffer. Free up
2623  * the temp buffers when there are no more users. This requires
2624  * special synchronization with current events.
2625  */
2626 void trace_buffered_event_disable(void)
2627 {
2628 	int cpu;
2629 
2630 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2631 
2632 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2633 		return;
2634 
2635 	if (--trace_buffered_event_ref)
2636 		return;
2637 
2638 	/* For each CPU, set the buffer as used. */
2639 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2640 			 NULL, true);
2641 
2642 	/* Wait for all current users to finish */
2643 	synchronize_rcu();
2644 
2645 	for_each_tracing_cpu(cpu) {
2646 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2647 		per_cpu(trace_buffered_event, cpu) = NULL;
2648 	}
2649 
2650 	/*
2651 	 * Wait for all CPUs that potentially started checking if they can use
2652 	 * their event buffer only after the previous synchronize_rcu() call and
2653 	 * they still read a valid pointer from trace_buffered_event. It must be
2654 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2655 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2656 	 */
2657 	synchronize_rcu();
2658 
2659 	/* For each CPU, relinquish the buffer */
2660 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2661 			 true);
2662 }
2663 
2664 static struct trace_buffer *temp_buffer;
2665 
2666 struct ring_buffer_event *
2667 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2668 			  struct trace_event_file *trace_file,
2669 			  int type, unsigned long len,
2670 			  unsigned int trace_ctx)
2671 {
2672 	struct ring_buffer_event *entry;
2673 	struct trace_array *tr = trace_file->tr;
2674 	int val;
2675 
2676 	*current_rb = tr->array_buffer.buffer;
2677 
2678 	if (!tr->no_filter_buffering_ref &&
2679 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2680 		preempt_disable_notrace();
2681 		/*
2682 		 * Filtering is on, so try to use the per cpu buffer first.
2683 		 * This buffer will simulate a ring_buffer_event,
2684 		 * where the type_len is zero and the array[0] will
2685 		 * hold the full length.
2686 		 * (see include/linux/ring-buffer.h for details on
2687 		 *  how the ring_buffer_event is structured).
2688 		 *
2689 		 * Using a temp buffer during filtering and copying it
2690 		 * on a matched filter is quicker than writing directly
2691 		 * into the ring buffer and then discarding it when
2692 		 * it doesn't match. That is because the discard
2693 		 * requires several atomic operations to get right.
2694 		 * Copying on match and doing nothing on a failed match
2695 		 * is still quicker than no copy on match, but having
2696 		 * to discard out of the ring buffer on a failed match.
2697 		 */
2698 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2699 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2700 
2701 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2702 
2703 			/*
2704 			 * Preemption is disabled, but interrupts and NMIs
2705 			 * can still come in now. If that happens after
2706 			 * the above increment, then it will have to go
2707 			 * back to the old method of allocating the event
2708 			 * on the ring buffer, and if the filter fails, it
2709 			 * will have to call ring_buffer_discard_commit()
2710 			 * to remove it.
2711 			 *
2712 			 * Need to also check the unlikely case that the
2713 			 * length is bigger than the temp buffer size.
2714 			 * If that happens, then the reserve is pretty much
2715 			 * guaranteed to fail, as the ring buffer currently
2716 			 * only allows events less than a page. But that may
2717 			 * change in the future, so let the ring buffer reserve
2718 			 * handle the failure in that case.
2719 			 */
2720 			if (val == 1 && likely(len <= max_len)) {
2721 				trace_event_setup(entry, type, trace_ctx);
2722 				entry->array[0] = len;
2723 				/* Return with preemption disabled */
2724 				return entry;
2725 			}
2726 			this_cpu_dec(trace_buffered_event_cnt);
2727 		}
2728 		/* __trace_buffer_lock_reserve() disables preemption */
2729 		preempt_enable_notrace();
2730 	}
2731 
2732 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2733 					    trace_ctx);
2734 	/*
2735 	 * If tracing is off, but we have triggers enabled
2736 	 * we still need to look at the event data. Use the temp_buffer
2737 	 * to store the trace event for the trigger to use. It's recursive
2738 	 * safe and will not be recorded anywhere.
2739 	 */
2740 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2741 		*current_rb = temp_buffer;
2742 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2743 						    trace_ctx);
2744 	}
2745 	return entry;
2746 }
2747 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2748 
2749 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2750 static DEFINE_MUTEX(tracepoint_printk_mutex);
2751 
2752 static void output_printk(struct trace_event_buffer *fbuffer)
2753 {
2754 	struct trace_event_call *event_call;
2755 	struct trace_event_file *file;
2756 	struct trace_event *event;
2757 	unsigned long flags;
2758 	struct trace_iterator *iter = tracepoint_print_iter;
2759 
2760 	/* We should never get here if iter is NULL */
2761 	if (WARN_ON_ONCE(!iter))
2762 		return;
2763 
2764 	event_call = fbuffer->trace_file->event_call;
2765 	if (!event_call || !event_call->event.funcs ||
2766 	    !event_call->event.funcs->trace)
2767 		return;
2768 
2769 	file = fbuffer->trace_file;
2770 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2771 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2772 	     !filter_match_preds(file->filter, fbuffer->entry)))
2773 		return;
2774 
2775 	event = &fbuffer->trace_file->event_call->event;
2776 
2777 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2778 	trace_seq_init(&iter->seq);
2779 	iter->ent = fbuffer->entry;
2780 	event_call->event.funcs->trace(iter, 0, event);
2781 	trace_seq_putc(&iter->seq, 0);
2782 	printk("%s", iter->seq.buffer);
2783 
2784 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2785 }
2786 
2787 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2788 			     void *buffer, size_t *lenp,
2789 			     loff_t *ppos)
2790 {
2791 	int save_tracepoint_printk;
2792 	int ret;
2793 
2794 	guard(mutex)(&tracepoint_printk_mutex);
2795 	save_tracepoint_printk = tracepoint_printk;
2796 
2797 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2798 
2799 	/*
2800 	 * This will force exiting early, as tracepoint_printk
2801 	 * is always zero when tracepoint_printk_iter is not allocated
2802 	 */
2803 	if (!tracepoint_print_iter)
2804 		tracepoint_printk = 0;
2805 
2806 	if (save_tracepoint_printk == tracepoint_printk)
2807 		return ret;
2808 
2809 	if (tracepoint_printk)
2810 		static_key_enable(&tracepoint_printk_key.key);
2811 	else
2812 		static_key_disable(&tracepoint_printk_key.key);
2813 
2814 	return ret;
2815 }
2816 
2817 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2818 {
2819 	enum event_trigger_type tt = ETT_NONE;
2820 	struct trace_event_file *file = fbuffer->trace_file;
2821 
2822 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2823 			fbuffer->entry, &tt))
2824 		goto discard;
2825 
2826 	if (static_key_false(&tracepoint_printk_key.key))
2827 		output_printk(fbuffer);
2828 
2829 	if (static_branch_unlikely(&trace_event_exports_enabled))
2830 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2831 
2832 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2833 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2834 
2835 discard:
2836 	if (tt)
2837 		event_triggers_post_call(file, tt);
2838 
2839 }
2840 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2841 
2842 /*
2843  * Skip 3:
2844  *
2845  *   trace_buffer_unlock_commit_regs()
2846  *   trace_event_buffer_commit()
2847  *   trace_event_raw_event_xxx()
2848  */
2849 # define STACK_SKIP 3
2850 
2851 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2852 				     struct trace_buffer *buffer,
2853 				     struct ring_buffer_event *event,
2854 				     unsigned int trace_ctx,
2855 				     struct pt_regs *regs)
2856 {
2857 	__buffer_unlock_commit(buffer, event);
2858 
2859 	/*
2860 	 * If regs is not set, then skip the necessary functions.
2861 	 * Note, we can still get here via blktrace, wakeup tracer
2862 	 * and mmiotrace, but that's ok if they lose a function or
2863 	 * two. They are not that meaningful.
2864 	 */
2865 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2866 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2867 }
2868 
2869 /*
2870  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2871  */
2872 void
2873 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2874 				   struct ring_buffer_event *event)
2875 {
2876 	__buffer_unlock_commit(buffer, event);
2877 }
2878 
2879 void
2880 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2881 	       parent_ip, unsigned int trace_ctx)
2882 {
2883 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2884 	struct ring_buffer_event *event;
2885 	struct ftrace_entry *entry;
2886 
2887 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2888 					    trace_ctx);
2889 	if (!event)
2890 		return;
2891 	entry	= ring_buffer_event_data(event);
2892 	entry->ip			= ip;
2893 	entry->parent_ip		= parent_ip;
2894 
2895 	if (static_branch_unlikely(&trace_function_exports_enabled))
2896 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2897 	__buffer_unlock_commit(buffer, event);
2898 }
2899 
2900 #ifdef CONFIG_STACKTRACE
2901 
2902 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2903 #define FTRACE_KSTACK_NESTING	4
2904 
2905 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2906 
2907 struct ftrace_stack {
2908 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2909 };
2910 
2911 
2912 struct ftrace_stacks {
2913 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2914 };
2915 
2916 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2917 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2918 
2919 static void __ftrace_trace_stack(struct trace_array *tr,
2920 				 struct trace_buffer *buffer,
2921 				 unsigned int trace_ctx,
2922 				 int skip, struct pt_regs *regs)
2923 {
2924 	struct ring_buffer_event *event;
2925 	unsigned int size, nr_entries;
2926 	struct ftrace_stack *fstack;
2927 	struct stack_entry *entry;
2928 	int stackidx;
2929 
2930 	/*
2931 	 * Add one, for this function and the call to save_stack_trace()
2932 	 * If regs is set, then these functions will not be in the way.
2933 	 */
2934 #ifndef CONFIG_UNWINDER_ORC
2935 	if (!regs)
2936 		skip++;
2937 #endif
2938 
2939 	preempt_disable_notrace();
2940 
2941 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2942 
2943 	/* This should never happen. If it does, yell once and skip */
2944 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2945 		goto out;
2946 
2947 	/*
2948 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2949 	 * interrupt will either see the value pre increment or post
2950 	 * increment. If the interrupt happens pre increment it will have
2951 	 * restored the counter when it returns.  We just need a barrier to
2952 	 * keep gcc from moving things around.
2953 	 */
2954 	barrier();
2955 
2956 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2957 	size = ARRAY_SIZE(fstack->calls);
2958 
2959 	if (regs) {
2960 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2961 						   size, skip);
2962 	} else {
2963 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2964 	}
2965 
2966 #ifdef CONFIG_DYNAMIC_FTRACE
2967 	/* Mark entry of stack trace as trampoline code */
2968 	if (tr->ops && tr->ops->trampoline) {
2969 		unsigned long tramp_start = tr->ops->trampoline;
2970 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2971 		unsigned long *calls = fstack->calls;
2972 
2973 		for (int i = 0; i < nr_entries; i++) {
2974 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
2975 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
2976 		}
2977 	}
2978 #endif
2979 
2980 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2981 				    struct_size(entry, caller, nr_entries),
2982 				    trace_ctx);
2983 	if (!event)
2984 		goto out;
2985 	entry = ring_buffer_event_data(event);
2986 
2987 	entry->size = nr_entries;
2988 	memcpy(&entry->caller, fstack->calls,
2989 	       flex_array_size(entry, caller, nr_entries));
2990 
2991 	__buffer_unlock_commit(buffer, event);
2992 
2993  out:
2994 	/* Again, don't let gcc optimize things here */
2995 	barrier();
2996 	__this_cpu_dec(ftrace_stack_reserve);
2997 	preempt_enable_notrace();
2998 
2999 }
3000 
3001 static inline void ftrace_trace_stack(struct trace_array *tr,
3002 				      struct trace_buffer *buffer,
3003 				      unsigned int trace_ctx,
3004 				      int skip, struct pt_regs *regs)
3005 {
3006 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3007 		return;
3008 
3009 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3010 }
3011 
3012 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3013 		   int skip)
3014 {
3015 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3016 
3017 	if (rcu_is_watching()) {
3018 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3019 		return;
3020 	}
3021 
3022 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3023 		return;
3024 
3025 	/*
3026 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3027 	 * but if the above rcu_is_watching() failed, then the NMI
3028 	 * triggered someplace critical, and ct_irq_enter() should
3029 	 * not be called from NMI.
3030 	 */
3031 	if (unlikely(in_nmi()))
3032 		return;
3033 
3034 	ct_irq_enter_irqson();
3035 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3036 	ct_irq_exit_irqson();
3037 }
3038 
3039 /**
3040  * trace_dump_stack - record a stack back trace in the trace buffer
3041  * @skip: Number of functions to skip (helper handlers)
3042  */
3043 void trace_dump_stack(int skip)
3044 {
3045 	if (tracing_disabled || tracing_selftest_running)
3046 		return;
3047 
3048 #ifndef CONFIG_UNWINDER_ORC
3049 	/* Skip 1 to skip this function. */
3050 	skip++;
3051 #endif
3052 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3053 				tracing_gen_ctx(), skip, NULL);
3054 }
3055 EXPORT_SYMBOL_GPL(trace_dump_stack);
3056 
3057 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3058 static DEFINE_PER_CPU(int, user_stack_count);
3059 
3060 static void
3061 ftrace_trace_userstack(struct trace_array *tr,
3062 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3063 {
3064 	struct ring_buffer_event *event;
3065 	struct userstack_entry *entry;
3066 
3067 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3068 		return;
3069 
3070 	/*
3071 	 * NMIs can not handle page faults, even with fix ups.
3072 	 * The save user stack can (and often does) fault.
3073 	 */
3074 	if (unlikely(in_nmi()))
3075 		return;
3076 
3077 	/*
3078 	 * prevent recursion, since the user stack tracing may
3079 	 * trigger other kernel events.
3080 	 */
3081 	preempt_disable();
3082 	if (__this_cpu_read(user_stack_count))
3083 		goto out;
3084 
3085 	__this_cpu_inc(user_stack_count);
3086 
3087 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3088 					    sizeof(*entry), trace_ctx);
3089 	if (!event)
3090 		goto out_drop_count;
3091 	entry	= ring_buffer_event_data(event);
3092 
3093 	entry->tgid		= current->tgid;
3094 	memset(&entry->caller, 0, sizeof(entry->caller));
3095 
3096 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3097 	__buffer_unlock_commit(buffer, event);
3098 
3099  out_drop_count:
3100 	__this_cpu_dec(user_stack_count);
3101  out:
3102 	preempt_enable();
3103 }
3104 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3105 static void ftrace_trace_userstack(struct trace_array *tr,
3106 				   struct trace_buffer *buffer,
3107 				   unsigned int trace_ctx)
3108 {
3109 }
3110 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3111 
3112 #endif /* CONFIG_STACKTRACE */
3113 
3114 static inline void
3115 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3116 			  unsigned long long delta)
3117 {
3118 	entry->bottom_delta_ts = delta & U32_MAX;
3119 	entry->top_delta_ts = (delta >> 32);
3120 }
3121 
3122 void trace_last_func_repeats(struct trace_array *tr,
3123 			     struct trace_func_repeats *last_info,
3124 			     unsigned int trace_ctx)
3125 {
3126 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3127 	struct func_repeats_entry *entry;
3128 	struct ring_buffer_event *event;
3129 	u64 delta;
3130 
3131 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3132 					    sizeof(*entry), trace_ctx);
3133 	if (!event)
3134 		return;
3135 
3136 	delta = ring_buffer_event_time_stamp(buffer, event) -
3137 		last_info->ts_last_call;
3138 
3139 	entry = ring_buffer_event_data(event);
3140 	entry->ip = last_info->ip;
3141 	entry->parent_ip = last_info->parent_ip;
3142 	entry->count = last_info->count;
3143 	func_repeats_set_delta_ts(entry, delta);
3144 
3145 	__buffer_unlock_commit(buffer, event);
3146 }
3147 
3148 /* created for use with alloc_percpu */
3149 struct trace_buffer_struct {
3150 	int nesting;
3151 	char buffer[4][TRACE_BUF_SIZE];
3152 };
3153 
3154 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3155 
3156 /*
3157  * This allows for lockless recording.  If we're nested too deeply, then
3158  * this returns NULL.
3159  */
3160 static char *get_trace_buf(void)
3161 {
3162 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3163 
3164 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3165 		return NULL;
3166 
3167 	buffer->nesting++;
3168 
3169 	/* Interrupts must see nesting incremented before we use the buffer */
3170 	barrier();
3171 	return &buffer->buffer[buffer->nesting - 1][0];
3172 }
3173 
3174 static void put_trace_buf(void)
3175 {
3176 	/* Don't let the decrement of nesting leak before this */
3177 	barrier();
3178 	this_cpu_dec(trace_percpu_buffer->nesting);
3179 }
3180 
3181 static int alloc_percpu_trace_buffer(void)
3182 {
3183 	struct trace_buffer_struct __percpu *buffers;
3184 
3185 	if (trace_percpu_buffer)
3186 		return 0;
3187 
3188 	buffers = alloc_percpu(struct trace_buffer_struct);
3189 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3190 		return -ENOMEM;
3191 
3192 	trace_percpu_buffer = buffers;
3193 	return 0;
3194 }
3195 
3196 static int buffers_allocated;
3197 
3198 void trace_printk_init_buffers(void)
3199 {
3200 	if (buffers_allocated)
3201 		return;
3202 
3203 	if (alloc_percpu_trace_buffer())
3204 		return;
3205 
3206 	/* trace_printk() is for debug use only. Don't use it in production. */
3207 
3208 	pr_warn("\n");
3209 	pr_warn("**********************************************************\n");
3210 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3211 	pr_warn("**                                                      **\n");
3212 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3213 	pr_warn("**                                                      **\n");
3214 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3215 	pr_warn("** unsafe for production use.                           **\n");
3216 	pr_warn("**                                                      **\n");
3217 	pr_warn("** If you see this message and you are not debugging    **\n");
3218 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3219 	pr_warn("**                                                      **\n");
3220 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3221 	pr_warn("**********************************************************\n");
3222 
3223 	/* Expand the buffers to set size */
3224 	tracing_update_buffers(&global_trace);
3225 
3226 	buffers_allocated = 1;
3227 
3228 	/*
3229 	 * trace_printk_init_buffers() can be called by modules.
3230 	 * If that happens, then we need to start cmdline recording
3231 	 * directly here. If the global_trace.buffer is already
3232 	 * allocated here, then this was called by module code.
3233 	 */
3234 	if (global_trace.array_buffer.buffer)
3235 		tracing_start_cmdline_record();
3236 }
3237 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3238 
3239 void trace_printk_start_comm(void)
3240 {
3241 	/* Start tracing comms if trace printk is set */
3242 	if (!buffers_allocated)
3243 		return;
3244 	tracing_start_cmdline_record();
3245 }
3246 
3247 static void trace_printk_start_stop_comm(int enabled)
3248 {
3249 	if (!buffers_allocated)
3250 		return;
3251 
3252 	if (enabled)
3253 		tracing_start_cmdline_record();
3254 	else
3255 		tracing_stop_cmdline_record();
3256 }
3257 
3258 /**
3259  * trace_vbprintk - write binary msg to tracing buffer
3260  * @ip:    The address of the caller
3261  * @fmt:   The string format to write to the buffer
3262  * @args:  Arguments for @fmt
3263  */
3264 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3265 {
3266 	struct ring_buffer_event *event;
3267 	struct trace_buffer *buffer;
3268 	struct trace_array *tr = READ_ONCE(printk_trace);
3269 	struct bprint_entry *entry;
3270 	unsigned int trace_ctx;
3271 	char *tbuffer;
3272 	int len = 0, size;
3273 
3274 	if (!printk_binsafe(tr))
3275 		return trace_vprintk(ip, fmt, args);
3276 
3277 	if (unlikely(tracing_selftest_running || tracing_disabled))
3278 		return 0;
3279 
3280 	/* Don't pollute graph traces with trace_vprintk internals */
3281 	pause_graph_tracing();
3282 
3283 	trace_ctx = tracing_gen_ctx();
3284 	preempt_disable_notrace();
3285 
3286 	tbuffer = get_trace_buf();
3287 	if (!tbuffer) {
3288 		len = 0;
3289 		goto out_nobuffer;
3290 	}
3291 
3292 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3293 
3294 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3295 		goto out_put;
3296 
3297 	size = sizeof(*entry) + sizeof(u32) * len;
3298 	buffer = tr->array_buffer.buffer;
3299 	ring_buffer_nest_start(buffer);
3300 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3301 					    trace_ctx);
3302 	if (!event)
3303 		goto out;
3304 	entry = ring_buffer_event_data(event);
3305 	entry->ip			= ip;
3306 	entry->fmt			= fmt;
3307 
3308 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3309 	__buffer_unlock_commit(buffer, event);
3310 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3311 
3312 out:
3313 	ring_buffer_nest_end(buffer);
3314 out_put:
3315 	put_trace_buf();
3316 
3317 out_nobuffer:
3318 	preempt_enable_notrace();
3319 	unpause_graph_tracing();
3320 
3321 	return len;
3322 }
3323 EXPORT_SYMBOL_GPL(trace_vbprintk);
3324 
3325 __printf(3, 0)
3326 static int
3327 __trace_array_vprintk(struct trace_buffer *buffer,
3328 		      unsigned long ip, const char *fmt, va_list args)
3329 {
3330 	struct ring_buffer_event *event;
3331 	int len = 0, size;
3332 	struct print_entry *entry;
3333 	unsigned int trace_ctx;
3334 	char *tbuffer;
3335 
3336 	if (tracing_disabled)
3337 		return 0;
3338 
3339 	/* Don't pollute graph traces with trace_vprintk internals */
3340 	pause_graph_tracing();
3341 
3342 	trace_ctx = tracing_gen_ctx();
3343 	preempt_disable_notrace();
3344 
3345 
3346 	tbuffer = get_trace_buf();
3347 	if (!tbuffer) {
3348 		len = 0;
3349 		goto out_nobuffer;
3350 	}
3351 
3352 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3353 
3354 	size = sizeof(*entry) + len + 1;
3355 	ring_buffer_nest_start(buffer);
3356 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3357 					    trace_ctx);
3358 	if (!event)
3359 		goto out;
3360 	entry = ring_buffer_event_data(event);
3361 	entry->ip = ip;
3362 
3363 	memcpy(&entry->buf, tbuffer, len + 1);
3364 	__buffer_unlock_commit(buffer, event);
3365 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3366 
3367 out:
3368 	ring_buffer_nest_end(buffer);
3369 	put_trace_buf();
3370 
3371 out_nobuffer:
3372 	preempt_enable_notrace();
3373 	unpause_graph_tracing();
3374 
3375 	return len;
3376 }
3377 
3378 __printf(3, 0)
3379 int trace_array_vprintk(struct trace_array *tr,
3380 			unsigned long ip, const char *fmt, va_list args)
3381 {
3382 	if (tracing_selftest_running && tr == &global_trace)
3383 		return 0;
3384 
3385 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3386 }
3387 
3388 /**
3389  * trace_array_printk - Print a message to a specific instance
3390  * @tr: The instance trace_array descriptor
3391  * @ip: The instruction pointer that this is called from.
3392  * @fmt: The format to print (printf format)
3393  *
3394  * If a subsystem sets up its own instance, they have the right to
3395  * printk strings into their tracing instance buffer using this
3396  * function. Note, this function will not write into the top level
3397  * buffer (use trace_printk() for that), as writing into the top level
3398  * buffer should only have events that can be individually disabled.
3399  * trace_printk() is only used for debugging a kernel, and should not
3400  * be ever incorporated in normal use.
3401  *
3402  * trace_array_printk() can be used, as it will not add noise to the
3403  * top level tracing buffer.
3404  *
3405  * Note, trace_array_init_printk() must be called on @tr before this
3406  * can be used.
3407  */
3408 __printf(3, 0)
3409 int trace_array_printk(struct trace_array *tr,
3410 		       unsigned long ip, const char *fmt, ...)
3411 {
3412 	int ret;
3413 	va_list ap;
3414 
3415 	if (!tr)
3416 		return -ENOENT;
3417 
3418 	/* This is only allowed for created instances */
3419 	if (tr == &global_trace)
3420 		return 0;
3421 
3422 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3423 		return 0;
3424 
3425 	va_start(ap, fmt);
3426 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3427 	va_end(ap);
3428 	return ret;
3429 }
3430 EXPORT_SYMBOL_GPL(trace_array_printk);
3431 
3432 /**
3433  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3434  * @tr: The trace array to initialize the buffers for
3435  *
3436  * As trace_array_printk() only writes into instances, they are OK to
3437  * have in the kernel (unlike trace_printk()). This needs to be called
3438  * before trace_array_printk() can be used on a trace_array.
3439  */
3440 int trace_array_init_printk(struct trace_array *tr)
3441 {
3442 	if (!tr)
3443 		return -ENOENT;
3444 
3445 	/* This is only allowed for created instances */
3446 	if (tr == &global_trace)
3447 		return -EINVAL;
3448 
3449 	return alloc_percpu_trace_buffer();
3450 }
3451 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3452 
3453 __printf(3, 4)
3454 int trace_array_printk_buf(struct trace_buffer *buffer,
3455 			   unsigned long ip, const char *fmt, ...)
3456 {
3457 	int ret;
3458 	va_list ap;
3459 
3460 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3461 		return 0;
3462 
3463 	va_start(ap, fmt);
3464 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3465 	va_end(ap);
3466 	return ret;
3467 }
3468 
3469 __printf(2, 0)
3470 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3471 {
3472 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3473 }
3474 EXPORT_SYMBOL_GPL(trace_vprintk);
3475 
3476 static void trace_iterator_increment(struct trace_iterator *iter)
3477 {
3478 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3479 
3480 	iter->idx++;
3481 	if (buf_iter)
3482 		ring_buffer_iter_advance(buf_iter);
3483 }
3484 
3485 static struct trace_entry *
3486 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3487 		unsigned long *lost_events)
3488 {
3489 	struct ring_buffer_event *event;
3490 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3491 
3492 	if (buf_iter) {
3493 		event = ring_buffer_iter_peek(buf_iter, ts);
3494 		if (lost_events)
3495 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3496 				(unsigned long)-1 : 0;
3497 	} else {
3498 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3499 					 lost_events);
3500 	}
3501 
3502 	if (event) {
3503 		iter->ent_size = ring_buffer_event_length(event);
3504 		return ring_buffer_event_data(event);
3505 	}
3506 	iter->ent_size = 0;
3507 	return NULL;
3508 }
3509 
3510 static struct trace_entry *
3511 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3512 		  unsigned long *missing_events, u64 *ent_ts)
3513 {
3514 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3515 	struct trace_entry *ent, *next = NULL;
3516 	unsigned long lost_events = 0, next_lost = 0;
3517 	int cpu_file = iter->cpu_file;
3518 	u64 next_ts = 0, ts;
3519 	int next_cpu = -1;
3520 	int next_size = 0;
3521 	int cpu;
3522 
3523 	/*
3524 	 * If we are in a per_cpu trace file, don't bother by iterating over
3525 	 * all cpu and peek directly.
3526 	 */
3527 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3528 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3529 			return NULL;
3530 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3531 		if (ent_cpu)
3532 			*ent_cpu = cpu_file;
3533 
3534 		return ent;
3535 	}
3536 
3537 	for_each_tracing_cpu(cpu) {
3538 
3539 		if (ring_buffer_empty_cpu(buffer, cpu))
3540 			continue;
3541 
3542 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3543 
3544 		/*
3545 		 * Pick the entry with the smallest timestamp:
3546 		 */
3547 		if (ent && (!next || ts < next_ts)) {
3548 			next = ent;
3549 			next_cpu = cpu;
3550 			next_ts = ts;
3551 			next_lost = lost_events;
3552 			next_size = iter->ent_size;
3553 		}
3554 	}
3555 
3556 	iter->ent_size = next_size;
3557 
3558 	if (ent_cpu)
3559 		*ent_cpu = next_cpu;
3560 
3561 	if (ent_ts)
3562 		*ent_ts = next_ts;
3563 
3564 	if (missing_events)
3565 		*missing_events = next_lost;
3566 
3567 	return next;
3568 }
3569 
3570 #define STATIC_FMT_BUF_SIZE	128
3571 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3572 
3573 char *trace_iter_expand_format(struct trace_iterator *iter)
3574 {
3575 	char *tmp;
3576 
3577 	/*
3578 	 * iter->tr is NULL when used with tp_printk, which makes
3579 	 * this get called where it is not safe to call krealloc().
3580 	 */
3581 	if (!iter->tr || iter->fmt == static_fmt_buf)
3582 		return NULL;
3583 
3584 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3585 		       GFP_KERNEL);
3586 	if (tmp) {
3587 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3588 		iter->fmt = tmp;
3589 	}
3590 
3591 	return tmp;
3592 }
3593 
3594 /* Returns true if the string is safe to dereference from an event */
3595 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3596 {
3597 	unsigned long addr = (unsigned long)str;
3598 	struct trace_event *trace_event;
3599 	struct trace_event_call *event;
3600 
3601 	/* OK if part of the event data */
3602 	if ((addr >= (unsigned long)iter->ent) &&
3603 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3604 		return true;
3605 
3606 	/* OK if part of the temp seq buffer */
3607 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3608 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3609 		return true;
3610 
3611 	/* Core rodata can not be freed */
3612 	if (is_kernel_rodata(addr))
3613 		return true;
3614 
3615 	if (trace_is_tracepoint_string(str))
3616 		return true;
3617 
3618 	/*
3619 	 * Now this could be a module event, referencing core module
3620 	 * data, which is OK.
3621 	 */
3622 	if (!iter->ent)
3623 		return false;
3624 
3625 	trace_event = ftrace_find_event(iter->ent->type);
3626 	if (!trace_event)
3627 		return false;
3628 
3629 	event = container_of(trace_event, struct trace_event_call, event);
3630 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3631 		return false;
3632 
3633 	/* Would rather have rodata, but this will suffice */
3634 	if (within_module_core(addr, event->module))
3635 		return true;
3636 
3637 	return false;
3638 }
3639 
3640 /**
3641  * ignore_event - Check dereferenced fields while writing to the seq buffer
3642  * @iter: The iterator that holds the seq buffer and the event being printed
3643  *
3644  * At boot up, test_event_printk() will flag any event that dereferences
3645  * a string with "%s" that does exist in the ring buffer. It may still
3646  * be valid, as the string may point to a static string in the kernel
3647  * rodata that never gets freed. But if the string pointer is pointing
3648  * to something that was allocated, there's a chance that it can be freed
3649  * by the time the user reads the trace. This would cause a bad memory
3650  * access by the kernel and possibly crash the system.
3651  *
3652  * This function will check if the event has any fields flagged as needing
3653  * to be checked at runtime and perform those checks.
3654  *
3655  * If it is found that a field is unsafe, it will write into the @iter->seq
3656  * a message stating what was found to be unsafe.
3657  *
3658  * @return: true if the event is unsafe and should be ignored,
3659  *          false otherwise.
3660  */
3661 bool ignore_event(struct trace_iterator *iter)
3662 {
3663 	struct ftrace_event_field *field;
3664 	struct trace_event *trace_event;
3665 	struct trace_event_call *event;
3666 	struct list_head *head;
3667 	struct trace_seq *seq;
3668 	const void *ptr;
3669 
3670 	trace_event = ftrace_find_event(iter->ent->type);
3671 
3672 	seq = &iter->seq;
3673 
3674 	if (!trace_event) {
3675 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3676 		return true;
3677 	}
3678 
3679 	event = container_of(trace_event, struct trace_event_call, event);
3680 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3681 		return false;
3682 
3683 	head = trace_get_fields(event);
3684 	if (!head) {
3685 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3686 				 trace_event_name(event));
3687 		return true;
3688 	}
3689 
3690 	/* Offsets are from the iter->ent that points to the raw event */
3691 	ptr = iter->ent;
3692 
3693 	list_for_each_entry(field, head, link) {
3694 		const char *str;
3695 		bool good;
3696 
3697 		if (!field->needs_test)
3698 			continue;
3699 
3700 		str = *(const char **)(ptr + field->offset);
3701 
3702 		good = trace_safe_str(iter, str);
3703 
3704 		/*
3705 		 * If you hit this warning, it is likely that the
3706 		 * trace event in question used %s on a string that
3707 		 * was saved at the time of the event, but may not be
3708 		 * around when the trace is read. Use __string(),
3709 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3710 		 * instead. See samples/trace_events/trace-events-sample.h
3711 		 * for reference.
3712 		 */
3713 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3714 			      trace_event_name(event), field->name)) {
3715 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3716 					 trace_event_name(event), field->name);
3717 			return true;
3718 		}
3719 	}
3720 	return false;
3721 }
3722 
3723 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3724 {
3725 	const char *p, *new_fmt;
3726 	char *q;
3727 
3728 	if (WARN_ON_ONCE(!fmt))
3729 		return fmt;
3730 
3731 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3732 		return fmt;
3733 
3734 	p = fmt;
3735 	new_fmt = q = iter->fmt;
3736 	while (*p) {
3737 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3738 			if (!trace_iter_expand_format(iter))
3739 				return fmt;
3740 
3741 			q += iter->fmt - new_fmt;
3742 			new_fmt = iter->fmt;
3743 		}
3744 
3745 		*q++ = *p++;
3746 
3747 		/* Replace %p with %px */
3748 		if (p[-1] == '%') {
3749 			if (p[0] == '%') {
3750 				*q++ = *p++;
3751 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3752 				*q++ = *p++;
3753 				*q++ = 'x';
3754 			}
3755 		}
3756 	}
3757 	*q = '\0';
3758 
3759 	return new_fmt;
3760 }
3761 
3762 #define STATIC_TEMP_BUF_SIZE	128
3763 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3764 
3765 /* Find the next real entry, without updating the iterator itself */
3766 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3767 					  int *ent_cpu, u64 *ent_ts)
3768 {
3769 	/* __find_next_entry will reset ent_size */
3770 	int ent_size = iter->ent_size;
3771 	struct trace_entry *entry;
3772 
3773 	/*
3774 	 * If called from ftrace_dump(), then the iter->temp buffer
3775 	 * will be the static_temp_buf and not created from kmalloc.
3776 	 * If the entry size is greater than the buffer, we can
3777 	 * not save it. Just return NULL in that case. This is only
3778 	 * used to add markers when two consecutive events' time
3779 	 * stamps have a large delta. See trace_print_lat_context()
3780 	 */
3781 	if (iter->temp == static_temp_buf &&
3782 	    STATIC_TEMP_BUF_SIZE < ent_size)
3783 		return NULL;
3784 
3785 	/*
3786 	 * The __find_next_entry() may call peek_next_entry(), which may
3787 	 * call ring_buffer_peek() that may make the contents of iter->ent
3788 	 * undefined. Need to copy iter->ent now.
3789 	 */
3790 	if (iter->ent && iter->ent != iter->temp) {
3791 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3792 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3793 			void *temp;
3794 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3795 			if (!temp)
3796 				return NULL;
3797 			kfree(iter->temp);
3798 			iter->temp = temp;
3799 			iter->temp_size = iter->ent_size;
3800 		}
3801 		memcpy(iter->temp, iter->ent, iter->ent_size);
3802 		iter->ent = iter->temp;
3803 	}
3804 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3805 	/* Put back the original ent_size */
3806 	iter->ent_size = ent_size;
3807 
3808 	return entry;
3809 }
3810 
3811 /* Find the next real entry, and increment the iterator to the next entry */
3812 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3813 {
3814 	iter->ent = __find_next_entry(iter, &iter->cpu,
3815 				      &iter->lost_events, &iter->ts);
3816 
3817 	if (iter->ent)
3818 		trace_iterator_increment(iter);
3819 
3820 	return iter->ent ? iter : NULL;
3821 }
3822 
3823 static void trace_consume(struct trace_iterator *iter)
3824 {
3825 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3826 			    &iter->lost_events);
3827 }
3828 
3829 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3830 {
3831 	struct trace_iterator *iter = m->private;
3832 	int i = (int)*pos;
3833 	void *ent;
3834 
3835 	WARN_ON_ONCE(iter->leftover);
3836 
3837 	(*pos)++;
3838 
3839 	/* can't go backwards */
3840 	if (iter->idx > i)
3841 		return NULL;
3842 
3843 	if (iter->idx < 0)
3844 		ent = trace_find_next_entry_inc(iter);
3845 	else
3846 		ent = iter;
3847 
3848 	while (ent && iter->idx < i)
3849 		ent = trace_find_next_entry_inc(iter);
3850 
3851 	iter->pos = *pos;
3852 
3853 	return ent;
3854 }
3855 
3856 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3857 {
3858 	struct ring_buffer_iter *buf_iter;
3859 	unsigned long entries = 0;
3860 	u64 ts;
3861 
3862 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3863 
3864 	buf_iter = trace_buffer_iter(iter, cpu);
3865 	if (!buf_iter)
3866 		return;
3867 
3868 	ring_buffer_iter_reset(buf_iter);
3869 
3870 	/*
3871 	 * We could have the case with the max latency tracers
3872 	 * that a reset never took place on a cpu. This is evident
3873 	 * by the timestamp being before the start of the buffer.
3874 	 */
3875 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3876 		if (ts >= iter->array_buffer->time_start)
3877 			break;
3878 		entries++;
3879 		ring_buffer_iter_advance(buf_iter);
3880 		/* This could be a big loop */
3881 		cond_resched();
3882 	}
3883 
3884 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3885 }
3886 
3887 /*
3888  * The current tracer is copied to avoid a global locking
3889  * all around.
3890  */
3891 static void *s_start(struct seq_file *m, loff_t *pos)
3892 {
3893 	struct trace_iterator *iter = m->private;
3894 	struct trace_array *tr = iter->tr;
3895 	int cpu_file = iter->cpu_file;
3896 	void *p = NULL;
3897 	loff_t l = 0;
3898 	int cpu;
3899 
3900 	mutex_lock(&trace_types_lock);
3901 	if (unlikely(tr->current_trace != iter->trace)) {
3902 		/* Close iter->trace before switching to the new current tracer */
3903 		if (iter->trace->close)
3904 			iter->trace->close(iter);
3905 		iter->trace = tr->current_trace;
3906 		/* Reopen the new current tracer */
3907 		if (iter->trace->open)
3908 			iter->trace->open(iter);
3909 	}
3910 	mutex_unlock(&trace_types_lock);
3911 
3912 #ifdef CONFIG_TRACER_MAX_TRACE
3913 	if (iter->snapshot && iter->trace->use_max_tr)
3914 		return ERR_PTR(-EBUSY);
3915 #endif
3916 
3917 	if (*pos != iter->pos) {
3918 		iter->ent = NULL;
3919 		iter->cpu = 0;
3920 		iter->idx = -1;
3921 
3922 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3923 			for_each_tracing_cpu(cpu)
3924 				tracing_iter_reset(iter, cpu);
3925 		} else
3926 			tracing_iter_reset(iter, cpu_file);
3927 
3928 		iter->leftover = 0;
3929 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3930 			;
3931 
3932 	} else {
3933 		/*
3934 		 * If we overflowed the seq_file before, then we want
3935 		 * to just reuse the trace_seq buffer again.
3936 		 */
3937 		if (iter->leftover)
3938 			p = iter;
3939 		else {
3940 			l = *pos - 1;
3941 			p = s_next(m, p, &l);
3942 		}
3943 	}
3944 
3945 	trace_event_read_lock();
3946 	trace_access_lock(cpu_file);
3947 	return p;
3948 }
3949 
3950 static void s_stop(struct seq_file *m, void *p)
3951 {
3952 	struct trace_iterator *iter = m->private;
3953 
3954 #ifdef CONFIG_TRACER_MAX_TRACE
3955 	if (iter->snapshot && iter->trace->use_max_tr)
3956 		return;
3957 #endif
3958 
3959 	trace_access_unlock(iter->cpu_file);
3960 	trace_event_read_unlock();
3961 }
3962 
3963 static void
3964 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3965 		      unsigned long *entries, int cpu)
3966 {
3967 	unsigned long count;
3968 
3969 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3970 	/*
3971 	 * If this buffer has skipped entries, then we hold all
3972 	 * entries for the trace and we need to ignore the
3973 	 * ones before the time stamp.
3974 	 */
3975 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3976 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3977 		/* total is the same as the entries */
3978 		*total = count;
3979 	} else
3980 		*total = count +
3981 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3982 	*entries = count;
3983 }
3984 
3985 static void
3986 get_total_entries(struct array_buffer *buf,
3987 		  unsigned long *total, unsigned long *entries)
3988 {
3989 	unsigned long t, e;
3990 	int cpu;
3991 
3992 	*total = 0;
3993 	*entries = 0;
3994 
3995 	for_each_tracing_cpu(cpu) {
3996 		get_total_entries_cpu(buf, &t, &e, cpu);
3997 		*total += t;
3998 		*entries += e;
3999 	}
4000 }
4001 
4002 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4003 {
4004 	unsigned long total, entries;
4005 
4006 	if (!tr)
4007 		tr = &global_trace;
4008 
4009 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4010 
4011 	return entries;
4012 }
4013 
4014 unsigned long trace_total_entries(struct trace_array *tr)
4015 {
4016 	unsigned long total, entries;
4017 
4018 	if (!tr)
4019 		tr = &global_trace;
4020 
4021 	get_total_entries(&tr->array_buffer, &total, &entries);
4022 
4023 	return entries;
4024 }
4025 
4026 static void print_lat_help_header(struct seq_file *m)
4027 {
4028 	seq_puts(m, "#                    _------=> CPU#            \n"
4029 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4030 		    "#                  | / _----=> need-resched    \n"
4031 		    "#                  || / _---=> hardirq/softirq \n"
4032 		    "#                  ||| / _--=> preempt-depth   \n"
4033 		    "#                  |||| / _-=> migrate-disable \n"
4034 		    "#                  ||||| /     delay           \n"
4035 		    "#  cmd     pid     |||||| time  |   caller     \n"
4036 		    "#     \\   /        ||||||  \\    |    /       \n");
4037 }
4038 
4039 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4040 {
4041 	unsigned long total;
4042 	unsigned long entries;
4043 
4044 	get_total_entries(buf, &total, &entries);
4045 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4046 		   entries, total, num_online_cpus());
4047 	seq_puts(m, "#\n");
4048 }
4049 
4050 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4051 				   unsigned int flags)
4052 {
4053 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4054 
4055 	print_event_info(buf, m);
4056 
4057 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4058 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4059 }
4060 
4061 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4062 				       unsigned int flags)
4063 {
4064 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4065 	static const char space[] = "            ";
4066 	int prec = tgid ? 12 : 2;
4067 
4068 	print_event_info(buf, m);
4069 
4070 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4071 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4072 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4073 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4074 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4075 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4076 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4077 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4078 }
4079 
4080 void
4081 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4082 {
4083 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4084 	struct array_buffer *buf = iter->array_buffer;
4085 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4086 	struct tracer *type = iter->trace;
4087 	unsigned long entries;
4088 	unsigned long total;
4089 	const char *name = type->name;
4090 
4091 	get_total_entries(buf, &total, &entries);
4092 
4093 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4094 		   name, init_utsname()->release);
4095 	seq_puts(m, "# -----------------------------------"
4096 		 "---------------------------------\n");
4097 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4098 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4099 		   nsecs_to_usecs(data->saved_latency),
4100 		   entries,
4101 		   total,
4102 		   buf->cpu,
4103 		   preempt_model_str(),
4104 		   /* These are reserved for later use */
4105 		   0, 0, 0, 0);
4106 #ifdef CONFIG_SMP
4107 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4108 #else
4109 	seq_puts(m, ")\n");
4110 #endif
4111 	seq_puts(m, "#    -----------------\n");
4112 	seq_printf(m, "#    | task: %.16s-%d "
4113 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4114 		   data->comm, data->pid,
4115 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4116 		   data->policy, data->rt_priority);
4117 	seq_puts(m, "#    -----------------\n");
4118 
4119 	if (data->critical_start) {
4120 		seq_puts(m, "#  => started at: ");
4121 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4122 		trace_print_seq(m, &iter->seq);
4123 		seq_puts(m, "\n#  => ended at:   ");
4124 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4125 		trace_print_seq(m, &iter->seq);
4126 		seq_puts(m, "\n#\n");
4127 	}
4128 
4129 	seq_puts(m, "#\n");
4130 }
4131 
4132 static void test_cpu_buff_start(struct trace_iterator *iter)
4133 {
4134 	struct trace_seq *s = &iter->seq;
4135 	struct trace_array *tr = iter->tr;
4136 
4137 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4138 		return;
4139 
4140 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4141 		return;
4142 
4143 	if (cpumask_available(iter->started) &&
4144 	    cpumask_test_cpu(iter->cpu, iter->started))
4145 		return;
4146 
4147 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4148 		return;
4149 
4150 	if (cpumask_available(iter->started))
4151 		cpumask_set_cpu(iter->cpu, iter->started);
4152 
4153 	/* Don't print started cpu buffer for the first entry of the trace */
4154 	if (iter->idx > 1)
4155 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4156 				iter->cpu);
4157 }
4158 
4159 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4160 {
4161 	struct trace_array *tr = iter->tr;
4162 	struct trace_seq *s = &iter->seq;
4163 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4164 	struct trace_entry *entry;
4165 	struct trace_event *event;
4166 
4167 	entry = iter->ent;
4168 
4169 	test_cpu_buff_start(iter);
4170 
4171 	event = ftrace_find_event(entry->type);
4172 
4173 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4174 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4175 			trace_print_lat_context(iter);
4176 		else
4177 			trace_print_context(iter);
4178 	}
4179 
4180 	if (trace_seq_has_overflowed(s))
4181 		return TRACE_TYPE_PARTIAL_LINE;
4182 
4183 	if (event) {
4184 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4185 			return print_event_fields(iter, event);
4186 		/*
4187 		 * For TRACE_EVENT() events, the print_fmt is not
4188 		 * safe to use if the array has delta offsets
4189 		 * Force printing via the fields.
4190 		 */
4191 		if ((tr->text_delta || tr->data_delta) &&
4192 		    event->type > __TRACE_LAST_TYPE)
4193 			return print_event_fields(iter, event);
4194 
4195 		return event->funcs->trace(iter, sym_flags, event);
4196 	}
4197 
4198 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4199 
4200 	return trace_handle_return(s);
4201 }
4202 
4203 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4204 {
4205 	struct trace_array *tr = iter->tr;
4206 	struct trace_seq *s = &iter->seq;
4207 	struct trace_entry *entry;
4208 	struct trace_event *event;
4209 
4210 	entry = iter->ent;
4211 
4212 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4213 		trace_seq_printf(s, "%d %d %llu ",
4214 				 entry->pid, iter->cpu, iter->ts);
4215 
4216 	if (trace_seq_has_overflowed(s))
4217 		return TRACE_TYPE_PARTIAL_LINE;
4218 
4219 	event = ftrace_find_event(entry->type);
4220 	if (event)
4221 		return event->funcs->raw(iter, 0, event);
4222 
4223 	trace_seq_printf(s, "%d ?\n", entry->type);
4224 
4225 	return trace_handle_return(s);
4226 }
4227 
4228 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4229 {
4230 	struct trace_array *tr = iter->tr;
4231 	struct trace_seq *s = &iter->seq;
4232 	unsigned char newline = '\n';
4233 	struct trace_entry *entry;
4234 	struct trace_event *event;
4235 
4236 	entry = iter->ent;
4237 
4238 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4239 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4240 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4241 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4242 		if (trace_seq_has_overflowed(s))
4243 			return TRACE_TYPE_PARTIAL_LINE;
4244 	}
4245 
4246 	event = ftrace_find_event(entry->type);
4247 	if (event) {
4248 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4249 		if (ret != TRACE_TYPE_HANDLED)
4250 			return ret;
4251 	}
4252 
4253 	SEQ_PUT_FIELD(s, newline);
4254 
4255 	return trace_handle_return(s);
4256 }
4257 
4258 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4259 {
4260 	struct trace_array *tr = iter->tr;
4261 	struct trace_seq *s = &iter->seq;
4262 	struct trace_entry *entry;
4263 	struct trace_event *event;
4264 
4265 	entry = iter->ent;
4266 
4267 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4268 		SEQ_PUT_FIELD(s, entry->pid);
4269 		SEQ_PUT_FIELD(s, iter->cpu);
4270 		SEQ_PUT_FIELD(s, iter->ts);
4271 		if (trace_seq_has_overflowed(s))
4272 			return TRACE_TYPE_PARTIAL_LINE;
4273 	}
4274 
4275 	event = ftrace_find_event(entry->type);
4276 	return event ? event->funcs->binary(iter, 0, event) :
4277 		TRACE_TYPE_HANDLED;
4278 }
4279 
4280 int trace_empty(struct trace_iterator *iter)
4281 {
4282 	struct ring_buffer_iter *buf_iter;
4283 	int cpu;
4284 
4285 	/* If we are looking at one CPU buffer, only check that one */
4286 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4287 		cpu = iter->cpu_file;
4288 		buf_iter = trace_buffer_iter(iter, cpu);
4289 		if (buf_iter) {
4290 			if (!ring_buffer_iter_empty(buf_iter))
4291 				return 0;
4292 		} else {
4293 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4294 				return 0;
4295 		}
4296 		return 1;
4297 	}
4298 
4299 	for_each_tracing_cpu(cpu) {
4300 		buf_iter = trace_buffer_iter(iter, cpu);
4301 		if (buf_iter) {
4302 			if (!ring_buffer_iter_empty(buf_iter))
4303 				return 0;
4304 		} else {
4305 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4306 				return 0;
4307 		}
4308 	}
4309 
4310 	return 1;
4311 }
4312 
4313 /*  Called with trace_event_read_lock() held. */
4314 enum print_line_t print_trace_line(struct trace_iterator *iter)
4315 {
4316 	struct trace_array *tr = iter->tr;
4317 	unsigned long trace_flags = tr->trace_flags;
4318 	enum print_line_t ret;
4319 
4320 	if (iter->lost_events) {
4321 		if (iter->lost_events == (unsigned long)-1)
4322 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4323 					 iter->cpu);
4324 		else
4325 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4326 					 iter->cpu, iter->lost_events);
4327 		if (trace_seq_has_overflowed(&iter->seq))
4328 			return TRACE_TYPE_PARTIAL_LINE;
4329 	}
4330 
4331 	if (iter->trace && iter->trace->print_line) {
4332 		ret = iter->trace->print_line(iter);
4333 		if (ret != TRACE_TYPE_UNHANDLED)
4334 			return ret;
4335 	}
4336 
4337 	if (iter->ent->type == TRACE_BPUTS &&
4338 			trace_flags & TRACE_ITER_PRINTK &&
4339 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4340 		return trace_print_bputs_msg_only(iter);
4341 
4342 	if (iter->ent->type == TRACE_BPRINT &&
4343 			trace_flags & TRACE_ITER_PRINTK &&
4344 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4345 		return trace_print_bprintk_msg_only(iter);
4346 
4347 	if (iter->ent->type == TRACE_PRINT &&
4348 			trace_flags & TRACE_ITER_PRINTK &&
4349 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4350 		return trace_print_printk_msg_only(iter);
4351 
4352 	if (trace_flags & TRACE_ITER_BIN)
4353 		return print_bin_fmt(iter);
4354 
4355 	if (trace_flags & TRACE_ITER_HEX)
4356 		return print_hex_fmt(iter);
4357 
4358 	if (trace_flags & TRACE_ITER_RAW)
4359 		return print_raw_fmt(iter);
4360 
4361 	return print_trace_fmt(iter);
4362 }
4363 
4364 void trace_latency_header(struct seq_file *m)
4365 {
4366 	struct trace_iterator *iter = m->private;
4367 	struct trace_array *tr = iter->tr;
4368 
4369 	/* print nothing if the buffers are empty */
4370 	if (trace_empty(iter))
4371 		return;
4372 
4373 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4374 		print_trace_header(m, iter);
4375 
4376 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4377 		print_lat_help_header(m);
4378 }
4379 
4380 void trace_default_header(struct seq_file *m)
4381 {
4382 	struct trace_iterator *iter = m->private;
4383 	struct trace_array *tr = iter->tr;
4384 	unsigned long trace_flags = tr->trace_flags;
4385 
4386 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4387 		return;
4388 
4389 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4390 		/* print nothing if the buffers are empty */
4391 		if (trace_empty(iter))
4392 			return;
4393 		print_trace_header(m, iter);
4394 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4395 			print_lat_help_header(m);
4396 	} else {
4397 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4398 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4399 				print_func_help_header_irq(iter->array_buffer,
4400 							   m, trace_flags);
4401 			else
4402 				print_func_help_header(iter->array_buffer, m,
4403 						       trace_flags);
4404 		}
4405 	}
4406 }
4407 
4408 static void test_ftrace_alive(struct seq_file *m)
4409 {
4410 	if (!ftrace_is_dead())
4411 		return;
4412 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4413 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4414 }
4415 
4416 #ifdef CONFIG_TRACER_MAX_TRACE
4417 static void show_snapshot_main_help(struct seq_file *m)
4418 {
4419 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4420 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4421 		    "#                      Takes a snapshot of the main buffer.\n"
4422 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4423 		    "#                      (Doesn't have to be '2' works with any number that\n"
4424 		    "#                       is not a '0' or '1')\n");
4425 }
4426 
4427 static void show_snapshot_percpu_help(struct seq_file *m)
4428 {
4429 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4430 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4431 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4432 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4433 #else
4434 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4435 		    "#                     Must use main snapshot file to allocate.\n");
4436 #endif
4437 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4438 		    "#                      (Doesn't have to be '2' works with any number that\n"
4439 		    "#                       is not a '0' or '1')\n");
4440 }
4441 
4442 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4443 {
4444 	if (iter->tr->allocated_snapshot)
4445 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4446 	else
4447 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4448 
4449 	seq_puts(m, "# Snapshot commands:\n");
4450 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4451 		show_snapshot_main_help(m);
4452 	else
4453 		show_snapshot_percpu_help(m);
4454 }
4455 #else
4456 /* Should never be called */
4457 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4458 #endif
4459 
4460 static int s_show(struct seq_file *m, void *v)
4461 {
4462 	struct trace_iterator *iter = v;
4463 	int ret;
4464 
4465 	if (iter->ent == NULL) {
4466 		if (iter->tr) {
4467 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4468 			seq_puts(m, "#\n");
4469 			test_ftrace_alive(m);
4470 		}
4471 		if (iter->snapshot && trace_empty(iter))
4472 			print_snapshot_help(m, iter);
4473 		else if (iter->trace && iter->trace->print_header)
4474 			iter->trace->print_header(m);
4475 		else
4476 			trace_default_header(m);
4477 
4478 	} else if (iter->leftover) {
4479 		/*
4480 		 * If we filled the seq_file buffer earlier, we
4481 		 * want to just show it now.
4482 		 */
4483 		ret = trace_print_seq(m, &iter->seq);
4484 
4485 		/* ret should this time be zero, but you never know */
4486 		iter->leftover = ret;
4487 
4488 	} else {
4489 		ret = print_trace_line(iter);
4490 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4491 			iter->seq.full = 0;
4492 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4493 		}
4494 		ret = trace_print_seq(m, &iter->seq);
4495 		/*
4496 		 * If we overflow the seq_file buffer, then it will
4497 		 * ask us for this data again at start up.
4498 		 * Use that instead.
4499 		 *  ret is 0 if seq_file write succeeded.
4500 		 *        -1 otherwise.
4501 		 */
4502 		iter->leftover = ret;
4503 	}
4504 
4505 	return 0;
4506 }
4507 
4508 /*
4509  * Should be used after trace_array_get(), trace_types_lock
4510  * ensures that i_cdev was already initialized.
4511  */
4512 static inline int tracing_get_cpu(struct inode *inode)
4513 {
4514 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4515 		return (long)inode->i_cdev - 1;
4516 	return RING_BUFFER_ALL_CPUS;
4517 }
4518 
4519 static const struct seq_operations tracer_seq_ops = {
4520 	.start		= s_start,
4521 	.next		= s_next,
4522 	.stop		= s_stop,
4523 	.show		= s_show,
4524 };
4525 
4526 /*
4527  * Note, as iter itself can be allocated and freed in different
4528  * ways, this function is only used to free its content, and not
4529  * the iterator itself. The only requirement to all the allocations
4530  * is that it must zero all fields (kzalloc), as freeing works with
4531  * ethier allocated content or NULL.
4532  */
4533 static void free_trace_iter_content(struct trace_iterator *iter)
4534 {
4535 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4536 	if (iter->fmt != static_fmt_buf)
4537 		kfree(iter->fmt);
4538 
4539 	kfree(iter->temp);
4540 	kfree(iter->buffer_iter);
4541 	mutex_destroy(&iter->mutex);
4542 	free_cpumask_var(iter->started);
4543 }
4544 
4545 static struct trace_iterator *
4546 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4547 {
4548 	struct trace_array *tr = inode->i_private;
4549 	struct trace_iterator *iter;
4550 	int cpu;
4551 
4552 	if (tracing_disabled)
4553 		return ERR_PTR(-ENODEV);
4554 
4555 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4556 	if (!iter)
4557 		return ERR_PTR(-ENOMEM);
4558 
4559 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4560 				    GFP_KERNEL);
4561 	if (!iter->buffer_iter)
4562 		goto release;
4563 
4564 	/*
4565 	 * trace_find_next_entry() may need to save off iter->ent.
4566 	 * It will place it into the iter->temp buffer. As most
4567 	 * events are less than 128, allocate a buffer of that size.
4568 	 * If one is greater, then trace_find_next_entry() will
4569 	 * allocate a new buffer to adjust for the bigger iter->ent.
4570 	 * It's not critical if it fails to get allocated here.
4571 	 */
4572 	iter->temp = kmalloc(128, GFP_KERNEL);
4573 	if (iter->temp)
4574 		iter->temp_size = 128;
4575 
4576 	/*
4577 	 * trace_event_printf() may need to modify given format
4578 	 * string to replace %p with %px so that it shows real address
4579 	 * instead of hash value. However, that is only for the event
4580 	 * tracing, other tracer may not need. Defer the allocation
4581 	 * until it is needed.
4582 	 */
4583 	iter->fmt = NULL;
4584 	iter->fmt_size = 0;
4585 
4586 	mutex_lock(&trace_types_lock);
4587 	iter->trace = tr->current_trace;
4588 
4589 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4590 		goto fail;
4591 
4592 	iter->tr = tr;
4593 
4594 #ifdef CONFIG_TRACER_MAX_TRACE
4595 	/* Currently only the top directory has a snapshot */
4596 	if (tr->current_trace->print_max || snapshot)
4597 		iter->array_buffer = &tr->max_buffer;
4598 	else
4599 #endif
4600 		iter->array_buffer = &tr->array_buffer;
4601 	iter->snapshot = snapshot;
4602 	iter->pos = -1;
4603 	iter->cpu_file = tracing_get_cpu(inode);
4604 	mutex_init(&iter->mutex);
4605 
4606 	/* Notify the tracer early; before we stop tracing. */
4607 	if (iter->trace->open)
4608 		iter->trace->open(iter);
4609 
4610 	/* Annotate start of buffers if we had overruns */
4611 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4612 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4613 
4614 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4615 	if (trace_clocks[tr->clock_id].in_ns)
4616 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4617 
4618 	/*
4619 	 * If pause-on-trace is enabled, then stop the trace while
4620 	 * dumping, unless this is the "snapshot" file
4621 	 */
4622 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4623 		tracing_stop_tr(tr);
4624 
4625 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4626 		for_each_tracing_cpu(cpu) {
4627 			iter->buffer_iter[cpu] =
4628 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4629 							 cpu, GFP_KERNEL);
4630 		}
4631 		ring_buffer_read_prepare_sync();
4632 		for_each_tracing_cpu(cpu) {
4633 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4634 			tracing_iter_reset(iter, cpu);
4635 		}
4636 	} else {
4637 		cpu = iter->cpu_file;
4638 		iter->buffer_iter[cpu] =
4639 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4640 						 cpu, GFP_KERNEL);
4641 		ring_buffer_read_prepare_sync();
4642 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4643 		tracing_iter_reset(iter, cpu);
4644 	}
4645 
4646 	mutex_unlock(&trace_types_lock);
4647 
4648 	return iter;
4649 
4650  fail:
4651 	mutex_unlock(&trace_types_lock);
4652 	free_trace_iter_content(iter);
4653 release:
4654 	seq_release_private(inode, file);
4655 	return ERR_PTR(-ENOMEM);
4656 }
4657 
4658 int tracing_open_generic(struct inode *inode, struct file *filp)
4659 {
4660 	int ret;
4661 
4662 	ret = tracing_check_open_get_tr(NULL);
4663 	if (ret)
4664 		return ret;
4665 
4666 	filp->private_data = inode->i_private;
4667 	return 0;
4668 }
4669 
4670 bool tracing_is_disabled(void)
4671 {
4672 	return (tracing_disabled) ? true: false;
4673 }
4674 
4675 /*
4676  * Open and update trace_array ref count.
4677  * Must have the current trace_array passed to it.
4678  */
4679 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4680 {
4681 	struct trace_array *tr = inode->i_private;
4682 	int ret;
4683 
4684 	ret = tracing_check_open_get_tr(tr);
4685 	if (ret)
4686 		return ret;
4687 
4688 	filp->private_data = inode->i_private;
4689 
4690 	return 0;
4691 }
4692 
4693 /*
4694  * The private pointer of the inode is the trace_event_file.
4695  * Update the tr ref count associated to it.
4696  */
4697 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4698 {
4699 	struct trace_event_file *file = inode->i_private;
4700 	int ret;
4701 
4702 	ret = tracing_check_open_get_tr(file->tr);
4703 	if (ret)
4704 		return ret;
4705 
4706 	mutex_lock(&event_mutex);
4707 
4708 	/* Fail if the file is marked for removal */
4709 	if (file->flags & EVENT_FILE_FL_FREED) {
4710 		trace_array_put(file->tr);
4711 		ret = -ENODEV;
4712 	} else {
4713 		event_file_get(file);
4714 	}
4715 
4716 	mutex_unlock(&event_mutex);
4717 	if (ret)
4718 		return ret;
4719 
4720 	filp->private_data = inode->i_private;
4721 
4722 	return 0;
4723 }
4724 
4725 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4726 {
4727 	struct trace_event_file *file = inode->i_private;
4728 
4729 	trace_array_put(file->tr);
4730 	event_file_put(file);
4731 
4732 	return 0;
4733 }
4734 
4735 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4736 {
4737 	tracing_release_file_tr(inode, filp);
4738 	return single_release(inode, filp);
4739 }
4740 
4741 static int tracing_mark_open(struct inode *inode, struct file *filp)
4742 {
4743 	stream_open(inode, filp);
4744 	return tracing_open_generic_tr(inode, filp);
4745 }
4746 
4747 static int tracing_release(struct inode *inode, struct file *file)
4748 {
4749 	struct trace_array *tr = inode->i_private;
4750 	struct seq_file *m = file->private_data;
4751 	struct trace_iterator *iter;
4752 	int cpu;
4753 
4754 	if (!(file->f_mode & FMODE_READ)) {
4755 		trace_array_put(tr);
4756 		return 0;
4757 	}
4758 
4759 	/* Writes do not use seq_file */
4760 	iter = m->private;
4761 	mutex_lock(&trace_types_lock);
4762 
4763 	for_each_tracing_cpu(cpu) {
4764 		if (iter->buffer_iter[cpu])
4765 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4766 	}
4767 
4768 	if (iter->trace && iter->trace->close)
4769 		iter->trace->close(iter);
4770 
4771 	if (!iter->snapshot && tr->stop_count)
4772 		/* reenable tracing if it was previously enabled */
4773 		tracing_start_tr(tr);
4774 
4775 	__trace_array_put(tr);
4776 
4777 	mutex_unlock(&trace_types_lock);
4778 
4779 	free_trace_iter_content(iter);
4780 	seq_release_private(inode, file);
4781 
4782 	return 0;
4783 }
4784 
4785 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4786 {
4787 	struct trace_array *tr = inode->i_private;
4788 
4789 	trace_array_put(tr);
4790 	return 0;
4791 }
4792 
4793 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4794 {
4795 	struct trace_array *tr = inode->i_private;
4796 
4797 	trace_array_put(tr);
4798 
4799 	return single_release(inode, file);
4800 }
4801 
4802 static int tracing_open(struct inode *inode, struct file *file)
4803 {
4804 	struct trace_array *tr = inode->i_private;
4805 	struct trace_iterator *iter;
4806 	int ret;
4807 
4808 	ret = tracing_check_open_get_tr(tr);
4809 	if (ret)
4810 		return ret;
4811 
4812 	/* If this file was open for write, then erase contents */
4813 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4814 		int cpu = tracing_get_cpu(inode);
4815 		struct array_buffer *trace_buf = &tr->array_buffer;
4816 
4817 #ifdef CONFIG_TRACER_MAX_TRACE
4818 		if (tr->current_trace->print_max)
4819 			trace_buf = &tr->max_buffer;
4820 #endif
4821 
4822 		if (cpu == RING_BUFFER_ALL_CPUS)
4823 			tracing_reset_online_cpus(trace_buf);
4824 		else
4825 			tracing_reset_cpu(trace_buf, cpu);
4826 	}
4827 
4828 	if (file->f_mode & FMODE_READ) {
4829 		iter = __tracing_open(inode, file, false);
4830 		if (IS_ERR(iter))
4831 			ret = PTR_ERR(iter);
4832 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4833 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4834 	}
4835 
4836 	if (ret < 0)
4837 		trace_array_put(tr);
4838 
4839 	return ret;
4840 }
4841 
4842 /*
4843  * Some tracers are not suitable for instance buffers.
4844  * A tracer is always available for the global array (toplevel)
4845  * or if it explicitly states that it is.
4846  */
4847 static bool
4848 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4849 {
4850 #ifdef CONFIG_TRACER_SNAPSHOT
4851 	/* arrays with mapped buffer range do not have snapshots */
4852 	if (tr->range_addr_start && t->use_max_tr)
4853 		return false;
4854 #endif
4855 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4856 }
4857 
4858 /* Find the next tracer that this trace array may use */
4859 static struct tracer *
4860 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4861 {
4862 	while (t && !trace_ok_for_array(t, tr))
4863 		t = t->next;
4864 
4865 	return t;
4866 }
4867 
4868 static void *
4869 t_next(struct seq_file *m, void *v, loff_t *pos)
4870 {
4871 	struct trace_array *tr = m->private;
4872 	struct tracer *t = v;
4873 
4874 	(*pos)++;
4875 
4876 	if (t)
4877 		t = get_tracer_for_array(tr, t->next);
4878 
4879 	return t;
4880 }
4881 
4882 static void *t_start(struct seq_file *m, loff_t *pos)
4883 {
4884 	struct trace_array *tr = m->private;
4885 	struct tracer *t;
4886 	loff_t l = 0;
4887 
4888 	mutex_lock(&trace_types_lock);
4889 
4890 	t = get_tracer_for_array(tr, trace_types);
4891 	for (; t && l < *pos; t = t_next(m, t, &l))
4892 			;
4893 
4894 	return t;
4895 }
4896 
4897 static void t_stop(struct seq_file *m, void *p)
4898 {
4899 	mutex_unlock(&trace_types_lock);
4900 }
4901 
4902 static int t_show(struct seq_file *m, void *v)
4903 {
4904 	struct tracer *t = v;
4905 
4906 	if (!t)
4907 		return 0;
4908 
4909 	seq_puts(m, t->name);
4910 	if (t->next)
4911 		seq_putc(m, ' ');
4912 	else
4913 		seq_putc(m, '\n');
4914 
4915 	return 0;
4916 }
4917 
4918 static const struct seq_operations show_traces_seq_ops = {
4919 	.start		= t_start,
4920 	.next		= t_next,
4921 	.stop		= t_stop,
4922 	.show		= t_show,
4923 };
4924 
4925 static int show_traces_open(struct inode *inode, struct file *file)
4926 {
4927 	struct trace_array *tr = inode->i_private;
4928 	struct seq_file *m;
4929 	int ret;
4930 
4931 	ret = tracing_check_open_get_tr(tr);
4932 	if (ret)
4933 		return ret;
4934 
4935 	ret = seq_open(file, &show_traces_seq_ops);
4936 	if (ret) {
4937 		trace_array_put(tr);
4938 		return ret;
4939 	}
4940 
4941 	m = file->private_data;
4942 	m->private = tr;
4943 
4944 	return 0;
4945 }
4946 
4947 static int tracing_seq_release(struct inode *inode, struct file *file)
4948 {
4949 	struct trace_array *tr = inode->i_private;
4950 
4951 	trace_array_put(tr);
4952 	return seq_release(inode, file);
4953 }
4954 
4955 static ssize_t
4956 tracing_write_stub(struct file *filp, const char __user *ubuf,
4957 		   size_t count, loff_t *ppos)
4958 {
4959 	return count;
4960 }
4961 
4962 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4963 {
4964 	int ret;
4965 
4966 	if (file->f_mode & FMODE_READ)
4967 		ret = seq_lseek(file, offset, whence);
4968 	else
4969 		file->f_pos = ret = 0;
4970 
4971 	return ret;
4972 }
4973 
4974 static const struct file_operations tracing_fops = {
4975 	.open		= tracing_open,
4976 	.read		= seq_read,
4977 	.read_iter	= seq_read_iter,
4978 	.splice_read	= copy_splice_read,
4979 	.write		= tracing_write_stub,
4980 	.llseek		= tracing_lseek,
4981 	.release	= tracing_release,
4982 };
4983 
4984 static const struct file_operations show_traces_fops = {
4985 	.open		= show_traces_open,
4986 	.read		= seq_read,
4987 	.llseek		= seq_lseek,
4988 	.release	= tracing_seq_release,
4989 };
4990 
4991 static ssize_t
4992 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4993 		     size_t count, loff_t *ppos)
4994 {
4995 	struct trace_array *tr = file_inode(filp)->i_private;
4996 	char *mask_str;
4997 	int len;
4998 
4999 	len = snprintf(NULL, 0, "%*pb\n",
5000 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5001 	mask_str = kmalloc(len, GFP_KERNEL);
5002 	if (!mask_str)
5003 		return -ENOMEM;
5004 
5005 	len = snprintf(mask_str, len, "%*pb\n",
5006 		       cpumask_pr_args(tr->tracing_cpumask));
5007 	if (len >= count) {
5008 		count = -EINVAL;
5009 		goto out_err;
5010 	}
5011 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5012 
5013 out_err:
5014 	kfree(mask_str);
5015 
5016 	return count;
5017 }
5018 
5019 int tracing_set_cpumask(struct trace_array *tr,
5020 			cpumask_var_t tracing_cpumask_new)
5021 {
5022 	int cpu;
5023 
5024 	if (!tr)
5025 		return -EINVAL;
5026 
5027 	local_irq_disable();
5028 	arch_spin_lock(&tr->max_lock);
5029 	for_each_tracing_cpu(cpu) {
5030 		/*
5031 		 * Increase/decrease the disabled counter if we are
5032 		 * about to flip a bit in the cpumask:
5033 		 */
5034 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5035 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5036 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5037 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5038 #ifdef CONFIG_TRACER_MAX_TRACE
5039 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5040 #endif
5041 		}
5042 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5043 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5044 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5045 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5046 #ifdef CONFIG_TRACER_MAX_TRACE
5047 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5048 #endif
5049 		}
5050 	}
5051 	arch_spin_unlock(&tr->max_lock);
5052 	local_irq_enable();
5053 
5054 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5055 
5056 	return 0;
5057 }
5058 
5059 static ssize_t
5060 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5061 		      size_t count, loff_t *ppos)
5062 {
5063 	struct trace_array *tr = file_inode(filp)->i_private;
5064 	cpumask_var_t tracing_cpumask_new;
5065 	int err;
5066 
5067 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5068 		return -EINVAL;
5069 
5070 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5071 		return -ENOMEM;
5072 
5073 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5074 	if (err)
5075 		goto err_free;
5076 
5077 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5078 	if (err)
5079 		goto err_free;
5080 
5081 	free_cpumask_var(tracing_cpumask_new);
5082 
5083 	return count;
5084 
5085 err_free:
5086 	free_cpumask_var(tracing_cpumask_new);
5087 
5088 	return err;
5089 }
5090 
5091 static const struct file_operations tracing_cpumask_fops = {
5092 	.open		= tracing_open_generic_tr,
5093 	.read		= tracing_cpumask_read,
5094 	.write		= tracing_cpumask_write,
5095 	.release	= tracing_release_generic_tr,
5096 	.llseek		= generic_file_llseek,
5097 };
5098 
5099 static int tracing_trace_options_show(struct seq_file *m, void *v)
5100 {
5101 	struct tracer_opt *trace_opts;
5102 	struct trace_array *tr = m->private;
5103 	u32 tracer_flags;
5104 	int i;
5105 
5106 	guard(mutex)(&trace_types_lock);
5107 
5108 	tracer_flags = tr->current_trace->flags->val;
5109 	trace_opts = tr->current_trace->flags->opts;
5110 
5111 	for (i = 0; trace_options[i]; i++) {
5112 		if (tr->trace_flags & (1 << i))
5113 			seq_printf(m, "%s\n", trace_options[i]);
5114 		else
5115 			seq_printf(m, "no%s\n", trace_options[i]);
5116 	}
5117 
5118 	for (i = 0; trace_opts[i].name; i++) {
5119 		if (tracer_flags & trace_opts[i].bit)
5120 			seq_printf(m, "%s\n", trace_opts[i].name);
5121 		else
5122 			seq_printf(m, "no%s\n", trace_opts[i].name);
5123 	}
5124 
5125 	return 0;
5126 }
5127 
5128 static int __set_tracer_option(struct trace_array *tr,
5129 			       struct tracer_flags *tracer_flags,
5130 			       struct tracer_opt *opts, int neg)
5131 {
5132 	struct tracer *trace = tracer_flags->trace;
5133 	int ret;
5134 
5135 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5136 	if (ret)
5137 		return ret;
5138 
5139 	if (neg)
5140 		tracer_flags->val &= ~opts->bit;
5141 	else
5142 		tracer_flags->val |= opts->bit;
5143 	return 0;
5144 }
5145 
5146 /* Try to assign a tracer specific option */
5147 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5148 {
5149 	struct tracer *trace = tr->current_trace;
5150 	struct tracer_flags *tracer_flags = trace->flags;
5151 	struct tracer_opt *opts = NULL;
5152 	int i;
5153 
5154 	for (i = 0; tracer_flags->opts[i].name; i++) {
5155 		opts = &tracer_flags->opts[i];
5156 
5157 		if (strcmp(cmp, opts->name) == 0)
5158 			return __set_tracer_option(tr, trace->flags, opts, neg);
5159 	}
5160 
5161 	return -EINVAL;
5162 }
5163 
5164 /* Some tracers require overwrite to stay enabled */
5165 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5166 {
5167 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5168 		return -1;
5169 
5170 	return 0;
5171 }
5172 
5173 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5174 {
5175 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5176 	    (mask == TRACE_ITER_RECORD_CMD) ||
5177 	    (mask == TRACE_ITER_TRACE_PRINTK))
5178 		lockdep_assert_held(&event_mutex);
5179 
5180 	/* do nothing if flag is already set */
5181 	if (!!(tr->trace_flags & mask) == !!enabled)
5182 		return 0;
5183 
5184 	/* Give the tracer a chance to approve the change */
5185 	if (tr->current_trace->flag_changed)
5186 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5187 			return -EINVAL;
5188 
5189 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5190 		if (enabled) {
5191 			update_printk_trace(tr);
5192 		} else {
5193 			/*
5194 			 * The global_trace cannot clear this.
5195 			 * It's flag only gets cleared if another instance sets it.
5196 			 */
5197 			if (printk_trace == &global_trace)
5198 				return -EINVAL;
5199 			/*
5200 			 * An instance must always have it set.
5201 			 * by default, that's the global_trace instane.
5202 			 */
5203 			if (printk_trace == tr)
5204 				update_printk_trace(&global_trace);
5205 		}
5206 	}
5207 
5208 	if (enabled)
5209 		tr->trace_flags |= mask;
5210 	else
5211 		tr->trace_flags &= ~mask;
5212 
5213 	if (mask == TRACE_ITER_RECORD_CMD)
5214 		trace_event_enable_cmd_record(enabled);
5215 
5216 	if (mask == TRACE_ITER_RECORD_TGID) {
5217 
5218 		if (trace_alloc_tgid_map() < 0) {
5219 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5220 			return -ENOMEM;
5221 		}
5222 
5223 		trace_event_enable_tgid_record(enabled);
5224 	}
5225 
5226 	if (mask == TRACE_ITER_EVENT_FORK)
5227 		trace_event_follow_fork(tr, enabled);
5228 
5229 	if (mask == TRACE_ITER_FUNC_FORK)
5230 		ftrace_pid_follow_fork(tr, enabled);
5231 
5232 	if (mask == TRACE_ITER_OVERWRITE) {
5233 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5234 #ifdef CONFIG_TRACER_MAX_TRACE
5235 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5236 #endif
5237 	}
5238 
5239 	if (mask == TRACE_ITER_PRINTK) {
5240 		trace_printk_start_stop_comm(enabled);
5241 		trace_printk_control(enabled);
5242 	}
5243 
5244 	return 0;
5245 }
5246 
5247 int trace_set_options(struct trace_array *tr, char *option)
5248 {
5249 	char *cmp;
5250 	int neg = 0;
5251 	int ret;
5252 	size_t orig_len = strlen(option);
5253 	int len;
5254 
5255 	cmp = strstrip(option);
5256 
5257 	len = str_has_prefix(cmp, "no");
5258 	if (len)
5259 		neg = 1;
5260 
5261 	cmp += len;
5262 
5263 	mutex_lock(&event_mutex);
5264 	mutex_lock(&trace_types_lock);
5265 
5266 	ret = match_string(trace_options, -1, cmp);
5267 	/* If no option could be set, test the specific tracer options */
5268 	if (ret < 0)
5269 		ret = set_tracer_option(tr, cmp, neg);
5270 	else
5271 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5272 
5273 	mutex_unlock(&trace_types_lock);
5274 	mutex_unlock(&event_mutex);
5275 
5276 	/*
5277 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5278 	 * turn it back into a space.
5279 	 */
5280 	if (orig_len > strlen(option))
5281 		option[strlen(option)] = ' ';
5282 
5283 	return ret;
5284 }
5285 
5286 static void __init apply_trace_boot_options(void)
5287 {
5288 	char *buf = trace_boot_options_buf;
5289 	char *option;
5290 
5291 	while (true) {
5292 		option = strsep(&buf, ",");
5293 
5294 		if (!option)
5295 			break;
5296 
5297 		if (*option)
5298 			trace_set_options(&global_trace, option);
5299 
5300 		/* Put back the comma to allow this to be called again */
5301 		if (buf)
5302 			*(buf - 1) = ',';
5303 	}
5304 }
5305 
5306 static ssize_t
5307 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5308 			size_t cnt, loff_t *ppos)
5309 {
5310 	struct seq_file *m = filp->private_data;
5311 	struct trace_array *tr = m->private;
5312 	char buf[64];
5313 	int ret;
5314 
5315 	if (cnt >= sizeof(buf))
5316 		return -EINVAL;
5317 
5318 	if (copy_from_user(buf, ubuf, cnt))
5319 		return -EFAULT;
5320 
5321 	buf[cnt] = 0;
5322 
5323 	ret = trace_set_options(tr, buf);
5324 	if (ret < 0)
5325 		return ret;
5326 
5327 	*ppos += cnt;
5328 
5329 	return cnt;
5330 }
5331 
5332 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5333 {
5334 	struct trace_array *tr = inode->i_private;
5335 	int ret;
5336 
5337 	ret = tracing_check_open_get_tr(tr);
5338 	if (ret)
5339 		return ret;
5340 
5341 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5342 	if (ret < 0)
5343 		trace_array_put(tr);
5344 
5345 	return ret;
5346 }
5347 
5348 static const struct file_operations tracing_iter_fops = {
5349 	.open		= tracing_trace_options_open,
5350 	.read		= seq_read,
5351 	.llseek		= seq_lseek,
5352 	.release	= tracing_single_release_tr,
5353 	.write		= tracing_trace_options_write,
5354 };
5355 
5356 static const char readme_msg[] =
5357 	"tracing mini-HOWTO:\n\n"
5358 	"By default tracefs removes all OTH file permission bits.\n"
5359 	"When mounting tracefs an optional group id can be specified\n"
5360 	"which adds the group to every directory and file in tracefs:\n\n"
5361 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5362 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5363 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5364 	" Important files:\n"
5365 	"  trace\t\t\t- The static contents of the buffer\n"
5366 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5367 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5368 	"  current_tracer\t- function and latency tracers\n"
5369 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5370 	"  error_log\t- error log for failed commands (that support it)\n"
5371 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5372 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5373 	"  trace_clock\t\t- change the clock used to order events\n"
5374 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5375 	"      global:   Synced across CPUs but slows tracing down.\n"
5376 	"     counter:   Not a clock, but just an increment\n"
5377 	"      uptime:   Jiffy counter from time of boot\n"
5378 	"        perf:   Same clock that perf events use\n"
5379 #ifdef CONFIG_X86_64
5380 	"     x86-tsc:   TSC cycle counter\n"
5381 #endif
5382 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5383 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5384 	"    absolute:   Absolute (standalone) timestamp\n"
5385 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5386 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5387 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5388 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5389 	"\t\t\t  Remove sub-buffer with rmdir\n"
5390 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5391 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5392 	"\t\t\t  option name\n"
5393 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5394 #ifdef CONFIG_DYNAMIC_FTRACE
5395 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5396 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5397 	"\t\t\t  functions\n"
5398 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5399 	"\t     modules: Can select a group via module\n"
5400 	"\t      Format: :mod:<module-name>\n"
5401 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5402 	"\t    triggers: a command to perform when function is hit\n"
5403 	"\t      Format: <function>:<trigger>[:count]\n"
5404 	"\t     trigger: traceon, traceoff\n"
5405 	"\t\t      enable_event:<system>:<event>\n"
5406 	"\t\t      disable_event:<system>:<event>\n"
5407 #ifdef CONFIG_STACKTRACE
5408 	"\t\t      stacktrace\n"
5409 #endif
5410 #ifdef CONFIG_TRACER_SNAPSHOT
5411 	"\t\t      snapshot\n"
5412 #endif
5413 	"\t\t      dump\n"
5414 	"\t\t      cpudump\n"
5415 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5416 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5417 	"\t     The first one will disable tracing every time do_fault is hit\n"
5418 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5419 	"\t       The first time do trap is hit and it disables tracing, the\n"
5420 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5421 	"\t       the counter will not decrement. It only decrements when the\n"
5422 	"\t       trigger did work\n"
5423 	"\t     To remove trigger without count:\n"
5424 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5425 	"\t     To remove trigger with a count:\n"
5426 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5427 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5428 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5429 	"\t    modules: Can select a group via module command :mod:\n"
5430 	"\t    Does not accept triggers\n"
5431 #endif /* CONFIG_DYNAMIC_FTRACE */
5432 #ifdef CONFIG_FUNCTION_TRACER
5433 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5434 	"\t\t    (function)\n"
5435 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5436 	"\t\t    (function)\n"
5437 #endif
5438 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5439 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5440 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5441 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5442 #endif
5443 #ifdef CONFIG_TRACER_SNAPSHOT
5444 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5445 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5446 	"\t\t\t  information\n"
5447 #endif
5448 #ifdef CONFIG_STACK_TRACER
5449 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5450 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5451 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5452 	"\t\t\t  new trace)\n"
5453 #ifdef CONFIG_DYNAMIC_FTRACE
5454 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5455 	"\t\t\t  traces\n"
5456 #endif
5457 #endif /* CONFIG_STACK_TRACER */
5458 #ifdef CONFIG_DYNAMIC_EVENTS
5459 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5460 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5461 #endif
5462 #ifdef CONFIG_KPROBE_EVENTS
5463 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5464 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5465 #endif
5466 #ifdef CONFIG_UPROBE_EVENTS
5467 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5468 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5469 #endif
5470 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5471     defined(CONFIG_FPROBE_EVENTS)
5472 	"\t  accepts: event-definitions (one definition per line)\n"
5473 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5474 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5475 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5476 #endif
5477 #ifdef CONFIG_FPROBE_EVENTS
5478 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5479 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5480 #endif
5481 #ifdef CONFIG_HIST_TRIGGERS
5482 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5483 #endif
5484 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5485 	"\t           -:[<group>/][<event>]\n"
5486 #ifdef CONFIG_KPROBE_EVENTS
5487 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5488   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5489 #endif
5490 #ifdef CONFIG_UPROBE_EVENTS
5491   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5492 #endif
5493 	"\t     args: <name>=fetcharg[:type]\n"
5494 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5495 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5496 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5497 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5498 	"\t           <argname>[->field[->field|.field...]],\n"
5499 #endif
5500 #else
5501 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5502 #endif
5503 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5504 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5505 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5506 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5507 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5508 #ifdef CONFIG_HIST_TRIGGERS
5509 	"\t    field: <stype> <name>;\n"
5510 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5511 	"\t           [unsigned] char/int/long\n"
5512 #endif
5513 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5514 	"\t            of the <attached-group>/<attached-event>.\n"
5515 #endif
5516 	"  set_event\t\t- Enables events by name written into it\n"
5517 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5518 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5519 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5520 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5521 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5522 	"\t\t\t  events\n"
5523 	"      filter\t\t- If set, only events passing filter are traced\n"
5524 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5525 	"\t\t\t  <event>:\n"
5526 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5527 	"      filter\t\t- If set, only events passing filter are traced\n"
5528 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5529 	"\t    Format: <trigger>[:count][if <filter>]\n"
5530 	"\t   trigger: traceon, traceoff\n"
5531 	"\t            enable_event:<system>:<event>\n"
5532 	"\t            disable_event:<system>:<event>\n"
5533 #ifdef CONFIG_HIST_TRIGGERS
5534 	"\t            enable_hist:<system>:<event>\n"
5535 	"\t            disable_hist:<system>:<event>\n"
5536 #endif
5537 #ifdef CONFIG_STACKTRACE
5538 	"\t\t    stacktrace\n"
5539 #endif
5540 #ifdef CONFIG_TRACER_SNAPSHOT
5541 	"\t\t    snapshot\n"
5542 #endif
5543 #ifdef CONFIG_HIST_TRIGGERS
5544 	"\t\t    hist (see below)\n"
5545 #endif
5546 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5547 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5548 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5549 	"\t                  events/block/block_unplug/trigger\n"
5550 	"\t   The first disables tracing every time block_unplug is hit.\n"
5551 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5552 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5553 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5554 	"\t   Like function triggers, the counter is only decremented if it\n"
5555 	"\t    enabled or disabled tracing.\n"
5556 	"\t   To remove a trigger without a count:\n"
5557 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5558 	"\t   To remove a trigger with a count:\n"
5559 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5560 	"\t   Filters can be ignored when removing a trigger.\n"
5561 #ifdef CONFIG_HIST_TRIGGERS
5562 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5563 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5564 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5565 	"\t            [:values=<field1[,field2,...]>]\n"
5566 	"\t            [:sort=<field1[,field2,...]>]\n"
5567 	"\t            [:size=#entries]\n"
5568 	"\t            [:pause][:continue][:clear]\n"
5569 	"\t            [:name=histname1]\n"
5570 	"\t            [:nohitcount]\n"
5571 	"\t            [:<handler>.<action>]\n"
5572 	"\t            [if <filter>]\n\n"
5573 	"\t    Note, special fields can be used as well:\n"
5574 	"\t            common_timestamp - to record current timestamp\n"
5575 	"\t            common_cpu - to record the CPU the event happened on\n"
5576 	"\n"
5577 	"\t    A hist trigger variable can be:\n"
5578 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5579 	"\t        - a reference to another variable e.g. y=$x,\n"
5580 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5581 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5582 	"\n"
5583 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5584 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5585 	"\t    variable reference, field or numeric literal.\n"
5586 	"\n"
5587 	"\t    When a matching event is hit, an entry is added to a hash\n"
5588 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5589 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5590 	"\t    correspond to fields in the event's format description.  Keys\n"
5591 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5592 	"\t    Compound keys consisting of up to two fields can be specified\n"
5593 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5594 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5595 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5596 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5597 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5598 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5599 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5600 	"\t    its histogram data will be shared with other triggers of the\n"
5601 	"\t    same name, and trigger hits will update this common data.\n\n"
5602 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5603 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5604 	"\t    triggers attached to an event, there will be a table for each\n"
5605 	"\t    trigger in the output.  The table displayed for a named\n"
5606 	"\t    trigger will be the same as any other instance having the\n"
5607 	"\t    same name.  The default format used to display a given field\n"
5608 	"\t    can be modified by appending any of the following modifiers\n"
5609 	"\t    to the field name, as applicable:\n\n"
5610 	"\t            .hex        display a number as a hex value\n"
5611 	"\t            .sym        display an address as a symbol\n"
5612 	"\t            .sym-offset display an address as a symbol and offset\n"
5613 	"\t            .execname   display a common_pid as a program name\n"
5614 	"\t            .syscall    display a syscall id as a syscall name\n"
5615 	"\t            .log2       display log2 value rather than raw number\n"
5616 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5617 	"\t            .usecs      display a common_timestamp in microseconds\n"
5618 	"\t            .percent    display a number of percentage value\n"
5619 	"\t            .graph      display a bar-graph of a value\n\n"
5620 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5621 	"\t    trigger or to start a hist trigger but not log any events\n"
5622 	"\t    until told to do so.  'continue' can be used to start or\n"
5623 	"\t    restart a paused hist trigger.\n\n"
5624 	"\t    The 'clear' parameter will clear the contents of a running\n"
5625 	"\t    hist trigger and leave its current paused/active state\n"
5626 	"\t    unchanged.\n\n"
5627 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5628 	"\t    raw hitcount in the histogram.\n\n"
5629 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5630 	"\t    have one event conditionally start and stop another event's\n"
5631 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5632 	"\t    the enable_event and disable_event triggers.\n\n"
5633 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5634 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5635 	"\t        <handler>.<action>\n\n"
5636 	"\t    The available handlers are:\n\n"
5637 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5638 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5639 	"\t        onchange(var)            - invoke action if var changes\n\n"
5640 	"\t    The available actions are:\n\n"
5641 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5642 	"\t        save(field,...)                      - save current event fields\n"
5643 #ifdef CONFIG_TRACER_SNAPSHOT
5644 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5645 #endif
5646 #ifdef CONFIG_SYNTH_EVENTS
5647 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5648 	"\t  Write into this file to define/undefine new synthetic events.\n"
5649 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5650 #endif
5651 #endif
5652 ;
5653 
5654 static ssize_t
5655 tracing_readme_read(struct file *filp, char __user *ubuf,
5656 		       size_t cnt, loff_t *ppos)
5657 {
5658 	return simple_read_from_buffer(ubuf, cnt, ppos,
5659 					readme_msg, strlen(readme_msg));
5660 }
5661 
5662 static const struct file_operations tracing_readme_fops = {
5663 	.open		= tracing_open_generic,
5664 	.read		= tracing_readme_read,
5665 	.llseek		= generic_file_llseek,
5666 };
5667 
5668 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5669 static union trace_eval_map_item *
5670 update_eval_map(union trace_eval_map_item *ptr)
5671 {
5672 	if (!ptr->map.eval_string) {
5673 		if (ptr->tail.next) {
5674 			ptr = ptr->tail.next;
5675 			/* Set ptr to the next real item (skip head) */
5676 			ptr++;
5677 		} else
5678 			return NULL;
5679 	}
5680 	return ptr;
5681 }
5682 
5683 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5684 {
5685 	union trace_eval_map_item *ptr = v;
5686 
5687 	/*
5688 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5689 	 * This really should never happen.
5690 	 */
5691 	(*pos)++;
5692 	ptr = update_eval_map(ptr);
5693 	if (WARN_ON_ONCE(!ptr))
5694 		return NULL;
5695 
5696 	ptr++;
5697 	ptr = update_eval_map(ptr);
5698 
5699 	return ptr;
5700 }
5701 
5702 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5703 {
5704 	union trace_eval_map_item *v;
5705 	loff_t l = 0;
5706 
5707 	mutex_lock(&trace_eval_mutex);
5708 
5709 	v = trace_eval_maps;
5710 	if (v)
5711 		v++;
5712 
5713 	while (v && l < *pos) {
5714 		v = eval_map_next(m, v, &l);
5715 	}
5716 
5717 	return v;
5718 }
5719 
5720 static void eval_map_stop(struct seq_file *m, void *v)
5721 {
5722 	mutex_unlock(&trace_eval_mutex);
5723 }
5724 
5725 static int eval_map_show(struct seq_file *m, void *v)
5726 {
5727 	union trace_eval_map_item *ptr = v;
5728 
5729 	seq_printf(m, "%s %ld (%s)\n",
5730 		   ptr->map.eval_string, ptr->map.eval_value,
5731 		   ptr->map.system);
5732 
5733 	return 0;
5734 }
5735 
5736 static const struct seq_operations tracing_eval_map_seq_ops = {
5737 	.start		= eval_map_start,
5738 	.next		= eval_map_next,
5739 	.stop		= eval_map_stop,
5740 	.show		= eval_map_show,
5741 };
5742 
5743 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5744 {
5745 	int ret;
5746 
5747 	ret = tracing_check_open_get_tr(NULL);
5748 	if (ret)
5749 		return ret;
5750 
5751 	return seq_open(filp, &tracing_eval_map_seq_ops);
5752 }
5753 
5754 static const struct file_operations tracing_eval_map_fops = {
5755 	.open		= tracing_eval_map_open,
5756 	.read		= seq_read,
5757 	.llseek		= seq_lseek,
5758 	.release	= seq_release,
5759 };
5760 
5761 static inline union trace_eval_map_item *
5762 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5763 {
5764 	/* Return tail of array given the head */
5765 	return ptr + ptr->head.length + 1;
5766 }
5767 
5768 static void
5769 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5770 			   int len)
5771 {
5772 	struct trace_eval_map **stop;
5773 	struct trace_eval_map **map;
5774 	union trace_eval_map_item *map_array;
5775 	union trace_eval_map_item *ptr;
5776 
5777 	stop = start + len;
5778 
5779 	/*
5780 	 * The trace_eval_maps contains the map plus a head and tail item,
5781 	 * where the head holds the module and length of array, and the
5782 	 * tail holds a pointer to the next list.
5783 	 */
5784 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5785 	if (!map_array) {
5786 		pr_warn("Unable to allocate trace eval mapping\n");
5787 		return;
5788 	}
5789 
5790 	guard(mutex)(&trace_eval_mutex);
5791 
5792 	if (!trace_eval_maps)
5793 		trace_eval_maps = map_array;
5794 	else {
5795 		ptr = trace_eval_maps;
5796 		for (;;) {
5797 			ptr = trace_eval_jmp_to_tail(ptr);
5798 			if (!ptr->tail.next)
5799 				break;
5800 			ptr = ptr->tail.next;
5801 
5802 		}
5803 		ptr->tail.next = map_array;
5804 	}
5805 	map_array->head.mod = mod;
5806 	map_array->head.length = len;
5807 	map_array++;
5808 
5809 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5810 		map_array->map = **map;
5811 		map_array++;
5812 	}
5813 	memset(map_array, 0, sizeof(*map_array));
5814 }
5815 
5816 static void trace_create_eval_file(struct dentry *d_tracer)
5817 {
5818 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5819 			  NULL, &tracing_eval_map_fops);
5820 }
5821 
5822 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5823 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5824 static inline void trace_insert_eval_map_file(struct module *mod,
5825 			      struct trace_eval_map **start, int len) { }
5826 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5827 
5828 static void trace_insert_eval_map(struct module *mod,
5829 				  struct trace_eval_map **start, int len)
5830 {
5831 	struct trace_eval_map **map;
5832 
5833 	if (len <= 0)
5834 		return;
5835 
5836 	map = start;
5837 
5838 	trace_event_eval_update(map, len);
5839 
5840 	trace_insert_eval_map_file(mod, start, len);
5841 }
5842 
5843 static ssize_t
5844 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5845 		       size_t cnt, loff_t *ppos)
5846 {
5847 	struct trace_array *tr = filp->private_data;
5848 	char buf[MAX_TRACER_SIZE+2];
5849 	int r;
5850 
5851 	mutex_lock(&trace_types_lock);
5852 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5853 	mutex_unlock(&trace_types_lock);
5854 
5855 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5856 }
5857 
5858 int tracer_init(struct tracer *t, struct trace_array *tr)
5859 {
5860 	tracing_reset_online_cpus(&tr->array_buffer);
5861 	return t->init(tr);
5862 }
5863 
5864 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5865 {
5866 	int cpu;
5867 
5868 	for_each_tracing_cpu(cpu)
5869 		per_cpu_ptr(buf->data, cpu)->entries = val;
5870 }
5871 
5872 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5873 {
5874 	if (cpu == RING_BUFFER_ALL_CPUS) {
5875 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5876 	} else {
5877 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5878 	}
5879 }
5880 
5881 #ifdef CONFIG_TRACER_MAX_TRACE
5882 /* resize @tr's buffer to the size of @size_tr's entries */
5883 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5884 					struct array_buffer *size_buf, int cpu_id)
5885 {
5886 	int cpu, ret = 0;
5887 
5888 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5889 		for_each_tracing_cpu(cpu) {
5890 			ret = ring_buffer_resize(trace_buf->buffer,
5891 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5892 			if (ret < 0)
5893 				break;
5894 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5895 				per_cpu_ptr(size_buf->data, cpu)->entries;
5896 		}
5897 	} else {
5898 		ret = ring_buffer_resize(trace_buf->buffer,
5899 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5900 		if (ret == 0)
5901 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5902 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5903 	}
5904 
5905 	return ret;
5906 }
5907 #endif /* CONFIG_TRACER_MAX_TRACE */
5908 
5909 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5910 					unsigned long size, int cpu)
5911 {
5912 	int ret;
5913 
5914 	/*
5915 	 * If kernel or user changes the size of the ring buffer
5916 	 * we use the size that was given, and we can forget about
5917 	 * expanding it later.
5918 	 */
5919 	trace_set_ring_buffer_expanded(tr);
5920 
5921 	/* May be called before buffers are initialized */
5922 	if (!tr->array_buffer.buffer)
5923 		return 0;
5924 
5925 	/* Do not allow tracing while resizing ring buffer */
5926 	tracing_stop_tr(tr);
5927 
5928 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5929 	if (ret < 0)
5930 		goto out_start;
5931 
5932 #ifdef CONFIG_TRACER_MAX_TRACE
5933 	if (!tr->allocated_snapshot)
5934 		goto out;
5935 
5936 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5937 	if (ret < 0) {
5938 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5939 						     &tr->array_buffer, cpu);
5940 		if (r < 0) {
5941 			/*
5942 			 * AARGH! We are left with different
5943 			 * size max buffer!!!!
5944 			 * The max buffer is our "snapshot" buffer.
5945 			 * When a tracer needs a snapshot (one of the
5946 			 * latency tracers), it swaps the max buffer
5947 			 * with the saved snap shot. We succeeded to
5948 			 * update the size of the main buffer, but failed to
5949 			 * update the size of the max buffer. But when we tried
5950 			 * to reset the main buffer to the original size, we
5951 			 * failed there too. This is very unlikely to
5952 			 * happen, but if it does, warn and kill all
5953 			 * tracing.
5954 			 */
5955 			WARN_ON(1);
5956 			tracing_disabled = 1;
5957 		}
5958 		goto out_start;
5959 	}
5960 
5961 	update_buffer_entries(&tr->max_buffer, cpu);
5962 
5963  out:
5964 #endif /* CONFIG_TRACER_MAX_TRACE */
5965 
5966 	update_buffer_entries(&tr->array_buffer, cpu);
5967  out_start:
5968 	tracing_start_tr(tr);
5969 	return ret;
5970 }
5971 
5972 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5973 				  unsigned long size, int cpu_id)
5974 {
5975 	guard(mutex)(&trace_types_lock);
5976 
5977 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5978 		/* make sure, this cpu is enabled in the mask */
5979 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5980 			return -EINVAL;
5981 	}
5982 
5983 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
5984 }
5985 
5986 static void update_last_data(struct trace_array *tr)
5987 {
5988 	if (!tr->text_delta && !tr->data_delta)
5989 		return;
5990 
5991 	/*
5992 	 * Need to clear all CPU buffers as there cannot be events
5993 	 * from the previous boot mixed with events with this boot
5994 	 * as that will cause a confusing trace. Need to clear all
5995 	 * CPU buffers, even for those that may currently be offline.
5996 	 */
5997 	tracing_reset_all_cpus(&tr->array_buffer);
5998 
5999 	/* Using current data now */
6000 	tr->text_delta = 0;
6001 	tr->data_delta = 0;
6002 }
6003 
6004 /**
6005  * tracing_update_buffers - used by tracing facility to expand ring buffers
6006  * @tr: The tracing instance
6007  *
6008  * To save on memory when the tracing is never used on a system with it
6009  * configured in. The ring buffers are set to a minimum size. But once
6010  * a user starts to use the tracing facility, then they need to grow
6011  * to their default size.
6012  *
6013  * This function is to be called when a tracer is about to be used.
6014  */
6015 int tracing_update_buffers(struct trace_array *tr)
6016 {
6017 	int ret = 0;
6018 
6019 	mutex_lock(&trace_types_lock);
6020 
6021 	update_last_data(tr);
6022 
6023 	if (!tr->ring_buffer_expanded)
6024 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6025 						RING_BUFFER_ALL_CPUS);
6026 	mutex_unlock(&trace_types_lock);
6027 
6028 	return ret;
6029 }
6030 
6031 struct trace_option_dentry;
6032 
6033 static void
6034 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6035 
6036 /*
6037  * Used to clear out the tracer before deletion of an instance.
6038  * Must have trace_types_lock held.
6039  */
6040 static void tracing_set_nop(struct trace_array *tr)
6041 {
6042 	if (tr->current_trace == &nop_trace)
6043 		return;
6044 
6045 	tr->current_trace->enabled--;
6046 
6047 	if (tr->current_trace->reset)
6048 		tr->current_trace->reset(tr);
6049 
6050 	tr->current_trace = &nop_trace;
6051 }
6052 
6053 static bool tracer_options_updated;
6054 
6055 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6056 {
6057 	/* Only enable if the directory has been created already. */
6058 	if (!tr->dir)
6059 		return;
6060 
6061 	/* Only create trace option files after update_tracer_options finish */
6062 	if (!tracer_options_updated)
6063 		return;
6064 
6065 	create_trace_option_files(tr, t);
6066 }
6067 
6068 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6069 {
6070 	struct tracer *t;
6071 #ifdef CONFIG_TRACER_MAX_TRACE
6072 	bool had_max_tr;
6073 #endif
6074 	int ret;
6075 
6076 	guard(mutex)(&trace_types_lock);
6077 
6078 	update_last_data(tr);
6079 
6080 	if (!tr->ring_buffer_expanded) {
6081 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6082 						RING_BUFFER_ALL_CPUS);
6083 		if (ret < 0)
6084 			return ret;
6085 		ret = 0;
6086 	}
6087 
6088 	for (t = trace_types; t; t = t->next) {
6089 		if (strcmp(t->name, buf) == 0)
6090 			break;
6091 	}
6092 	if (!t)
6093 		return -EINVAL;
6094 
6095 	if (t == tr->current_trace)
6096 		return 0;
6097 
6098 #ifdef CONFIG_TRACER_SNAPSHOT
6099 	if (t->use_max_tr) {
6100 		local_irq_disable();
6101 		arch_spin_lock(&tr->max_lock);
6102 		ret = tr->cond_snapshot ? -EBUSY : 0;
6103 		arch_spin_unlock(&tr->max_lock);
6104 		local_irq_enable();
6105 		if (ret)
6106 			return ret;
6107 	}
6108 #endif
6109 	/* Some tracers won't work on kernel command line */
6110 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6111 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6112 			t->name);
6113 		return -EINVAL;
6114 	}
6115 
6116 	/* Some tracers are only allowed for the top level buffer */
6117 	if (!trace_ok_for_array(t, tr))
6118 		return -EINVAL;
6119 
6120 	/* If trace pipe files are being read, we can't change the tracer */
6121 	if (tr->trace_ref)
6122 		return -EBUSY;
6123 
6124 	trace_branch_disable();
6125 
6126 	tr->current_trace->enabled--;
6127 
6128 	if (tr->current_trace->reset)
6129 		tr->current_trace->reset(tr);
6130 
6131 #ifdef CONFIG_TRACER_MAX_TRACE
6132 	had_max_tr = tr->current_trace->use_max_tr;
6133 
6134 	/* Current trace needs to be nop_trace before synchronize_rcu */
6135 	tr->current_trace = &nop_trace;
6136 
6137 	if (had_max_tr && !t->use_max_tr) {
6138 		/*
6139 		 * We need to make sure that the update_max_tr sees that
6140 		 * current_trace changed to nop_trace to keep it from
6141 		 * swapping the buffers after we resize it.
6142 		 * The update_max_tr is called from interrupts disabled
6143 		 * so a synchronized_sched() is sufficient.
6144 		 */
6145 		synchronize_rcu();
6146 		free_snapshot(tr);
6147 		tracing_disarm_snapshot(tr);
6148 	}
6149 
6150 	if (!had_max_tr && t->use_max_tr) {
6151 		ret = tracing_arm_snapshot_locked(tr);
6152 		if (ret)
6153 			return ret;
6154 	}
6155 #else
6156 	tr->current_trace = &nop_trace;
6157 #endif
6158 
6159 	if (t->init) {
6160 		ret = tracer_init(t, tr);
6161 		if (ret) {
6162 #ifdef CONFIG_TRACER_MAX_TRACE
6163 			if (t->use_max_tr)
6164 				tracing_disarm_snapshot(tr);
6165 #endif
6166 			return ret;
6167 		}
6168 	}
6169 
6170 	tr->current_trace = t;
6171 	tr->current_trace->enabled++;
6172 	trace_branch_enable(tr);
6173 
6174 	return 0;
6175 }
6176 
6177 static ssize_t
6178 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6179 			size_t cnt, loff_t *ppos)
6180 {
6181 	struct trace_array *tr = filp->private_data;
6182 	char buf[MAX_TRACER_SIZE+1];
6183 	char *name;
6184 	size_t ret;
6185 	int err;
6186 
6187 	ret = cnt;
6188 
6189 	if (cnt > MAX_TRACER_SIZE)
6190 		cnt = MAX_TRACER_SIZE;
6191 
6192 	if (copy_from_user(buf, ubuf, cnt))
6193 		return -EFAULT;
6194 
6195 	buf[cnt] = 0;
6196 
6197 	name = strim(buf);
6198 
6199 	err = tracing_set_tracer(tr, name);
6200 	if (err)
6201 		return err;
6202 
6203 	*ppos += ret;
6204 
6205 	return ret;
6206 }
6207 
6208 static ssize_t
6209 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6210 		   size_t cnt, loff_t *ppos)
6211 {
6212 	char buf[64];
6213 	int r;
6214 
6215 	r = snprintf(buf, sizeof(buf), "%ld\n",
6216 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6217 	if (r > sizeof(buf))
6218 		r = sizeof(buf);
6219 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6220 }
6221 
6222 static ssize_t
6223 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6224 		    size_t cnt, loff_t *ppos)
6225 {
6226 	unsigned long val;
6227 	int ret;
6228 
6229 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6230 	if (ret)
6231 		return ret;
6232 
6233 	*ptr = val * 1000;
6234 
6235 	return cnt;
6236 }
6237 
6238 static ssize_t
6239 tracing_thresh_read(struct file *filp, char __user *ubuf,
6240 		    size_t cnt, loff_t *ppos)
6241 {
6242 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6243 }
6244 
6245 static ssize_t
6246 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6247 		     size_t cnt, loff_t *ppos)
6248 {
6249 	struct trace_array *tr = filp->private_data;
6250 	int ret;
6251 
6252 	guard(mutex)(&trace_types_lock);
6253 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6254 	if (ret < 0)
6255 		return ret;
6256 
6257 	if (tr->current_trace->update_thresh) {
6258 		ret = tr->current_trace->update_thresh(tr);
6259 		if (ret < 0)
6260 			return ret;
6261 	}
6262 
6263 	return cnt;
6264 }
6265 
6266 #ifdef CONFIG_TRACER_MAX_TRACE
6267 
6268 static ssize_t
6269 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6270 		     size_t cnt, loff_t *ppos)
6271 {
6272 	struct trace_array *tr = filp->private_data;
6273 
6274 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6275 }
6276 
6277 static ssize_t
6278 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6279 		      size_t cnt, loff_t *ppos)
6280 {
6281 	struct trace_array *tr = filp->private_data;
6282 
6283 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6284 }
6285 
6286 #endif
6287 
6288 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6289 {
6290 	if (cpu == RING_BUFFER_ALL_CPUS) {
6291 		if (cpumask_empty(tr->pipe_cpumask)) {
6292 			cpumask_setall(tr->pipe_cpumask);
6293 			return 0;
6294 		}
6295 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6296 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6297 		return 0;
6298 	}
6299 	return -EBUSY;
6300 }
6301 
6302 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6303 {
6304 	if (cpu == RING_BUFFER_ALL_CPUS) {
6305 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6306 		cpumask_clear(tr->pipe_cpumask);
6307 	} else {
6308 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6309 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6310 	}
6311 }
6312 
6313 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6314 {
6315 	struct trace_array *tr = inode->i_private;
6316 	struct trace_iterator *iter;
6317 	int cpu;
6318 	int ret;
6319 
6320 	ret = tracing_check_open_get_tr(tr);
6321 	if (ret)
6322 		return ret;
6323 
6324 	mutex_lock(&trace_types_lock);
6325 	cpu = tracing_get_cpu(inode);
6326 	ret = open_pipe_on_cpu(tr, cpu);
6327 	if (ret)
6328 		goto fail_pipe_on_cpu;
6329 
6330 	/* create a buffer to store the information to pass to userspace */
6331 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6332 	if (!iter) {
6333 		ret = -ENOMEM;
6334 		goto fail_alloc_iter;
6335 	}
6336 
6337 	trace_seq_init(&iter->seq);
6338 	iter->trace = tr->current_trace;
6339 
6340 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6341 		ret = -ENOMEM;
6342 		goto fail;
6343 	}
6344 
6345 	/* trace pipe does not show start of buffer */
6346 	cpumask_setall(iter->started);
6347 
6348 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6349 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6350 
6351 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6352 	if (trace_clocks[tr->clock_id].in_ns)
6353 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6354 
6355 	iter->tr = tr;
6356 	iter->array_buffer = &tr->array_buffer;
6357 	iter->cpu_file = cpu;
6358 	mutex_init(&iter->mutex);
6359 	filp->private_data = iter;
6360 
6361 	if (iter->trace->pipe_open)
6362 		iter->trace->pipe_open(iter);
6363 
6364 	nonseekable_open(inode, filp);
6365 
6366 	tr->trace_ref++;
6367 
6368 	mutex_unlock(&trace_types_lock);
6369 	return ret;
6370 
6371 fail:
6372 	kfree(iter);
6373 fail_alloc_iter:
6374 	close_pipe_on_cpu(tr, cpu);
6375 fail_pipe_on_cpu:
6376 	__trace_array_put(tr);
6377 	mutex_unlock(&trace_types_lock);
6378 	return ret;
6379 }
6380 
6381 static int tracing_release_pipe(struct inode *inode, struct file *file)
6382 {
6383 	struct trace_iterator *iter = file->private_data;
6384 	struct trace_array *tr = inode->i_private;
6385 
6386 	mutex_lock(&trace_types_lock);
6387 
6388 	tr->trace_ref--;
6389 
6390 	if (iter->trace->pipe_close)
6391 		iter->trace->pipe_close(iter);
6392 	close_pipe_on_cpu(tr, iter->cpu_file);
6393 	mutex_unlock(&trace_types_lock);
6394 
6395 	free_trace_iter_content(iter);
6396 	kfree(iter);
6397 
6398 	trace_array_put(tr);
6399 
6400 	return 0;
6401 }
6402 
6403 static __poll_t
6404 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6405 {
6406 	struct trace_array *tr = iter->tr;
6407 
6408 	/* Iterators are static, they should be filled or empty */
6409 	if (trace_buffer_iter(iter, iter->cpu_file))
6410 		return EPOLLIN | EPOLLRDNORM;
6411 
6412 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6413 		/*
6414 		 * Always select as readable when in blocking mode
6415 		 */
6416 		return EPOLLIN | EPOLLRDNORM;
6417 	else
6418 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6419 					     filp, poll_table, iter->tr->buffer_percent);
6420 }
6421 
6422 static __poll_t
6423 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6424 {
6425 	struct trace_iterator *iter = filp->private_data;
6426 
6427 	return trace_poll(iter, filp, poll_table);
6428 }
6429 
6430 /* Must be called with iter->mutex held. */
6431 static int tracing_wait_pipe(struct file *filp)
6432 {
6433 	struct trace_iterator *iter = filp->private_data;
6434 	int ret;
6435 
6436 	while (trace_empty(iter)) {
6437 
6438 		if ((filp->f_flags & O_NONBLOCK)) {
6439 			return -EAGAIN;
6440 		}
6441 
6442 		/*
6443 		 * We block until we read something and tracing is disabled.
6444 		 * We still block if tracing is disabled, but we have never
6445 		 * read anything. This allows a user to cat this file, and
6446 		 * then enable tracing. But after we have read something,
6447 		 * we give an EOF when tracing is again disabled.
6448 		 *
6449 		 * iter->pos will be 0 if we haven't read anything.
6450 		 */
6451 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6452 			break;
6453 
6454 		mutex_unlock(&iter->mutex);
6455 
6456 		ret = wait_on_pipe(iter, 0);
6457 
6458 		mutex_lock(&iter->mutex);
6459 
6460 		if (ret)
6461 			return ret;
6462 	}
6463 
6464 	return 1;
6465 }
6466 
6467 /*
6468  * Consumer reader.
6469  */
6470 static ssize_t
6471 tracing_read_pipe(struct file *filp, char __user *ubuf,
6472 		  size_t cnt, loff_t *ppos)
6473 {
6474 	struct trace_iterator *iter = filp->private_data;
6475 	ssize_t sret;
6476 
6477 	/*
6478 	 * Avoid more than one consumer on a single file descriptor
6479 	 * This is just a matter of traces coherency, the ring buffer itself
6480 	 * is protected.
6481 	 */
6482 	guard(mutex)(&iter->mutex);
6483 
6484 	/* return any leftover data */
6485 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6486 	if (sret != -EBUSY)
6487 		return sret;
6488 
6489 	trace_seq_init(&iter->seq);
6490 
6491 	if (iter->trace->read) {
6492 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6493 		if (sret)
6494 			return sret;
6495 	}
6496 
6497 waitagain:
6498 	sret = tracing_wait_pipe(filp);
6499 	if (sret <= 0)
6500 		return sret;
6501 
6502 	/* stop when tracing is finished */
6503 	if (trace_empty(iter))
6504 		return 0;
6505 
6506 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6507 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6508 
6509 	/* reset all but tr, trace, and overruns */
6510 	trace_iterator_reset(iter);
6511 	cpumask_clear(iter->started);
6512 	trace_seq_init(&iter->seq);
6513 
6514 	trace_event_read_lock();
6515 	trace_access_lock(iter->cpu_file);
6516 	while (trace_find_next_entry_inc(iter) != NULL) {
6517 		enum print_line_t ret;
6518 		int save_len = iter->seq.seq.len;
6519 
6520 		ret = print_trace_line(iter);
6521 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6522 			/*
6523 			 * If one print_trace_line() fills entire trace_seq in one shot,
6524 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6525 			 * In this case, we need to consume it, otherwise, loop will peek
6526 			 * this event next time, resulting in an infinite loop.
6527 			 */
6528 			if (save_len == 0) {
6529 				iter->seq.full = 0;
6530 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6531 				trace_consume(iter);
6532 				break;
6533 			}
6534 
6535 			/* In other cases, don't print partial lines */
6536 			iter->seq.seq.len = save_len;
6537 			break;
6538 		}
6539 		if (ret != TRACE_TYPE_NO_CONSUME)
6540 			trace_consume(iter);
6541 
6542 		if (trace_seq_used(&iter->seq) >= cnt)
6543 			break;
6544 
6545 		/*
6546 		 * Setting the full flag means we reached the trace_seq buffer
6547 		 * size and we should leave by partial output condition above.
6548 		 * One of the trace_seq_* functions is not used properly.
6549 		 */
6550 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6551 			  iter->ent->type);
6552 	}
6553 	trace_access_unlock(iter->cpu_file);
6554 	trace_event_read_unlock();
6555 
6556 	/* Now copy what we have to the user */
6557 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6558 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6559 		trace_seq_init(&iter->seq);
6560 
6561 	/*
6562 	 * If there was nothing to send to user, in spite of consuming trace
6563 	 * entries, go back to wait for more entries.
6564 	 */
6565 	if (sret == -EBUSY)
6566 		goto waitagain;
6567 
6568 	return sret;
6569 }
6570 
6571 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6572 				     unsigned int idx)
6573 {
6574 	__free_page(spd->pages[idx]);
6575 }
6576 
6577 static size_t
6578 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6579 {
6580 	size_t count;
6581 	int save_len;
6582 	int ret;
6583 
6584 	/* Seq buffer is page-sized, exactly what we need. */
6585 	for (;;) {
6586 		save_len = iter->seq.seq.len;
6587 		ret = print_trace_line(iter);
6588 
6589 		if (trace_seq_has_overflowed(&iter->seq)) {
6590 			iter->seq.seq.len = save_len;
6591 			break;
6592 		}
6593 
6594 		/*
6595 		 * This should not be hit, because it should only
6596 		 * be set if the iter->seq overflowed. But check it
6597 		 * anyway to be safe.
6598 		 */
6599 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6600 			iter->seq.seq.len = save_len;
6601 			break;
6602 		}
6603 
6604 		count = trace_seq_used(&iter->seq) - save_len;
6605 		if (rem < count) {
6606 			rem = 0;
6607 			iter->seq.seq.len = save_len;
6608 			break;
6609 		}
6610 
6611 		if (ret != TRACE_TYPE_NO_CONSUME)
6612 			trace_consume(iter);
6613 		rem -= count;
6614 		if (!trace_find_next_entry_inc(iter))	{
6615 			rem = 0;
6616 			iter->ent = NULL;
6617 			break;
6618 		}
6619 	}
6620 
6621 	return rem;
6622 }
6623 
6624 static ssize_t tracing_splice_read_pipe(struct file *filp,
6625 					loff_t *ppos,
6626 					struct pipe_inode_info *pipe,
6627 					size_t len,
6628 					unsigned int flags)
6629 {
6630 	struct page *pages_def[PIPE_DEF_BUFFERS];
6631 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6632 	struct trace_iterator *iter = filp->private_data;
6633 	struct splice_pipe_desc spd = {
6634 		.pages		= pages_def,
6635 		.partial	= partial_def,
6636 		.nr_pages	= 0, /* This gets updated below. */
6637 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6638 		.ops		= &default_pipe_buf_ops,
6639 		.spd_release	= tracing_spd_release_pipe,
6640 	};
6641 	ssize_t ret;
6642 	size_t rem;
6643 	unsigned int i;
6644 
6645 	if (splice_grow_spd(pipe, &spd))
6646 		return -ENOMEM;
6647 
6648 	mutex_lock(&iter->mutex);
6649 
6650 	if (iter->trace->splice_read) {
6651 		ret = iter->trace->splice_read(iter, filp,
6652 					       ppos, pipe, len, flags);
6653 		if (ret)
6654 			goto out_err;
6655 	}
6656 
6657 	ret = tracing_wait_pipe(filp);
6658 	if (ret <= 0)
6659 		goto out_err;
6660 
6661 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6662 		ret = -EFAULT;
6663 		goto out_err;
6664 	}
6665 
6666 	trace_event_read_lock();
6667 	trace_access_lock(iter->cpu_file);
6668 
6669 	/* Fill as many pages as possible. */
6670 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6671 		spd.pages[i] = alloc_page(GFP_KERNEL);
6672 		if (!spd.pages[i])
6673 			break;
6674 
6675 		rem = tracing_fill_pipe_page(rem, iter);
6676 
6677 		/* Copy the data into the page, so we can start over. */
6678 		ret = trace_seq_to_buffer(&iter->seq,
6679 					  page_address(spd.pages[i]),
6680 					  trace_seq_used(&iter->seq));
6681 		if (ret < 0) {
6682 			__free_page(spd.pages[i]);
6683 			break;
6684 		}
6685 		spd.partial[i].offset = 0;
6686 		spd.partial[i].len = trace_seq_used(&iter->seq);
6687 
6688 		trace_seq_init(&iter->seq);
6689 	}
6690 
6691 	trace_access_unlock(iter->cpu_file);
6692 	trace_event_read_unlock();
6693 	mutex_unlock(&iter->mutex);
6694 
6695 	spd.nr_pages = i;
6696 
6697 	if (i)
6698 		ret = splice_to_pipe(pipe, &spd);
6699 	else
6700 		ret = 0;
6701 out:
6702 	splice_shrink_spd(&spd);
6703 	return ret;
6704 
6705 out_err:
6706 	mutex_unlock(&iter->mutex);
6707 	goto out;
6708 }
6709 
6710 static ssize_t
6711 tracing_entries_read(struct file *filp, char __user *ubuf,
6712 		     size_t cnt, loff_t *ppos)
6713 {
6714 	struct inode *inode = file_inode(filp);
6715 	struct trace_array *tr = inode->i_private;
6716 	int cpu = tracing_get_cpu(inode);
6717 	char buf[64];
6718 	int r = 0;
6719 	ssize_t ret;
6720 
6721 	mutex_lock(&trace_types_lock);
6722 
6723 	if (cpu == RING_BUFFER_ALL_CPUS) {
6724 		int cpu, buf_size_same;
6725 		unsigned long size;
6726 
6727 		size = 0;
6728 		buf_size_same = 1;
6729 		/* check if all cpu sizes are same */
6730 		for_each_tracing_cpu(cpu) {
6731 			/* fill in the size from first enabled cpu */
6732 			if (size == 0)
6733 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6734 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6735 				buf_size_same = 0;
6736 				break;
6737 			}
6738 		}
6739 
6740 		if (buf_size_same) {
6741 			if (!tr->ring_buffer_expanded)
6742 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6743 					    size >> 10,
6744 					    trace_buf_size >> 10);
6745 			else
6746 				r = sprintf(buf, "%lu\n", size >> 10);
6747 		} else
6748 			r = sprintf(buf, "X\n");
6749 	} else
6750 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6751 
6752 	mutex_unlock(&trace_types_lock);
6753 
6754 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6755 	return ret;
6756 }
6757 
6758 static ssize_t
6759 tracing_entries_write(struct file *filp, const char __user *ubuf,
6760 		      size_t cnt, loff_t *ppos)
6761 {
6762 	struct inode *inode = file_inode(filp);
6763 	struct trace_array *tr = inode->i_private;
6764 	unsigned long val;
6765 	int ret;
6766 
6767 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6768 	if (ret)
6769 		return ret;
6770 
6771 	/* must have at least 1 entry */
6772 	if (!val)
6773 		return -EINVAL;
6774 
6775 	/* value is in KB */
6776 	val <<= 10;
6777 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6778 	if (ret < 0)
6779 		return ret;
6780 
6781 	*ppos += cnt;
6782 
6783 	return cnt;
6784 }
6785 
6786 static ssize_t
6787 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6788 				size_t cnt, loff_t *ppos)
6789 {
6790 	struct trace_array *tr = filp->private_data;
6791 	char buf[64];
6792 	int r, cpu;
6793 	unsigned long size = 0, expanded_size = 0;
6794 
6795 	mutex_lock(&trace_types_lock);
6796 	for_each_tracing_cpu(cpu) {
6797 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6798 		if (!tr->ring_buffer_expanded)
6799 			expanded_size += trace_buf_size >> 10;
6800 	}
6801 	if (tr->ring_buffer_expanded)
6802 		r = sprintf(buf, "%lu\n", size);
6803 	else
6804 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6805 	mutex_unlock(&trace_types_lock);
6806 
6807 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6808 }
6809 
6810 static ssize_t
6811 tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6812 {
6813 	struct trace_array *tr = filp->private_data;
6814 	struct seq_buf seq;
6815 	char buf[64];
6816 
6817 	seq_buf_init(&seq, buf, 64);
6818 
6819 	seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
6820 	seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
6821 
6822 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
6823 }
6824 
6825 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6826 {
6827 	struct trace_array *tr = inode->i_private;
6828 	int cpu = tracing_get_cpu(inode);
6829 	int ret;
6830 
6831 	ret = tracing_check_open_get_tr(tr);
6832 	if (ret)
6833 		return ret;
6834 
6835 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6836 	if (ret < 0)
6837 		__trace_array_put(tr);
6838 	return ret;
6839 }
6840 
6841 static ssize_t
6842 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6843 			  size_t cnt, loff_t *ppos)
6844 {
6845 	/*
6846 	 * There is no need to read what the user has written, this function
6847 	 * is just to make sure that there is no error when "echo" is used
6848 	 */
6849 
6850 	*ppos += cnt;
6851 
6852 	return cnt;
6853 }
6854 
6855 static int
6856 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6857 {
6858 	struct trace_array *tr = inode->i_private;
6859 
6860 	/* disable tracing ? */
6861 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6862 		tracer_tracing_off(tr);
6863 	/* resize the ring buffer to 0 */
6864 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6865 
6866 	trace_array_put(tr);
6867 
6868 	return 0;
6869 }
6870 
6871 #define TRACE_MARKER_MAX_SIZE		4096
6872 
6873 static ssize_t
6874 tracing_mark_write(struct file *filp, const char __user *ubuf,
6875 					size_t cnt, loff_t *fpos)
6876 {
6877 	struct trace_array *tr = filp->private_data;
6878 	struct ring_buffer_event *event;
6879 	enum event_trigger_type tt = ETT_NONE;
6880 	struct trace_buffer *buffer;
6881 	struct print_entry *entry;
6882 	int meta_size;
6883 	ssize_t written;
6884 	size_t size;
6885 	int len;
6886 
6887 /* Used in tracing_mark_raw_write() as well */
6888 #define FAULTED_STR "<faulted>"
6889 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6890 
6891 	if (tracing_disabled)
6892 		return -EINVAL;
6893 
6894 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6895 		return -EINVAL;
6896 
6897 	if ((ssize_t)cnt < 0)
6898 		return -EINVAL;
6899 
6900 	if (cnt > TRACE_MARKER_MAX_SIZE)
6901 		cnt = TRACE_MARKER_MAX_SIZE;
6902 
6903 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6904  again:
6905 	size = cnt + meta_size;
6906 
6907 	/* If less than "<faulted>", then make sure we can still add that */
6908 	if (cnt < FAULTED_SIZE)
6909 		size += FAULTED_SIZE - cnt;
6910 
6911 	buffer = tr->array_buffer.buffer;
6912 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6913 					    tracing_gen_ctx());
6914 	if (unlikely(!event)) {
6915 		/*
6916 		 * If the size was greater than what was allowed, then
6917 		 * make it smaller and try again.
6918 		 */
6919 		if (size > ring_buffer_max_event_size(buffer)) {
6920 			/* cnt < FAULTED size should never be bigger than max */
6921 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
6922 				return -EBADF;
6923 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
6924 			/* The above should only happen once */
6925 			if (WARN_ON_ONCE(cnt + meta_size == size))
6926 				return -EBADF;
6927 			goto again;
6928 		}
6929 
6930 		/* Ring buffer disabled, return as if not open for write */
6931 		return -EBADF;
6932 	}
6933 
6934 	entry = ring_buffer_event_data(event);
6935 	entry->ip = _THIS_IP_;
6936 
6937 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6938 	if (len) {
6939 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6940 		cnt = FAULTED_SIZE;
6941 		written = -EFAULT;
6942 	} else
6943 		written = cnt;
6944 
6945 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6946 		/* do not add \n before testing triggers, but add \0 */
6947 		entry->buf[cnt] = '\0';
6948 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6949 	}
6950 
6951 	if (entry->buf[cnt - 1] != '\n') {
6952 		entry->buf[cnt] = '\n';
6953 		entry->buf[cnt + 1] = '\0';
6954 	} else
6955 		entry->buf[cnt] = '\0';
6956 
6957 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6958 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6959 	__buffer_unlock_commit(buffer, event);
6960 
6961 	if (tt)
6962 		event_triggers_post_call(tr->trace_marker_file, tt);
6963 
6964 	return written;
6965 }
6966 
6967 static ssize_t
6968 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6969 					size_t cnt, loff_t *fpos)
6970 {
6971 	struct trace_array *tr = filp->private_data;
6972 	struct ring_buffer_event *event;
6973 	struct trace_buffer *buffer;
6974 	struct raw_data_entry *entry;
6975 	ssize_t written;
6976 	int size;
6977 	int len;
6978 
6979 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6980 
6981 	if (tracing_disabled)
6982 		return -EINVAL;
6983 
6984 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6985 		return -EINVAL;
6986 
6987 	/* The marker must at least have a tag id */
6988 	if (cnt < sizeof(unsigned int))
6989 		return -EINVAL;
6990 
6991 	size = sizeof(*entry) + cnt;
6992 	if (cnt < FAULT_SIZE_ID)
6993 		size += FAULT_SIZE_ID - cnt;
6994 
6995 	buffer = tr->array_buffer.buffer;
6996 
6997 	if (size > ring_buffer_max_event_size(buffer))
6998 		return -EINVAL;
6999 
7000 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7001 					    tracing_gen_ctx());
7002 	if (!event)
7003 		/* Ring buffer disabled, return as if not open for write */
7004 		return -EBADF;
7005 
7006 	entry = ring_buffer_event_data(event);
7007 
7008 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7009 	if (len) {
7010 		entry->id = -1;
7011 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7012 		written = -EFAULT;
7013 	} else
7014 		written = cnt;
7015 
7016 	__buffer_unlock_commit(buffer, event);
7017 
7018 	return written;
7019 }
7020 
7021 static int tracing_clock_show(struct seq_file *m, void *v)
7022 {
7023 	struct trace_array *tr = m->private;
7024 	int i;
7025 
7026 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7027 		seq_printf(m,
7028 			"%s%s%s%s", i ? " " : "",
7029 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7030 			i == tr->clock_id ? "]" : "");
7031 	seq_putc(m, '\n');
7032 
7033 	return 0;
7034 }
7035 
7036 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7037 {
7038 	int i;
7039 
7040 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7041 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7042 			break;
7043 	}
7044 	if (i == ARRAY_SIZE(trace_clocks))
7045 		return -EINVAL;
7046 
7047 	mutex_lock(&trace_types_lock);
7048 
7049 	tr->clock_id = i;
7050 
7051 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7052 
7053 	/*
7054 	 * New clock may not be consistent with the previous clock.
7055 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7056 	 */
7057 	tracing_reset_online_cpus(&tr->array_buffer);
7058 
7059 #ifdef CONFIG_TRACER_MAX_TRACE
7060 	if (tr->max_buffer.buffer)
7061 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7062 	tracing_reset_online_cpus(&tr->max_buffer);
7063 #endif
7064 
7065 	mutex_unlock(&trace_types_lock);
7066 
7067 	return 0;
7068 }
7069 
7070 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7071 				   size_t cnt, loff_t *fpos)
7072 {
7073 	struct seq_file *m = filp->private_data;
7074 	struct trace_array *tr = m->private;
7075 	char buf[64];
7076 	const char *clockstr;
7077 	int ret;
7078 
7079 	if (cnt >= sizeof(buf))
7080 		return -EINVAL;
7081 
7082 	if (copy_from_user(buf, ubuf, cnt))
7083 		return -EFAULT;
7084 
7085 	buf[cnt] = 0;
7086 
7087 	clockstr = strstrip(buf);
7088 
7089 	ret = tracing_set_clock(tr, clockstr);
7090 	if (ret)
7091 		return ret;
7092 
7093 	*fpos += cnt;
7094 
7095 	return cnt;
7096 }
7097 
7098 static int tracing_clock_open(struct inode *inode, struct file *file)
7099 {
7100 	struct trace_array *tr = inode->i_private;
7101 	int ret;
7102 
7103 	ret = tracing_check_open_get_tr(tr);
7104 	if (ret)
7105 		return ret;
7106 
7107 	ret = single_open(file, tracing_clock_show, inode->i_private);
7108 	if (ret < 0)
7109 		trace_array_put(tr);
7110 
7111 	return ret;
7112 }
7113 
7114 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7115 {
7116 	struct trace_array *tr = m->private;
7117 
7118 	mutex_lock(&trace_types_lock);
7119 
7120 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7121 		seq_puts(m, "delta [absolute]\n");
7122 	else
7123 		seq_puts(m, "[delta] absolute\n");
7124 
7125 	mutex_unlock(&trace_types_lock);
7126 
7127 	return 0;
7128 }
7129 
7130 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7131 {
7132 	struct trace_array *tr = inode->i_private;
7133 	int ret;
7134 
7135 	ret = tracing_check_open_get_tr(tr);
7136 	if (ret)
7137 		return ret;
7138 
7139 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7140 	if (ret < 0)
7141 		trace_array_put(tr);
7142 
7143 	return ret;
7144 }
7145 
7146 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7147 {
7148 	if (rbe == this_cpu_read(trace_buffered_event))
7149 		return ring_buffer_time_stamp(buffer);
7150 
7151 	return ring_buffer_event_time_stamp(buffer, rbe);
7152 }
7153 
7154 /*
7155  * Set or disable using the per CPU trace_buffer_event when possible.
7156  */
7157 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7158 {
7159 	guard(mutex)(&trace_types_lock);
7160 
7161 	if (set && tr->no_filter_buffering_ref++)
7162 		return 0;
7163 
7164 	if (!set) {
7165 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7166 			return -EINVAL;
7167 
7168 		--tr->no_filter_buffering_ref;
7169 	}
7170 
7171 	return 0;
7172 }
7173 
7174 struct ftrace_buffer_info {
7175 	struct trace_iterator	iter;
7176 	void			*spare;
7177 	unsigned int		spare_cpu;
7178 	unsigned int		spare_size;
7179 	unsigned int		read;
7180 };
7181 
7182 #ifdef CONFIG_TRACER_SNAPSHOT
7183 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7184 {
7185 	struct trace_array *tr = inode->i_private;
7186 	struct trace_iterator *iter;
7187 	struct seq_file *m;
7188 	int ret;
7189 
7190 	ret = tracing_check_open_get_tr(tr);
7191 	if (ret)
7192 		return ret;
7193 
7194 	if (file->f_mode & FMODE_READ) {
7195 		iter = __tracing_open(inode, file, true);
7196 		if (IS_ERR(iter))
7197 			ret = PTR_ERR(iter);
7198 	} else {
7199 		/* Writes still need the seq_file to hold the private data */
7200 		ret = -ENOMEM;
7201 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7202 		if (!m)
7203 			goto out;
7204 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7205 		if (!iter) {
7206 			kfree(m);
7207 			goto out;
7208 		}
7209 		ret = 0;
7210 
7211 		iter->tr = tr;
7212 		iter->array_buffer = &tr->max_buffer;
7213 		iter->cpu_file = tracing_get_cpu(inode);
7214 		m->private = iter;
7215 		file->private_data = m;
7216 	}
7217 out:
7218 	if (ret < 0)
7219 		trace_array_put(tr);
7220 
7221 	return ret;
7222 }
7223 
7224 static void tracing_swap_cpu_buffer(void *tr)
7225 {
7226 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7227 }
7228 
7229 static ssize_t
7230 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7231 		       loff_t *ppos)
7232 {
7233 	struct seq_file *m = filp->private_data;
7234 	struct trace_iterator *iter = m->private;
7235 	struct trace_array *tr = iter->tr;
7236 	unsigned long val;
7237 	int ret;
7238 
7239 	ret = tracing_update_buffers(tr);
7240 	if (ret < 0)
7241 		return ret;
7242 
7243 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7244 	if (ret)
7245 		return ret;
7246 
7247 	guard(mutex)(&trace_types_lock);
7248 
7249 	if (tr->current_trace->use_max_tr)
7250 		return -EBUSY;
7251 
7252 	local_irq_disable();
7253 	arch_spin_lock(&tr->max_lock);
7254 	if (tr->cond_snapshot)
7255 		ret = -EBUSY;
7256 	arch_spin_unlock(&tr->max_lock);
7257 	local_irq_enable();
7258 	if (ret)
7259 		return ret;
7260 
7261 	switch (val) {
7262 	case 0:
7263 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7264 			return -EINVAL;
7265 		if (tr->allocated_snapshot)
7266 			free_snapshot(tr);
7267 		break;
7268 	case 1:
7269 /* Only allow per-cpu swap if the ring buffer supports it */
7270 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7271 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7272 			return -EINVAL;
7273 #endif
7274 		if (tr->allocated_snapshot)
7275 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7276 					&tr->array_buffer, iter->cpu_file);
7277 
7278 		ret = tracing_arm_snapshot_locked(tr);
7279 		if (ret)
7280 			return ret;
7281 
7282 		/* Now, we're going to swap */
7283 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7284 			local_irq_disable();
7285 			update_max_tr(tr, current, smp_processor_id(), NULL);
7286 			local_irq_enable();
7287 		} else {
7288 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7289 						 (void *)tr, 1);
7290 		}
7291 		tracing_disarm_snapshot(tr);
7292 		break;
7293 	default:
7294 		if (tr->allocated_snapshot) {
7295 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7296 				tracing_reset_online_cpus(&tr->max_buffer);
7297 			else
7298 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7299 		}
7300 		break;
7301 	}
7302 
7303 	if (ret >= 0) {
7304 		*ppos += cnt;
7305 		ret = cnt;
7306 	}
7307 
7308 	return ret;
7309 }
7310 
7311 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7312 {
7313 	struct seq_file *m = file->private_data;
7314 	int ret;
7315 
7316 	ret = tracing_release(inode, file);
7317 
7318 	if (file->f_mode & FMODE_READ)
7319 		return ret;
7320 
7321 	/* If write only, the seq_file is just a stub */
7322 	if (m)
7323 		kfree(m->private);
7324 	kfree(m);
7325 
7326 	return 0;
7327 }
7328 
7329 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7330 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7331 				    size_t count, loff_t *ppos);
7332 static int tracing_buffers_release(struct inode *inode, struct file *file);
7333 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7334 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7335 
7336 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7337 {
7338 	struct ftrace_buffer_info *info;
7339 	int ret;
7340 
7341 	/* The following checks for tracefs lockdown */
7342 	ret = tracing_buffers_open(inode, filp);
7343 	if (ret < 0)
7344 		return ret;
7345 
7346 	info = filp->private_data;
7347 
7348 	if (info->iter.trace->use_max_tr) {
7349 		tracing_buffers_release(inode, filp);
7350 		return -EBUSY;
7351 	}
7352 
7353 	info->iter.snapshot = true;
7354 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7355 
7356 	return ret;
7357 }
7358 
7359 #endif /* CONFIG_TRACER_SNAPSHOT */
7360 
7361 
7362 static const struct file_operations tracing_thresh_fops = {
7363 	.open		= tracing_open_generic,
7364 	.read		= tracing_thresh_read,
7365 	.write		= tracing_thresh_write,
7366 	.llseek		= generic_file_llseek,
7367 };
7368 
7369 #ifdef CONFIG_TRACER_MAX_TRACE
7370 static const struct file_operations tracing_max_lat_fops = {
7371 	.open		= tracing_open_generic_tr,
7372 	.read		= tracing_max_lat_read,
7373 	.write		= tracing_max_lat_write,
7374 	.llseek		= generic_file_llseek,
7375 	.release	= tracing_release_generic_tr,
7376 };
7377 #endif
7378 
7379 static const struct file_operations set_tracer_fops = {
7380 	.open		= tracing_open_generic_tr,
7381 	.read		= tracing_set_trace_read,
7382 	.write		= tracing_set_trace_write,
7383 	.llseek		= generic_file_llseek,
7384 	.release	= tracing_release_generic_tr,
7385 };
7386 
7387 static const struct file_operations tracing_pipe_fops = {
7388 	.open		= tracing_open_pipe,
7389 	.poll		= tracing_poll_pipe,
7390 	.read		= tracing_read_pipe,
7391 	.splice_read	= tracing_splice_read_pipe,
7392 	.release	= tracing_release_pipe,
7393 };
7394 
7395 static const struct file_operations tracing_entries_fops = {
7396 	.open		= tracing_open_generic_tr,
7397 	.read		= tracing_entries_read,
7398 	.write		= tracing_entries_write,
7399 	.llseek		= generic_file_llseek,
7400 	.release	= tracing_release_generic_tr,
7401 };
7402 
7403 static const struct file_operations tracing_buffer_meta_fops = {
7404 	.open		= tracing_buffer_meta_open,
7405 	.read		= seq_read,
7406 	.llseek		= seq_lseek,
7407 	.release	= tracing_seq_release,
7408 };
7409 
7410 static const struct file_operations tracing_total_entries_fops = {
7411 	.open		= tracing_open_generic_tr,
7412 	.read		= tracing_total_entries_read,
7413 	.llseek		= generic_file_llseek,
7414 	.release	= tracing_release_generic_tr,
7415 };
7416 
7417 static const struct file_operations tracing_free_buffer_fops = {
7418 	.open		= tracing_open_generic_tr,
7419 	.write		= tracing_free_buffer_write,
7420 	.release	= tracing_free_buffer_release,
7421 };
7422 
7423 static const struct file_operations tracing_mark_fops = {
7424 	.open		= tracing_mark_open,
7425 	.write		= tracing_mark_write,
7426 	.release	= tracing_release_generic_tr,
7427 };
7428 
7429 static const struct file_operations tracing_mark_raw_fops = {
7430 	.open		= tracing_mark_open,
7431 	.write		= tracing_mark_raw_write,
7432 	.release	= tracing_release_generic_tr,
7433 };
7434 
7435 static const struct file_operations trace_clock_fops = {
7436 	.open		= tracing_clock_open,
7437 	.read		= seq_read,
7438 	.llseek		= seq_lseek,
7439 	.release	= tracing_single_release_tr,
7440 	.write		= tracing_clock_write,
7441 };
7442 
7443 static const struct file_operations trace_time_stamp_mode_fops = {
7444 	.open		= tracing_time_stamp_mode_open,
7445 	.read		= seq_read,
7446 	.llseek		= seq_lseek,
7447 	.release	= tracing_single_release_tr,
7448 };
7449 
7450 static const struct file_operations last_boot_fops = {
7451 	.open		= tracing_open_generic_tr,
7452 	.read		= tracing_last_boot_read,
7453 	.llseek		= generic_file_llseek,
7454 	.release	= tracing_release_generic_tr,
7455 };
7456 
7457 #ifdef CONFIG_TRACER_SNAPSHOT
7458 static const struct file_operations snapshot_fops = {
7459 	.open		= tracing_snapshot_open,
7460 	.read		= seq_read,
7461 	.write		= tracing_snapshot_write,
7462 	.llseek		= tracing_lseek,
7463 	.release	= tracing_snapshot_release,
7464 };
7465 
7466 static const struct file_operations snapshot_raw_fops = {
7467 	.open		= snapshot_raw_open,
7468 	.read		= tracing_buffers_read,
7469 	.release	= tracing_buffers_release,
7470 	.splice_read	= tracing_buffers_splice_read,
7471 };
7472 
7473 #endif /* CONFIG_TRACER_SNAPSHOT */
7474 
7475 /*
7476  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7477  * @filp: The active open file structure
7478  * @ubuf: The userspace provided buffer to read value into
7479  * @cnt: The maximum number of bytes to read
7480  * @ppos: The current "file" position
7481  *
7482  * This function implements the write interface for a struct trace_min_max_param.
7483  * The filp->private_data must point to a trace_min_max_param structure that
7484  * defines where to write the value, the min and the max acceptable values,
7485  * and a lock to protect the write.
7486  */
7487 static ssize_t
7488 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7489 {
7490 	struct trace_min_max_param *param = filp->private_data;
7491 	u64 val;
7492 	int err;
7493 
7494 	if (!param)
7495 		return -EFAULT;
7496 
7497 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7498 	if (err)
7499 		return err;
7500 
7501 	if (param->lock)
7502 		mutex_lock(param->lock);
7503 
7504 	if (param->min && val < *param->min)
7505 		err = -EINVAL;
7506 
7507 	if (param->max && val > *param->max)
7508 		err = -EINVAL;
7509 
7510 	if (!err)
7511 		*param->val = val;
7512 
7513 	if (param->lock)
7514 		mutex_unlock(param->lock);
7515 
7516 	if (err)
7517 		return err;
7518 
7519 	return cnt;
7520 }
7521 
7522 /*
7523  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7524  * @filp: The active open file structure
7525  * @ubuf: The userspace provided buffer to read value into
7526  * @cnt: The maximum number of bytes to read
7527  * @ppos: The current "file" position
7528  *
7529  * This function implements the read interface for a struct trace_min_max_param.
7530  * The filp->private_data must point to a trace_min_max_param struct with valid
7531  * data.
7532  */
7533 static ssize_t
7534 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7535 {
7536 	struct trace_min_max_param *param = filp->private_data;
7537 	char buf[U64_STR_SIZE];
7538 	int len;
7539 	u64 val;
7540 
7541 	if (!param)
7542 		return -EFAULT;
7543 
7544 	val = *param->val;
7545 
7546 	if (cnt > sizeof(buf))
7547 		cnt = sizeof(buf);
7548 
7549 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7550 
7551 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7552 }
7553 
7554 const struct file_operations trace_min_max_fops = {
7555 	.open		= tracing_open_generic,
7556 	.read		= trace_min_max_read,
7557 	.write		= trace_min_max_write,
7558 };
7559 
7560 #define TRACING_LOG_ERRS_MAX	8
7561 #define TRACING_LOG_LOC_MAX	128
7562 
7563 #define CMD_PREFIX "  Command: "
7564 
7565 struct err_info {
7566 	const char	**errs;	/* ptr to loc-specific array of err strings */
7567 	u8		type;	/* index into errs -> specific err string */
7568 	u16		pos;	/* caret position */
7569 	u64		ts;
7570 };
7571 
7572 struct tracing_log_err {
7573 	struct list_head	list;
7574 	struct err_info		info;
7575 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7576 	char			*cmd;                     /* what caused err */
7577 };
7578 
7579 static DEFINE_MUTEX(tracing_err_log_lock);
7580 
7581 static struct tracing_log_err *alloc_tracing_log_err(int len)
7582 {
7583 	struct tracing_log_err *err;
7584 
7585 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7586 	if (!err)
7587 		return ERR_PTR(-ENOMEM);
7588 
7589 	err->cmd = kzalloc(len, GFP_KERNEL);
7590 	if (!err->cmd) {
7591 		kfree(err);
7592 		return ERR_PTR(-ENOMEM);
7593 	}
7594 
7595 	return err;
7596 }
7597 
7598 static void free_tracing_log_err(struct tracing_log_err *err)
7599 {
7600 	kfree(err->cmd);
7601 	kfree(err);
7602 }
7603 
7604 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7605 						   int len)
7606 {
7607 	struct tracing_log_err *err;
7608 	char *cmd;
7609 
7610 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7611 		err = alloc_tracing_log_err(len);
7612 		if (PTR_ERR(err) != -ENOMEM)
7613 			tr->n_err_log_entries++;
7614 
7615 		return err;
7616 	}
7617 	cmd = kzalloc(len, GFP_KERNEL);
7618 	if (!cmd)
7619 		return ERR_PTR(-ENOMEM);
7620 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7621 	kfree(err->cmd);
7622 	err->cmd = cmd;
7623 	list_del(&err->list);
7624 
7625 	return err;
7626 }
7627 
7628 /**
7629  * err_pos - find the position of a string within a command for error careting
7630  * @cmd: The tracing command that caused the error
7631  * @str: The string to position the caret at within @cmd
7632  *
7633  * Finds the position of the first occurrence of @str within @cmd.  The
7634  * return value can be passed to tracing_log_err() for caret placement
7635  * within @cmd.
7636  *
7637  * Returns the index within @cmd of the first occurrence of @str or 0
7638  * if @str was not found.
7639  */
7640 unsigned int err_pos(char *cmd, const char *str)
7641 {
7642 	char *found;
7643 
7644 	if (WARN_ON(!strlen(cmd)))
7645 		return 0;
7646 
7647 	found = strstr(cmd, str);
7648 	if (found)
7649 		return found - cmd;
7650 
7651 	return 0;
7652 }
7653 
7654 /**
7655  * tracing_log_err - write an error to the tracing error log
7656  * @tr: The associated trace array for the error (NULL for top level array)
7657  * @loc: A string describing where the error occurred
7658  * @cmd: The tracing command that caused the error
7659  * @errs: The array of loc-specific static error strings
7660  * @type: The index into errs[], which produces the specific static err string
7661  * @pos: The position the caret should be placed in the cmd
7662  *
7663  * Writes an error into tracing/error_log of the form:
7664  *
7665  * <loc>: error: <text>
7666  *   Command: <cmd>
7667  *              ^
7668  *
7669  * tracing/error_log is a small log file containing the last
7670  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7671  * unless there has been a tracing error, and the error log can be
7672  * cleared and have its memory freed by writing the empty string in
7673  * truncation mode to it i.e. echo > tracing/error_log.
7674  *
7675  * NOTE: the @errs array along with the @type param are used to
7676  * produce a static error string - this string is not copied and saved
7677  * when the error is logged - only a pointer to it is saved.  See
7678  * existing callers for examples of how static strings are typically
7679  * defined for use with tracing_log_err().
7680  */
7681 void tracing_log_err(struct trace_array *tr,
7682 		     const char *loc, const char *cmd,
7683 		     const char **errs, u8 type, u16 pos)
7684 {
7685 	struct tracing_log_err *err;
7686 	int len = 0;
7687 
7688 	if (!tr)
7689 		tr = &global_trace;
7690 
7691 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7692 
7693 	guard(mutex)(&tracing_err_log_lock);
7694 
7695 	err = get_tracing_log_err(tr, len);
7696 	if (PTR_ERR(err) == -ENOMEM)
7697 		return;
7698 
7699 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7700 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7701 
7702 	err->info.errs = errs;
7703 	err->info.type = type;
7704 	err->info.pos = pos;
7705 	err->info.ts = local_clock();
7706 
7707 	list_add_tail(&err->list, &tr->err_log);
7708 }
7709 
7710 static void clear_tracing_err_log(struct trace_array *tr)
7711 {
7712 	struct tracing_log_err *err, *next;
7713 
7714 	mutex_lock(&tracing_err_log_lock);
7715 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7716 		list_del(&err->list);
7717 		free_tracing_log_err(err);
7718 	}
7719 
7720 	tr->n_err_log_entries = 0;
7721 	mutex_unlock(&tracing_err_log_lock);
7722 }
7723 
7724 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7725 {
7726 	struct trace_array *tr = m->private;
7727 
7728 	mutex_lock(&tracing_err_log_lock);
7729 
7730 	return seq_list_start(&tr->err_log, *pos);
7731 }
7732 
7733 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7734 {
7735 	struct trace_array *tr = m->private;
7736 
7737 	return seq_list_next(v, &tr->err_log, pos);
7738 }
7739 
7740 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7741 {
7742 	mutex_unlock(&tracing_err_log_lock);
7743 }
7744 
7745 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7746 {
7747 	u16 i;
7748 
7749 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7750 		seq_putc(m, ' ');
7751 	for (i = 0; i < pos; i++)
7752 		seq_putc(m, ' ');
7753 	seq_puts(m, "^\n");
7754 }
7755 
7756 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7757 {
7758 	struct tracing_log_err *err = v;
7759 
7760 	if (err) {
7761 		const char *err_text = err->info.errs[err->info.type];
7762 		u64 sec = err->info.ts;
7763 		u32 nsec;
7764 
7765 		nsec = do_div(sec, NSEC_PER_SEC);
7766 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7767 			   err->loc, err_text);
7768 		seq_printf(m, "%s", err->cmd);
7769 		tracing_err_log_show_pos(m, err->info.pos);
7770 	}
7771 
7772 	return 0;
7773 }
7774 
7775 static const struct seq_operations tracing_err_log_seq_ops = {
7776 	.start  = tracing_err_log_seq_start,
7777 	.next   = tracing_err_log_seq_next,
7778 	.stop   = tracing_err_log_seq_stop,
7779 	.show   = tracing_err_log_seq_show
7780 };
7781 
7782 static int tracing_err_log_open(struct inode *inode, struct file *file)
7783 {
7784 	struct trace_array *tr = inode->i_private;
7785 	int ret = 0;
7786 
7787 	ret = tracing_check_open_get_tr(tr);
7788 	if (ret)
7789 		return ret;
7790 
7791 	/* If this file was opened for write, then erase contents */
7792 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7793 		clear_tracing_err_log(tr);
7794 
7795 	if (file->f_mode & FMODE_READ) {
7796 		ret = seq_open(file, &tracing_err_log_seq_ops);
7797 		if (!ret) {
7798 			struct seq_file *m = file->private_data;
7799 			m->private = tr;
7800 		} else {
7801 			trace_array_put(tr);
7802 		}
7803 	}
7804 	return ret;
7805 }
7806 
7807 static ssize_t tracing_err_log_write(struct file *file,
7808 				     const char __user *buffer,
7809 				     size_t count, loff_t *ppos)
7810 {
7811 	return count;
7812 }
7813 
7814 static int tracing_err_log_release(struct inode *inode, struct file *file)
7815 {
7816 	struct trace_array *tr = inode->i_private;
7817 
7818 	trace_array_put(tr);
7819 
7820 	if (file->f_mode & FMODE_READ)
7821 		seq_release(inode, file);
7822 
7823 	return 0;
7824 }
7825 
7826 static const struct file_operations tracing_err_log_fops = {
7827 	.open           = tracing_err_log_open,
7828 	.write		= tracing_err_log_write,
7829 	.read           = seq_read,
7830 	.llseek         = tracing_lseek,
7831 	.release        = tracing_err_log_release,
7832 };
7833 
7834 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7835 {
7836 	struct trace_array *tr = inode->i_private;
7837 	struct ftrace_buffer_info *info;
7838 	int ret;
7839 
7840 	ret = tracing_check_open_get_tr(tr);
7841 	if (ret)
7842 		return ret;
7843 
7844 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7845 	if (!info) {
7846 		trace_array_put(tr);
7847 		return -ENOMEM;
7848 	}
7849 
7850 	mutex_lock(&trace_types_lock);
7851 
7852 	info->iter.tr		= tr;
7853 	info->iter.cpu_file	= tracing_get_cpu(inode);
7854 	info->iter.trace	= tr->current_trace;
7855 	info->iter.array_buffer = &tr->array_buffer;
7856 	info->spare		= NULL;
7857 	/* Force reading ring buffer for first read */
7858 	info->read		= (unsigned int)-1;
7859 
7860 	filp->private_data = info;
7861 
7862 	tr->trace_ref++;
7863 
7864 	mutex_unlock(&trace_types_lock);
7865 
7866 	ret = nonseekable_open(inode, filp);
7867 	if (ret < 0)
7868 		trace_array_put(tr);
7869 
7870 	return ret;
7871 }
7872 
7873 static __poll_t
7874 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7875 {
7876 	struct ftrace_buffer_info *info = filp->private_data;
7877 	struct trace_iterator *iter = &info->iter;
7878 
7879 	return trace_poll(iter, filp, poll_table);
7880 }
7881 
7882 static ssize_t
7883 tracing_buffers_read(struct file *filp, char __user *ubuf,
7884 		     size_t count, loff_t *ppos)
7885 {
7886 	struct ftrace_buffer_info *info = filp->private_data;
7887 	struct trace_iterator *iter = &info->iter;
7888 	void *trace_data;
7889 	int page_size;
7890 	ssize_t ret = 0;
7891 	ssize_t size;
7892 
7893 	if (!count)
7894 		return 0;
7895 
7896 #ifdef CONFIG_TRACER_MAX_TRACE
7897 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7898 		return -EBUSY;
7899 #endif
7900 
7901 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7902 
7903 	/* Make sure the spare matches the current sub buffer size */
7904 	if (info->spare) {
7905 		if (page_size != info->spare_size) {
7906 			ring_buffer_free_read_page(iter->array_buffer->buffer,
7907 						   info->spare_cpu, info->spare);
7908 			info->spare = NULL;
7909 		}
7910 	}
7911 
7912 	if (!info->spare) {
7913 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7914 							  iter->cpu_file);
7915 		if (IS_ERR(info->spare)) {
7916 			ret = PTR_ERR(info->spare);
7917 			info->spare = NULL;
7918 		} else {
7919 			info->spare_cpu = iter->cpu_file;
7920 			info->spare_size = page_size;
7921 		}
7922 	}
7923 	if (!info->spare)
7924 		return ret;
7925 
7926 	/* Do we have previous read data to read? */
7927 	if (info->read < page_size)
7928 		goto read;
7929 
7930  again:
7931 	trace_access_lock(iter->cpu_file);
7932 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7933 				    info->spare,
7934 				    count,
7935 				    iter->cpu_file, 0);
7936 	trace_access_unlock(iter->cpu_file);
7937 
7938 	if (ret < 0) {
7939 		if (trace_empty(iter) && !iter->closed) {
7940 			if ((filp->f_flags & O_NONBLOCK))
7941 				return -EAGAIN;
7942 
7943 			ret = wait_on_pipe(iter, 0);
7944 			if (ret)
7945 				return ret;
7946 
7947 			goto again;
7948 		}
7949 		return 0;
7950 	}
7951 
7952 	info->read = 0;
7953  read:
7954 	size = page_size - info->read;
7955 	if (size > count)
7956 		size = count;
7957 	trace_data = ring_buffer_read_page_data(info->spare);
7958 	ret = copy_to_user(ubuf, trace_data + info->read, size);
7959 	if (ret == size)
7960 		return -EFAULT;
7961 
7962 	size -= ret;
7963 
7964 	*ppos += size;
7965 	info->read += size;
7966 
7967 	return size;
7968 }
7969 
7970 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7971 {
7972 	struct ftrace_buffer_info *info = file->private_data;
7973 	struct trace_iterator *iter = &info->iter;
7974 
7975 	iter->closed = true;
7976 	/* Make sure the waiters see the new wait_index */
7977 	(void)atomic_fetch_inc_release(&iter->wait_index);
7978 
7979 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7980 
7981 	return 0;
7982 }
7983 
7984 static int tracing_buffers_release(struct inode *inode, struct file *file)
7985 {
7986 	struct ftrace_buffer_info *info = file->private_data;
7987 	struct trace_iterator *iter = &info->iter;
7988 
7989 	mutex_lock(&trace_types_lock);
7990 
7991 	iter->tr->trace_ref--;
7992 
7993 	__trace_array_put(iter->tr);
7994 
7995 	if (info->spare)
7996 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7997 					   info->spare_cpu, info->spare);
7998 	kvfree(info);
7999 
8000 	mutex_unlock(&trace_types_lock);
8001 
8002 	return 0;
8003 }
8004 
8005 struct buffer_ref {
8006 	struct trace_buffer	*buffer;
8007 	void			*page;
8008 	int			cpu;
8009 	refcount_t		refcount;
8010 };
8011 
8012 static void buffer_ref_release(struct buffer_ref *ref)
8013 {
8014 	if (!refcount_dec_and_test(&ref->refcount))
8015 		return;
8016 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8017 	kfree(ref);
8018 }
8019 
8020 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8021 				    struct pipe_buffer *buf)
8022 {
8023 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8024 
8025 	buffer_ref_release(ref);
8026 	buf->private = 0;
8027 }
8028 
8029 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8030 				struct pipe_buffer *buf)
8031 {
8032 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8033 
8034 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8035 		return false;
8036 
8037 	refcount_inc(&ref->refcount);
8038 	return true;
8039 }
8040 
8041 /* Pipe buffer operations for a buffer. */
8042 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8043 	.release		= buffer_pipe_buf_release,
8044 	.get			= buffer_pipe_buf_get,
8045 };
8046 
8047 /*
8048  * Callback from splice_to_pipe(), if we need to release some pages
8049  * at the end of the spd in case we error'ed out in filling the pipe.
8050  */
8051 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8052 {
8053 	struct buffer_ref *ref =
8054 		(struct buffer_ref *)spd->partial[i].private;
8055 
8056 	buffer_ref_release(ref);
8057 	spd->partial[i].private = 0;
8058 }
8059 
8060 static ssize_t
8061 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8062 			    struct pipe_inode_info *pipe, size_t len,
8063 			    unsigned int flags)
8064 {
8065 	struct ftrace_buffer_info *info = file->private_data;
8066 	struct trace_iterator *iter = &info->iter;
8067 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8068 	struct page *pages_def[PIPE_DEF_BUFFERS];
8069 	struct splice_pipe_desc spd = {
8070 		.pages		= pages_def,
8071 		.partial	= partial_def,
8072 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8073 		.ops		= &buffer_pipe_buf_ops,
8074 		.spd_release	= buffer_spd_release,
8075 	};
8076 	struct buffer_ref *ref;
8077 	bool woken = false;
8078 	int page_size;
8079 	int entries, i;
8080 	ssize_t ret = 0;
8081 
8082 #ifdef CONFIG_TRACER_MAX_TRACE
8083 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8084 		return -EBUSY;
8085 #endif
8086 
8087 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8088 	if (*ppos & (page_size - 1))
8089 		return -EINVAL;
8090 
8091 	if (len & (page_size - 1)) {
8092 		if (len < page_size)
8093 			return -EINVAL;
8094 		len &= (~(page_size - 1));
8095 	}
8096 
8097 	if (splice_grow_spd(pipe, &spd))
8098 		return -ENOMEM;
8099 
8100  again:
8101 	trace_access_lock(iter->cpu_file);
8102 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8103 
8104 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8105 		struct page *page;
8106 		int r;
8107 
8108 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8109 		if (!ref) {
8110 			ret = -ENOMEM;
8111 			break;
8112 		}
8113 
8114 		refcount_set(&ref->refcount, 1);
8115 		ref->buffer = iter->array_buffer->buffer;
8116 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8117 		if (IS_ERR(ref->page)) {
8118 			ret = PTR_ERR(ref->page);
8119 			ref->page = NULL;
8120 			kfree(ref);
8121 			break;
8122 		}
8123 		ref->cpu = iter->cpu_file;
8124 
8125 		r = ring_buffer_read_page(ref->buffer, ref->page,
8126 					  len, iter->cpu_file, 1);
8127 		if (r < 0) {
8128 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8129 						   ref->page);
8130 			kfree(ref);
8131 			break;
8132 		}
8133 
8134 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8135 
8136 		spd.pages[i] = page;
8137 		spd.partial[i].len = page_size;
8138 		spd.partial[i].offset = 0;
8139 		spd.partial[i].private = (unsigned long)ref;
8140 		spd.nr_pages++;
8141 		*ppos += page_size;
8142 
8143 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8144 	}
8145 
8146 	trace_access_unlock(iter->cpu_file);
8147 	spd.nr_pages = i;
8148 
8149 	/* did we read anything? */
8150 	if (!spd.nr_pages) {
8151 
8152 		if (ret)
8153 			goto out;
8154 
8155 		if (woken)
8156 			goto out;
8157 
8158 		ret = -EAGAIN;
8159 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8160 			goto out;
8161 
8162 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8163 		if (ret)
8164 			goto out;
8165 
8166 		/* No need to wait after waking up when tracing is off */
8167 		if (!tracer_tracing_is_on(iter->tr))
8168 			goto out;
8169 
8170 		/* Iterate one more time to collect any new data then exit */
8171 		woken = true;
8172 
8173 		goto again;
8174 	}
8175 
8176 	ret = splice_to_pipe(pipe, &spd);
8177 out:
8178 	splice_shrink_spd(&spd);
8179 
8180 	return ret;
8181 }
8182 
8183 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8184 {
8185 	struct ftrace_buffer_info *info = file->private_data;
8186 	struct trace_iterator *iter = &info->iter;
8187 	int err;
8188 
8189 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8190 		if (!(file->f_flags & O_NONBLOCK)) {
8191 			err = ring_buffer_wait(iter->array_buffer->buffer,
8192 					       iter->cpu_file,
8193 					       iter->tr->buffer_percent,
8194 					       NULL, NULL);
8195 			if (err)
8196 				return err;
8197 		}
8198 
8199 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8200 						  iter->cpu_file);
8201 	} else if (cmd) {
8202 		return -ENOTTY;
8203 	}
8204 
8205 	/*
8206 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8207 	 * waiters
8208 	 */
8209 	mutex_lock(&trace_types_lock);
8210 
8211 	/* Make sure the waiters see the new wait_index */
8212 	(void)atomic_fetch_inc_release(&iter->wait_index);
8213 
8214 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8215 
8216 	mutex_unlock(&trace_types_lock);
8217 	return 0;
8218 }
8219 
8220 #ifdef CONFIG_TRACER_MAX_TRACE
8221 static int get_snapshot_map(struct trace_array *tr)
8222 {
8223 	int err = 0;
8224 
8225 	/*
8226 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8227 	 * take trace_types_lock. Instead use the specific
8228 	 * snapshot_trigger_lock.
8229 	 */
8230 	spin_lock(&tr->snapshot_trigger_lock);
8231 
8232 	if (tr->snapshot || tr->mapped == UINT_MAX)
8233 		err = -EBUSY;
8234 	else
8235 		tr->mapped++;
8236 
8237 	spin_unlock(&tr->snapshot_trigger_lock);
8238 
8239 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8240 	if (tr->mapped == 1)
8241 		synchronize_rcu();
8242 
8243 	return err;
8244 
8245 }
8246 static void put_snapshot_map(struct trace_array *tr)
8247 {
8248 	spin_lock(&tr->snapshot_trigger_lock);
8249 	if (!WARN_ON(!tr->mapped))
8250 		tr->mapped--;
8251 	spin_unlock(&tr->snapshot_trigger_lock);
8252 }
8253 #else
8254 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8255 static inline void put_snapshot_map(struct trace_array *tr) { }
8256 #endif
8257 
8258 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8259 {
8260 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8261 	struct trace_iterator *iter = &info->iter;
8262 
8263 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8264 	put_snapshot_map(iter->tr);
8265 }
8266 
8267 static const struct vm_operations_struct tracing_buffers_vmops = {
8268 	.close		= tracing_buffers_mmap_close,
8269 };
8270 
8271 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8272 {
8273 	struct ftrace_buffer_info *info = filp->private_data;
8274 	struct trace_iterator *iter = &info->iter;
8275 	int ret = 0;
8276 
8277 	/* Currently the boot mapped buffer is not supported for mmap */
8278 	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
8279 		return -ENODEV;
8280 
8281 	ret = get_snapshot_map(iter->tr);
8282 	if (ret)
8283 		return ret;
8284 
8285 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8286 	if (ret)
8287 		put_snapshot_map(iter->tr);
8288 
8289 	vma->vm_ops = &tracing_buffers_vmops;
8290 
8291 	return ret;
8292 }
8293 
8294 static const struct file_operations tracing_buffers_fops = {
8295 	.open		= tracing_buffers_open,
8296 	.read		= tracing_buffers_read,
8297 	.poll		= tracing_buffers_poll,
8298 	.release	= tracing_buffers_release,
8299 	.flush		= tracing_buffers_flush,
8300 	.splice_read	= tracing_buffers_splice_read,
8301 	.unlocked_ioctl = tracing_buffers_ioctl,
8302 	.mmap		= tracing_buffers_mmap,
8303 };
8304 
8305 static ssize_t
8306 tracing_stats_read(struct file *filp, char __user *ubuf,
8307 		   size_t count, loff_t *ppos)
8308 {
8309 	struct inode *inode = file_inode(filp);
8310 	struct trace_array *tr = inode->i_private;
8311 	struct array_buffer *trace_buf = &tr->array_buffer;
8312 	int cpu = tracing_get_cpu(inode);
8313 	struct trace_seq *s;
8314 	unsigned long cnt;
8315 	unsigned long long t;
8316 	unsigned long usec_rem;
8317 
8318 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8319 	if (!s)
8320 		return -ENOMEM;
8321 
8322 	trace_seq_init(s);
8323 
8324 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8325 	trace_seq_printf(s, "entries: %ld\n", cnt);
8326 
8327 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8328 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8329 
8330 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8331 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8332 
8333 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8334 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8335 
8336 	if (trace_clocks[tr->clock_id].in_ns) {
8337 		/* local or global for trace_clock */
8338 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8339 		usec_rem = do_div(t, USEC_PER_SEC);
8340 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8341 								t, usec_rem);
8342 
8343 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8344 		usec_rem = do_div(t, USEC_PER_SEC);
8345 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8346 	} else {
8347 		/* counter or tsc mode for trace_clock */
8348 		trace_seq_printf(s, "oldest event ts: %llu\n",
8349 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8350 
8351 		trace_seq_printf(s, "now ts: %llu\n",
8352 				ring_buffer_time_stamp(trace_buf->buffer));
8353 	}
8354 
8355 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8356 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8357 
8358 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8359 	trace_seq_printf(s, "read events: %ld\n", cnt);
8360 
8361 	count = simple_read_from_buffer(ubuf, count, ppos,
8362 					s->buffer, trace_seq_used(s));
8363 
8364 	kfree(s);
8365 
8366 	return count;
8367 }
8368 
8369 static const struct file_operations tracing_stats_fops = {
8370 	.open		= tracing_open_generic_tr,
8371 	.read		= tracing_stats_read,
8372 	.llseek		= generic_file_llseek,
8373 	.release	= tracing_release_generic_tr,
8374 };
8375 
8376 #ifdef CONFIG_DYNAMIC_FTRACE
8377 
8378 static ssize_t
8379 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8380 		  size_t cnt, loff_t *ppos)
8381 {
8382 	ssize_t ret;
8383 	char *buf;
8384 	int r;
8385 
8386 	/* 512 should be plenty to hold the amount needed */
8387 #define DYN_INFO_BUF_SIZE	512
8388 
8389 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8390 	if (!buf)
8391 		return -ENOMEM;
8392 
8393 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8394 		      "%ld pages:%ld groups: %ld\n"
8395 		      "ftrace boot update time = %llu (ns)\n"
8396 		      "ftrace module total update time = %llu (ns)\n",
8397 		      ftrace_update_tot_cnt,
8398 		      ftrace_number_of_pages,
8399 		      ftrace_number_of_groups,
8400 		      ftrace_update_time,
8401 		      ftrace_total_mod_time);
8402 
8403 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8404 	kfree(buf);
8405 	return ret;
8406 }
8407 
8408 static const struct file_operations tracing_dyn_info_fops = {
8409 	.open		= tracing_open_generic,
8410 	.read		= tracing_read_dyn_info,
8411 	.llseek		= generic_file_llseek,
8412 };
8413 #endif /* CONFIG_DYNAMIC_FTRACE */
8414 
8415 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8416 static void
8417 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8418 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8419 		void *data)
8420 {
8421 	tracing_snapshot_instance(tr);
8422 }
8423 
8424 static void
8425 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8426 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8427 		      void *data)
8428 {
8429 	struct ftrace_func_mapper *mapper = data;
8430 	long *count = NULL;
8431 
8432 	if (mapper)
8433 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8434 
8435 	if (count) {
8436 
8437 		if (*count <= 0)
8438 			return;
8439 
8440 		(*count)--;
8441 	}
8442 
8443 	tracing_snapshot_instance(tr);
8444 }
8445 
8446 static int
8447 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8448 		      struct ftrace_probe_ops *ops, void *data)
8449 {
8450 	struct ftrace_func_mapper *mapper = data;
8451 	long *count = NULL;
8452 
8453 	seq_printf(m, "%ps:", (void *)ip);
8454 
8455 	seq_puts(m, "snapshot");
8456 
8457 	if (mapper)
8458 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8459 
8460 	if (count)
8461 		seq_printf(m, ":count=%ld\n", *count);
8462 	else
8463 		seq_puts(m, ":unlimited\n");
8464 
8465 	return 0;
8466 }
8467 
8468 static int
8469 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8470 		     unsigned long ip, void *init_data, void **data)
8471 {
8472 	struct ftrace_func_mapper *mapper = *data;
8473 
8474 	if (!mapper) {
8475 		mapper = allocate_ftrace_func_mapper();
8476 		if (!mapper)
8477 			return -ENOMEM;
8478 		*data = mapper;
8479 	}
8480 
8481 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8482 }
8483 
8484 static void
8485 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8486 		     unsigned long ip, void *data)
8487 {
8488 	struct ftrace_func_mapper *mapper = data;
8489 
8490 	if (!ip) {
8491 		if (!mapper)
8492 			return;
8493 		free_ftrace_func_mapper(mapper, NULL);
8494 		return;
8495 	}
8496 
8497 	ftrace_func_mapper_remove_ip(mapper, ip);
8498 }
8499 
8500 static struct ftrace_probe_ops snapshot_probe_ops = {
8501 	.func			= ftrace_snapshot,
8502 	.print			= ftrace_snapshot_print,
8503 };
8504 
8505 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8506 	.func			= ftrace_count_snapshot,
8507 	.print			= ftrace_snapshot_print,
8508 	.init			= ftrace_snapshot_init,
8509 	.free			= ftrace_snapshot_free,
8510 };
8511 
8512 static int
8513 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8514 			       char *glob, char *cmd, char *param, int enable)
8515 {
8516 	struct ftrace_probe_ops *ops;
8517 	void *count = (void *)-1;
8518 	char *number;
8519 	int ret;
8520 
8521 	if (!tr)
8522 		return -ENODEV;
8523 
8524 	/* hash funcs only work with set_ftrace_filter */
8525 	if (!enable)
8526 		return -EINVAL;
8527 
8528 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8529 
8530 	if (glob[0] == '!') {
8531 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8532 		if (!ret)
8533 			tracing_disarm_snapshot(tr);
8534 
8535 		return ret;
8536 	}
8537 
8538 	if (!param)
8539 		goto out_reg;
8540 
8541 	number = strsep(&param, ":");
8542 
8543 	if (!strlen(number))
8544 		goto out_reg;
8545 
8546 	/*
8547 	 * We use the callback data field (which is a pointer)
8548 	 * as our counter.
8549 	 */
8550 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8551 	if (ret)
8552 		return ret;
8553 
8554  out_reg:
8555 	ret = tracing_arm_snapshot(tr);
8556 	if (ret < 0)
8557 		goto out;
8558 
8559 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8560 	if (ret < 0)
8561 		tracing_disarm_snapshot(tr);
8562  out:
8563 	return ret < 0 ? ret : 0;
8564 }
8565 
8566 static struct ftrace_func_command ftrace_snapshot_cmd = {
8567 	.name			= "snapshot",
8568 	.func			= ftrace_trace_snapshot_callback,
8569 };
8570 
8571 static __init int register_snapshot_cmd(void)
8572 {
8573 	return register_ftrace_command(&ftrace_snapshot_cmd);
8574 }
8575 #else
8576 static inline __init int register_snapshot_cmd(void) { return 0; }
8577 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8578 
8579 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8580 {
8581 	if (WARN_ON(!tr->dir))
8582 		return ERR_PTR(-ENODEV);
8583 
8584 	/* Top directory uses NULL as the parent */
8585 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8586 		return NULL;
8587 
8588 	/* All sub buffers have a descriptor */
8589 	return tr->dir;
8590 }
8591 
8592 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8593 {
8594 	struct dentry *d_tracer;
8595 
8596 	if (tr->percpu_dir)
8597 		return tr->percpu_dir;
8598 
8599 	d_tracer = tracing_get_dentry(tr);
8600 	if (IS_ERR(d_tracer))
8601 		return NULL;
8602 
8603 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8604 
8605 	MEM_FAIL(!tr->percpu_dir,
8606 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8607 
8608 	return tr->percpu_dir;
8609 }
8610 
8611 static struct dentry *
8612 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8613 		      void *data, long cpu, const struct file_operations *fops)
8614 {
8615 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8616 
8617 	if (ret) /* See tracing_get_cpu() */
8618 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8619 	return ret;
8620 }
8621 
8622 static void
8623 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8624 {
8625 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8626 	struct dentry *d_cpu;
8627 	char cpu_dir[30]; /* 30 characters should be more than enough */
8628 
8629 	if (!d_percpu)
8630 		return;
8631 
8632 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8633 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8634 	if (!d_cpu) {
8635 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8636 		return;
8637 	}
8638 
8639 	/* per cpu trace_pipe */
8640 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8641 				tr, cpu, &tracing_pipe_fops);
8642 
8643 	/* per cpu trace */
8644 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8645 				tr, cpu, &tracing_fops);
8646 
8647 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8648 				tr, cpu, &tracing_buffers_fops);
8649 
8650 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8651 				tr, cpu, &tracing_stats_fops);
8652 
8653 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8654 				tr, cpu, &tracing_entries_fops);
8655 
8656 	if (tr->range_addr_start)
8657 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8658 				      tr, cpu, &tracing_buffer_meta_fops);
8659 #ifdef CONFIG_TRACER_SNAPSHOT
8660 	if (!tr->range_addr_start) {
8661 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8662 				      tr, cpu, &snapshot_fops);
8663 
8664 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8665 				      tr, cpu, &snapshot_raw_fops);
8666 	}
8667 #endif
8668 }
8669 
8670 #ifdef CONFIG_FTRACE_SELFTEST
8671 /* Let selftest have access to static functions in this file */
8672 #include "trace_selftest.c"
8673 #endif
8674 
8675 static ssize_t
8676 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8677 			loff_t *ppos)
8678 {
8679 	struct trace_option_dentry *topt = filp->private_data;
8680 	char *buf;
8681 
8682 	if (topt->flags->val & topt->opt->bit)
8683 		buf = "1\n";
8684 	else
8685 		buf = "0\n";
8686 
8687 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8688 }
8689 
8690 static ssize_t
8691 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8692 			 loff_t *ppos)
8693 {
8694 	struct trace_option_dentry *topt = filp->private_data;
8695 	unsigned long val;
8696 	int ret;
8697 
8698 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8699 	if (ret)
8700 		return ret;
8701 
8702 	if (val != 0 && val != 1)
8703 		return -EINVAL;
8704 
8705 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8706 		mutex_lock(&trace_types_lock);
8707 		ret = __set_tracer_option(topt->tr, topt->flags,
8708 					  topt->opt, !val);
8709 		mutex_unlock(&trace_types_lock);
8710 		if (ret)
8711 			return ret;
8712 	}
8713 
8714 	*ppos += cnt;
8715 
8716 	return cnt;
8717 }
8718 
8719 static int tracing_open_options(struct inode *inode, struct file *filp)
8720 {
8721 	struct trace_option_dentry *topt = inode->i_private;
8722 	int ret;
8723 
8724 	ret = tracing_check_open_get_tr(topt->tr);
8725 	if (ret)
8726 		return ret;
8727 
8728 	filp->private_data = inode->i_private;
8729 	return 0;
8730 }
8731 
8732 static int tracing_release_options(struct inode *inode, struct file *file)
8733 {
8734 	struct trace_option_dentry *topt = file->private_data;
8735 
8736 	trace_array_put(topt->tr);
8737 	return 0;
8738 }
8739 
8740 static const struct file_operations trace_options_fops = {
8741 	.open = tracing_open_options,
8742 	.read = trace_options_read,
8743 	.write = trace_options_write,
8744 	.llseek	= generic_file_llseek,
8745 	.release = tracing_release_options,
8746 };
8747 
8748 /*
8749  * In order to pass in both the trace_array descriptor as well as the index
8750  * to the flag that the trace option file represents, the trace_array
8751  * has a character array of trace_flags_index[], which holds the index
8752  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8753  * The address of this character array is passed to the flag option file
8754  * read/write callbacks.
8755  *
8756  * In order to extract both the index and the trace_array descriptor,
8757  * get_tr_index() uses the following algorithm.
8758  *
8759  *   idx = *ptr;
8760  *
8761  * As the pointer itself contains the address of the index (remember
8762  * index[1] == 1).
8763  *
8764  * Then to get the trace_array descriptor, by subtracting that index
8765  * from the ptr, we get to the start of the index itself.
8766  *
8767  *   ptr - idx == &index[0]
8768  *
8769  * Then a simple container_of() from that pointer gets us to the
8770  * trace_array descriptor.
8771  */
8772 static void get_tr_index(void *data, struct trace_array **ptr,
8773 			 unsigned int *pindex)
8774 {
8775 	*pindex = *(unsigned char *)data;
8776 
8777 	*ptr = container_of(data - *pindex, struct trace_array,
8778 			    trace_flags_index);
8779 }
8780 
8781 static ssize_t
8782 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8783 			loff_t *ppos)
8784 {
8785 	void *tr_index = filp->private_data;
8786 	struct trace_array *tr;
8787 	unsigned int index;
8788 	char *buf;
8789 
8790 	get_tr_index(tr_index, &tr, &index);
8791 
8792 	if (tr->trace_flags & (1 << index))
8793 		buf = "1\n";
8794 	else
8795 		buf = "0\n";
8796 
8797 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8798 }
8799 
8800 static ssize_t
8801 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8802 			 loff_t *ppos)
8803 {
8804 	void *tr_index = filp->private_data;
8805 	struct trace_array *tr;
8806 	unsigned int index;
8807 	unsigned long val;
8808 	int ret;
8809 
8810 	get_tr_index(tr_index, &tr, &index);
8811 
8812 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8813 	if (ret)
8814 		return ret;
8815 
8816 	if (val != 0 && val != 1)
8817 		return -EINVAL;
8818 
8819 	mutex_lock(&event_mutex);
8820 	mutex_lock(&trace_types_lock);
8821 	ret = set_tracer_flag(tr, 1 << index, val);
8822 	mutex_unlock(&trace_types_lock);
8823 	mutex_unlock(&event_mutex);
8824 
8825 	if (ret < 0)
8826 		return ret;
8827 
8828 	*ppos += cnt;
8829 
8830 	return cnt;
8831 }
8832 
8833 static const struct file_operations trace_options_core_fops = {
8834 	.open = tracing_open_generic,
8835 	.read = trace_options_core_read,
8836 	.write = trace_options_core_write,
8837 	.llseek = generic_file_llseek,
8838 };
8839 
8840 struct dentry *trace_create_file(const char *name,
8841 				 umode_t mode,
8842 				 struct dentry *parent,
8843 				 void *data,
8844 				 const struct file_operations *fops)
8845 {
8846 	struct dentry *ret;
8847 
8848 	ret = tracefs_create_file(name, mode, parent, data, fops);
8849 	if (!ret)
8850 		pr_warn("Could not create tracefs '%s' entry\n", name);
8851 
8852 	return ret;
8853 }
8854 
8855 
8856 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8857 {
8858 	struct dentry *d_tracer;
8859 
8860 	if (tr->options)
8861 		return tr->options;
8862 
8863 	d_tracer = tracing_get_dentry(tr);
8864 	if (IS_ERR(d_tracer))
8865 		return NULL;
8866 
8867 	tr->options = tracefs_create_dir("options", d_tracer);
8868 	if (!tr->options) {
8869 		pr_warn("Could not create tracefs directory 'options'\n");
8870 		return NULL;
8871 	}
8872 
8873 	return tr->options;
8874 }
8875 
8876 static void
8877 create_trace_option_file(struct trace_array *tr,
8878 			 struct trace_option_dentry *topt,
8879 			 struct tracer_flags *flags,
8880 			 struct tracer_opt *opt)
8881 {
8882 	struct dentry *t_options;
8883 
8884 	t_options = trace_options_init_dentry(tr);
8885 	if (!t_options)
8886 		return;
8887 
8888 	topt->flags = flags;
8889 	topt->opt = opt;
8890 	topt->tr = tr;
8891 
8892 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8893 					t_options, topt, &trace_options_fops);
8894 
8895 }
8896 
8897 static void
8898 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8899 {
8900 	struct trace_option_dentry *topts;
8901 	struct trace_options *tr_topts;
8902 	struct tracer_flags *flags;
8903 	struct tracer_opt *opts;
8904 	int cnt;
8905 	int i;
8906 
8907 	if (!tracer)
8908 		return;
8909 
8910 	flags = tracer->flags;
8911 
8912 	if (!flags || !flags->opts)
8913 		return;
8914 
8915 	/*
8916 	 * If this is an instance, only create flags for tracers
8917 	 * the instance may have.
8918 	 */
8919 	if (!trace_ok_for_array(tracer, tr))
8920 		return;
8921 
8922 	for (i = 0; i < tr->nr_topts; i++) {
8923 		/* Make sure there's no duplicate flags. */
8924 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8925 			return;
8926 	}
8927 
8928 	opts = flags->opts;
8929 
8930 	for (cnt = 0; opts[cnt].name; cnt++)
8931 		;
8932 
8933 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8934 	if (!topts)
8935 		return;
8936 
8937 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8938 			    GFP_KERNEL);
8939 	if (!tr_topts) {
8940 		kfree(topts);
8941 		return;
8942 	}
8943 
8944 	tr->topts = tr_topts;
8945 	tr->topts[tr->nr_topts].tracer = tracer;
8946 	tr->topts[tr->nr_topts].topts = topts;
8947 	tr->nr_topts++;
8948 
8949 	for (cnt = 0; opts[cnt].name; cnt++) {
8950 		create_trace_option_file(tr, &topts[cnt], flags,
8951 					 &opts[cnt]);
8952 		MEM_FAIL(topts[cnt].entry == NULL,
8953 			  "Failed to create trace option: %s",
8954 			  opts[cnt].name);
8955 	}
8956 }
8957 
8958 static struct dentry *
8959 create_trace_option_core_file(struct trace_array *tr,
8960 			      const char *option, long index)
8961 {
8962 	struct dentry *t_options;
8963 
8964 	t_options = trace_options_init_dentry(tr);
8965 	if (!t_options)
8966 		return NULL;
8967 
8968 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8969 				 (void *)&tr->trace_flags_index[index],
8970 				 &trace_options_core_fops);
8971 }
8972 
8973 static void create_trace_options_dir(struct trace_array *tr)
8974 {
8975 	struct dentry *t_options;
8976 	bool top_level = tr == &global_trace;
8977 	int i;
8978 
8979 	t_options = trace_options_init_dentry(tr);
8980 	if (!t_options)
8981 		return;
8982 
8983 	for (i = 0; trace_options[i]; i++) {
8984 		if (top_level ||
8985 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8986 			create_trace_option_core_file(tr, trace_options[i], i);
8987 	}
8988 }
8989 
8990 static ssize_t
8991 rb_simple_read(struct file *filp, char __user *ubuf,
8992 	       size_t cnt, loff_t *ppos)
8993 {
8994 	struct trace_array *tr = filp->private_data;
8995 	char buf[64];
8996 	int r;
8997 
8998 	r = tracer_tracing_is_on(tr);
8999 	r = sprintf(buf, "%d\n", r);
9000 
9001 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9002 }
9003 
9004 static ssize_t
9005 rb_simple_write(struct file *filp, const char __user *ubuf,
9006 		size_t cnt, loff_t *ppos)
9007 {
9008 	struct trace_array *tr = filp->private_data;
9009 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9010 	unsigned long val;
9011 	int ret;
9012 
9013 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9014 	if (ret)
9015 		return ret;
9016 
9017 	if (buffer) {
9018 		mutex_lock(&trace_types_lock);
9019 		if (!!val == tracer_tracing_is_on(tr)) {
9020 			val = 0; /* do nothing */
9021 		} else if (val) {
9022 			tracer_tracing_on(tr);
9023 			if (tr->current_trace->start)
9024 				tr->current_trace->start(tr);
9025 		} else {
9026 			tracer_tracing_off(tr);
9027 			if (tr->current_trace->stop)
9028 				tr->current_trace->stop(tr);
9029 			/* Wake up any waiters */
9030 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9031 		}
9032 		mutex_unlock(&trace_types_lock);
9033 	}
9034 
9035 	(*ppos)++;
9036 
9037 	return cnt;
9038 }
9039 
9040 static const struct file_operations rb_simple_fops = {
9041 	.open		= tracing_open_generic_tr,
9042 	.read		= rb_simple_read,
9043 	.write		= rb_simple_write,
9044 	.release	= tracing_release_generic_tr,
9045 	.llseek		= default_llseek,
9046 };
9047 
9048 static ssize_t
9049 buffer_percent_read(struct file *filp, char __user *ubuf,
9050 		    size_t cnt, loff_t *ppos)
9051 {
9052 	struct trace_array *tr = filp->private_data;
9053 	char buf[64];
9054 	int r;
9055 
9056 	r = tr->buffer_percent;
9057 	r = sprintf(buf, "%d\n", r);
9058 
9059 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9060 }
9061 
9062 static ssize_t
9063 buffer_percent_write(struct file *filp, const char __user *ubuf,
9064 		     size_t cnt, loff_t *ppos)
9065 {
9066 	struct trace_array *tr = filp->private_data;
9067 	unsigned long val;
9068 	int ret;
9069 
9070 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9071 	if (ret)
9072 		return ret;
9073 
9074 	if (val > 100)
9075 		return -EINVAL;
9076 
9077 	tr->buffer_percent = val;
9078 
9079 	(*ppos)++;
9080 
9081 	return cnt;
9082 }
9083 
9084 static const struct file_operations buffer_percent_fops = {
9085 	.open		= tracing_open_generic_tr,
9086 	.read		= buffer_percent_read,
9087 	.write		= buffer_percent_write,
9088 	.release	= tracing_release_generic_tr,
9089 	.llseek		= default_llseek,
9090 };
9091 
9092 static ssize_t
9093 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9094 {
9095 	struct trace_array *tr = filp->private_data;
9096 	size_t size;
9097 	char buf[64];
9098 	int order;
9099 	int r;
9100 
9101 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9102 	size = (PAGE_SIZE << order) / 1024;
9103 
9104 	r = sprintf(buf, "%zd\n", size);
9105 
9106 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9107 }
9108 
9109 static ssize_t
9110 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9111 			 size_t cnt, loff_t *ppos)
9112 {
9113 	struct trace_array *tr = filp->private_data;
9114 	unsigned long val;
9115 	int old_order;
9116 	int order;
9117 	int pages;
9118 	int ret;
9119 
9120 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9121 	if (ret)
9122 		return ret;
9123 
9124 	val *= 1024; /* value passed in is in KB */
9125 
9126 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9127 	order = fls(pages - 1);
9128 
9129 	/* limit between 1 and 128 system pages */
9130 	if (order < 0 || order > 7)
9131 		return -EINVAL;
9132 
9133 	/* Do not allow tracing while changing the order of the ring buffer */
9134 	tracing_stop_tr(tr);
9135 
9136 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9137 	if (old_order == order)
9138 		goto out;
9139 
9140 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9141 	if (ret)
9142 		goto out;
9143 
9144 #ifdef CONFIG_TRACER_MAX_TRACE
9145 
9146 	if (!tr->allocated_snapshot)
9147 		goto out_max;
9148 
9149 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9150 	if (ret) {
9151 		/* Put back the old order */
9152 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9153 		if (WARN_ON_ONCE(cnt)) {
9154 			/*
9155 			 * AARGH! We are left with different orders!
9156 			 * The max buffer is our "snapshot" buffer.
9157 			 * When a tracer needs a snapshot (one of the
9158 			 * latency tracers), it swaps the max buffer
9159 			 * with the saved snap shot. We succeeded to
9160 			 * update the order of the main buffer, but failed to
9161 			 * update the order of the max buffer. But when we tried
9162 			 * to reset the main buffer to the original size, we
9163 			 * failed there too. This is very unlikely to
9164 			 * happen, but if it does, warn and kill all
9165 			 * tracing.
9166 			 */
9167 			tracing_disabled = 1;
9168 		}
9169 		goto out;
9170 	}
9171  out_max:
9172 #endif
9173 	(*ppos)++;
9174  out:
9175 	if (ret)
9176 		cnt = ret;
9177 	tracing_start_tr(tr);
9178 	return cnt;
9179 }
9180 
9181 static const struct file_operations buffer_subbuf_size_fops = {
9182 	.open		= tracing_open_generic_tr,
9183 	.read		= buffer_subbuf_size_read,
9184 	.write		= buffer_subbuf_size_write,
9185 	.release	= tracing_release_generic_tr,
9186 	.llseek		= default_llseek,
9187 };
9188 
9189 static struct dentry *trace_instance_dir;
9190 
9191 static void
9192 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9193 
9194 static int
9195 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9196 {
9197 	enum ring_buffer_flags rb_flags;
9198 
9199 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9200 
9201 	buf->tr = tr;
9202 
9203 	if (tr->range_addr_start && tr->range_addr_size) {
9204 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9205 						      tr->range_addr_start,
9206 						      tr->range_addr_size);
9207 
9208 		ring_buffer_last_boot_delta(buf->buffer,
9209 					    &tr->text_delta, &tr->data_delta);
9210 		/*
9211 		 * This is basically the same as a mapped buffer,
9212 		 * with the same restrictions.
9213 		 */
9214 		tr->mapped++;
9215 	} else {
9216 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9217 	}
9218 	if (!buf->buffer)
9219 		return -ENOMEM;
9220 
9221 	buf->data = alloc_percpu(struct trace_array_cpu);
9222 	if (!buf->data) {
9223 		ring_buffer_free(buf->buffer);
9224 		buf->buffer = NULL;
9225 		return -ENOMEM;
9226 	}
9227 
9228 	/* Allocate the first page for all buffers */
9229 	set_buffer_entries(&tr->array_buffer,
9230 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9231 
9232 	return 0;
9233 }
9234 
9235 static void free_trace_buffer(struct array_buffer *buf)
9236 {
9237 	if (buf->buffer) {
9238 		ring_buffer_free(buf->buffer);
9239 		buf->buffer = NULL;
9240 		free_percpu(buf->data);
9241 		buf->data = NULL;
9242 	}
9243 }
9244 
9245 static int allocate_trace_buffers(struct trace_array *tr, int size)
9246 {
9247 	int ret;
9248 
9249 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9250 	if (ret)
9251 		return ret;
9252 
9253 #ifdef CONFIG_TRACER_MAX_TRACE
9254 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9255 	if (tr->range_addr_start)
9256 		return 0;
9257 
9258 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9259 				    allocate_snapshot ? size : 1);
9260 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9261 		free_trace_buffer(&tr->array_buffer);
9262 		return -ENOMEM;
9263 	}
9264 	tr->allocated_snapshot = allocate_snapshot;
9265 
9266 	allocate_snapshot = false;
9267 #endif
9268 
9269 	return 0;
9270 }
9271 
9272 static void free_trace_buffers(struct trace_array *tr)
9273 {
9274 	if (!tr)
9275 		return;
9276 
9277 	free_trace_buffer(&tr->array_buffer);
9278 
9279 #ifdef CONFIG_TRACER_MAX_TRACE
9280 	free_trace_buffer(&tr->max_buffer);
9281 #endif
9282 }
9283 
9284 static void init_trace_flags_index(struct trace_array *tr)
9285 {
9286 	int i;
9287 
9288 	/* Used by the trace options files */
9289 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9290 		tr->trace_flags_index[i] = i;
9291 }
9292 
9293 static void __update_tracer_options(struct trace_array *tr)
9294 {
9295 	struct tracer *t;
9296 
9297 	for (t = trace_types; t; t = t->next)
9298 		add_tracer_options(tr, t);
9299 }
9300 
9301 static void update_tracer_options(struct trace_array *tr)
9302 {
9303 	mutex_lock(&trace_types_lock);
9304 	tracer_options_updated = true;
9305 	__update_tracer_options(tr);
9306 	mutex_unlock(&trace_types_lock);
9307 }
9308 
9309 /* Must have trace_types_lock held */
9310 struct trace_array *trace_array_find(const char *instance)
9311 {
9312 	struct trace_array *tr, *found = NULL;
9313 
9314 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9315 		if (tr->name && strcmp(tr->name, instance) == 0) {
9316 			found = tr;
9317 			break;
9318 		}
9319 	}
9320 
9321 	return found;
9322 }
9323 
9324 struct trace_array *trace_array_find_get(const char *instance)
9325 {
9326 	struct trace_array *tr;
9327 
9328 	mutex_lock(&trace_types_lock);
9329 	tr = trace_array_find(instance);
9330 	if (tr)
9331 		tr->ref++;
9332 	mutex_unlock(&trace_types_lock);
9333 
9334 	return tr;
9335 }
9336 
9337 static int trace_array_create_dir(struct trace_array *tr)
9338 {
9339 	int ret;
9340 
9341 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9342 	if (!tr->dir)
9343 		return -EINVAL;
9344 
9345 	ret = event_trace_add_tracer(tr->dir, tr);
9346 	if (ret) {
9347 		tracefs_remove(tr->dir);
9348 		return ret;
9349 	}
9350 
9351 	init_tracer_tracefs(tr, tr->dir);
9352 	__update_tracer_options(tr);
9353 
9354 	return ret;
9355 }
9356 
9357 static struct trace_array *
9358 trace_array_create_systems(const char *name, const char *systems,
9359 			   unsigned long range_addr_start,
9360 			   unsigned long range_addr_size)
9361 {
9362 	struct trace_array *tr;
9363 	int ret;
9364 
9365 	ret = -ENOMEM;
9366 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9367 	if (!tr)
9368 		return ERR_PTR(ret);
9369 
9370 	tr->name = kstrdup(name, GFP_KERNEL);
9371 	if (!tr->name)
9372 		goto out_free_tr;
9373 
9374 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9375 		goto out_free_tr;
9376 
9377 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9378 		goto out_free_tr;
9379 
9380 	if (systems) {
9381 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9382 		if (!tr->system_names)
9383 			goto out_free_tr;
9384 	}
9385 
9386 	/* Only for boot up memory mapped ring buffers */
9387 	tr->range_addr_start = range_addr_start;
9388 	tr->range_addr_size = range_addr_size;
9389 
9390 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9391 
9392 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9393 
9394 	raw_spin_lock_init(&tr->start_lock);
9395 
9396 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9397 #ifdef CONFIG_TRACER_MAX_TRACE
9398 	spin_lock_init(&tr->snapshot_trigger_lock);
9399 #endif
9400 	tr->current_trace = &nop_trace;
9401 
9402 	INIT_LIST_HEAD(&tr->systems);
9403 	INIT_LIST_HEAD(&tr->events);
9404 	INIT_LIST_HEAD(&tr->hist_vars);
9405 	INIT_LIST_HEAD(&tr->err_log);
9406 
9407 #ifdef CONFIG_MODULES
9408 	INIT_LIST_HEAD(&tr->mod_events);
9409 #endif
9410 
9411 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9412 		goto out_free_tr;
9413 
9414 	/* The ring buffer is defaultly expanded */
9415 	trace_set_ring_buffer_expanded(tr);
9416 
9417 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9418 		goto out_free_tr;
9419 
9420 	ftrace_init_trace_array(tr);
9421 
9422 	init_trace_flags_index(tr);
9423 
9424 	if (trace_instance_dir) {
9425 		ret = trace_array_create_dir(tr);
9426 		if (ret)
9427 			goto out_free_tr;
9428 	} else
9429 		__trace_early_add_events(tr);
9430 
9431 	list_add(&tr->list, &ftrace_trace_arrays);
9432 
9433 	tr->ref++;
9434 
9435 	return tr;
9436 
9437  out_free_tr:
9438 	ftrace_free_ftrace_ops(tr);
9439 	free_trace_buffers(tr);
9440 	free_cpumask_var(tr->pipe_cpumask);
9441 	free_cpumask_var(tr->tracing_cpumask);
9442 	kfree_const(tr->system_names);
9443 	kfree(tr->name);
9444 	kfree(tr);
9445 
9446 	return ERR_PTR(ret);
9447 }
9448 
9449 static struct trace_array *trace_array_create(const char *name)
9450 {
9451 	return trace_array_create_systems(name, NULL, 0, 0);
9452 }
9453 
9454 static int instance_mkdir(const char *name)
9455 {
9456 	struct trace_array *tr;
9457 	int ret;
9458 
9459 	guard(mutex)(&event_mutex);
9460 	guard(mutex)(&trace_types_lock);
9461 
9462 	ret = -EEXIST;
9463 	if (trace_array_find(name))
9464 		return -EEXIST;
9465 
9466 	tr = trace_array_create(name);
9467 
9468 	ret = PTR_ERR_OR_ZERO(tr);
9469 
9470 	return ret;
9471 }
9472 
9473 static u64 map_pages(u64 start, u64 size)
9474 {
9475 	struct page **pages;
9476 	phys_addr_t page_start;
9477 	unsigned int page_count;
9478 	unsigned int i;
9479 	void *vaddr;
9480 
9481 	page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9482 
9483 	page_start = start;
9484 	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9485 	if (!pages)
9486 		return 0;
9487 
9488 	for (i = 0; i < page_count; i++) {
9489 		phys_addr_t addr = page_start + i * PAGE_SIZE;
9490 		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9491 	}
9492 	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9493 	kfree(pages);
9494 
9495 	return (u64)(unsigned long)vaddr;
9496 }
9497 
9498 /**
9499  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9500  * @name: The name of the trace array to be looked up/created.
9501  * @systems: A list of systems to create event directories for (NULL for all)
9502  *
9503  * Returns pointer to trace array with given name.
9504  * NULL, if it cannot be created.
9505  *
9506  * NOTE: This function increments the reference counter associated with the
9507  * trace array returned. This makes sure it cannot be freed while in use.
9508  * Use trace_array_put() once the trace array is no longer needed.
9509  * If the trace_array is to be freed, trace_array_destroy() needs to
9510  * be called after the trace_array_put(), or simply let user space delete
9511  * it from the tracefs instances directory. But until the
9512  * trace_array_put() is called, user space can not delete it.
9513  *
9514  */
9515 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9516 {
9517 	struct trace_array *tr;
9518 
9519 	guard(mutex)(&event_mutex);
9520 	guard(mutex)(&trace_types_lock);
9521 
9522 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9523 		if (tr->name && strcmp(tr->name, name) == 0) {
9524 			tr->ref++;
9525 			return tr;
9526 		}
9527 	}
9528 
9529 	tr = trace_array_create_systems(name, systems, 0, 0);
9530 
9531 	if (IS_ERR(tr))
9532 		tr = NULL;
9533 	else
9534 		tr->ref++;
9535 
9536 	return tr;
9537 }
9538 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9539 
9540 static int __remove_instance(struct trace_array *tr)
9541 {
9542 	int i;
9543 
9544 	/* Reference counter for a newly created trace array = 1. */
9545 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9546 		return -EBUSY;
9547 
9548 	list_del(&tr->list);
9549 
9550 	/* Disable all the flags that were enabled coming in */
9551 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9552 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9553 			set_tracer_flag(tr, 1 << i, 0);
9554 	}
9555 
9556 	if (printk_trace == tr)
9557 		update_printk_trace(&global_trace);
9558 
9559 	tracing_set_nop(tr);
9560 	clear_ftrace_function_probes(tr);
9561 	event_trace_del_tracer(tr);
9562 	ftrace_clear_pids(tr);
9563 	ftrace_destroy_function_files(tr);
9564 	tracefs_remove(tr->dir);
9565 	free_percpu(tr->last_func_repeats);
9566 	free_trace_buffers(tr);
9567 	clear_tracing_err_log(tr);
9568 
9569 	for (i = 0; i < tr->nr_topts; i++) {
9570 		kfree(tr->topts[i].topts);
9571 	}
9572 	kfree(tr->topts);
9573 
9574 	free_cpumask_var(tr->pipe_cpumask);
9575 	free_cpumask_var(tr->tracing_cpumask);
9576 	kfree_const(tr->system_names);
9577 	kfree(tr->name);
9578 	kfree(tr);
9579 
9580 	return 0;
9581 }
9582 
9583 int trace_array_destroy(struct trace_array *this_tr)
9584 {
9585 	struct trace_array *tr;
9586 
9587 	if (!this_tr)
9588 		return -EINVAL;
9589 
9590 	guard(mutex)(&event_mutex);
9591 	guard(mutex)(&trace_types_lock);
9592 
9593 
9594 	/* Making sure trace array exists before destroying it. */
9595 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9596 		if (tr == this_tr)
9597 			return __remove_instance(tr);
9598 	}
9599 
9600 	return -ENODEV;
9601 }
9602 EXPORT_SYMBOL_GPL(trace_array_destroy);
9603 
9604 static int instance_rmdir(const char *name)
9605 {
9606 	struct trace_array *tr;
9607 
9608 	guard(mutex)(&event_mutex);
9609 	guard(mutex)(&trace_types_lock);
9610 
9611 	tr = trace_array_find(name);
9612 	if (!tr)
9613 		return -ENODEV;
9614 
9615 	return __remove_instance(tr);
9616 }
9617 
9618 static __init void create_trace_instances(struct dentry *d_tracer)
9619 {
9620 	struct trace_array *tr;
9621 
9622 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9623 							 instance_mkdir,
9624 							 instance_rmdir);
9625 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9626 		return;
9627 
9628 	guard(mutex)(&event_mutex);
9629 	guard(mutex)(&trace_types_lock);
9630 
9631 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9632 		if (!tr->name)
9633 			continue;
9634 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9635 			     "Failed to create instance directory\n"))
9636 			return;
9637 	}
9638 }
9639 
9640 static void
9641 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9642 {
9643 	int cpu;
9644 
9645 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9646 			tr, &show_traces_fops);
9647 
9648 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9649 			tr, &set_tracer_fops);
9650 
9651 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9652 			  tr, &tracing_cpumask_fops);
9653 
9654 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9655 			  tr, &tracing_iter_fops);
9656 
9657 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9658 			  tr, &tracing_fops);
9659 
9660 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9661 			  tr, &tracing_pipe_fops);
9662 
9663 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9664 			  tr, &tracing_entries_fops);
9665 
9666 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9667 			  tr, &tracing_total_entries_fops);
9668 
9669 	trace_create_file("free_buffer", 0200, d_tracer,
9670 			  tr, &tracing_free_buffer_fops);
9671 
9672 	trace_create_file("trace_marker", 0220, d_tracer,
9673 			  tr, &tracing_mark_fops);
9674 
9675 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9676 
9677 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9678 			  tr, &tracing_mark_raw_fops);
9679 
9680 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9681 			  &trace_clock_fops);
9682 
9683 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9684 			  tr, &rb_simple_fops);
9685 
9686 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9687 			  &trace_time_stamp_mode_fops);
9688 
9689 	tr->buffer_percent = 50;
9690 
9691 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9692 			tr, &buffer_percent_fops);
9693 
9694 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9695 			  tr, &buffer_subbuf_size_fops);
9696 
9697 	create_trace_options_dir(tr);
9698 
9699 #ifdef CONFIG_TRACER_MAX_TRACE
9700 	trace_create_maxlat_file(tr, d_tracer);
9701 #endif
9702 
9703 	if (ftrace_create_function_files(tr, d_tracer))
9704 		MEM_FAIL(1, "Could not allocate function filter files");
9705 
9706 	if (tr->range_addr_start) {
9707 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9708 				  tr, &last_boot_fops);
9709 #ifdef CONFIG_TRACER_SNAPSHOT
9710 	} else {
9711 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9712 				  tr, &snapshot_fops);
9713 #endif
9714 	}
9715 
9716 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9717 			  tr, &tracing_err_log_fops);
9718 
9719 	for_each_tracing_cpu(cpu)
9720 		tracing_init_tracefs_percpu(tr, cpu);
9721 
9722 	ftrace_init_tracefs(tr, d_tracer);
9723 }
9724 
9725 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9726 {
9727 	struct vfsmount *mnt;
9728 	struct file_system_type *type;
9729 
9730 	/*
9731 	 * To maintain backward compatibility for tools that mount
9732 	 * debugfs to get to the tracing facility, tracefs is automatically
9733 	 * mounted to the debugfs/tracing directory.
9734 	 */
9735 	type = get_fs_type("tracefs");
9736 	if (!type)
9737 		return NULL;
9738 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9739 	put_filesystem(type);
9740 	if (IS_ERR(mnt))
9741 		return NULL;
9742 	mntget(mnt);
9743 
9744 	return mnt;
9745 }
9746 
9747 /**
9748  * tracing_init_dentry - initialize top level trace array
9749  *
9750  * This is called when creating files or directories in the tracing
9751  * directory. It is called via fs_initcall() by any of the boot up code
9752  * and expects to return the dentry of the top level tracing directory.
9753  */
9754 int tracing_init_dentry(void)
9755 {
9756 	struct trace_array *tr = &global_trace;
9757 
9758 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9759 		pr_warn("Tracing disabled due to lockdown\n");
9760 		return -EPERM;
9761 	}
9762 
9763 	/* The top level trace array uses  NULL as parent */
9764 	if (tr->dir)
9765 		return 0;
9766 
9767 	if (WARN_ON(!tracefs_initialized()))
9768 		return -ENODEV;
9769 
9770 	/*
9771 	 * As there may still be users that expect the tracing
9772 	 * files to exist in debugfs/tracing, we must automount
9773 	 * the tracefs file system there, so older tools still
9774 	 * work with the newer kernel.
9775 	 */
9776 	tr->dir = debugfs_create_automount("tracing", NULL,
9777 					   trace_automount, NULL);
9778 
9779 	return 0;
9780 }
9781 
9782 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9783 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9784 
9785 static struct workqueue_struct *eval_map_wq __initdata;
9786 static struct work_struct eval_map_work __initdata;
9787 static struct work_struct tracerfs_init_work __initdata;
9788 
9789 static void __init eval_map_work_func(struct work_struct *work)
9790 {
9791 	int len;
9792 
9793 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9794 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9795 }
9796 
9797 static int __init trace_eval_init(void)
9798 {
9799 	INIT_WORK(&eval_map_work, eval_map_work_func);
9800 
9801 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9802 	if (!eval_map_wq) {
9803 		pr_err("Unable to allocate eval_map_wq\n");
9804 		/* Do work here */
9805 		eval_map_work_func(&eval_map_work);
9806 		return -ENOMEM;
9807 	}
9808 
9809 	queue_work(eval_map_wq, &eval_map_work);
9810 	return 0;
9811 }
9812 
9813 subsys_initcall(trace_eval_init);
9814 
9815 static int __init trace_eval_sync(void)
9816 {
9817 	/* Make sure the eval map updates are finished */
9818 	if (eval_map_wq)
9819 		destroy_workqueue(eval_map_wq);
9820 	return 0;
9821 }
9822 
9823 late_initcall_sync(trace_eval_sync);
9824 
9825 
9826 #ifdef CONFIG_MODULES
9827 
9828 bool module_exists(const char *module)
9829 {
9830 	/* All modules have the symbol __this_module */
9831 	static const char this_mod[] = "__this_module";
9832 	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
9833 	unsigned long val;
9834 	int n;
9835 
9836 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
9837 
9838 	if (n > sizeof(modname) - 1)
9839 		return false;
9840 
9841 	val = module_kallsyms_lookup_name(modname);
9842 	return val != 0;
9843 }
9844 
9845 static void trace_module_add_evals(struct module *mod)
9846 {
9847 	if (!mod->num_trace_evals)
9848 		return;
9849 
9850 	/*
9851 	 * Modules with bad taint do not have events created, do
9852 	 * not bother with enums either.
9853 	 */
9854 	if (trace_module_has_bad_taint(mod))
9855 		return;
9856 
9857 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9858 }
9859 
9860 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9861 static void trace_module_remove_evals(struct module *mod)
9862 {
9863 	union trace_eval_map_item *map;
9864 	union trace_eval_map_item **last = &trace_eval_maps;
9865 
9866 	if (!mod->num_trace_evals)
9867 		return;
9868 
9869 	guard(mutex)(&trace_eval_mutex);
9870 
9871 	map = trace_eval_maps;
9872 
9873 	while (map) {
9874 		if (map->head.mod == mod)
9875 			break;
9876 		map = trace_eval_jmp_to_tail(map);
9877 		last = &map->tail.next;
9878 		map = map->tail.next;
9879 	}
9880 	if (!map)
9881 		return;
9882 
9883 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9884 	kfree(map);
9885 }
9886 #else
9887 static inline void trace_module_remove_evals(struct module *mod) { }
9888 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9889 
9890 static int trace_module_notify(struct notifier_block *self,
9891 			       unsigned long val, void *data)
9892 {
9893 	struct module *mod = data;
9894 
9895 	switch (val) {
9896 	case MODULE_STATE_COMING:
9897 		trace_module_add_evals(mod);
9898 		break;
9899 	case MODULE_STATE_GOING:
9900 		trace_module_remove_evals(mod);
9901 		break;
9902 	}
9903 
9904 	return NOTIFY_OK;
9905 }
9906 
9907 static struct notifier_block trace_module_nb = {
9908 	.notifier_call = trace_module_notify,
9909 	.priority = 0,
9910 };
9911 #endif /* CONFIG_MODULES */
9912 
9913 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9914 {
9915 
9916 	event_trace_init();
9917 
9918 	init_tracer_tracefs(&global_trace, NULL);
9919 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9920 
9921 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9922 			&global_trace, &tracing_thresh_fops);
9923 
9924 	trace_create_file("README", TRACE_MODE_READ, NULL,
9925 			NULL, &tracing_readme_fops);
9926 
9927 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9928 			NULL, &tracing_saved_cmdlines_fops);
9929 
9930 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9931 			  NULL, &tracing_saved_cmdlines_size_fops);
9932 
9933 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9934 			NULL, &tracing_saved_tgids_fops);
9935 
9936 	trace_create_eval_file(NULL);
9937 
9938 #ifdef CONFIG_MODULES
9939 	register_module_notifier(&trace_module_nb);
9940 #endif
9941 
9942 #ifdef CONFIG_DYNAMIC_FTRACE
9943 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9944 			NULL, &tracing_dyn_info_fops);
9945 #endif
9946 
9947 	create_trace_instances(NULL);
9948 
9949 	update_tracer_options(&global_trace);
9950 }
9951 
9952 static __init int tracer_init_tracefs(void)
9953 {
9954 	int ret;
9955 
9956 	trace_access_lock_init();
9957 
9958 	ret = tracing_init_dentry();
9959 	if (ret)
9960 		return 0;
9961 
9962 	if (eval_map_wq) {
9963 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9964 		queue_work(eval_map_wq, &tracerfs_init_work);
9965 	} else {
9966 		tracer_init_tracefs_work_func(NULL);
9967 	}
9968 
9969 	rv_init_interface();
9970 
9971 	return 0;
9972 }
9973 
9974 fs_initcall(tracer_init_tracefs);
9975 
9976 static int trace_die_panic_handler(struct notifier_block *self,
9977 				unsigned long ev, void *unused);
9978 
9979 static struct notifier_block trace_panic_notifier = {
9980 	.notifier_call = trace_die_panic_handler,
9981 	.priority = INT_MAX - 1,
9982 };
9983 
9984 static struct notifier_block trace_die_notifier = {
9985 	.notifier_call = trace_die_panic_handler,
9986 	.priority = INT_MAX - 1,
9987 };
9988 
9989 /*
9990  * The idea is to execute the following die/panic callback early, in order
9991  * to avoid showing irrelevant information in the trace (like other panic
9992  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9993  * warnings get disabled (to prevent potential log flooding).
9994  */
9995 static int trace_die_panic_handler(struct notifier_block *self,
9996 				unsigned long ev, void *unused)
9997 {
9998 	if (!ftrace_dump_on_oops_enabled())
9999 		return NOTIFY_DONE;
10000 
10001 	/* The die notifier requires DIE_OOPS to trigger */
10002 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10003 		return NOTIFY_DONE;
10004 
10005 	ftrace_dump(DUMP_PARAM);
10006 
10007 	return NOTIFY_DONE;
10008 }
10009 
10010 /*
10011  * printk is set to max of 1024, we really don't need it that big.
10012  * Nothing should be printing 1000 characters anyway.
10013  */
10014 #define TRACE_MAX_PRINT		1000
10015 
10016 /*
10017  * Define here KERN_TRACE so that we have one place to modify
10018  * it if we decide to change what log level the ftrace dump
10019  * should be at.
10020  */
10021 #define KERN_TRACE		KERN_EMERG
10022 
10023 void
10024 trace_printk_seq(struct trace_seq *s)
10025 {
10026 	/* Probably should print a warning here. */
10027 	if (s->seq.len >= TRACE_MAX_PRINT)
10028 		s->seq.len = TRACE_MAX_PRINT;
10029 
10030 	/*
10031 	 * More paranoid code. Although the buffer size is set to
10032 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10033 	 * an extra layer of protection.
10034 	 */
10035 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10036 		s->seq.len = s->seq.size - 1;
10037 
10038 	/* should be zero ended, but we are paranoid. */
10039 	s->buffer[s->seq.len] = 0;
10040 
10041 	printk(KERN_TRACE "%s", s->buffer);
10042 
10043 	trace_seq_init(s);
10044 }
10045 
10046 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10047 {
10048 	iter->tr = tr;
10049 	iter->trace = iter->tr->current_trace;
10050 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10051 	iter->array_buffer = &tr->array_buffer;
10052 
10053 	if (iter->trace && iter->trace->open)
10054 		iter->trace->open(iter);
10055 
10056 	/* Annotate start of buffers if we had overruns */
10057 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10058 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10059 
10060 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10061 	if (trace_clocks[iter->tr->clock_id].in_ns)
10062 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10063 
10064 	/* Can not use kmalloc for iter.temp and iter.fmt */
10065 	iter->temp = static_temp_buf;
10066 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10067 	iter->fmt = static_fmt_buf;
10068 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10069 }
10070 
10071 void trace_init_global_iter(struct trace_iterator *iter)
10072 {
10073 	trace_init_iter(iter, &global_trace);
10074 }
10075 
10076 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10077 {
10078 	/* use static because iter can be a bit big for the stack */
10079 	static struct trace_iterator iter;
10080 	unsigned int old_userobj;
10081 	unsigned long flags;
10082 	int cnt = 0, cpu;
10083 
10084 	/*
10085 	 * Always turn off tracing when we dump.
10086 	 * We don't need to show trace output of what happens
10087 	 * between multiple crashes.
10088 	 *
10089 	 * If the user does a sysrq-z, then they can re-enable
10090 	 * tracing with echo 1 > tracing_on.
10091 	 */
10092 	tracer_tracing_off(tr);
10093 
10094 	local_irq_save(flags);
10095 
10096 	/* Simulate the iterator */
10097 	trace_init_iter(&iter, tr);
10098 
10099 	for_each_tracing_cpu(cpu) {
10100 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10101 	}
10102 
10103 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10104 
10105 	/* don't look at user memory in panic mode */
10106 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10107 
10108 	if (dump_mode == DUMP_ORIG)
10109 		iter.cpu_file = raw_smp_processor_id();
10110 	else
10111 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10112 
10113 	if (tr == &global_trace)
10114 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10115 	else
10116 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10117 
10118 	/* Did function tracer already get disabled? */
10119 	if (ftrace_is_dead()) {
10120 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10121 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10122 	}
10123 
10124 	/*
10125 	 * We need to stop all tracing on all CPUS to read
10126 	 * the next buffer. This is a bit expensive, but is
10127 	 * not done often. We fill all what we can read,
10128 	 * and then release the locks again.
10129 	 */
10130 
10131 	while (!trace_empty(&iter)) {
10132 
10133 		if (!cnt)
10134 			printk(KERN_TRACE "---------------------------------\n");
10135 
10136 		cnt++;
10137 
10138 		trace_iterator_reset(&iter);
10139 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10140 
10141 		if (trace_find_next_entry_inc(&iter) != NULL) {
10142 			int ret;
10143 
10144 			ret = print_trace_line(&iter);
10145 			if (ret != TRACE_TYPE_NO_CONSUME)
10146 				trace_consume(&iter);
10147 		}
10148 		touch_nmi_watchdog();
10149 
10150 		trace_printk_seq(&iter.seq);
10151 	}
10152 
10153 	if (!cnt)
10154 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10155 	else
10156 		printk(KERN_TRACE "---------------------------------\n");
10157 
10158 	tr->trace_flags |= old_userobj;
10159 
10160 	for_each_tracing_cpu(cpu) {
10161 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10162 	}
10163 	local_irq_restore(flags);
10164 }
10165 
10166 static void ftrace_dump_by_param(void)
10167 {
10168 	bool first_param = true;
10169 	char dump_param[MAX_TRACER_SIZE];
10170 	char *buf, *token, *inst_name;
10171 	struct trace_array *tr;
10172 
10173 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10174 	buf = dump_param;
10175 
10176 	while ((token = strsep(&buf, ",")) != NULL) {
10177 		if (first_param) {
10178 			first_param = false;
10179 			if (!strcmp("0", token))
10180 				continue;
10181 			else if (!strcmp("1", token)) {
10182 				ftrace_dump_one(&global_trace, DUMP_ALL);
10183 				continue;
10184 			}
10185 			else if (!strcmp("2", token) ||
10186 			  !strcmp("orig_cpu", token)) {
10187 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10188 				continue;
10189 			}
10190 		}
10191 
10192 		inst_name = strsep(&token, "=");
10193 		tr = trace_array_find(inst_name);
10194 		if (!tr) {
10195 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10196 			continue;
10197 		}
10198 
10199 		if (token && (!strcmp("2", token) ||
10200 			  !strcmp("orig_cpu", token)))
10201 			ftrace_dump_one(tr, DUMP_ORIG);
10202 		else
10203 			ftrace_dump_one(tr, DUMP_ALL);
10204 	}
10205 }
10206 
10207 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10208 {
10209 	static atomic_t dump_running;
10210 
10211 	/* Only allow one dump user at a time. */
10212 	if (atomic_inc_return(&dump_running) != 1) {
10213 		atomic_dec(&dump_running);
10214 		return;
10215 	}
10216 
10217 	switch (oops_dump_mode) {
10218 	case DUMP_ALL:
10219 		ftrace_dump_one(&global_trace, DUMP_ALL);
10220 		break;
10221 	case DUMP_ORIG:
10222 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10223 		break;
10224 	case DUMP_PARAM:
10225 		ftrace_dump_by_param();
10226 		break;
10227 	case DUMP_NONE:
10228 		break;
10229 	default:
10230 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10231 		ftrace_dump_one(&global_trace, DUMP_ALL);
10232 	}
10233 
10234 	atomic_dec(&dump_running);
10235 }
10236 EXPORT_SYMBOL_GPL(ftrace_dump);
10237 
10238 #define WRITE_BUFSIZE  4096
10239 
10240 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10241 				size_t count, loff_t *ppos,
10242 				int (*createfn)(const char *))
10243 {
10244 	char *kbuf, *buf, *tmp;
10245 	int ret = 0;
10246 	size_t done = 0;
10247 	size_t size;
10248 
10249 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10250 	if (!kbuf)
10251 		return -ENOMEM;
10252 
10253 	while (done < count) {
10254 		size = count - done;
10255 
10256 		if (size >= WRITE_BUFSIZE)
10257 			size = WRITE_BUFSIZE - 1;
10258 
10259 		if (copy_from_user(kbuf, buffer + done, size)) {
10260 			ret = -EFAULT;
10261 			goto out;
10262 		}
10263 		kbuf[size] = '\0';
10264 		buf = kbuf;
10265 		do {
10266 			tmp = strchr(buf, '\n');
10267 			if (tmp) {
10268 				*tmp = '\0';
10269 				size = tmp - buf + 1;
10270 			} else {
10271 				size = strlen(buf);
10272 				if (done + size < count) {
10273 					if (buf != kbuf)
10274 						break;
10275 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10276 					pr_warn("Line length is too long: Should be less than %d\n",
10277 						WRITE_BUFSIZE - 2);
10278 					ret = -EINVAL;
10279 					goto out;
10280 				}
10281 			}
10282 			done += size;
10283 
10284 			/* Remove comments */
10285 			tmp = strchr(buf, '#');
10286 
10287 			if (tmp)
10288 				*tmp = '\0';
10289 
10290 			ret = createfn(buf);
10291 			if (ret)
10292 				goto out;
10293 			buf += size;
10294 
10295 		} while (done < count);
10296 	}
10297 	ret = done;
10298 
10299 out:
10300 	kfree(kbuf);
10301 
10302 	return ret;
10303 }
10304 
10305 #ifdef CONFIG_TRACER_MAX_TRACE
10306 __init static bool tr_needs_alloc_snapshot(const char *name)
10307 {
10308 	char *test;
10309 	int len = strlen(name);
10310 	bool ret;
10311 
10312 	if (!boot_snapshot_index)
10313 		return false;
10314 
10315 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10316 	    boot_snapshot_info[len] == '\t')
10317 		return true;
10318 
10319 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10320 	if (!test)
10321 		return false;
10322 
10323 	sprintf(test, "\t%s\t", name);
10324 	ret = strstr(boot_snapshot_info, test) == NULL;
10325 	kfree(test);
10326 	return ret;
10327 }
10328 
10329 __init static void do_allocate_snapshot(const char *name)
10330 {
10331 	if (!tr_needs_alloc_snapshot(name))
10332 		return;
10333 
10334 	/*
10335 	 * When allocate_snapshot is set, the next call to
10336 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10337 	 * will allocate the snapshot buffer. That will alse clear
10338 	 * this flag.
10339 	 */
10340 	allocate_snapshot = true;
10341 }
10342 #else
10343 static inline void do_allocate_snapshot(const char *name) { }
10344 #endif
10345 
10346 __init static void enable_instances(void)
10347 {
10348 	struct trace_array *tr;
10349 	char *curr_str;
10350 	char *name;
10351 	char *str;
10352 	char *tok;
10353 
10354 	/* A tab is always appended */
10355 	boot_instance_info[boot_instance_index - 1] = '\0';
10356 	str = boot_instance_info;
10357 
10358 	while ((curr_str = strsep(&str, "\t"))) {
10359 		phys_addr_t start = 0;
10360 		phys_addr_t size = 0;
10361 		unsigned long addr = 0;
10362 		bool traceprintk = false;
10363 		bool traceoff = false;
10364 		char *flag_delim;
10365 		char *addr_delim;
10366 
10367 		tok = strsep(&curr_str, ",");
10368 
10369 		flag_delim = strchr(tok, '^');
10370 		addr_delim = strchr(tok, '@');
10371 
10372 		if (addr_delim)
10373 			*addr_delim++ = '\0';
10374 
10375 		if (flag_delim)
10376 			*flag_delim++ = '\0';
10377 
10378 		name = tok;
10379 
10380 		if (flag_delim) {
10381 			char *flag;
10382 
10383 			while ((flag = strsep(&flag_delim, "^"))) {
10384 				if (strcmp(flag, "traceoff") == 0) {
10385 					traceoff = true;
10386 				} else if ((strcmp(flag, "printk") == 0) ||
10387 					   (strcmp(flag, "traceprintk") == 0) ||
10388 					   (strcmp(flag, "trace_printk") == 0)) {
10389 					traceprintk = true;
10390 				} else {
10391 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10392 						flag, name);
10393 				}
10394 			}
10395 		}
10396 
10397 		tok = addr_delim;
10398 		if (tok && isdigit(*tok)) {
10399 			start = memparse(tok, &tok);
10400 			if (!start) {
10401 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10402 					name);
10403 				continue;
10404 			}
10405 			if (*tok != ':') {
10406 				pr_warn("Tracing: No size specified for instance %s\n", name);
10407 				continue;
10408 			}
10409 			tok++;
10410 			size = memparse(tok, &tok);
10411 			if (!size) {
10412 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10413 					name);
10414 				continue;
10415 			}
10416 		} else if (tok) {
10417 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10418 				start = 0;
10419 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10420 				continue;
10421 			}
10422 		}
10423 
10424 		if (start) {
10425 			addr = map_pages(start, size);
10426 			if (addr) {
10427 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10428 					name, &start, (unsigned long)size);
10429 			} else {
10430 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10431 				continue;
10432 			}
10433 		} else {
10434 			/* Only non mapped buffers have snapshot buffers */
10435 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10436 				do_allocate_snapshot(name);
10437 		}
10438 
10439 		tr = trace_array_create_systems(name, NULL, addr, size);
10440 		if (IS_ERR(tr)) {
10441 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10442 			continue;
10443 		}
10444 
10445 		if (traceoff)
10446 			tracer_tracing_off(tr);
10447 
10448 		if (traceprintk)
10449 			update_printk_trace(tr);
10450 
10451 		/*
10452 		 * If start is set, then this is a mapped buffer, and
10453 		 * cannot be deleted by user space, so keep the reference
10454 		 * to it.
10455 		 */
10456 		if (start) {
10457 			tr->flags |= TRACE_ARRAY_FL_BOOT;
10458 			tr->ref++;
10459 		}
10460 
10461 		while ((tok = strsep(&curr_str, ","))) {
10462 			early_enable_events(tr, tok, true);
10463 		}
10464 	}
10465 }
10466 
10467 __init static int tracer_alloc_buffers(void)
10468 {
10469 	int ring_buf_size;
10470 	int ret = -ENOMEM;
10471 
10472 
10473 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10474 		pr_warn("Tracing disabled due to lockdown\n");
10475 		return -EPERM;
10476 	}
10477 
10478 	/*
10479 	 * Make sure we don't accidentally add more trace options
10480 	 * than we have bits for.
10481 	 */
10482 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10483 
10484 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10485 		goto out;
10486 
10487 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10488 		goto out_free_buffer_mask;
10489 
10490 	/* Only allocate trace_printk buffers if a trace_printk exists */
10491 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10492 		/* Must be called before global_trace.buffer is allocated */
10493 		trace_printk_init_buffers();
10494 
10495 	/* To save memory, keep the ring buffer size to its minimum */
10496 	if (global_trace.ring_buffer_expanded)
10497 		ring_buf_size = trace_buf_size;
10498 	else
10499 		ring_buf_size = 1;
10500 
10501 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10502 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10503 
10504 	raw_spin_lock_init(&global_trace.start_lock);
10505 
10506 	/*
10507 	 * The prepare callbacks allocates some memory for the ring buffer. We
10508 	 * don't free the buffer if the CPU goes down. If we were to free
10509 	 * the buffer, then the user would lose any trace that was in the
10510 	 * buffer. The memory will be removed once the "instance" is removed.
10511 	 */
10512 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10513 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10514 				      NULL);
10515 	if (ret < 0)
10516 		goto out_free_cpumask;
10517 	/* Used for event triggers */
10518 	ret = -ENOMEM;
10519 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10520 	if (!temp_buffer)
10521 		goto out_rm_hp_state;
10522 
10523 	if (trace_create_savedcmd() < 0)
10524 		goto out_free_temp_buffer;
10525 
10526 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10527 		goto out_free_savedcmd;
10528 
10529 	/* TODO: make the number of buffers hot pluggable with CPUS */
10530 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10531 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10532 		goto out_free_pipe_cpumask;
10533 	}
10534 	if (global_trace.buffer_disabled)
10535 		tracing_off();
10536 
10537 	if (trace_boot_clock) {
10538 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10539 		if (ret < 0)
10540 			pr_warn("Trace clock %s not defined, going back to default\n",
10541 				trace_boot_clock);
10542 	}
10543 
10544 	/*
10545 	 * register_tracer() might reference current_trace, so it
10546 	 * needs to be set before we register anything. This is
10547 	 * just a bootstrap of current_trace anyway.
10548 	 */
10549 	global_trace.current_trace = &nop_trace;
10550 
10551 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10552 #ifdef CONFIG_TRACER_MAX_TRACE
10553 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10554 #endif
10555 	ftrace_init_global_array_ops(&global_trace);
10556 
10557 #ifdef CONFIG_MODULES
10558 	INIT_LIST_HEAD(&global_trace.mod_events);
10559 #endif
10560 
10561 	init_trace_flags_index(&global_trace);
10562 
10563 	register_tracer(&nop_trace);
10564 
10565 	/* Function tracing may start here (via kernel command line) */
10566 	init_function_trace();
10567 
10568 	/* All seems OK, enable tracing */
10569 	tracing_disabled = 0;
10570 
10571 	atomic_notifier_chain_register(&panic_notifier_list,
10572 				       &trace_panic_notifier);
10573 
10574 	register_die_notifier(&trace_die_notifier);
10575 
10576 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10577 
10578 	INIT_LIST_HEAD(&global_trace.systems);
10579 	INIT_LIST_HEAD(&global_trace.events);
10580 	INIT_LIST_HEAD(&global_trace.hist_vars);
10581 	INIT_LIST_HEAD(&global_trace.err_log);
10582 	list_add(&global_trace.list, &ftrace_trace_arrays);
10583 
10584 	apply_trace_boot_options();
10585 
10586 	register_snapshot_cmd();
10587 
10588 	return 0;
10589 
10590 out_free_pipe_cpumask:
10591 	free_cpumask_var(global_trace.pipe_cpumask);
10592 out_free_savedcmd:
10593 	trace_free_saved_cmdlines_buffer();
10594 out_free_temp_buffer:
10595 	ring_buffer_free(temp_buffer);
10596 out_rm_hp_state:
10597 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10598 out_free_cpumask:
10599 	free_cpumask_var(global_trace.tracing_cpumask);
10600 out_free_buffer_mask:
10601 	free_cpumask_var(tracing_buffer_mask);
10602 out:
10603 	return ret;
10604 }
10605 
10606 #ifdef CONFIG_FUNCTION_TRACER
10607 /* Used to set module cached ftrace filtering at boot up */
10608 __init struct trace_array *trace_get_global_array(void)
10609 {
10610 	return &global_trace;
10611 }
10612 #endif
10613 
10614 void __init ftrace_boot_snapshot(void)
10615 {
10616 #ifdef CONFIG_TRACER_MAX_TRACE
10617 	struct trace_array *tr;
10618 
10619 	if (!snapshot_at_boot)
10620 		return;
10621 
10622 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10623 		if (!tr->allocated_snapshot)
10624 			continue;
10625 
10626 		tracing_snapshot_instance(tr);
10627 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10628 	}
10629 #endif
10630 }
10631 
10632 void __init early_trace_init(void)
10633 {
10634 	if (tracepoint_printk) {
10635 		tracepoint_print_iter =
10636 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10637 		if (MEM_FAIL(!tracepoint_print_iter,
10638 			     "Failed to allocate trace iterator\n"))
10639 			tracepoint_printk = 0;
10640 		else
10641 			static_key_enable(&tracepoint_printk_key.key);
10642 	}
10643 	tracer_alloc_buffers();
10644 
10645 	init_events();
10646 }
10647 
10648 void __init trace_init(void)
10649 {
10650 	trace_event_init();
10651 
10652 	if (boot_instance_index)
10653 		enable_instances();
10654 }
10655 
10656 __init static void clear_boot_tracer(void)
10657 {
10658 	/*
10659 	 * The default tracer at boot buffer is an init section.
10660 	 * This function is called in lateinit. If we did not
10661 	 * find the boot tracer, then clear it out, to prevent
10662 	 * later registration from accessing the buffer that is
10663 	 * about to be freed.
10664 	 */
10665 	if (!default_bootup_tracer)
10666 		return;
10667 
10668 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10669 	       default_bootup_tracer);
10670 	default_bootup_tracer = NULL;
10671 }
10672 
10673 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10674 __init static void tracing_set_default_clock(void)
10675 {
10676 	/* sched_clock_stable() is determined in late_initcall */
10677 	if (!trace_boot_clock && !sched_clock_stable()) {
10678 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10679 			pr_warn("Can not set tracing clock due to lockdown\n");
10680 			return;
10681 		}
10682 
10683 		printk(KERN_WARNING
10684 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10685 		       "If you want to keep using the local clock, then add:\n"
10686 		       "  \"trace_clock=local\"\n"
10687 		       "on the kernel command line\n");
10688 		tracing_set_clock(&global_trace, "global");
10689 	}
10690 }
10691 #else
10692 static inline void tracing_set_default_clock(void) { }
10693 #endif
10694 
10695 __init static int late_trace_init(void)
10696 {
10697 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10698 		static_key_disable(&tracepoint_printk_key.key);
10699 		tracepoint_printk = 0;
10700 	}
10701 
10702 	tracing_set_default_clock();
10703 	clear_boot_tracer();
10704 	return 0;
10705 }
10706 
10707 late_initcall_sync(late_trace_init);
10708