xref: /linux/kernel/trace/trace.c (revision a926d15a799acc6935820340b5a1428754f8ab45)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54 
55 #include "trace.h"
56 #include "trace_output.h"
57 
58 #ifdef CONFIG_FTRACE_STARTUP_TEST
59 /*
60  * We need to change this state when a selftest is running.
61  * A selftest will lurk into the ring-buffer to count the
62  * entries inserted during the selftest although some concurrent
63  * insertions into the ring-buffer such as trace_printk could occurred
64  * at the same time, giving false positive or negative results.
65  */
66 static bool __read_mostly tracing_selftest_running;
67 
68 /*
69  * If boot-time tracing including tracers/events via kernel cmdline
70  * is running, we do not want to run SELFTEST.
71  */
72 bool __read_mostly tracing_selftest_disabled;
73 
74 void __init disable_tracing_selftest(const char *reason)
75 {
76 	if (!tracing_selftest_disabled) {
77 		tracing_selftest_disabled = true;
78 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
79 	}
80 }
81 #else
82 #define tracing_selftest_running	0
83 #define tracing_selftest_disabled	0
84 #endif
85 
86 /* Pipe tracepoints to printk */
87 static struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static bool traceoff_after_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputing it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  * Set instance name if you want to dump the specific trace instance
136  * Multiple instance dump is also supported, and instances are seperated
137  * by commas.
138  */
139 /* Set to string format zero to disable by default */
140 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
141 
142 /* When set, tracing will stop when a WARN*() is hit */
143 int __disable_trace_on_warning;
144 
145 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
146 /* Map of enums to their values, for "eval_map" file */
147 struct trace_eval_map_head {
148 	struct module			*mod;
149 	unsigned long			length;
150 };
151 
152 union trace_eval_map_item;
153 
154 struct trace_eval_map_tail {
155 	/*
156 	 * "end" is first and points to NULL as it must be different
157 	 * than "mod" or "eval_string"
158 	 */
159 	union trace_eval_map_item	*next;
160 	const char			*end;	/* points to NULL */
161 };
162 
163 static DEFINE_MUTEX(trace_eval_mutex);
164 
165 /*
166  * The trace_eval_maps are saved in an array with two extra elements,
167  * one at the beginning, and one at the end. The beginning item contains
168  * the count of the saved maps (head.length), and the module they
169  * belong to if not built in (head.mod). The ending item contains a
170  * pointer to the next array of saved eval_map items.
171  */
172 union trace_eval_map_item {
173 	struct trace_eval_map		map;
174 	struct trace_eval_map_head	head;
175 	struct trace_eval_map_tail	tail;
176 };
177 
178 static union trace_eval_map_item *trace_eval_maps;
179 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
180 
181 int tracing_set_tracer(struct trace_array *tr, const char *buf);
182 static void ftrace_trace_userstack(struct trace_array *tr,
183 				   struct trace_buffer *buffer,
184 				   unsigned int trace_ctx);
185 
186 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
187 static char *default_bootup_tracer;
188 
189 static bool allocate_snapshot;
190 static bool snapshot_at_boot;
191 
192 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
193 static int boot_instance_index;
194 
195 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
196 static int boot_snapshot_index;
197 
198 static int __init set_cmdline_ftrace(char *str)
199 {
200 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
201 	default_bootup_tracer = bootup_tracer_buf;
202 	/* We are using ftrace early, expand it */
203 	trace_set_ring_buffer_expanded(NULL);
204 	return 1;
205 }
206 __setup("ftrace=", set_cmdline_ftrace);
207 
208 int ftrace_dump_on_oops_enabled(void)
209 {
210 	if (!strcmp("0", ftrace_dump_on_oops))
211 		return 0;
212 	else
213 		return 1;
214 }
215 
216 static int __init set_ftrace_dump_on_oops(char *str)
217 {
218 	if (!*str) {
219 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
220 		return 1;
221 	}
222 
223 	if (*str == ',') {
224 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
225 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
226 		return 1;
227 	}
228 
229 	if (*str++ == '=') {
230 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
231 		return 1;
232 	}
233 
234 	return 0;
235 }
236 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
237 
238 static int __init stop_trace_on_warning(char *str)
239 {
240 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
241 		__disable_trace_on_warning = 1;
242 	return 1;
243 }
244 __setup("traceoff_on_warning", stop_trace_on_warning);
245 
246 static int __init boot_alloc_snapshot(char *str)
247 {
248 	char *slot = boot_snapshot_info + boot_snapshot_index;
249 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
250 	int ret;
251 
252 	if (str[0] == '=') {
253 		str++;
254 		if (strlen(str) >= left)
255 			return -1;
256 
257 		ret = snprintf(slot, left, "%s\t", str);
258 		boot_snapshot_index += ret;
259 	} else {
260 		allocate_snapshot = true;
261 		/* We also need the main ring buffer expanded */
262 		trace_set_ring_buffer_expanded(NULL);
263 	}
264 	return 1;
265 }
266 __setup("alloc_snapshot", boot_alloc_snapshot);
267 
268 
269 static int __init boot_snapshot(char *str)
270 {
271 	snapshot_at_boot = true;
272 	boot_alloc_snapshot(str);
273 	return 1;
274 }
275 __setup("ftrace_boot_snapshot", boot_snapshot);
276 
277 
278 static int __init boot_instance(char *str)
279 {
280 	char *slot = boot_instance_info + boot_instance_index;
281 	int left = sizeof(boot_instance_info) - boot_instance_index;
282 	int ret;
283 
284 	if (strlen(str) >= left)
285 		return -1;
286 
287 	ret = snprintf(slot, left, "%s\t", str);
288 	boot_instance_index += ret;
289 
290 	return 1;
291 }
292 __setup("trace_instance=", boot_instance);
293 
294 
295 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
296 
297 static int __init set_trace_boot_options(char *str)
298 {
299 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
300 	return 1;
301 }
302 __setup("trace_options=", set_trace_boot_options);
303 
304 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
305 static char *trace_boot_clock __initdata;
306 
307 static int __init set_trace_boot_clock(char *str)
308 {
309 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
310 	trace_boot_clock = trace_boot_clock_buf;
311 	return 1;
312 }
313 __setup("trace_clock=", set_trace_boot_clock);
314 
315 static int __init set_tracepoint_printk(char *str)
316 {
317 	/* Ignore the "tp_printk_stop_on_boot" param */
318 	if (*str == '_')
319 		return 0;
320 
321 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
322 		tracepoint_printk = 1;
323 	return 1;
324 }
325 __setup("tp_printk", set_tracepoint_printk);
326 
327 static int __init set_tracepoint_printk_stop(char *str)
328 {
329 	tracepoint_printk_stop_on_boot = true;
330 	return 1;
331 }
332 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
333 
334 static int __init set_traceoff_after_boot(char *str)
335 {
336 	traceoff_after_boot = true;
337 	return 1;
338 }
339 __setup("traceoff_after_boot", set_traceoff_after_boot);
340 
341 unsigned long long ns2usecs(u64 nsec)
342 {
343 	nsec += 500;
344 	do_div(nsec, 1000);
345 	return nsec;
346 }
347 
348 static void
349 trace_process_export(struct trace_export *export,
350 	       struct ring_buffer_event *event, int flag)
351 {
352 	struct trace_entry *entry;
353 	unsigned int size = 0;
354 
355 	if (export->flags & flag) {
356 		entry = ring_buffer_event_data(event);
357 		size = ring_buffer_event_length(event);
358 		export->write(export, entry, size);
359 	}
360 }
361 
362 static DEFINE_MUTEX(ftrace_export_lock);
363 
364 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
365 
366 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
367 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
368 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
369 
370 static inline void ftrace_exports_enable(struct trace_export *export)
371 {
372 	if (export->flags & TRACE_EXPORT_FUNCTION)
373 		static_branch_inc(&trace_function_exports_enabled);
374 
375 	if (export->flags & TRACE_EXPORT_EVENT)
376 		static_branch_inc(&trace_event_exports_enabled);
377 
378 	if (export->flags & TRACE_EXPORT_MARKER)
379 		static_branch_inc(&trace_marker_exports_enabled);
380 }
381 
382 static inline void ftrace_exports_disable(struct trace_export *export)
383 {
384 	if (export->flags & TRACE_EXPORT_FUNCTION)
385 		static_branch_dec(&trace_function_exports_enabled);
386 
387 	if (export->flags & TRACE_EXPORT_EVENT)
388 		static_branch_dec(&trace_event_exports_enabled);
389 
390 	if (export->flags & TRACE_EXPORT_MARKER)
391 		static_branch_dec(&trace_marker_exports_enabled);
392 }
393 
394 static void ftrace_exports(struct ring_buffer_event *event, int flag)
395 {
396 	struct trace_export *export;
397 
398 	preempt_disable_notrace();
399 
400 	export = rcu_dereference_raw_check(ftrace_exports_list);
401 	while (export) {
402 		trace_process_export(export, event, flag);
403 		export = rcu_dereference_raw_check(export->next);
404 	}
405 
406 	preempt_enable_notrace();
407 }
408 
409 static inline void
410 add_trace_export(struct trace_export **list, struct trace_export *export)
411 {
412 	rcu_assign_pointer(export->next, *list);
413 	/*
414 	 * We are entering export into the list but another
415 	 * CPU might be walking that list. We need to make sure
416 	 * the export->next pointer is valid before another CPU sees
417 	 * the export pointer included into the list.
418 	 */
419 	rcu_assign_pointer(*list, export);
420 }
421 
422 static inline int
423 rm_trace_export(struct trace_export **list, struct trace_export *export)
424 {
425 	struct trace_export **p;
426 
427 	for (p = list; *p != NULL; p = &(*p)->next)
428 		if (*p == export)
429 			break;
430 
431 	if (*p != export)
432 		return -1;
433 
434 	rcu_assign_pointer(*p, (*p)->next);
435 
436 	return 0;
437 }
438 
439 static inline void
440 add_ftrace_export(struct trace_export **list, struct trace_export *export)
441 {
442 	ftrace_exports_enable(export);
443 
444 	add_trace_export(list, export);
445 }
446 
447 static inline int
448 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
449 {
450 	int ret;
451 
452 	ret = rm_trace_export(list, export);
453 	ftrace_exports_disable(export);
454 
455 	return ret;
456 }
457 
458 int register_ftrace_export(struct trace_export *export)
459 {
460 	if (WARN_ON_ONCE(!export->write))
461 		return -1;
462 
463 	mutex_lock(&ftrace_export_lock);
464 
465 	add_ftrace_export(&ftrace_exports_list, export);
466 
467 	mutex_unlock(&ftrace_export_lock);
468 
469 	return 0;
470 }
471 EXPORT_SYMBOL_GPL(register_ftrace_export);
472 
473 int unregister_ftrace_export(struct trace_export *export)
474 {
475 	int ret;
476 
477 	mutex_lock(&ftrace_export_lock);
478 
479 	ret = rm_ftrace_export(&ftrace_exports_list, export);
480 
481 	mutex_unlock(&ftrace_export_lock);
482 
483 	return ret;
484 }
485 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
486 
487 /* trace_flags holds trace_options default values */
488 #define TRACE_DEFAULT_FLAGS						\
489 	(FUNCTION_DEFAULT_FLAGS |					\
490 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
491 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
492 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
493 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
494 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
495 
496 /* trace_options that are only supported by global_trace */
497 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
498 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
499 
500 /* trace_flags that are default zero for instances */
501 #define ZEROED_TRACE_FLAGS \
502 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
503 
504 /*
505  * The global_trace is the descriptor that holds the top-level tracing
506  * buffers for the live tracing.
507  */
508 static struct trace_array global_trace = {
509 	.trace_flags = TRACE_DEFAULT_FLAGS,
510 };
511 
512 static struct trace_array *printk_trace = &global_trace;
513 
514 static __always_inline bool printk_binsafe(struct trace_array *tr)
515 {
516 	/*
517 	 * The binary format of traceprintk can cause a crash if used
518 	 * by a buffer from another boot. Force the use of the
519 	 * non binary version of trace_printk if the trace_printk
520 	 * buffer is a boot mapped ring buffer.
521 	 */
522 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
523 }
524 
525 static void update_printk_trace(struct trace_array *tr)
526 {
527 	if (printk_trace == tr)
528 		return;
529 
530 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
531 	printk_trace = tr;
532 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
533 }
534 
535 void trace_set_ring_buffer_expanded(struct trace_array *tr)
536 {
537 	if (!tr)
538 		tr = &global_trace;
539 	tr->ring_buffer_expanded = true;
540 }
541 
542 LIST_HEAD(ftrace_trace_arrays);
543 
544 int trace_array_get(struct trace_array *this_tr)
545 {
546 	struct trace_array *tr;
547 
548 	guard(mutex)(&trace_types_lock);
549 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
550 		if (tr == this_tr) {
551 			tr->ref++;
552 			return 0;
553 		}
554 	}
555 
556 	return -ENODEV;
557 }
558 
559 static void __trace_array_put(struct trace_array *this_tr)
560 {
561 	WARN_ON(!this_tr->ref);
562 	this_tr->ref--;
563 }
564 
565 /**
566  * trace_array_put - Decrement the reference counter for this trace array.
567  * @this_tr : pointer to the trace array
568  *
569  * NOTE: Use this when we no longer need the trace array returned by
570  * trace_array_get_by_name(). This ensures the trace array can be later
571  * destroyed.
572  *
573  */
574 void trace_array_put(struct trace_array *this_tr)
575 {
576 	if (!this_tr)
577 		return;
578 
579 	mutex_lock(&trace_types_lock);
580 	__trace_array_put(this_tr);
581 	mutex_unlock(&trace_types_lock);
582 }
583 EXPORT_SYMBOL_GPL(trace_array_put);
584 
585 int tracing_check_open_get_tr(struct trace_array *tr)
586 {
587 	int ret;
588 
589 	ret = security_locked_down(LOCKDOWN_TRACEFS);
590 	if (ret)
591 		return ret;
592 
593 	if (tracing_disabled)
594 		return -ENODEV;
595 
596 	if (tr && trace_array_get(tr) < 0)
597 		return -ENODEV;
598 
599 	return 0;
600 }
601 
602 /**
603  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
604  * @filtered_pids: The list of pids to check
605  * @search_pid: The PID to find in @filtered_pids
606  *
607  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
608  */
609 bool
610 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
611 {
612 	return trace_pid_list_is_set(filtered_pids, search_pid);
613 }
614 
615 /**
616  * trace_ignore_this_task - should a task be ignored for tracing
617  * @filtered_pids: The list of pids to check
618  * @filtered_no_pids: The list of pids not to be traced
619  * @task: The task that should be ignored if not filtered
620  *
621  * Checks if @task should be traced or not from @filtered_pids.
622  * Returns true if @task should *NOT* be traced.
623  * Returns false if @task should be traced.
624  */
625 bool
626 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
627 		       struct trace_pid_list *filtered_no_pids,
628 		       struct task_struct *task)
629 {
630 	/*
631 	 * If filtered_no_pids is not empty, and the task's pid is listed
632 	 * in filtered_no_pids, then return true.
633 	 * Otherwise, if filtered_pids is empty, that means we can
634 	 * trace all tasks. If it has content, then only trace pids
635 	 * within filtered_pids.
636 	 */
637 
638 	return (filtered_pids &&
639 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
640 		(filtered_no_pids &&
641 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
642 }
643 
644 /**
645  * trace_filter_add_remove_task - Add or remove a task from a pid_list
646  * @pid_list: The list to modify
647  * @self: The current task for fork or NULL for exit
648  * @task: The task to add or remove
649  *
650  * If adding a task, if @self is defined, the task is only added if @self
651  * is also included in @pid_list. This happens on fork and tasks should
652  * only be added when the parent is listed. If @self is NULL, then the
653  * @task pid will be removed from the list, which would happen on exit
654  * of a task.
655  */
656 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
657 				  struct task_struct *self,
658 				  struct task_struct *task)
659 {
660 	if (!pid_list)
661 		return;
662 
663 	/* For forks, we only add if the forking task is listed */
664 	if (self) {
665 		if (!trace_find_filtered_pid(pid_list, self->pid))
666 			return;
667 	}
668 
669 	/* "self" is set for forks, and NULL for exits */
670 	if (self)
671 		trace_pid_list_set(pid_list, task->pid);
672 	else
673 		trace_pid_list_clear(pid_list, task->pid);
674 }
675 
676 /**
677  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
678  * @pid_list: The pid list to show
679  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
680  * @pos: The position of the file
681  *
682  * This is used by the seq_file "next" operation to iterate the pids
683  * listed in a trace_pid_list structure.
684  *
685  * Returns the pid+1 as we want to display pid of zero, but NULL would
686  * stop the iteration.
687  */
688 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
689 {
690 	long pid = (unsigned long)v;
691 	unsigned int next;
692 
693 	(*pos)++;
694 
695 	/* pid already is +1 of the actual previous bit */
696 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
697 		return NULL;
698 
699 	pid = next;
700 
701 	/* Return pid + 1 to allow zero to be represented */
702 	return (void *)(pid + 1);
703 }
704 
705 /**
706  * trace_pid_start - Used for seq_file to start reading pid lists
707  * @pid_list: The pid list to show
708  * @pos: The position of the file
709  *
710  * This is used by seq_file "start" operation to start the iteration
711  * of listing pids.
712  *
713  * Returns the pid+1 as we want to display pid of zero, but NULL would
714  * stop the iteration.
715  */
716 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
717 {
718 	unsigned long pid;
719 	unsigned int first;
720 	loff_t l = 0;
721 
722 	if (trace_pid_list_first(pid_list, &first) < 0)
723 		return NULL;
724 
725 	pid = first;
726 
727 	/* Return pid + 1 so that zero can be the exit value */
728 	for (pid++; pid && l < *pos;
729 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
730 		;
731 	return (void *)pid;
732 }
733 
734 /**
735  * trace_pid_show - show the current pid in seq_file processing
736  * @m: The seq_file structure to write into
737  * @v: A void pointer of the pid (+1) value to display
738  *
739  * Can be directly used by seq_file operations to display the current
740  * pid value.
741  */
742 int trace_pid_show(struct seq_file *m, void *v)
743 {
744 	unsigned long pid = (unsigned long)v - 1;
745 
746 	seq_printf(m, "%lu\n", pid);
747 	return 0;
748 }
749 
750 /* 128 should be much more than enough */
751 #define PID_BUF_SIZE		127
752 
753 int trace_pid_write(struct trace_pid_list *filtered_pids,
754 		    struct trace_pid_list **new_pid_list,
755 		    const char __user *ubuf, size_t cnt)
756 {
757 	struct trace_pid_list *pid_list;
758 	struct trace_parser parser;
759 	unsigned long val;
760 	int nr_pids = 0;
761 	ssize_t read = 0;
762 	ssize_t ret;
763 	loff_t pos;
764 	pid_t pid;
765 
766 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
767 		return -ENOMEM;
768 
769 	/*
770 	 * Always recreate a new array. The write is an all or nothing
771 	 * operation. Always create a new array when adding new pids by
772 	 * the user. If the operation fails, then the current list is
773 	 * not modified.
774 	 */
775 	pid_list = trace_pid_list_alloc();
776 	if (!pid_list) {
777 		trace_parser_put(&parser);
778 		return -ENOMEM;
779 	}
780 
781 	if (filtered_pids) {
782 		/* copy the current bits to the new max */
783 		ret = trace_pid_list_first(filtered_pids, &pid);
784 		while (!ret) {
785 			trace_pid_list_set(pid_list, pid);
786 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
787 			nr_pids++;
788 		}
789 	}
790 
791 	ret = 0;
792 	while (cnt > 0) {
793 
794 		pos = 0;
795 
796 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
797 		if (ret < 0)
798 			break;
799 
800 		read += ret;
801 		ubuf += ret;
802 		cnt -= ret;
803 
804 		if (!trace_parser_loaded(&parser))
805 			break;
806 
807 		ret = -EINVAL;
808 		if (kstrtoul(parser.buffer, 0, &val))
809 			break;
810 
811 		pid = (pid_t)val;
812 
813 		if (trace_pid_list_set(pid_list, pid) < 0) {
814 			ret = -1;
815 			break;
816 		}
817 		nr_pids++;
818 
819 		trace_parser_clear(&parser);
820 		ret = 0;
821 	}
822 	trace_parser_put(&parser);
823 
824 	if (ret < 0) {
825 		trace_pid_list_free(pid_list);
826 		return ret;
827 	}
828 
829 	if (!nr_pids) {
830 		/* Cleared the list of pids */
831 		trace_pid_list_free(pid_list);
832 		pid_list = NULL;
833 	}
834 
835 	*new_pid_list = pid_list;
836 
837 	return read;
838 }
839 
840 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
841 {
842 	u64 ts;
843 
844 	/* Early boot up does not have a buffer yet */
845 	if (!buf->buffer)
846 		return trace_clock_local();
847 
848 	ts = ring_buffer_time_stamp(buf->buffer);
849 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
850 
851 	return ts;
852 }
853 
854 u64 ftrace_now(int cpu)
855 {
856 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
857 }
858 
859 /**
860  * tracing_is_enabled - Show if global_trace has been enabled
861  *
862  * Shows if the global trace has been enabled or not. It uses the
863  * mirror flag "buffer_disabled" to be used in fast paths such as for
864  * the irqsoff tracer. But it may be inaccurate due to races. If you
865  * need to know the accurate state, use tracing_is_on() which is a little
866  * slower, but accurate.
867  */
868 int tracing_is_enabled(void)
869 {
870 	/*
871 	 * For quick access (irqsoff uses this in fast path), just
872 	 * return the mirror variable of the state of the ring buffer.
873 	 * It's a little racy, but we don't really care.
874 	 */
875 	smp_rmb();
876 	return !global_trace.buffer_disabled;
877 }
878 
879 /*
880  * trace_buf_size is the size in bytes that is allocated
881  * for a buffer. Note, the number of bytes is always rounded
882  * to page size.
883  *
884  * This number is purposely set to a low number of 16384.
885  * If the dump on oops happens, it will be much appreciated
886  * to not have to wait for all that output. Anyway this can be
887  * boot time and run time configurable.
888  */
889 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
890 
891 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
892 
893 /* trace_types holds a link list of available tracers. */
894 static struct tracer		*trace_types __read_mostly;
895 
896 /*
897  * trace_types_lock is used to protect the trace_types list.
898  */
899 DEFINE_MUTEX(trace_types_lock);
900 
901 /*
902  * serialize the access of the ring buffer
903  *
904  * ring buffer serializes readers, but it is low level protection.
905  * The validity of the events (which returns by ring_buffer_peek() ..etc)
906  * are not protected by ring buffer.
907  *
908  * The content of events may become garbage if we allow other process consumes
909  * these events concurrently:
910  *   A) the page of the consumed events may become a normal page
911  *      (not reader page) in ring buffer, and this page will be rewritten
912  *      by events producer.
913  *   B) The page of the consumed events may become a page for splice_read,
914  *      and this page will be returned to system.
915  *
916  * These primitives allow multi process access to different cpu ring buffer
917  * concurrently.
918  *
919  * These primitives don't distinguish read-only and read-consume access.
920  * Multi read-only access are also serialized.
921  */
922 
923 #ifdef CONFIG_SMP
924 static DECLARE_RWSEM(all_cpu_access_lock);
925 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
926 
927 static inline void trace_access_lock(int cpu)
928 {
929 	if (cpu == RING_BUFFER_ALL_CPUS) {
930 		/* gain it for accessing the whole ring buffer. */
931 		down_write(&all_cpu_access_lock);
932 	} else {
933 		/* gain it for accessing a cpu ring buffer. */
934 
935 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
936 		down_read(&all_cpu_access_lock);
937 
938 		/* Secondly block other access to this @cpu ring buffer. */
939 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
940 	}
941 }
942 
943 static inline void trace_access_unlock(int cpu)
944 {
945 	if (cpu == RING_BUFFER_ALL_CPUS) {
946 		up_write(&all_cpu_access_lock);
947 	} else {
948 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
949 		up_read(&all_cpu_access_lock);
950 	}
951 }
952 
953 static inline void trace_access_lock_init(void)
954 {
955 	int cpu;
956 
957 	for_each_possible_cpu(cpu)
958 		mutex_init(&per_cpu(cpu_access_lock, cpu));
959 }
960 
961 #else
962 
963 static DEFINE_MUTEX(access_lock);
964 
965 static inline void trace_access_lock(int cpu)
966 {
967 	(void)cpu;
968 	mutex_lock(&access_lock);
969 }
970 
971 static inline void trace_access_unlock(int cpu)
972 {
973 	(void)cpu;
974 	mutex_unlock(&access_lock);
975 }
976 
977 static inline void trace_access_lock_init(void)
978 {
979 }
980 
981 #endif
982 
983 #ifdef CONFIG_STACKTRACE
984 static void __ftrace_trace_stack(struct trace_array *tr,
985 				 struct trace_buffer *buffer,
986 				 unsigned int trace_ctx,
987 				 int skip, struct pt_regs *regs);
988 static inline void ftrace_trace_stack(struct trace_array *tr,
989 				      struct trace_buffer *buffer,
990 				      unsigned int trace_ctx,
991 				      int skip, struct pt_regs *regs);
992 
993 #else
994 static inline void __ftrace_trace_stack(struct trace_array *tr,
995 					struct trace_buffer *buffer,
996 					unsigned int trace_ctx,
997 					int skip, struct pt_regs *regs)
998 {
999 }
1000 static inline void ftrace_trace_stack(struct trace_array *tr,
1001 				      struct trace_buffer *buffer,
1002 				      unsigned long trace_ctx,
1003 				      int skip, struct pt_regs *regs)
1004 {
1005 }
1006 
1007 #endif
1008 
1009 static __always_inline void
1010 trace_event_setup(struct ring_buffer_event *event,
1011 		  int type, unsigned int trace_ctx)
1012 {
1013 	struct trace_entry *ent = ring_buffer_event_data(event);
1014 
1015 	tracing_generic_entry_update(ent, type, trace_ctx);
1016 }
1017 
1018 static __always_inline struct ring_buffer_event *
1019 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1020 			  int type,
1021 			  unsigned long len,
1022 			  unsigned int trace_ctx)
1023 {
1024 	struct ring_buffer_event *event;
1025 
1026 	event = ring_buffer_lock_reserve(buffer, len);
1027 	if (event != NULL)
1028 		trace_event_setup(event, type, trace_ctx);
1029 
1030 	return event;
1031 }
1032 
1033 void tracer_tracing_on(struct trace_array *tr)
1034 {
1035 	if (tr->array_buffer.buffer)
1036 		ring_buffer_record_on(tr->array_buffer.buffer);
1037 	/*
1038 	 * This flag is looked at when buffers haven't been allocated
1039 	 * yet, or by some tracers (like irqsoff), that just want to
1040 	 * know if the ring buffer has been disabled, but it can handle
1041 	 * races of where it gets disabled but we still do a record.
1042 	 * As the check is in the fast path of the tracers, it is more
1043 	 * important to be fast than accurate.
1044 	 */
1045 	tr->buffer_disabled = 0;
1046 	/* Make the flag seen by readers */
1047 	smp_wmb();
1048 }
1049 
1050 /**
1051  * tracing_on - enable tracing buffers
1052  *
1053  * This function enables tracing buffers that may have been
1054  * disabled with tracing_off.
1055  */
1056 void tracing_on(void)
1057 {
1058 	tracer_tracing_on(&global_trace);
1059 }
1060 EXPORT_SYMBOL_GPL(tracing_on);
1061 
1062 
1063 static __always_inline void
1064 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1065 {
1066 	__this_cpu_write(trace_taskinfo_save, true);
1067 
1068 	/* If this is the temp buffer, we need to commit fully */
1069 	if (this_cpu_read(trace_buffered_event) == event) {
1070 		/* Length is in event->array[0] */
1071 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1072 		/* Release the temp buffer */
1073 		this_cpu_dec(trace_buffered_event_cnt);
1074 		/* ring_buffer_unlock_commit() enables preemption */
1075 		preempt_enable_notrace();
1076 	} else
1077 		ring_buffer_unlock_commit(buffer);
1078 }
1079 
1080 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1081 		       const char *str, int size)
1082 {
1083 	struct ring_buffer_event *event;
1084 	struct trace_buffer *buffer;
1085 	struct print_entry *entry;
1086 	unsigned int trace_ctx;
1087 	int alloc;
1088 
1089 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1090 		return 0;
1091 
1092 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1093 		return 0;
1094 
1095 	if (unlikely(tracing_disabled))
1096 		return 0;
1097 
1098 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1099 
1100 	trace_ctx = tracing_gen_ctx();
1101 	buffer = tr->array_buffer.buffer;
1102 	ring_buffer_nest_start(buffer);
1103 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1104 					    trace_ctx);
1105 	if (!event) {
1106 		size = 0;
1107 		goto out;
1108 	}
1109 
1110 	entry = ring_buffer_event_data(event);
1111 	entry->ip = ip;
1112 
1113 	memcpy(&entry->buf, str, size);
1114 
1115 	/* Add a newline if necessary */
1116 	if (entry->buf[size - 1] != '\n') {
1117 		entry->buf[size] = '\n';
1118 		entry->buf[size + 1] = '\0';
1119 	} else
1120 		entry->buf[size] = '\0';
1121 
1122 	__buffer_unlock_commit(buffer, event);
1123 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1124  out:
1125 	ring_buffer_nest_end(buffer);
1126 	return size;
1127 }
1128 EXPORT_SYMBOL_GPL(__trace_array_puts);
1129 
1130 /**
1131  * __trace_puts - write a constant string into the trace buffer.
1132  * @ip:	   The address of the caller
1133  * @str:   The constant string to write
1134  * @size:  The size of the string.
1135  */
1136 int __trace_puts(unsigned long ip, const char *str, int size)
1137 {
1138 	return __trace_array_puts(printk_trace, ip, str, size);
1139 }
1140 EXPORT_SYMBOL_GPL(__trace_puts);
1141 
1142 /**
1143  * __trace_bputs - write the pointer to a constant string into trace buffer
1144  * @ip:	   The address of the caller
1145  * @str:   The constant string to write to the buffer to
1146  */
1147 int __trace_bputs(unsigned long ip, const char *str)
1148 {
1149 	struct trace_array *tr = READ_ONCE(printk_trace);
1150 	struct ring_buffer_event *event;
1151 	struct trace_buffer *buffer;
1152 	struct bputs_entry *entry;
1153 	unsigned int trace_ctx;
1154 	int size = sizeof(struct bputs_entry);
1155 	int ret = 0;
1156 
1157 	if (!printk_binsafe(tr))
1158 		return __trace_puts(ip, str, strlen(str));
1159 
1160 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1161 		return 0;
1162 
1163 	if (unlikely(tracing_selftest_running || tracing_disabled))
1164 		return 0;
1165 
1166 	trace_ctx = tracing_gen_ctx();
1167 	buffer = tr->array_buffer.buffer;
1168 
1169 	ring_buffer_nest_start(buffer);
1170 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1171 					    trace_ctx);
1172 	if (!event)
1173 		goto out;
1174 
1175 	entry = ring_buffer_event_data(event);
1176 	entry->ip			= ip;
1177 	entry->str			= str;
1178 
1179 	__buffer_unlock_commit(buffer, event);
1180 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1181 
1182 	ret = 1;
1183  out:
1184 	ring_buffer_nest_end(buffer);
1185 	return ret;
1186 }
1187 EXPORT_SYMBOL_GPL(__trace_bputs);
1188 
1189 #ifdef CONFIG_TRACER_SNAPSHOT
1190 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1191 					   void *cond_data)
1192 {
1193 	struct tracer *tracer = tr->current_trace;
1194 	unsigned long flags;
1195 
1196 	if (in_nmi()) {
1197 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1198 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1199 		return;
1200 	}
1201 
1202 	if (!tr->allocated_snapshot) {
1203 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1204 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1205 		tracer_tracing_off(tr);
1206 		return;
1207 	}
1208 
1209 	/* Note, snapshot can not be used when the tracer uses it */
1210 	if (tracer->use_max_tr) {
1211 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1212 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1213 		return;
1214 	}
1215 
1216 	if (tr->mapped) {
1217 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1218 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1219 		return;
1220 	}
1221 
1222 	local_irq_save(flags);
1223 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1224 	local_irq_restore(flags);
1225 }
1226 
1227 void tracing_snapshot_instance(struct trace_array *tr)
1228 {
1229 	tracing_snapshot_instance_cond(tr, NULL);
1230 }
1231 
1232 /**
1233  * tracing_snapshot - take a snapshot of the current buffer.
1234  *
1235  * This causes a swap between the snapshot buffer and the current live
1236  * tracing buffer. You can use this to take snapshots of the live
1237  * trace when some condition is triggered, but continue to trace.
1238  *
1239  * Note, make sure to allocate the snapshot with either
1240  * a tracing_snapshot_alloc(), or by doing it manually
1241  * with: echo 1 > /sys/kernel/tracing/snapshot
1242  *
1243  * If the snapshot buffer is not allocated, it will stop tracing.
1244  * Basically making a permanent snapshot.
1245  */
1246 void tracing_snapshot(void)
1247 {
1248 	struct trace_array *tr = &global_trace;
1249 
1250 	tracing_snapshot_instance(tr);
1251 }
1252 EXPORT_SYMBOL_GPL(tracing_snapshot);
1253 
1254 /**
1255  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1256  * @tr:		The tracing instance to snapshot
1257  * @cond_data:	The data to be tested conditionally, and possibly saved
1258  *
1259  * This is the same as tracing_snapshot() except that the snapshot is
1260  * conditional - the snapshot will only happen if the
1261  * cond_snapshot.update() implementation receiving the cond_data
1262  * returns true, which means that the trace array's cond_snapshot
1263  * update() operation used the cond_data to determine whether the
1264  * snapshot should be taken, and if it was, presumably saved it along
1265  * with the snapshot.
1266  */
1267 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1268 {
1269 	tracing_snapshot_instance_cond(tr, cond_data);
1270 }
1271 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1272 
1273 /**
1274  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1275  * @tr:		The tracing instance
1276  *
1277  * When the user enables a conditional snapshot using
1278  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1279  * with the snapshot.  This accessor is used to retrieve it.
1280  *
1281  * Should not be called from cond_snapshot.update(), since it takes
1282  * the tr->max_lock lock, which the code calling
1283  * cond_snapshot.update() has already done.
1284  *
1285  * Returns the cond_data associated with the trace array's snapshot.
1286  */
1287 void *tracing_cond_snapshot_data(struct trace_array *tr)
1288 {
1289 	void *cond_data = NULL;
1290 
1291 	local_irq_disable();
1292 	arch_spin_lock(&tr->max_lock);
1293 
1294 	if (tr->cond_snapshot)
1295 		cond_data = tr->cond_snapshot->cond_data;
1296 
1297 	arch_spin_unlock(&tr->max_lock);
1298 	local_irq_enable();
1299 
1300 	return cond_data;
1301 }
1302 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1303 
1304 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1305 					struct array_buffer *size_buf, int cpu_id);
1306 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1307 
1308 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1309 {
1310 	int order;
1311 	int ret;
1312 
1313 	if (!tr->allocated_snapshot) {
1314 
1315 		/* Make the snapshot buffer have the same order as main buffer */
1316 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1317 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1318 		if (ret < 0)
1319 			return ret;
1320 
1321 		/* allocate spare buffer */
1322 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1323 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1324 		if (ret < 0)
1325 			return ret;
1326 
1327 		tr->allocated_snapshot = true;
1328 	}
1329 
1330 	return 0;
1331 }
1332 
1333 static void free_snapshot(struct trace_array *tr)
1334 {
1335 	/*
1336 	 * We don't free the ring buffer. instead, resize it because
1337 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1338 	 * we want preserve it.
1339 	 */
1340 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1341 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1342 	set_buffer_entries(&tr->max_buffer, 1);
1343 	tracing_reset_online_cpus(&tr->max_buffer);
1344 	tr->allocated_snapshot = false;
1345 }
1346 
1347 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1348 {
1349 	int ret;
1350 
1351 	lockdep_assert_held(&trace_types_lock);
1352 
1353 	spin_lock(&tr->snapshot_trigger_lock);
1354 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1355 		spin_unlock(&tr->snapshot_trigger_lock);
1356 		return -EBUSY;
1357 	}
1358 
1359 	tr->snapshot++;
1360 	spin_unlock(&tr->snapshot_trigger_lock);
1361 
1362 	ret = tracing_alloc_snapshot_instance(tr);
1363 	if (ret) {
1364 		spin_lock(&tr->snapshot_trigger_lock);
1365 		tr->snapshot--;
1366 		spin_unlock(&tr->snapshot_trigger_lock);
1367 	}
1368 
1369 	return ret;
1370 }
1371 
1372 int tracing_arm_snapshot(struct trace_array *tr)
1373 {
1374 	int ret;
1375 
1376 	mutex_lock(&trace_types_lock);
1377 	ret = tracing_arm_snapshot_locked(tr);
1378 	mutex_unlock(&trace_types_lock);
1379 
1380 	return ret;
1381 }
1382 
1383 void tracing_disarm_snapshot(struct trace_array *tr)
1384 {
1385 	spin_lock(&tr->snapshot_trigger_lock);
1386 	if (!WARN_ON(!tr->snapshot))
1387 		tr->snapshot--;
1388 	spin_unlock(&tr->snapshot_trigger_lock);
1389 }
1390 
1391 /**
1392  * tracing_alloc_snapshot - allocate snapshot buffer.
1393  *
1394  * This only allocates the snapshot buffer if it isn't already
1395  * allocated - it doesn't also take a snapshot.
1396  *
1397  * This is meant to be used in cases where the snapshot buffer needs
1398  * to be set up for events that can't sleep but need to be able to
1399  * trigger a snapshot.
1400  */
1401 int tracing_alloc_snapshot(void)
1402 {
1403 	struct trace_array *tr = &global_trace;
1404 	int ret;
1405 
1406 	ret = tracing_alloc_snapshot_instance(tr);
1407 	WARN_ON(ret < 0);
1408 
1409 	return ret;
1410 }
1411 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1412 
1413 /**
1414  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1415  *
1416  * This is similar to tracing_snapshot(), but it will allocate the
1417  * snapshot buffer if it isn't already allocated. Use this only
1418  * where it is safe to sleep, as the allocation may sleep.
1419  *
1420  * This causes a swap between the snapshot buffer and the current live
1421  * tracing buffer. You can use this to take snapshots of the live
1422  * trace when some condition is triggered, but continue to trace.
1423  */
1424 void tracing_snapshot_alloc(void)
1425 {
1426 	int ret;
1427 
1428 	ret = tracing_alloc_snapshot();
1429 	if (ret < 0)
1430 		return;
1431 
1432 	tracing_snapshot();
1433 }
1434 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1435 
1436 /**
1437  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1438  * @tr:		The tracing instance
1439  * @cond_data:	User data to associate with the snapshot
1440  * @update:	Implementation of the cond_snapshot update function
1441  *
1442  * Check whether the conditional snapshot for the given instance has
1443  * already been enabled, or if the current tracer is already using a
1444  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1445  * save the cond_data and update function inside.
1446  *
1447  * Returns 0 if successful, error otherwise.
1448  */
1449 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1450 				 cond_update_fn_t update)
1451 {
1452 	struct cond_snapshot *cond_snapshot __free(kfree) =
1453 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1454 	int ret;
1455 
1456 	if (!cond_snapshot)
1457 		return -ENOMEM;
1458 
1459 	cond_snapshot->cond_data = cond_data;
1460 	cond_snapshot->update = update;
1461 
1462 	guard(mutex)(&trace_types_lock);
1463 
1464 	if (tr->current_trace->use_max_tr)
1465 		return -EBUSY;
1466 
1467 	/*
1468 	 * The cond_snapshot can only change to NULL without the
1469 	 * trace_types_lock. We don't care if we race with it going
1470 	 * to NULL, but we want to make sure that it's not set to
1471 	 * something other than NULL when we get here, which we can
1472 	 * do safely with only holding the trace_types_lock and not
1473 	 * having to take the max_lock.
1474 	 */
1475 	if (tr->cond_snapshot)
1476 		return -EBUSY;
1477 
1478 	ret = tracing_arm_snapshot_locked(tr);
1479 	if (ret)
1480 		return ret;
1481 
1482 	local_irq_disable();
1483 	arch_spin_lock(&tr->max_lock);
1484 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1485 	arch_spin_unlock(&tr->max_lock);
1486 	local_irq_enable();
1487 
1488 	return 0;
1489 }
1490 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1491 
1492 /**
1493  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1494  * @tr:		The tracing instance
1495  *
1496  * Check whether the conditional snapshot for the given instance is
1497  * enabled; if so, free the cond_snapshot associated with it,
1498  * otherwise return -EINVAL.
1499  *
1500  * Returns 0 if successful, error otherwise.
1501  */
1502 int tracing_snapshot_cond_disable(struct trace_array *tr)
1503 {
1504 	int ret = 0;
1505 
1506 	local_irq_disable();
1507 	arch_spin_lock(&tr->max_lock);
1508 
1509 	if (!tr->cond_snapshot)
1510 		ret = -EINVAL;
1511 	else {
1512 		kfree(tr->cond_snapshot);
1513 		tr->cond_snapshot = NULL;
1514 	}
1515 
1516 	arch_spin_unlock(&tr->max_lock);
1517 	local_irq_enable();
1518 
1519 	tracing_disarm_snapshot(tr);
1520 
1521 	return ret;
1522 }
1523 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1524 #else
1525 void tracing_snapshot(void)
1526 {
1527 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1528 }
1529 EXPORT_SYMBOL_GPL(tracing_snapshot);
1530 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1531 {
1532 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1533 }
1534 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1535 int tracing_alloc_snapshot(void)
1536 {
1537 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1538 	return -ENODEV;
1539 }
1540 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1541 void tracing_snapshot_alloc(void)
1542 {
1543 	/* Give warning */
1544 	tracing_snapshot();
1545 }
1546 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1547 void *tracing_cond_snapshot_data(struct trace_array *tr)
1548 {
1549 	return NULL;
1550 }
1551 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1552 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1553 {
1554 	return -ENODEV;
1555 }
1556 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1557 int tracing_snapshot_cond_disable(struct trace_array *tr)
1558 {
1559 	return false;
1560 }
1561 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1562 #define free_snapshot(tr)	do { } while (0)
1563 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1564 #endif /* CONFIG_TRACER_SNAPSHOT */
1565 
1566 void tracer_tracing_off(struct trace_array *tr)
1567 {
1568 	if (tr->array_buffer.buffer)
1569 		ring_buffer_record_off(tr->array_buffer.buffer);
1570 	/*
1571 	 * This flag is looked at when buffers haven't been allocated
1572 	 * yet, or by some tracers (like irqsoff), that just want to
1573 	 * know if the ring buffer has been disabled, but it can handle
1574 	 * races of where it gets disabled but we still do a record.
1575 	 * As the check is in the fast path of the tracers, it is more
1576 	 * important to be fast than accurate.
1577 	 */
1578 	tr->buffer_disabled = 1;
1579 	/* Make the flag seen by readers */
1580 	smp_wmb();
1581 }
1582 
1583 /**
1584  * tracing_off - turn off tracing buffers
1585  *
1586  * This function stops the tracing buffers from recording data.
1587  * It does not disable any overhead the tracers themselves may
1588  * be causing. This function simply causes all recording to
1589  * the ring buffers to fail.
1590  */
1591 void tracing_off(void)
1592 {
1593 	tracer_tracing_off(&global_trace);
1594 }
1595 EXPORT_SYMBOL_GPL(tracing_off);
1596 
1597 void disable_trace_on_warning(void)
1598 {
1599 	if (__disable_trace_on_warning) {
1600 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1601 			"Disabling tracing due to warning\n");
1602 		tracing_off();
1603 	}
1604 }
1605 
1606 /**
1607  * tracer_tracing_is_on - show real state of ring buffer enabled
1608  * @tr : the trace array to know if ring buffer is enabled
1609  *
1610  * Shows real state of the ring buffer if it is enabled or not.
1611  */
1612 bool tracer_tracing_is_on(struct trace_array *tr)
1613 {
1614 	if (tr->array_buffer.buffer)
1615 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1616 	return !tr->buffer_disabled;
1617 }
1618 
1619 /**
1620  * tracing_is_on - show state of ring buffers enabled
1621  */
1622 int tracing_is_on(void)
1623 {
1624 	return tracer_tracing_is_on(&global_trace);
1625 }
1626 EXPORT_SYMBOL_GPL(tracing_is_on);
1627 
1628 static int __init set_buf_size(char *str)
1629 {
1630 	unsigned long buf_size;
1631 
1632 	if (!str)
1633 		return 0;
1634 	buf_size = memparse(str, &str);
1635 	/*
1636 	 * nr_entries can not be zero and the startup
1637 	 * tests require some buffer space. Therefore
1638 	 * ensure we have at least 4096 bytes of buffer.
1639 	 */
1640 	trace_buf_size = max(4096UL, buf_size);
1641 	return 1;
1642 }
1643 __setup("trace_buf_size=", set_buf_size);
1644 
1645 static int __init set_tracing_thresh(char *str)
1646 {
1647 	unsigned long threshold;
1648 	int ret;
1649 
1650 	if (!str)
1651 		return 0;
1652 	ret = kstrtoul(str, 0, &threshold);
1653 	if (ret < 0)
1654 		return 0;
1655 	tracing_thresh = threshold * 1000;
1656 	return 1;
1657 }
1658 __setup("tracing_thresh=", set_tracing_thresh);
1659 
1660 unsigned long nsecs_to_usecs(unsigned long nsecs)
1661 {
1662 	return nsecs / 1000;
1663 }
1664 
1665 /*
1666  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1667  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1668  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1669  * of strings in the order that the evals (enum) were defined.
1670  */
1671 #undef C
1672 #define C(a, b) b
1673 
1674 /* These must match the bit positions in trace_iterator_flags */
1675 static const char *trace_options[] = {
1676 	TRACE_FLAGS
1677 	NULL
1678 };
1679 
1680 static struct {
1681 	u64 (*func)(void);
1682 	const char *name;
1683 	int in_ns;		/* is this clock in nanoseconds? */
1684 } trace_clocks[] = {
1685 	{ trace_clock_local,		"local",	1 },
1686 	{ trace_clock_global,		"global",	1 },
1687 	{ trace_clock_counter,		"counter",	0 },
1688 	{ trace_clock_jiffies,		"uptime",	0 },
1689 	{ trace_clock,			"perf",		1 },
1690 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1691 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1692 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1693 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1694 	ARCH_TRACE_CLOCKS
1695 };
1696 
1697 bool trace_clock_in_ns(struct trace_array *tr)
1698 {
1699 	if (trace_clocks[tr->clock_id].in_ns)
1700 		return true;
1701 
1702 	return false;
1703 }
1704 
1705 /*
1706  * trace_parser_get_init - gets the buffer for trace parser
1707  */
1708 int trace_parser_get_init(struct trace_parser *parser, int size)
1709 {
1710 	memset(parser, 0, sizeof(*parser));
1711 
1712 	parser->buffer = kmalloc(size, GFP_KERNEL);
1713 	if (!parser->buffer)
1714 		return 1;
1715 
1716 	parser->size = size;
1717 	return 0;
1718 }
1719 
1720 /*
1721  * trace_parser_put - frees the buffer for trace parser
1722  */
1723 void trace_parser_put(struct trace_parser *parser)
1724 {
1725 	kfree(parser->buffer);
1726 	parser->buffer = NULL;
1727 }
1728 
1729 /*
1730  * trace_get_user - reads the user input string separated by  space
1731  * (matched by isspace(ch))
1732  *
1733  * For each string found the 'struct trace_parser' is updated,
1734  * and the function returns.
1735  *
1736  * Returns number of bytes read.
1737  *
1738  * See kernel/trace/trace.h for 'struct trace_parser' details.
1739  */
1740 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1741 	size_t cnt, loff_t *ppos)
1742 {
1743 	char ch;
1744 	size_t read = 0;
1745 	ssize_t ret;
1746 
1747 	if (!*ppos)
1748 		trace_parser_clear(parser);
1749 
1750 	ret = get_user(ch, ubuf++);
1751 	if (ret)
1752 		goto out;
1753 
1754 	read++;
1755 	cnt--;
1756 
1757 	/*
1758 	 * The parser is not finished with the last write,
1759 	 * continue reading the user input without skipping spaces.
1760 	 */
1761 	if (!parser->cont) {
1762 		/* skip white space */
1763 		while (cnt && isspace(ch)) {
1764 			ret = get_user(ch, ubuf++);
1765 			if (ret)
1766 				goto out;
1767 			read++;
1768 			cnt--;
1769 		}
1770 
1771 		parser->idx = 0;
1772 
1773 		/* only spaces were written */
1774 		if (isspace(ch) || !ch) {
1775 			*ppos += read;
1776 			ret = read;
1777 			goto out;
1778 		}
1779 	}
1780 
1781 	/* read the non-space input */
1782 	while (cnt && !isspace(ch) && ch) {
1783 		if (parser->idx < parser->size - 1)
1784 			parser->buffer[parser->idx++] = ch;
1785 		else {
1786 			ret = -EINVAL;
1787 			goto out;
1788 		}
1789 		ret = get_user(ch, ubuf++);
1790 		if (ret)
1791 			goto out;
1792 		read++;
1793 		cnt--;
1794 	}
1795 
1796 	/* We either got finished input or we have to wait for another call. */
1797 	if (isspace(ch) || !ch) {
1798 		parser->buffer[parser->idx] = 0;
1799 		parser->cont = false;
1800 	} else if (parser->idx < parser->size - 1) {
1801 		parser->cont = true;
1802 		parser->buffer[parser->idx++] = ch;
1803 		/* Make sure the parsed string always terminates with '\0'. */
1804 		parser->buffer[parser->idx] = 0;
1805 	} else {
1806 		ret = -EINVAL;
1807 		goto out;
1808 	}
1809 
1810 	*ppos += read;
1811 	ret = read;
1812 
1813 out:
1814 	return ret;
1815 }
1816 
1817 /* TODO add a seq_buf_to_buffer() */
1818 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1819 {
1820 	int len;
1821 
1822 	if (trace_seq_used(s) <= s->readpos)
1823 		return -EBUSY;
1824 
1825 	len = trace_seq_used(s) - s->readpos;
1826 	if (cnt > len)
1827 		cnt = len;
1828 	memcpy(buf, s->buffer + s->readpos, cnt);
1829 
1830 	s->readpos += cnt;
1831 	return cnt;
1832 }
1833 
1834 unsigned long __read_mostly	tracing_thresh;
1835 
1836 #ifdef CONFIG_TRACER_MAX_TRACE
1837 static const struct file_operations tracing_max_lat_fops;
1838 
1839 #ifdef LATENCY_FS_NOTIFY
1840 
1841 static struct workqueue_struct *fsnotify_wq;
1842 
1843 static void latency_fsnotify_workfn(struct work_struct *work)
1844 {
1845 	struct trace_array *tr = container_of(work, struct trace_array,
1846 					      fsnotify_work);
1847 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1848 }
1849 
1850 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1851 {
1852 	struct trace_array *tr = container_of(iwork, struct trace_array,
1853 					      fsnotify_irqwork);
1854 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1855 }
1856 
1857 static void trace_create_maxlat_file(struct trace_array *tr,
1858 				     struct dentry *d_tracer)
1859 {
1860 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1861 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1862 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1863 					      TRACE_MODE_WRITE,
1864 					      d_tracer, tr,
1865 					      &tracing_max_lat_fops);
1866 }
1867 
1868 __init static int latency_fsnotify_init(void)
1869 {
1870 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1871 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1872 	if (!fsnotify_wq) {
1873 		pr_err("Unable to allocate tr_max_lat_wq\n");
1874 		return -ENOMEM;
1875 	}
1876 	return 0;
1877 }
1878 
1879 late_initcall_sync(latency_fsnotify_init);
1880 
1881 void latency_fsnotify(struct trace_array *tr)
1882 {
1883 	if (!fsnotify_wq)
1884 		return;
1885 	/*
1886 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1887 	 * possible that we are called from __schedule() or do_idle(), which
1888 	 * could cause a deadlock.
1889 	 */
1890 	irq_work_queue(&tr->fsnotify_irqwork);
1891 }
1892 
1893 #else /* !LATENCY_FS_NOTIFY */
1894 
1895 #define trace_create_maxlat_file(tr, d_tracer)				\
1896 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1897 			  d_tracer, tr, &tracing_max_lat_fops)
1898 
1899 #endif
1900 
1901 /*
1902  * Copy the new maximum trace into the separate maximum-trace
1903  * structure. (this way the maximum trace is permanently saved,
1904  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1905  */
1906 static void
1907 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1908 {
1909 	struct array_buffer *trace_buf = &tr->array_buffer;
1910 	struct array_buffer *max_buf = &tr->max_buffer;
1911 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1912 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1913 
1914 	max_buf->cpu = cpu;
1915 	max_buf->time_start = data->preempt_timestamp;
1916 
1917 	max_data->saved_latency = tr->max_latency;
1918 	max_data->critical_start = data->critical_start;
1919 	max_data->critical_end = data->critical_end;
1920 
1921 	strscpy(max_data->comm, tsk->comm);
1922 	max_data->pid = tsk->pid;
1923 	/*
1924 	 * If tsk == current, then use current_uid(), as that does not use
1925 	 * RCU. The irq tracer can be called out of RCU scope.
1926 	 */
1927 	if (tsk == current)
1928 		max_data->uid = current_uid();
1929 	else
1930 		max_data->uid = task_uid(tsk);
1931 
1932 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1933 	max_data->policy = tsk->policy;
1934 	max_data->rt_priority = tsk->rt_priority;
1935 
1936 	/* record this tasks comm */
1937 	tracing_record_cmdline(tsk);
1938 	latency_fsnotify(tr);
1939 }
1940 
1941 /**
1942  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1943  * @tr: tracer
1944  * @tsk: the task with the latency
1945  * @cpu: The cpu that initiated the trace.
1946  * @cond_data: User data associated with a conditional snapshot
1947  *
1948  * Flip the buffers between the @tr and the max_tr and record information
1949  * about which task was the cause of this latency.
1950  */
1951 void
1952 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1953 	      void *cond_data)
1954 {
1955 	if (tr->stop_count)
1956 		return;
1957 
1958 	WARN_ON_ONCE(!irqs_disabled());
1959 
1960 	if (!tr->allocated_snapshot) {
1961 		/* Only the nop tracer should hit this when disabling */
1962 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1963 		return;
1964 	}
1965 
1966 	arch_spin_lock(&tr->max_lock);
1967 
1968 	/* Inherit the recordable setting from array_buffer */
1969 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1970 		ring_buffer_record_on(tr->max_buffer.buffer);
1971 	else
1972 		ring_buffer_record_off(tr->max_buffer.buffer);
1973 
1974 #ifdef CONFIG_TRACER_SNAPSHOT
1975 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1976 		arch_spin_unlock(&tr->max_lock);
1977 		return;
1978 	}
1979 #endif
1980 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1981 
1982 	__update_max_tr(tr, tsk, cpu);
1983 
1984 	arch_spin_unlock(&tr->max_lock);
1985 
1986 	/* Any waiters on the old snapshot buffer need to wake up */
1987 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1988 }
1989 
1990 /**
1991  * update_max_tr_single - only copy one trace over, and reset the rest
1992  * @tr: tracer
1993  * @tsk: task with the latency
1994  * @cpu: the cpu of the buffer to copy.
1995  *
1996  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1997  */
1998 void
1999 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2000 {
2001 	int ret;
2002 
2003 	if (tr->stop_count)
2004 		return;
2005 
2006 	WARN_ON_ONCE(!irqs_disabled());
2007 	if (!tr->allocated_snapshot) {
2008 		/* Only the nop tracer should hit this when disabling */
2009 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2010 		return;
2011 	}
2012 
2013 	arch_spin_lock(&tr->max_lock);
2014 
2015 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2016 
2017 	if (ret == -EBUSY) {
2018 		/*
2019 		 * We failed to swap the buffer due to a commit taking
2020 		 * place on this CPU. We fail to record, but we reset
2021 		 * the max trace buffer (no one writes directly to it)
2022 		 * and flag that it failed.
2023 		 * Another reason is resize is in progress.
2024 		 */
2025 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2026 			"Failed to swap buffers due to commit or resize in progress\n");
2027 	}
2028 
2029 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2030 
2031 	__update_max_tr(tr, tsk, cpu);
2032 	arch_spin_unlock(&tr->max_lock);
2033 }
2034 
2035 #endif /* CONFIG_TRACER_MAX_TRACE */
2036 
2037 struct pipe_wait {
2038 	struct trace_iterator		*iter;
2039 	int				wait_index;
2040 };
2041 
2042 static bool wait_pipe_cond(void *data)
2043 {
2044 	struct pipe_wait *pwait = data;
2045 	struct trace_iterator *iter = pwait->iter;
2046 
2047 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2048 		return true;
2049 
2050 	return iter->closed;
2051 }
2052 
2053 static int wait_on_pipe(struct trace_iterator *iter, int full)
2054 {
2055 	struct pipe_wait pwait;
2056 	int ret;
2057 
2058 	/* Iterators are static, they should be filled or empty */
2059 	if (trace_buffer_iter(iter, iter->cpu_file))
2060 		return 0;
2061 
2062 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2063 	pwait.iter = iter;
2064 
2065 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2066 			       wait_pipe_cond, &pwait);
2067 
2068 #ifdef CONFIG_TRACER_MAX_TRACE
2069 	/*
2070 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2071 	 * to happen, this would now be the main buffer.
2072 	 */
2073 	if (iter->snapshot)
2074 		iter->array_buffer = &iter->tr->max_buffer;
2075 #endif
2076 	return ret;
2077 }
2078 
2079 #ifdef CONFIG_FTRACE_STARTUP_TEST
2080 static bool selftests_can_run;
2081 
2082 struct trace_selftests {
2083 	struct list_head		list;
2084 	struct tracer			*type;
2085 };
2086 
2087 static LIST_HEAD(postponed_selftests);
2088 
2089 static int save_selftest(struct tracer *type)
2090 {
2091 	struct trace_selftests *selftest;
2092 
2093 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2094 	if (!selftest)
2095 		return -ENOMEM;
2096 
2097 	selftest->type = type;
2098 	list_add(&selftest->list, &postponed_selftests);
2099 	return 0;
2100 }
2101 
2102 static int run_tracer_selftest(struct tracer *type)
2103 {
2104 	struct trace_array *tr = &global_trace;
2105 	struct tracer *saved_tracer = tr->current_trace;
2106 	int ret;
2107 
2108 	if (!type->selftest || tracing_selftest_disabled)
2109 		return 0;
2110 
2111 	/*
2112 	 * If a tracer registers early in boot up (before scheduling is
2113 	 * initialized and such), then do not run its selftests yet.
2114 	 * Instead, run it a little later in the boot process.
2115 	 */
2116 	if (!selftests_can_run)
2117 		return save_selftest(type);
2118 
2119 	if (!tracing_is_on()) {
2120 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2121 			type->name);
2122 		return 0;
2123 	}
2124 
2125 	/*
2126 	 * Run a selftest on this tracer.
2127 	 * Here we reset the trace buffer, and set the current
2128 	 * tracer to be this tracer. The tracer can then run some
2129 	 * internal tracing to verify that everything is in order.
2130 	 * If we fail, we do not register this tracer.
2131 	 */
2132 	tracing_reset_online_cpus(&tr->array_buffer);
2133 
2134 	tr->current_trace = type;
2135 
2136 #ifdef CONFIG_TRACER_MAX_TRACE
2137 	if (type->use_max_tr) {
2138 		/* If we expanded the buffers, make sure the max is expanded too */
2139 		if (tr->ring_buffer_expanded)
2140 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2141 					   RING_BUFFER_ALL_CPUS);
2142 		tr->allocated_snapshot = true;
2143 	}
2144 #endif
2145 
2146 	/* the test is responsible for initializing and enabling */
2147 	pr_info("Testing tracer %s: ", type->name);
2148 	ret = type->selftest(type, tr);
2149 	/* the test is responsible for resetting too */
2150 	tr->current_trace = saved_tracer;
2151 	if (ret) {
2152 		printk(KERN_CONT "FAILED!\n");
2153 		/* Add the warning after printing 'FAILED' */
2154 		WARN_ON(1);
2155 		return -1;
2156 	}
2157 	/* Only reset on passing, to avoid touching corrupted buffers */
2158 	tracing_reset_online_cpus(&tr->array_buffer);
2159 
2160 #ifdef CONFIG_TRACER_MAX_TRACE
2161 	if (type->use_max_tr) {
2162 		tr->allocated_snapshot = false;
2163 
2164 		/* Shrink the max buffer again */
2165 		if (tr->ring_buffer_expanded)
2166 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2167 					   RING_BUFFER_ALL_CPUS);
2168 	}
2169 #endif
2170 
2171 	printk(KERN_CONT "PASSED\n");
2172 	return 0;
2173 }
2174 
2175 static int do_run_tracer_selftest(struct tracer *type)
2176 {
2177 	int ret;
2178 
2179 	/*
2180 	 * Tests can take a long time, especially if they are run one after the
2181 	 * other, as does happen during bootup when all the tracers are
2182 	 * registered. This could cause the soft lockup watchdog to trigger.
2183 	 */
2184 	cond_resched();
2185 
2186 	tracing_selftest_running = true;
2187 	ret = run_tracer_selftest(type);
2188 	tracing_selftest_running = false;
2189 
2190 	return ret;
2191 }
2192 
2193 static __init int init_trace_selftests(void)
2194 {
2195 	struct trace_selftests *p, *n;
2196 	struct tracer *t, **last;
2197 	int ret;
2198 
2199 	selftests_can_run = true;
2200 
2201 	guard(mutex)(&trace_types_lock);
2202 
2203 	if (list_empty(&postponed_selftests))
2204 		return 0;
2205 
2206 	pr_info("Running postponed tracer tests:\n");
2207 
2208 	tracing_selftest_running = true;
2209 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2210 		/* This loop can take minutes when sanitizers are enabled, so
2211 		 * lets make sure we allow RCU processing.
2212 		 */
2213 		cond_resched();
2214 		ret = run_tracer_selftest(p->type);
2215 		/* If the test fails, then warn and remove from available_tracers */
2216 		if (ret < 0) {
2217 			WARN(1, "tracer: %s failed selftest, disabling\n",
2218 			     p->type->name);
2219 			last = &trace_types;
2220 			for (t = trace_types; t; t = t->next) {
2221 				if (t == p->type) {
2222 					*last = t->next;
2223 					break;
2224 				}
2225 				last = &t->next;
2226 			}
2227 		}
2228 		list_del(&p->list);
2229 		kfree(p);
2230 	}
2231 	tracing_selftest_running = false;
2232 
2233 	return 0;
2234 }
2235 core_initcall(init_trace_selftests);
2236 #else
2237 static inline int do_run_tracer_selftest(struct tracer *type)
2238 {
2239 	return 0;
2240 }
2241 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2242 
2243 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2244 
2245 static void __init apply_trace_boot_options(void);
2246 
2247 /**
2248  * register_tracer - register a tracer with the ftrace system.
2249  * @type: the plugin for the tracer
2250  *
2251  * Register a new plugin tracer.
2252  */
2253 int __init register_tracer(struct tracer *type)
2254 {
2255 	struct tracer *t;
2256 	int ret = 0;
2257 
2258 	if (!type->name) {
2259 		pr_info("Tracer must have a name\n");
2260 		return -1;
2261 	}
2262 
2263 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2264 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2265 		return -1;
2266 	}
2267 
2268 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2269 		pr_warn("Can not register tracer %s due to lockdown\n",
2270 			   type->name);
2271 		return -EPERM;
2272 	}
2273 
2274 	mutex_lock(&trace_types_lock);
2275 
2276 	for (t = trace_types; t; t = t->next) {
2277 		if (strcmp(type->name, t->name) == 0) {
2278 			/* already found */
2279 			pr_info("Tracer %s already registered\n",
2280 				type->name);
2281 			ret = -1;
2282 			goto out;
2283 		}
2284 	}
2285 
2286 	if (!type->set_flag)
2287 		type->set_flag = &dummy_set_flag;
2288 	if (!type->flags) {
2289 		/*allocate a dummy tracer_flags*/
2290 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2291 		if (!type->flags) {
2292 			ret = -ENOMEM;
2293 			goto out;
2294 		}
2295 		type->flags->val = 0;
2296 		type->flags->opts = dummy_tracer_opt;
2297 	} else
2298 		if (!type->flags->opts)
2299 			type->flags->opts = dummy_tracer_opt;
2300 
2301 	/* store the tracer for __set_tracer_option */
2302 	type->flags->trace = type;
2303 
2304 	ret = do_run_tracer_selftest(type);
2305 	if (ret < 0)
2306 		goto out;
2307 
2308 	type->next = trace_types;
2309 	trace_types = type;
2310 	add_tracer_options(&global_trace, type);
2311 
2312  out:
2313 	mutex_unlock(&trace_types_lock);
2314 
2315 	if (ret || !default_bootup_tracer)
2316 		goto out_unlock;
2317 
2318 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2319 		goto out_unlock;
2320 
2321 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2322 	/* Do we want this tracer to start on bootup? */
2323 	tracing_set_tracer(&global_trace, type->name);
2324 	default_bootup_tracer = NULL;
2325 
2326 	apply_trace_boot_options();
2327 
2328 	/* disable other selftests, since this will break it. */
2329 	disable_tracing_selftest("running a tracer");
2330 
2331  out_unlock:
2332 	return ret;
2333 }
2334 
2335 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2336 {
2337 	struct trace_buffer *buffer = buf->buffer;
2338 
2339 	if (!buffer)
2340 		return;
2341 
2342 	ring_buffer_record_disable(buffer);
2343 
2344 	/* Make sure all commits have finished */
2345 	synchronize_rcu();
2346 	ring_buffer_reset_cpu(buffer, cpu);
2347 
2348 	ring_buffer_record_enable(buffer);
2349 }
2350 
2351 void tracing_reset_online_cpus(struct array_buffer *buf)
2352 {
2353 	struct trace_buffer *buffer = buf->buffer;
2354 
2355 	if (!buffer)
2356 		return;
2357 
2358 	ring_buffer_record_disable(buffer);
2359 
2360 	/* Make sure all commits have finished */
2361 	synchronize_rcu();
2362 
2363 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2364 
2365 	ring_buffer_reset_online_cpus(buffer);
2366 
2367 	ring_buffer_record_enable(buffer);
2368 }
2369 
2370 static void tracing_reset_all_cpus(struct array_buffer *buf)
2371 {
2372 	struct trace_buffer *buffer = buf->buffer;
2373 
2374 	if (!buffer)
2375 		return;
2376 
2377 	ring_buffer_record_disable(buffer);
2378 
2379 	/* Make sure all commits have finished */
2380 	synchronize_rcu();
2381 
2382 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2383 
2384 	ring_buffer_reset(buffer);
2385 
2386 	ring_buffer_record_enable(buffer);
2387 }
2388 
2389 /* Must have trace_types_lock held */
2390 void tracing_reset_all_online_cpus_unlocked(void)
2391 {
2392 	struct trace_array *tr;
2393 
2394 	lockdep_assert_held(&trace_types_lock);
2395 
2396 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2397 		if (!tr->clear_trace)
2398 			continue;
2399 		tr->clear_trace = false;
2400 		tracing_reset_online_cpus(&tr->array_buffer);
2401 #ifdef CONFIG_TRACER_MAX_TRACE
2402 		tracing_reset_online_cpus(&tr->max_buffer);
2403 #endif
2404 	}
2405 }
2406 
2407 void tracing_reset_all_online_cpus(void)
2408 {
2409 	mutex_lock(&trace_types_lock);
2410 	tracing_reset_all_online_cpus_unlocked();
2411 	mutex_unlock(&trace_types_lock);
2412 }
2413 
2414 int is_tracing_stopped(void)
2415 {
2416 	return global_trace.stop_count;
2417 }
2418 
2419 static void tracing_start_tr(struct trace_array *tr)
2420 {
2421 	struct trace_buffer *buffer;
2422 	unsigned long flags;
2423 
2424 	if (tracing_disabled)
2425 		return;
2426 
2427 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2428 	if (--tr->stop_count) {
2429 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2430 			/* Someone screwed up their debugging */
2431 			tr->stop_count = 0;
2432 		}
2433 		goto out;
2434 	}
2435 
2436 	/* Prevent the buffers from switching */
2437 	arch_spin_lock(&tr->max_lock);
2438 
2439 	buffer = tr->array_buffer.buffer;
2440 	if (buffer)
2441 		ring_buffer_record_enable(buffer);
2442 
2443 #ifdef CONFIG_TRACER_MAX_TRACE
2444 	buffer = tr->max_buffer.buffer;
2445 	if (buffer)
2446 		ring_buffer_record_enable(buffer);
2447 #endif
2448 
2449 	arch_spin_unlock(&tr->max_lock);
2450 
2451  out:
2452 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2453 }
2454 
2455 /**
2456  * tracing_start - quick start of the tracer
2457  *
2458  * If tracing is enabled but was stopped by tracing_stop,
2459  * this will start the tracer back up.
2460  */
2461 void tracing_start(void)
2462 
2463 {
2464 	return tracing_start_tr(&global_trace);
2465 }
2466 
2467 static void tracing_stop_tr(struct trace_array *tr)
2468 {
2469 	struct trace_buffer *buffer;
2470 	unsigned long flags;
2471 
2472 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2473 	if (tr->stop_count++)
2474 		goto out;
2475 
2476 	/* Prevent the buffers from switching */
2477 	arch_spin_lock(&tr->max_lock);
2478 
2479 	buffer = tr->array_buffer.buffer;
2480 	if (buffer)
2481 		ring_buffer_record_disable(buffer);
2482 
2483 #ifdef CONFIG_TRACER_MAX_TRACE
2484 	buffer = tr->max_buffer.buffer;
2485 	if (buffer)
2486 		ring_buffer_record_disable(buffer);
2487 #endif
2488 
2489 	arch_spin_unlock(&tr->max_lock);
2490 
2491  out:
2492 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2493 }
2494 
2495 /**
2496  * tracing_stop - quick stop of the tracer
2497  *
2498  * Light weight way to stop tracing. Use in conjunction with
2499  * tracing_start.
2500  */
2501 void tracing_stop(void)
2502 {
2503 	return tracing_stop_tr(&global_trace);
2504 }
2505 
2506 /*
2507  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2508  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2509  * simplifies those functions and keeps them in sync.
2510  */
2511 enum print_line_t trace_handle_return(struct trace_seq *s)
2512 {
2513 	return trace_seq_has_overflowed(s) ?
2514 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2515 }
2516 EXPORT_SYMBOL_GPL(trace_handle_return);
2517 
2518 static unsigned short migration_disable_value(void)
2519 {
2520 #if defined(CONFIG_SMP)
2521 	return current->migration_disabled;
2522 #else
2523 	return 0;
2524 #endif
2525 }
2526 
2527 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2528 {
2529 	unsigned int trace_flags = irqs_status;
2530 	unsigned int pc;
2531 
2532 	pc = preempt_count();
2533 
2534 	if (pc & NMI_MASK)
2535 		trace_flags |= TRACE_FLAG_NMI;
2536 	if (pc & HARDIRQ_MASK)
2537 		trace_flags |= TRACE_FLAG_HARDIRQ;
2538 	if (in_serving_softirq())
2539 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2540 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2541 		trace_flags |= TRACE_FLAG_BH_OFF;
2542 
2543 	if (tif_need_resched())
2544 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2545 	if (test_preempt_need_resched())
2546 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2547 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2548 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2549 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2550 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2551 }
2552 
2553 struct ring_buffer_event *
2554 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2555 			  int type,
2556 			  unsigned long len,
2557 			  unsigned int trace_ctx)
2558 {
2559 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2560 }
2561 
2562 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2563 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2564 static int trace_buffered_event_ref;
2565 
2566 /**
2567  * trace_buffered_event_enable - enable buffering events
2568  *
2569  * When events are being filtered, it is quicker to use a temporary
2570  * buffer to write the event data into if there's a likely chance
2571  * that it will not be committed. The discard of the ring buffer
2572  * is not as fast as committing, and is much slower than copying
2573  * a commit.
2574  *
2575  * When an event is to be filtered, allocate per cpu buffers to
2576  * write the event data into, and if the event is filtered and discarded
2577  * it is simply dropped, otherwise, the entire data is to be committed
2578  * in one shot.
2579  */
2580 void trace_buffered_event_enable(void)
2581 {
2582 	struct ring_buffer_event *event;
2583 	struct page *page;
2584 	int cpu;
2585 
2586 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2587 
2588 	if (trace_buffered_event_ref++)
2589 		return;
2590 
2591 	for_each_tracing_cpu(cpu) {
2592 		page = alloc_pages_node(cpu_to_node(cpu),
2593 					GFP_KERNEL | __GFP_NORETRY, 0);
2594 		/* This is just an optimization and can handle failures */
2595 		if (!page) {
2596 			pr_err("Failed to allocate event buffer\n");
2597 			break;
2598 		}
2599 
2600 		event = page_address(page);
2601 		memset(event, 0, sizeof(*event));
2602 
2603 		per_cpu(trace_buffered_event, cpu) = event;
2604 
2605 		preempt_disable();
2606 		if (cpu == smp_processor_id() &&
2607 		    __this_cpu_read(trace_buffered_event) !=
2608 		    per_cpu(trace_buffered_event, cpu))
2609 			WARN_ON_ONCE(1);
2610 		preempt_enable();
2611 	}
2612 }
2613 
2614 static void enable_trace_buffered_event(void *data)
2615 {
2616 	/* Probably not needed, but do it anyway */
2617 	smp_rmb();
2618 	this_cpu_dec(trace_buffered_event_cnt);
2619 }
2620 
2621 static void disable_trace_buffered_event(void *data)
2622 {
2623 	this_cpu_inc(trace_buffered_event_cnt);
2624 }
2625 
2626 /**
2627  * trace_buffered_event_disable - disable buffering events
2628  *
2629  * When a filter is removed, it is faster to not use the buffered
2630  * events, and to commit directly into the ring buffer. Free up
2631  * the temp buffers when there are no more users. This requires
2632  * special synchronization with current events.
2633  */
2634 void trace_buffered_event_disable(void)
2635 {
2636 	int cpu;
2637 
2638 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2639 
2640 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2641 		return;
2642 
2643 	if (--trace_buffered_event_ref)
2644 		return;
2645 
2646 	/* For each CPU, set the buffer as used. */
2647 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2648 			 NULL, true);
2649 
2650 	/* Wait for all current users to finish */
2651 	synchronize_rcu();
2652 
2653 	for_each_tracing_cpu(cpu) {
2654 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2655 		per_cpu(trace_buffered_event, cpu) = NULL;
2656 	}
2657 
2658 	/*
2659 	 * Wait for all CPUs that potentially started checking if they can use
2660 	 * their event buffer only after the previous synchronize_rcu() call and
2661 	 * they still read a valid pointer from trace_buffered_event. It must be
2662 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2663 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2664 	 */
2665 	synchronize_rcu();
2666 
2667 	/* For each CPU, relinquish the buffer */
2668 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2669 			 true);
2670 }
2671 
2672 static struct trace_buffer *temp_buffer;
2673 
2674 struct ring_buffer_event *
2675 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2676 			  struct trace_event_file *trace_file,
2677 			  int type, unsigned long len,
2678 			  unsigned int trace_ctx)
2679 {
2680 	struct ring_buffer_event *entry;
2681 	struct trace_array *tr = trace_file->tr;
2682 	int val;
2683 
2684 	*current_rb = tr->array_buffer.buffer;
2685 
2686 	if (!tr->no_filter_buffering_ref &&
2687 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2688 		preempt_disable_notrace();
2689 		/*
2690 		 * Filtering is on, so try to use the per cpu buffer first.
2691 		 * This buffer will simulate a ring_buffer_event,
2692 		 * where the type_len is zero and the array[0] will
2693 		 * hold the full length.
2694 		 * (see include/linux/ring-buffer.h for details on
2695 		 *  how the ring_buffer_event is structured).
2696 		 *
2697 		 * Using a temp buffer during filtering and copying it
2698 		 * on a matched filter is quicker than writing directly
2699 		 * into the ring buffer and then discarding it when
2700 		 * it doesn't match. That is because the discard
2701 		 * requires several atomic operations to get right.
2702 		 * Copying on match and doing nothing on a failed match
2703 		 * is still quicker than no copy on match, but having
2704 		 * to discard out of the ring buffer on a failed match.
2705 		 */
2706 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2707 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2708 
2709 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2710 
2711 			/*
2712 			 * Preemption is disabled, but interrupts and NMIs
2713 			 * can still come in now. If that happens after
2714 			 * the above increment, then it will have to go
2715 			 * back to the old method of allocating the event
2716 			 * on the ring buffer, and if the filter fails, it
2717 			 * will have to call ring_buffer_discard_commit()
2718 			 * to remove it.
2719 			 *
2720 			 * Need to also check the unlikely case that the
2721 			 * length is bigger than the temp buffer size.
2722 			 * If that happens, then the reserve is pretty much
2723 			 * guaranteed to fail, as the ring buffer currently
2724 			 * only allows events less than a page. But that may
2725 			 * change in the future, so let the ring buffer reserve
2726 			 * handle the failure in that case.
2727 			 */
2728 			if (val == 1 && likely(len <= max_len)) {
2729 				trace_event_setup(entry, type, trace_ctx);
2730 				entry->array[0] = len;
2731 				/* Return with preemption disabled */
2732 				return entry;
2733 			}
2734 			this_cpu_dec(trace_buffered_event_cnt);
2735 		}
2736 		/* __trace_buffer_lock_reserve() disables preemption */
2737 		preempt_enable_notrace();
2738 	}
2739 
2740 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2741 					    trace_ctx);
2742 	/*
2743 	 * If tracing is off, but we have triggers enabled
2744 	 * we still need to look at the event data. Use the temp_buffer
2745 	 * to store the trace event for the trigger to use. It's recursive
2746 	 * safe and will not be recorded anywhere.
2747 	 */
2748 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2749 		*current_rb = temp_buffer;
2750 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2751 						    trace_ctx);
2752 	}
2753 	return entry;
2754 }
2755 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2756 
2757 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2758 static DEFINE_MUTEX(tracepoint_printk_mutex);
2759 
2760 static void output_printk(struct trace_event_buffer *fbuffer)
2761 {
2762 	struct trace_event_call *event_call;
2763 	struct trace_event_file *file;
2764 	struct trace_event *event;
2765 	unsigned long flags;
2766 	struct trace_iterator *iter = tracepoint_print_iter;
2767 
2768 	/* We should never get here if iter is NULL */
2769 	if (WARN_ON_ONCE(!iter))
2770 		return;
2771 
2772 	event_call = fbuffer->trace_file->event_call;
2773 	if (!event_call || !event_call->event.funcs ||
2774 	    !event_call->event.funcs->trace)
2775 		return;
2776 
2777 	file = fbuffer->trace_file;
2778 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2779 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2780 	     !filter_match_preds(file->filter, fbuffer->entry)))
2781 		return;
2782 
2783 	event = &fbuffer->trace_file->event_call->event;
2784 
2785 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2786 	trace_seq_init(&iter->seq);
2787 	iter->ent = fbuffer->entry;
2788 	event_call->event.funcs->trace(iter, 0, event);
2789 	trace_seq_putc(&iter->seq, 0);
2790 	printk("%s", iter->seq.buffer);
2791 
2792 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2793 }
2794 
2795 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2796 			     void *buffer, size_t *lenp,
2797 			     loff_t *ppos)
2798 {
2799 	int save_tracepoint_printk;
2800 	int ret;
2801 
2802 	guard(mutex)(&tracepoint_printk_mutex);
2803 	save_tracepoint_printk = tracepoint_printk;
2804 
2805 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2806 
2807 	/*
2808 	 * This will force exiting early, as tracepoint_printk
2809 	 * is always zero when tracepoint_printk_iter is not allocated
2810 	 */
2811 	if (!tracepoint_print_iter)
2812 		tracepoint_printk = 0;
2813 
2814 	if (save_tracepoint_printk == tracepoint_printk)
2815 		return ret;
2816 
2817 	if (tracepoint_printk)
2818 		static_key_enable(&tracepoint_printk_key.key);
2819 	else
2820 		static_key_disable(&tracepoint_printk_key.key);
2821 
2822 	return ret;
2823 }
2824 
2825 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2826 {
2827 	enum event_trigger_type tt = ETT_NONE;
2828 	struct trace_event_file *file = fbuffer->trace_file;
2829 
2830 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2831 			fbuffer->entry, &tt))
2832 		goto discard;
2833 
2834 	if (static_key_false(&tracepoint_printk_key.key))
2835 		output_printk(fbuffer);
2836 
2837 	if (static_branch_unlikely(&trace_event_exports_enabled))
2838 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2839 
2840 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2841 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2842 
2843 discard:
2844 	if (tt)
2845 		event_triggers_post_call(file, tt);
2846 
2847 }
2848 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2849 
2850 /*
2851  * Skip 3:
2852  *
2853  *   trace_buffer_unlock_commit_regs()
2854  *   trace_event_buffer_commit()
2855  *   trace_event_raw_event_xxx()
2856  */
2857 # define STACK_SKIP 3
2858 
2859 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2860 				     struct trace_buffer *buffer,
2861 				     struct ring_buffer_event *event,
2862 				     unsigned int trace_ctx,
2863 				     struct pt_regs *regs)
2864 {
2865 	__buffer_unlock_commit(buffer, event);
2866 
2867 	/*
2868 	 * If regs is not set, then skip the necessary functions.
2869 	 * Note, we can still get here via blktrace, wakeup tracer
2870 	 * and mmiotrace, but that's ok if they lose a function or
2871 	 * two. They are not that meaningful.
2872 	 */
2873 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2874 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2875 }
2876 
2877 /*
2878  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2879  */
2880 void
2881 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2882 				   struct ring_buffer_event *event)
2883 {
2884 	__buffer_unlock_commit(buffer, event);
2885 }
2886 
2887 void
2888 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2889 	       parent_ip, unsigned int trace_ctx)
2890 {
2891 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2892 	struct ring_buffer_event *event;
2893 	struct ftrace_entry *entry;
2894 
2895 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2896 					    trace_ctx);
2897 	if (!event)
2898 		return;
2899 	entry	= ring_buffer_event_data(event);
2900 	entry->ip			= ip;
2901 	entry->parent_ip		= parent_ip;
2902 
2903 	if (static_branch_unlikely(&trace_function_exports_enabled))
2904 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2905 	__buffer_unlock_commit(buffer, event);
2906 }
2907 
2908 #ifdef CONFIG_STACKTRACE
2909 
2910 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2911 #define FTRACE_KSTACK_NESTING	4
2912 
2913 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2914 
2915 struct ftrace_stack {
2916 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2917 };
2918 
2919 
2920 struct ftrace_stacks {
2921 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2922 };
2923 
2924 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2925 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2926 
2927 static void __ftrace_trace_stack(struct trace_array *tr,
2928 				 struct trace_buffer *buffer,
2929 				 unsigned int trace_ctx,
2930 				 int skip, struct pt_regs *regs)
2931 {
2932 	struct ring_buffer_event *event;
2933 	unsigned int size, nr_entries;
2934 	struct ftrace_stack *fstack;
2935 	struct stack_entry *entry;
2936 	int stackidx;
2937 
2938 	/*
2939 	 * Add one, for this function and the call to save_stack_trace()
2940 	 * If regs is set, then these functions will not be in the way.
2941 	 */
2942 #ifndef CONFIG_UNWINDER_ORC
2943 	if (!regs)
2944 		skip++;
2945 #endif
2946 
2947 	preempt_disable_notrace();
2948 
2949 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2950 
2951 	/* This should never happen. If it does, yell once and skip */
2952 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2953 		goto out;
2954 
2955 	/*
2956 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2957 	 * interrupt will either see the value pre increment or post
2958 	 * increment. If the interrupt happens pre increment it will have
2959 	 * restored the counter when it returns.  We just need a barrier to
2960 	 * keep gcc from moving things around.
2961 	 */
2962 	barrier();
2963 
2964 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2965 	size = ARRAY_SIZE(fstack->calls);
2966 
2967 	if (regs) {
2968 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2969 						   size, skip);
2970 	} else {
2971 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2972 	}
2973 
2974 #ifdef CONFIG_DYNAMIC_FTRACE
2975 	/* Mark entry of stack trace as trampoline code */
2976 	if (tr->ops && tr->ops->trampoline) {
2977 		unsigned long tramp_start = tr->ops->trampoline;
2978 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2979 		unsigned long *calls = fstack->calls;
2980 
2981 		for (int i = 0; i < nr_entries; i++) {
2982 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
2983 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
2984 		}
2985 	}
2986 #endif
2987 
2988 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2989 				    struct_size(entry, caller, nr_entries),
2990 				    trace_ctx);
2991 	if (!event)
2992 		goto out;
2993 	entry = ring_buffer_event_data(event);
2994 
2995 	entry->size = nr_entries;
2996 	memcpy(&entry->caller, fstack->calls,
2997 	       flex_array_size(entry, caller, nr_entries));
2998 
2999 	__buffer_unlock_commit(buffer, event);
3000 
3001  out:
3002 	/* Again, don't let gcc optimize things here */
3003 	barrier();
3004 	__this_cpu_dec(ftrace_stack_reserve);
3005 	preempt_enable_notrace();
3006 
3007 }
3008 
3009 static inline void ftrace_trace_stack(struct trace_array *tr,
3010 				      struct trace_buffer *buffer,
3011 				      unsigned int trace_ctx,
3012 				      int skip, struct pt_regs *regs)
3013 {
3014 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3015 		return;
3016 
3017 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3018 }
3019 
3020 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3021 		   int skip)
3022 {
3023 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3024 
3025 	if (rcu_is_watching()) {
3026 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3027 		return;
3028 	}
3029 
3030 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3031 		return;
3032 
3033 	/*
3034 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3035 	 * but if the above rcu_is_watching() failed, then the NMI
3036 	 * triggered someplace critical, and ct_irq_enter() should
3037 	 * not be called from NMI.
3038 	 */
3039 	if (unlikely(in_nmi()))
3040 		return;
3041 
3042 	ct_irq_enter_irqson();
3043 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3044 	ct_irq_exit_irqson();
3045 }
3046 
3047 /**
3048  * trace_dump_stack - record a stack back trace in the trace buffer
3049  * @skip: Number of functions to skip (helper handlers)
3050  */
3051 void trace_dump_stack(int skip)
3052 {
3053 	if (tracing_disabled || tracing_selftest_running)
3054 		return;
3055 
3056 #ifndef CONFIG_UNWINDER_ORC
3057 	/* Skip 1 to skip this function. */
3058 	skip++;
3059 #endif
3060 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3061 				tracing_gen_ctx(), skip, NULL);
3062 }
3063 EXPORT_SYMBOL_GPL(trace_dump_stack);
3064 
3065 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3066 static DEFINE_PER_CPU(int, user_stack_count);
3067 
3068 static void
3069 ftrace_trace_userstack(struct trace_array *tr,
3070 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3071 {
3072 	struct ring_buffer_event *event;
3073 	struct userstack_entry *entry;
3074 
3075 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3076 		return;
3077 
3078 	/*
3079 	 * NMIs can not handle page faults, even with fix ups.
3080 	 * The save user stack can (and often does) fault.
3081 	 */
3082 	if (unlikely(in_nmi()))
3083 		return;
3084 
3085 	/*
3086 	 * prevent recursion, since the user stack tracing may
3087 	 * trigger other kernel events.
3088 	 */
3089 	preempt_disable();
3090 	if (__this_cpu_read(user_stack_count))
3091 		goto out;
3092 
3093 	__this_cpu_inc(user_stack_count);
3094 
3095 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3096 					    sizeof(*entry), trace_ctx);
3097 	if (!event)
3098 		goto out_drop_count;
3099 	entry	= ring_buffer_event_data(event);
3100 
3101 	entry->tgid		= current->tgid;
3102 	memset(&entry->caller, 0, sizeof(entry->caller));
3103 
3104 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3105 	__buffer_unlock_commit(buffer, event);
3106 
3107  out_drop_count:
3108 	__this_cpu_dec(user_stack_count);
3109  out:
3110 	preempt_enable();
3111 }
3112 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3113 static void ftrace_trace_userstack(struct trace_array *tr,
3114 				   struct trace_buffer *buffer,
3115 				   unsigned int trace_ctx)
3116 {
3117 }
3118 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3119 
3120 #endif /* CONFIG_STACKTRACE */
3121 
3122 static inline void
3123 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3124 			  unsigned long long delta)
3125 {
3126 	entry->bottom_delta_ts = delta & U32_MAX;
3127 	entry->top_delta_ts = (delta >> 32);
3128 }
3129 
3130 void trace_last_func_repeats(struct trace_array *tr,
3131 			     struct trace_func_repeats *last_info,
3132 			     unsigned int trace_ctx)
3133 {
3134 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3135 	struct func_repeats_entry *entry;
3136 	struct ring_buffer_event *event;
3137 	u64 delta;
3138 
3139 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3140 					    sizeof(*entry), trace_ctx);
3141 	if (!event)
3142 		return;
3143 
3144 	delta = ring_buffer_event_time_stamp(buffer, event) -
3145 		last_info->ts_last_call;
3146 
3147 	entry = ring_buffer_event_data(event);
3148 	entry->ip = last_info->ip;
3149 	entry->parent_ip = last_info->parent_ip;
3150 	entry->count = last_info->count;
3151 	func_repeats_set_delta_ts(entry, delta);
3152 
3153 	__buffer_unlock_commit(buffer, event);
3154 }
3155 
3156 /* created for use with alloc_percpu */
3157 struct trace_buffer_struct {
3158 	int nesting;
3159 	char buffer[4][TRACE_BUF_SIZE];
3160 };
3161 
3162 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3163 
3164 /*
3165  * This allows for lockless recording.  If we're nested too deeply, then
3166  * this returns NULL.
3167  */
3168 static char *get_trace_buf(void)
3169 {
3170 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3171 
3172 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3173 		return NULL;
3174 
3175 	buffer->nesting++;
3176 
3177 	/* Interrupts must see nesting incremented before we use the buffer */
3178 	barrier();
3179 	return &buffer->buffer[buffer->nesting - 1][0];
3180 }
3181 
3182 static void put_trace_buf(void)
3183 {
3184 	/* Don't let the decrement of nesting leak before this */
3185 	barrier();
3186 	this_cpu_dec(trace_percpu_buffer->nesting);
3187 }
3188 
3189 static int alloc_percpu_trace_buffer(void)
3190 {
3191 	struct trace_buffer_struct __percpu *buffers;
3192 
3193 	if (trace_percpu_buffer)
3194 		return 0;
3195 
3196 	buffers = alloc_percpu(struct trace_buffer_struct);
3197 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3198 		return -ENOMEM;
3199 
3200 	trace_percpu_buffer = buffers;
3201 	return 0;
3202 }
3203 
3204 static int buffers_allocated;
3205 
3206 void trace_printk_init_buffers(void)
3207 {
3208 	if (buffers_allocated)
3209 		return;
3210 
3211 	if (alloc_percpu_trace_buffer())
3212 		return;
3213 
3214 	/* trace_printk() is for debug use only. Don't use it in production. */
3215 
3216 	pr_warn("\n");
3217 	pr_warn("**********************************************************\n");
3218 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3219 	pr_warn("**                                                      **\n");
3220 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3221 	pr_warn("**                                                      **\n");
3222 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3223 	pr_warn("** unsafe for production use.                           **\n");
3224 	pr_warn("**                                                      **\n");
3225 	pr_warn("** If you see this message and you are not debugging    **\n");
3226 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3227 	pr_warn("**                                                      **\n");
3228 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3229 	pr_warn("**********************************************************\n");
3230 
3231 	/* Expand the buffers to set size */
3232 	tracing_update_buffers(&global_trace);
3233 
3234 	buffers_allocated = 1;
3235 
3236 	/*
3237 	 * trace_printk_init_buffers() can be called by modules.
3238 	 * If that happens, then we need to start cmdline recording
3239 	 * directly here. If the global_trace.buffer is already
3240 	 * allocated here, then this was called by module code.
3241 	 */
3242 	if (global_trace.array_buffer.buffer)
3243 		tracing_start_cmdline_record();
3244 }
3245 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3246 
3247 void trace_printk_start_comm(void)
3248 {
3249 	/* Start tracing comms if trace printk is set */
3250 	if (!buffers_allocated)
3251 		return;
3252 	tracing_start_cmdline_record();
3253 }
3254 
3255 static void trace_printk_start_stop_comm(int enabled)
3256 {
3257 	if (!buffers_allocated)
3258 		return;
3259 
3260 	if (enabled)
3261 		tracing_start_cmdline_record();
3262 	else
3263 		tracing_stop_cmdline_record();
3264 }
3265 
3266 /**
3267  * trace_vbprintk - write binary msg to tracing buffer
3268  * @ip:    The address of the caller
3269  * @fmt:   The string format to write to the buffer
3270  * @args:  Arguments for @fmt
3271  */
3272 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3273 {
3274 	struct ring_buffer_event *event;
3275 	struct trace_buffer *buffer;
3276 	struct trace_array *tr = READ_ONCE(printk_trace);
3277 	struct bprint_entry *entry;
3278 	unsigned int trace_ctx;
3279 	char *tbuffer;
3280 	int len = 0, size;
3281 
3282 	if (!printk_binsafe(tr))
3283 		return trace_vprintk(ip, fmt, args);
3284 
3285 	if (unlikely(tracing_selftest_running || tracing_disabled))
3286 		return 0;
3287 
3288 	/* Don't pollute graph traces with trace_vprintk internals */
3289 	pause_graph_tracing();
3290 
3291 	trace_ctx = tracing_gen_ctx();
3292 	preempt_disable_notrace();
3293 
3294 	tbuffer = get_trace_buf();
3295 	if (!tbuffer) {
3296 		len = 0;
3297 		goto out_nobuffer;
3298 	}
3299 
3300 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3301 
3302 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3303 		goto out_put;
3304 
3305 	size = sizeof(*entry) + sizeof(u32) * len;
3306 	buffer = tr->array_buffer.buffer;
3307 	ring_buffer_nest_start(buffer);
3308 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3309 					    trace_ctx);
3310 	if (!event)
3311 		goto out;
3312 	entry = ring_buffer_event_data(event);
3313 	entry->ip			= ip;
3314 	entry->fmt			= fmt;
3315 
3316 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3317 	__buffer_unlock_commit(buffer, event);
3318 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3319 
3320 out:
3321 	ring_buffer_nest_end(buffer);
3322 out_put:
3323 	put_trace_buf();
3324 
3325 out_nobuffer:
3326 	preempt_enable_notrace();
3327 	unpause_graph_tracing();
3328 
3329 	return len;
3330 }
3331 EXPORT_SYMBOL_GPL(trace_vbprintk);
3332 
3333 __printf(3, 0)
3334 static int
3335 __trace_array_vprintk(struct trace_buffer *buffer,
3336 		      unsigned long ip, const char *fmt, va_list args)
3337 {
3338 	struct ring_buffer_event *event;
3339 	int len = 0, size;
3340 	struct print_entry *entry;
3341 	unsigned int trace_ctx;
3342 	char *tbuffer;
3343 
3344 	if (tracing_disabled)
3345 		return 0;
3346 
3347 	/* Don't pollute graph traces with trace_vprintk internals */
3348 	pause_graph_tracing();
3349 
3350 	trace_ctx = tracing_gen_ctx();
3351 	preempt_disable_notrace();
3352 
3353 
3354 	tbuffer = get_trace_buf();
3355 	if (!tbuffer) {
3356 		len = 0;
3357 		goto out_nobuffer;
3358 	}
3359 
3360 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3361 
3362 	size = sizeof(*entry) + len + 1;
3363 	ring_buffer_nest_start(buffer);
3364 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3365 					    trace_ctx);
3366 	if (!event)
3367 		goto out;
3368 	entry = ring_buffer_event_data(event);
3369 	entry->ip = ip;
3370 
3371 	memcpy(&entry->buf, tbuffer, len + 1);
3372 	__buffer_unlock_commit(buffer, event);
3373 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3374 
3375 out:
3376 	ring_buffer_nest_end(buffer);
3377 	put_trace_buf();
3378 
3379 out_nobuffer:
3380 	preempt_enable_notrace();
3381 	unpause_graph_tracing();
3382 
3383 	return len;
3384 }
3385 
3386 __printf(3, 0)
3387 int trace_array_vprintk(struct trace_array *tr,
3388 			unsigned long ip, const char *fmt, va_list args)
3389 {
3390 	if (tracing_selftest_running && tr == &global_trace)
3391 		return 0;
3392 
3393 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3394 }
3395 
3396 /**
3397  * trace_array_printk - Print a message to a specific instance
3398  * @tr: The instance trace_array descriptor
3399  * @ip: The instruction pointer that this is called from.
3400  * @fmt: The format to print (printf format)
3401  *
3402  * If a subsystem sets up its own instance, they have the right to
3403  * printk strings into their tracing instance buffer using this
3404  * function. Note, this function will not write into the top level
3405  * buffer (use trace_printk() for that), as writing into the top level
3406  * buffer should only have events that can be individually disabled.
3407  * trace_printk() is only used for debugging a kernel, and should not
3408  * be ever incorporated in normal use.
3409  *
3410  * trace_array_printk() can be used, as it will not add noise to the
3411  * top level tracing buffer.
3412  *
3413  * Note, trace_array_init_printk() must be called on @tr before this
3414  * can be used.
3415  */
3416 __printf(3, 0)
3417 int trace_array_printk(struct trace_array *tr,
3418 		       unsigned long ip, const char *fmt, ...)
3419 {
3420 	int ret;
3421 	va_list ap;
3422 
3423 	if (!tr)
3424 		return -ENOENT;
3425 
3426 	/* This is only allowed for created instances */
3427 	if (tr == &global_trace)
3428 		return 0;
3429 
3430 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3431 		return 0;
3432 
3433 	va_start(ap, fmt);
3434 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3435 	va_end(ap);
3436 	return ret;
3437 }
3438 EXPORT_SYMBOL_GPL(trace_array_printk);
3439 
3440 /**
3441  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3442  * @tr: The trace array to initialize the buffers for
3443  *
3444  * As trace_array_printk() only writes into instances, they are OK to
3445  * have in the kernel (unlike trace_printk()). This needs to be called
3446  * before trace_array_printk() can be used on a trace_array.
3447  */
3448 int trace_array_init_printk(struct trace_array *tr)
3449 {
3450 	if (!tr)
3451 		return -ENOENT;
3452 
3453 	/* This is only allowed for created instances */
3454 	if (tr == &global_trace)
3455 		return -EINVAL;
3456 
3457 	return alloc_percpu_trace_buffer();
3458 }
3459 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3460 
3461 __printf(3, 4)
3462 int trace_array_printk_buf(struct trace_buffer *buffer,
3463 			   unsigned long ip, const char *fmt, ...)
3464 {
3465 	int ret;
3466 	va_list ap;
3467 
3468 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3469 		return 0;
3470 
3471 	va_start(ap, fmt);
3472 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3473 	va_end(ap);
3474 	return ret;
3475 }
3476 
3477 __printf(2, 0)
3478 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3479 {
3480 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3481 }
3482 EXPORT_SYMBOL_GPL(trace_vprintk);
3483 
3484 static void trace_iterator_increment(struct trace_iterator *iter)
3485 {
3486 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3487 
3488 	iter->idx++;
3489 	if (buf_iter)
3490 		ring_buffer_iter_advance(buf_iter);
3491 }
3492 
3493 static struct trace_entry *
3494 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3495 		unsigned long *lost_events)
3496 {
3497 	struct ring_buffer_event *event;
3498 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3499 
3500 	if (buf_iter) {
3501 		event = ring_buffer_iter_peek(buf_iter, ts);
3502 		if (lost_events)
3503 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3504 				(unsigned long)-1 : 0;
3505 	} else {
3506 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3507 					 lost_events);
3508 	}
3509 
3510 	if (event) {
3511 		iter->ent_size = ring_buffer_event_length(event);
3512 		return ring_buffer_event_data(event);
3513 	}
3514 	iter->ent_size = 0;
3515 	return NULL;
3516 }
3517 
3518 static struct trace_entry *
3519 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3520 		  unsigned long *missing_events, u64 *ent_ts)
3521 {
3522 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3523 	struct trace_entry *ent, *next = NULL;
3524 	unsigned long lost_events = 0, next_lost = 0;
3525 	int cpu_file = iter->cpu_file;
3526 	u64 next_ts = 0, ts;
3527 	int next_cpu = -1;
3528 	int next_size = 0;
3529 	int cpu;
3530 
3531 	/*
3532 	 * If we are in a per_cpu trace file, don't bother by iterating over
3533 	 * all cpu and peek directly.
3534 	 */
3535 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3536 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3537 			return NULL;
3538 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3539 		if (ent_cpu)
3540 			*ent_cpu = cpu_file;
3541 
3542 		return ent;
3543 	}
3544 
3545 	for_each_tracing_cpu(cpu) {
3546 
3547 		if (ring_buffer_empty_cpu(buffer, cpu))
3548 			continue;
3549 
3550 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3551 
3552 		/*
3553 		 * Pick the entry with the smallest timestamp:
3554 		 */
3555 		if (ent && (!next || ts < next_ts)) {
3556 			next = ent;
3557 			next_cpu = cpu;
3558 			next_ts = ts;
3559 			next_lost = lost_events;
3560 			next_size = iter->ent_size;
3561 		}
3562 	}
3563 
3564 	iter->ent_size = next_size;
3565 
3566 	if (ent_cpu)
3567 		*ent_cpu = next_cpu;
3568 
3569 	if (ent_ts)
3570 		*ent_ts = next_ts;
3571 
3572 	if (missing_events)
3573 		*missing_events = next_lost;
3574 
3575 	return next;
3576 }
3577 
3578 #define STATIC_FMT_BUF_SIZE	128
3579 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3580 
3581 char *trace_iter_expand_format(struct trace_iterator *iter)
3582 {
3583 	char *tmp;
3584 
3585 	/*
3586 	 * iter->tr is NULL when used with tp_printk, which makes
3587 	 * this get called where it is not safe to call krealloc().
3588 	 */
3589 	if (!iter->tr || iter->fmt == static_fmt_buf)
3590 		return NULL;
3591 
3592 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3593 		       GFP_KERNEL);
3594 	if (tmp) {
3595 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3596 		iter->fmt = tmp;
3597 	}
3598 
3599 	return tmp;
3600 }
3601 
3602 /* Returns true if the string is safe to dereference from an event */
3603 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3604 {
3605 	unsigned long addr = (unsigned long)str;
3606 	struct trace_event *trace_event;
3607 	struct trace_event_call *event;
3608 
3609 	/* OK if part of the event data */
3610 	if ((addr >= (unsigned long)iter->ent) &&
3611 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3612 		return true;
3613 
3614 	/* OK if part of the temp seq buffer */
3615 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3616 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3617 		return true;
3618 
3619 	/* Core rodata can not be freed */
3620 	if (is_kernel_rodata(addr))
3621 		return true;
3622 
3623 	if (trace_is_tracepoint_string(str))
3624 		return true;
3625 
3626 	/*
3627 	 * Now this could be a module event, referencing core module
3628 	 * data, which is OK.
3629 	 */
3630 	if (!iter->ent)
3631 		return false;
3632 
3633 	trace_event = ftrace_find_event(iter->ent->type);
3634 	if (!trace_event)
3635 		return false;
3636 
3637 	event = container_of(trace_event, struct trace_event_call, event);
3638 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3639 		return false;
3640 
3641 	/* Would rather have rodata, but this will suffice */
3642 	if (within_module_core(addr, event->module))
3643 		return true;
3644 
3645 	return false;
3646 }
3647 
3648 /**
3649  * ignore_event - Check dereferenced fields while writing to the seq buffer
3650  * @iter: The iterator that holds the seq buffer and the event being printed
3651  *
3652  * At boot up, test_event_printk() will flag any event that dereferences
3653  * a string with "%s" that does exist in the ring buffer. It may still
3654  * be valid, as the string may point to a static string in the kernel
3655  * rodata that never gets freed. But if the string pointer is pointing
3656  * to something that was allocated, there's a chance that it can be freed
3657  * by the time the user reads the trace. This would cause a bad memory
3658  * access by the kernel and possibly crash the system.
3659  *
3660  * This function will check if the event has any fields flagged as needing
3661  * to be checked at runtime and perform those checks.
3662  *
3663  * If it is found that a field is unsafe, it will write into the @iter->seq
3664  * a message stating what was found to be unsafe.
3665  *
3666  * @return: true if the event is unsafe and should be ignored,
3667  *          false otherwise.
3668  */
3669 bool ignore_event(struct trace_iterator *iter)
3670 {
3671 	struct ftrace_event_field *field;
3672 	struct trace_event *trace_event;
3673 	struct trace_event_call *event;
3674 	struct list_head *head;
3675 	struct trace_seq *seq;
3676 	const void *ptr;
3677 
3678 	trace_event = ftrace_find_event(iter->ent->type);
3679 
3680 	seq = &iter->seq;
3681 
3682 	if (!trace_event) {
3683 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3684 		return true;
3685 	}
3686 
3687 	event = container_of(trace_event, struct trace_event_call, event);
3688 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3689 		return false;
3690 
3691 	head = trace_get_fields(event);
3692 	if (!head) {
3693 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3694 				 trace_event_name(event));
3695 		return true;
3696 	}
3697 
3698 	/* Offsets are from the iter->ent that points to the raw event */
3699 	ptr = iter->ent;
3700 
3701 	list_for_each_entry(field, head, link) {
3702 		const char *str;
3703 		bool good;
3704 
3705 		if (!field->needs_test)
3706 			continue;
3707 
3708 		str = *(const char **)(ptr + field->offset);
3709 
3710 		good = trace_safe_str(iter, str);
3711 
3712 		/*
3713 		 * If you hit this warning, it is likely that the
3714 		 * trace event in question used %s on a string that
3715 		 * was saved at the time of the event, but may not be
3716 		 * around when the trace is read. Use __string(),
3717 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3718 		 * instead. See samples/trace_events/trace-events-sample.h
3719 		 * for reference.
3720 		 */
3721 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3722 			      trace_event_name(event), field->name)) {
3723 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3724 					 trace_event_name(event), field->name);
3725 			return true;
3726 		}
3727 	}
3728 	return false;
3729 }
3730 
3731 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3732 {
3733 	const char *p, *new_fmt;
3734 	char *q;
3735 
3736 	if (WARN_ON_ONCE(!fmt))
3737 		return fmt;
3738 
3739 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3740 		return fmt;
3741 
3742 	p = fmt;
3743 	new_fmt = q = iter->fmt;
3744 	while (*p) {
3745 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3746 			if (!trace_iter_expand_format(iter))
3747 				return fmt;
3748 
3749 			q += iter->fmt - new_fmt;
3750 			new_fmt = iter->fmt;
3751 		}
3752 
3753 		*q++ = *p++;
3754 
3755 		/* Replace %p with %px */
3756 		if (p[-1] == '%') {
3757 			if (p[0] == '%') {
3758 				*q++ = *p++;
3759 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3760 				*q++ = *p++;
3761 				*q++ = 'x';
3762 			}
3763 		}
3764 	}
3765 	*q = '\0';
3766 
3767 	return new_fmt;
3768 }
3769 
3770 #define STATIC_TEMP_BUF_SIZE	128
3771 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3772 
3773 /* Find the next real entry, without updating the iterator itself */
3774 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3775 					  int *ent_cpu, u64 *ent_ts)
3776 {
3777 	/* __find_next_entry will reset ent_size */
3778 	int ent_size = iter->ent_size;
3779 	struct trace_entry *entry;
3780 
3781 	/*
3782 	 * If called from ftrace_dump(), then the iter->temp buffer
3783 	 * will be the static_temp_buf and not created from kmalloc.
3784 	 * If the entry size is greater than the buffer, we can
3785 	 * not save it. Just return NULL in that case. This is only
3786 	 * used to add markers when two consecutive events' time
3787 	 * stamps have a large delta. See trace_print_lat_context()
3788 	 */
3789 	if (iter->temp == static_temp_buf &&
3790 	    STATIC_TEMP_BUF_SIZE < ent_size)
3791 		return NULL;
3792 
3793 	/*
3794 	 * The __find_next_entry() may call peek_next_entry(), which may
3795 	 * call ring_buffer_peek() that may make the contents of iter->ent
3796 	 * undefined. Need to copy iter->ent now.
3797 	 */
3798 	if (iter->ent && iter->ent != iter->temp) {
3799 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3800 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3801 			void *temp;
3802 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3803 			if (!temp)
3804 				return NULL;
3805 			kfree(iter->temp);
3806 			iter->temp = temp;
3807 			iter->temp_size = iter->ent_size;
3808 		}
3809 		memcpy(iter->temp, iter->ent, iter->ent_size);
3810 		iter->ent = iter->temp;
3811 	}
3812 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3813 	/* Put back the original ent_size */
3814 	iter->ent_size = ent_size;
3815 
3816 	return entry;
3817 }
3818 
3819 /* Find the next real entry, and increment the iterator to the next entry */
3820 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3821 {
3822 	iter->ent = __find_next_entry(iter, &iter->cpu,
3823 				      &iter->lost_events, &iter->ts);
3824 
3825 	if (iter->ent)
3826 		trace_iterator_increment(iter);
3827 
3828 	return iter->ent ? iter : NULL;
3829 }
3830 
3831 static void trace_consume(struct trace_iterator *iter)
3832 {
3833 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3834 			    &iter->lost_events);
3835 }
3836 
3837 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3838 {
3839 	struct trace_iterator *iter = m->private;
3840 	int i = (int)*pos;
3841 	void *ent;
3842 
3843 	WARN_ON_ONCE(iter->leftover);
3844 
3845 	(*pos)++;
3846 
3847 	/* can't go backwards */
3848 	if (iter->idx > i)
3849 		return NULL;
3850 
3851 	if (iter->idx < 0)
3852 		ent = trace_find_next_entry_inc(iter);
3853 	else
3854 		ent = iter;
3855 
3856 	while (ent && iter->idx < i)
3857 		ent = trace_find_next_entry_inc(iter);
3858 
3859 	iter->pos = *pos;
3860 
3861 	return ent;
3862 }
3863 
3864 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3865 {
3866 	struct ring_buffer_iter *buf_iter;
3867 	unsigned long entries = 0;
3868 	u64 ts;
3869 
3870 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3871 
3872 	buf_iter = trace_buffer_iter(iter, cpu);
3873 	if (!buf_iter)
3874 		return;
3875 
3876 	ring_buffer_iter_reset(buf_iter);
3877 
3878 	/*
3879 	 * We could have the case with the max latency tracers
3880 	 * that a reset never took place on a cpu. This is evident
3881 	 * by the timestamp being before the start of the buffer.
3882 	 */
3883 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3884 		if (ts >= iter->array_buffer->time_start)
3885 			break;
3886 		entries++;
3887 		ring_buffer_iter_advance(buf_iter);
3888 		/* This could be a big loop */
3889 		cond_resched();
3890 	}
3891 
3892 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3893 }
3894 
3895 /*
3896  * The current tracer is copied to avoid a global locking
3897  * all around.
3898  */
3899 static void *s_start(struct seq_file *m, loff_t *pos)
3900 {
3901 	struct trace_iterator *iter = m->private;
3902 	struct trace_array *tr = iter->tr;
3903 	int cpu_file = iter->cpu_file;
3904 	void *p = NULL;
3905 	loff_t l = 0;
3906 	int cpu;
3907 
3908 	mutex_lock(&trace_types_lock);
3909 	if (unlikely(tr->current_trace != iter->trace)) {
3910 		/* Close iter->trace before switching to the new current tracer */
3911 		if (iter->trace->close)
3912 			iter->trace->close(iter);
3913 		iter->trace = tr->current_trace;
3914 		/* Reopen the new current tracer */
3915 		if (iter->trace->open)
3916 			iter->trace->open(iter);
3917 	}
3918 	mutex_unlock(&trace_types_lock);
3919 
3920 #ifdef CONFIG_TRACER_MAX_TRACE
3921 	if (iter->snapshot && iter->trace->use_max_tr)
3922 		return ERR_PTR(-EBUSY);
3923 #endif
3924 
3925 	if (*pos != iter->pos) {
3926 		iter->ent = NULL;
3927 		iter->cpu = 0;
3928 		iter->idx = -1;
3929 
3930 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3931 			for_each_tracing_cpu(cpu)
3932 				tracing_iter_reset(iter, cpu);
3933 		} else
3934 			tracing_iter_reset(iter, cpu_file);
3935 
3936 		iter->leftover = 0;
3937 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3938 			;
3939 
3940 	} else {
3941 		/*
3942 		 * If we overflowed the seq_file before, then we want
3943 		 * to just reuse the trace_seq buffer again.
3944 		 */
3945 		if (iter->leftover)
3946 			p = iter;
3947 		else {
3948 			l = *pos - 1;
3949 			p = s_next(m, p, &l);
3950 		}
3951 	}
3952 
3953 	trace_event_read_lock();
3954 	trace_access_lock(cpu_file);
3955 	return p;
3956 }
3957 
3958 static void s_stop(struct seq_file *m, void *p)
3959 {
3960 	struct trace_iterator *iter = m->private;
3961 
3962 #ifdef CONFIG_TRACER_MAX_TRACE
3963 	if (iter->snapshot && iter->trace->use_max_tr)
3964 		return;
3965 #endif
3966 
3967 	trace_access_unlock(iter->cpu_file);
3968 	trace_event_read_unlock();
3969 }
3970 
3971 static void
3972 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3973 		      unsigned long *entries, int cpu)
3974 {
3975 	unsigned long count;
3976 
3977 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3978 	/*
3979 	 * If this buffer has skipped entries, then we hold all
3980 	 * entries for the trace and we need to ignore the
3981 	 * ones before the time stamp.
3982 	 */
3983 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3984 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3985 		/* total is the same as the entries */
3986 		*total = count;
3987 	} else
3988 		*total = count +
3989 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3990 	*entries = count;
3991 }
3992 
3993 static void
3994 get_total_entries(struct array_buffer *buf,
3995 		  unsigned long *total, unsigned long *entries)
3996 {
3997 	unsigned long t, e;
3998 	int cpu;
3999 
4000 	*total = 0;
4001 	*entries = 0;
4002 
4003 	for_each_tracing_cpu(cpu) {
4004 		get_total_entries_cpu(buf, &t, &e, cpu);
4005 		*total += t;
4006 		*entries += e;
4007 	}
4008 }
4009 
4010 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4011 {
4012 	unsigned long total, entries;
4013 
4014 	if (!tr)
4015 		tr = &global_trace;
4016 
4017 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4018 
4019 	return entries;
4020 }
4021 
4022 unsigned long trace_total_entries(struct trace_array *tr)
4023 {
4024 	unsigned long total, entries;
4025 
4026 	if (!tr)
4027 		tr = &global_trace;
4028 
4029 	get_total_entries(&tr->array_buffer, &total, &entries);
4030 
4031 	return entries;
4032 }
4033 
4034 static void print_lat_help_header(struct seq_file *m)
4035 {
4036 	seq_puts(m, "#                    _------=> CPU#            \n"
4037 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4038 		    "#                  | / _----=> need-resched    \n"
4039 		    "#                  || / _---=> hardirq/softirq \n"
4040 		    "#                  ||| / _--=> preempt-depth   \n"
4041 		    "#                  |||| / _-=> migrate-disable \n"
4042 		    "#                  ||||| /     delay           \n"
4043 		    "#  cmd     pid     |||||| time  |   caller     \n"
4044 		    "#     \\   /        ||||||  \\    |    /       \n");
4045 }
4046 
4047 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4048 {
4049 	unsigned long total;
4050 	unsigned long entries;
4051 
4052 	get_total_entries(buf, &total, &entries);
4053 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4054 		   entries, total, num_online_cpus());
4055 	seq_puts(m, "#\n");
4056 }
4057 
4058 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4059 				   unsigned int flags)
4060 {
4061 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4062 
4063 	print_event_info(buf, m);
4064 
4065 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4066 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4067 }
4068 
4069 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4070 				       unsigned int flags)
4071 {
4072 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4073 	static const char space[] = "            ";
4074 	int prec = tgid ? 12 : 2;
4075 
4076 	print_event_info(buf, m);
4077 
4078 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4079 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4080 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4081 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4082 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4083 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4084 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4085 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4086 }
4087 
4088 void
4089 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4090 {
4091 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4092 	struct array_buffer *buf = iter->array_buffer;
4093 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4094 	struct tracer *type = iter->trace;
4095 	unsigned long entries;
4096 	unsigned long total;
4097 	const char *name = type->name;
4098 
4099 	get_total_entries(buf, &total, &entries);
4100 
4101 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4102 		   name, init_utsname()->release);
4103 	seq_puts(m, "# -----------------------------------"
4104 		 "---------------------------------\n");
4105 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4106 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4107 		   nsecs_to_usecs(data->saved_latency),
4108 		   entries,
4109 		   total,
4110 		   buf->cpu,
4111 		   preempt_model_none()      ? "server" :
4112 		   preempt_model_voluntary() ? "desktop" :
4113 		   preempt_model_full()      ? "preempt" :
4114 		   preempt_model_lazy()	     ? "lazy"    :
4115 		   preempt_model_rt()        ? "preempt_rt" :
4116 		   "unknown",
4117 		   /* These are reserved for later use */
4118 		   0, 0, 0, 0);
4119 #ifdef CONFIG_SMP
4120 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4121 #else
4122 	seq_puts(m, ")\n");
4123 #endif
4124 	seq_puts(m, "#    -----------------\n");
4125 	seq_printf(m, "#    | task: %.16s-%d "
4126 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4127 		   data->comm, data->pid,
4128 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4129 		   data->policy, data->rt_priority);
4130 	seq_puts(m, "#    -----------------\n");
4131 
4132 	if (data->critical_start) {
4133 		seq_puts(m, "#  => started at: ");
4134 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4135 		trace_print_seq(m, &iter->seq);
4136 		seq_puts(m, "\n#  => ended at:   ");
4137 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4138 		trace_print_seq(m, &iter->seq);
4139 		seq_puts(m, "\n#\n");
4140 	}
4141 
4142 	seq_puts(m, "#\n");
4143 }
4144 
4145 static void test_cpu_buff_start(struct trace_iterator *iter)
4146 {
4147 	struct trace_seq *s = &iter->seq;
4148 	struct trace_array *tr = iter->tr;
4149 
4150 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4151 		return;
4152 
4153 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4154 		return;
4155 
4156 	if (cpumask_available(iter->started) &&
4157 	    cpumask_test_cpu(iter->cpu, iter->started))
4158 		return;
4159 
4160 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4161 		return;
4162 
4163 	if (cpumask_available(iter->started))
4164 		cpumask_set_cpu(iter->cpu, iter->started);
4165 
4166 	/* Don't print started cpu buffer for the first entry of the trace */
4167 	if (iter->idx > 1)
4168 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4169 				iter->cpu);
4170 }
4171 
4172 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4173 {
4174 	struct trace_array *tr = iter->tr;
4175 	struct trace_seq *s = &iter->seq;
4176 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4177 	struct trace_entry *entry;
4178 	struct trace_event *event;
4179 
4180 	entry = iter->ent;
4181 
4182 	test_cpu_buff_start(iter);
4183 
4184 	event = ftrace_find_event(entry->type);
4185 
4186 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4187 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4188 			trace_print_lat_context(iter);
4189 		else
4190 			trace_print_context(iter);
4191 	}
4192 
4193 	if (trace_seq_has_overflowed(s))
4194 		return TRACE_TYPE_PARTIAL_LINE;
4195 
4196 	if (event) {
4197 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4198 			return print_event_fields(iter, event);
4199 		/*
4200 		 * For TRACE_EVENT() events, the print_fmt is not
4201 		 * safe to use if the array has delta offsets
4202 		 * Force printing via the fields.
4203 		 */
4204 		if ((tr->text_delta || tr->data_delta) &&
4205 		    event->type > __TRACE_LAST_TYPE)
4206 			return print_event_fields(iter, event);
4207 
4208 		return event->funcs->trace(iter, sym_flags, event);
4209 	}
4210 
4211 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4212 
4213 	return trace_handle_return(s);
4214 }
4215 
4216 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4217 {
4218 	struct trace_array *tr = iter->tr;
4219 	struct trace_seq *s = &iter->seq;
4220 	struct trace_entry *entry;
4221 	struct trace_event *event;
4222 
4223 	entry = iter->ent;
4224 
4225 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4226 		trace_seq_printf(s, "%d %d %llu ",
4227 				 entry->pid, iter->cpu, iter->ts);
4228 
4229 	if (trace_seq_has_overflowed(s))
4230 		return TRACE_TYPE_PARTIAL_LINE;
4231 
4232 	event = ftrace_find_event(entry->type);
4233 	if (event)
4234 		return event->funcs->raw(iter, 0, event);
4235 
4236 	trace_seq_printf(s, "%d ?\n", entry->type);
4237 
4238 	return trace_handle_return(s);
4239 }
4240 
4241 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4242 {
4243 	struct trace_array *tr = iter->tr;
4244 	struct trace_seq *s = &iter->seq;
4245 	unsigned char newline = '\n';
4246 	struct trace_entry *entry;
4247 	struct trace_event *event;
4248 
4249 	entry = iter->ent;
4250 
4251 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4252 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4253 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4254 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4255 		if (trace_seq_has_overflowed(s))
4256 			return TRACE_TYPE_PARTIAL_LINE;
4257 	}
4258 
4259 	event = ftrace_find_event(entry->type);
4260 	if (event) {
4261 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4262 		if (ret != TRACE_TYPE_HANDLED)
4263 			return ret;
4264 	}
4265 
4266 	SEQ_PUT_FIELD(s, newline);
4267 
4268 	return trace_handle_return(s);
4269 }
4270 
4271 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4272 {
4273 	struct trace_array *tr = iter->tr;
4274 	struct trace_seq *s = &iter->seq;
4275 	struct trace_entry *entry;
4276 	struct trace_event *event;
4277 
4278 	entry = iter->ent;
4279 
4280 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4281 		SEQ_PUT_FIELD(s, entry->pid);
4282 		SEQ_PUT_FIELD(s, iter->cpu);
4283 		SEQ_PUT_FIELD(s, iter->ts);
4284 		if (trace_seq_has_overflowed(s))
4285 			return TRACE_TYPE_PARTIAL_LINE;
4286 	}
4287 
4288 	event = ftrace_find_event(entry->type);
4289 	return event ? event->funcs->binary(iter, 0, event) :
4290 		TRACE_TYPE_HANDLED;
4291 }
4292 
4293 int trace_empty(struct trace_iterator *iter)
4294 {
4295 	struct ring_buffer_iter *buf_iter;
4296 	int cpu;
4297 
4298 	/* If we are looking at one CPU buffer, only check that one */
4299 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4300 		cpu = iter->cpu_file;
4301 		buf_iter = trace_buffer_iter(iter, cpu);
4302 		if (buf_iter) {
4303 			if (!ring_buffer_iter_empty(buf_iter))
4304 				return 0;
4305 		} else {
4306 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4307 				return 0;
4308 		}
4309 		return 1;
4310 	}
4311 
4312 	for_each_tracing_cpu(cpu) {
4313 		buf_iter = trace_buffer_iter(iter, cpu);
4314 		if (buf_iter) {
4315 			if (!ring_buffer_iter_empty(buf_iter))
4316 				return 0;
4317 		} else {
4318 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4319 				return 0;
4320 		}
4321 	}
4322 
4323 	return 1;
4324 }
4325 
4326 /*  Called with trace_event_read_lock() held. */
4327 enum print_line_t print_trace_line(struct trace_iterator *iter)
4328 {
4329 	struct trace_array *tr = iter->tr;
4330 	unsigned long trace_flags = tr->trace_flags;
4331 	enum print_line_t ret;
4332 
4333 	if (iter->lost_events) {
4334 		if (iter->lost_events == (unsigned long)-1)
4335 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4336 					 iter->cpu);
4337 		else
4338 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4339 					 iter->cpu, iter->lost_events);
4340 		if (trace_seq_has_overflowed(&iter->seq))
4341 			return TRACE_TYPE_PARTIAL_LINE;
4342 	}
4343 
4344 	if (iter->trace && iter->trace->print_line) {
4345 		ret = iter->trace->print_line(iter);
4346 		if (ret != TRACE_TYPE_UNHANDLED)
4347 			return ret;
4348 	}
4349 
4350 	if (iter->ent->type == TRACE_BPUTS &&
4351 			trace_flags & TRACE_ITER_PRINTK &&
4352 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4353 		return trace_print_bputs_msg_only(iter);
4354 
4355 	if (iter->ent->type == TRACE_BPRINT &&
4356 			trace_flags & TRACE_ITER_PRINTK &&
4357 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4358 		return trace_print_bprintk_msg_only(iter);
4359 
4360 	if (iter->ent->type == TRACE_PRINT &&
4361 			trace_flags & TRACE_ITER_PRINTK &&
4362 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4363 		return trace_print_printk_msg_only(iter);
4364 
4365 	if (trace_flags & TRACE_ITER_BIN)
4366 		return print_bin_fmt(iter);
4367 
4368 	if (trace_flags & TRACE_ITER_HEX)
4369 		return print_hex_fmt(iter);
4370 
4371 	if (trace_flags & TRACE_ITER_RAW)
4372 		return print_raw_fmt(iter);
4373 
4374 	return print_trace_fmt(iter);
4375 }
4376 
4377 void trace_latency_header(struct seq_file *m)
4378 {
4379 	struct trace_iterator *iter = m->private;
4380 	struct trace_array *tr = iter->tr;
4381 
4382 	/* print nothing if the buffers are empty */
4383 	if (trace_empty(iter))
4384 		return;
4385 
4386 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4387 		print_trace_header(m, iter);
4388 
4389 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4390 		print_lat_help_header(m);
4391 }
4392 
4393 void trace_default_header(struct seq_file *m)
4394 {
4395 	struct trace_iterator *iter = m->private;
4396 	struct trace_array *tr = iter->tr;
4397 	unsigned long trace_flags = tr->trace_flags;
4398 
4399 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4400 		return;
4401 
4402 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4403 		/* print nothing if the buffers are empty */
4404 		if (trace_empty(iter))
4405 			return;
4406 		print_trace_header(m, iter);
4407 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4408 			print_lat_help_header(m);
4409 	} else {
4410 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4411 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4412 				print_func_help_header_irq(iter->array_buffer,
4413 							   m, trace_flags);
4414 			else
4415 				print_func_help_header(iter->array_buffer, m,
4416 						       trace_flags);
4417 		}
4418 	}
4419 }
4420 
4421 static void test_ftrace_alive(struct seq_file *m)
4422 {
4423 	if (!ftrace_is_dead())
4424 		return;
4425 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4426 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4427 }
4428 
4429 #ifdef CONFIG_TRACER_MAX_TRACE
4430 static void show_snapshot_main_help(struct seq_file *m)
4431 {
4432 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4433 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4434 		    "#                      Takes a snapshot of the main buffer.\n"
4435 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4436 		    "#                      (Doesn't have to be '2' works with any number that\n"
4437 		    "#                       is not a '0' or '1')\n");
4438 }
4439 
4440 static void show_snapshot_percpu_help(struct seq_file *m)
4441 {
4442 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4443 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4444 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4445 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4446 #else
4447 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4448 		    "#                     Must use main snapshot file to allocate.\n");
4449 #endif
4450 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4451 		    "#                      (Doesn't have to be '2' works with any number that\n"
4452 		    "#                       is not a '0' or '1')\n");
4453 }
4454 
4455 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4456 {
4457 	if (iter->tr->allocated_snapshot)
4458 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4459 	else
4460 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4461 
4462 	seq_puts(m, "# Snapshot commands:\n");
4463 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4464 		show_snapshot_main_help(m);
4465 	else
4466 		show_snapshot_percpu_help(m);
4467 }
4468 #else
4469 /* Should never be called */
4470 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4471 #endif
4472 
4473 static int s_show(struct seq_file *m, void *v)
4474 {
4475 	struct trace_iterator *iter = v;
4476 	int ret;
4477 
4478 	if (iter->ent == NULL) {
4479 		if (iter->tr) {
4480 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4481 			seq_puts(m, "#\n");
4482 			test_ftrace_alive(m);
4483 		}
4484 		if (iter->snapshot && trace_empty(iter))
4485 			print_snapshot_help(m, iter);
4486 		else if (iter->trace && iter->trace->print_header)
4487 			iter->trace->print_header(m);
4488 		else
4489 			trace_default_header(m);
4490 
4491 	} else if (iter->leftover) {
4492 		/*
4493 		 * If we filled the seq_file buffer earlier, we
4494 		 * want to just show it now.
4495 		 */
4496 		ret = trace_print_seq(m, &iter->seq);
4497 
4498 		/* ret should this time be zero, but you never know */
4499 		iter->leftover = ret;
4500 
4501 	} else {
4502 		ret = print_trace_line(iter);
4503 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4504 			iter->seq.full = 0;
4505 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4506 		}
4507 		ret = trace_print_seq(m, &iter->seq);
4508 		/*
4509 		 * If we overflow the seq_file buffer, then it will
4510 		 * ask us for this data again at start up.
4511 		 * Use that instead.
4512 		 *  ret is 0 if seq_file write succeeded.
4513 		 *        -1 otherwise.
4514 		 */
4515 		iter->leftover = ret;
4516 	}
4517 
4518 	return 0;
4519 }
4520 
4521 /*
4522  * Should be used after trace_array_get(), trace_types_lock
4523  * ensures that i_cdev was already initialized.
4524  */
4525 static inline int tracing_get_cpu(struct inode *inode)
4526 {
4527 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4528 		return (long)inode->i_cdev - 1;
4529 	return RING_BUFFER_ALL_CPUS;
4530 }
4531 
4532 static const struct seq_operations tracer_seq_ops = {
4533 	.start		= s_start,
4534 	.next		= s_next,
4535 	.stop		= s_stop,
4536 	.show		= s_show,
4537 };
4538 
4539 /*
4540  * Note, as iter itself can be allocated and freed in different
4541  * ways, this function is only used to free its content, and not
4542  * the iterator itself. The only requirement to all the allocations
4543  * is that it must zero all fields (kzalloc), as freeing works with
4544  * ethier allocated content or NULL.
4545  */
4546 static void free_trace_iter_content(struct trace_iterator *iter)
4547 {
4548 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4549 	if (iter->fmt != static_fmt_buf)
4550 		kfree(iter->fmt);
4551 
4552 	kfree(iter->temp);
4553 	kfree(iter->buffer_iter);
4554 	mutex_destroy(&iter->mutex);
4555 	free_cpumask_var(iter->started);
4556 }
4557 
4558 static struct trace_iterator *
4559 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4560 {
4561 	struct trace_array *tr = inode->i_private;
4562 	struct trace_iterator *iter;
4563 	int cpu;
4564 
4565 	if (tracing_disabled)
4566 		return ERR_PTR(-ENODEV);
4567 
4568 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4569 	if (!iter)
4570 		return ERR_PTR(-ENOMEM);
4571 
4572 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4573 				    GFP_KERNEL);
4574 	if (!iter->buffer_iter)
4575 		goto release;
4576 
4577 	/*
4578 	 * trace_find_next_entry() may need to save off iter->ent.
4579 	 * It will place it into the iter->temp buffer. As most
4580 	 * events are less than 128, allocate a buffer of that size.
4581 	 * If one is greater, then trace_find_next_entry() will
4582 	 * allocate a new buffer to adjust for the bigger iter->ent.
4583 	 * It's not critical if it fails to get allocated here.
4584 	 */
4585 	iter->temp = kmalloc(128, GFP_KERNEL);
4586 	if (iter->temp)
4587 		iter->temp_size = 128;
4588 
4589 	/*
4590 	 * trace_event_printf() may need to modify given format
4591 	 * string to replace %p with %px so that it shows real address
4592 	 * instead of hash value. However, that is only for the event
4593 	 * tracing, other tracer may not need. Defer the allocation
4594 	 * until it is needed.
4595 	 */
4596 	iter->fmt = NULL;
4597 	iter->fmt_size = 0;
4598 
4599 	mutex_lock(&trace_types_lock);
4600 	iter->trace = tr->current_trace;
4601 
4602 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4603 		goto fail;
4604 
4605 	iter->tr = tr;
4606 
4607 #ifdef CONFIG_TRACER_MAX_TRACE
4608 	/* Currently only the top directory has a snapshot */
4609 	if (tr->current_trace->print_max || snapshot)
4610 		iter->array_buffer = &tr->max_buffer;
4611 	else
4612 #endif
4613 		iter->array_buffer = &tr->array_buffer;
4614 	iter->snapshot = snapshot;
4615 	iter->pos = -1;
4616 	iter->cpu_file = tracing_get_cpu(inode);
4617 	mutex_init(&iter->mutex);
4618 
4619 	/* Notify the tracer early; before we stop tracing. */
4620 	if (iter->trace->open)
4621 		iter->trace->open(iter);
4622 
4623 	/* Annotate start of buffers if we had overruns */
4624 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4625 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4626 
4627 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4628 	if (trace_clocks[tr->clock_id].in_ns)
4629 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4630 
4631 	/*
4632 	 * If pause-on-trace is enabled, then stop the trace while
4633 	 * dumping, unless this is the "snapshot" file
4634 	 */
4635 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4636 		tracing_stop_tr(tr);
4637 
4638 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4639 		for_each_tracing_cpu(cpu) {
4640 			iter->buffer_iter[cpu] =
4641 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4642 							 cpu, GFP_KERNEL);
4643 		}
4644 		ring_buffer_read_prepare_sync();
4645 		for_each_tracing_cpu(cpu) {
4646 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4647 			tracing_iter_reset(iter, cpu);
4648 		}
4649 	} else {
4650 		cpu = iter->cpu_file;
4651 		iter->buffer_iter[cpu] =
4652 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4653 						 cpu, GFP_KERNEL);
4654 		ring_buffer_read_prepare_sync();
4655 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4656 		tracing_iter_reset(iter, cpu);
4657 	}
4658 
4659 	mutex_unlock(&trace_types_lock);
4660 
4661 	return iter;
4662 
4663  fail:
4664 	mutex_unlock(&trace_types_lock);
4665 	free_trace_iter_content(iter);
4666 release:
4667 	seq_release_private(inode, file);
4668 	return ERR_PTR(-ENOMEM);
4669 }
4670 
4671 int tracing_open_generic(struct inode *inode, struct file *filp)
4672 {
4673 	int ret;
4674 
4675 	ret = tracing_check_open_get_tr(NULL);
4676 	if (ret)
4677 		return ret;
4678 
4679 	filp->private_data = inode->i_private;
4680 	return 0;
4681 }
4682 
4683 bool tracing_is_disabled(void)
4684 {
4685 	return (tracing_disabled) ? true: false;
4686 }
4687 
4688 /*
4689  * Open and update trace_array ref count.
4690  * Must have the current trace_array passed to it.
4691  */
4692 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4693 {
4694 	struct trace_array *tr = inode->i_private;
4695 	int ret;
4696 
4697 	ret = tracing_check_open_get_tr(tr);
4698 	if (ret)
4699 		return ret;
4700 
4701 	filp->private_data = inode->i_private;
4702 
4703 	return 0;
4704 }
4705 
4706 /*
4707  * The private pointer of the inode is the trace_event_file.
4708  * Update the tr ref count associated to it.
4709  */
4710 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4711 {
4712 	struct trace_event_file *file = inode->i_private;
4713 	int ret;
4714 
4715 	ret = tracing_check_open_get_tr(file->tr);
4716 	if (ret)
4717 		return ret;
4718 
4719 	mutex_lock(&event_mutex);
4720 
4721 	/* Fail if the file is marked for removal */
4722 	if (file->flags & EVENT_FILE_FL_FREED) {
4723 		trace_array_put(file->tr);
4724 		ret = -ENODEV;
4725 	} else {
4726 		event_file_get(file);
4727 	}
4728 
4729 	mutex_unlock(&event_mutex);
4730 	if (ret)
4731 		return ret;
4732 
4733 	filp->private_data = inode->i_private;
4734 
4735 	return 0;
4736 }
4737 
4738 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4739 {
4740 	struct trace_event_file *file = inode->i_private;
4741 
4742 	trace_array_put(file->tr);
4743 	event_file_put(file);
4744 
4745 	return 0;
4746 }
4747 
4748 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4749 {
4750 	tracing_release_file_tr(inode, filp);
4751 	return single_release(inode, filp);
4752 }
4753 
4754 static int tracing_mark_open(struct inode *inode, struct file *filp)
4755 {
4756 	stream_open(inode, filp);
4757 	return tracing_open_generic_tr(inode, filp);
4758 }
4759 
4760 static int tracing_release(struct inode *inode, struct file *file)
4761 {
4762 	struct trace_array *tr = inode->i_private;
4763 	struct seq_file *m = file->private_data;
4764 	struct trace_iterator *iter;
4765 	int cpu;
4766 
4767 	if (!(file->f_mode & FMODE_READ)) {
4768 		trace_array_put(tr);
4769 		return 0;
4770 	}
4771 
4772 	/* Writes do not use seq_file */
4773 	iter = m->private;
4774 	mutex_lock(&trace_types_lock);
4775 
4776 	for_each_tracing_cpu(cpu) {
4777 		if (iter->buffer_iter[cpu])
4778 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4779 	}
4780 
4781 	if (iter->trace && iter->trace->close)
4782 		iter->trace->close(iter);
4783 
4784 	if (!iter->snapshot && tr->stop_count)
4785 		/* reenable tracing if it was previously enabled */
4786 		tracing_start_tr(tr);
4787 
4788 	__trace_array_put(tr);
4789 
4790 	mutex_unlock(&trace_types_lock);
4791 
4792 	free_trace_iter_content(iter);
4793 	seq_release_private(inode, file);
4794 
4795 	return 0;
4796 }
4797 
4798 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4799 {
4800 	struct trace_array *tr = inode->i_private;
4801 
4802 	trace_array_put(tr);
4803 	return 0;
4804 }
4805 
4806 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4807 {
4808 	struct trace_array *tr = inode->i_private;
4809 
4810 	trace_array_put(tr);
4811 
4812 	return single_release(inode, file);
4813 }
4814 
4815 static int tracing_open(struct inode *inode, struct file *file)
4816 {
4817 	struct trace_array *tr = inode->i_private;
4818 	struct trace_iterator *iter;
4819 	int ret;
4820 
4821 	ret = tracing_check_open_get_tr(tr);
4822 	if (ret)
4823 		return ret;
4824 
4825 	/* If this file was open for write, then erase contents */
4826 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4827 		int cpu = tracing_get_cpu(inode);
4828 		struct array_buffer *trace_buf = &tr->array_buffer;
4829 
4830 #ifdef CONFIG_TRACER_MAX_TRACE
4831 		if (tr->current_trace->print_max)
4832 			trace_buf = &tr->max_buffer;
4833 #endif
4834 
4835 		if (cpu == RING_BUFFER_ALL_CPUS)
4836 			tracing_reset_online_cpus(trace_buf);
4837 		else
4838 			tracing_reset_cpu(trace_buf, cpu);
4839 	}
4840 
4841 	if (file->f_mode & FMODE_READ) {
4842 		iter = __tracing_open(inode, file, false);
4843 		if (IS_ERR(iter))
4844 			ret = PTR_ERR(iter);
4845 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4846 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4847 	}
4848 
4849 	if (ret < 0)
4850 		trace_array_put(tr);
4851 
4852 	return ret;
4853 }
4854 
4855 /*
4856  * Some tracers are not suitable for instance buffers.
4857  * A tracer is always available for the global array (toplevel)
4858  * or if it explicitly states that it is.
4859  */
4860 static bool
4861 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4862 {
4863 #ifdef CONFIG_TRACER_SNAPSHOT
4864 	/* arrays with mapped buffer range do not have snapshots */
4865 	if (tr->range_addr_start && t->use_max_tr)
4866 		return false;
4867 #endif
4868 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4869 }
4870 
4871 /* Find the next tracer that this trace array may use */
4872 static struct tracer *
4873 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4874 {
4875 	while (t && !trace_ok_for_array(t, tr))
4876 		t = t->next;
4877 
4878 	return t;
4879 }
4880 
4881 static void *
4882 t_next(struct seq_file *m, void *v, loff_t *pos)
4883 {
4884 	struct trace_array *tr = m->private;
4885 	struct tracer *t = v;
4886 
4887 	(*pos)++;
4888 
4889 	if (t)
4890 		t = get_tracer_for_array(tr, t->next);
4891 
4892 	return t;
4893 }
4894 
4895 static void *t_start(struct seq_file *m, loff_t *pos)
4896 {
4897 	struct trace_array *tr = m->private;
4898 	struct tracer *t;
4899 	loff_t l = 0;
4900 
4901 	mutex_lock(&trace_types_lock);
4902 
4903 	t = get_tracer_for_array(tr, trace_types);
4904 	for (; t && l < *pos; t = t_next(m, t, &l))
4905 			;
4906 
4907 	return t;
4908 }
4909 
4910 static void t_stop(struct seq_file *m, void *p)
4911 {
4912 	mutex_unlock(&trace_types_lock);
4913 }
4914 
4915 static int t_show(struct seq_file *m, void *v)
4916 {
4917 	struct tracer *t = v;
4918 
4919 	if (!t)
4920 		return 0;
4921 
4922 	seq_puts(m, t->name);
4923 	if (t->next)
4924 		seq_putc(m, ' ');
4925 	else
4926 		seq_putc(m, '\n');
4927 
4928 	return 0;
4929 }
4930 
4931 static const struct seq_operations show_traces_seq_ops = {
4932 	.start		= t_start,
4933 	.next		= t_next,
4934 	.stop		= t_stop,
4935 	.show		= t_show,
4936 };
4937 
4938 static int show_traces_open(struct inode *inode, struct file *file)
4939 {
4940 	struct trace_array *tr = inode->i_private;
4941 	struct seq_file *m;
4942 	int ret;
4943 
4944 	ret = tracing_check_open_get_tr(tr);
4945 	if (ret)
4946 		return ret;
4947 
4948 	ret = seq_open(file, &show_traces_seq_ops);
4949 	if (ret) {
4950 		trace_array_put(tr);
4951 		return ret;
4952 	}
4953 
4954 	m = file->private_data;
4955 	m->private = tr;
4956 
4957 	return 0;
4958 }
4959 
4960 static int tracing_seq_release(struct inode *inode, struct file *file)
4961 {
4962 	struct trace_array *tr = inode->i_private;
4963 
4964 	trace_array_put(tr);
4965 	return seq_release(inode, file);
4966 }
4967 
4968 static ssize_t
4969 tracing_write_stub(struct file *filp, const char __user *ubuf,
4970 		   size_t count, loff_t *ppos)
4971 {
4972 	return count;
4973 }
4974 
4975 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4976 {
4977 	int ret;
4978 
4979 	if (file->f_mode & FMODE_READ)
4980 		ret = seq_lseek(file, offset, whence);
4981 	else
4982 		file->f_pos = ret = 0;
4983 
4984 	return ret;
4985 }
4986 
4987 static const struct file_operations tracing_fops = {
4988 	.open		= tracing_open,
4989 	.read		= seq_read,
4990 	.read_iter	= seq_read_iter,
4991 	.splice_read	= copy_splice_read,
4992 	.write		= tracing_write_stub,
4993 	.llseek		= tracing_lseek,
4994 	.release	= tracing_release,
4995 };
4996 
4997 static const struct file_operations show_traces_fops = {
4998 	.open		= show_traces_open,
4999 	.read		= seq_read,
5000 	.llseek		= seq_lseek,
5001 	.release	= tracing_seq_release,
5002 };
5003 
5004 static ssize_t
5005 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5006 		     size_t count, loff_t *ppos)
5007 {
5008 	struct trace_array *tr = file_inode(filp)->i_private;
5009 	char *mask_str;
5010 	int len;
5011 
5012 	len = snprintf(NULL, 0, "%*pb\n",
5013 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5014 	mask_str = kmalloc(len, GFP_KERNEL);
5015 	if (!mask_str)
5016 		return -ENOMEM;
5017 
5018 	len = snprintf(mask_str, len, "%*pb\n",
5019 		       cpumask_pr_args(tr->tracing_cpumask));
5020 	if (len >= count) {
5021 		count = -EINVAL;
5022 		goto out_err;
5023 	}
5024 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5025 
5026 out_err:
5027 	kfree(mask_str);
5028 
5029 	return count;
5030 }
5031 
5032 int tracing_set_cpumask(struct trace_array *tr,
5033 			cpumask_var_t tracing_cpumask_new)
5034 {
5035 	int cpu;
5036 
5037 	if (!tr)
5038 		return -EINVAL;
5039 
5040 	local_irq_disable();
5041 	arch_spin_lock(&tr->max_lock);
5042 	for_each_tracing_cpu(cpu) {
5043 		/*
5044 		 * Increase/decrease the disabled counter if we are
5045 		 * about to flip a bit in the cpumask:
5046 		 */
5047 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5048 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5049 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5050 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5051 #ifdef CONFIG_TRACER_MAX_TRACE
5052 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5053 #endif
5054 		}
5055 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5056 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5057 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5058 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5059 #ifdef CONFIG_TRACER_MAX_TRACE
5060 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5061 #endif
5062 		}
5063 	}
5064 	arch_spin_unlock(&tr->max_lock);
5065 	local_irq_enable();
5066 
5067 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5068 
5069 	return 0;
5070 }
5071 
5072 static ssize_t
5073 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5074 		      size_t count, loff_t *ppos)
5075 {
5076 	struct trace_array *tr = file_inode(filp)->i_private;
5077 	cpumask_var_t tracing_cpumask_new;
5078 	int err;
5079 
5080 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5081 		return -EINVAL;
5082 
5083 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5084 		return -ENOMEM;
5085 
5086 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5087 	if (err)
5088 		goto err_free;
5089 
5090 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5091 	if (err)
5092 		goto err_free;
5093 
5094 	free_cpumask_var(tracing_cpumask_new);
5095 
5096 	return count;
5097 
5098 err_free:
5099 	free_cpumask_var(tracing_cpumask_new);
5100 
5101 	return err;
5102 }
5103 
5104 static const struct file_operations tracing_cpumask_fops = {
5105 	.open		= tracing_open_generic_tr,
5106 	.read		= tracing_cpumask_read,
5107 	.write		= tracing_cpumask_write,
5108 	.release	= tracing_release_generic_tr,
5109 	.llseek		= generic_file_llseek,
5110 };
5111 
5112 static int tracing_trace_options_show(struct seq_file *m, void *v)
5113 {
5114 	struct tracer_opt *trace_opts;
5115 	struct trace_array *tr = m->private;
5116 	u32 tracer_flags;
5117 	int i;
5118 
5119 	guard(mutex)(&trace_types_lock);
5120 
5121 	tracer_flags = tr->current_trace->flags->val;
5122 	trace_opts = tr->current_trace->flags->opts;
5123 
5124 	for (i = 0; trace_options[i]; i++) {
5125 		if (tr->trace_flags & (1 << i))
5126 			seq_printf(m, "%s\n", trace_options[i]);
5127 		else
5128 			seq_printf(m, "no%s\n", trace_options[i]);
5129 	}
5130 
5131 	for (i = 0; trace_opts[i].name; i++) {
5132 		if (tracer_flags & trace_opts[i].bit)
5133 			seq_printf(m, "%s\n", trace_opts[i].name);
5134 		else
5135 			seq_printf(m, "no%s\n", trace_opts[i].name);
5136 	}
5137 
5138 	return 0;
5139 }
5140 
5141 static int __set_tracer_option(struct trace_array *tr,
5142 			       struct tracer_flags *tracer_flags,
5143 			       struct tracer_opt *opts, int neg)
5144 {
5145 	struct tracer *trace = tracer_flags->trace;
5146 	int ret;
5147 
5148 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5149 	if (ret)
5150 		return ret;
5151 
5152 	if (neg)
5153 		tracer_flags->val &= ~opts->bit;
5154 	else
5155 		tracer_flags->val |= opts->bit;
5156 	return 0;
5157 }
5158 
5159 /* Try to assign a tracer specific option */
5160 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5161 {
5162 	struct tracer *trace = tr->current_trace;
5163 	struct tracer_flags *tracer_flags = trace->flags;
5164 	struct tracer_opt *opts = NULL;
5165 	int i;
5166 
5167 	for (i = 0; tracer_flags->opts[i].name; i++) {
5168 		opts = &tracer_flags->opts[i];
5169 
5170 		if (strcmp(cmp, opts->name) == 0)
5171 			return __set_tracer_option(tr, trace->flags, opts, neg);
5172 	}
5173 
5174 	return -EINVAL;
5175 }
5176 
5177 /* Some tracers require overwrite to stay enabled */
5178 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5179 {
5180 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5181 		return -1;
5182 
5183 	return 0;
5184 }
5185 
5186 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5187 {
5188 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5189 	    (mask == TRACE_ITER_RECORD_CMD) ||
5190 	    (mask == TRACE_ITER_TRACE_PRINTK))
5191 		lockdep_assert_held(&event_mutex);
5192 
5193 	/* do nothing if flag is already set */
5194 	if (!!(tr->trace_flags & mask) == !!enabled)
5195 		return 0;
5196 
5197 	/* Give the tracer a chance to approve the change */
5198 	if (tr->current_trace->flag_changed)
5199 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5200 			return -EINVAL;
5201 
5202 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5203 		if (enabled) {
5204 			update_printk_trace(tr);
5205 		} else {
5206 			/*
5207 			 * The global_trace cannot clear this.
5208 			 * It's flag only gets cleared if another instance sets it.
5209 			 */
5210 			if (printk_trace == &global_trace)
5211 				return -EINVAL;
5212 			/*
5213 			 * An instance must always have it set.
5214 			 * by default, that's the global_trace instane.
5215 			 */
5216 			if (printk_trace == tr)
5217 				update_printk_trace(&global_trace);
5218 		}
5219 	}
5220 
5221 	if (enabled)
5222 		tr->trace_flags |= mask;
5223 	else
5224 		tr->trace_flags &= ~mask;
5225 
5226 	if (mask == TRACE_ITER_RECORD_CMD)
5227 		trace_event_enable_cmd_record(enabled);
5228 
5229 	if (mask == TRACE_ITER_RECORD_TGID) {
5230 
5231 		if (trace_alloc_tgid_map() < 0) {
5232 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5233 			return -ENOMEM;
5234 		}
5235 
5236 		trace_event_enable_tgid_record(enabled);
5237 	}
5238 
5239 	if (mask == TRACE_ITER_EVENT_FORK)
5240 		trace_event_follow_fork(tr, enabled);
5241 
5242 	if (mask == TRACE_ITER_FUNC_FORK)
5243 		ftrace_pid_follow_fork(tr, enabled);
5244 
5245 	if (mask == TRACE_ITER_OVERWRITE) {
5246 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5247 #ifdef CONFIG_TRACER_MAX_TRACE
5248 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5249 #endif
5250 	}
5251 
5252 	if (mask == TRACE_ITER_PRINTK) {
5253 		trace_printk_start_stop_comm(enabled);
5254 		trace_printk_control(enabled);
5255 	}
5256 
5257 	return 0;
5258 }
5259 
5260 int trace_set_options(struct trace_array *tr, char *option)
5261 {
5262 	char *cmp;
5263 	int neg = 0;
5264 	int ret;
5265 	size_t orig_len = strlen(option);
5266 	int len;
5267 
5268 	cmp = strstrip(option);
5269 
5270 	len = str_has_prefix(cmp, "no");
5271 	if (len)
5272 		neg = 1;
5273 
5274 	cmp += len;
5275 
5276 	mutex_lock(&event_mutex);
5277 	mutex_lock(&trace_types_lock);
5278 
5279 	ret = match_string(trace_options, -1, cmp);
5280 	/* If no option could be set, test the specific tracer options */
5281 	if (ret < 0)
5282 		ret = set_tracer_option(tr, cmp, neg);
5283 	else
5284 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5285 
5286 	mutex_unlock(&trace_types_lock);
5287 	mutex_unlock(&event_mutex);
5288 
5289 	/*
5290 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5291 	 * turn it back into a space.
5292 	 */
5293 	if (orig_len > strlen(option))
5294 		option[strlen(option)] = ' ';
5295 
5296 	return ret;
5297 }
5298 
5299 static void __init apply_trace_boot_options(void)
5300 {
5301 	char *buf = trace_boot_options_buf;
5302 	char *option;
5303 
5304 	while (true) {
5305 		option = strsep(&buf, ",");
5306 
5307 		if (!option)
5308 			break;
5309 
5310 		if (*option)
5311 			trace_set_options(&global_trace, option);
5312 
5313 		/* Put back the comma to allow this to be called again */
5314 		if (buf)
5315 			*(buf - 1) = ',';
5316 	}
5317 }
5318 
5319 static ssize_t
5320 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5321 			size_t cnt, loff_t *ppos)
5322 {
5323 	struct seq_file *m = filp->private_data;
5324 	struct trace_array *tr = m->private;
5325 	char buf[64];
5326 	int ret;
5327 
5328 	if (cnt >= sizeof(buf))
5329 		return -EINVAL;
5330 
5331 	if (copy_from_user(buf, ubuf, cnt))
5332 		return -EFAULT;
5333 
5334 	buf[cnt] = 0;
5335 
5336 	ret = trace_set_options(tr, buf);
5337 	if (ret < 0)
5338 		return ret;
5339 
5340 	*ppos += cnt;
5341 
5342 	return cnt;
5343 }
5344 
5345 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5346 {
5347 	struct trace_array *tr = inode->i_private;
5348 	int ret;
5349 
5350 	ret = tracing_check_open_get_tr(tr);
5351 	if (ret)
5352 		return ret;
5353 
5354 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5355 	if (ret < 0)
5356 		trace_array_put(tr);
5357 
5358 	return ret;
5359 }
5360 
5361 static const struct file_operations tracing_iter_fops = {
5362 	.open		= tracing_trace_options_open,
5363 	.read		= seq_read,
5364 	.llseek		= seq_lseek,
5365 	.release	= tracing_single_release_tr,
5366 	.write		= tracing_trace_options_write,
5367 };
5368 
5369 static const char readme_msg[] =
5370 	"tracing mini-HOWTO:\n\n"
5371 	"By default tracefs removes all OTH file permission bits.\n"
5372 	"When mounting tracefs an optional group id can be specified\n"
5373 	"which adds the group to every directory and file in tracefs:\n\n"
5374 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5375 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5376 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5377 	" Important files:\n"
5378 	"  trace\t\t\t- The static contents of the buffer\n"
5379 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5380 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5381 	"  current_tracer\t- function and latency tracers\n"
5382 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5383 	"  error_log\t- error log for failed commands (that support it)\n"
5384 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5385 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5386 	"  trace_clock\t\t- change the clock used to order events\n"
5387 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5388 	"      global:   Synced across CPUs but slows tracing down.\n"
5389 	"     counter:   Not a clock, but just an increment\n"
5390 	"      uptime:   Jiffy counter from time of boot\n"
5391 	"        perf:   Same clock that perf events use\n"
5392 #ifdef CONFIG_X86_64
5393 	"     x86-tsc:   TSC cycle counter\n"
5394 #endif
5395 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5396 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5397 	"    absolute:   Absolute (standalone) timestamp\n"
5398 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5399 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5400 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5401 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5402 	"\t\t\t  Remove sub-buffer with rmdir\n"
5403 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5404 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5405 	"\t\t\t  option name\n"
5406 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5407 #ifdef CONFIG_DYNAMIC_FTRACE
5408 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5409 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5410 	"\t\t\t  functions\n"
5411 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5412 	"\t     modules: Can select a group via module\n"
5413 	"\t      Format: :mod:<module-name>\n"
5414 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5415 	"\t    triggers: a command to perform when function is hit\n"
5416 	"\t      Format: <function>:<trigger>[:count]\n"
5417 	"\t     trigger: traceon, traceoff\n"
5418 	"\t\t      enable_event:<system>:<event>\n"
5419 	"\t\t      disable_event:<system>:<event>\n"
5420 #ifdef CONFIG_STACKTRACE
5421 	"\t\t      stacktrace\n"
5422 #endif
5423 #ifdef CONFIG_TRACER_SNAPSHOT
5424 	"\t\t      snapshot\n"
5425 #endif
5426 	"\t\t      dump\n"
5427 	"\t\t      cpudump\n"
5428 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5429 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5430 	"\t     The first one will disable tracing every time do_fault is hit\n"
5431 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5432 	"\t       The first time do trap is hit and it disables tracing, the\n"
5433 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5434 	"\t       the counter will not decrement. It only decrements when the\n"
5435 	"\t       trigger did work\n"
5436 	"\t     To remove trigger without count:\n"
5437 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5438 	"\t     To remove trigger with a count:\n"
5439 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5440 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5441 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5442 	"\t    modules: Can select a group via module command :mod:\n"
5443 	"\t    Does not accept triggers\n"
5444 #endif /* CONFIG_DYNAMIC_FTRACE */
5445 #ifdef CONFIG_FUNCTION_TRACER
5446 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5447 	"\t\t    (function)\n"
5448 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5449 	"\t\t    (function)\n"
5450 #endif
5451 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5452 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5453 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5454 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5455 #endif
5456 #ifdef CONFIG_TRACER_SNAPSHOT
5457 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5458 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5459 	"\t\t\t  information\n"
5460 #endif
5461 #ifdef CONFIG_STACK_TRACER
5462 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5463 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5464 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5465 	"\t\t\t  new trace)\n"
5466 #ifdef CONFIG_DYNAMIC_FTRACE
5467 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5468 	"\t\t\t  traces\n"
5469 #endif
5470 #endif /* CONFIG_STACK_TRACER */
5471 #ifdef CONFIG_DYNAMIC_EVENTS
5472 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5473 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5474 #endif
5475 #ifdef CONFIG_KPROBE_EVENTS
5476 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5477 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5478 #endif
5479 #ifdef CONFIG_UPROBE_EVENTS
5480 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5481 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5482 #endif
5483 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5484     defined(CONFIG_FPROBE_EVENTS)
5485 	"\t  accepts: event-definitions (one definition per line)\n"
5486 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5487 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5488 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5489 #endif
5490 #ifdef CONFIG_FPROBE_EVENTS
5491 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5492 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5493 #endif
5494 #ifdef CONFIG_HIST_TRIGGERS
5495 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5496 #endif
5497 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5498 	"\t           -:[<group>/][<event>]\n"
5499 #ifdef CONFIG_KPROBE_EVENTS
5500 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5501   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5502 #endif
5503 #ifdef CONFIG_UPROBE_EVENTS
5504   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5505 #endif
5506 	"\t     args: <name>=fetcharg[:type]\n"
5507 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5508 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5509 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5510 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5511 	"\t           <argname>[->field[->field|.field...]],\n"
5512 #endif
5513 #else
5514 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5515 #endif
5516 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5517 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5518 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5519 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5520 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5521 #ifdef CONFIG_HIST_TRIGGERS
5522 	"\t    field: <stype> <name>;\n"
5523 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5524 	"\t           [unsigned] char/int/long\n"
5525 #endif
5526 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5527 	"\t            of the <attached-group>/<attached-event>.\n"
5528 #endif
5529 	"  set_event\t\t- Enables events by name written into it\n"
5530 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5531 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5532 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5533 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5534 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5535 	"\t\t\t  events\n"
5536 	"      filter\t\t- If set, only events passing filter are traced\n"
5537 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5538 	"\t\t\t  <event>:\n"
5539 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5540 	"      filter\t\t- If set, only events passing filter are traced\n"
5541 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5542 	"\t    Format: <trigger>[:count][if <filter>]\n"
5543 	"\t   trigger: traceon, traceoff\n"
5544 	"\t            enable_event:<system>:<event>\n"
5545 	"\t            disable_event:<system>:<event>\n"
5546 #ifdef CONFIG_HIST_TRIGGERS
5547 	"\t            enable_hist:<system>:<event>\n"
5548 	"\t            disable_hist:<system>:<event>\n"
5549 #endif
5550 #ifdef CONFIG_STACKTRACE
5551 	"\t\t    stacktrace\n"
5552 #endif
5553 #ifdef CONFIG_TRACER_SNAPSHOT
5554 	"\t\t    snapshot\n"
5555 #endif
5556 #ifdef CONFIG_HIST_TRIGGERS
5557 	"\t\t    hist (see below)\n"
5558 #endif
5559 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5560 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5561 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5562 	"\t                  events/block/block_unplug/trigger\n"
5563 	"\t   The first disables tracing every time block_unplug is hit.\n"
5564 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5565 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5566 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5567 	"\t   Like function triggers, the counter is only decremented if it\n"
5568 	"\t    enabled or disabled tracing.\n"
5569 	"\t   To remove a trigger without a count:\n"
5570 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5571 	"\t   To remove a trigger with a count:\n"
5572 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5573 	"\t   Filters can be ignored when removing a trigger.\n"
5574 #ifdef CONFIG_HIST_TRIGGERS
5575 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5576 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5577 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5578 	"\t            [:values=<field1[,field2,...]>]\n"
5579 	"\t            [:sort=<field1[,field2,...]>]\n"
5580 	"\t            [:size=#entries]\n"
5581 	"\t            [:pause][:continue][:clear]\n"
5582 	"\t            [:name=histname1]\n"
5583 	"\t            [:nohitcount]\n"
5584 	"\t            [:<handler>.<action>]\n"
5585 	"\t            [if <filter>]\n\n"
5586 	"\t    Note, special fields can be used as well:\n"
5587 	"\t            common_timestamp - to record current timestamp\n"
5588 	"\t            common_cpu - to record the CPU the event happened on\n"
5589 	"\n"
5590 	"\t    A hist trigger variable can be:\n"
5591 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5592 	"\t        - a reference to another variable e.g. y=$x,\n"
5593 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5594 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5595 	"\n"
5596 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5597 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5598 	"\t    variable reference, field or numeric literal.\n"
5599 	"\n"
5600 	"\t    When a matching event is hit, an entry is added to a hash\n"
5601 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5602 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5603 	"\t    correspond to fields in the event's format description.  Keys\n"
5604 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5605 	"\t    Compound keys consisting of up to two fields can be specified\n"
5606 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5607 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5608 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5609 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5610 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5611 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5612 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5613 	"\t    its histogram data will be shared with other triggers of the\n"
5614 	"\t    same name, and trigger hits will update this common data.\n\n"
5615 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5616 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5617 	"\t    triggers attached to an event, there will be a table for each\n"
5618 	"\t    trigger in the output.  The table displayed for a named\n"
5619 	"\t    trigger will be the same as any other instance having the\n"
5620 	"\t    same name.  The default format used to display a given field\n"
5621 	"\t    can be modified by appending any of the following modifiers\n"
5622 	"\t    to the field name, as applicable:\n\n"
5623 	"\t            .hex        display a number as a hex value\n"
5624 	"\t            .sym        display an address as a symbol\n"
5625 	"\t            .sym-offset display an address as a symbol and offset\n"
5626 	"\t            .execname   display a common_pid as a program name\n"
5627 	"\t            .syscall    display a syscall id as a syscall name\n"
5628 	"\t            .log2       display log2 value rather than raw number\n"
5629 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5630 	"\t            .usecs      display a common_timestamp in microseconds\n"
5631 	"\t            .percent    display a number of percentage value\n"
5632 	"\t            .graph      display a bar-graph of a value\n\n"
5633 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5634 	"\t    trigger or to start a hist trigger but not log any events\n"
5635 	"\t    until told to do so.  'continue' can be used to start or\n"
5636 	"\t    restart a paused hist trigger.\n\n"
5637 	"\t    The 'clear' parameter will clear the contents of a running\n"
5638 	"\t    hist trigger and leave its current paused/active state\n"
5639 	"\t    unchanged.\n\n"
5640 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5641 	"\t    raw hitcount in the histogram.\n\n"
5642 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5643 	"\t    have one event conditionally start and stop another event's\n"
5644 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5645 	"\t    the enable_event and disable_event triggers.\n\n"
5646 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5647 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5648 	"\t        <handler>.<action>\n\n"
5649 	"\t    The available handlers are:\n\n"
5650 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5651 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5652 	"\t        onchange(var)            - invoke action if var changes\n\n"
5653 	"\t    The available actions are:\n\n"
5654 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5655 	"\t        save(field,...)                      - save current event fields\n"
5656 #ifdef CONFIG_TRACER_SNAPSHOT
5657 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5658 #endif
5659 #ifdef CONFIG_SYNTH_EVENTS
5660 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5661 	"\t  Write into this file to define/undefine new synthetic events.\n"
5662 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5663 #endif
5664 #endif
5665 ;
5666 
5667 static ssize_t
5668 tracing_readme_read(struct file *filp, char __user *ubuf,
5669 		       size_t cnt, loff_t *ppos)
5670 {
5671 	return simple_read_from_buffer(ubuf, cnt, ppos,
5672 					readme_msg, strlen(readme_msg));
5673 }
5674 
5675 static const struct file_operations tracing_readme_fops = {
5676 	.open		= tracing_open_generic,
5677 	.read		= tracing_readme_read,
5678 	.llseek		= generic_file_llseek,
5679 };
5680 
5681 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5682 static union trace_eval_map_item *
5683 update_eval_map(union trace_eval_map_item *ptr)
5684 {
5685 	if (!ptr->map.eval_string) {
5686 		if (ptr->tail.next) {
5687 			ptr = ptr->tail.next;
5688 			/* Set ptr to the next real item (skip head) */
5689 			ptr++;
5690 		} else
5691 			return NULL;
5692 	}
5693 	return ptr;
5694 }
5695 
5696 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5697 {
5698 	union trace_eval_map_item *ptr = v;
5699 
5700 	/*
5701 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5702 	 * This really should never happen.
5703 	 */
5704 	(*pos)++;
5705 	ptr = update_eval_map(ptr);
5706 	if (WARN_ON_ONCE(!ptr))
5707 		return NULL;
5708 
5709 	ptr++;
5710 	ptr = update_eval_map(ptr);
5711 
5712 	return ptr;
5713 }
5714 
5715 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5716 {
5717 	union trace_eval_map_item *v;
5718 	loff_t l = 0;
5719 
5720 	mutex_lock(&trace_eval_mutex);
5721 
5722 	v = trace_eval_maps;
5723 	if (v)
5724 		v++;
5725 
5726 	while (v && l < *pos) {
5727 		v = eval_map_next(m, v, &l);
5728 	}
5729 
5730 	return v;
5731 }
5732 
5733 static void eval_map_stop(struct seq_file *m, void *v)
5734 {
5735 	mutex_unlock(&trace_eval_mutex);
5736 }
5737 
5738 static int eval_map_show(struct seq_file *m, void *v)
5739 {
5740 	union trace_eval_map_item *ptr = v;
5741 
5742 	seq_printf(m, "%s %ld (%s)\n",
5743 		   ptr->map.eval_string, ptr->map.eval_value,
5744 		   ptr->map.system);
5745 
5746 	return 0;
5747 }
5748 
5749 static const struct seq_operations tracing_eval_map_seq_ops = {
5750 	.start		= eval_map_start,
5751 	.next		= eval_map_next,
5752 	.stop		= eval_map_stop,
5753 	.show		= eval_map_show,
5754 };
5755 
5756 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5757 {
5758 	int ret;
5759 
5760 	ret = tracing_check_open_get_tr(NULL);
5761 	if (ret)
5762 		return ret;
5763 
5764 	return seq_open(filp, &tracing_eval_map_seq_ops);
5765 }
5766 
5767 static const struct file_operations tracing_eval_map_fops = {
5768 	.open		= tracing_eval_map_open,
5769 	.read		= seq_read,
5770 	.llseek		= seq_lseek,
5771 	.release	= seq_release,
5772 };
5773 
5774 static inline union trace_eval_map_item *
5775 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5776 {
5777 	/* Return tail of array given the head */
5778 	return ptr + ptr->head.length + 1;
5779 }
5780 
5781 static void
5782 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5783 			   int len)
5784 {
5785 	struct trace_eval_map **stop;
5786 	struct trace_eval_map **map;
5787 	union trace_eval_map_item *map_array;
5788 	union trace_eval_map_item *ptr;
5789 
5790 	stop = start + len;
5791 
5792 	/*
5793 	 * The trace_eval_maps contains the map plus a head and tail item,
5794 	 * where the head holds the module and length of array, and the
5795 	 * tail holds a pointer to the next list.
5796 	 */
5797 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5798 	if (!map_array) {
5799 		pr_warn("Unable to allocate trace eval mapping\n");
5800 		return;
5801 	}
5802 
5803 	guard(mutex)(&trace_eval_mutex);
5804 
5805 	if (!trace_eval_maps)
5806 		trace_eval_maps = map_array;
5807 	else {
5808 		ptr = trace_eval_maps;
5809 		for (;;) {
5810 			ptr = trace_eval_jmp_to_tail(ptr);
5811 			if (!ptr->tail.next)
5812 				break;
5813 			ptr = ptr->tail.next;
5814 
5815 		}
5816 		ptr->tail.next = map_array;
5817 	}
5818 	map_array->head.mod = mod;
5819 	map_array->head.length = len;
5820 	map_array++;
5821 
5822 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5823 		map_array->map = **map;
5824 		map_array++;
5825 	}
5826 	memset(map_array, 0, sizeof(*map_array));
5827 }
5828 
5829 static void trace_create_eval_file(struct dentry *d_tracer)
5830 {
5831 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5832 			  NULL, &tracing_eval_map_fops);
5833 }
5834 
5835 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5836 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5837 static inline void trace_insert_eval_map_file(struct module *mod,
5838 			      struct trace_eval_map **start, int len) { }
5839 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5840 
5841 static void trace_insert_eval_map(struct module *mod,
5842 				  struct trace_eval_map **start, int len)
5843 {
5844 	struct trace_eval_map **map;
5845 
5846 	if (len <= 0)
5847 		return;
5848 
5849 	map = start;
5850 
5851 	trace_event_eval_update(map, len);
5852 
5853 	trace_insert_eval_map_file(mod, start, len);
5854 }
5855 
5856 static ssize_t
5857 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5858 		       size_t cnt, loff_t *ppos)
5859 {
5860 	struct trace_array *tr = filp->private_data;
5861 	char buf[MAX_TRACER_SIZE+2];
5862 	int r;
5863 
5864 	mutex_lock(&trace_types_lock);
5865 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5866 	mutex_unlock(&trace_types_lock);
5867 
5868 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5869 }
5870 
5871 int tracer_init(struct tracer *t, struct trace_array *tr)
5872 {
5873 	tracing_reset_online_cpus(&tr->array_buffer);
5874 	return t->init(tr);
5875 }
5876 
5877 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5878 {
5879 	int cpu;
5880 
5881 	for_each_tracing_cpu(cpu)
5882 		per_cpu_ptr(buf->data, cpu)->entries = val;
5883 }
5884 
5885 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5886 {
5887 	if (cpu == RING_BUFFER_ALL_CPUS) {
5888 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5889 	} else {
5890 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5891 	}
5892 }
5893 
5894 #ifdef CONFIG_TRACER_MAX_TRACE
5895 /* resize @tr's buffer to the size of @size_tr's entries */
5896 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5897 					struct array_buffer *size_buf, int cpu_id)
5898 {
5899 	int cpu, ret = 0;
5900 
5901 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5902 		for_each_tracing_cpu(cpu) {
5903 			ret = ring_buffer_resize(trace_buf->buffer,
5904 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5905 			if (ret < 0)
5906 				break;
5907 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5908 				per_cpu_ptr(size_buf->data, cpu)->entries;
5909 		}
5910 	} else {
5911 		ret = ring_buffer_resize(trace_buf->buffer,
5912 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5913 		if (ret == 0)
5914 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5915 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5916 	}
5917 
5918 	return ret;
5919 }
5920 #endif /* CONFIG_TRACER_MAX_TRACE */
5921 
5922 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5923 					unsigned long size, int cpu)
5924 {
5925 	int ret;
5926 
5927 	/*
5928 	 * If kernel or user changes the size of the ring buffer
5929 	 * we use the size that was given, and we can forget about
5930 	 * expanding it later.
5931 	 */
5932 	trace_set_ring_buffer_expanded(tr);
5933 
5934 	/* May be called before buffers are initialized */
5935 	if (!tr->array_buffer.buffer)
5936 		return 0;
5937 
5938 	/* Do not allow tracing while resizing ring buffer */
5939 	tracing_stop_tr(tr);
5940 
5941 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5942 	if (ret < 0)
5943 		goto out_start;
5944 
5945 #ifdef CONFIG_TRACER_MAX_TRACE
5946 	if (!tr->allocated_snapshot)
5947 		goto out;
5948 
5949 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5950 	if (ret < 0) {
5951 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5952 						     &tr->array_buffer, cpu);
5953 		if (r < 0) {
5954 			/*
5955 			 * AARGH! We are left with different
5956 			 * size max buffer!!!!
5957 			 * The max buffer is our "snapshot" buffer.
5958 			 * When a tracer needs a snapshot (one of the
5959 			 * latency tracers), it swaps the max buffer
5960 			 * with the saved snap shot. We succeeded to
5961 			 * update the size of the main buffer, but failed to
5962 			 * update the size of the max buffer. But when we tried
5963 			 * to reset the main buffer to the original size, we
5964 			 * failed there too. This is very unlikely to
5965 			 * happen, but if it does, warn and kill all
5966 			 * tracing.
5967 			 */
5968 			WARN_ON(1);
5969 			tracing_disabled = 1;
5970 		}
5971 		goto out_start;
5972 	}
5973 
5974 	update_buffer_entries(&tr->max_buffer, cpu);
5975 
5976  out:
5977 #endif /* CONFIG_TRACER_MAX_TRACE */
5978 
5979 	update_buffer_entries(&tr->array_buffer, cpu);
5980  out_start:
5981 	tracing_start_tr(tr);
5982 	return ret;
5983 }
5984 
5985 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5986 				  unsigned long size, int cpu_id)
5987 {
5988 	guard(mutex)(&trace_types_lock);
5989 
5990 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5991 		/* make sure, this cpu is enabled in the mask */
5992 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5993 			return -EINVAL;
5994 	}
5995 
5996 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
5997 }
5998 
5999 static void update_last_data(struct trace_array *tr)
6000 {
6001 	if (!tr->text_delta && !tr->data_delta)
6002 		return;
6003 
6004 	/*
6005 	 * Need to clear all CPU buffers as there cannot be events
6006 	 * from the previous boot mixed with events with this boot
6007 	 * as that will cause a confusing trace. Need to clear all
6008 	 * CPU buffers, even for those that may currently be offline.
6009 	 */
6010 	tracing_reset_all_cpus(&tr->array_buffer);
6011 
6012 	/* Using current data now */
6013 	tr->text_delta = 0;
6014 	tr->data_delta = 0;
6015 }
6016 
6017 /**
6018  * tracing_update_buffers - used by tracing facility to expand ring buffers
6019  * @tr: The tracing instance
6020  *
6021  * To save on memory when the tracing is never used on a system with it
6022  * configured in. The ring buffers are set to a minimum size. But once
6023  * a user starts to use the tracing facility, then they need to grow
6024  * to their default size.
6025  *
6026  * This function is to be called when a tracer is about to be used.
6027  */
6028 int tracing_update_buffers(struct trace_array *tr)
6029 {
6030 	int ret = 0;
6031 
6032 	mutex_lock(&trace_types_lock);
6033 
6034 	update_last_data(tr);
6035 
6036 	if (!tr->ring_buffer_expanded)
6037 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6038 						RING_BUFFER_ALL_CPUS);
6039 	mutex_unlock(&trace_types_lock);
6040 
6041 	return ret;
6042 }
6043 
6044 struct trace_option_dentry;
6045 
6046 static void
6047 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6048 
6049 /*
6050  * Used to clear out the tracer before deletion of an instance.
6051  * Must have trace_types_lock held.
6052  */
6053 static void tracing_set_nop(struct trace_array *tr)
6054 {
6055 	if (tr->current_trace == &nop_trace)
6056 		return;
6057 
6058 	tr->current_trace->enabled--;
6059 
6060 	if (tr->current_trace->reset)
6061 		tr->current_trace->reset(tr);
6062 
6063 	tr->current_trace = &nop_trace;
6064 }
6065 
6066 static bool tracer_options_updated;
6067 
6068 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6069 {
6070 	/* Only enable if the directory has been created already. */
6071 	if (!tr->dir)
6072 		return;
6073 
6074 	/* Only create trace option files after update_tracer_options finish */
6075 	if (!tracer_options_updated)
6076 		return;
6077 
6078 	create_trace_option_files(tr, t);
6079 }
6080 
6081 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6082 {
6083 	struct tracer *t;
6084 #ifdef CONFIG_TRACER_MAX_TRACE
6085 	bool had_max_tr;
6086 #endif
6087 	int ret;
6088 
6089 	guard(mutex)(&trace_types_lock);
6090 
6091 	update_last_data(tr);
6092 
6093 	if (!tr->ring_buffer_expanded) {
6094 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6095 						RING_BUFFER_ALL_CPUS);
6096 		if (ret < 0)
6097 			return ret;
6098 		ret = 0;
6099 	}
6100 
6101 	for (t = trace_types; t; t = t->next) {
6102 		if (strcmp(t->name, buf) == 0)
6103 			break;
6104 	}
6105 	if (!t)
6106 		return -EINVAL;
6107 
6108 	if (t == tr->current_trace)
6109 		return 0;
6110 
6111 #ifdef CONFIG_TRACER_SNAPSHOT
6112 	if (t->use_max_tr) {
6113 		local_irq_disable();
6114 		arch_spin_lock(&tr->max_lock);
6115 		ret = tr->cond_snapshot ? -EBUSY : 0;
6116 		arch_spin_unlock(&tr->max_lock);
6117 		local_irq_enable();
6118 		if (ret)
6119 			return ret;
6120 	}
6121 #endif
6122 	/* Some tracers won't work on kernel command line */
6123 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6124 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6125 			t->name);
6126 		return -EINVAL;
6127 	}
6128 
6129 	/* Some tracers are only allowed for the top level buffer */
6130 	if (!trace_ok_for_array(t, tr))
6131 		return -EINVAL;
6132 
6133 	/* If trace pipe files are being read, we can't change the tracer */
6134 	if (tr->trace_ref)
6135 		return -EBUSY;
6136 
6137 	trace_branch_disable();
6138 
6139 	tr->current_trace->enabled--;
6140 
6141 	if (tr->current_trace->reset)
6142 		tr->current_trace->reset(tr);
6143 
6144 #ifdef CONFIG_TRACER_MAX_TRACE
6145 	had_max_tr = tr->current_trace->use_max_tr;
6146 
6147 	/* Current trace needs to be nop_trace before synchronize_rcu */
6148 	tr->current_trace = &nop_trace;
6149 
6150 	if (had_max_tr && !t->use_max_tr) {
6151 		/*
6152 		 * We need to make sure that the update_max_tr sees that
6153 		 * current_trace changed to nop_trace to keep it from
6154 		 * swapping the buffers after we resize it.
6155 		 * The update_max_tr is called from interrupts disabled
6156 		 * so a synchronized_sched() is sufficient.
6157 		 */
6158 		synchronize_rcu();
6159 		free_snapshot(tr);
6160 		tracing_disarm_snapshot(tr);
6161 	}
6162 
6163 	if (!had_max_tr && t->use_max_tr) {
6164 		ret = tracing_arm_snapshot_locked(tr);
6165 		if (ret)
6166 			return ret;
6167 	}
6168 #else
6169 	tr->current_trace = &nop_trace;
6170 #endif
6171 
6172 	if (t->init) {
6173 		ret = tracer_init(t, tr);
6174 		if (ret) {
6175 #ifdef CONFIG_TRACER_MAX_TRACE
6176 			if (t->use_max_tr)
6177 				tracing_disarm_snapshot(tr);
6178 #endif
6179 			return ret;
6180 		}
6181 	}
6182 
6183 	tr->current_trace = t;
6184 	tr->current_trace->enabled++;
6185 	trace_branch_enable(tr);
6186 
6187 	return 0;
6188 }
6189 
6190 static ssize_t
6191 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6192 			size_t cnt, loff_t *ppos)
6193 {
6194 	struct trace_array *tr = filp->private_data;
6195 	char buf[MAX_TRACER_SIZE+1];
6196 	char *name;
6197 	size_t ret;
6198 	int err;
6199 
6200 	ret = cnt;
6201 
6202 	if (cnt > MAX_TRACER_SIZE)
6203 		cnt = MAX_TRACER_SIZE;
6204 
6205 	if (copy_from_user(buf, ubuf, cnt))
6206 		return -EFAULT;
6207 
6208 	buf[cnt] = 0;
6209 
6210 	name = strim(buf);
6211 
6212 	err = tracing_set_tracer(tr, name);
6213 	if (err)
6214 		return err;
6215 
6216 	*ppos += ret;
6217 
6218 	return ret;
6219 }
6220 
6221 static ssize_t
6222 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6223 		   size_t cnt, loff_t *ppos)
6224 {
6225 	char buf[64];
6226 	int r;
6227 
6228 	r = snprintf(buf, sizeof(buf), "%ld\n",
6229 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6230 	if (r > sizeof(buf))
6231 		r = sizeof(buf);
6232 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6233 }
6234 
6235 static ssize_t
6236 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6237 		    size_t cnt, loff_t *ppos)
6238 {
6239 	unsigned long val;
6240 	int ret;
6241 
6242 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6243 	if (ret)
6244 		return ret;
6245 
6246 	*ptr = val * 1000;
6247 
6248 	return cnt;
6249 }
6250 
6251 static ssize_t
6252 tracing_thresh_read(struct file *filp, char __user *ubuf,
6253 		    size_t cnt, loff_t *ppos)
6254 {
6255 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6256 }
6257 
6258 static ssize_t
6259 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6260 		     size_t cnt, loff_t *ppos)
6261 {
6262 	struct trace_array *tr = filp->private_data;
6263 	int ret;
6264 
6265 	guard(mutex)(&trace_types_lock);
6266 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6267 	if (ret < 0)
6268 		return ret;
6269 
6270 	if (tr->current_trace->update_thresh) {
6271 		ret = tr->current_trace->update_thresh(tr);
6272 		if (ret < 0)
6273 			return ret;
6274 	}
6275 
6276 	return cnt;
6277 }
6278 
6279 #ifdef CONFIG_TRACER_MAX_TRACE
6280 
6281 static ssize_t
6282 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6283 		     size_t cnt, loff_t *ppos)
6284 {
6285 	struct trace_array *tr = filp->private_data;
6286 
6287 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6288 }
6289 
6290 static ssize_t
6291 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6292 		      size_t cnt, loff_t *ppos)
6293 {
6294 	struct trace_array *tr = filp->private_data;
6295 
6296 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6297 }
6298 
6299 #endif
6300 
6301 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6302 {
6303 	if (cpu == RING_BUFFER_ALL_CPUS) {
6304 		if (cpumask_empty(tr->pipe_cpumask)) {
6305 			cpumask_setall(tr->pipe_cpumask);
6306 			return 0;
6307 		}
6308 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6309 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6310 		return 0;
6311 	}
6312 	return -EBUSY;
6313 }
6314 
6315 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6316 {
6317 	if (cpu == RING_BUFFER_ALL_CPUS) {
6318 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6319 		cpumask_clear(tr->pipe_cpumask);
6320 	} else {
6321 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6322 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6323 	}
6324 }
6325 
6326 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6327 {
6328 	struct trace_array *tr = inode->i_private;
6329 	struct trace_iterator *iter;
6330 	int cpu;
6331 	int ret;
6332 
6333 	ret = tracing_check_open_get_tr(tr);
6334 	if (ret)
6335 		return ret;
6336 
6337 	mutex_lock(&trace_types_lock);
6338 	cpu = tracing_get_cpu(inode);
6339 	ret = open_pipe_on_cpu(tr, cpu);
6340 	if (ret)
6341 		goto fail_pipe_on_cpu;
6342 
6343 	/* create a buffer to store the information to pass to userspace */
6344 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6345 	if (!iter) {
6346 		ret = -ENOMEM;
6347 		goto fail_alloc_iter;
6348 	}
6349 
6350 	trace_seq_init(&iter->seq);
6351 	iter->trace = tr->current_trace;
6352 
6353 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6354 		ret = -ENOMEM;
6355 		goto fail;
6356 	}
6357 
6358 	/* trace pipe does not show start of buffer */
6359 	cpumask_setall(iter->started);
6360 
6361 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6362 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6363 
6364 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6365 	if (trace_clocks[tr->clock_id].in_ns)
6366 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6367 
6368 	iter->tr = tr;
6369 	iter->array_buffer = &tr->array_buffer;
6370 	iter->cpu_file = cpu;
6371 	mutex_init(&iter->mutex);
6372 	filp->private_data = iter;
6373 
6374 	if (iter->trace->pipe_open)
6375 		iter->trace->pipe_open(iter);
6376 
6377 	nonseekable_open(inode, filp);
6378 
6379 	tr->trace_ref++;
6380 
6381 	mutex_unlock(&trace_types_lock);
6382 	return ret;
6383 
6384 fail:
6385 	kfree(iter);
6386 fail_alloc_iter:
6387 	close_pipe_on_cpu(tr, cpu);
6388 fail_pipe_on_cpu:
6389 	__trace_array_put(tr);
6390 	mutex_unlock(&trace_types_lock);
6391 	return ret;
6392 }
6393 
6394 static int tracing_release_pipe(struct inode *inode, struct file *file)
6395 {
6396 	struct trace_iterator *iter = file->private_data;
6397 	struct trace_array *tr = inode->i_private;
6398 
6399 	mutex_lock(&trace_types_lock);
6400 
6401 	tr->trace_ref--;
6402 
6403 	if (iter->trace->pipe_close)
6404 		iter->trace->pipe_close(iter);
6405 	close_pipe_on_cpu(tr, iter->cpu_file);
6406 	mutex_unlock(&trace_types_lock);
6407 
6408 	free_trace_iter_content(iter);
6409 	kfree(iter);
6410 
6411 	trace_array_put(tr);
6412 
6413 	return 0;
6414 }
6415 
6416 static __poll_t
6417 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6418 {
6419 	struct trace_array *tr = iter->tr;
6420 
6421 	/* Iterators are static, they should be filled or empty */
6422 	if (trace_buffer_iter(iter, iter->cpu_file))
6423 		return EPOLLIN | EPOLLRDNORM;
6424 
6425 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6426 		/*
6427 		 * Always select as readable when in blocking mode
6428 		 */
6429 		return EPOLLIN | EPOLLRDNORM;
6430 	else
6431 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6432 					     filp, poll_table, iter->tr->buffer_percent);
6433 }
6434 
6435 static __poll_t
6436 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6437 {
6438 	struct trace_iterator *iter = filp->private_data;
6439 
6440 	return trace_poll(iter, filp, poll_table);
6441 }
6442 
6443 /* Must be called with iter->mutex held. */
6444 static int tracing_wait_pipe(struct file *filp)
6445 {
6446 	struct trace_iterator *iter = filp->private_data;
6447 	int ret;
6448 
6449 	while (trace_empty(iter)) {
6450 
6451 		if ((filp->f_flags & O_NONBLOCK)) {
6452 			return -EAGAIN;
6453 		}
6454 
6455 		/*
6456 		 * We block until we read something and tracing is disabled.
6457 		 * We still block if tracing is disabled, but we have never
6458 		 * read anything. This allows a user to cat this file, and
6459 		 * then enable tracing. But after we have read something,
6460 		 * we give an EOF when tracing is again disabled.
6461 		 *
6462 		 * iter->pos will be 0 if we haven't read anything.
6463 		 */
6464 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6465 			break;
6466 
6467 		mutex_unlock(&iter->mutex);
6468 
6469 		ret = wait_on_pipe(iter, 0);
6470 
6471 		mutex_lock(&iter->mutex);
6472 
6473 		if (ret)
6474 			return ret;
6475 	}
6476 
6477 	return 1;
6478 }
6479 
6480 /*
6481  * Consumer reader.
6482  */
6483 static ssize_t
6484 tracing_read_pipe(struct file *filp, char __user *ubuf,
6485 		  size_t cnt, loff_t *ppos)
6486 {
6487 	struct trace_iterator *iter = filp->private_data;
6488 	ssize_t sret;
6489 
6490 	/*
6491 	 * Avoid more than one consumer on a single file descriptor
6492 	 * This is just a matter of traces coherency, the ring buffer itself
6493 	 * is protected.
6494 	 */
6495 	guard(mutex)(&iter->mutex);
6496 
6497 	/* return any leftover data */
6498 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6499 	if (sret != -EBUSY)
6500 		return sret;
6501 
6502 	trace_seq_init(&iter->seq);
6503 
6504 	if (iter->trace->read) {
6505 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6506 		if (sret)
6507 			return sret;
6508 	}
6509 
6510 waitagain:
6511 	sret = tracing_wait_pipe(filp);
6512 	if (sret <= 0)
6513 		return sret;
6514 
6515 	/* stop when tracing is finished */
6516 	if (trace_empty(iter))
6517 		return 0;
6518 
6519 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6520 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6521 
6522 	/* reset all but tr, trace, and overruns */
6523 	trace_iterator_reset(iter);
6524 	cpumask_clear(iter->started);
6525 	trace_seq_init(&iter->seq);
6526 
6527 	trace_event_read_lock();
6528 	trace_access_lock(iter->cpu_file);
6529 	while (trace_find_next_entry_inc(iter) != NULL) {
6530 		enum print_line_t ret;
6531 		int save_len = iter->seq.seq.len;
6532 
6533 		ret = print_trace_line(iter);
6534 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6535 			/*
6536 			 * If one print_trace_line() fills entire trace_seq in one shot,
6537 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6538 			 * In this case, we need to consume it, otherwise, loop will peek
6539 			 * this event next time, resulting in an infinite loop.
6540 			 */
6541 			if (save_len == 0) {
6542 				iter->seq.full = 0;
6543 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6544 				trace_consume(iter);
6545 				break;
6546 			}
6547 
6548 			/* In other cases, don't print partial lines */
6549 			iter->seq.seq.len = save_len;
6550 			break;
6551 		}
6552 		if (ret != TRACE_TYPE_NO_CONSUME)
6553 			trace_consume(iter);
6554 
6555 		if (trace_seq_used(&iter->seq) >= cnt)
6556 			break;
6557 
6558 		/*
6559 		 * Setting the full flag means we reached the trace_seq buffer
6560 		 * size and we should leave by partial output condition above.
6561 		 * One of the trace_seq_* functions is not used properly.
6562 		 */
6563 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6564 			  iter->ent->type);
6565 	}
6566 	trace_access_unlock(iter->cpu_file);
6567 	trace_event_read_unlock();
6568 
6569 	/* Now copy what we have to the user */
6570 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6571 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6572 		trace_seq_init(&iter->seq);
6573 
6574 	/*
6575 	 * If there was nothing to send to user, in spite of consuming trace
6576 	 * entries, go back to wait for more entries.
6577 	 */
6578 	if (sret == -EBUSY)
6579 		goto waitagain;
6580 
6581 	return sret;
6582 }
6583 
6584 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6585 				     unsigned int idx)
6586 {
6587 	__free_page(spd->pages[idx]);
6588 }
6589 
6590 static size_t
6591 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6592 {
6593 	size_t count;
6594 	int save_len;
6595 	int ret;
6596 
6597 	/* Seq buffer is page-sized, exactly what we need. */
6598 	for (;;) {
6599 		save_len = iter->seq.seq.len;
6600 		ret = print_trace_line(iter);
6601 
6602 		if (trace_seq_has_overflowed(&iter->seq)) {
6603 			iter->seq.seq.len = save_len;
6604 			break;
6605 		}
6606 
6607 		/*
6608 		 * This should not be hit, because it should only
6609 		 * be set if the iter->seq overflowed. But check it
6610 		 * anyway to be safe.
6611 		 */
6612 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6613 			iter->seq.seq.len = save_len;
6614 			break;
6615 		}
6616 
6617 		count = trace_seq_used(&iter->seq) - save_len;
6618 		if (rem < count) {
6619 			rem = 0;
6620 			iter->seq.seq.len = save_len;
6621 			break;
6622 		}
6623 
6624 		if (ret != TRACE_TYPE_NO_CONSUME)
6625 			trace_consume(iter);
6626 		rem -= count;
6627 		if (!trace_find_next_entry_inc(iter))	{
6628 			rem = 0;
6629 			iter->ent = NULL;
6630 			break;
6631 		}
6632 	}
6633 
6634 	return rem;
6635 }
6636 
6637 static ssize_t tracing_splice_read_pipe(struct file *filp,
6638 					loff_t *ppos,
6639 					struct pipe_inode_info *pipe,
6640 					size_t len,
6641 					unsigned int flags)
6642 {
6643 	struct page *pages_def[PIPE_DEF_BUFFERS];
6644 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6645 	struct trace_iterator *iter = filp->private_data;
6646 	struct splice_pipe_desc spd = {
6647 		.pages		= pages_def,
6648 		.partial	= partial_def,
6649 		.nr_pages	= 0, /* This gets updated below. */
6650 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6651 		.ops		= &default_pipe_buf_ops,
6652 		.spd_release	= tracing_spd_release_pipe,
6653 	};
6654 	ssize_t ret;
6655 	size_t rem;
6656 	unsigned int i;
6657 
6658 	if (splice_grow_spd(pipe, &spd))
6659 		return -ENOMEM;
6660 
6661 	mutex_lock(&iter->mutex);
6662 
6663 	if (iter->trace->splice_read) {
6664 		ret = iter->trace->splice_read(iter, filp,
6665 					       ppos, pipe, len, flags);
6666 		if (ret)
6667 			goto out_err;
6668 	}
6669 
6670 	ret = tracing_wait_pipe(filp);
6671 	if (ret <= 0)
6672 		goto out_err;
6673 
6674 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6675 		ret = -EFAULT;
6676 		goto out_err;
6677 	}
6678 
6679 	trace_event_read_lock();
6680 	trace_access_lock(iter->cpu_file);
6681 
6682 	/* Fill as many pages as possible. */
6683 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6684 		spd.pages[i] = alloc_page(GFP_KERNEL);
6685 		if (!spd.pages[i])
6686 			break;
6687 
6688 		rem = tracing_fill_pipe_page(rem, iter);
6689 
6690 		/* Copy the data into the page, so we can start over. */
6691 		ret = trace_seq_to_buffer(&iter->seq,
6692 					  page_address(spd.pages[i]),
6693 					  trace_seq_used(&iter->seq));
6694 		if (ret < 0) {
6695 			__free_page(spd.pages[i]);
6696 			break;
6697 		}
6698 		spd.partial[i].offset = 0;
6699 		spd.partial[i].len = trace_seq_used(&iter->seq);
6700 
6701 		trace_seq_init(&iter->seq);
6702 	}
6703 
6704 	trace_access_unlock(iter->cpu_file);
6705 	trace_event_read_unlock();
6706 	mutex_unlock(&iter->mutex);
6707 
6708 	spd.nr_pages = i;
6709 
6710 	if (i)
6711 		ret = splice_to_pipe(pipe, &spd);
6712 	else
6713 		ret = 0;
6714 out:
6715 	splice_shrink_spd(&spd);
6716 	return ret;
6717 
6718 out_err:
6719 	mutex_unlock(&iter->mutex);
6720 	goto out;
6721 }
6722 
6723 static ssize_t
6724 tracing_entries_read(struct file *filp, char __user *ubuf,
6725 		     size_t cnt, loff_t *ppos)
6726 {
6727 	struct inode *inode = file_inode(filp);
6728 	struct trace_array *tr = inode->i_private;
6729 	int cpu = tracing_get_cpu(inode);
6730 	char buf[64];
6731 	int r = 0;
6732 	ssize_t ret;
6733 
6734 	mutex_lock(&trace_types_lock);
6735 
6736 	if (cpu == RING_BUFFER_ALL_CPUS) {
6737 		int cpu, buf_size_same;
6738 		unsigned long size;
6739 
6740 		size = 0;
6741 		buf_size_same = 1;
6742 		/* check if all cpu sizes are same */
6743 		for_each_tracing_cpu(cpu) {
6744 			/* fill in the size from first enabled cpu */
6745 			if (size == 0)
6746 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6747 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6748 				buf_size_same = 0;
6749 				break;
6750 			}
6751 		}
6752 
6753 		if (buf_size_same) {
6754 			if (!tr->ring_buffer_expanded)
6755 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6756 					    size >> 10,
6757 					    trace_buf_size >> 10);
6758 			else
6759 				r = sprintf(buf, "%lu\n", size >> 10);
6760 		} else
6761 			r = sprintf(buf, "X\n");
6762 	} else
6763 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6764 
6765 	mutex_unlock(&trace_types_lock);
6766 
6767 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6768 	return ret;
6769 }
6770 
6771 static ssize_t
6772 tracing_entries_write(struct file *filp, const char __user *ubuf,
6773 		      size_t cnt, loff_t *ppos)
6774 {
6775 	struct inode *inode = file_inode(filp);
6776 	struct trace_array *tr = inode->i_private;
6777 	unsigned long val;
6778 	int ret;
6779 
6780 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6781 	if (ret)
6782 		return ret;
6783 
6784 	/* must have at least 1 entry */
6785 	if (!val)
6786 		return -EINVAL;
6787 
6788 	/* value is in KB */
6789 	val <<= 10;
6790 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6791 	if (ret < 0)
6792 		return ret;
6793 
6794 	*ppos += cnt;
6795 
6796 	return cnt;
6797 }
6798 
6799 static ssize_t
6800 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6801 				size_t cnt, loff_t *ppos)
6802 {
6803 	struct trace_array *tr = filp->private_data;
6804 	char buf[64];
6805 	int r, cpu;
6806 	unsigned long size = 0, expanded_size = 0;
6807 
6808 	mutex_lock(&trace_types_lock);
6809 	for_each_tracing_cpu(cpu) {
6810 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6811 		if (!tr->ring_buffer_expanded)
6812 			expanded_size += trace_buf_size >> 10;
6813 	}
6814 	if (tr->ring_buffer_expanded)
6815 		r = sprintf(buf, "%lu\n", size);
6816 	else
6817 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6818 	mutex_unlock(&trace_types_lock);
6819 
6820 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6821 }
6822 
6823 static ssize_t
6824 tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6825 {
6826 	struct trace_array *tr = filp->private_data;
6827 	struct seq_buf seq;
6828 	char buf[64];
6829 
6830 	seq_buf_init(&seq, buf, 64);
6831 
6832 	seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
6833 	seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
6834 
6835 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
6836 }
6837 
6838 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6839 {
6840 	struct trace_array *tr = inode->i_private;
6841 	int cpu = tracing_get_cpu(inode);
6842 	int ret;
6843 
6844 	ret = tracing_check_open_get_tr(tr);
6845 	if (ret)
6846 		return ret;
6847 
6848 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6849 	if (ret < 0)
6850 		__trace_array_put(tr);
6851 	return ret;
6852 }
6853 
6854 static ssize_t
6855 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6856 			  size_t cnt, loff_t *ppos)
6857 {
6858 	/*
6859 	 * There is no need to read what the user has written, this function
6860 	 * is just to make sure that there is no error when "echo" is used
6861 	 */
6862 
6863 	*ppos += cnt;
6864 
6865 	return cnt;
6866 }
6867 
6868 static int
6869 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6870 {
6871 	struct trace_array *tr = inode->i_private;
6872 
6873 	/* disable tracing ? */
6874 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6875 		tracer_tracing_off(tr);
6876 	/* resize the ring buffer to 0 */
6877 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6878 
6879 	trace_array_put(tr);
6880 
6881 	return 0;
6882 }
6883 
6884 #define TRACE_MARKER_MAX_SIZE		4096
6885 
6886 static ssize_t
6887 tracing_mark_write(struct file *filp, const char __user *ubuf,
6888 					size_t cnt, loff_t *fpos)
6889 {
6890 	struct trace_array *tr = filp->private_data;
6891 	struct ring_buffer_event *event;
6892 	enum event_trigger_type tt = ETT_NONE;
6893 	struct trace_buffer *buffer;
6894 	struct print_entry *entry;
6895 	int meta_size;
6896 	ssize_t written;
6897 	size_t size;
6898 	int len;
6899 
6900 /* Used in tracing_mark_raw_write() as well */
6901 #define FAULTED_STR "<faulted>"
6902 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6903 
6904 	if (tracing_disabled)
6905 		return -EINVAL;
6906 
6907 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6908 		return -EINVAL;
6909 
6910 	if ((ssize_t)cnt < 0)
6911 		return -EINVAL;
6912 
6913 	if (cnt > TRACE_MARKER_MAX_SIZE)
6914 		cnt = TRACE_MARKER_MAX_SIZE;
6915 
6916 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6917  again:
6918 	size = cnt + meta_size;
6919 
6920 	/* If less than "<faulted>", then make sure we can still add that */
6921 	if (cnt < FAULTED_SIZE)
6922 		size += FAULTED_SIZE - cnt;
6923 
6924 	buffer = tr->array_buffer.buffer;
6925 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6926 					    tracing_gen_ctx());
6927 	if (unlikely(!event)) {
6928 		/*
6929 		 * If the size was greater than what was allowed, then
6930 		 * make it smaller and try again.
6931 		 */
6932 		if (size > ring_buffer_max_event_size(buffer)) {
6933 			/* cnt < FAULTED size should never be bigger than max */
6934 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
6935 				return -EBADF;
6936 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
6937 			/* The above should only happen once */
6938 			if (WARN_ON_ONCE(cnt + meta_size == size))
6939 				return -EBADF;
6940 			goto again;
6941 		}
6942 
6943 		/* Ring buffer disabled, return as if not open for write */
6944 		return -EBADF;
6945 	}
6946 
6947 	entry = ring_buffer_event_data(event);
6948 	entry->ip = _THIS_IP_;
6949 
6950 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6951 	if (len) {
6952 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6953 		cnt = FAULTED_SIZE;
6954 		written = -EFAULT;
6955 	} else
6956 		written = cnt;
6957 
6958 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6959 		/* do not add \n before testing triggers, but add \0 */
6960 		entry->buf[cnt] = '\0';
6961 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6962 	}
6963 
6964 	if (entry->buf[cnt - 1] != '\n') {
6965 		entry->buf[cnt] = '\n';
6966 		entry->buf[cnt + 1] = '\0';
6967 	} else
6968 		entry->buf[cnt] = '\0';
6969 
6970 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6971 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6972 	__buffer_unlock_commit(buffer, event);
6973 
6974 	if (tt)
6975 		event_triggers_post_call(tr->trace_marker_file, tt);
6976 
6977 	return written;
6978 }
6979 
6980 static ssize_t
6981 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6982 					size_t cnt, loff_t *fpos)
6983 {
6984 	struct trace_array *tr = filp->private_data;
6985 	struct ring_buffer_event *event;
6986 	struct trace_buffer *buffer;
6987 	struct raw_data_entry *entry;
6988 	ssize_t written;
6989 	int size;
6990 	int len;
6991 
6992 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6993 
6994 	if (tracing_disabled)
6995 		return -EINVAL;
6996 
6997 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6998 		return -EINVAL;
6999 
7000 	/* The marker must at least have a tag id */
7001 	if (cnt < sizeof(unsigned int))
7002 		return -EINVAL;
7003 
7004 	size = sizeof(*entry) + cnt;
7005 	if (cnt < FAULT_SIZE_ID)
7006 		size += FAULT_SIZE_ID - cnt;
7007 
7008 	buffer = tr->array_buffer.buffer;
7009 
7010 	if (size > ring_buffer_max_event_size(buffer))
7011 		return -EINVAL;
7012 
7013 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7014 					    tracing_gen_ctx());
7015 	if (!event)
7016 		/* Ring buffer disabled, return as if not open for write */
7017 		return -EBADF;
7018 
7019 	entry = ring_buffer_event_data(event);
7020 
7021 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7022 	if (len) {
7023 		entry->id = -1;
7024 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7025 		written = -EFAULT;
7026 	} else
7027 		written = cnt;
7028 
7029 	__buffer_unlock_commit(buffer, event);
7030 
7031 	return written;
7032 }
7033 
7034 static int tracing_clock_show(struct seq_file *m, void *v)
7035 {
7036 	struct trace_array *tr = m->private;
7037 	int i;
7038 
7039 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7040 		seq_printf(m,
7041 			"%s%s%s%s", i ? " " : "",
7042 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7043 			i == tr->clock_id ? "]" : "");
7044 	seq_putc(m, '\n');
7045 
7046 	return 0;
7047 }
7048 
7049 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7050 {
7051 	int i;
7052 
7053 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7054 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7055 			break;
7056 	}
7057 	if (i == ARRAY_SIZE(trace_clocks))
7058 		return -EINVAL;
7059 
7060 	mutex_lock(&trace_types_lock);
7061 
7062 	tr->clock_id = i;
7063 
7064 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7065 
7066 	/*
7067 	 * New clock may not be consistent with the previous clock.
7068 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7069 	 */
7070 	tracing_reset_online_cpus(&tr->array_buffer);
7071 
7072 #ifdef CONFIG_TRACER_MAX_TRACE
7073 	if (tr->max_buffer.buffer)
7074 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7075 	tracing_reset_online_cpus(&tr->max_buffer);
7076 #endif
7077 
7078 	mutex_unlock(&trace_types_lock);
7079 
7080 	return 0;
7081 }
7082 
7083 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7084 				   size_t cnt, loff_t *fpos)
7085 {
7086 	struct seq_file *m = filp->private_data;
7087 	struct trace_array *tr = m->private;
7088 	char buf[64];
7089 	const char *clockstr;
7090 	int ret;
7091 
7092 	if (cnt >= sizeof(buf))
7093 		return -EINVAL;
7094 
7095 	if (copy_from_user(buf, ubuf, cnt))
7096 		return -EFAULT;
7097 
7098 	buf[cnt] = 0;
7099 
7100 	clockstr = strstrip(buf);
7101 
7102 	ret = tracing_set_clock(tr, clockstr);
7103 	if (ret)
7104 		return ret;
7105 
7106 	*fpos += cnt;
7107 
7108 	return cnt;
7109 }
7110 
7111 static int tracing_clock_open(struct inode *inode, struct file *file)
7112 {
7113 	struct trace_array *tr = inode->i_private;
7114 	int ret;
7115 
7116 	ret = tracing_check_open_get_tr(tr);
7117 	if (ret)
7118 		return ret;
7119 
7120 	ret = single_open(file, tracing_clock_show, inode->i_private);
7121 	if (ret < 0)
7122 		trace_array_put(tr);
7123 
7124 	return ret;
7125 }
7126 
7127 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7128 {
7129 	struct trace_array *tr = m->private;
7130 
7131 	mutex_lock(&trace_types_lock);
7132 
7133 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7134 		seq_puts(m, "delta [absolute]\n");
7135 	else
7136 		seq_puts(m, "[delta] absolute\n");
7137 
7138 	mutex_unlock(&trace_types_lock);
7139 
7140 	return 0;
7141 }
7142 
7143 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7144 {
7145 	struct trace_array *tr = inode->i_private;
7146 	int ret;
7147 
7148 	ret = tracing_check_open_get_tr(tr);
7149 	if (ret)
7150 		return ret;
7151 
7152 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7153 	if (ret < 0)
7154 		trace_array_put(tr);
7155 
7156 	return ret;
7157 }
7158 
7159 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7160 {
7161 	if (rbe == this_cpu_read(trace_buffered_event))
7162 		return ring_buffer_time_stamp(buffer);
7163 
7164 	return ring_buffer_event_time_stamp(buffer, rbe);
7165 }
7166 
7167 /*
7168  * Set or disable using the per CPU trace_buffer_event when possible.
7169  */
7170 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7171 {
7172 	guard(mutex)(&trace_types_lock);
7173 
7174 	if (set && tr->no_filter_buffering_ref++)
7175 		return 0;
7176 
7177 	if (!set) {
7178 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7179 			return -EINVAL;
7180 
7181 		--tr->no_filter_buffering_ref;
7182 	}
7183 
7184 	return 0;
7185 }
7186 
7187 struct ftrace_buffer_info {
7188 	struct trace_iterator	iter;
7189 	void			*spare;
7190 	unsigned int		spare_cpu;
7191 	unsigned int		spare_size;
7192 	unsigned int		read;
7193 };
7194 
7195 #ifdef CONFIG_TRACER_SNAPSHOT
7196 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7197 {
7198 	struct trace_array *tr = inode->i_private;
7199 	struct trace_iterator *iter;
7200 	struct seq_file *m;
7201 	int ret;
7202 
7203 	ret = tracing_check_open_get_tr(tr);
7204 	if (ret)
7205 		return ret;
7206 
7207 	if (file->f_mode & FMODE_READ) {
7208 		iter = __tracing_open(inode, file, true);
7209 		if (IS_ERR(iter))
7210 			ret = PTR_ERR(iter);
7211 	} else {
7212 		/* Writes still need the seq_file to hold the private data */
7213 		ret = -ENOMEM;
7214 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7215 		if (!m)
7216 			goto out;
7217 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7218 		if (!iter) {
7219 			kfree(m);
7220 			goto out;
7221 		}
7222 		ret = 0;
7223 
7224 		iter->tr = tr;
7225 		iter->array_buffer = &tr->max_buffer;
7226 		iter->cpu_file = tracing_get_cpu(inode);
7227 		m->private = iter;
7228 		file->private_data = m;
7229 	}
7230 out:
7231 	if (ret < 0)
7232 		trace_array_put(tr);
7233 
7234 	return ret;
7235 }
7236 
7237 static void tracing_swap_cpu_buffer(void *tr)
7238 {
7239 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7240 }
7241 
7242 static ssize_t
7243 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7244 		       loff_t *ppos)
7245 {
7246 	struct seq_file *m = filp->private_data;
7247 	struct trace_iterator *iter = m->private;
7248 	struct trace_array *tr = iter->tr;
7249 	unsigned long val;
7250 	int ret;
7251 
7252 	ret = tracing_update_buffers(tr);
7253 	if (ret < 0)
7254 		return ret;
7255 
7256 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7257 	if (ret)
7258 		return ret;
7259 
7260 	guard(mutex)(&trace_types_lock);
7261 
7262 	if (tr->current_trace->use_max_tr)
7263 		return -EBUSY;
7264 
7265 	local_irq_disable();
7266 	arch_spin_lock(&tr->max_lock);
7267 	if (tr->cond_snapshot)
7268 		ret = -EBUSY;
7269 	arch_spin_unlock(&tr->max_lock);
7270 	local_irq_enable();
7271 	if (ret)
7272 		return ret;
7273 
7274 	switch (val) {
7275 	case 0:
7276 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7277 			return -EINVAL;
7278 		if (tr->allocated_snapshot)
7279 			free_snapshot(tr);
7280 		break;
7281 	case 1:
7282 /* Only allow per-cpu swap if the ring buffer supports it */
7283 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7284 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7285 			return -EINVAL;
7286 #endif
7287 		if (tr->allocated_snapshot)
7288 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7289 					&tr->array_buffer, iter->cpu_file);
7290 
7291 		ret = tracing_arm_snapshot_locked(tr);
7292 		if (ret)
7293 			return ret;
7294 
7295 		/* Now, we're going to swap */
7296 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7297 			local_irq_disable();
7298 			update_max_tr(tr, current, smp_processor_id(), NULL);
7299 			local_irq_enable();
7300 		} else {
7301 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7302 						 (void *)tr, 1);
7303 		}
7304 		tracing_disarm_snapshot(tr);
7305 		break;
7306 	default:
7307 		if (tr->allocated_snapshot) {
7308 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7309 				tracing_reset_online_cpus(&tr->max_buffer);
7310 			else
7311 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7312 		}
7313 		break;
7314 	}
7315 
7316 	if (ret >= 0) {
7317 		*ppos += cnt;
7318 		ret = cnt;
7319 	}
7320 
7321 	return ret;
7322 }
7323 
7324 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7325 {
7326 	struct seq_file *m = file->private_data;
7327 	int ret;
7328 
7329 	ret = tracing_release(inode, file);
7330 
7331 	if (file->f_mode & FMODE_READ)
7332 		return ret;
7333 
7334 	/* If write only, the seq_file is just a stub */
7335 	if (m)
7336 		kfree(m->private);
7337 	kfree(m);
7338 
7339 	return 0;
7340 }
7341 
7342 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7343 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7344 				    size_t count, loff_t *ppos);
7345 static int tracing_buffers_release(struct inode *inode, struct file *file);
7346 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7347 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7348 
7349 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7350 {
7351 	struct ftrace_buffer_info *info;
7352 	int ret;
7353 
7354 	/* The following checks for tracefs lockdown */
7355 	ret = tracing_buffers_open(inode, filp);
7356 	if (ret < 0)
7357 		return ret;
7358 
7359 	info = filp->private_data;
7360 
7361 	if (info->iter.trace->use_max_tr) {
7362 		tracing_buffers_release(inode, filp);
7363 		return -EBUSY;
7364 	}
7365 
7366 	info->iter.snapshot = true;
7367 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7368 
7369 	return ret;
7370 }
7371 
7372 #endif /* CONFIG_TRACER_SNAPSHOT */
7373 
7374 
7375 static const struct file_operations tracing_thresh_fops = {
7376 	.open		= tracing_open_generic,
7377 	.read		= tracing_thresh_read,
7378 	.write		= tracing_thresh_write,
7379 	.llseek		= generic_file_llseek,
7380 };
7381 
7382 #ifdef CONFIG_TRACER_MAX_TRACE
7383 static const struct file_operations tracing_max_lat_fops = {
7384 	.open		= tracing_open_generic_tr,
7385 	.read		= tracing_max_lat_read,
7386 	.write		= tracing_max_lat_write,
7387 	.llseek		= generic_file_llseek,
7388 	.release	= tracing_release_generic_tr,
7389 };
7390 #endif
7391 
7392 static const struct file_operations set_tracer_fops = {
7393 	.open		= tracing_open_generic_tr,
7394 	.read		= tracing_set_trace_read,
7395 	.write		= tracing_set_trace_write,
7396 	.llseek		= generic_file_llseek,
7397 	.release	= tracing_release_generic_tr,
7398 };
7399 
7400 static const struct file_operations tracing_pipe_fops = {
7401 	.open		= tracing_open_pipe,
7402 	.poll		= tracing_poll_pipe,
7403 	.read		= tracing_read_pipe,
7404 	.splice_read	= tracing_splice_read_pipe,
7405 	.release	= tracing_release_pipe,
7406 };
7407 
7408 static const struct file_operations tracing_entries_fops = {
7409 	.open		= tracing_open_generic_tr,
7410 	.read		= tracing_entries_read,
7411 	.write		= tracing_entries_write,
7412 	.llseek		= generic_file_llseek,
7413 	.release	= tracing_release_generic_tr,
7414 };
7415 
7416 static const struct file_operations tracing_buffer_meta_fops = {
7417 	.open		= tracing_buffer_meta_open,
7418 	.read		= seq_read,
7419 	.llseek		= seq_lseek,
7420 	.release	= tracing_seq_release,
7421 };
7422 
7423 static const struct file_operations tracing_total_entries_fops = {
7424 	.open		= tracing_open_generic_tr,
7425 	.read		= tracing_total_entries_read,
7426 	.llseek		= generic_file_llseek,
7427 	.release	= tracing_release_generic_tr,
7428 };
7429 
7430 static const struct file_operations tracing_free_buffer_fops = {
7431 	.open		= tracing_open_generic_tr,
7432 	.write		= tracing_free_buffer_write,
7433 	.release	= tracing_free_buffer_release,
7434 };
7435 
7436 static const struct file_operations tracing_mark_fops = {
7437 	.open		= tracing_mark_open,
7438 	.write		= tracing_mark_write,
7439 	.release	= tracing_release_generic_tr,
7440 };
7441 
7442 static const struct file_operations tracing_mark_raw_fops = {
7443 	.open		= tracing_mark_open,
7444 	.write		= tracing_mark_raw_write,
7445 	.release	= tracing_release_generic_tr,
7446 };
7447 
7448 static const struct file_operations trace_clock_fops = {
7449 	.open		= tracing_clock_open,
7450 	.read		= seq_read,
7451 	.llseek		= seq_lseek,
7452 	.release	= tracing_single_release_tr,
7453 	.write		= tracing_clock_write,
7454 };
7455 
7456 static const struct file_operations trace_time_stamp_mode_fops = {
7457 	.open		= tracing_time_stamp_mode_open,
7458 	.read		= seq_read,
7459 	.llseek		= seq_lseek,
7460 	.release	= tracing_single_release_tr,
7461 };
7462 
7463 static const struct file_operations last_boot_fops = {
7464 	.open		= tracing_open_generic_tr,
7465 	.read		= tracing_last_boot_read,
7466 	.llseek		= generic_file_llseek,
7467 	.release	= tracing_release_generic_tr,
7468 };
7469 
7470 #ifdef CONFIG_TRACER_SNAPSHOT
7471 static const struct file_operations snapshot_fops = {
7472 	.open		= tracing_snapshot_open,
7473 	.read		= seq_read,
7474 	.write		= tracing_snapshot_write,
7475 	.llseek		= tracing_lseek,
7476 	.release	= tracing_snapshot_release,
7477 };
7478 
7479 static const struct file_operations snapshot_raw_fops = {
7480 	.open		= snapshot_raw_open,
7481 	.read		= tracing_buffers_read,
7482 	.release	= tracing_buffers_release,
7483 	.splice_read	= tracing_buffers_splice_read,
7484 };
7485 
7486 #endif /* CONFIG_TRACER_SNAPSHOT */
7487 
7488 /*
7489  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7490  * @filp: The active open file structure
7491  * @ubuf: The userspace provided buffer to read value into
7492  * @cnt: The maximum number of bytes to read
7493  * @ppos: The current "file" position
7494  *
7495  * This function implements the write interface for a struct trace_min_max_param.
7496  * The filp->private_data must point to a trace_min_max_param structure that
7497  * defines where to write the value, the min and the max acceptable values,
7498  * and a lock to protect the write.
7499  */
7500 static ssize_t
7501 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7502 {
7503 	struct trace_min_max_param *param = filp->private_data;
7504 	u64 val;
7505 	int err;
7506 
7507 	if (!param)
7508 		return -EFAULT;
7509 
7510 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7511 	if (err)
7512 		return err;
7513 
7514 	if (param->lock)
7515 		mutex_lock(param->lock);
7516 
7517 	if (param->min && val < *param->min)
7518 		err = -EINVAL;
7519 
7520 	if (param->max && val > *param->max)
7521 		err = -EINVAL;
7522 
7523 	if (!err)
7524 		*param->val = val;
7525 
7526 	if (param->lock)
7527 		mutex_unlock(param->lock);
7528 
7529 	if (err)
7530 		return err;
7531 
7532 	return cnt;
7533 }
7534 
7535 /*
7536  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7537  * @filp: The active open file structure
7538  * @ubuf: The userspace provided buffer to read value into
7539  * @cnt: The maximum number of bytes to read
7540  * @ppos: The current "file" position
7541  *
7542  * This function implements the read interface for a struct trace_min_max_param.
7543  * The filp->private_data must point to a trace_min_max_param struct with valid
7544  * data.
7545  */
7546 static ssize_t
7547 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7548 {
7549 	struct trace_min_max_param *param = filp->private_data;
7550 	char buf[U64_STR_SIZE];
7551 	int len;
7552 	u64 val;
7553 
7554 	if (!param)
7555 		return -EFAULT;
7556 
7557 	val = *param->val;
7558 
7559 	if (cnt > sizeof(buf))
7560 		cnt = sizeof(buf);
7561 
7562 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7563 
7564 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7565 }
7566 
7567 const struct file_operations trace_min_max_fops = {
7568 	.open		= tracing_open_generic,
7569 	.read		= trace_min_max_read,
7570 	.write		= trace_min_max_write,
7571 };
7572 
7573 #define TRACING_LOG_ERRS_MAX	8
7574 #define TRACING_LOG_LOC_MAX	128
7575 
7576 #define CMD_PREFIX "  Command: "
7577 
7578 struct err_info {
7579 	const char	**errs;	/* ptr to loc-specific array of err strings */
7580 	u8		type;	/* index into errs -> specific err string */
7581 	u16		pos;	/* caret position */
7582 	u64		ts;
7583 };
7584 
7585 struct tracing_log_err {
7586 	struct list_head	list;
7587 	struct err_info		info;
7588 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7589 	char			*cmd;                     /* what caused err */
7590 };
7591 
7592 static DEFINE_MUTEX(tracing_err_log_lock);
7593 
7594 static struct tracing_log_err *alloc_tracing_log_err(int len)
7595 {
7596 	struct tracing_log_err *err;
7597 
7598 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7599 	if (!err)
7600 		return ERR_PTR(-ENOMEM);
7601 
7602 	err->cmd = kzalloc(len, GFP_KERNEL);
7603 	if (!err->cmd) {
7604 		kfree(err);
7605 		return ERR_PTR(-ENOMEM);
7606 	}
7607 
7608 	return err;
7609 }
7610 
7611 static void free_tracing_log_err(struct tracing_log_err *err)
7612 {
7613 	kfree(err->cmd);
7614 	kfree(err);
7615 }
7616 
7617 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7618 						   int len)
7619 {
7620 	struct tracing_log_err *err;
7621 	char *cmd;
7622 
7623 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7624 		err = alloc_tracing_log_err(len);
7625 		if (PTR_ERR(err) != -ENOMEM)
7626 			tr->n_err_log_entries++;
7627 
7628 		return err;
7629 	}
7630 	cmd = kzalloc(len, GFP_KERNEL);
7631 	if (!cmd)
7632 		return ERR_PTR(-ENOMEM);
7633 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7634 	kfree(err->cmd);
7635 	err->cmd = cmd;
7636 	list_del(&err->list);
7637 
7638 	return err;
7639 }
7640 
7641 /**
7642  * err_pos - find the position of a string within a command for error careting
7643  * @cmd: The tracing command that caused the error
7644  * @str: The string to position the caret at within @cmd
7645  *
7646  * Finds the position of the first occurrence of @str within @cmd.  The
7647  * return value can be passed to tracing_log_err() for caret placement
7648  * within @cmd.
7649  *
7650  * Returns the index within @cmd of the first occurrence of @str or 0
7651  * if @str was not found.
7652  */
7653 unsigned int err_pos(char *cmd, const char *str)
7654 {
7655 	char *found;
7656 
7657 	if (WARN_ON(!strlen(cmd)))
7658 		return 0;
7659 
7660 	found = strstr(cmd, str);
7661 	if (found)
7662 		return found - cmd;
7663 
7664 	return 0;
7665 }
7666 
7667 /**
7668  * tracing_log_err - write an error to the tracing error log
7669  * @tr: The associated trace array for the error (NULL for top level array)
7670  * @loc: A string describing where the error occurred
7671  * @cmd: The tracing command that caused the error
7672  * @errs: The array of loc-specific static error strings
7673  * @type: The index into errs[], which produces the specific static err string
7674  * @pos: The position the caret should be placed in the cmd
7675  *
7676  * Writes an error into tracing/error_log of the form:
7677  *
7678  * <loc>: error: <text>
7679  *   Command: <cmd>
7680  *              ^
7681  *
7682  * tracing/error_log is a small log file containing the last
7683  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7684  * unless there has been a tracing error, and the error log can be
7685  * cleared and have its memory freed by writing the empty string in
7686  * truncation mode to it i.e. echo > tracing/error_log.
7687  *
7688  * NOTE: the @errs array along with the @type param are used to
7689  * produce a static error string - this string is not copied and saved
7690  * when the error is logged - only a pointer to it is saved.  See
7691  * existing callers for examples of how static strings are typically
7692  * defined for use with tracing_log_err().
7693  */
7694 void tracing_log_err(struct trace_array *tr,
7695 		     const char *loc, const char *cmd,
7696 		     const char **errs, u8 type, u16 pos)
7697 {
7698 	struct tracing_log_err *err;
7699 	int len = 0;
7700 
7701 	if (!tr)
7702 		tr = &global_trace;
7703 
7704 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7705 
7706 	guard(mutex)(&tracing_err_log_lock);
7707 
7708 	err = get_tracing_log_err(tr, len);
7709 	if (PTR_ERR(err) == -ENOMEM)
7710 		return;
7711 
7712 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7713 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7714 
7715 	err->info.errs = errs;
7716 	err->info.type = type;
7717 	err->info.pos = pos;
7718 	err->info.ts = local_clock();
7719 
7720 	list_add_tail(&err->list, &tr->err_log);
7721 }
7722 
7723 static void clear_tracing_err_log(struct trace_array *tr)
7724 {
7725 	struct tracing_log_err *err, *next;
7726 
7727 	mutex_lock(&tracing_err_log_lock);
7728 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7729 		list_del(&err->list);
7730 		free_tracing_log_err(err);
7731 	}
7732 
7733 	tr->n_err_log_entries = 0;
7734 	mutex_unlock(&tracing_err_log_lock);
7735 }
7736 
7737 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7738 {
7739 	struct trace_array *tr = m->private;
7740 
7741 	mutex_lock(&tracing_err_log_lock);
7742 
7743 	return seq_list_start(&tr->err_log, *pos);
7744 }
7745 
7746 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7747 {
7748 	struct trace_array *tr = m->private;
7749 
7750 	return seq_list_next(v, &tr->err_log, pos);
7751 }
7752 
7753 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7754 {
7755 	mutex_unlock(&tracing_err_log_lock);
7756 }
7757 
7758 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7759 {
7760 	u16 i;
7761 
7762 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7763 		seq_putc(m, ' ');
7764 	for (i = 0; i < pos; i++)
7765 		seq_putc(m, ' ');
7766 	seq_puts(m, "^\n");
7767 }
7768 
7769 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7770 {
7771 	struct tracing_log_err *err = v;
7772 
7773 	if (err) {
7774 		const char *err_text = err->info.errs[err->info.type];
7775 		u64 sec = err->info.ts;
7776 		u32 nsec;
7777 
7778 		nsec = do_div(sec, NSEC_PER_SEC);
7779 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7780 			   err->loc, err_text);
7781 		seq_printf(m, "%s", err->cmd);
7782 		tracing_err_log_show_pos(m, err->info.pos);
7783 	}
7784 
7785 	return 0;
7786 }
7787 
7788 static const struct seq_operations tracing_err_log_seq_ops = {
7789 	.start  = tracing_err_log_seq_start,
7790 	.next   = tracing_err_log_seq_next,
7791 	.stop   = tracing_err_log_seq_stop,
7792 	.show   = tracing_err_log_seq_show
7793 };
7794 
7795 static int tracing_err_log_open(struct inode *inode, struct file *file)
7796 {
7797 	struct trace_array *tr = inode->i_private;
7798 	int ret = 0;
7799 
7800 	ret = tracing_check_open_get_tr(tr);
7801 	if (ret)
7802 		return ret;
7803 
7804 	/* If this file was opened for write, then erase contents */
7805 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7806 		clear_tracing_err_log(tr);
7807 
7808 	if (file->f_mode & FMODE_READ) {
7809 		ret = seq_open(file, &tracing_err_log_seq_ops);
7810 		if (!ret) {
7811 			struct seq_file *m = file->private_data;
7812 			m->private = tr;
7813 		} else {
7814 			trace_array_put(tr);
7815 		}
7816 	}
7817 	return ret;
7818 }
7819 
7820 static ssize_t tracing_err_log_write(struct file *file,
7821 				     const char __user *buffer,
7822 				     size_t count, loff_t *ppos)
7823 {
7824 	return count;
7825 }
7826 
7827 static int tracing_err_log_release(struct inode *inode, struct file *file)
7828 {
7829 	struct trace_array *tr = inode->i_private;
7830 
7831 	trace_array_put(tr);
7832 
7833 	if (file->f_mode & FMODE_READ)
7834 		seq_release(inode, file);
7835 
7836 	return 0;
7837 }
7838 
7839 static const struct file_operations tracing_err_log_fops = {
7840 	.open           = tracing_err_log_open,
7841 	.write		= tracing_err_log_write,
7842 	.read           = seq_read,
7843 	.llseek         = tracing_lseek,
7844 	.release        = tracing_err_log_release,
7845 };
7846 
7847 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7848 {
7849 	struct trace_array *tr = inode->i_private;
7850 	struct ftrace_buffer_info *info;
7851 	int ret;
7852 
7853 	ret = tracing_check_open_get_tr(tr);
7854 	if (ret)
7855 		return ret;
7856 
7857 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7858 	if (!info) {
7859 		trace_array_put(tr);
7860 		return -ENOMEM;
7861 	}
7862 
7863 	mutex_lock(&trace_types_lock);
7864 
7865 	info->iter.tr		= tr;
7866 	info->iter.cpu_file	= tracing_get_cpu(inode);
7867 	info->iter.trace	= tr->current_trace;
7868 	info->iter.array_buffer = &tr->array_buffer;
7869 	info->spare		= NULL;
7870 	/* Force reading ring buffer for first read */
7871 	info->read		= (unsigned int)-1;
7872 
7873 	filp->private_data = info;
7874 
7875 	tr->trace_ref++;
7876 
7877 	mutex_unlock(&trace_types_lock);
7878 
7879 	ret = nonseekable_open(inode, filp);
7880 	if (ret < 0)
7881 		trace_array_put(tr);
7882 
7883 	return ret;
7884 }
7885 
7886 static __poll_t
7887 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7888 {
7889 	struct ftrace_buffer_info *info = filp->private_data;
7890 	struct trace_iterator *iter = &info->iter;
7891 
7892 	return trace_poll(iter, filp, poll_table);
7893 }
7894 
7895 static ssize_t
7896 tracing_buffers_read(struct file *filp, char __user *ubuf,
7897 		     size_t count, loff_t *ppos)
7898 {
7899 	struct ftrace_buffer_info *info = filp->private_data;
7900 	struct trace_iterator *iter = &info->iter;
7901 	void *trace_data;
7902 	int page_size;
7903 	ssize_t ret = 0;
7904 	ssize_t size;
7905 
7906 	if (!count)
7907 		return 0;
7908 
7909 #ifdef CONFIG_TRACER_MAX_TRACE
7910 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7911 		return -EBUSY;
7912 #endif
7913 
7914 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7915 
7916 	/* Make sure the spare matches the current sub buffer size */
7917 	if (info->spare) {
7918 		if (page_size != info->spare_size) {
7919 			ring_buffer_free_read_page(iter->array_buffer->buffer,
7920 						   info->spare_cpu, info->spare);
7921 			info->spare = NULL;
7922 		}
7923 	}
7924 
7925 	if (!info->spare) {
7926 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7927 							  iter->cpu_file);
7928 		if (IS_ERR(info->spare)) {
7929 			ret = PTR_ERR(info->spare);
7930 			info->spare = NULL;
7931 		} else {
7932 			info->spare_cpu = iter->cpu_file;
7933 			info->spare_size = page_size;
7934 		}
7935 	}
7936 	if (!info->spare)
7937 		return ret;
7938 
7939 	/* Do we have previous read data to read? */
7940 	if (info->read < page_size)
7941 		goto read;
7942 
7943  again:
7944 	trace_access_lock(iter->cpu_file);
7945 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7946 				    info->spare,
7947 				    count,
7948 				    iter->cpu_file, 0);
7949 	trace_access_unlock(iter->cpu_file);
7950 
7951 	if (ret < 0) {
7952 		if (trace_empty(iter) && !iter->closed) {
7953 			if ((filp->f_flags & O_NONBLOCK))
7954 				return -EAGAIN;
7955 
7956 			ret = wait_on_pipe(iter, 0);
7957 			if (ret)
7958 				return ret;
7959 
7960 			goto again;
7961 		}
7962 		return 0;
7963 	}
7964 
7965 	info->read = 0;
7966  read:
7967 	size = page_size - info->read;
7968 	if (size > count)
7969 		size = count;
7970 	trace_data = ring_buffer_read_page_data(info->spare);
7971 	ret = copy_to_user(ubuf, trace_data + info->read, size);
7972 	if (ret == size)
7973 		return -EFAULT;
7974 
7975 	size -= ret;
7976 
7977 	*ppos += size;
7978 	info->read += size;
7979 
7980 	return size;
7981 }
7982 
7983 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7984 {
7985 	struct ftrace_buffer_info *info = file->private_data;
7986 	struct trace_iterator *iter = &info->iter;
7987 
7988 	iter->closed = true;
7989 	/* Make sure the waiters see the new wait_index */
7990 	(void)atomic_fetch_inc_release(&iter->wait_index);
7991 
7992 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7993 
7994 	return 0;
7995 }
7996 
7997 static int tracing_buffers_release(struct inode *inode, struct file *file)
7998 {
7999 	struct ftrace_buffer_info *info = file->private_data;
8000 	struct trace_iterator *iter = &info->iter;
8001 
8002 	mutex_lock(&trace_types_lock);
8003 
8004 	iter->tr->trace_ref--;
8005 
8006 	__trace_array_put(iter->tr);
8007 
8008 	if (info->spare)
8009 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8010 					   info->spare_cpu, info->spare);
8011 	kvfree(info);
8012 
8013 	mutex_unlock(&trace_types_lock);
8014 
8015 	return 0;
8016 }
8017 
8018 struct buffer_ref {
8019 	struct trace_buffer	*buffer;
8020 	void			*page;
8021 	int			cpu;
8022 	refcount_t		refcount;
8023 };
8024 
8025 static void buffer_ref_release(struct buffer_ref *ref)
8026 {
8027 	if (!refcount_dec_and_test(&ref->refcount))
8028 		return;
8029 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8030 	kfree(ref);
8031 }
8032 
8033 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8034 				    struct pipe_buffer *buf)
8035 {
8036 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8037 
8038 	buffer_ref_release(ref);
8039 	buf->private = 0;
8040 }
8041 
8042 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8043 				struct pipe_buffer *buf)
8044 {
8045 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8046 
8047 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8048 		return false;
8049 
8050 	refcount_inc(&ref->refcount);
8051 	return true;
8052 }
8053 
8054 /* Pipe buffer operations for a buffer. */
8055 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8056 	.release		= buffer_pipe_buf_release,
8057 	.get			= buffer_pipe_buf_get,
8058 };
8059 
8060 /*
8061  * Callback from splice_to_pipe(), if we need to release some pages
8062  * at the end of the spd in case we error'ed out in filling the pipe.
8063  */
8064 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8065 {
8066 	struct buffer_ref *ref =
8067 		(struct buffer_ref *)spd->partial[i].private;
8068 
8069 	buffer_ref_release(ref);
8070 	spd->partial[i].private = 0;
8071 }
8072 
8073 static ssize_t
8074 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8075 			    struct pipe_inode_info *pipe, size_t len,
8076 			    unsigned int flags)
8077 {
8078 	struct ftrace_buffer_info *info = file->private_data;
8079 	struct trace_iterator *iter = &info->iter;
8080 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8081 	struct page *pages_def[PIPE_DEF_BUFFERS];
8082 	struct splice_pipe_desc spd = {
8083 		.pages		= pages_def,
8084 		.partial	= partial_def,
8085 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8086 		.ops		= &buffer_pipe_buf_ops,
8087 		.spd_release	= buffer_spd_release,
8088 	};
8089 	struct buffer_ref *ref;
8090 	bool woken = false;
8091 	int page_size;
8092 	int entries, i;
8093 	ssize_t ret = 0;
8094 
8095 #ifdef CONFIG_TRACER_MAX_TRACE
8096 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8097 		return -EBUSY;
8098 #endif
8099 
8100 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8101 	if (*ppos & (page_size - 1))
8102 		return -EINVAL;
8103 
8104 	if (len & (page_size - 1)) {
8105 		if (len < page_size)
8106 			return -EINVAL;
8107 		len &= (~(page_size - 1));
8108 	}
8109 
8110 	if (splice_grow_spd(pipe, &spd))
8111 		return -ENOMEM;
8112 
8113  again:
8114 	trace_access_lock(iter->cpu_file);
8115 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8116 
8117 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8118 		struct page *page;
8119 		int r;
8120 
8121 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8122 		if (!ref) {
8123 			ret = -ENOMEM;
8124 			break;
8125 		}
8126 
8127 		refcount_set(&ref->refcount, 1);
8128 		ref->buffer = iter->array_buffer->buffer;
8129 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8130 		if (IS_ERR(ref->page)) {
8131 			ret = PTR_ERR(ref->page);
8132 			ref->page = NULL;
8133 			kfree(ref);
8134 			break;
8135 		}
8136 		ref->cpu = iter->cpu_file;
8137 
8138 		r = ring_buffer_read_page(ref->buffer, ref->page,
8139 					  len, iter->cpu_file, 1);
8140 		if (r < 0) {
8141 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8142 						   ref->page);
8143 			kfree(ref);
8144 			break;
8145 		}
8146 
8147 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8148 
8149 		spd.pages[i] = page;
8150 		spd.partial[i].len = page_size;
8151 		spd.partial[i].offset = 0;
8152 		spd.partial[i].private = (unsigned long)ref;
8153 		spd.nr_pages++;
8154 		*ppos += page_size;
8155 
8156 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8157 	}
8158 
8159 	trace_access_unlock(iter->cpu_file);
8160 	spd.nr_pages = i;
8161 
8162 	/* did we read anything? */
8163 	if (!spd.nr_pages) {
8164 
8165 		if (ret)
8166 			goto out;
8167 
8168 		if (woken)
8169 			goto out;
8170 
8171 		ret = -EAGAIN;
8172 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8173 			goto out;
8174 
8175 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8176 		if (ret)
8177 			goto out;
8178 
8179 		/* No need to wait after waking up when tracing is off */
8180 		if (!tracer_tracing_is_on(iter->tr))
8181 			goto out;
8182 
8183 		/* Iterate one more time to collect any new data then exit */
8184 		woken = true;
8185 
8186 		goto again;
8187 	}
8188 
8189 	ret = splice_to_pipe(pipe, &spd);
8190 out:
8191 	splice_shrink_spd(&spd);
8192 
8193 	return ret;
8194 }
8195 
8196 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8197 {
8198 	struct ftrace_buffer_info *info = file->private_data;
8199 	struct trace_iterator *iter = &info->iter;
8200 	int err;
8201 
8202 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8203 		if (!(file->f_flags & O_NONBLOCK)) {
8204 			err = ring_buffer_wait(iter->array_buffer->buffer,
8205 					       iter->cpu_file,
8206 					       iter->tr->buffer_percent,
8207 					       NULL, NULL);
8208 			if (err)
8209 				return err;
8210 		}
8211 
8212 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8213 						  iter->cpu_file);
8214 	} else if (cmd) {
8215 		return -ENOTTY;
8216 	}
8217 
8218 	/*
8219 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8220 	 * waiters
8221 	 */
8222 	mutex_lock(&trace_types_lock);
8223 
8224 	/* Make sure the waiters see the new wait_index */
8225 	(void)atomic_fetch_inc_release(&iter->wait_index);
8226 
8227 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8228 
8229 	mutex_unlock(&trace_types_lock);
8230 	return 0;
8231 }
8232 
8233 #ifdef CONFIG_TRACER_MAX_TRACE
8234 static int get_snapshot_map(struct trace_array *tr)
8235 {
8236 	int err = 0;
8237 
8238 	/*
8239 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8240 	 * take trace_types_lock. Instead use the specific
8241 	 * snapshot_trigger_lock.
8242 	 */
8243 	spin_lock(&tr->snapshot_trigger_lock);
8244 
8245 	if (tr->snapshot || tr->mapped == UINT_MAX)
8246 		err = -EBUSY;
8247 	else
8248 		tr->mapped++;
8249 
8250 	spin_unlock(&tr->snapshot_trigger_lock);
8251 
8252 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8253 	if (tr->mapped == 1)
8254 		synchronize_rcu();
8255 
8256 	return err;
8257 
8258 }
8259 static void put_snapshot_map(struct trace_array *tr)
8260 {
8261 	spin_lock(&tr->snapshot_trigger_lock);
8262 	if (!WARN_ON(!tr->mapped))
8263 		tr->mapped--;
8264 	spin_unlock(&tr->snapshot_trigger_lock);
8265 }
8266 #else
8267 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8268 static inline void put_snapshot_map(struct trace_array *tr) { }
8269 #endif
8270 
8271 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8272 {
8273 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8274 	struct trace_iterator *iter = &info->iter;
8275 
8276 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8277 	put_snapshot_map(iter->tr);
8278 }
8279 
8280 static const struct vm_operations_struct tracing_buffers_vmops = {
8281 	.close		= tracing_buffers_mmap_close,
8282 };
8283 
8284 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8285 {
8286 	struct ftrace_buffer_info *info = filp->private_data;
8287 	struct trace_iterator *iter = &info->iter;
8288 	int ret = 0;
8289 
8290 	/* Currently the boot mapped buffer is not supported for mmap */
8291 	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
8292 		return -ENODEV;
8293 
8294 	ret = get_snapshot_map(iter->tr);
8295 	if (ret)
8296 		return ret;
8297 
8298 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8299 	if (ret)
8300 		put_snapshot_map(iter->tr);
8301 
8302 	vma->vm_ops = &tracing_buffers_vmops;
8303 
8304 	return ret;
8305 }
8306 
8307 static const struct file_operations tracing_buffers_fops = {
8308 	.open		= tracing_buffers_open,
8309 	.read		= tracing_buffers_read,
8310 	.poll		= tracing_buffers_poll,
8311 	.release	= tracing_buffers_release,
8312 	.flush		= tracing_buffers_flush,
8313 	.splice_read	= tracing_buffers_splice_read,
8314 	.unlocked_ioctl = tracing_buffers_ioctl,
8315 	.mmap		= tracing_buffers_mmap,
8316 };
8317 
8318 static ssize_t
8319 tracing_stats_read(struct file *filp, char __user *ubuf,
8320 		   size_t count, loff_t *ppos)
8321 {
8322 	struct inode *inode = file_inode(filp);
8323 	struct trace_array *tr = inode->i_private;
8324 	struct array_buffer *trace_buf = &tr->array_buffer;
8325 	int cpu = tracing_get_cpu(inode);
8326 	struct trace_seq *s;
8327 	unsigned long cnt;
8328 	unsigned long long t;
8329 	unsigned long usec_rem;
8330 
8331 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8332 	if (!s)
8333 		return -ENOMEM;
8334 
8335 	trace_seq_init(s);
8336 
8337 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8338 	trace_seq_printf(s, "entries: %ld\n", cnt);
8339 
8340 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8341 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8342 
8343 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8344 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8345 
8346 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8347 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8348 
8349 	if (trace_clocks[tr->clock_id].in_ns) {
8350 		/* local or global for trace_clock */
8351 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8352 		usec_rem = do_div(t, USEC_PER_SEC);
8353 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8354 								t, usec_rem);
8355 
8356 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8357 		usec_rem = do_div(t, USEC_PER_SEC);
8358 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8359 	} else {
8360 		/* counter or tsc mode for trace_clock */
8361 		trace_seq_printf(s, "oldest event ts: %llu\n",
8362 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8363 
8364 		trace_seq_printf(s, "now ts: %llu\n",
8365 				ring_buffer_time_stamp(trace_buf->buffer));
8366 	}
8367 
8368 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8369 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8370 
8371 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8372 	trace_seq_printf(s, "read events: %ld\n", cnt);
8373 
8374 	count = simple_read_from_buffer(ubuf, count, ppos,
8375 					s->buffer, trace_seq_used(s));
8376 
8377 	kfree(s);
8378 
8379 	return count;
8380 }
8381 
8382 static const struct file_operations tracing_stats_fops = {
8383 	.open		= tracing_open_generic_tr,
8384 	.read		= tracing_stats_read,
8385 	.llseek		= generic_file_llseek,
8386 	.release	= tracing_release_generic_tr,
8387 };
8388 
8389 #ifdef CONFIG_DYNAMIC_FTRACE
8390 
8391 static ssize_t
8392 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8393 		  size_t cnt, loff_t *ppos)
8394 {
8395 	ssize_t ret;
8396 	char *buf;
8397 	int r;
8398 
8399 	/* 512 should be plenty to hold the amount needed */
8400 #define DYN_INFO_BUF_SIZE	512
8401 
8402 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8403 	if (!buf)
8404 		return -ENOMEM;
8405 
8406 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8407 		      "%ld pages:%ld groups: %ld\n"
8408 		      "ftrace boot update time = %llu (ns)\n"
8409 		      "ftrace module total update time = %llu (ns)\n",
8410 		      ftrace_update_tot_cnt,
8411 		      ftrace_number_of_pages,
8412 		      ftrace_number_of_groups,
8413 		      ftrace_update_time,
8414 		      ftrace_total_mod_time);
8415 
8416 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8417 	kfree(buf);
8418 	return ret;
8419 }
8420 
8421 static const struct file_operations tracing_dyn_info_fops = {
8422 	.open		= tracing_open_generic,
8423 	.read		= tracing_read_dyn_info,
8424 	.llseek		= generic_file_llseek,
8425 };
8426 #endif /* CONFIG_DYNAMIC_FTRACE */
8427 
8428 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8429 static void
8430 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8431 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8432 		void *data)
8433 {
8434 	tracing_snapshot_instance(tr);
8435 }
8436 
8437 static void
8438 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8439 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8440 		      void *data)
8441 {
8442 	struct ftrace_func_mapper *mapper = data;
8443 	long *count = NULL;
8444 
8445 	if (mapper)
8446 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8447 
8448 	if (count) {
8449 
8450 		if (*count <= 0)
8451 			return;
8452 
8453 		(*count)--;
8454 	}
8455 
8456 	tracing_snapshot_instance(tr);
8457 }
8458 
8459 static int
8460 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8461 		      struct ftrace_probe_ops *ops, void *data)
8462 {
8463 	struct ftrace_func_mapper *mapper = data;
8464 	long *count = NULL;
8465 
8466 	seq_printf(m, "%ps:", (void *)ip);
8467 
8468 	seq_puts(m, "snapshot");
8469 
8470 	if (mapper)
8471 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8472 
8473 	if (count)
8474 		seq_printf(m, ":count=%ld\n", *count);
8475 	else
8476 		seq_puts(m, ":unlimited\n");
8477 
8478 	return 0;
8479 }
8480 
8481 static int
8482 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8483 		     unsigned long ip, void *init_data, void **data)
8484 {
8485 	struct ftrace_func_mapper *mapper = *data;
8486 
8487 	if (!mapper) {
8488 		mapper = allocate_ftrace_func_mapper();
8489 		if (!mapper)
8490 			return -ENOMEM;
8491 		*data = mapper;
8492 	}
8493 
8494 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8495 }
8496 
8497 static void
8498 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8499 		     unsigned long ip, void *data)
8500 {
8501 	struct ftrace_func_mapper *mapper = data;
8502 
8503 	if (!ip) {
8504 		if (!mapper)
8505 			return;
8506 		free_ftrace_func_mapper(mapper, NULL);
8507 		return;
8508 	}
8509 
8510 	ftrace_func_mapper_remove_ip(mapper, ip);
8511 }
8512 
8513 static struct ftrace_probe_ops snapshot_probe_ops = {
8514 	.func			= ftrace_snapshot,
8515 	.print			= ftrace_snapshot_print,
8516 };
8517 
8518 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8519 	.func			= ftrace_count_snapshot,
8520 	.print			= ftrace_snapshot_print,
8521 	.init			= ftrace_snapshot_init,
8522 	.free			= ftrace_snapshot_free,
8523 };
8524 
8525 static int
8526 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8527 			       char *glob, char *cmd, char *param, int enable)
8528 {
8529 	struct ftrace_probe_ops *ops;
8530 	void *count = (void *)-1;
8531 	char *number;
8532 	int ret;
8533 
8534 	if (!tr)
8535 		return -ENODEV;
8536 
8537 	/* hash funcs only work with set_ftrace_filter */
8538 	if (!enable)
8539 		return -EINVAL;
8540 
8541 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8542 
8543 	if (glob[0] == '!') {
8544 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8545 		if (!ret)
8546 			tracing_disarm_snapshot(tr);
8547 
8548 		return ret;
8549 	}
8550 
8551 	if (!param)
8552 		goto out_reg;
8553 
8554 	number = strsep(&param, ":");
8555 
8556 	if (!strlen(number))
8557 		goto out_reg;
8558 
8559 	/*
8560 	 * We use the callback data field (which is a pointer)
8561 	 * as our counter.
8562 	 */
8563 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8564 	if (ret)
8565 		return ret;
8566 
8567  out_reg:
8568 	ret = tracing_arm_snapshot(tr);
8569 	if (ret < 0)
8570 		goto out;
8571 
8572 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8573 	if (ret < 0)
8574 		tracing_disarm_snapshot(tr);
8575  out:
8576 	return ret < 0 ? ret : 0;
8577 }
8578 
8579 static struct ftrace_func_command ftrace_snapshot_cmd = {
8580 	.name			= "snapshot",
8581 	.func			= ftrace_trace_snapshot_callback,
8582 };
8583 
8584 static __init int register_snapshot_cmd(void)
8585 {
8586 	return register_ftrace_command(&ftrace_snapshot_cmd);
8587 }
8588 #else
8589 static inline __init int register_snapshot_cmd(void) { return 0; }
8590 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8591 
8592 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8593 {
8594 	if (WARN_ON(!tr->dir))
8595 		return ERR_PTR(-ENODEV);
8596 
8597 	/* Top directory uses NULL as the parent */
8598 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8599 		return NULL;
8600 
8601 	/* All sub buffers have a descriptor */
8602 	return tr->dir;
8603 }
8604 
8605 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8606 {
8607 	struct dentry *d_tracer;
8608 
8609 	if (tr->percpu_dir)
8610 		return tr->percpu_dir;
8611 
8612 	d_tracer = tracing_get_dentry(tr);
8613 	if (IS_ERR(d_tracer))
8614 		return NULL;
8615 
8616 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8617 
8618 	MEM_FAIL(!tr->percpu_dir,
8619 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8620 
8621 	return tr->percpu_dir;
8622 }
8623 
8624 static struct dentry *
8625 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8626 		      void *data, long cpu, const struct file_operations *fops)
8627 {
8628 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8629 
8630 	if (ret) /* See tracing_get_cpu() */
8631 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8632 	return ret;
8633 }
8634 
8635 static void
8636 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8637 {
8638 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8639 	struct dentry *d_cpu;
8640 	char cpu_dir[30]; /* 30 characters should be more than enough */
8641 
8642 	if (!d_percpu)
8643 		return;
8644 
8645 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8646 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8647 	if (!d_cpu) {
8648 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8649 		return;
8650 	}
8651 
8652 	/* per cpu trace_pipe */
8653 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8654 				tr, cpu, &tracing_pipe_fops);
8655 
8656 	/* per cpu trace */
8657 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8658 				tr, cpu, &tracing_fops);
8659 
8660 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8661 				tr, cpu, &tracing_buffers_fops);
8662 
8663 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8664 				tr, cpu, &tracing_stats_fops);
8665 
8666 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8667 				tr, cpu, &tracing_entries_fops);
8668 
8669 	if (tr->range_addr_start)
8670 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8671 				      tr, cpu, &tracing_buffer_meta_fops);
8672 #ifdef CONFIG_TRACER_SNAPSHOT
8673 	if (!tr->range_addr_start) {
8674 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8675 				      tr, cpu, &snapshot_fops);
8676 
8677 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8678 				      tr, cpu, &snapshot_raw_fops);
8679 	}
8680 #endif
8681 }
8682 
8683 #ifdef CONFIG_FTRACE_SELFTEST
8684 /* Let selftest have access to static functions in this file */
8685 #include "trace_selftest.c"
8686 #endif
8687 
8688 static ssize_t
8689 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8690 			loff_t *ppos)
8691 {
8692 	struct trace_option_dentry *topt = filp->private_data;
8693 	char *buf;
8694 
8695 	if (topt->flags->val & topt->opt->bit)
8696 		buf = "1\n";
8697 	else
8698 		buf = "0\n";
8699 
8700 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8701 }
8702 
8703 static ssize_t
8704 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8705 			 loff_t *ppos)
8706 {
8707 	struct trace_option_dentry *topt = filp->private_data;
8708 	unsigned long val;
8709 	int ret;
8710 
8711 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8712 	if (ret)
8713 		return ret;
8714 
8715 	if (val != 0 && val != 1)
8716 		return -EINVAL;
8717 
8718 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8719 		mutex_lock(&trace_types_lock);
8720 		ret = __set_tracer_option(topt->tr, topt->flags,
8721 					  topt->opt, !val);
8722 		mutex_unlock(&trace_types_lock);
8723 		if (ret)
8724 			return ret;
8725 	}
8726 
8727 	*ppos += cnt;
8728 
8729 	return cnt;
8730 }
8731 
8732 static int tracing_open_options(struct inode *inode, struct file *filp)
8733 {
8734 	struct trace_option_dentry *topt = inode->i_private;
8735 	int ret;
8736 
8737 	ret = tracing_check_open_get_tr(topt->tr);
8738 	if (ret)
8739 		return ret;
8740 
8741 	filp->private_data = inode->i_private;
8742 	return 0;
8743 }
8744 
8745 static int tracing_release_options(struct inode *inode, struct file *file)
8746 {
8747 	struct trace_option_dentry *topt = file->private_data;
8748 
8749 	trace_array_put(topt->tr);
8750 	return 0;
8751 }
8752 
8753 static const struct file_operations trace_options_fops = {
8754 	.open = tracing_open_options,
8755 	.read = trace_options_read,
8756 	.write = trace_options_write,
8757 	.llseek	= generic_file_llseek,
8758 	.release = tracing_release_options,
8759 };
8760 
8761 /*
8762  * In order to pass in both the trace_array descriptor as well as the index
8763  * to the flag that the trace option file represents, the trace_array
8764  * has a character array of trace_flags_index[], which holds the index
8765  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8766  * The address of this character array is passed to the flag option file
8767  * read/write callbacks.
8768  *
8769  * In order to extract both the index and the trace_array descriptor,
8770  * get_tr_index() uses the following algorithm.
8771  *
8772  *   idx = *ptr;
8773  *
8774  * As the pointer itself contains the address of the index (remember
8775  * index[1] == 1).
8776  *
8777  * Then to get the trace_array descriptor, by subtracting that index
8778  * from the ptr, we get to the start of the index itself.
8779  *
8780  *   ptr - idx == &index[0]
8781  *
8782  * Then a simple container_of() from that pointer gets us to the
8783  * trace_array descriptor.
8784  */
8785 static void get_tr_index(void *data, struct trace_array **ptr,
8786 			 unsigned int *pindex)
8787 {
8788 	*pindex = *(unsigned char *)data;
8789 
8790 	*ptr = container_of(data - *pindex, struct trace_array,
8791 			    trace_flags_index);
8792 }
8793 
8794 static ssize_t
8795 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8796 			loff_t *ppos)
8797 {
8798 	void *tr_index = filp->private_data;
8799 	struct trace_array *tr;
8800 	unsigned int index;
8801 	char *buf;
8802 
8803 	get_tr_index(tr_index, &tr, &index);
8804 
8805 	if (tr->trace_flags & (1 << index))
8806 		buf = "1\n";
8807 	else
8808 		buf = "0\n";
8809 
8810 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8811 }
8812 
8813 static ssize_t
8814 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8815 			 loff_t *ppos)
8816 {
8817 	void *tr_index = filp->private_data;
8818 	struct trace_array *tr;
8819 	unsigned int index;
8820 	unsigned long val;
8821 	int ret;
8822 
8823 	get_tr_index(tr_index, &tr, &index);
8824 
8825 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8826 	if (ret)
8827 		return ret;
8828 
8829 	if (val != 0 && val != 1)
8830 		return -EINVAL;
8831 
8832 	mutex_lock(&event_mutex);
8833 	mutex_lock(&trace_types_lock);
8834 	ret = set_tracer_flag(tr, 1 << index, val);
8835 	mutex_unlock(&trace_types_lock);
8836 	mutex_unlock(&event_mutex);
8837 
8838 	if (ret < 0)
8839 		return ret;
8840 
8841 	*ppos += cnt;
8842 
8843 	return cnt;
8844 }
8845 
8846 static const struct file_operations trace_options_core_fops = {
8847 	.open = tracing_open_generic,
8848 	.read = trace_options_core_read,
8849 	.write = trace_options_core_write,
8850 	.llseek = generic_file_llseek,
8851 };
8852 
8853 struct dentry *trace_create_file(const char *name,
8854 				 umode_t mode,
8855 				 struct dentry *parent,
8856 				 void *data,
8857 				 const struct file_operations *fops)
8858 {
8859 	struct dentry *ret;
8860 
8861 	ret = tracefs_create_file(name, mode, parent, data, fops);
8862 	if (!ret)
8863 		pr_warn("Could not create tracefs '%s' entry\n", name);
8864 
8865 	return ret;
8866 }
8867 
8868 
8869 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8870 {
8871 	struct dentry *d_tracer;
8872 
8873 	if (tr->options)
8874 		return tr->options;
8875 
8876 	d_tracer = tracing_get_dentry(tr);
8877 	if (IS_ERR(d_tracer))
8878 		return NULL;
8879 
8880 	tr->options = tracefs_create_dir("options", d_tracer);
8881 	if (!tr->options) {
8882 		pr_warn("Could not create tracefs directory 'options'\n");
8883 		return NULL;
8884 	}
8885 
8886 	return tr->options;
8887 }
8888 
8889 static void
8890 create_trace_option_file(struct trace_array *tr,
8891 			 struct trace_option_dentry *topt,
8892 			 struct tracer_flags *flags,
8893 			 struct tracer_opt *opt)
8894 {
8895 	struct dentry *t_options;
8896 
8897 	t_options = trace_options_init_dentry(tr);
8898 	if (!t_options)
8899 		return;
8900 
8901 	topt->flags = flags;
8902 	topt->opt = opt;
8903 	topt->tr = tr;
8904 
8905 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8906 					t_options, topt, &trace_options_fops);
8907 
8908 }
8909 
8910 static void
8911 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8912 {
8913 	struct trace_option_dentry *topts;
8914 	struct trace_options *tr_topts;
8915 	struct tracer_flags *flags;
8916 	struct tracer_opt *opts;
8917 	int cnt;
8918 	int i;
8919 
8920 	if (!tracer)
8921 		return;
8922 
8923 	flags = tracer->flags;
8924 
8925 	if (!flags || !flags->opts)
8926 		return;
8927 
8928 	/*
8929 	 * If this is an instance, only create flags for tracers
8930 	 * the instance may have.
8931 	 */
8932 	if (!trace_ok_for_array(tracer, tr))
8933 		return;
8934 
8935 	for (i = 0; i < tr->nr_topts; i++) {
8936 		/* Make sure there's no duplicate flags. */
8937 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8938 			return;
8939 	}
8940 
8941 	opts = flags->opts;
8942 
8943 	for (cnt = 0; opts[cnt].name; cnt++)
8944 		;
8945 
8946 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8947 	if (!topts)
8948 		return;
8949 
8950 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8951 			    GFP_KERNEL);
8952 	if (!tr_topts) {
8953 		kfree(topts);
8954 		return;
8955 	}
8956 
8957 	tr->topts = tr_topts;
8958 	tr->topts[tr->nr_topts].tracer = tracer;
8959 	tr->topts[tr->nr_topts].topts = topts;
8960 	tr->nr_topts++;
8961 
8962 	for (cnt = 0; opts[cnt].name; cnt++) {
8963 		create_trace_option_file(tr, &topts[cnt], flags,
8964 					 &opts[cnt]);
8965 		MEM_FAIL(topts[cnt].entry == NULL,
8966 			  "Failed to create trace option: %s",
8967 			  opts[cnt].name);
8968 	}
8969 }
8970 
8971 static struct dentry *
8972 create_trace_option_core_file(struct trace_array *tr,
8973 			      const char *option, long index)
8974 {
8975 	struct dentry *t_options;
8976 
8977 	t_options = trace_options_init_dentry(tr);
8978 	if (!t_options)
8979 		return NULL;
8980 
8981 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8982 				 (void *)&tr->trace_flags_index[index],
8983 				 &trace_options_core_fops);
8984 }
8985 
8986 static void create_trace_options_dir(struct trace_array *tr)
8987 {
8988 	struct dentry *t_options;
8989 	bool top_level = tr == &global_trace;
8990 	int i;
8991 
8992 	t_options = trace_options_init_dentry(tr);
8993 	if (!t_options)
8994 		return;
8995 
8996 	for (i = 0; trace_options[i]; i++) {
8997 		if (top_level ||
8998 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8999 			create_trace_option_core_file(tr, trace_options[i], i);
9000 	}
9001 }
9002 
9003 static ssize_t
9004 rb_simple_read(struct file *filp, char __user *ubuf,
9005 	       size_t cnt, loff_t *ppos)
9006 {
9007 	struct trace_array *tr = filp->private_data;
9008 	char buf[64];
9009 	int r;
9010 
9011 	r = tracer_tracing_is_on(tr);
9012 	r = sprintf(buf, "%d\n", r);
9013 
9014 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9015 }
9016 
9017 static ssize_t
9018 rb_simple_write(struct file *filp, const char __user *ubuf,
9019 		size_t cnt, loff_t *ppos)
9020 {
9021 	struct trace_array *tr = filp->private_data;
9022 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9023 	unsigned long val;
9024 	int ret;
9025 
9026 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9027 	if (ret)
9028 		return ret;
9029 
9030 	if (buffer) {
9031 		mutex_lock(&trace_types_lock);
9032 		if (!!val == tracer_tracing_is_on(tr)) {
9033 			val = 0; /* do nothing */
9034 		} else if (val) {
9035 			tracer_tracing_on(tr);
9036 			if (tr->current_trace->start)
9037 				tr->current_trace->start(tr);
9038 		} else {
9039 			tracer_tracing_off(tr);
9040 			if (tr->current_trace->stop)
9041 				tr->current_trace->stop(tr);
9042 			/* Wake up any waiters */
9043 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9044 		}
9045 		mutex_unlock(&trace_types_lock);
9046 	}
9047 
9048 	(*ppos)++;
9049 
9050 	return cnt;
9051 }
9052 
9053 static const struct file_operations rb_simple_fops = {
9054 	.open		= tracing_open_generic_tr,
9055 	.read		= rb_simple_read,
9056 	.write		= rb_simple_write,
9057 	.release	= tracing_release_generic_tr,
9058 	.llseek		= default_llseek,
9059 };
9060 
9061 static ssize_t
9062 buffer_percent_read(struct file *filp, char __user *ubuf,
9063 		    size_t cnt, loff_t *ppos)
9064 {
9065 	struct trace_array *tr = filp->private_data;
9066 	char buf[64];
9067 	int r;
9068 
9069 	r = tr->buffer_percent;
9070 	r = sprintf(buf, "%d\n", r);
9071 
9072 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9073 }
9074 
9075 static ssize_t
9076 buffer_percent_write(struct file *filp, const char __user *ubuf,
9077 		     size_t cnt, loff_t *ppos)
9078 {
9079 	struct trace_array *tr = filp->private_data;
9080 	unsigned long val;
9081 	int ret;
9082 
9083 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9084 	if (ret)
9085 		return ret;
9086 
9087 	if (val > 100)
9088 		return -EINVAL;
9089 
9090 	tr->buffer_percent = val;
9091 
9092 	(*ppos)++;
9093 
9094 	return cnt;
9095 }
9096 
9097 static const struct file_operations buffer_percent_fops = {
9098 	.open		= tracing_open_generic_tr,
9099 	.read		= buffer_percent_read,
9100 	.write		= buffer_percent_write,
9101 	.release	= tracing_release_generic_tr,
9102 	.llseek		= default_llseek,
9103 };
9104 
9105 static ssize_t
9106 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9107 {
9108 	struct trace_array *tr = filp->private_data;
9109 	size_t size;
9110 	char buf[64];
9111 	int order;
9112 	int r;
9113 
9114 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9115 	size = (PAGE_SIZE << order) / 1024;
9116 
9117 	r = sprintf(buf, "%zd\n", size);
9118 
9119 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9120 }
9121 
9122 static ssize_t
9123 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9124 			 size_t cnt, loff_t *ppos)
9125 {
9126 	struct trace_array *tr = filp->private_data;
9127 	unsigned long val;
9128 	int old_order;
9129 	int order;
9130 	int pages;
9131 	int ret;
9132 
9133 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9134 	if (ret)
9135 		return ret;
9136 
9137 	val *= 1024; /* value passed in is in KB */
9138 
9139 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9140 	order = fls(pages - 1);
9141 
9142 	/* limit between 1 and 128 system pages */
9143 	if (order < 0 || order > 7)
9144 		return -EINVAL;
9145 
9146 	/* Do not allow tracing while changing the order of the ring buffer */
9147 	tracing_stop_tr(tr);
9148 
9149 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9150 	if (old_order == order)
9151 		goto out;
9152 
9153 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9154 	if (ret)
9155 		goto out;
9156 
9157 #ifdef CONFIG_TRACER_MAX_TRACE
9158 
9159 	if (!tr->allocated_snapshot)
9160 		goto out_max;
9161 
9162 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9163 	if (ret) {
9164 		/* Put back the old order */
9165 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9166 		if (WARN_ON_ONCE(cnt)) {
9167 			/*
9168 			 * AARGH! We are left with different orders!
9169 			 * The max buffer is our "snapshot" buffer.
9170 			 * When a tracer needs a snapshot (one of the
9171 			 * latency tracers), it swaps the max buffer
9172 			 * with the saved snap shot. We succeeded to
9173 			 * update the order of the main buffer, but failed to
9174 			 * update the order of the max buffer. But when we tried
9175 			 * to reset the main buffer to the original size, we
9176 			 * failed there too. This is very unlikely to
9177 			 * happen, but if it does, warn and kill all
9178 			 * tracing.
9179 			 */
9180 			tracing_disabled = 1;
9181 		}
9182 		goto out;
9183 	}
9184  out_max:
9185 #endif
9186 	(*ppos)++;
9187  out:
9188 	if (ret)
9189 		cnt = ret;
9190 	tracing_start_tr(tr);
9191 	return cnt;
9192 }
9193 
9194 static const struct file_operations buffer_subbuf_size_fops = {
9195 	.open		= tracing_open_generic_tr,
9196 	.read		= buffer_subbuf_size_read,
9197 	.write		= buffer_subbuf_size_write,
9198 	.release	= tracing_release_generic_tr,
9199 	.llseek		= default_llseek,
9200 };
9201 
9202 static struct dentry *trace_instance_dir;
9203 
9204 static void
9205 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9206 
9207 static int
9208 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9209 {
9210 	enum ring_buffer_flags rb_flags;
9211 
9212 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9213 
9214 	buf->tr = tr;
9215 
9216 	if (tr->range_addr_start && tr->range_addr_size) {
9217 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9218 						      tr->range_addr_start,
9219 						      tr->range_addr_size);
9220 
9221 		ring_buffer_last_boot_delta(buf->buffer,
9222 					    &tr->text_delta, &tr->data_delta);
9223 		/*
9224 		 * This is basically the same as a mapped buffer,
9225 		 * with the same restrictions.
9226 		 */
9227 		tr->mapped++;
9228 	} else {
9229 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9230 	}
9231 	if (!buf->buffer)
9232 		return -ENOMEM;
9233 
9234 	buf->data = alloc_percpu(struct trace_array_cpu);
9235 	if (!buf->data) {
9236 		ring_buffer_free(buf->buffer);
9237 		buf->buffer = NULL;
9238 		return -ENOMEM;
9239 	}
9240 
9241 	/* Allocate the first page for all buffers */
9242 	set_buffer_entries(&tr->array_buffer,
9243 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9244 
9245 	return 0;
9246 }
9247 
9248 static void free_trace_buffer(struct array_buffer *buf)
9249 {
9250 	if (buf->buffer) {
9251 		ring_buffer_free(buf->buffer);
9252 		buf->buffer = NULL;
9253 		free_percpu(buf->data);
9254 		buf->data = NULL;
9255 	}
9256 }
9257 
9258 static int allocate_trace_buffers(struct trace_array *tr, int size)
9259 {
9260 	int ret;
9261 
9262 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9263 	if (ret)
9264 		return ret;
9265 
9266 #ifdef CONFIG_TRACER_MAX_TRACE
9267 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9268 	if (tr->range_addr_start)
9269 		return 0;
9270 
9271 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9272 				    allocate_snapshot ? size : 1);
9273 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9274 		free_trace_buffer(&tr->array_buffer);
9275 		return -ENOMEM;
9276 	}
9277 	tr->allocated_snapshot = allocate_snapshot;
9278 
9279 	allocate_snapshot = false;
9280 #endif
9281 
9282 	return 0;
9283 }
9284 
9285 static void free_trace_buffers(struct trace_array *tr)
9286 {
9287 	if (!tr)
9288 		return;
9289 
9290 	free_trace_buffer(&tr->array_buffer);
9291 
9292 #ifdef CONFIG_TRACER_MAX_TRACE
9293 	free_trace_buffer(&tr->max_buffer);
9294 #endif
9295 }
9296 
9297 static void init_trace_flags_index(struct trace_array *tr)
9298 {
9299 	int i;
9300 
9301 	/* Used by the trace options files */
9302 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9303 		tr->trace_flags_index[i] = i;
9304 }
9305 
9306 static void __update_tracer_options(struct trace_array *tr)
9307 {
9308 	struct tracer *t;
9309 
9310 	for (t = trace_types; t; t = t->next)
9311 		add_tracer_options(tr, t);
9312 }
9313 
9314 static void update_tracer_options(struct trace_array *tr)
9315 {
9316 	mutex_lock(&trace_types_lock);
9317 	tracer_options_updated = true;
9318 	__update_tracer_options(tr);
9319 	mutex_unlock(&trace_types_lock);
9320 }
9321 
9322 /* Must have trace_types_lock held */
9323 struct trace_array *trace_array_find(const char *instance)
9324 {
9325 	struct trace_array *tr, *found = NULL;
9326 
9327 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9328 		if (tr->name && strcmp(tr->name, instance) == 0) {
9329 			found = tr;
9330 			break;
9331 		}
9332 	}
9333 
9334 	return found;
9335 }
9336 
9337 struct trace_array *trace_array_find_get(const char *instance)
9338 {
9339 	struct trace_array *tr;
9340 
9341 	mutex_lock(&trace_types_lock);
9342 	tr = trace_array_find(instance);
9343 	if (tr)
9344 		tr->ref++;
9345 	mutex_unlock(&trace_types_lock);
9346 
9347 	return tr;
9348 }
9349 
9350 static int trace_array_create_dir(struct trace_array *tr)
9351 {
9352 	int ret;
9353 
9354 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9355 	if (!tr->dir)
9356 		return -EINVAL;
9357 
9358 	ret = event_trace_add_tracer(tr->dir, tr);
9359 	if (ret) {
9360 		tracefs_remove(tr->dir);
9361 		return ret;
9362 	}
9363 
9364 	init_tracer_tracefs(tr, tr->dir);
9365 	__update_tracer_options(tr);
9366 
9367 	return ret;
9368 }
9369 
9370 static struct trace_array *
9371 trace_array_create_systems(const char *name, const char *systems,
9372 			   unsigned long range_addr_start,
9373 			   unsigned long range_addr_size)
9374 {
9375 	struct trace_array *tr;
9376 	int ret;
9377 
9378 	ret = -ENOMEM;
9379 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9380 	if (!tr)
9381 		return ERR_PTR(ret);
9382 
9383 	tr->name = kstrdup(name, GFP_KERNEL);
9384 	if (!tr->name)
9385 		goto out_free_tr;
9386 
9387 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9388 		goto out_free_tr;
9389 
9390 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9391 		goto out_free_tr;
9392 
9393 	if (systems) {
9394 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9395 		if (!tr->system_names)
9396 			goto out_free_tr;
9397 	}
9398 
9399 	/* Only for boot up memory mapped ring buffers */
9400 	tr->range_addr_start = range_addr_start;
9401 	tr->range_addr_size = range_addr_size;
9402 
9403 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9404 
9405 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9406 
9407 	raw_spin_lock_init(&tr->start_lock);
9408 
9409 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9410 #ifdef CONFIG_TRACER_MAX_TRACE
9411 	spin_lock_init(&tr->snapshot_trigger_lock);
9412 #endif
9413 	tr->current_trace = &nop_trace;
9414 
9415 	INIT_LIST_HEAD(&tr->systems);
9416 	INIT_LIST_HEAD(&tr->events);
9417 	INIT_LIST_HEAD(&tr->hist_vars);
9418 	INIT_LIST_HEAD(&tr->err_log);
9419 
9420 #ifdef CONFIG_MODULES
9421 	INIT_LIST_HEAD(&tr->mod_events);
9422 #endif
9423 
9424 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9425 		goto out_free_tr;
9426 
9427 	/* The ring buffer is defaultly expanded */
9428 	trace_set_ring_buffer_expanded(tr);
9429 
9430 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9431 		goto out_free_tr;
9432 
9433 	ftrace_init_trace_array(tr);
9434 
9435 	init_trace_flags_index(tr);
9436 
9437 	if (trace_instance_dir) {
9438 		ret = trace_array_create_dir(tr);
9439 		if (ret)
9440 			goto out_free_tr;
9441 	} else
9442 		__trace_early_add_events(tr);
9443 
9444 	list_add(&tr->list, &ftrace_trace_arrays);
9445 
9446 	tr->ref++;
9447 
9448 	return tr;
9449 
9450  out_free_tr:
9451 	ftrace_free_ftrace_ops(tr);
9452 	free_trace_buffers(tr);
9453 	free_cpumask_var(tr->pipe_cpumask);
9454 	free_cpumask_var(tr->tracing_cpumask);
9455 	kfree_const(tr->system_names);
9456 	kfree(tr->name);
9457 	kfree(tr);
9458 
9459 	return ERR_PTR(ret);
9460 }
9461 
9462 static struct trace_array *trace_array_create(const char *name)
9463 {
9464 	return trace_array_create_systems(name, NULL, 0, 0);
9465 }
9466 
9467 static int instance_mkdir(const char *name)
9468 {
9469 	struct trace_array *tr;
9470 	int ret;
9471 
9472 	guard(mutex)(&event_mutex);
9473 	guard(mutex)(&trace_types_lock);
9474 
9475 	ret = -EEXIST;
9476 	if (trace_array_find(name))
9477 		return -EEXIST;
9478 
9479 	tr = trace_array_create(name);
9480 
9481 	ret = PTR_ERR_OR_ZERO(tr);
9482 
9483 	return ret;
9484 }
9485 
9486 static u64 map_pages(u64 start, u64 size)
9487 {
9488 	struct page **pages;
9489 	phys_addr_t page_start;
9490 	unsigned int page_count;
9491 	unsigned int i;
9492 	void *vaddr;
9493 
9494 	page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9495 
9496 	page_start = start;
9497 	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9498 	if (!pages)
9499 		return 0;
9500 
9501 	for (i = 0; i < page_count; i++) {
9502 		phys_addr_t addr = page_start + i * PAGE_SIZE;
9503 		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9504 	}
9505 	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9506 	kfree(pages);
9507 
9508 	return (u64)(unsigned long)vaddr;
9509 }
9510 
9511 /**
9512  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9513  * @name: The name of the trace array to be looked up/created.
9514  * @systems: A list of systems to create event directories for (NULL for all)
9515  *
9516  * Returns pointer to trace array with given name.
9517  * NULL, if it cannot be created.
9518  *
9519  * NOTE: This function increments the reference counter associated with the
9520  * trace array returned. This makes sure it cannot be freed while in use.
9521  * Use trace_array_put() once the trace array is no longer needed.
9522  * If the trace_array is to be freed, trace_array_destroy() needs to
9523  * be called after the trace_array_put(), or simply let user space delete
9524  * it from the tracefs instances directory. But until the
9525  * trace_array_put() is called, user space can not delete it.
9526  *
9527  */
9528 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9529 {
9530 	struct trace_array *tr;
9531 
9532 	guard(mutex)(&event_mutex);
9533 	guard(mutex)(&trace_types_lock);
9534 
9535 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9536 		if (tr->name && strcmp(tr->name, name) == 0) {
9537 			tr->ref++;
9538 			return tr;
9539 		}
9540 	}
9541 
9542 	tr = trace_array_create_systems(name, systems, 0, 0);
9543 
9544 	if (IS_ERR(tr))
9545 		tr = NULL;
9546 	else
9547 		tr->ref++;
9548 
9549 	return tr;
9550 }
9551 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9552 
9553 static int __remove_instance(struct trace_array *tr)
9554 {
9555 	int i;
9556 
9557 	/* Reference counter for a newly created trace array = 1. */
9558 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9559 		return -EBUSY;
9560 
9561 	list_del(&tr->list);
9562 
9563 	/* Disable all the flags that were enabled coming in */
9564 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9565 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9566 			set_tracer_flag(tr, 1 << i, 0);
9567 	}
9568 
9569 	if (printk_trace == tr)
9570 		update_printk_trace(&global_trace);
9571 
9572 	tracing_set_nop(tr);
9573 	clear_ftrace_function_probes(tr);
9574 	event_trace_del_tracer(tr);
9575 	ftrace_clear_pids(tr);
9576 	ftrace_destroy_function_files(tr);
9577 	tracefs_remove(tr->dir);
9578 	free_percpu(tr->last_func_repeats);
9579 	free_trace_buffers(tr);
9580 	clear_tracing_err_log(tr);
9581 
9582 	for (i = 0; i < tr->nr_topts; i++) {
9583 		kfree(tr->topts[i].topts);
9584 	}
9585 	kfree(tr->topts);
9586 
9587 	free_cpumask_var(tr->pipe_cpumask);
9588 	free_cpumask_var(tr->tracing_cpumask);
9589 	kfree_const(tr->system_names);
9590 	kfree(tr->name);
9591 	kfree(tr);
9592 
9593 	return 0;
9594 }
9595 
9596 int trace_array_destroy(struct trace_array *this_tr)
9597 {
9598 	struct trace_array *tr;
9599 
9600 	if (!this_tr)
9601 		return -EINVAL;
9602 
9603 	guard(mutex)(&event_mutex);
9604 	guard(mutex)(&trace_types_lock);
9605 
9606 
9607 	/* Making sure trace array exists before destroying it. */
9608 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9609 		if (tr == this_tr)
9610 			return __remove_instance(tr);
9611 	}
9612 
9613 	return -ENODEV;
9614 }
9615 EXPORT_SYMBOL_GPL(trace_array_destroy);
9616 
9617 static int instance_rmdir(const char *name)
9618 {
9619 	struct trace_array *tr;
9620 
9621 	guard(mutex)(&event_mutex);
9622 	guard(mutex)(&trace_types_lock);
9623 
9624 	tr = trace_array_find(name);
9625 	if (!tr)
9626 		return -ENODEV;
9627 
9628 	return __remove_instance(tr);
9629 }
9630 
9631 static __init void create_trace_instances(struct dentry *d_tracer)
9632 {
9633 	struct trace_array *tr;
9634 
9635 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9636 							 instance_mkdir,
9637 							 instance_rmdir);
9638 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9639 		return;
9640 
9641 	guard(mutex)(&event_mutex);
9642 	guard(mutex)(&trace_types_lock);
9643 
9644 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9645 		if (!tr->name)
9646 			continue;
9647 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9648 			     "Failed to create instance directory\n"))
9649 			return;
9650 	}
9651 }
9652 
9653 static void
9654 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9655 {
9656 	int cpu;
9657 
9658 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9659 			tr, &show_traces_fops);
9660 
9661 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9662 			tr, &set_tracer_fops);
9663 
9664 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9665 			  tr, &tracing_cpumask_fops);
9666 
9667 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9668 			  tr, &tracing_iter_fops);
9669 
9670 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9671 			  tr, &tracing_fops);
9672 
9673 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9674 			  tr, &tracing_pipe_fops);
9675 
9676 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9677 			  tr, &tracing_entries_fops);
9678 
9679 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9680 			  tr, &tracing_total_entries_fops);
9681 
9682 	trace_create_file("free_buffer", 0200, d_tracer,
9683 			  tr, &tracing_free_buffer_fops);
9684 
9685 	trace_create_file("trace_marker", 0220, d_tracer,
9686 			  tr, &tracing_mark_fops);
9687 
9688 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9689 
9690 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9691 			  tr, &tracing_mark_raw_fops);
9692 
9693 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9694 			  &trace_clock_fops);
9695 
9696 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9697 			  tr, &rb_simple_fops);
9698 
9699 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9700 			  &trace_time_stamp_mode_fops);
9701 
9702 	tr->buffer_percent = 50;
9703 
9704 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9705 			tr, &buffer_percent_fops);
9706 
9707 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9708 			  tr, &buffer_subbuf_size_fops);
9709 
9710 	create_trace_options_dir(tr);
9711 
9712 #ifdef CONFIG_TRACER_MAX_TRACE
9713 	trace_create_maxlat_file(tr, d_tracer);
9714 #endif
9715 
9716 	if (ftrace_create_function_files(tr, d_tracer))
9717 		MEM_FAIL(1, "Could not allocate function filter files");
9718 
9719 	if (tr->range_addr_start) {
9720 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9721 				  tr, &last_boot_fops);
9722 #ifdef CONFIG_TRACER_SNAPSHOT
9723 	} else {
9724 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9725 				  tr, &snapshot_fops);
9726 #endif
9727 	}
9728 
9729 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9730 			  tr, &tracing_err_log_fops);
9731 
9732 	for_each_tracing_cpu(cpu)
9733 		tracing_init_tracefs_percpu(tr, cpu);
9734 
9735 	ftrace_init_tracefs(tr, d_tracer);
9736 }
9737 
9738 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9739 {
9740 	struct vfsmount *mnt;
9741 	struct file_system_type *type;
9742 
9743 	/*
9744 	 * To maintain backward compatibility for tools that mount
9745 	 * debugfs to get to the tracing facility, tracefs is automatically
9746 	 * mounted to the debugfs/tracing directory.
9747 	 */
9748 	type = get_fs_type("tracefs");
9749 	if (!type)
9750 		return NULL;
9751 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9752 	put_filesystem(type);
9753 	if (IS_ERR(mnt))
9754 		return NULL;
9755 	mntget(mnt);
9756 
9757 	return mnt;
9758 }
9759 
9760 /**
9761  * tracing_init_dentry - initialize top level trace array
9762  *
9763  * This is called when creating files or directories in the tracing
9764  * directory. It is called via fs_initcall() by any of the boot up code
9765  * and expects to return the dentry of the top level tracing directory.
9766  */
9767 int tracing_init_dentry(void)
9768 {
9769 	struct trace_array *tr = &global_trace;
9770 
9771 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9772 		pr_warn("Tracing disabled due to lockdown\n");
9773 		return -EPERM;
9774 	}
9775 
9776 	/* The top level trace array uses  NULL as parent */
9777 	if (tr->dir)
9778 		return 0;
9779 
9780 	if (WARN_ON(!tracefs_initialized()))
9781 		return -ENODEV;
9782 
9783 	/*
9784 	 * As there may still be users that expect the tracing
9785 	 * files to exist in debugfs/tracing, we must automount
9786 	 * the tracefs file system there, so older tools still
9787 	 * work with the newer kernel.
9788 	 */
9789 	tr->dir = debugfs_create_automount("tracing", NULL,
9790 					   trace_automount, NULL);
9791 
9792 	return 0;
9793 }
9794 
9795 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9796 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9797 
9798 static struct workqueue_struct *eval_map_wq __initdata;
9799 static struct work_struct eval_map_work __initdata;
9800 static struct work_struct tracerfs_init_work __initdata;
9801 
9802 static void __init eval_map_work_func(struct work_struct *work)
9803 {
9804 	int len;
9805 
9806 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9807 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9808 }
9809 
9810 static int __init trace_eval_init(void)
9811 {
9812 	INIT_WORK(&eval_map_work, eval_map_work_func);
9813 
9814 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9815 	if (!eval_map_wq) {
9816 		pr_err("Unable to allocate eval_map_wq\n");
9817 		/* Do work here */
9818 		eval_map_work_func(&eval_map_work);
9819 		return -ENOMEM;
9820 	}
9821 
9822 	queue_work(eval_map_wq, &eval_map_work);
9823 	return 0;
9824 }
9825 
9826 subsys_initcall(trace_eval_init);
9827 
9828 static int __init trace_eval_sync(void)
9829 {
9830 	/* Make sure the eval map updates are finished */
9831 	if (eval_map_wq)
9832 		destroy_workqueue(eval_map_wq);
9833 	return 0;
9834 }
9835 
9836 late_initcall_sync(trace_eval_sync);
9837 
9838 
9839 #ifdef CONFIG_MODULES
9840 
9841 bool module_exists(const char *module)
9842 {
9843 	/* All modules have the symbol __this_module */
9844 	static const char this_mod[] = "__this_module";
9845 	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
9846 	unsigned long val;
9847 	int n;
9848 
9849 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
9850 
9851 	if (n > sizeof(modname) - 1)
9852 		return false;
9853 
9854 	val = module_kallsyms_lookup_name(modname);
9855 	return val != 0;
9856 }
9857 
9858 static void trace_module_add_evals(struct module *mod)
9859 {
9860 	if (!mod->num_trace_evals)
9861 		return;
9862 
9863 	/*
9864 	 * Modules with bad taint do not have events created, do
9865 	 * not bother with enums either.
9866 	 */
9867 	if (trace_module_has_bad_taint(mod))
9868 		return;
9869 
9870 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9871 }
9872 
9873 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9874 static void trace_module_remove_evals(struct module *mod)
9875 {
9876 	union trace_eval_map_item *map;
9877 	union trace_eval_map_item **last = &trace_eval_maps;
9878 
9879 	if (!mod->num_trace_evals)
9880 		return;
9881 
9882 	guard(mutex)(&trace_eval_mutex);
9883 
9884 	map = trace_eval_maps;
9885 
9886 	while (map) {
9887 		if (map->head.mod == mod)
9888 			break;
9889 		map = trace_eval_jmp_to_tail(map);
9890 		last = &map->tail.next;
9891 		map = map->tail.next;
9892 	}
9893 	if (!map)
9894 		return;
9895 
9896 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9897 	kfree(map);
9898 }
9899 #else
9900 static inline void trace_module_remove_evals(struct module *mod) { }
9901 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9902 
9903 static int trace_module_notify(struct notifier_block *self,
9904 			       unsigned long val, void *data)
9905 {
9906 	struct module *mod = data;
9907 
9908 	switch (val) {
9909 	case MODULE_STATE_COMING:
9910 		trace_module_add_evals(mod);
9911 		break;
9912 	case MODULE_STATE_GOING:
9913 		trace_module_remove_evals(mod);
9914 		break;
9915 	}
9916 
9917 	return NOTIFY_OK;
9918 }
9919 
9920 static struct notifier_block trace_module_nb = {
9921 	.notifier_call = trace_module_notify,
9922 	.priority = 0,
9923 };
9924 #endif /* CONFIG_MODULES */
9925 
9926 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9927 {
9928 
9929 	event_trace_init();
9930 
9931 	init_tracer_tracefs(&global_trace, NULL);
9932 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9933 
9934 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9935 			&global_trace, &tracing_thresh_fops);
9936 
9937 	trace_create_file("README", TRACE_MODE_READ, NULL,
9938 			NULL, &tracing_readme_fops);
9939 
9940 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9941 			NULL, &tracing_saved_cmdlines_fops);
9942 
9943 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9944 			  NULL, &tracing_saved_cmdlines_size_fops);
9945 
9946 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9947 			NULL, &tracing_saved_tgids_fops);
9948 
9949 	trace_create_eval_file(NULL);
9950 
9951 #ifdef CONFIG_MODULES
9952 	register_module_notifier(&trace_module_nb);
9953 #endif
9954 
9955 #ifdef CONFIG_DYNAMIC_FTRACE
9956 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9957 			NULL, &tracing_dyn_info_fops);
9958 #endif
9959 
9960 	create_trace_instances(NULL);
9961 
9962 	update_tracer_options(&global_trace);
9963 }
9964 
9965 static __init int tracer_init_tracefs(void)
9966 {
9967 	int ret;
9968 
9969 	trace_access_lock_init();
9970 
9971 	ret = tracing_init_dentry();
9972 	if (ret)
9973 		return 0;
9974 
9975 	if (eval_map_wq) {
9976 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9977 		queue_work(eval_map_wq, &tracerfs_init_work);
9978 	} else {
9979 		tracer_init_tracefs_work_func(NULL);
9980 	}
9981 
9982 	rv_init_interface();
9983 
9984 	return 0;
9985 }
9986 
9987 fs_initcall(tracer_init_tracefs);
9988 
9989 static int trace_die_panic_handler(struct notifier_block *self,
9990 				unsigned long ev, void *unused);
9991 
9992 static struct notifier_block trace_panic_notifier = {
9993 	.notifier_call = trace_die_panic_handler,
9994 	.priority = INT_MAX - 1,
9995 };
9996 
9997 static struct notifier_block trace_die_notifier = {
9998 	.notifier_call = trace_die_panic_handler,
9999 	.priority = INT_MAX - 1,
10000 };
10001 
10002 /*
10003  * The idea is to execute the following die/panic callback early, in order
10004  * to avoid showing irrelevant information in the trace (like other panic
10005  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10006  * warnings get disabled (to prevent potential log flooding).
10007  */
10008 static int trace_die_panic_handler(struct notifier_block *self,
10009 				unsigned long ev, void *unused)
10010 {
10011 	if (!ftrace_dump_on_oops_enabled())
10012 		return NOTIFY_DONE;
10013 
10014 	/* The die notifier requires DIE_OOPS to trigger */
10015 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10016 		return NOTIFY_DONE;
10017 
10018 	ftrace_dump(DUMP_PARAM);
10019 
10020 	return NOTIFY_DONE;
10021 }
10022 
10023 /*
10024  * printk is set to max of 1024, we really don't need it that big.
10025  * Nothing should be printing 1000 characters anyway.
10026  */
10027 #define TRACE_MAX_PRINT		1000
10028 
10029 /*
10030  * Define here KERN_TRACE so that we have one place to modify
10031  * it if we decide to change what log level the ftrace dump
10032  * should be at.
10033  */
10034 #define KERN_TRACE		KERN_EMERG
10035 
10036 void
10037 trace_printk_seq(struct trace_seq *s)
10038 {
10039 	/* Probably should print a warning here. */
10040 	if (s->seq.len >= TRACE_MAX_PRINT)
10041 		s->seq.len = TRACE_MAX_PRINT;
10042 
10043 	/*
10044 	 * More paranoid code. Although the buffer size is set to
10045 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10046 	 * an extra layer of protection.
10047 	 */
10048 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10049 		s->seq.len = s->seq.size - 1;
10050 
10051 	/* should be zero ended, but we are paranoid. */
10052 	s->buffer[s->seq.len] = 0;
10053 
10054 	printk(KERN_TRACE "%s", s->buffer);
10055 
10056 	trace_seq_init(s);
10057 }
10058 
10059 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10060 {
10061 	iter->tr = tr;
10062 	iter->trace = iter->tr->current_trace;
10063 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10064 	iter->array_buffer = &tr->array_buffer;
10065 
10066 	if (iter->trace && iter->trace->open)
10067 		iter->trace->open(iter);
10068 
10069 	/* Annotate start of buffers if we had overruns */
10070 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10071 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10072 
10073 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10074 	if (trace_clocks[iter->tr->clock_id].in_ns)
10075 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10076 
10077 	/* Can not use kmalloc for iter.temp and iter.fmt */
10078 	iter->temp = static_temp_buf;
10079 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10080 	iter->fmt = static_fmt_buf;
10081 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10082 }
10083 
10084 void trace_init_global_iter(struct trace_iterator *iter)
10085 {
10086 	trace_init_iter(iter, &global_trace);
10087 }
10088 
10089 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10090 {
10091 	/* use static because iter can be a bit big for the stack */
10092 	static struct trace_iterator iter;
10093 	unsigned int old_userobj;
10094 	unsigned long flags;
10095 	int cnt = 0, cpu;
10096 
10097 	/*
10098 	 * Always turn off tracing when we dump.
10099 	 * We don't need to show trace output of what happens
10100 	 * between multiple crashes.
10101 	 *
10102 	 * If the user does a sysrq-z, then they can re-enable
10103 	 * tracing with echo 1 > tracing_on.
10104 	 */
10105 	tracer_tracing_off(tr);
10106 
10107 	local_irq_save(flags);
10108 
10109 	/* Simulate the iterator */
10110 	trace_init_iter(&iter, tr);
10111 
10112 	for_each_tracing_cpu(cpu) {
10113 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10114 	}
10115 
10116 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10117 
10118 	/* don't look at user memory in panic mode */
10119 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10120 
10121 	if (dump_mode == DUMP_ORIG)
10122 		iter.cpu_file = raw_smp_processor_id();
10123 	else
10124 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10125 
10126 	if (tr == &global_trace)
10127 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10128 	else
10129 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10130 
10131 	/* Did function tracer already get disabled? */
10132 	if (ftrace_is_dead()) {
10133 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10134 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10135 	}
10136 
10137 	/*
10138 	 * We need to stop all tracing on all CPUS to read
10139 	 * the next buffer. This is a bit expensive, but is
10140 	 * not done often. We fill all what we can read,
10141 	 * and then release the locks again.
10142 	 */
10143 
10144 	while (!trace_empty(&iter)) {
10145 
10146 		if (!cnt)
10147 			printk(KERN_TRACE "---------------------------------\n");
10148 
10149 		cnt++;
10150 
10151 		trace_iterator_reset(&iter);
10152 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10153 
10154 		if (trace_find_next_entry_inc(&iter) != NULL) {
10155 			int ret;
10156 
10157 			ret = print_trace_line(&iter);
10158 			if (ret != TRACE_TYPE_NO_CONSUME)
10159 				trace_consume(&iter);
10160 		}
10161 		touch_nmi_watchdog();
10162 
10163 		trace_printk_seq(&iter.seq);
10164 	}
10165 
10166 	if (!cnt)
10167 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10168 	else
10169 		printk(KERN_TRACE "---------------------------------\n");
10170 
10171 	tr->trace_flags |= old_userobj;
10172 
10173 	for_each_tracing_cpu(cpu) {
10174 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10175 	}
10176 	local_irq_restore(flags);
10177 }
10178 
10179 static void ftrace_dump_by_param(void)
10180 {
10181 	bool first_param = true;
10182 	char dump_param[MAX_TRACER_SIZE];
10183 	char *buf, *token, *inst_name;
10184 	struct trace_array *tr;
10185 
10186 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10187 	buf = dump_param;
10188 
10189 	while ((token = strsep(&buf, ",")) != NULL) {
10190 		if (first_param) {
10191 			first_param = false;
10192 			if (!strcmp("0", token))
10193 				continue;
10194 			else if (!strcmp("1", token)) {
10195 				ftrace_dump_one(&global_trace, DUMP_ALL);
10196 				continue;
10197 			}
10198 			else if (!strcmp("2", token) ||
10199 			  !strcmp("orig_cpu", token)) {
10200 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10201 				continue;
10202 			}
10203 		}
10204 
10205 		inst_name = strsep(&token, "=");
10206 		tr = trace_array_find(inst_name);
10207 		if (!tr) {
10208 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10209 			continue;
10210 		}
10211 
10212 		if (token && (!strcmp("2", token) ||
10213 			  !strcmp("orig_cpu", token)))
10214 			ftrace_dump_one(tr, DUMP_ORIG);
10215 		else
10216 			ftrace_dump_one(tr, DUMP_ALL);
10217 	}
10218 }
10219 
10220 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10221 {
10222 	static atomic_t dump_running;
10223 
10224 	/* Only allow one dump user at a time. */
10225 	if (atomic_inc_return(&dump_running) != 1) {
10226 		atomic_dec(&dump_running);
10227 		return;
10228 	}
10229 
10230 	switch (oops_dump_mode) {
10231 	case DUMP_ALL:
10232 		ftrace_dump_one(&global_trace, DUMP_ALL);
10233 		break;
10234 	case DUMP_ORIG:
10235 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10236 		break;
10237 	case DUMP_PARAM:
10238 		ftrace_dump_by_param();
10239 		break;
10240 	case DUMP_NONE:
10241 		break;
10242 	default:
10243 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10244 		ftrace_dump_one(&global_trace, DUMP_ALL);
10245 	}
10246 
10247 	atomic_dec(&dump_running);
10248 }
10249 EXPORT_SYMBOL_GPL(ftrace_dump);
10250 
10251 #define WRITE_BUFSIZE  4096
10252 
10253 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10254 				size_t count, loff_t *ppos,
10255 				int (*createfn)(const char *))
10256 {
10257 	char *kbuf, *buf, *tmp;
10258 	int ret = 0;
10259 	size_t done = 0;
10260 	size_t size;
10261 
10262 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10263 	if (!kbuf)
10264 		return -ENOMEM;
10265 
10266 	while (done < count) {
10267 		size = count - done;
10268 
10269 		if (size >= WRITE_BUFSIZE)
10270 			size = WRITE_BUFSIZE - 1;
10271 
10272 		if (copy_from_user(kbuf, buffer + done, size)) {
10273 			ret = -EFAULT;
10274 			goto out;
10275 		}
10276 		kbuf[size] = '\0';
10277 		buf = kbuf;
10278 		do {
10279 			tmp = strchr(buf, '\n');
10280 			if (tmp) {
10281 				*tmp = '\0';
10282 				size = tmp - buf + 1;
10283 			} else {
10284 				size = strlen(buf);
10285 				if (done + size < count) {
10286 					if (buf != kbuf)
10287 						break;
10288 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10289 					pr_warn("Line length is too long: Should be less than %d\n",
10290 						WRITE_BUFSIZE - 2);
10291 					ret = -EINVAL;
10292 					goto out;
10293 				}
10294 			}
10295 			done += size;
10296 
10297 			/* Remove comments */
10298 			tmp = strchr(buf, '#');
10299 
10300 			if (tmp)
10301 				*tmp = '\0';
10302 
10303 			ret = createfn(buf);
10304 			if (ret)
10305 				goto out;
10306 			buf += size;
10307 
10308 		} while (done < count);
10309 	}
10310 	ret = done;
10311 
10312 out:
10313 	kfree(kbuf);
10314 
10315 	return ret;
10316 }
10317 
10318 #ifdef CONFIG_TRACER_MAX_TRACE
10319 __init static bool tr_needs_alloc_snapshot(const char *name)
10320 {
10321 	char *test;
10322 	int len = strlen(name);
10323 	bool ret;
10324 
10325 	if (!boot_snapshot_index)
10326 		return false;
10327 
10328 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10329 	    boot_snapshot_info[len] == '\t')
10330 		return true;
10331 
10332 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10333 	if (!test)
10334 		return false;
10335 
10336 	sprintf(test, "\t%s\t", name);
10337 	ret = strstr(boot_snapshot_info, test) == NULL;
10338 	kfree(test);
10339 	return ret;
10340 }
10341 
10342 __init static void do_allocate_snapshot(const char *name)
10343 {
10344 	if (!tr_needs_alloc_snapshot(name))
10345 		return;
10346 
10347 	/*
10348 	 * When allocate_snapshot is set, the next call to
10349 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10350 	 * will allocate the snapshot buffer. That will alse clear
10351 	 * this flag.
10352 	 */
10353 	allocate_snapshot = true;
10354 }
10355 #else
10356 static inline void do_allocate_snapshot(const char *name) { }
10357 #endif
10358 
10359 __init static void enable_instances(void)
10360 {
10361 	struct trace_array *tr;
10362 	char *curr_str;
10363 	char *name;
10364 	char *str;
10365 	char *tok;
10366 
10367 	/* A tab is always appended */
10368 	boot_instance_info[boot_instance_index - 1] = '\0';
10369 	str = boot_instance_info;
10370 
10371 	while ((curr_str = strsep(&str, "\t"))) {
10372 		phys_addr_t start = 0;
10373 		phys_addr_t size = 0;
10374 		unsigned long addr = 0;
10375 		bool traceprintk = false;
10376 		bool traceoff = false;
10377 		char *flag_delim;
10378 		char *addr_delim;
10379 
10380 		tok = strsep(&curr_str, ",");
10381 
10382 		flag_delim = strchr(tok, '^');
10383 		addr_delim = strchr(tok, '@');
10384 
10385 		if (addr_delim)
10386 			*addr_delim++ = '\0';
10387 
10388 		if (flag_delim)
10389 			*flag_delim++ = '\0';
10390 
10391 		name = tok;
10392 
10393 		if (flag_delim) {
10394 			char *flag;
10395 
10396 			while ((flag = strsep(&flag_delim, "^"))) {
10397 				if (strcmp(flag, "traceoff") == 0) {
10398 					traceoff = true;
10399 				} else if ((strcmp(flag, "printk") == 0) ||
10400 					   (strcmp(flag, "traceprintk") == 0) ||
10401 					   (strcmp(flag, "trace_printk") == 0)) {
10402 					traceprintk = true;
10403 				} else {
10404 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10405 						flag, name);
10406 				}
10407 			}
10408 		}
10409 
10410 		tok = addr_delim;
10411 		if (tok && isdigit(*tok)) {
10412 			start = memparse(tok, &tok);
10413 			if (!start) {
10414 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10415 					name);
10416 				continue;
10417 			}
10418 			if (*tok != ':') {
10419 				pr_warn("Tracing: No size specified for instance %s\n", name);
10420 				continue;
10421 			}
10422 			tok++;
10423 			size = memparse(tok, &tok);
10424 			if (!size) {
10425 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10426 					name);
10427 				continue;
10428 			}
10429 		} else if (tok) {
10430 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10431 				start = 0;
10432 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10433 				continue;
10434 			}
10435 		}
10436 
10437 		if (start) {
10438 			addr = map_pages(start, size);
10439 			if (addr) {
10440 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10441 					name, &start, (unsigned long)size);
10442 			} else {
10443 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10444 				continue;
10445 			}
10446 		} else {
10447 			/* Only non mapped buffers have snapshot buffers */
10448 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10449 				do_allocate_snapshot(name);
10450 		}
10451 
10452 		tr = trace_array_create_systems(name, NULL, addr, size);
10453 		if (IS_ERR(tr)) {
10454 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10455 			continue;
10456 		}
10457 
10458 		if (traceoff)
10459 			tracer_tracing_off(tr);
10460 
10461 		if (traceprintk)
10462 			update_printk_trace(tr);
10463 
10464 		/*
10465 		 * If start is set, then this is a mapped buffer, and
10466 		 * cannot be deleted by user space, so keep the reference
10467 		 * to it.
10468 		 */
10469 		if (start) {
10470 			tr->flags |= TRACE_ARRAY_FL_BOOT;
10471 			tr->ref++;
10472 		}
10473 
10474 		while ((tok = strsep(&curr_str, ","))) {
10475 			early_enable_events(tr, tok, true);
10476 		}
10477 	}
10478 }
10479 
10480 __init static int tracer_alloc_buffers(void)
10481 {
10482 	int ring_buf_size;
10483 	int ret = -ENOMEM;
10484 
10485 
10486 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10487 		pr_warn("Tracing disabled due to lockdown\n");
10488 		return -EPERM;
10489 	}
10490 
10491 	/*
10492 	 * Make sure we don't accidentally add more trace options
10493 	 * than we have bits for.
10494 	 */
10495 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10496 
10497 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10498 		goto out;
10499 
10500 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10501 		goto out_free_buffer_mask;
10502 
10503 	/* Only allocate trace_printk buffers if a trace_printk exists */
10504 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10505 		/* Must be called before global_trace.buffer is allocated */
10506 		trace_printk_init_buffers();
10507 
10508 	/* To save memory, keep the ring buffer size to its minimum */
10509 	if (global_trace.ring_buffer_expanded)
10510 		ring_buf_size = trace_buf_size;
10511 	else
10512 		ring_buf_size = 1;
10513 
10514 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10515 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10516 
10517 	raw_spin_lock_init(&global_trace.start_lock);
10518 
10519 	/*
10520 	 * The prepare callbacks allocates some memory for the ring buffer. We
10521 	 * don't free the buffer if the CPU goes down. If we were to free
10522 	 * the buffer, then the user would lose any trace that was in the
10523 	 * buffer. The memory will be removed once the "instance" is removed.
10524 	 */
10525 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10526 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10527 				      NULL);
10528 	if (ret < 0)
10529 		goto out_free_cpumask;
10530 	/* Used for event triggers */
10531 	ret = -ENOMEM;
10532 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10533 	if (!temp_buffer)
10534 		goto out_rm_hp_state;
10535 
10536 	if (trace_create_savedcmd() < 0)
10537 		goto out_free_temp_buffer;
10538 
10539 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10540 		goto out_free_savedcmd;
10541 
10542 	/* TODO: make the number of buffers hot pluggable with CPUS */
10543 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10544 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10545 		goto out_free_pipe_cpumask;
10546 	}
10547 	if (global_trace.buffer_disabled)
10548 		tracing_off();
10549 
10550 	if (trace_boot_clock) {
10551 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10552 		if (ret < 0)
10553 			pr_warn("Trace clock %s not defined, going back to default\n",
10554 				trace_boot_clock);
10555 	}
10556 
10557 	/*
10558 	 * register_tracer() might reference current_trace, so it
10559 	 * needs to be set before we register anything. This is
10560 	 * just a bootstrap of current_trace anyway.
10561 	 */
10562 	global_trace.current_trace = &nop_trace;
10563 
10564 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10565 #ifdef CONFIG_TRACER_MAX_TRACE
10566 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10567 #endif
10568 	ftrace_init_global_array_ops(&global_trace);
10569 
10570 #ifdef CONFIG_MODULES
10571 	INIT_LIST_HEAD(&global_trace.mod_events);
10572 #endif
10573 
10574 	init_trace_flags_index(&global_trace);
10575 
10576 	register_tracer(&nop_trace);
10577 
10578 	/* Function tracing may start here (via kernel command line) */
10579 	init_function_trace();
10580 
10581 	/* All seems OK, enable tracing */
10582 	tracing_disabled = 0;
10583 
10584 	atomic_notifier_chain_register(&panic_notifier_list,
10585 				       &trace_panic_notifier);
10586 
10587 	register_die_notifier(&trace_die_notifier);
10588 
10589 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10590 
10591 	INIT_LIST_HEAD(&global_trace.systems);
10592 	INIT_LIST_HEAD(&global_trace.events);
10593 	INIT_LIST_HEAD(&global_trace.hist_vars);
10594 	INIT_LIST_HEAD(&global_trace.err_log);
10595 	list_add(&global_trace.list, &ftrace_trace_arrays);
10596 
10597 	apply_trace_boot_options();
10598 
10599 	register_snapshot_cmd();
10600 
10601 	return 0;
10602 
10603 out_free_pipe_cpumask:
10604 	free_cpumask_var(global_trace.pipe_cpumask);
10605 out_free_savedcmd:
10606 	trace_free_saved_cmdlines_buffer();
10607 out_free_temp_buffer:
10608 	ring_buffer_free(temp_buffer);
10609 out_rm_hp_state:
10610 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10611 out_free_cpumask:
10612 	free_cpumask_var(global_trace.tracing_cpumask);
10613 out_free_buffer_mask:
10614 	free_cpumask_var(tracing_buffer_mask);
10615 out:
10616 	return ret;
10617 }
10618 
10619 #ifdef CONFIG_FUNCTION_TRACER
10620 /* Used to set module cached ftrace filtering at boot up */
10621 __init struct trace_array *trace_get_global_array(void)
10622 {
10623 	return &global_trace;
10624 }
10625 #endif
10626 
10627 void __init ftrace_boot_snapshot(void)
10628 {
10629 #ifdef CONFIG_TRACER_MAX_TRACE
10630 	struct trace_array *tr;
10631 
10632 	if (!snapshot_at_boot)
10633 		return;
10634 
10635 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10636 		if (!tr->allocated_snapshot)
10637 			continue;
10638 
10639 		tracing_snapshot_instance(tr);
10640 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10641 	}
10642 #endif
10643 }
10644 
10645 void __init early_trace_init(void)
10646 {
10647 	if (tracepoint_printk) {
10648 		tracepoint_print_iter =
10649 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10650 		if (MEM_FAIL(!tracepoint_print_iter,
10651 			     "Failed to allocate trace iterator\n"))
10652 			tracepoint_printk = 0;
10653 		else
10654 			static_key_enable(&tracepoint_printk_key.key);
10655 	}
10656 	tracer_alloc_buffers();
10657 
10658 	init_events();
10659 }
10660 
10661 void __init trace_init(void)
10662 {
10663 	trace_event_init();
10664 
10665 	if (boot_instance_index)
10666 		enable_instances();
10667 }
10668 
10669 __init static void clear_boot_tracer(void)
10670 {
10671 	/*
10672 	 * The default tracer at boot buffer is an init section.
10673 	 * This function is called in lateinit. If we did not
10674 	 * find the boot tracer, then clear it out, to prevent
10675 	 * later registration from accessing the buffer that is
10676 	 * about to be freed.
10677 	 */
10678 	if (!default_bootup_tracer)
10679 		return;
10680 
10681 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10682 	       default_bootup_tracer);
10683 	default_bootup_tracer = NULL;
10684 }
10685 
10686 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10687 __init static void tracing_set_default_clock(void)
10688 {
10689 	/* sched_clock_stable() is determined in late_initcall */
10690 	if (!trace_boot_clock && !sched_clock_stable()) {
10691 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10692 			pr_warn("Can not set tracing clock due to lockdown\n");
10693 			return;
10694 		}
10695 
10696 		printk(KERN_WARNING
10697 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10698 		       "If you want to keep using the local clock, then add:\n"
10699 		       "  \"trace_clock=local\"\n"
10700 		       "on the kernel command line\n");
10701 		tracing_set_clock(&global_trace, "global");
10702 	}
10703 }
10704 #else
10705 static inline void tracing_set_default_clock(void) { }
10706 #endif
10707 
10708 __init static int late_trace_init(void)
10709 {
10710 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10711 		static_key_disable(&tracepoint_printk_key.key);
10712 		tracepoint_printk = 0;
10713 	}
10714 
10715 	if (traceoff_after_boot)
10716 		tracing_off();
10717 
10718 	tracing_set_default_clock();
10719 	clear_boot_tracer();
10720 	return 0;
10721 }
10722 
10723 late_initcall_sync(late_trace_init);
10724