xref: /linux/kernel/trace/trace.c (revision eb2dcde9f970ed8d3669444d47c8524b4bdf7d32)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring buffer to count the
61  * entries inserted during the selftest, although concurrent
62  * insertions into the ring buffer, such as trace_printk(), could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66 
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72 
73 void __init disable_tracing_selftest(const char *reason)
74 {
75 	if (!tracing_selftest_disabled) {
76 		tracing_selftest_disabled = true;
77 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
78 	}
79 }
80 #else
81 #define tracing_selftest_running	0
82 #define tracing_selftest_disabled	0
83 #endif
84 
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 and set to zero only when the tracer
112  * initializes successfully. That is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default; you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump the buffer of the CPU that triggered the oops.
133  * Set it to an instance name to dump that specific trace instance.
134  * Dumping multiple instances is also supported; instances are separated
135  * by commas.
136  */
137 /* Set to the string "0" to be disabled by default */
138 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
139 
140 /* When set, tracing will stop when a WARN*() is hit */
141 int __disable_trace_on_warning;
142 
143 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
144 /* Map of enums to their values, for "eval_map" file */
145 struct trace_eval_map_head {
146 	struct module			*mod;
147 	unsigned long			length;
148 };
149 
150 union trace_eval_map_item;
151 
152 struct trace_eval_map_tail {
153 	/*
154 	 * "end" is first and points to NULL as it must be different
155 	 * from "mod" or "eval_string"
156 	 */
157 	union trace_eval_map_item	*next;
158 	const char			*end;	/* points to NULL */
159 };
160 
161 static DEFINE_MUTEX(trace_eval_mutex);
162 
163 /*
164  * The trace_eval_maps are saved in an array with two extra elements,
165  * one at the beginning, and one at the end. The beginning item contains
166  * the count of the saved maps (head.length), and the module they
167  * belong to if not built in (head.mod). The ending item contains a
168  * pointer to the next array of saved eval_map items.
169  */
170 union trace_eval_map_item {
171 	struct trace_eval_map		map;
172 	struct trace_eval_map_head	head;
173 	struct trace_eval_map_tail	tail;
174 };
175 
176 static union trace_eval_map_item *trace_eval_maps;
177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
178 
179 int tracing_set_tracer(struct trace_array *tr, const char *buf);
180 static void ftrace_trace_userstack(struct trace_array *tr,
181 				   struct trace_buffer *buffer,
182 				   unsigned int trace_ctx);
183 
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189 
190 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
191 static int boot_instance_index;
192 
193 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
194 static int boot_snapshot_index;
195 
196 static int __init set_cmdline_ftrace(char *str)
197 {
198 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
199 	default_bootup_tracer = bootup_tracer_buf;
200 	/* We are using ftrace early, expand it */
201 	trace_set_ring_buffer_expanded(NULL);
202 	return 1;
203 }
204 __setup("ftrace=", set_cmdline_ftrace);
205 
206 int ftrace_dump_on_oops_enabled(void)
207 {
208 	if (!strcmp("0", ftrace_dump_on_oops))
209 		return 0;
210 	else
211 		return 1;
212 }
213 
214 static int __init set_ftrace_dump_on_oops(char *str)
215 {
216 	if (!*str) {
217 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
218 		return 1;
219 	}
220 
221 	if (*str == ',') {
222 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
223 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
224 		return 1;
225 	}
226 
227 	if (*str++ == '=') {
228 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
229 		return 1;
230 	}
231 
232 	return 0;
233 }
234 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
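/*
 * Illustrative sketch (not part of trace.c): command-line forms that the
 * parser above accepts, based on its handling of '\0', ',' and '='. The
 * instance names "foo" and "bar" are hypothetical:
 *
 *   ftrace_dump_on_oops           -> "1"       dump the buffers of all CPUs
 *   ftrace_dump_on_oops=2         -> "2"       dump only the oopsing CPU
 *   ftrace_dump_on_oops=foo,bar   -> "foo,bar" dump those trace instances
 *   ftrace_dump_on_oops,foo       -> "1,foo"   dump all CPUs plus "foo"
 */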
235 
236 static int __init stop_trace_on_warning(char *str)
237 {
238 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
239 		__disable_trace_on_warning = 1;
240 	return 1;
241 }
242 __setup("traceoff_on_warning", stop_trace_on_warning);
243 
244 static int __init boot_alloc_snapshot(char *str)
245 {
246 	char *slot = boot_snapshot_info + boot_snapshot_index;
247 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
248 	int ret;
249 
250 	if (str[0] == '=') {
251 		str++;
252 		if (strlen(str) >= left)
253 			return -1;
254 
255 		ret = snprintf(slot, left, "%s\t", str);
256 		boot_snapshot_index += ret;
257 	} else {
258 		allocate_snapshot = true;
259 		/* We also need the main ring buffer expanded */
260 		trace_set_ring_buffer_expanded(NULL);
261 	}
262 	return 1;
263 }
264 __setup("alloc_snapshot", boot_alloc_snapshot);
265 
266 
267 static int __init boot_snapshot(char *str)
268 {
269 	snapshot_at_boot = true;
270 	boot_alloc_snapshot(str);
271 	return 1;
272 }
273 __setup("ftrace_boot_snapshot", boot_snapshot);
274 
275 
276 static int __init boot_instance(char *str)
277 {
278 	char *slot = boot_instance_info + boot_instance_index;
279 	int left = sizeof(boot_instance_info) - boot_instance_index;
280 	int ret;
281 
282 	if (strlen(str) >= left)
283 		return -1;
284 
285 	ret = snprintf(slot, left, "%s\t", str);
286 	boot_instance_index += ret;
287 
288 	return 1;
289 }
290 __setup("trace_instance=", boot_instance);
291 
292 
293 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
294 
295 static int __init set_trace_boot_options(char *str)
296 {
297 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
298 	return 1;
299 }
300 __setup("trace_options=", set_trace_boot_options);
301 
302 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
303 static char *trace_boot_clock __initdata;
304 
305 static int __init set_trace_boot_clock(char *str)
306 {
307 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
308 	trace_boot_clock = trace_boot_clock_buf;
309 	return 1;
310 }
311 __setup("trace_clock=", set_trace_boot_clock);
312 
313 static int __init set_tracepoint_printk(char *str)
314 {
315 	/* Ignore the "tp_printk_stop_on_boot" param */
316 	if (*str == '_')
317 		return 0;
318 
319 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
320 		tracepoint_printk = 1;
321 	return 1;
322 }
323 __setup("tp_printk", set_tracepoint_printk);
324 
325 static int __init set_tracepoint_printk_stop(char *str)
326 {
327 	tracepoint_printk_stop_on_boot = true;
328 	return 1;
329 }
330 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
331 
332 unsigned long long ns2usecs(u64 nsec)
333 {
334 	nsec += 500;
335 	do_div(nsec, 1000);
336 	return nsec;
337 }
338 
339 static void
340 trace_process_export(struct trace_export *export,
341 	       struct ring_buffer_event *event, int flag)
342 {
343 	struct trace_entry *entry;
344 	unsigned int size = 0;
345 
346 	if (export->flags & flag) {
347 		entry = ring_buffer_event_data(event);
348 		size = ring_buffer_event_length(event);
349 		export->write(export, entry, size);
350 	}
351 }
352 
353 static DEFINE_MUTEX(ftrace_export_lock);
354 
355 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
356 
357 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
358 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
359 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
360 
361 static inline void ftrace_exports_enable(struct trace_export *export)
362 {
363 	if (export->flags & TRACE_EXPORT_FUNCTION)
364 		static_branch_inc(&trace_function_exports_enabled);
365 
366 	if (export->flags & TRACE_EXPORT_EVENT)
367 		static_branch_inc(&trace_event_exports_enabled);
368 
369 	if (export->flags & TRACE_EXPORT_MARKER)
370 		static_branch_inc(&trace_marker_exports_enabled);
371 }
372 
373 static inline void ftrace_exports_disable(struct trace_export *export)
374 {
375 	if (export->flags & TRACE_EXPORT_FUNCTION)
376 		static_branch_dec(&trace_function_exports_enabled);
377 
378 	if (export->flags & TRACE_EXPORT_EVENT)
379 		static_branch_dec(&trace_event_exports_enabled);
380 
381 	if (export->flags & TRACE_EXPORT_MARKER)
382 		static_branch_dec(&trace_marker_exports_enabled);
383 }
384 
385 static void ftrace_exports(struct ring_buffer_event *event, int flag)
386 {
387 	struct trace_export *export;
388 
389 	preempt_disable_notrace();
390 
391 	export = rcu_dereference_raw_check(ftrace_exports_list);
392 	while (export) {
393 		trace_process_export(export, event, flag);
394 		export = rcu_dereference_raw_check(export->next);
395 	}
396 
397 	preempt_enable_notrace();
398 }
399 
400 static inline void
401 add_trace_export(struct trace_export **list, struct trace_export *export)
402 {
403 	rcu_assign_pointer(export->next, *list);
404 	/*
405 	 * We are adding export to the list, but another
406 	 * CPU might be walking that list. We need to make sure
407 	 * the export->next pointer is valid before another CPU sees
408 	 * the export pointer included in the list.
409 	 */
410 	rcu_assign_pointer(*list, export);
411 }
412 
413 static inline int
414 rm_trace_export(struct trace_export **list, struct trace_export *export)
415 {
416 	struct trace_export **p;
417 
418 	for (p = list; *p != NULL; p = &(*p)->next)
419 		if (*p == export)
420 			break;
421 
422 	if (*p != export)
423 		return -1;
424 
425 	rcu_assign_pointer(*p, (*p)->next);
426 
427 	return 0;
428 }
429 
430 static inline void
431 add_ftrace_export(struct trace_export **list, struct trace_export *export)
432 {
433 	ftrace_exports_enable(export);
434 
435 	add_trace_export(list, export);
436 }
437 
438 static inline int
439 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
440 {
441 	int ret;
442 
443 	ret = rm_trace_export(list, export);
444 	ftrace_exports_disable(export);
445 
446 	return ret;
447 }
448 
449 int register_ftrace_export(struct trace_export *export)
450 {
451 	if (WARN_ON_ONCE(!export->write))
452 		return -1;
453 
454 	mutex_lock(&ftrace_export_lock);
455 
456 	add_ftrace_export(&ftrace_exports_list, export);
457 
458 	mutex_unlock(&ftrace_export_lock);
459 
460 	return 0;
461 }
462 EXPORT_SYMBOL_GPL(register_ftrace_export);
463 
464 int unregister_ftrace_export(struct trace_export *export)
465 {
466 	int ret;
467 
468 	mutex_lock(&ftrace_export_lock);
469 
470 	ret = rm_ftrace_export(&ftrace_exports_list, export);
471 
472 	mutex_unlock(&ftrace_export_lock);
473 
474 	return ret;
475 }
476 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
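/*
 * A minimal sketch (not part of trace.c) of how a module might hook the
 * export list managed above. Only struct trace_export, the TRACE_EXPORT_*
 * flags and register/unregister_ftrace_export() come from <linux/trace.h>;
 * the callback and module names here are made up for illustration.
 */
#if 0
#include <linux/module.h>
#include <linux/trace.h>

static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Called for every event class whose flag is set in export->flags. */
	pr_debug("exported %u bytes of trace data\n", size);
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT | TRACE_EXPORT_MARKER,
};

static int __init example_export_init(void)
{
	return register_ftrace_export(&example_export);
}

static void __exit example_export_exit(void)
{
	unregister_ftrace_export(&example_export);
}

module_init(example_export_init);
module_exit(example_export_exit);
MODULE_LICENSE("GPL");
#endif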
477 
478 /* trace_flags holds trace_options default values */
479 #define TRACE_DEFAULT_FLAGS						\
480 	(FUNCTION_DEFAULT_FLAGS |					\
481 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
482 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
483 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
484 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
485 	 TRACE_ITER_HASH_PTR)
486 
487 /* trace_options that are only supported by global_trace */
488 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
489 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
490 
491 /* trace_flags that are default zero for instances */
492 #define ZEROED_TRACE_FLAGS \
493 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
494 
495 /*
496  * The global_trace is the descriptor that holds the top-level tracing
497  * buffers for the live tracing.
498  */
499 static struct trace_array global_trace = {
500 	.trace_flags = TRACE_DEFAULT_FLAGS,
501 };
502 
503 void trace_set_ring_buffer_expanded(struct trace_array *tr)
504 {
505 	if (!tr)
506 		tr = &global_trace;
507 	tr->ring_buffer_expanded = true;
508 }
509 
510 LIST_HEAD(ftrace_trace_arrays);
511 
512 int trace_array_get(struct trace_array *this_tr)
513 {
514 	struct trace_array *tr;
515 	int ret = -ENODEV;
516 
517 	mutex_lock(&trace_types_lock);
518 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
519 		if (tr == this_tr) {
520 			tr->ref++;
521 			ret = 0;
522 			break;
523 		}
524 	}
525 	mutex_unlock(&trace_types_lock);
526 
527 	return ret;
528 }
529 
530 static void __trace_array_put(struct trace_array *this_tr)
531 {
532 	WARN_ON(!this_tr->ref);
533 	this_tr->ref--;
534 }
535 
536 /**
537  * trace_array_put - Decrement the reference counter for this trace array.
538  * @this_tr : pointer to the trace array
539  *
540  * NOTE: Use this when we no longer need the trace array returned by
541  * trace_array_get_by_name(). This ensures the trace array can be later
542  * destroyed.
543  *
544  */
545 void trace_array_put(struct trace_array *this_tr)
546 {
547 	if (!this_tr)
548 		return;
549 
550 	mutex_lock(&trace_types_lock);
551 	__trace_array_put(this_tr);
552 	mutex_unlock(&trace_types_lock);
553 }
554 EXPORT_SYMBOL_GPL(trace_array_put);
555 
556 int tracing_check_open_get_tr(struct trace_array *tr)
557 {
558 	int ret;
559 
560 	ret = security_locked_down(LOCKDOWN_TRACEFS);
561 	if (ret)
562 		return ret;
563 
564 	if (tracing_disabled)
565 		return -ENODEV;
566 
567 	if (tr && trace_array_get(tr) < 0)
568 		return -ENODEV;
569 
570 	return 0;
571 }
572 
573 int call_filter_check_discard(struct trace_event_call *call, void *rec,
574 			      struct trace_buffer *buffer,
575 			      struct ring_buffer_event *event)
576 {
577 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
578 	    !filter_match_preds(call->filter, rec)) {
579 		__trace_event_discard_commit(buffer, event);
580 		return 1;
581 	}
582 
583 	return 0;
584 }
585 
586 /**
587  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
588  * @filtered_pids: The list of pids to check
589  * @search_pid: The PID to find in @filtered_pids
590  *
591  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
592  */
593 bool
594 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
595 {
596 	return trace_pid_list_is_set(filtered_pids, search_pid);
597 }
598 
599 /**
600  * trace_ignore_this_task - should a task be ignored for tracing
601  * @filtered_pids: The list of pids to check
602  * @filtered_no_pids: The list of pids not to be traced
603  * @task: The task that should be ignored if not filtered
604  *
605  * Checks if @task should be traced or not from @filtered_pids.
606  * Returns true if @task should *NOT* be traced.
607  * Returns false if @task should be traced.
608  */
609 bool
610 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
611 		       struct trace_pid_list *filtered_no_pids,
612 		       struct task_struct *task)
613 {
614 	/*
615 	 * If filtered_no_pids is not empty, and the task's pid is listed
616 	 * in filtered_no_pids, then return true.
617 	 * Otherwise, if filtered_pids is empty, that means we can
618 	 * trace all tasks. If it has content, then only trace pids
619 	 * within filtered_pids.
620 	 */
621 
622 	return (filtered_pids &&
623 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
624 		(filtered_no_pids &&
625 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
626 }
627 
628 /**
629  * trace_filter_add_remove_task - Add or remove a task from a pid_list
630  * @pid_list: The list to modify
631  * @self: The current task for fork or NULL for exit
632  * @task: The task to add or remove
633  *
634  * When adding a task, if @self is defined, the task is only added if @self
635  * is also included in @pid_list. This happens on fork, and tasks should
636  * only be added when the parent is listed. If @self is NULL, then the
637  * @task pid will be removed from the list, which happens on exit
638  * of a task.
639  */
640 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
641 				  struct task_struct *self,
642 				  struct task_struct *task)
643 {
644 	if (!pid_list)
645 		return;
646 
647 	/* For forks, we only add if the forking task is listed */
648 	if (self) {
649 		if (!trace_find_filtered_pid(pid_list, self->pid))
650 			return;
651 	}
652 
653 	/* "self" is set for forks, and NULL for exits */
654 	if (self)
655 		trace_pid_list_set(pid_list, task->pid);
656 	else
657 		trace_pid_list_clear(pid_list, task->pid);
658 }
659 
660 /**
661  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
662  * @pid_list: The pid list to show
663  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
664  * @pos: The position of the file
665  *
666  * This is used by the seq_file "next" operation to iterate the pids
667  * listed in a trace_pid_list structure.
668  *
669  * Returns the pid+1 as we want to display pid of zero, but NULL would
670  * stop the iteration.
671  */
672 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
673 {
674 	long pid = (unsigned long)v;
675 	unsigned int next;
676 
677 	(*pos)++;
678 
679 	/* pid already is +1 of the actual previous bit */
680 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
681 		return NULL;
682 
683 	pid = next;
684 
685 	/* Return pid + 1 to allow zero to be represented */
686 	return (void *)(pid + 1);
687 }
688 
689 /**
690  * trace_pid_start - Used for seq_file to start reading pid lists
691  * @pid_list: The pid list to show
692  * @pos: The position of the file
693  *
694  * This is used by seq_file "start" operation to start the iteration
695  * of listing pids.
696  *
697  * Returns the pid+1 as we want to display pid of zero, but NULL would
698  * stop the iteration.
699  */
700 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
701 {
702 	unsigned long pid;
703 	unsigned int first;
704 	loff_t l = 0;
705 
706 	if (trace_pid_list_first(pid_list, &first) < 0)
707 		return NULL;
708 
709 	pid = first;
710 
711 	/* Return pid + 1 so that zero can be the exit value */
712 	for (pid++; pid && l < *pos;
713 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
714 		;
715 	return (void *)pid;
716 }
717 
718 /**
719  * trace_pid_show - show the current pid in seq_file processing
720  * @m: The seq_file structure to write into
721  * @v: A void pointer of the pid (+1) value to display
722  *
723  * Can be directly used by seq_file operations to display the current
724  * pid value.
725  */
726 int trace_pid_show(struct seq_file *m, void *v)
727 {
728 	unsigned long pid = (unsigned long)v - 1;
729 
730 	seq_printf(m, "%lu\n", pid);
731 	return 0;
732 }
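/*
 * A minimal sketch (not part of trace.c) of how the pid helpers above are
 * typically wired into a seq_file, similar to what the event and function
 * pid filters do. The p_* wrappers and example_pid_list are hypothetical;
 * real users take their own locks around the pid list in start/stop.
 */
#if 0
static struct trace_pid_list *example_pid_list;

static void *p_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(example_pid_list, pos);
}

static void *p_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(example_pid_list, v, pos);
}

static void p_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops = {
	.start	= p_start,
	.next	= p_next,
	.stop	= p_stop,
	.show	= trace_pid_show,
};
#endif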
733 
734 /* 128 should be much more than enough */
735 #define PID_BUF_SIZE		127
736 
737 int trace_pid_write(struct trace_pid_list *filtered_pids,
738 		    struct trace_pid_list **new_pid_list,
739 		    const char __user *ubuf, size_t cnt)
740 {
741 	struct trace_pid_list *pid_list;
742 	struct trace_parser parser;
743 	unsigned long val;
744 	int nr_pids = 0;
745 	ssize_t read = 0;
746 	ssize_t ret;
747 	loff_t pos;
748 	pid_t pid;
749 
750 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
751 		return -ENOMEM;
752 
753 	/*
754 	 * Always recreate a new array. The write is an all-or-nothing
755 	 * operation: always create a new array when the user adds new
756 	 * pids, so that if the operation fails, the current list is
757 	 * not modified.
758 	 */
759 	pid_list = trace_pid_list_alloc();
760 	if (!pid_list) {
761 		trace_parser_put(&parser);
762 		return -ENOMEM;
763 	}
764 
765 	if (filtered_pids) {
766 		/* copy the current bits to the new max */
767 		ret = trace_pid_list_first(filtered_pids, &pid);
768 		while (!ret) {
769 			trace_pid_list_set(pid_list, pid);
770 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
771 			nr_pids++;
772 		}
773 	}
774 
775 	ret = 0;
776 	while (cnt > 0) {
777 
778 		pos = 0;
779 
780 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
781 		if (ret < 0)
782 			break;
783 
784 		read += ret;
785 		ubuf += ret;
786 		cnt -= ret;
787 
788 		if (!trace_parser_loaded(&parser))
789 			break;
790 
791 		ret = -EINVAL;
792 		if (kstrtoul(parser.buffer, 0, &val))
793 			break;
794 
795 		pid = (pid_t)val;
796 
797 		if (trace_pid_list_set(pid_list, pid) < 0) {
798 			ret = -1;
799 			break;
800 		}
801 		nr_pids++;
802 
803 		trace_parser_clear(&parser);
804 		ret = 0;
805 	}
806 	trace_parser_put(&parser);
807 
808 	if (ret < 0) {
809 		trace_pid_list_free(pid_list);
810 		return ret;
811 	}
812 
813 	if (!nr_pids) {
814 		/* Cleared the list of pids */
815 		trace_pid_list_free(pid_list);
816 		pid_list = NULL;
817 	}
818 
819 	*new_pid_list = pid_list;
820 
821 	return read;
822 }
823 
824 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
825 {
826 	u64 ts;
827 
828 	/* Early boot up does not have a buffer yet */
829 	if (!buf->buffer)
830 		return trace_clock_local();
831 
832 	ts = ring_buffer_time_stamp(buf->buffer);
833 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
834 
835 	return ts;
836 }
837 
838 u64 ftrace_now(int cpu)
839 {
840 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
841 }
842 
843 /**
844  * tracing_is_enabled - Show if global_trace has been enabled
845  *
846  * Shows if the global trace has been enabled or not. It uses the
847  * mirror flag "buffer_disabled" so it can be used in fast paths such as
848  * the irqsoff tracer, but it may be inaccurate due to races. If you
849  * need to know the accurate state, use tracing_is_on(), which is a little
850  * slower but accurate.
851  */
852 int tracing_is_enabled(void)
853 {
854 	/*
855 	 * For quick access (irqsoff uses this in fast path), just
856 	 * return the mirror variable of the state of the ring buffer.
857 	 * It's a little racy, but we don't really care.
858 	 */
859 	smp_rmb();
860 	return !global_trace.buffer_disabled;
861 }
862 
863 /*
864  * trace_buf_size is the size in bytes that is allocated
865  * for a buffer. Note, the number of bytes is always rounded
866  * to page size.
867  *
868  * This number is purposely set to a low value of 16384.
869  * If a dump on oops happens, it is much nicer not to have to
870  * wait for all that output. In any case, this is configurable
871  * at both boot time and run time.
872  */
873 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
874 
875 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
876 
877 /* trace_types holds a link list of available tracers. */
878 static struct tracer		*trace_types __read_mostly;
879 
880 /*
881  * trace_types_lock is used to protect the trace_types list.
882  */
883 DEFINE_MUTEX(trace_types_lock);
884 
885 /*
886  * Serialize access to the ring buffer.
887  *
888  * The ring buffer serializes readers, but that is only low-level
889  * protection. The validity of the events (returned by ring_buffer_peek()
890  * etc.) is not protected by the ring buffer.
891  *
892  * The content of events may become garbage if we allow another process
893  * to consume these events concurrently:
894  *   A) the page holding the consumed events may become a normal page
895  *      (not a reader page) in the ring buffer, and this page will be
896  *      rewritten by the event producer.
897  *   B) the page holding the consumed events may become a page for
898  *      splice_read, and this page will be returned to the system.
899  *
900  * These primitives allow multiple processes to access different per-CPU
901  * ring buffers concurrently.
902  *
903  * These primitives don't distinguish read-only from read-consume access.
904  * Multiple read-only accesses are also serialized.
905  */
906 
907 #ifdef CONFIG_SMP
908 static DECLARE_RWSEM(all_cpu_access_lock);
909 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
910 
911 static inline void trace_access_lock(int cpu)
912 {
913 	if (cpu == RING_BUFFER_ALL_CPUS) {
914 		/* gain it for accessing the whole ring buffer. */
915 		down_write(&all_cpu_access_lock);
916 	} else {
917 		/* gain it for accessing a cpu ring buffer. */
918 
919 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
920 		down_read(&all_cpu_access_lock);
921 
922 		/* Secondly block other access to this @cpu ring buffer. */
923 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
924 	}
925 }
926 
927 static inline void trace_access_unlock(int cpu)
928 {
929 	if (cpu == RING_BUFFER_ALL_CPUS) {
930 		up_write(&all_cpu_access_lock);
931 	} else {
932 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
933 		up_read(&all_cpu_access_lock);
934 	}
935 }
936 
937 static inline void trace_access_lock_init(void)
938 {
939 	int cpu;
940 
941 	for_each_possible_cpu(cpu)
942 		mutex_init(&per_cpu(cpu_access_lock, cpu));
943 }
944 
945 #else
946 
947 static DEFINE_MUTEX(access_lock);
948 
949 static inline void trace_access_lock(int cpu)
950 {
951 	(void)cpu;
952 	mutex_lock(&access_lock);
953 }
954 
955 static inline void trace_access_unlock(int cpu)
956 {
957 	(void)cpu;
958 	mutex_unlock(&access_lock);
959 }
960 
961 static inline void trace_access_lock_init(void)
962 {
963 }
964 
965 #endif
966 
967 #ifdef CONFIG_STACKTRACE
968 static void __ftrace_trace_stack(struct trace_buffer *buffer,
969 				 unsigned int trace_ctx,
970 				 int skip, struct pt_regs *regs);
971 static inline void ftrace_trace_stack(struct trace_array *tr,
972 				      struct trace_buffer *buffer,
973 				      unsigned int trace_ctx,
974 				      int skip, struct pt_regs *regs);
975 
976 #else
977 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
978 					unsigned int trace_ctx,
979 					int skip, struct pt_regs *regs)
980 {
981 }
982 static inline void ftrace_trace_stack(struct trace_array *tr,
983 				      struct trace_buffer *buffer,
984 				      unsigned long trace_ctx,
985 				      int skip, struct pt_regs *regs)
986 {
987 }
988 
989 #endif
990 
991 static __always_inline void
992 trace_event_setup(struct ring_buffer_event *event,
993 		  int type, unsigned int trace_ctx)
994 {
995 	struct trace_entry *ent = ring_buffer_event_data(event);
996 
997 	tracing_generic_entry_update(ent, type, trace_ctx);
998 }
999 
1000 static __always_inline struct ring_buffer_event *
1001 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1002 			  int type,
1003 			  unsigned long len,
1004 			  unsigned int trace_ctx)
1005 {
1006 	struct ring_buffer_event *event;
1007 
1008 	event = ring_buffer_lock_reserve(buffer, len);
1009 	if (event != NULL)
1010 		trace_event_setup(event, type, trace_ctx);
1011 
1012 	return event;
1013 }
1014 
1015 void tracer_tracing_on(struct trace_array *tr)
1016 {
1017 	if (tr->array_buffer.buffer)
1018 		ring_buffer_record_on(tr->array_buffer.buffer);
1019 	/*
1020 	 * This flag is looked at when buffers haven't been allocated
1021 	 * yet, or by some tracers (like irqsoff), that just want to
1022 	 * know if the ring buffer has been disabled, but it can handle
1023 	 * races where it gets disabled while we still do a record.
1024 	 * As the check is in the fast path of the tracers, it is more
1025 	 * important to be fast than accurate.
1026 	 */
1027 	tr->buffer_disabled = 0;
1028 	/* Make the flag seen by readers */
1029 	smp_wmb();
1030 }
1031 
1032 /**
1033  * tracing_on - enable tracing buffers
1034  *
1035  * This function enables tracing buffers that may have been
1036  * disabled with tracing_off.
1037  */
1038 void tracing_on(void)
1039 {
1040 	tracer_tracing_on(&global_trace);
1041 }
1042 EXPORT_SYMBOL_GPL(tracing_on);
1043 
1044 
1045 static __always_inline void
1046 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1047 {
1048 	__this_cpu_write(trace_taskinfo_save, true);
1049 
1050 	/* If this is the temp buffer, we need to commit fully */
1051 	if (this_cpu_read(trace_buffered_event) == event) {
1052 		/* Length is in event->array[0] */
1053 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1054 		/* Release the temp buffer */
1055 		this_cpu_dec(trace_buffered_event_cnt);
1056 		/* ring_buffer_unlock_commit() enables preemption */
1057 		preempt_enable_notrace();
1058 	} else
1059 		ring_buffer_unlock_commit(buffer);
1060 }
1061 
1062 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1063 		       const char *str, int size)
1064 {
1065 	struct ring_buffer_event *event;
1066 	struct trace_buffer *buffer;
1067 	struct print_entry *entry;
1068 	unsigned int trace_ctx;
1069 	int alloc;
1070 
1071 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1072 		return 0;
1073 
1074 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1075 		return 0;
1076 
1077 	if (unlikely(tracing_disabled))
1078 		return 0;
1079 
1080 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1081 
1082 	trace_ctx = tracing_gen_ctx();
1083 	buffer = tr->array_buffer.buffer;
1084 	ring_buffer_nest_start(buffer);
1085 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1086 					    trace_ctx);
1087 	if (!event) {
1088 		size = 0;
1089 		goto out;
1090 	}
1091 
1092 	entry = ring_buffer_event_data(event);
1093 	entry->ip = ip;
1094 
1095 	memcpy(&entry->buf, str, size);
1096 
1097 	/* Add a newline if necessary */
1098 	if (entry->buf[size - 1] != '\n') {
1099 		entry->buf[size] = '\n';
1100 		entry->buf[size + 1] = '\0';
1101 	} else
1102 		entry->buf[size] = '\0';
1103 
1104 	__buffer_unlock_commit(buffer, event);
1105 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1106  out:
1107 	ring_buffer_nest_end(buffer);
1108 	return size;
1109 }
1110 EXPORT_SYMBOL_GPL(__trace_array_puts);
1111 
1112 /**
1113  * __trace_puts - write a constant string into the trace buffer.
1114  * @ip:	   The address of the caller
1115  * @str:   The constant string to write
1116  * @size:  The size of the string.
1117  */
1118 int __trace_puts(unsigned long ip, const char *str, int size)
1119 {
1120 	return __trace_array_puts(&global_trace, ip, str, size);
1121 }
1122 EXPORT_SYMBOL_GPL(__trace_puts);
1123 
1124 /**
1125  * __trace_bputs - write the pointer to a constant string into trace buffer
1126  * @ip:	   The address of the caller
1127  * @str:   The constant string to write to the buffer to
1128  */
1129 int __trace_bputs(unsigned long ip, const char *str)
1130 {
1131 	struct ring_buffer_event *event;
1132 	struct trace_buffer *buffer;
1133 	struct bputs_entry *entry;
1134 	unsigned int trace_ctx;
1135 	int size = sizeof(struct bputs_entry);
1136 	int ret = 0;
1137 
1138 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1139 		return 0;
1140 
1141 	if (unlikely(tracing_selftest_running || tracing_disabled))
1142 		return 0;
1143 
1144 	trace_ctx = tracing_gen_ctx();
1145 	buffer = global_trace.array_buffer.buffer;
1146 
1147 	ring_buffer_nest_start(buffer);
1148 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1149 					    trace_ctx);
1150 	if (!event)
1151 		goto out;
1152 
1153 	entry = ring_buffer_event_data(event);
1154 	entry->ip			= ip;
1155 	entry->str			= str;
1156 
1157 	__buffer_unlock_commit(buffer, event);
1158 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1159 
1160 	ret = 1;
1161  out:
1162 	ring_buffer_nest_end(buffer);
1163 	return ret;
1164 }
1165 EXPORT_SYMBOL_GPL(__trace_bputs);
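/*
 * A minimal sketch (not part of trace.c): callers normally reach the two
 * helpers above through the trace_puts() macro, which picks __trace_bputs()
 * for string literals and falls back to __trace_puts() otherwise. The
 * function below is a hypothetical debugging hook, shown for illustration.
 */
#if 0
static void example_debug_hook(int err)
{
	if (err)
		trace_puts("example: taking the slow path\n");
}
#endif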
1166 
1167 #ifdef CONFIG_TRACER_SNAPSHOT
1168 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1169 					   void *cond_data)
1170 {
1171 	struct tracer *tracer = tr->current_trace;
1172 	unsigned long flags;
1173 
1174 	if (in_nmi()) {
1175 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1176 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1177 		return;
1178 	}
1179 
1180 	if (!tr->allocated_snapshot) {
1181 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1182 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1183 		tracer_tracing_off(tr);
1184 		return;
1185 	}
1186 
1187 	/* Note, snapshot can not be used when the tracer uses it */
1188 	if (tracer->use_max_tr) {
1189 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1190 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1191 		return;
1192 	}
1193 
1194 	if (tr->mapped) {
1195 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1196 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1197 		return;
1198 	}
1199 
1200 	local_irq_save(flags);
1201 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1202 	local_irq_restore(flags);
1203 }
1204 
1205 void tracing_snapshot_instance(struct trace_array *tr)
1206 {
1207 	tracing_snapshot_instance_cond(tr, NULL);
1208 }
1209 
1210 /**
1211  * tracing_snapshot - take a snapshot of the current buffer.
1212  *
1213  * This causes a swap between the snapshot buffer and the current live
1214  * tracing buffer. You can use this to take snapshots of the live
1215  * trace when some condition is triggered, but continue to trace.
1216  *
1217  * Note, make sure to allocate the snapshot either with
1218  * tracing_snapshot_alloc(), or manually by doing:
1219  * echo 1 > /sys/kernel/tracing/snapshot
1220  *
1221  * If the snapshot buffer is not allocated, it will stop tracing.
1222  * Basically making a permanent snapshot.
1223  */
1224 void tracing_snapshot(void)
1225 {
1226 	struct trace_array *tr = &global_trace;
1227 
1228 	tracing_snapshot_instance(tr);
1229 }
1230 EXPORT_SYMBOL_GPL(tracing_snapshot);
1231 
1232 /**
1233  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1234  * @tr:		The tracing instance to snapshot
1235  * @cond_data:	The data to be tested conditionally, and possibly saved
1236  *
1237  * This is the same as tracing_snapshot() except that the snapshot is
1238  * conditional - the snapshot will only happen if the
1239  * cond_snapshot.update() implementation receiving the cond_data
1240  * returns true, which means that the trace array's cond_snapshot
1241  * update() operation used the cond_data to determine whether the
1242  * snapshot should be taken, and if it was, presumably saved it along
1243  * with the snapshot.
1244  */
1245 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1246 {
1247 	tracing_snapshot_instance_cond(tr, cond_data);
1248 }
1249 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1250 
1251 /**
1252  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1253  * @tr:		The tracing instance
1254  *
1255  * When the user enables a conditional snapshot using
1256  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1257  * with the snapshot.  This accessor is used to retrieve it.
1258  *
1259  * Should not be called from cond_snapshot.update(), since it takes
1260  * the tr->max_lock lock, which the code calling
1261  * cond_snapshot.update() has already done.
1262  *
1263  * Returns the cond_data associated with the trace array's snapshot.
1264  */
1265 void *tracing_cond_snapshot_data(struct trace_array *tr)
1266 {
1267 	void *cond_data = NULL;
1268 
1269 	local_irq_disable();
1270 	arch_spin_lock(&tr->max_lock);
1271 
1272 	if (tr->cond_snapshot)
1273 		cond_data = tr->cond_snapshot->cond_data;
1274 
1275 	arch_spin_unlock(&tr->max_lock);
1276 	local_irq_enable();
1277 
1278 	return cond_data;
1279 }
1280 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1281 
1282 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1283 					struct array_buffer *size_buf, int cpu_id);
1284 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1285 
1286 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1287 {
1288 	int order;
1289 	int ret;
1290 
1291 	if (!tr->allocated_snapshot) {
1292 
1293 		/* Make the snapshot buffer have the same order as main buffer */
1294 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1295 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1296 		if (ret < 0)
1297 			return ret;
1298 
1299 		/* allocate spare buffer */
1300 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1301 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1302 		if (ret < 0)
1303 			return ret;
1304 
1305 		tr->allocated_snapshot = true;
1306 	}
1307 
1308 	return 0;
1309 }
1310 
1311 static void free_snapshot(struct trace_array *tr)
1312 {
1313 	/*
1314 	 * We don't free the ring buffer; instead, we resize it, because
1315 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1316 	 * we want to preserve it.
1317 	 */
1318 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1319 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1320 	set_buffer_entries(&tr->max_buffer, 1);
1321 	tracing_reset_online_cpus(&tr->max_buffer);
1322 	tr->allocated_snapshot = false;
1323 }
1324 
1325 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1326 {
1327 	int ret;
1328 
1329 	lockdep_assert_held(&trace_types_lock);
1330 
1331 	spin_lock(&tr->snapshot_trigger_lock);
1332 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1333 		spin_unlock(&tr->snapshot_trigger_lock);
1334 		return -EBUSY;
1335 	}
1336 
1337 	tr->snapshot++;
1338 	spin_unlock(&tr->snapshot_trigger_lock);
1339 
1340 	ret = tracing_alloc_snapshot_instance(tr);
1341 	if (ret) {
1342 		spin_lock(&tr->snapshot_trigger_lock);
1343 		tr->snapshot--;
1344 		spin_unlock(&tr->snapshot_trigger_lock);
1345 	}
1346 
1347 	return ret;
1348 }
1349 
1350 int tracing_arm_snapshot(struct trace_array *tr)
1351 {
1352 	int ret;
1353 
1354 	mutex_lock(&trace_types_lock);
1355 	ret = tracing_arm_snapshot_locked(tr);
1356 	mutex_unlock(&trace_types_lock);
1357 
1358 	return ret;
1359 }
1360 
1361 void tracing_disarm_snapshot(struct trace_array *tr)
1362 {
1363 	spin_lock(&tr->snapshot_trigger_lock);
1364 	if (!WARN_ON(!tr->snapshot))
1365 		tr->snapshot--;
1366 	spin_unlock(&tr->snapshot_trigger_lock);
1367 }
1368 
1369 /**
1370  * tracing_alloc_snapshot - allocate snapshot buffer.
1371  *
1372  * This only allocates the snapshot buffer if it isn't already
1373  * allocated - it doesn't also take a snapshot.
1374  *
1375  * This is meant to be used in cases where the snapshot buffer needs
1376  * to be set up for events that can't sleep but need to be able to
1377  * trigger a snapshot.
1378  */
1379 int tracing_alloc_snapshot(void)
1380 {
1381 	struct trace_array *tr = &global_trace;
1382 	int ret;
1383 
1384 	ret = tracing_alloc_snapshot_instance(tr);
1385 	WARN_ON(ret < 0);
1386 
1387 	return ret;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1390 
1391 /**
1392  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1393  *
1394  * This is similar to tracing_snapshot(), but it will allocate the
1395  * snapshot buffer if it isn't already allocated. Use this only
1396  * where it is safe to sleep, as the allocation may sleep.
1397  *
1398  * This causes a swap between the snapshot buffer and the current live
1399  * tracing buffer. You can use this to take snapshots of the live
1400  * trace when some condition is triggered, but continue to trace.
1401  */
1402 void tracing_snapshot_alloc(void)
1403 {
1404 	int ret;
1405 
1406 	ret = tracing_alloc_snapshot();
1407 	if (ret < 0)
1408 		return;
1409 
1410 	tracing_snapshot();
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
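/*
 * A minimal sketch (not part of trace.c): allocate the snapshot buffer in
 * a context that may sleep, then take snapshots later from a fast path.
 * The setup/condition functions are hypothetical; only
 * tracing_snapshot_alloc() and tracing_snapshot() are real API.
 */
#if 0
static int __init example_snapshot_setup(void)
{
	/* Allocates the snapshot buffer and takes a first snapshot. */
	tracing_snapshot_alloc();
	return 0;
}

static void example_condition_hit(void)
{
	/* Safe once the snapshot buffer is allocated (but not from NMI). */
	tracing_snapshot();
}
#endif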
1413 
1414 /**
1415  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1416  * @tr:		The tracing instance
1417  * @cond_data:	User data to associate with the snapshot
1418  * @update:	Implementation of the cond_snapshot update function
1419  *
1420  * Check whether the conditional snapshot for the given instance has
1421  * already been enabled, or if the current tracer is already using a
1422  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1423  * save the cond_data and update function inside.
1424  *
1425  * Returns 0 if successful, error otherwise.
1426  */
1427 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1428 				 cond_update_fn_t update)
1429 {
1430 	struct cond_snapshot *cond_snapshot;
1431 	int ret = 0;
1432 
1433 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1434 	if (!cond_snapshot)
1435 		return -ENOMEM;
1436 
1437 	cond_snapshot->cond_data = cond_data;
1438 	cond_snapshot->update = update;
1439 
1440 	mutex_lock(&trace_types_lock);
1441 
1442 	if (tr->current_trace->use_max_tr) {
1443 		ret = -EBUSY;
1444 		goto fail_unlock;
1445 	}
1446 
1447 	/*
1448 	 * The cond_snapshot can only change to NULL without the
1449 	 * trace_types_lock. We don't care if we race with it going
1450 	 * to NULL, but we want to make sure that it's not set to
1451 	 * something other than NULL when we get here, which we can
1452 	 * do safely with only holding the trace_types_lock and not
1453 	 * having to take the max_lock.
1454 	 */
1455 	if (tr->cond_snapshot) {
1456 		ret = -EBUSY;
1457 		goto fail_unlock;
1458 	}
1459 
1460 	ret = tracing_arm_snapshot_locked(tr);
1461 	if (ret)
1462 		goto fail_unlock;
1463 
1464 	local_irq_disable();
1465 	arch_spin_lock(&tr->max_lock);
1466 	tr->cond_snapshot = cond_snapshot;
1467 	arch_spin_unlock(&tr->max_lock);
1468 	local_irq_enable();
1469 
1470 	mutex_unlock(&trace_types_lock);
1471 
1472 	return ret;
1473 
1474  fail_unlock:
1475 	mutex_unlock(&trace_types_lock);
1476 	kfree(cond_snapshot);
1477 	return ret;
1478 }
1479 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1480 
1481 /**
1482  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1483  * @tr:		The tracing instance
1484  *
1485  * Check whether the conditional snapshot for the given instance is
1486  * enabled; if so, free the cond_snapshot associated with it,
1487  * otherwise return -EINVAL.
1488  *
1489  * Returns 0 if successful, error otherwise.
1490  */
1491 int tracing_snapshot_cond_disable(struct trace_array *tr)
1492 {
1493 	int ret = 0;
1494 
1495 	local_irq_disable();
1496 	arch_spin_lock(&tr->max_lock);
1497 
1498 	if (!tr->cond_snapshot)
1499 		ret = -EINVAL;
1500 	else {
1501 		kfree(tr->cond_snapshot);
1502 		tr->cond_snapshot = NULL;
1503 	}
1504 
1505 	arch_spin_unlock(&tr->max_lock);
1506 	local_irq_enable();
1507 
1508 	tracing_disarm_snapshot(tr);
1509 
1510 	return ret;
1511 }
1512 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1513 #else
1514 void tracing_snapshot(void)
1515 {
1516 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1517 }
1518 EXPORT_SYMBOL_GPL(tracing_snapshot);
1519 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1520 {
1521 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1522 }
1523 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1524 int tracing_alloc_snapshot(void)
1525 {
1526 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1527 	return -ENODEV;
1528 }
1529 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1530 void tracing_snapshot_alloc(void)
1531 {
1532 	/* Give warning */
1533 	tracing_snapshot();
1534 }
1535 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1536 void *tracing_cond_snapshot_data(struct trace_array *tr)
1537 {
1538 	return NULL;
1539 }
1540 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1541 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1542 {
1543 	return -ENODEV;
1544 }
1545 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1546 int tracing_snapshot_cond_disable(struct trace_array *tr)
1547 {
1548 	return false;
1549 }
1550 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1551 #define free_snapshot(tr)	do { } while (0)
1552 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1553 #endif /* CONFIG_TRACER_SNAPSHOT */
1554 
1555 void tracer_tracing_off(struct trace_array *tr)
1556 {
1557 	if (tr->array_buffer.buffer)
1558 		ring_buffer_record_off(tr->array_buffer.buffer);
1559 	/*
1560 	 * This flag is looked at when buffers haven't been allocated
1561 	 * yet, or by some tracers (like irqsoff), that just want to
1562 	 * know if the ring buffer has been disabled, but it can handle
1563 	 * races where it gets disabled while we still do a record.
1564 	 * As the check is in the fast path of the tracers, it is more
1565 	 * important to be fast than accurate.
1566 	 */
1567 	tr->buffer_disabled = 1;
1568 	/* Make the flag seen by readers */
1569 	smp_wmb();
1570 }
1571 
1572 /**
1573  * tracing_off - turn off tracing buffers
1574  *
1575  * This function stops the tracing buffers from recording data.
1576  * It does not disable any overhead the tracers themselves may
1577  * be causing. This function simply causes all recording to
1578  * the ring buffers to fail.
1579  */
1580 void tracing_off(void)
1581 {
1582 	tracer_tracing_off(&global_trace);
1583 }
1584 EXPORT_SYMBOL_GPL(tracing_off);
1585 
1586 void disable_trace_on_warning(void)
1587 {
1588 	if (__disable_trace_on_warning) {
1589 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1590 			"Disabling tracing due to warning\n");
1591 		tracing_off();
1592 	}
1593 }
1594 
1595 /**
1596  * tracer_tracing_is_on - show real state of ring buffer enabled
1597  * @tr : the trace array to know if ring buffer is enabled
1598  *
1599  * Shows real state of the ring buffer if it is enabled or not.
1600  */
1601 bool tracer_tracing_is_on(struct trace_array *tr)
1602 {
1603 	if (tr->array_buffer.buffer)
1604 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1605 	return !tr->buffer_disabled;
1606 }
1607 
1608 /**
1609  * tracing_is_on - show state of ring buffers enabled
1610  */
1611 int tracing_is_on(void)
1612 {
1613 	return tracer_tracing_is_on(&global_trace);
1614 }
1615 EXPORT_SYMBOL_GPL(tracing_is_on);
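/*
 * A minimal sketch (not part of trace.c): freeze the ring buffer at the
 * point where a bug is detected so the trace leading up to it can be read
 * later from /sys/kernel/tracing/trace. The checked condition is
 * hypothetical.
 */
#if 0
static void example_check(int status)
{
	if (WARN_ON_ONCE(status < 0) && tracing_is_on())
		tracing_off();
}
#endif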
1616 
1617 static int __init set_buf_size(char *str)
1618 {
1619 	unsigned long buf_size;
1620 
1621 	if (!str)
1622 		return 0;
1623 	buf_size = memparse(str, &str);
1624 	/*
1625 	 * nr_entries can not be zero and the startup
1626 	 * tests require some buffer space. Therefore
1627 	 * ensure we have at least 4096 bytes of buffer.
1628 	 */
1629 	trace_buf_size = max(4096UL, buf_size);
1630 	return 1;
1631 }
1632 __setup("trace_buf_size=", set_buf_size);
1633 
1634 static int __init set_tracing_thresh(char *str)
1635 {
1636 	unsigned long threshold;
1637 	int ret;
1638 
1639 	if (!str)
1640 		return 0;
1641 	ret = kstrtoul(str, 0, &threshold);
1642 	if (ret < 0)
1643 		return 0;
1644 	tracing_thresh = threshold * 1000;
1645 	return 1;
1646 }
1647 __setup("tracing_thresh=", set_tracing_thresh);
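/*
 * Illustrative command-line examples (not part of trace.c) for the two boot
 * parameters above. trace_buf_size= goes through memparse(), so it accepts
 * K/M/G suffixes; tracing_thresh= is given in microseconds and stored in
 * nanoseconds:
 *
 *   trace_buf_size=4M      4 MiB of ring buffer per CPU
 *   tracing_thresh=100     e.g. latency tracers then only record traces
 *                          exceeding 100 microseconds
 */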
1648 
1649 unsigned long nsecs_to_usecs(unsigned long nsecs)
1650 {
1651 	return nsecs / 1000;
1652 }
1653 
1654 /*
1655  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1656  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1657  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1658  * of strings in the order that the evals (enum) were defined.
1659  */
1660 #undef C
1661 #define C(a, b) b
1662 
1663 /* These must match the bit positions in trace_iterator_flags */
1664 static const char *trace_options[] = {
1665 	TRACE_FLAGS
1666 	NULL
1667 };
1668 
1669 static struct {
1670 	u64 (*func)(void);
1671 	const char *name;
1672 	int in_ns;		/* is this clock in nanoseconds? */
1673 } trace_clocks[] = {
1674 	{ trace_clock_local,		"local",	1 },
1675 	{ trace_clock_global,		"global",	1 },
1676 	{ trace_clock_counter,		"counter",	0 },
1677 	{ trace_clock_jiffies,		"uptime",	0 },
1678 	{ trace_clock,			"perf",		1 },
1679 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1680 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1681 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1682 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1683 	ARCH_TRACE_CLOCKS
1684 };
1685 
1686 bool trace_clock_in_ns(struct trace_array *tr)
1687 {
1688 	if (trace_clocks[tr->clock_id].in_ns)
1689 		return true;
1690 
1691 	return false;
1692 }
1693 
1694 /*
1695  * trace_parser_get_init - gets the buffer for trace parser
1696  */
1697 int trace_parser_get_init(struct trace_parser *parser, int size)
1698 {
1699 	memset(parser, 0, sizeof(*parser));
1700 
1701 	parser->buffer = kmalloc(size, GFP_KERNEL);
1702 	if (!parser->buffer)
1703 		return 1;
1704 
1705 	parser->size = size;
1706 	return 0;
1707 }
1708 
1709 /*
1710  * trace_parser_put - frees the buffer for trace parser
1711  */
1712 void trace_parser_put(struct trace_parser *parser)
1713 {
1714 	kfree(parser->buffer);
1715 	parser->buffer = NULL;
1716 }
1717 
1718 /*
1719  * trace_get_user - reads the user input string separated by space
1720  * (matched by isspace(ch))
1721  *
1722  * For each string found the 'struct trace_parser' is updated,
1723  * and the function returns.
1724  *
1725  * Returns number of bytes read.
1726  *
1727  * See kernel/trace/trace.h for 'struct trace_parser' details.
1728  */
1729 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1730 	size_t cnt, loff_t *ppos)
1731 {
1732 	char ch;
1733 	size_t read = 0;
1734 	ssize_t ret;
1735 
1736 	if (!*ppos)
1737 		trace_parser_clear(parser);
1738 
1739 	ret = get_user(ch, ubuf++);
1740 	if (ret)
1741 		goto out;
1742 
1743 	read++;
1744 	cnt--;
1745 
1746 	/*
1747 	 * The parser is not finished with the last write,
1748 	 * continue reading the user input without skipping spaces.
1749 	 */
1750 	if (!parser->cont) {
1751 		/* skip white space */
1752 		while (cnt && isspace(ch)) {
1753 			ret = get_user(ch, ubuf++);
1754 			if (ret)
1755 				goto out;
1756 			read++;
1757 			cnt--;
1758 		}
1759 
1760 		parser->idx = 0;
1761 
1762 		/* only spaces were written */
1763 		if (isspace(ch) || !ch) {
1764 			*ppos += read;
1765 			ret = read;
1766 			goto out;
1767 		}
1768 	}
1769 
1770 	/* read the non-space input */
1771 	while (cnt && !isspace(ch) && ch) {
1772 		if (parser->idx < parser->size - 1)
1773 			parser->buffer[parser->idx++] = ch;
1774 		else {
1775 			ret = -EINVAL;
1776 			goto out;
1777 		}
1778 		ret = get_user(ch, ubuf++);
1779 		if (ret)
1780 			goto out;
1781 		read++;
1782 		cnt--;
1783 	}
1784 
1785 	/* We either got finished input or we have to wait for another call. */
1786 	if (isspace(ch) || !ch) {
1787 		parser->buffer[parser->idx] = 0;
1788 		parser->cont = false;
1789 	} else if (parser->idx < parser->size - 1) {
1790 		parser->cont = true;
1791 		parser->buffer[parser->idx++] = ch;
1792 		/* Make sure the parsed string always terminates with '\0'. */
1793 		parser->buffer[parser->idx] = 0;
1794 	} else {
1795 		ret = -EINVAL;
1796 		goto out;
1797 	}
1798 
1799 	*ppos += read;
1800 	ret = read;
1801 
1802 out:
1803 	return ret;
1804 }
1805 
1806 /* TODO add a seq_buf_to_buffer() */
1807 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1808 {
1809 	int len;
1810 
1811 	if (trace_seq_used(s) <= s->readpos)
1812 		return -EBUSY;
1813 
1814 	len = trace_seq_used(s) - s->readpos;
1815 	if (cnt > len)
1816 		cnt = len;
1817 	memcpy(buf, s->buffer + s->readpos, cnt);
1818 
1819 	s->readpos += cnt;
1820 	return cnt;
1821 }
1822 
1823 unsigned long __read_mostly	tracing_thresh;
1824 
1825 #ifdef CONFIG_TRACER_MAX_TRACE
1826 static const struct file_operations tracing_max_lat_fops;
1827 
1828 #ifdef LATENCY_FS_NOTIFY
1829 
1830 static struct workqueue_struct *fsnotify_wq;
1831 
1832 static void latency_fsnotify_workfn(struct work_struct *work)
1833 {
1834 	struct trace_array *tr = container_of(work, struct trace_array,
1835 					      fsnotify_work);
1836 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1837 }
1838 
1839 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1840 {
1841 	struct trace_array *tr = container_of(iwork, struct trace_array,
1842 					      fsnotify_irqwork);
1843 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1844 }
1845 
1846 static void trace_create_maxlat_file(struct trace_array *tr,
1847 				     struct dentry *d_tracer)
1848 {
1849 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1850 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1851 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1852 					      TRACE_MODE_WRITE,
1853 					      d_tracer, tr,
1854 					      &tracing_max_lat_fops);
1855 }
1856 
1857 __init static int latency_fsnotify_init(void)
1858 {
1859 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1860 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1861 	if (!fsnotify_wq) {
1862 		pr_err("Unable to allocate tr_max_lat_wq\n");
1863 		return -ENOMEM;
1864 	}
1865 	return 0;
1866 }
1867 
1868 late_initcall_sync(latency_fsnotify_init);
1869 
1870 void latency_fsnotify(struct trace_array *tr)
1871 {
1872 	if (!fsnotify_wq)
1873 		return;
1874 	/*
1875 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1876 	 * possible that we are called from __schedule() or do_idle(), which
1877 	 * could cause a deadlock.
1878 	 */
1879 	irq_work_queue(&tr->fsnotify_irqwork);
1880 }
1881 
1882 #else /* !LATENCY_FS_NOTIFY */
1883 
1884 #define trace_create_maxlat_file(tr, d_tracer)				\
1885 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1886 			  d_tracer, tr, &tracing_max_lat_fops)
1887 
1888 #endif
1889 
1890 /*
1891  * Copy the new maximum trace into the separate maximum-trace
1892  * structure. (this way the maximum trace is permanently saved,
1893  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1894  */
1895 static void
1896 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1897 {
1898 	struct array_buffer *trace_buf = &tr->array_buffer;
1899 	struct array_buffer *max_buf = &tr->max_buffer;
1900 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1901 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1902 
1903 	max_buf->cpu = cpu;
1904 	max_buf->time_start = data->preempt_timestamp;
1905 
1906 	max_data->saved_latency = tr->max_latency;
1907 	max_data->critical_start = data->critical_start;
1908 	max_data->critical_end = data->critical_end;
1909 
1910 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1911 	max_data->pid = tsk->pid;
1912 	/*
1913 	 * If tsk == current, then use current_uid(), as that does not use
1914 	 * RCU. The irq tracer can be called out of RCU scope.
1915 	 */
1916 	if (tsk == current)
1917 		max_data->uid = current_uid();
1918 	else
1919 		max_data->uid = task_uid(tsk);
1920 
1921 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1922 	max_data->policy = tsk->policy;
1923 	max_data->rt_priority = tsk->rt_priority;
1924 
1925 	/* record this task's comm */
1926 	tracing_record_cmdline(tsk);
1927 	latency_fsnotify(tr);
1928 }
1929 
1930 /**
1931  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1932  * @tr: tracer
1933  * @tsk: the task with the latency
1934  * @cpu: The cpu that initiated the trace.
1935  * @cond_data: User data associated with a conditional snapshot
1936  *
1937  * Flip the buffers between the @tr and the max_tr and record information
1938  * about which task was the cause of this latency.
1939  */
1940 void
1941 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1942 	      void *cond_data)
1943 {
1944 	if (tr->stop_count)
1945 		return;
1946 
1947 	WARN_ON_ONCE(!irqs_disabled());
1948 
1949 	if (!tr->allocated_snapshot) {
1950 		/* Only the nop tracer should hit this when disabling */
1951 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1952 		return;
1953 	}
1954 
1955 	arch_spin_lock(&tr->max_lock);
1956 
1957 	/* Inherit the recordable setting from array_buffer */
1958 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1959 		ring_buffer_record_on(tr->max_buffer.buffer);
1960 	else
1961 		ring_buffer_record_off(tr->max_buffer.buffer);
1962 
1963 #ifdef CONFIG_TRACER_SNAPSHOT
1964 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1965 		arch_spin_unlock(&tr->max_lock);
1966 		return;
1967 	}
1968 #endif
1969 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1970 
1971 	__update_max_tr(tr, tsk, cpu);
1972 
1973 	arch_spin_unlock(&tr->max_lock);
1974 
1975 	/* Any waiters on the old snapshot buffer need to wake up */
1976 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1977 }
1978 
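/*
 * Editor's sketch, not part of the original file: roughly how a latency
 * tracer records a new maximum (compare the irqsoff/wakeup tracers). The
 * function name and the way the delta is obtained are hypothetical.
 */
#if 0
static void my_report_latency(struct trace_array *tr, unsigned long delta, int cpu)
{
	if (delta <= tr->max_latency)
		return;

	tr->max_latency = delta;
	update_max_tr(tr, current, cpu, NULL);
}
#endif
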
1979 /**
1980  * update_max_tr_single - only copy one trace over, and reset the rest
1981  * @tr: tracer
1982  * @tsk: task with the latency
1983  * @cpu: the cpu of the buffer to copy.
1984  *
1985  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1986  */
1987 void
1988 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1989 {
1990 	int ret;
1991 
1992 	if (tr->stop_count)
1993 		return;
1994 
1995 	WARN_ON_ONCE(!irqs_disabled());
1996 	if (!tr->allocated_snapshot) {
1997 		/* Only the nop tracer should hit this when disabling */
1998 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1999 		return;
2000 	}
2001 
2002 	arch_spin_lock(&tr->max_lock);
2003 
2004 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2005 
2006 	if (ret == -EBUSY) {
2007 		/*
2008 		 * We failed to swap the buffer due to a commit taking
2009 		 * place on this CPU. We fail to record, but we reset
2010 		 * the max trace buffer (no one writes directly to it)
2011 		 * and flag that it failed.
2012 		 * A resize in progress can also cause this failure.
2013 		 */
2014 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2015 			"Failed to swap buffers due to commit or resize in progress\n");
2016 	}
2017 
2018 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2019 
2020 	__update_max_tr(tr, tsk, cpu);
2021 	arch_spin_unlock(&tr->max_lock);
2022 }
2023 
2024 #endif /* CONFIG_TRACER_MAX_TRACE */
2025 
2026 struct pipe_wait {
2027 	struct trace_iterator		*iter;
2028 	int				wait_index;
2029 };
2030 
2031 static bool wait_pipe_cond(void *data)
2032 {
2033 	struct pipe_wait *pwait = data;
2034 	struct trace_iterator *iter = pwait->iter;
2035 
2036 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2037 		return true;
2038 
2039 	return iter->closed;
2040 }
2041 
2042 static int wait_on_pipe(struct trace_iterator *iter, int full)
2043 {
2044 	struct pipe_wait pwait;
2045 	int ret;
2046 
2047 	/* Iterators are static, they should be filled or empty */
2048 	if (trace_buffer_iter(iter, iter->cpu_file))
2049 		return 0;
2050 
2051 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2052 	pwait.iter = iter;
2053 
2054 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2055 			       wait_pipe_cond, &pwait);
2056 
2057 #ifdef CONFIG_TRACER_MAX_TRACE
2058 	/*
2059 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2060 	 * to happen, this would now be the main buffer.
2061 	 */
2062 	if (iter->snapshot)
2063 		iter->array_buffer = &iter->tr->max_buffer;
2064 #endif
2065 	return ret;
2066 }
2067 
2068 #ifdef CONFIG_FTRACE_STARTUP_TEST
2069 static bool selftests_can_run;
2070 
2071 struct trace_selftests {
2072 	struct list_head		list;
2073 	struct tracer			*type;
2074 };
2075 
2076 static LIST_HEAD(postponed_selftests);
2077 
2078 static int save_selftest(struct tracer *type)
2079 {
2080 	struct trace_selftests *selftest;
2081 
2082 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2083 	if (!selftest)
2084 		return -ENOMEM;
2085 
2086 	selftest->type = type;
2087 	list_add(&selftest->list, &postponed_selftests);
2088 	return 0;
2089 }
2090 
2091 static int run_tracer_selftest(struct tracer *type)
2092 {
2093 	struct trace_array *tr = &global_trace;
2094 	struct tracer *saved_tracer = tr->current_trace;
2095 	int ret;
2096 
2097 	if (!type->selftest || tracing_selftest_disabled)
2098 		return 0;
2099 
2100 	/*
2101 	 * If a tracer registers early in boot up (before scheduling is
2102 	 * initialized and such), then do not run its selftests yet.
2103 	 * Instead, run it a little later in the boot process.
2104 	 */
2105 	if (!selftests_can_run)
2106 		return save_selftest(type);
2107 
2108 	if (!tracing_is_on()) {
2109 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2110 			type->name);
2111 		return 0;
2112 	}
2113 
2114 	/*
2115 	 * Run a selftest on this tracer.
2116 	 * Here we reset the trace buffer, and set the current
2117 	 * tracer to be this tracer. The tracer can then run some
2118 	 * internal tracing to verify that everything is in order.
2119 	 * If we fail, we do not register this tracer.
2120 	 */
2121 	tracing_reset_online_cpus(&tr->array_buffer);
2122 
2123 	tr->current_trace = type;
2124 
2125 #ifdef CONFIG_TRACER_MAX_TRACE
2126 	if (type->use_max_tr) {
2127 		/* If we expanded the buffers, make sure the max is expanded too */
2128 		if (tr->ring_buffer_expanded)
2129 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2130 					   RING_BUFFER_ALL_CPUS);
2131 		tr->allocated_snapshot = true;
2132 	}
2133 #endif
2134 
2135 	/* the test is responsible for initializing and enabling */
2136 	pr_info("Testing tracer %s: ", type->name);
2137 	ret = type->selftest(type, tr);
2138 	/* the test is responsible for resetting too */
2139 	tr->current_trace = saved_tracer;
2140 	if (ret) {
2141 		printk(KERN_CONT "FAILED!\n");
2142 		/* Add the warning after printing 'FAILED' */
2143 		WARN_ON(1);
2144 		return -1;
2145 	}
2146 	/* Only reset on passing, to avoid touching corrupted buffers */
2147 	tracing_reset_online_cpus(&tr->array_buffer);
2148 
2149 #ifdef CONFIG_TRACER_MAX_TRACE
2150 	if (type->use_max_tr) {
2151 		tr->allocated_snapshot = false;
2152 
2153 		/* Shrink the max buffer again */
2154 		if (tr->ring_buffer_expanded)
2155 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2156 					   RING_BUFFER_ALL_CPUS);
2157 	}
2158 #endif
2159 
2160 	printk(KERN_CONT "PASSED\n");
2161 	return 0;
2162 }
2163 
2164 static int do_run_tracer_selftest(struct tracer *type)
2165 {
2166 	int ret;
2167 
2168 	/*
2169 	 * Tests can take a long time, especially if they are run one after the
2170 	 * other, as does happen during bootup when all the tracers are
2171 	 * registered. This could cause the soft lockup watchdog to trigger.
2172 	 */
2173 	cond_resched();
2174 
2175 	tracing_selftest_running = true;
2176 	ret = run_tracer_selftest(type);
2177 	tracing_selftest_running = false;
2178 
2179 	return ret;
2180 }
2181 
2182 static __init int init_trace_selftests(void)
2183 {
2184 	struct trace_selftests *p, *n;
2185 	struct tracer *t, **last;
2186 	int ret;
2187 
2188 	selftests_can_run = true;
2189 
2190 	mutex_lock(&trace_types_lock);
2191 
2192 	if (list_empty(&postponed_selftests))
2193 		goto out;
2194 
2195 	pr_info("Running postponed tracer tests:\n");
2196 
2197 	tracing_selftest_running = true;
2198 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2199 		/* This loop can take minutes when sanitizers are enabled, so
2200 		 * let's make sure we allow RCU processing.
2201 		 */
2202 		cond_resched();
2203 		ret = run_tracer_selftest(p->type);
2204 		/* If the test fails, then warn and remove from available_tracers */
2205 		if (ret < 0) {
2206 			WARN(1, "tracer: %s failed selftest, disabling\n",
2207 			     p->type->name);
2208 			last = &trace_types;
2209 			for (t = trace_types; t; t = t->next) {
2210 				if (t == p->type) {
2211 					*last = t->next;
2212 					break;
2213 				}
2214 				last = &t->next;
2215 			}
2216 		}
2217 		list_del(&p->list);
2218 		kfree(p);
2219 	}
2220 	tracing_selftest_running = false;
2221 
2222  out:
2223 	mutex_unlock(&trace_types_lock);
2224 
2225 	return 0;
2226 }
2227 core_initcall(init_trace_selftests);
2228 #else
2229 static inline int run_tracer_selftest(struct tracer *type)
2230 {
2231 	return 0;
2232 }
2233 static inline int do_run_tracer_selftest(struct tracer *type)
2234 {
2235 	return 0;
2236 }
2237 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2238 
2239 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2240 
2241 static void __init apply_trace_boot_options(void);
2242 
2243 /**
2244  * register_tracer - register a tracer with the ftrace system.
2245  * @type: the plugin for the tracer
2246  *
2247  * Register a new plugin tracer.
2248  */
2249 int __init register_tracer(struct tracer *type)
2250 {
2251 	struct tracer *t;
2252 	int ret = 0;
2253 
2254 	if (!type->name) {
2255 		pr_info("Tracer must have a name\n");
2256 		return -1;
2257 	}
2258 
2259 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2260 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2261 		return -1;
2262 	}
2263 
2264 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2265 		pr_warn("Can not register tracer %s due to lockdown\n",
2266 			   type->name);
2267 		return -EPERM;
2268 	}
2269 
2270 	mutex_lock(&trace_types_lock);
2271 
2272 	for (t = trace_types; t; t = t->next) {
2273 		if (strcmp(type->name, t->name) == 0) {
2274 			/* already found */
2275 			pr_info("Tracer %s already registered\n",
2276 				type->name);
2277 			ret = -1;
2278 			goto out;
2279 		}
2280 	}
2281 
2282 	if (!type->set_flag)
2283 		type->set_flag = &dummy_set_flag;
2284 	if (!type->flags) {
2285 		/* Allocate a dummy tracer_flags */
2286 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2287 		if (!type->flags) {
2288 			ret = -ENOMEM;
2289 			goto out;
2290 		}
2291 		type->flags->val = 0;
2292 		type->flags->opts = dummy_tracer_opt;
2293 	} else
2294 		if (!type->flags->opts)
2295 			type->flags->opts = dummy_tracer_opt;
2296 
2297 	/* store the tracer for __set_tracer_option */
2298 	type->flags->trace = type;
2299 
2300 	ret = do_run_tracer_selftest(type);
2301 	if (ret < 0)
2302 		goto out;
2303 
2304 	type->next = trace_types;
2305 	trace_types = type;
2306 	add_tracer_options(&global_trace, type);
2307 
2308  out:
2309 	mutex_unlock(&trace_types_lock);
2310 
2311 	if (ret || !default_bootup_tracer)
2312 		goto out_unlock;
2313 
2314 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2315 		goto out_unlock;
2316 
2317 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2318 	/* Do we want this tracer to start on bootup? */
2319 	tracing_set_tracer(&global_trace, type->name);
2320 	default_bootup_tracer = NULL;
2321 
2322 	apply_trace_boot_options();
2323 
2324 	/* Disable other selftests, since this will break them. */
2325 	disable_tracing_selftest("running a tracer");
2326 
2327  out_unlock:
2328 	return ret;
2329 }
2330 
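/*
 * Editor's sketch, not part of the original file: a minimal tracer
 * registration. The callbacks and names are hypothetical; real tracers
 * (e.g. kernel/trace/trace_nop.c) fill in more of struct tracer.
 */
#if 0
static int my_tracer_init(struct trace_array *tr)
{
	/* Arm whatever hooks this tracer needs */
	return 0;
}

static void my_tracer_reset(struct trace_array *tr)
{
	/* Undo what my_tracer_init() did */
}

static struct tracer my_tracer __read_mostly = {
	.name	= "my_tracer",
	.init	= my_tracer_init,
	.reset	= my_tracer_reset,
};

static __init int my_tracer_register(void)
{
	return register_tracer(&my_tracer);
}
core_initcall(my_tracer_register);
#endif
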
2331 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2332 {
2333 	struct trace_buffer *buffer = buf->buffer;
2334 
2335 	if (!buffer)
2336 		return;
2337 
2338 	ring_buffer_record_disable(buffer);
2339 
2340 	/* Make sure all commits have finished */
2341 	synchronize_rcu();
2342 	ring_buffer_reset_cpu(buffer, cpu);
2343 
2344 	ring_buffer_record_enable(buffer);
2345 }
2346 
2347 void tracing_reset_online_cpus(struct array_buffer *buf)
2348 {
2349 	struct trace_buffer *buffer = buf->buffer;
2350 
2351 	if (!buffer)
2352 		return;
2353 
2354 	ring_buffer_record_disable(buffer);
2355 
2356 	/* Make sure all commits have finished */
2357 	synchronize_rcu();
2358 
2359 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2360 
2361 	ring_buffer_reset_online_cpus(buffer);
2362 
2363 	ring_buffer_record_enable(buffer);
2364 }
2365 
2366 /* Must have trace_types_lock held */
2367 void tracing_reset_all_online_cpus_unlocked(void)
2368 {
2369 	struct trace_array *tr;
2370 
2371 	lockdep_assert_held(&trace_types_lock);
2372 
2373 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2374 		if (!tr->clear_trace)
2375 			continue;
2376 		tr->clear_trace = false;
2377 		tracing_reset_online_cpus(&tr->array_buffer);
2378 #ifdef CONFIG_TRACER_MAX_TRACE
2379 		tracing_reset_online_cpus(&tr->max_buffer);
2380 #endif
2381 	}
2382 }
2383 
2384 void tracing_reset_all_online_cpus(void)
2385 {
2386 	mutex_lock(&trace_types_lock);
2387 	tracing_reset_all_online_cpus_unlocked();
2388 	mutex_unlock(&trace_types_lock);
2389 }
2390 
2391 int is_tracing_stopped(void)
2392 {
2393 	return global_trace.stop_count;
2394 }
2395 
2396 static void tracing_start_tr(struct trace_array *tr)
2397 {
2398 	struct trace_buffer *buffer;
2399 	unsigned long flags;
2400 
2401 	if (tracing_disabled)
2402 		return;
2403 
2404 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2405 	if (--tr->stop_count) {
2406 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2407 			/* Someone screwed up their debugging */
2408 			tr->stop_count = 0;
2409 		}
2410 		goto out;
2411 	}
2412 
2413 	/* Prevent the buffers from switching */
2414 	arch_spin_lock(&tr->max_lock);
2415 
2416 	buffer = tr->array_buffer.buffer;
2417 	if (buffer)
2418 		ring_buffer_record_enable(buffer);
2419 
2420 #ifdef CONFIG_TRACER_MAX_TRACE
2421 	buffer = tr->max_buffer.buffer;
2422 	if (buffer)
2423 		ring_buffer_record_enable(buffer);
2424 #endif
2425 
2426 	arch_spin_unlock(&tr->max_lock);
2427 
2428  out:
2429 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2430 }
2431 
2432 /**
2433  * tracing_start - quick start of the tracer
2434  *
2435  * If tracing is enabled but was stopped by tracing_stop,
2436  * this will start the tracer back up.
2437  */
2438 void tracing_start(void)
2440 {
2441 	return tracing_start_tr(&global_trace);
2442 }
2443 
2444 static void tracing_stop_tr(struct trace_array *tr)
2445 {
2446 	struct trace_buffer *buffer;
2447 	unsigned long flags;
2448 
2449 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2450 	if (tr->stop_count++)
2451 		goto out;
2452 
2453 	/* Prevent the buffers from switching */
2454 	arch_spin_lock(&tr->max_lock);
2455 
2456 	buffer = tr->array_buffer.buffer;
2457 	if (buffer)
2458 		ring_buffer_record_disable(buffer);
2459 
2460 #ifdef CONFIG_TRACER_MAX_TRACE
2461 	buffer = tr->max_buffer.buffer;
2462 	if (buffer)
2463 		ring_buffer_record_disable(buffer);
2464 #endif
2465 
2466 	arch_spin_unlock(&tr->max_lock);
2467 
2468  out:
2469 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2470 }
2471 
2472 /**
2473  * tracing_stop - quick stop of the tracer
2474  *
2475  * Lightweight way to stop tracing. Use in conjunction with
2476  * tracing_start.
2477  */
2478 void tracing_stop(void)
2479 {
2480 	return tracing_stop_tr(&global_trace);
2481 }
2482 
2483 /*
2484  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2485  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2486  * simplifies those functions and keeps them in sync.
2487  */
2488 enum print_line_t trace_handle_return(struct trace_seq *s)
2489 {
2490 	return trace_seq_has_overflowed(s) ?
2491 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2492 }
2493 EXPORT_SYMBOL_GPL(trace_handle_return);
2494 
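/*
 * Editor's sketch, not part of the original file: typical use of
 * trace_handle_return() at the end of a trace_event output callback.
 * The callback name and message are hypothetical.
 */
#if 0
static enum print_line_t my_event_output(struct trace_iterator *iter, int flags,
					 struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "my_event fired\n");

	/* PARTIAL_LINE if the seq buffer overflowed, HANDLED otherwise */
	return trace_handle_return(s);
}
#endif
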
2495 static unsigned short migration_disable_value(void)
2496 {
2497 #if defined(CONFIG_SMP)
2498 	return current->migration_disabled;
2499 #else
2500 	return 0;
2501 #endif
2502 }
2503 
2504 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2505 {
2506 	unsigned int trace_flags = irqs_status;
2507 	unsigned int pc;
2508 
2509 	pc = preempt_count();
2510 
2511 	if (pc & NMI_MASK)
2512 		trace_flags |= TRACE_FLAG_NMI;
2513 	if (pc & HARDIRQ_MASK)
2514 		trace_flags |= TRACE_FLAG_HARDIRQ;
2515 	if (in_serving_softirq())
2516 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2517 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2518 		trace_flags |= TRACE_FLAG_BH_OFF;
2519 
2520 	if (tif_need_resched())
2521 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2522 	if (test_preempt_need_resched())
2523 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2524 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2525 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2526 }
2527 
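/*
 * Editor's note, not part of the original file: a sketch of how the packed
 * value built above can be unpacked. The helper name is hypothetical; the
 * real consumers open-code the equivalent shifts and masks.
 */
#if 0
static void my_unpack_trace_ctx(unsigned int trace_ctx)
{
	unsigned int flags   = trace_ctx >> 16;        /* TRACE_FLAG_* bits */
	unsigned int preempt = trace_ctx & 0xf;        /* clamped preempt depth */
	unsigned int migrate = (trace_ctx >> 4) & 0xf; /* clamped migrate-disable depth */

	pr_debug("flags=%x preempt=%u migrate=%u\n", flags, preempt, migrate);
}
#endif
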
2528 struct ring_buffer_event *
2529 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2530 			  int type,
2531 			  unsigned long len,
2532 			  unsigned int trace_ctx)
2533 {
2534 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2535 }
2536 
2537 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2538 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2539 static int trace_buffered_event_ref;
2540 
2541 /**
2542  * trace_buffered_event_enable - enable buffering events
2543  *
2544  * When events are being filtered, it is quicker to use a temporary
2545  * buffer to write the event data into if there's a likely chance
2546  * that it will not be committed. Discarding an event from the ring
2547  * buffer is not as fast as committing one, and is much slower than
2548  * copying the data and committing that copy.
2549  *
2550  * When an event is to be filtered, allocate per cpu buffers to
2551  * write the event data into, and if the event is filtered and discarded
2552  * it is simply dropped, otherwise, the entire data is to be committed
2553  * in one shot.
2554  */
2555 void trace_buffered_event_enable(void)
2556 {
2557 	struct ring_buffer_event *event;
2558 	struct page *page;
2559 	int cpu;
2560 
2561 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2562 
2563 	if (trace_buffered_event_ref++)
2564 		return;
2565 
2566 	for_each_tracing_cpu(cpu) {
2567 		page = alloc_pages_node(cpu_to_node(cpu),
2568 					GFP_KERNEL | __GFP_NORETRY, 0);
2569 		/* This is just an optimization and can handle failures */
2570 		if (!page) {
2571 			pr_err("Failed to allocate event buffer\n");
2572 			break;
2573 		}
2574 
2575 		event = page_address(page);
2576 		memset(event, 0, sizeof(*event));
2577 
2578 		per_cpu(trace_buffered_event, cpu) = event;
2579 
2580 		preempt_disable();
2581 		if (cpu == smp_processor_id() &&
2582 		    __this_cpu_read(trace_buffered_event) !=
2583 		    per_cpu(trace_buffered_event, cpu))
2584 			WARN_ON_ONCE(1);
2585 		preempt_enable();
2586 	}
2587 }
2588 
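/*
 * Editor's sketch, not part of the original file: callers pair the
 * enable/disable under event_mutex, e.g. while a filter is attached to an
 * event. The function name is hypothetical.
 */
#if 0
static void my_attach_filter(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();
	/* ... install the filter ... */
	mutex_unlock(&event_mutex);
}
#endif
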
2589 static void enable_trace_buffered_event(void *data)
2590 {
2591 	/* Probably not needed, but do it anyway */
2592 	smp_rmb();
2593 	this_cpu_dec(trace_buffered_event_cnt);
2594 }
2595 
2596 static void disable_trace_buffered_event(void *data)
2597 {
2598 	this_cpu_inc(trace_buffered_event_cnt);
2599 }
2600 
2601 /**
2602  * trace_buffered_event_disable - disable buffering events
2603  *
2604  * When a filter is removed, it is faster to not use the buffered
2605  * events, and to commit directly into the ring buffer. Free up
2606  * the temp buffers when there are no more users. This requires
2607  * special synchronization with current events.
2608  */
2609 void trace_buffered_event_disable(void)
2610 {
2611 	int cpu;
2612 
2613 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2614 
2615 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2616 		return;
2617 
2618 	if (--trace_buffered_event_ref)
2619 		return;
2620 
2621 	/* For each CPU, set the buffer as used. */
2622 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2623 			 NULL, true);
2624 
2625 	/* Wait for all current users to finish */
2626 	synchronize_rcu();
2627 
2628 	for_each_tracing_cpu(cpu) {
2629 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2630 		per_cpu(trace_buffered_event, cpu) = NULL;
2631 	}
2632 
2633 	/*
2634 	 * Wait for any CPU that started checking whether it can use its event
2635 	 * buffer only after the previous synchronize_rcu() call and still read
2636 	 * a valid pointer from trace_buffered_event. Such a CPU must not see
2637 	 * the cleared trace_buffered_event_cnt, or it could wrongly decide to
2638 	 * use the pointed-to buffer, which has now been freed.
2639 	 */
2640 	synchronize_rcu();
2641 
2642 	/* For each CPU, relinquish the buffer */
2643 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2644 			 true);
2645 }
2646 
2647 static struct trace_buffer *temp_buffer;
2648 
2649 struct ring_buffer_event *
2650 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2651 			  struct trace_event_file *trace_file,
2652 			  int type, unsigned long len,
2653 			  unsigned int trace_ctx)
2654 {
2655 	struct ring_buffer_event *entry;
2656 	struct trace_array *tr = trace_file->tr;
2657 	int val;
2658 
2659 	*current_rb = tr->array_buffer.buffer;
2660 
2661 	if (!tr->no_filter_buffering_ref &&
2662 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2663 		preempt_disable_notrace();
2664 		/*
2665 		 * Filtering is on, so try to use the per cpu buffer first.
2666 		 * This buffer will simulate a ring_buffer_event,
2667 		 * where the type_len is zero and the array[0] will
2668 		 * hold the full length.
2669 		 * (see include/linux/ring_buffer.h for details on
2670 		 *  how the ring_buffer_event is structured).
2671 		 *
2672 		 * Using a temp buffer during filtering and copying it
2673 		 * on a matched filter is quicker than writing directly
2674 		 * into the ring buffer and then discarding it when
2675 		 * it doesn't match. That is because the discard
2676 		 * requires several atomic operations to get right.
2677 		 * Copying on match and doing nothing on a failed match
2678 		 * is still quicker than no copy on match, but having
2679 		 * to discard out of the ring buffer on a failed match.
2680 		 */
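		/*
		 * Editor's note, not part of the original file: with a
		 * type_len of zero the buffered event looks like
		 *   entry->array[0]   = payload length in bytes
		 *   entry->array[1..] = payload data
		 * mirroring an extended-length ring_buffer_event.
		 */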
2681 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2682 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2683 
2684 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2685 
2686 			/*
2687 			 * Preemption is disabled, but interrupts and NMIs
2688 			 * can still come in now. If that happens after
2689 			 * the above increment, then it will have to go
2690 			 * back to the old method of allocating the event
2691 			 * on the ring buffer, and if the filter fails, it
2692 			 * will have to call ring_buffer_discard_commit()
2693 			 * to remove it.
2694 			 *
2695 			 * Need to also check the unlikely case that the
2696 			 * length is bigger than the temp buffer size.
2697 			 * If that happens, then the reserve is pretty much
2698 			 * guaranteed to fail, as the ring buffer currently
2699 			 * only allows events less than a page. But that may
2700 			 * change in the future, so let the ring buffer reserve
2701 			 * handle the failure in that case.
2702 			 */
2703 			if (val == 1 && likely(len <= max_len)) {
2704 				trace_event_setup(entry, type, trace_ctx);
2705 				entry->array[0] = len;
2706 				/* Return with preemption disabled */
2707 				return entry;
2708 			}
2709 			this_cpu_dec(trace_buffered_event_cnt);
2710 		}
2711 		/* __trace_buffer_lock_reserve() disables preemption */
2712 		preempt_enable_notrace();
2713 	}
2714 
2715 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2716 					    trace_ctx);
2717 	/*
2718 	 * If tracing is off, but we have triggers enabled
2719 	 * we still need to look at the event data. Use the temp_buffer
2720 	 * to store the trace event for the trigger to use. It's recursion
2721 	 * safe and will not be recorded anywhere.
2722 	 */
2723 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2724 		*current_rb = temp_buffer;
2725 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2726 						    trace_ctx);
2727 	}
2728 	return entry;
2729 }
2730 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2731 
2732 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2733 static DEFINE_MUTEX(tracepoint_printk_mutex);
2734 
2735 static void output_printk(struct trace_event_buffer *fbuffer)
2736 {
2737 	struct trace_event_call *event_call;
2738 	struct trace_event_file *file;
2739 	struct trace_event *event;
2740 	unsigned long flags;
2741 	struct trace_iterator *iter = tracepoint_print_iter;
2742 
2743 	/* We should never get here if iter is NULL */
2744 	if (WARN_ON_ONCE(!iter))
2745 		return;
2746 
2747 	event_call = fbuffer->trace_file->event_call;
2748 	if (!event_call || !event_call->event.funcs ||
2749 	    !event_call->event.funcs->trace)
2750 		return;
2751 
2752 	file = fbuffer->trace_file;
2753 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2754 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2755 	     !filter_match_preds(file->filter, fbuffer->entry)))
2756 		return;
2757 
2758 	event = &fbuffer->trace_file->event_call->event;
2759 
2760 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2761 	trace_seq_init(&iter->seq);
2762 	iter->ent = fbuffer->entry;
2763 	event_call->event.funcs->trace(iter, 0, event);
2764 	trace_seq_putc(&iter->seq, 0);
2765 	printk("%s", iter->seq.buffer);
2766 
2767 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2768 }
2769 
2770 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2771 			     void *buffer, size_t *lenp,
2772 			     loff_t *ppos)
2773 {
2774 	int save_tracepoint_printk;
2775 	int ret;
2776 
2777 	mutex_lock(&tracepoint_printk_mutex);
2778 	save_tracepoint_printk = tracepoint_printk;
2779 
2780 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2781 
2782 	/*
2783 	 * This will force exiting early, as tracepoint_printk
2784 	 * is always zero when tracepoint_print_iter is not allocated.
2785 	 */
2786 	if (!tracepoint_print_iter)
2787 		tracepoint_printk = 0;
2788 
2789 	if (save_tracepoint_printk == tracepoint_printk)
2790 		goto out;
2791 
2792 	if (tracepoint_printk)
2793 		static_key_enable(&tracepoint_printk_key.key);
2794 	else
2795 		static_key_disable(&tracepoint_printk_key.key);
2796 
2797  out:
2798 	mutex_unlock(&tracepoint_printk_mutex);
2799 
2800 	return ret;
2801 }
2802 
2803 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2804 {
2805 	enum event_trigger_type tt = ETT_NONE;
2806 	struct trace_event_file *file = fbuffer->trace_file;
2807 
2808 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2809 			fbuffer->entry, &tt))
2810 		goto discard;
2811 
2812 	if (static_key_false(&tracepoint_printk_key.key))
2813 		output_printk(fbuffer);
2814 
2815 	if (static_branch_unlikely(&trace_event_exports_enabled))
2816 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2817 
2818 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2819 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2820 
2821 discard:
2822 	if (tt)
2823 		event_triggers_post_call(file, tt);
2825 }
2826 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2827 
2828 /*
2829  * Skip 3:
2830  *
2831  *   trace_buffer_unlock_commit_regs()
2832  *   trace_event_buffer_commit()
2833  *   trace_event_raw_event_xxx()
2834  */
2835 # define STACK_SKIP 3
2836 
2837 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2838 				     struct trace_buffer *buffer,
2839 				     struct ring_buffer_event *event,
2840 				     unsigned int trace_ctx,
2841 				     struct pt_regs *regs)
2842 {
2843 	__buffer_unlock_commit(buffer, event);
2844 
2845 	/*
2846 	 * If regs is not set, then skip the necessary functions.
2847 	 * Note, we can still get here via blktrace, wakeup tracer
2848 	 * and mmiotrace, but that's ok if they lose a function or
2849 	 * two. They are not that meaningful.
2850 	 */
2851 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2852 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2853 }
2854 
2855 /*
2856  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2857  */
2858 void
2859 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2860 				   struct ring_buffer_event *event)
2861 {
2862 	__buffer_unlock_commit(buffer, event);
2863 }
2864 
2865 void
2866 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2867 	       parent_ip, unsigned int trace_ctx)
2868 {
2869 	struct trace_event_call *call = &event_function;
2870 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2871 	struct ring_buffer_event *event;
2872 	struct ftrace_entry *entry;
2873 
2874 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2875 					    trace_ctx);
2876 	if (!event)
2877 		return;
2878 	entry	= ring_buffer_event_data(event);
2879 	entry->ip			= ip;
2880 	entry->parent_ip		= parent_ip;
2881 
2882 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2883 		if (static_branch_unlikely(&trace_function_exports_enabled))
2884 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2885 		__buffer_unlock_commit(buffer, event);
2886 	}
2887 }
2888 
2889 #ifdef CONFIG_STACKTRACE
2890 
2891 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2892 #define FTRACE_KSTACK_NESTING	4
2893 
2894 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2895 
2896 struct ftrace_stack {
2897 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2898 };
2899 
2901 struct ftrace_stacks {
2902 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2903 };
2904 
2905 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2906 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2907 
2908 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2909 				 unsigned int trace_ctx,
2910 				 int skip, struct pt_regs *regs)
2911 {
2912 	struct trace_event_call *call = &event_kernel_stack;
2913 	struct ring_buffer_event *event;
2914 	unsigned int size, nr_entries;
2915 	struct ftrace_stack *fstack;
2916 	struct stack_entry *entry;
2917 	int stackidx;
2918 
2919 	/*
2920 	 * Add one, for this function and the call to stack_trace_save().
2921 	 * If regs is set, then these functions will not be in the way.
2922 	 */
2923 #ifndef CONFIG_UNWINDER_ORC
2924 	if (!regs)
2925 		skip++;
2926 #endif
2927 
2928 	preempt_disable_notrace();
2929 
2930 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2931 
2932 	/* This should never happen. If it does, yell once and skip */
2933 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2934 		goto out;
2935 
2936 	/*
2937 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2938 	 * interrupt will either see the value pre-increment or post-increment.
2939 	 * If the interrupt happens pre-increment it will have
2940 	 * restored the counter when it returns.  We just need a barrier to
2941 	 * keep gcc from moving things around.
2942 	 */
2943 	barrier();
2944 
2945 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2946 	size = ARRAY_SIZE(fstack->calls);
2947 
2948 	if (regs) {
2949 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2950 						   size, skip);
2951 	} else {
2952 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2953 	}
2954 
2955 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2956 				    struct_size(entry, caller, nr_entries),
2957 				    trace_ctx);
2958 	if (!event)
2959 		goto out;
2960 	entry = ring_buffer_event_data(event);
2961 
2962 	entry->size = nr_entries;
2963 	memcpy(&entry->caller, fstack->calls,
2964 	       flex_array_size(entry, caller, nr_entries));
2965 
2966 	if (!call_filter_check_discard(call, entry, buffer, event))
2967 		__buffer_unlock_commit(buffer, event);
2968 
2969  out:
2970 	/* Again, don't let gcc optimize things here */
2971 	barrier();
2972 	__this_cpu_dec(ftrace_stack_reserve);
2973 	preempt_enable_notrace();
2974 
2975 }
2976 
2977 static inline void ftrace_trace_stack(struct trace_array *tr,
2978 				      struct trace_buffer *buffer,
2979 				      unsigned int trace_ctx,
2980 				      int skip, struct pt_regs *regs)
2981 {
2982 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2983 		return;
2984 
2985 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
2986 }
2987 
2988 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
2989 		   int skip)
2990 {
2991 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2992 
2993 	if (rcu_is_watching()) {
2994 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
2995 		return;
2996 	}
2997 
2998 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
2999 		return;
3000 
3001 	/*
3002 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3003 	 * but if the above rcu_is_watching() failed, then the NMI
3004 	 * triggered someplace critical, and ct_irq_enter() should
3005 	 * not be called from NMI.
3006 	 */
3007 	if (unlikely(in_nmi()))
3008 		return;
3009 
3010 	ct_irq_enter_irqson();
3011 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3012 	ct_irq_exit_irqson();
3013 }
3014 
3015 /**
3016  * trace_dump_stack - record a stack back trace in the trace buffer
3017  * @skip: Number of functions to skip (helper handlers)
3018  */
3019 void trace_dump_stack(int skip)
3020 {
3021 	if (tracing_disabled || tracing_selftest_running)
3022 		return;
3023 
3024 #ifndef CONFIG_UNWINDER_ORC
3025 	/* Skip 1 to skip this function. */
3026 	skip++;
3027 #endif
3028 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3029 			     tracing_gen_ctx(), skip, NULL);
3030 }
3031 EXPORT_SYMBOL_GPL(trace_dump_stack);
3032 
3033 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3034 static DEFINE_PER_CPU(int, user_stack_count);
3035 
3036 static void
3037 ftrace_trace_userstack(struct trace_array *tr,
3038 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3039 {
3040 	struct trace_event_call *call = &event_user_stack;
3041 	struct ring_buffer_event *event;
3042 	struct userstack_entry *entry;
3043 
3044 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3045 		return;
3046 
3047 	/*
3048 	 * NMIs can not handle page faults, even with fixups.
3049 	 * Saving the user stack can (and often does) fault.
3050 	 */
3051 	if (unlikely(in_nmi()))
3052 		return;
3053 
3054 	/*
3055 	 * prevent recursion, since the user stack tracing may
3056 	 * trigger other kernel events.
3057 	 */
3058 	preempt_disable();
3059 	if (__this_cpu_read(user_stack_count))
3060 		goto out;
3061 
3062 	__this_cpu_inc(user_stack_count);
3063 
3064 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3065 					    sizeof(*entry), trace_ctx);
3066 	if (!event)
3067 		goto out_drop_count;
3068 	entry	= ring_buffer_event_data(event);
3069 
3070 	entry->tgid		= current->tgid;
3071 	memset(&entry->caller, 0, sizeof(entry->caller));
3072 
3073 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3074 	if (!call_filter_check_discard(call, entry, buffer, event))
3075 		__buffer_unlock_commit(buffer, event);
3076 
3077  out_drop_count:
3078 	__this_cpu_dec(user_stack_count);
3079  out:
3080 	preempt_enable();
3081 }
3082 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3083 static void ftrace_trace_userstack(struct trace_array *tr,
3084 				   struct trace_buffer *buffer,
3085 				   unsigned int trace_ctx)
3086 {
3087 }
3088 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3089 
3090 #endif /* CONFIG_STACKTRACE */
3091 
3092 static inline void
3093 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3094 			  unsigned long long delta)
3095 {
3096 	entry->bottom_delta_ts = delta & U32_MAX;
3097 	entry->top_delta_ts = (delta >> 32);
3098 }
3099 
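/*
 * Editor's sketch, not part of the original file: the output side
 * recombines the two halves stored above roughly like this. The helper
 * name is hypothetical.
 */
#if 0
static inline u64 my_func_repeats_get_delta_ts(struct func_repeats_entry *entry)
{
	return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}
#endif
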
3100 void trace_last_func_repeats(struct trace_array *tr,
3101 			     struct trace_func_repeats *last_info,
3102 			     unsigned int trace_ctx)
3103 {
3104 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3105 	struct func_repeats_entry *entry;
3106 	struct ring_buffer_event *event;
3107 	u64 delta;
3108 
3109 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3110 					    sizeof(*entry), trace_ctx);
3111 	if (!event)
3112 		return;
3113 
3114 	delta = ring_buffer_event_time_stamp(buffer, event) -
3115 		last_info->ts_last_call;
3116 
3117 	entry = ring_buffer_event_data(event);
3118 	entry->ip = last_info->ip;
3119 	entry->parent_ip = last_info->parent_ip;
3120 	entry->count = last_info->count;
3121 	func_repeats_set_delta_ts(entry, delta);
3122 
3123 	__buffer_unlock_commit(buffer, event);
3124 }
3125 
3126 /* created for use with alloc_percpu */
3127 struct trace_buffer_struct {
3128 	int nesting;
3129 	char buffer[4][TRACE_BUF_SIZE];
3130 };
3131 
3132 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3133 
3134 /*
3135  * This allows for lockless recording.  If we're nested too deeply, then
3136  * this returns NULL.
3137  */
3138 static char *get_trace_buf(void)
3139 {
3140 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3141 
3142 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3143 		return NULL;
3144 
3145 	buffer->nesting++;
3146 
3147 	/* Interrupts must see nesting incremented before we use the buffer */
3148 	barrier();
3149 	return &buffer->buffer[buffer->nesting - 1][0];
3150 }
3151 
3152 static void put_trace_buf(void)
3153 {
3154 	/* Don't let the decrement of nesting leak before this */
3155 	barrier();
3156 	this_cpu_dec(trace_percpu_buffer->nesting);
3157 }
3158 
3159 static int alloc_percpu_trace_buffer(void)
3160 {
3161 	struct trace_buffer_struct __percpu *buffers;
3162 
3163 	if (trace_percpu_buffer)
3164 		return 0;
3165 
3166 	buffers = alloc_percpu(struct trace_buffer_struct);
3167 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3168 		return -ENOMEM;
3169 
3170 	trace_percpu_buffer = buffers;
3171 	return 0;
3172 }
3173 
3174 static int buffers_allocated;
3175 
3176 void trace_printk_init_buffers(void)
3177 {
3178 	if (buffers_allocated)
3179 		return;
3180 
3181 	if (alloc_percpu_trace_buffer())
3182 		return;
3183 
3184 	/* trace_printk() is for debug use only. Don't use it in production. */
3185 
3186 	pr_warn("\n");
3187 	pr_warn("**********************************************************\n");
3188 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3189 	pr_warn("**                                                      **\n");
3190 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3191 	pr_warn("**                                                      **\n");
3192 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3193 	pr_warn("** unsafe for production use.                           **\n");
3194 	pr_warn("**                                                      **\n");
3195 	pr_warn("** If you see this message and you are not debugging    **\n");
3196 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3197 	pr_warn("**                                                      **\n");
3198 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3199 	pr_warn("**********************************************************\n");
3200 
3201 	/* Expand the buffers to set size */
3202 	tracing_update_buffers(&global_trace);
3203 
3204 	buffers_allocated = 1;
3205 
3206 	/*
3207 	 * trace_printk_init_buffers() can be called by modules.
3208 	 * If that happens, then we need to start cmdline recording
3209 	 * directly here. If the global_trace.buffer is already
3210 	 * allocated here, then this was called by module code.
3211 	 */
3212 	if (global_trace.array_buffer.buffer)
3213 		tracing_start_cmdline_record();
3214 }
3215 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3216 
3217 void trace_printk_start_comm(void)
3218 {
3219 	/* Start tracing comms if trace printk is set */
3220 	if (!buffers_allocated)
3221 		return;
3222 	tracing_start_cmdline_record();
3223 }
3224 
3225 static void trace_printk_start_stop_comm(int enabled)
3226 {
3227 	if (!buffers_allocated)
3228 		return;
3229 
3230 	if (enabled)
3231 		tracing_start_cmdline_record();
3232 	else
3233 		tracing_stop_cmdline_record();
3234 }
3235 
3236 /**
3237  * trace_vbprintk - write binary msg to tracing buffer
3238  * @ip:    The address of the caller
3239  * @fmt:   The string format to write to the buffer
3240  * @args:  Arguments for @fmt
3241  */
3242 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3243 {
3244 	struct trace_event_call *call = &event_bprint;
3245 	struct ring_buffer_event *event;
3246 	struct trace_buffer *buffer;
3247 	struct trace_array *tr = &global_trace;
3248 	struct bprint_entry *entry;
3249 	unsigned int trace_ctx;
3250 	char *tbuffer;
3251 	int len = 0, size;
3252 
3253 	if (unlikely(tracing_selftest_running || tracing_disabled))
3254 		return 0;
3255 
3256 	/* Don't pollute graph traces with trace_vprintk internals */
3257 	pause_graph_tracing();
3258 
3259 	trace_ctx = tracing_gen_ctx();
3260 	preempt_disable_notrace();
3261 
3262 	tbuffer = get_trace_buf();
3263 	if (!tbuffer) {
3264 		len = 0;
3265 		goto out_nobuffer;
3266 	}
3267 
3268 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3269 
3270 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3271 		goto out_put;
3272 
3273 	size = sizeof(*entry) + sizeof(u32) * len;
3274 	buffer = tr->array_buffer.buffer;
3275 	ring_buffer_nest_start(buffer);
3276 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3277 					    trace_ctx);
3278 	if (!event)
3279 		goto out;
3280 	entry = ring_buffer_event_data(event);
3281 	entry->ip			= ip;
3282 	entry->fmt			= fmt;
3283 
3284 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3285 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3286 		__buffer_unlock_commit(buffer, event);
3287 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3288 	}
3289 
3290 out:
3291 	ring_buffer_nest_end(buffer);
3292 out_put:
3293 	put_trace_buf();
3294 
3295 out_nobuffer:
3296 	preempt_enable_notrace();
3297 	unpause_graph_tracing();
3298 
3299 	return len;
3300 }
3301 EXPORT_SYMBOL_GPL(trace_vbprintk);
3302 
3303 __printf(3, 0)
3304 static int
3305 __trace_array_vprintk(struct trace_buffer *buffer,
3306 		      unsigned long ip, const char *fmt, va_list args)
3307 {
3308 	struct trace_event_call *call = &event_print;
3309 	struct ring_buffer_event *event;
3310 	int len = 0, size;
3311 	struct print_entry *entry;
3312 	unsigned int trace_ctx;
3313 	char *tbuffer;
3314 
3315 	if (tracing_disabled)
3316 		return 0;
3317 
3318 	/* Don't pollute graph traces with trace_vprintk internals */
3319 	pause_graph_tracing();
3320 
3321 	trace_ctx = tracing_gen_ctx();
3322 	preempt_disable_notrace();
3323 
3325 	tbuffer = get_trace_buf();
3326 	if (!tbuffer) {
3327 		len = 0;
3328 		goto out_nobuffer;
3329 	}
3330 
3331 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3332 
3333 	size = sizeof(*entry) + len + 1;
3334 	ring_buffer_nest_start(buffer);
3335 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3336 					    trace_ctx);
3337 	if (!event)
3338 		goto out;
3339 	entry = ring_buffer_event_data(event);
3340 	entry->ip = ip;
3341 
3342 	memcpy(&entry->buf, tbuffer, len + 1);
3343 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3344 		__buffer_unlock_commit(buffer, event);
3345 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3346 	}
3347 
3348 out:
3349 	ring_buffer_nest_end(buffer);
3350 	put_trace_buf();
3351 
3352 out_nobuffer:
3353 	preempt_enable_notrace();
3354 	unpause_graph_tracing();
3355 
3356 	return len;
3357 }
3358 
3359 __printf(3, 0)
3360 int trace_array_vprintk(struct trace_array *tr,
3361 			unsigned long ip, const char *fmt, va_list args)
3362 {
3363 	if (tracing_selftest_running && tr == &global_trace)
3364 		return 0;
3365 
3366 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3367 }
3368 
3369 /**
3370  * trace_array_printk - Print a message to a specific instance
3371  * @tr: The instance trace_array descriptor
3372  * @ip: The instruction pointer that this is called from.
3373  * @fmt: The format to print (printf format)
3374  *
3375  * If a subsystem sets up its own instance, they have the right to
3376  * printk strings into their tracing instance buffer using this
3377  * function. Note, this function will not write into the top level
3378  * buffer (use trace_printk() for that), as writing into the top level
3379  * buffer should only have events that can be individually disabled.
3380  * trace_printk() is only used for debugging a kernel, and should not
3381  * be ever incorporated in normal use.
3382  *
3383  * trace_array_printk() can be used, as it will not add noise to the
3384  * top level tracing buffer.
3385  *
3386  * Note, trace_array_init_printk() must be called on @tr before this
3387  * can be used.
3388  */
3389 __printf(3, 0)
3390 int trace_array_printk(struct trace_array *tr,
3391 		       unsigned long ip, const char *fmt, ...)
3392 {
3393 	int ret;
3394 	va_list ap;
3395 
3396 	if (!tr)
3397 		return -ENOENT;
3398 
3399 	/* This is only allowed for created instances */
3400 	if (tr == &global_trace)
3401 		return 0;
3402 
3403 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3404 		return 0;
3405 
3406 	va_start(ap, fmt);
3407 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3408 	va_end(ap);
3409 	return ret;
3410 }
3411 EXPORT_SYMBOL_GPL(trace_array_printk);
3412 
3413 /**
3414  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3415  * @tr: The trace array to initialize the buffers for
3416  *
3417  * As trace_array_printk() only writes into instances, they are OK to
3418  * have in the kernel (unlike trace_printk()). This needs to be called
3419  * before trace_array_printk() can be used on a trace_array.
3420  */
3421 int trace_array_init_printk(struct trace_array *tr)
3422 {
3423 	if (!tr)
3424 		return -ENOENT;
3425 
3426 	/* This is only allowed for created instances */
3427 	if (tr == &global_trace)
3428 		return -EINVAL;
3429 
3430 	return alloc_percpu_trace_buffer();
3431 }
3432 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3433 
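/*
 * Editor's sketch, not part of the original file: how a subsystem could
 * print into its own instance. The instance pointer is assumed to have
 * been set up elsewhere (e.g. via trace_array_get_by_name()); all names
 * below are hypothetical.
 */
#if 0
static struct trace_array *my_instance;	/* set up by the subsystem */

static void my_subsys_note(int value)
{
	if (!my_instance)
		return;

	/* Must succeed at least once before printing into the instance */
	if (trace_array_init_printk(my_instance))
		return;

	trace_array_printk(my_instance, _THIS_IP_, "value=%d\n", value);
}
#endif
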
3434 __printf(3, 4)
3435 int trace_array_printk_buf(struct trace_buffer *buffer,
3436 			   unsigned long ip, const char *fmt, ...)
3437 {
3438 	int ret;
3439 	va_list ap;
3440 
3441 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3442 		return 0;
3443 
3444 	va_start(ap, fmt);
3445 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3446 	va_end(ap);
3447 	return ret;
3448 }
3449 
3450 __printf(2, 0)
3451 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3452 {
3453 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3454 }
3455 EXPORT_SYMBOL_GPL(trace_vprintk);
3456 
3457 static void trace_iterator_increment(struct trace_iterator *iter)
3458 {
3459 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3460 
3461 	iter->idx++;
3462 	if (buf_iter)
3463 		ring_buffer_iter_advance(buf_iter);
3464 }
3465 
3466 static struct trace_entry *
3467 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3468 		unsigned long *lost_events)
3469 {
3470 	struct ring_buffer_event *event;
3471 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3472 
3473 	if (buf_iter) {
3474 		event = ring_buffer_iter_peek(buf_iter, ts);
3475 		if (lost_events)
3476 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3477 				(unsigned long)-1 : 0;
3478 	} else {
3479 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3480 					 lost_events);
3481 	}
3482 
3483 	if (event) {
3484 		iter->ent_size = ring_buffer_event_length(event);
3485 		return ring_buffer_event_data(event);
3486 	}
3487 	iter->ent_size = 0;
3488 	return NULL;
3489 }
3490 
3491 static struct trace_entry *
3492 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3493 		  unsigned long *missing_events, u64 *ent_ts)
3494 {
3495 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3496 	struct trace_entry *ent, *next = NULL;
3497 	unsigned long lost_events = 0, next_lost = 0;
3498 	int cpu_file = iter->cpu_file;
3499 	u64 next_ts = 0, ts;
3500 	int next_cpu = -1;
3501 	int next_size = 0;
3502 	int cpu;
3503 
3504 	/*
3505 	 * If we are in a per_cpu trace file, don't bother iterating over
3506 	 * all CPUs; peek at that CPU directly.
3507 	 */
3508 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3509 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3510 			return NULL;
3511 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3512 		if (ent_cpu)
3513 			*ent_cpu = cpu_file;
3514 
3515 		return ent;
3516 	}
3517 
3518 	for_each_tracing_cpu(cpu) {
3519 
3520 		if (ring_buffer_empty_cpu(buffer, cpu))
3521 			continue;
3522 
3523 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3524 
3525 		/*
3526 		 * Pick the entry with the smallest timestamp:
3527 		 */
3528 		if (ent && (!next || ts < next_ts)) {
3529 			next = ent;
3530 			next_cpu = cpu;
3531 			next_ts = ts;
3532 			next_lost = lost_events;
3533 			next_size = iter->ent_size;
3534 		}
3535 	}
3536 
3537 	iter->ent_size = next_size;
3538 
3539 	if (ent_cpu)
3540 		*ent_cpu = next_cpu;
3541 
3542 	if (ent_ts)
3543 		*ent_ts = next_ts;
3544 
3545 	if (missing_events)
3546 		*missing_events = next_lost;
3547 
3548 	return next;
3549 }
3550 
3551 #define STATIC_FMT_BUF_SIZE	128
3552 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3553 
3554 char *trace_iter_expand_format(struct trace_iterator *iter)
3555 {
3556 	char *tmp;
3557 
3558 	/*
3559 	 * iter->tr is NULL when used with tp_printk, which means this can
3560 	 * be called in a context where it is not safe to call krealloc().
3561 	 */
3562 	if (!iter->tr || iter->fmt == static_fmt_buf)
3563 		return NULL;
3564 
3565 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3566 		       GFP_KERNEL);
3567 	if (tmp) {
3568 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3569 		iter->fmt = tmp;
3570 	}
3571 
3572 	return tmp;
3573 }
3574 
3575 /* Returns true if the string is safe to dereference from an event */
3576 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3577 			   bool star, int len)
3578 {
3579 	unsigned long addr = (unsigned long)str;
3580 	struct trace_event *trace_event;
3581 	struct trace_event_call *event;
3582 
3583 	/* Ignore strings with no length */
3584 	if (star && !len)
3585 		return true;
3586 
3587 	/* OK if part of the event data */
3588 	if ((addr >= (unsigned long)iter->ent) &&
3589 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3590 		return true;
3591 
3592 	/* OK if part of the temp seq buffer */
3593 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3594 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3595 		return true;
3596 
3597 	/* Core rodata can not be freed */
3598 	if (is_kernel_rodata(addr))
3599 		return true;
3600 
3601 	if (trace_is_tracepoint_string(str))
3602 		return true;
3603 
3604 	/*
3605 	 * Now this could be a module event, referencing core module
3606 	 * data, which is OK.
3607 	 */
3608 	if (!iter->ent)
3609 		return false;
3610 
3611 	trace_event = ftrace_find_event(iter->ent->type);
3612 	if (!trace_event)
3613 		return false;
3614 
3615 	event = container_of(trace_event, struct trace_event_call, event);
3616 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3617 		return false;
3618 
3619 	/* Would rather have rodata, but this will suffice */
3620 	if (within_module_core(addr, event->module))
3621 		return true;
3622 
3623 	return false;
3624 }
3625 
3626 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3627 
3628 static int test_can_verify_check(const char *fmt, ...)
3629 {
3630 	char buf[16];
3631 	va_list ap;
3632 	int ret;
3633 
3634 	/*
3635 	 * The verifier is dependent on vsnprintf() modifies the va_list
3636 	 * passed to it, where it is sent as a reference. Some architectures
3637 	 * (like x86_32) passes it by value, which means that vsnprintf()
3638 	 * does not modify the va_list passed to it, and the verifier
3639 	 * would then need to be able to understand all the values that
3640 	 * vsnprintf can use. If it is passed by value, then the verifier
3641 	 * is disabled.
3642 	 */
3643 	va_start(ap, fmt);
3644 	vsnprintf(buf, 16, "%d", ap);
3645 	ret = va_arg(ap, int);
3646 	va_end(ap);
3647 
3648 	return ret;
3649 }
3650 
3651 static void test_can_verify(void)
3652 {
3653 	if (!test_can_verify_check("%d %d", 0, 1)) {
3654 		pr_info("trace event string verifier disabled\n");
3655 		static_branch_inc(&trace_no_verify);
3656 	}
3657 }
3658 
3659 /**
3660  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3661  * @iter: The iterator that holds the seq buffer and the event being printed
3662  * @fmt: The format used to print the event
3663  * @ap: The va_list holding the data to print from @fmt.
3664  *
3665  * This writes the data into the @iter->seq buffer using the data from
3666  * @fmt and @ap. If the format has a %s, then the source of the string
3667  * is examined to make sure it is safe to print, otherwise it will
3668  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3669  * pointer.
3670  */
3671 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3672 			 va_list ap)
3673 {
3674 	long text_delta = iter->tr->text_delta;
3675 	long data_delta = iter->tr->data_delta;
3676 	const char *p = fmt;
3677 	const char *str;
3678 	bool good;
3679 	int i, j;
3680 
3681 	if (WARN_ON_ONCE(!fmt))
3682 		return;
3683 
3684 	if (static_branch_unlikely(&trace_no_verify))
3685 		goto print;
3686 
3687 	/* Don't bother checking when doing a ftrace_dump() */
3688 	if (iter->fmt == static_fmt_buf)
3689 		goto print;
3690 
3691 	while (*p) {
3692 		bool star = false;
3693 		int len = 0;
3694 
3695 		j = 0;
3696 
3697 		/*
3698 		 * We only care about %s and variants
3699 		 * as well as %p[sS] if delta is non-zero
3700 		 */
3701 		for (i = 0; p[i]; i++) {
3702 			if (i + 1 >= iter->fmt_size) {
3703 				/*
3704 				 * If we can't expand the copy buffer,
3705 				 * just print it.
3706 				 */
3707 				if (!trace_iter_expand_format(iter))
3708 					goto print;
3709 			}
3710 
3711 			if (p[i] == '\\' && p[i+1]) {
3712 				i++;
3713 				continue;
3714 			}
3715 			if (p[i] == '%') {
3716 				/* Need to test cases like %08.*s */
3717 				for (j = 1; p[i+j]; j++) {
3718 					if (isdigit(p[i+j]) ||
3719 					    p[i+j] == '.')
3720 						continue;
3721 					if (p[i+j] == '*') {
3722 						star = true;
3723 						continue;
3724 					}
3725 					break;
3726 				}
3727 				if (p[i+j] == 's')
3728 					break;
3729 
3730 				if (text_delta && p[i+1] == 'p' &&
3731 				    ((p[i+2] == 's' || p[i+2] == 'S')))
3732 					break;
3733 
3734 				star = false;
3735 			}
3736 			j = 0;
3737 		}
3738 		/* If no %s found then just print normally */
3739 		if (!p[i])
3740 			break;
3741 
3742 		/* Copy up to the %s, and print that */
3743 		strncpy(iter->fmt, p, i);
3744 		iter->fmt[i] = '\0';
3745 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3746 
3747 		/* Add delta to %pS pointers */
3748 		if (p[i+1] == 'p') {
3749 			unsigned long addr;
3750 			char fmt[4];
3751 
3752 			fmt[0] = '%';
3753 			fmt[1] = 'p';
3754 			fmt[2] = p[i+2]; /* Either %ps or %pS */
3755 			fmt[3] = '\0';
3756 
3757 			addr = va_arg(ap, unsigned long);
3758 			addr += text_delta;
3759 			trace_seq_printf(&iter->seq, fmt, (void *)addr);
3760 
3761 			p += i + 3;
3762 			continue;
3763 		}
3764 
3765 		/*
3766 		 * If iter->seq is full, the above call no longer guarantees
3767 		 * that ap is in sync with fmt processing, and further calls
3768 		 * to va_arg() can return wrong positional arguments.
3769 		 *
3770 		 * Ensure that ap is no longer used in this case.
3771 		 */
3772 		if (iter->seq.full) {
3773 			p = "";
3774 			break;
3775 		}
3776 
3777 		if (star)
3778 			len = va_arg(ap, int);
3779 
3780 		/* The ap now points to the string data of the %s */
3781 		str = va_arg(ap, const char *);
3782 
3783 		good = trace_safe_str(iter, str, star, len);
3784 
3785 		/* Could be from the last boot */
3786 		if (data_delta && !good) {
3787 			str += data_delta;
3788 			good = trace_safe_str(iter, str, star, len);
3789 		}
3790 
3791 		/*
3792 		 * If you hit this warning, it is likely that the
3793 		 * trace event in question used %s on a string that
3794 		 * was saved at the time of the event, but may not be
3795 		 * around when the trace is read. Use __string(),
3796 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3797 		 * instead. See samples/trace_events/trace-events-sample.h
3798 		 * for reference.
3799 		 */
3800 		if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
3801 			      fmt, seq_buf_str(&iter->seq.seq))) {
3802 			int ret;
3803 
3804 			/* Try to safely read the string */
3805 			if (star) {
3806 				if (len + 1 > iter->fmt_size)
3807 					len = iter->fmt_size - 1;
3808 				if (len < 0)
3809 					len = 0;
3810 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3811 				iter->fmt[len] = 0;
3812 				star = false;
3813 			} else {
3814 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3815 								  iter->fmt_size);
3816 			}
3817 			if (ret < 0)
3818 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3819 			else
3820 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3821 						 str, iter->fmt);
3822 			str = "[UNSAFE-MEMORY]";
3823 			strcpy(iter->fmt, "%s");
3824 		} else {
3825 			strncpy(iter->fmt, p + i, j + 1);
3826 			iter->fmt[j+1] = '\0';
3827 		}
3828 		if (star)
3829 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3830 		else
3831 			trace_seq_printf(&iter->seq, iter->fmt, str);
3832 
3833 		p += i + j + 1;
3834 	}
3835  print:
3836 	if (*p)
3837 		trace_seq_vprintf(&iter->seq, p, ap);
3838 }
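
/*
 * Illustrative sketch (not compiled, addresses and contents are
 * hypothetical): if an event's TP_printk() used a bare %s on memory
 * that is no longer valid when the trace is read, the checks above
 * first emit the raw pointer plus whatever the nofault helpers could
 * copy, and then print the "[UNSAFE-MEMORY]" marker in place of the
 * string, producing a line resembling:
 *
 *   (0xffff888100abcd00:stale?)[UNSAFE-MEMORY]
 *
 * Only the "(0x%px:...)" prefix and the "[UNSAFE-MEMORY]" text come
 * from the code above; everything else in the example is made up.
 */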
3839 
3840 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3841 {
3842 	const char *p, *new_fmt;
3843 	char *q;
3844 
3845 	if (WARN_ON_ONCE(!fmt))
3846 		return fmt;
3847 
3848 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3849 		return fmt;
3850 
3851 	p = fmt;
3852 	new_fmt = q = iter->fmt;
3853 	while (*p) {
3854 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3855 			if (!trace_iter_expand_format(iter))
3856 				return fmt;
3857 
3858 			q += iter->fmt - new_fmt;
3859 			new_fmt = iter->fmt;
3860 		}
3861 
3862 		*q++ = *p++;
3863 
3864 		/* Replace %p with %px */
3865 		if (p[-1] == '%') {
3866 			if (p[0] == '%') {
3867 				*q++ = *p++;
3868 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3869 				*q++ = *p++;
3870 				*q++ = 'x';
3871 			}
3872 		}
3873 	}
3874 	*q = '\0';
3875 
3876 	return new_fmt;
3877 }
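
/*
 * Illustrative sketch (format string is hypothetical): with
 * TRACE_ITER_HASH_PTR cleared, the rewrite above only touches a bare
 * %p, so a format such as
 *
 *   "req=%p flags=%pS done=%%p"
 *
 * comes back as
 *
 *   "req=%px flags=%pS done=%%p"
 *
 * %pS is left alone because an alphanumeric follows the 'p', and the
 * literal %%p is copied through by the p[0] == '%' branch.
 */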
3878 
3879 #define STATIC_TEMP_BUF_SIZE	128
3880 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3881 
3882 /* Find the next real entry, without updating the iterator itself */
3883 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3884 					  int *ent_cpu, u64 *ent_ts)
3885 {
3886 	/* __find_next_entry will reset ent_size */
3887 	int ent_size = iter->ent_size;
3888 	struct trace_entry *entry;
3889 
3890 	/*
3891 	 * If called from ftrace_dump(), then the iter->temp buffer
3892 	 * will be the static_temp_buf and not created from kmalloc.
3893 	 * If the entry size is greater than the buffer, we cannot
3894 	 * save it. Just return NULL in that case. This is only
3895 	 * used to add markers when two consecutive events' time
3896 	 * stamps have a large delta. See trace_print_lat_context().
3897 	 */
3898 	if (iter->temp == static_temp_buf &&
3899 	    STATIC_TEMP_BUF_SIZE < ent_size)
3900 		return NULL;
3901 
3902 	/*
3903 	 * The __find_next_entry() may call peek_next_entry(), which may
3904 	 * call ring_buffer_peek() that may make the contents of iter->ent
3905 	 * undefined. Need to copy iter->ent now.
3906 	 */
3907 	if (iter->ent && iter->ent != iter->temp) {
3908 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3909 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3910 			void *temp;
3911 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3912 			if (!temp)
3913 				return NULL;
3914 			kfree(iter->temp);
3915 			iter->temp = temp;
3916 			iter->temp_size = iter->ent_size;
3917 		}
3918 		memcpy(iter->temp, iter->ent, iter->ent_size);
3919 		iter->ent = iter->temp;
3920 	}
3921 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3922 	/* Put back the original ent_size */
3923 	iter->ent_size = ent_size;
3924 
3925 	return entry;
3926 }
3927 
3928 /* Find the next real entry, and increment the iterator to the next entry */
3929 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3930 {
3931 	iter->ent = __find_next_entry(iter, &iter->cpu,
3932 				      &iter->lost_events, &iter->ts);
3933 
3934 	if (iter->ent)
3935 		trace_iterator_increment(iter);
3936 
3937 	return iter->ent ? iter : NULL;
3938 }
3939 
3940 static void trace_consume(struct trace_iterator *iter)
3941 {
3942 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3943 			    &iter->lost_events);
3944 }
3945 
3946 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3947 {
3948 	struct trace_iterator *iter = m->private;
3949 	int i = (int)*pos;
3950 	void *ent;
3951 
3952 	WARN_ON_ONCE(iter->leftover);
3953 
3954 	(*pos)++;
3955 
3956 	/* can't go backwards */
3957 	if (iter->idx > i)
3958 		return NULL;
3959 
3960 	if (iter->idx < 0)
3961 		ent = trace_find_next_entry_inc(iter);
3962 	else
3963 		ent = iter;
3964 
3965 	while (ent && iter->idx < i)
3966 		ent = trace_find_next_entry_inc(iter);
3967 
3968 	iter->pos = *pos;
3969 
3970 	return ent;
3971 }
3972 
3973 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3974 {
3975 	struct ring_buffer_iter *buf_iter;
3976 	unsigned long entries = 0;
3977 	u64 ts;
3978 
3979 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3980 
3981 	buf_iter = trace_buffer_iter(iter, cpu);
3982 	if (!buf_iter)
3983 		return;
3984 
3985 	ring_buffer_iter_reset(buf_iter);
3986 
3987 	/*
3988 	 * With the max latency tracers, it is possible that a reset
3989 	 * never took place on a CPU. This is evident when the
3990 	 * timestamp is before the start of the buffer.
3991 	 */
3992 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3993 		if (ts >= iter->array_buffer->time_start)
3994 			break;
3995 		entries++;
3996 		ring_buffer_iter_advance(buf_iter);
3997 	}
3998 
3999 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4000 }
4001 
4002 /*
4003  * The current tracer is copied to avoid taking a global lock
4004  * all around.
4005  */
4006 static void *s_start(struct seq_file *m, loff_t *pos)
4007 {
4008 	struct trace_iterator *iter = m->private;
4009 	struct trace_array *tr = iter->tr;
4010 	int cpu_file = iter->cpu_file;
4011 	void *p = NULL;
4012 	loff_t l = 0;
4013 	int cpu;
4014 
4015 	mutex_lock(&trace_types_lock);
4016 	if (unlikely(tr->current_trace != iter->trace)) {
4017 		/* Close iter->trace before switching to the new current tracer */
4018 		if (iter->trace->close)
4019 			iter->trace->close(iter);
4020 		iter->trace = tr->current_trace;
4021 		/* Reopen the new current tracer */
4022 		if (iter->trace->open)
4023 			iter->trace->open(iter);
4024 	}
4025 	mutex_unlock(&trace_types_lock);
4026 
4027 #ifdef CONFIG_TRACER_MAX_TRACE
4028 	if (iter->snapshot && iter->trace->use_max_tr)
4029 		return ERR_PTR(-EBUSY);
4030 #endif
4031 
4032 	if (*pos != iter->pos) {
4033 		iter->ent = NULL;
4034 		iter->cpu = 0;
4035 		iter->idx = -1;
4036 
4037 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4038 			for_each_tracing_cpu(cpu)
4039 				tracing_iter_reset(iter, cpu);
4040 		} else
4041 			tracing_iter_reset(iter, cpu_file);
4042 
4043 		iter->leftover = 0;
4044 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4045 			;
4046 
4047 	} else {
4048 		/*
4049 		 * If we overflowed the seq_file before, then we want
4050 		 * to just reuse the trace_seq buffer again.
4051 		 */
4052 		if (iter->leftover)
4053 			p = iter;
4054 		else {
4055 			l = *pos - 1;
4056 			p = s_next(m, p, &l);
4057 		}
4058 	}
4059 
4060 	trace_event_read_lock();
4061 	trace_access_lock(cpu_file);
4062 	return p;
4063 }
4064 
4065 static void s_stop(struct seq_file *m, void *p)
4066 {
4067 	struct trace_iterator *iter = m->private;
4068 
4069 #ifdef CONFIG_TRACER_MAX_TRACE
4070 	if (iter->snapshot && iter->trace->use_max_tr)
4071 		return;
4072 #endif
4073 
4074 	trace_access_unlock(iter->cpu_file);
4075 	trace_event_read_unlock();
4076 }
4077 
4078 static void
4079 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4080 		      unsigned long *entries, int cpu)
4081 {
4082 	unsigned long count;
4083 
4084 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4085 	/*
4086 	 * If this buffer has skipped entries, then we hold all
4087 	 * entries for the trace and we need to ignore the
4088 	 * ones before the time stamp.
4089 	 */
4090 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4091 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4092 		/* total is the same as the entries */
4093 		*total = count;
4094 	} else
4095 		*total = count +
4096 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4097 	*entries = count;
4098 }
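
/*
 * Worked example (numbers are hypothetical): if a CPU buffer currently
 * holds 800 readable events and the ring buffer reports 200 overruns
 * (events lost to overwriting), then *entries is 800 and *total is
 * 1000.  If skipped_entries is set instead (the latency tracers marked
 * pre-reset events), those are subtracted and the overruns are not
 * added back, so *total equals *entries.
 */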
4099 
4100 static void
4101 get_total_entries(struct array_buffer *buf,
4102 		  unsigned long *total, unsigned long *entries)
4103 {
4104 	unsigned long t, e;
4105 	int cpu;
4106 
4107 	*total = 0;
4108 	*entries = 0;
4109 
4110 	for_each_tracing_cpu(cpu) {
4111 		get_total_entries_cpu(buf, &t, &e, cpu);
4112 		*total += t;
4113 		*entries += e;
4114 	}
4115 }
4116 
4117 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4118 {
4119 	unsigned long total, entries;
4120 
4121 	if (!tr)
4122 		tr = &global_trace;
4123 
4124 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4125 
4126 	return entries;
4127 }
4128 
4129 unsigned long trace_total_entries(struct trace_array *tr)
4130 {
4131 	unsigned long total, entries;
4132 
4133 	if (!tr)
4134 		tr = &global_trace;
4135 
4136 	get_total_entries(&tr->array_buffer, &total, &entries);
4137 
4138 	return entries;
4139 }
4140 
4141 static void print_lat_help_header(struct seq_file *m)
4142 {
4143 	seq_puts(m, "#                    _------=> CPU#            \n"
4144 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4145 		    "#                  | / _----=> need-resched    \n"
4146 		    "#                  || / _---=> hardirq/softirq \n"
4147 		    "#                  ||| / _--=> preempt-depth   \n"
4148 		    "#                  |||| / _-=> migrate-disable \n"
4149 		    "#                  ||||| /     delay           \n"
4150 		    "#  cmd     pid     |||||| time  |   caller     \n"
4151 		    "#     \\   /        ||||||  \\    |    /       \n");
4152 }
4153 
4154 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4155 {
4156 	unsigned long total;
4157 	unsigned long entries;
4158 
4159 	get_total_entries(buf, &total, &entries);
4160 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4161 		   entries, total, num_online_cpus());
4162 	seq_puts(m, "#\n");
4163 }
4164 
4165 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4166 				   unsigned int flags)
4167 {
4168 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4169 
4170 	print_event_info(buf, m);
4171 
4172 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4173 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4174 }
4175 
4176 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4177 				       unsigned int flags)
4178 {
4179 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4180 	static const char space[] = "            ";
4181 	int prec = tgid ? 12 : 2;
4182 
4183 	print_event_info(buf, m);
4184 
4185 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4186 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4187 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4188 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4189 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4190 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4191 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4192 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4193 }
4194 
4195 void
4196 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4197 {
4198 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4199 	struct array_buffer *buf = iter->array_buffer;
4200 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4201 	struct tracer *type = iter->trace;
4202 	unsigned long entries;
4203 	unsigned long total;
4204 	const char *name = type->name;
4205 
4206 	get_total_entries(buf, &total, &entries);
4207 
4208 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4209 		   name, init_utsname()->release);
4210 	seq_puts(m, "# -----------------------------------"
4211 		 "---------------------------------\n");
4212 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4213 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4214 		   nsecs_to_usecs(data->saved_latency),
4215 		   entries,
4216 		   total,
4217 		   buf->cpu,
4218 		   preempt_model_none()      ? "server" :
4219 		   preempt_model_voluntary() ? "desktop" :
4220 		   preempt_model_full()      ? "preempt" :
4221 		   preempt_model_rt()        ? "preempt_rt" :
4222 		   "unknown",
4223 		   /* These are reserved for later use */
4224 		   0, 0, 0, 0);
4225 #ifdef CONFIG_SMP
4226 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4227 #else
4228 	seq_puts(m, ")\n");
4229 #endif
4230 	seq_puts(m, "#    -----------------\n");
4231 	seq_printf(m, "#    | task: %.16s-%d "
4232 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4233 		   data->comm, data->pid,
4234 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4235 		   data->policy, data->rt_priority);
4236 	seq_puts(m, "#    -----------------\n");
4237 
4238 	if (data->critical_start) {
4239 		seq_puts(m, "#  => started at: ");
4240 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4241 		trace_print_seq(m, &iter->seq);
4242 		seq_puts(m, "\n#  => ended at:   ");
4243 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4244 		trace_print_seq(m, &iter->seq);
4245 		seq_puts(m, "\n#\n");
4246 	}
4247 
4248 	seq_puts(m, "#\n");
4249 }
4250 
4251 static void test_cpu_buff_start(struct trace_iterator *iter)
4252 {
4253 	struct trace_seq *s = &iter->seq;
4254 	struct trace_array *tr = iter->tr;
4255 
4256 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4257 		return;
4258 
4259 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4260 		return;
4261 
4262 	if (cpumask_available(iter->started) &&
4263 	    cpumask_test_cpu(iter->cpu, iter->started))
4264 		return;
4265 
4266 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4267 		return;
4268 
4269 	if (cpumask_available(iter->started))
4270 		cpumask_set_cpu(iter->cpu, iter->started);
4271 
4272 	/* Don't print the "buffer started" banner for the first entry of the trace */
4273 	if (iter->idx > 1)
4274 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4275 				iter->cpu);
4276 }
4277 
4278 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4279 {
4280 	struct trace_array *tr = iter->tr;
4281 	struct trace_seq *s = &iter->seq;
4282 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4283 	struct trace_entry *entry;
4284 	struct trace_event *event;
4285 
4286 	entry = iter->ent;
4287 
4288 	test_cpu_buff_start(iter);
4289 
4290 	event = ftrace_find_event(entry->type);
4291 
4292 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4293 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4294 			trace_print_lat_context(iter);
4295 		else
4296 			trace_print_context(iter);
4297 	}
4298 
4299 	if (trace_seq_has_overflowed(s))
4300 		return TRACE_TYPE_PARTIAL_LINE;
4301 
4302 	if (event) {
4303 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4304 			return print_event_fields(iter, event);
4305 		return event->funcs->trace(iter, sym_flags, event);
4306 	}
4307 
4308 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4309 
4310 	return trace_handle_return(s);
4311 }
4312 
4313 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4314 {
4315 	struct trace_array *tr = iter->tr;
4316 	struct trace_seq *s = &iter->seq;
4317 	struct trace_entry *entry;
4318 	struct trace_event *event;
4319 
4320 	entry = iter->ent;
4321 
4322 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4323 		trace_seq_printf(s, "%d %d %llu ",
4324 				 entry->pid, iter->cpu, iter->ts);
4325 
4326 	if (trace_seq_has_overflowed(s))
4327 		return TRACE_TYPE_PARTIAL_LINE;
4328 
4329 	event = ftrace_find_event(entry->type);
4330 	if (event)
4331 		return event->funcs->raw(iter, 0, event);
4332 
4333 	trace_seq_printf(s, "%d ?\n", entry->type);
4334 
4335 	return trace_handle_return(s);
4336 }
4337 
4338 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4339 {
4340 	struct trace_array *tr = iter->tr;
4341 	struct trace_seq *s = &iter->seq;
4342 	unsigned char newline = '\n';
4343 	struct trace_entry *entry;
4344 	struct trace_event *event;
4345 
4346 	entry = iter->ent;
4347 
4348 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4349 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4350 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4351 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4352 		if (trace_seq_has_overflowed(s))
4353 			return TRACE_TYPE_PARTIAL_LINE;
4354 	}
4355 
4356 	event = ftrace_find_event(entry->type);
4357 	if (event) {
4358 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4359 		if (ret != TRACE_TYPE_HANDLED)
4360 			return ret;
4361 	}
4362 
4363 	SEQ_PUT_FIELD(s, newline);
4364 
4365 	return trace_handle_return(s);
4366 }
4367 
4368 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4369 {
4370 	struct trace_array *tr = iter->tr;
4371 	struct trace_seq *s = &iter->seq;
4372 	struct trace_entry *entry;
4373 	struct trace_event *event;
4374 
4375 	entry = iter->ent;
4376 
4377 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4378 		SEQ_PUT_FIELD(s, entry->pid);
4379 		SEQ_PUT_FIELD(s, iter->cpu);
4380 		SEQ_PUT_FIELD(s, iter->ts);
4381 		if (trace_seq_has_overflowed(s))
4382 			return TRACE_TYPE_PARTIAL_LINE;
4383 	}
4384 
4385 	event = ftrace_find_event(entry->type);
4386 	return event ? event->funcs->binary(iter, 0, event) :
4387 		TRACE_TYPE_HANDLED;
4388 }
4389 
4390 int trace_empty(struct trace_iterator *iter)
4391 {
4392 	struct ring_buffer_iter *buf_iter;
4393 	int cpu;
4394 
4395 	/* If we are looking at one CPU buffer, only check that one */
4396 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4397 		cpu = iter->cpu_file;
4398 		buf_iter = trace_buffer_iter(iter, cpu);
4399 		if (buf_iter) {
4400 			if (!ring_buffer_iter_empty(buf_iter))
4401 				return 0;
4402 		} else {
4403 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4404 				return 0;
4405 		}
4406 		return 1;
4407 	}
4408 
4409 	for_each_tracing_cpu(cpu) {
4410 		buf_iter = trace_buffer_iter(iter, cpu);
4411 		if (buf_iter) {
4412 			if (!ring_buffer_iter_empty(buf_iter))
4413 				return 0;
4414 		} else {
4415 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4416 				return 0;
4417 		}
4418 	}
4419 
4420 	return 1;
4421 }
4422 
4423 /*  Called with trace_event_read_lock() held. */
4424 enum print_line_t print_trace_line(struct trace_iterator *iter)
4425 {
4426 	struct trace_array *tr = iter->tr;
4427 	unsigned long trace_flags = tr->trace_flags;
4428 	enum print_line_t ret;
4429 
4430 	if (iter->lost_events) {
4431 		if (iter->lost_events == (unsigned long)-1)
4432 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4433 					 iter->cpu);
4434 		else
4435 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4436 					 iter->cpu, iter->lost_events);
4437 		if (trace_seq_has_overflowed(&iter->seq))
4438 			return TRACE_TYPE_PARTIAL_LINE;
4439 	}
4440 
4441 	if (iter->trace && iter->trace->print_line) {
4442 		ret = iter->trace->print_line(iter);
4443 		if (ret != TRACE_TYPE_UNHANDLED)
4444 			return ret;
4445 	}
4446 
4447 	if (iter->ent->type == TRACE_BPUTS &&
4448 			trace_flags & TRACE_ITER_PRINTK &&
4449 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4450 		return trace_print_bputs_msg_only(iter);
4451 
4452 	if (iter->ent->type == TRACE_BPRINT &&
4453 			trace_flags & TRACE_ITER_PRINTK &&
4454 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4455 		return trace_print_bprintk_msg_only(iter);
4456 
4457 	if (iter->ent->type == TRACE_PRINT &&
4458 			trace_flags & TRACE_ITER_PRINTK &&
4459 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4460 		return trace_print_printk_msg_only(iter);
4461 
4462 	if (trace_flags & TRACE_ITER_BIN)
4463 		return print_bin_fmt(iter);
4464 
4465 	if (trace_flags & TRACE_ITER_HEX)
4466 		return print_hex_fmt(iter);
4467 
4468 	if (trace_flags & TRACE_ITER_RAW)
4469 		return print_raw_fmt(iter);
4470 
4471 	return print_trace_fmt(iter);
4472 }
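
/*
 * Sketch of the flag precedence above (option names assumed to match
 * the entries in the trace_options file): enabling one of "bin", "hex"
 * or "raw", e.g.
 *
 *   # echo hex > /sys/kernel/tracing/trace_options
 *
 * routes output through print_hex_fmt() (provided "bin" is not also
 * set); with none of them set, the default print_trace_fmt() path is
 * used.
 */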
4473 
4474 void trace_latency_header(struct seq_file *m)
4475 {
4476 	struct trace_iterator *iter = m->private;
4477 	struct trace_array *tr = iter->tr;
4478 
4479 	/* print nothing if the buffers are empty */
4480 	if (trace_empty(iter))
4481 		return;
4482 
4483 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4484 		print_trace_header(m, iter);
4485 
4486 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4487 		print_lat_help_header(m);
4488 }
4489 
4490 void trace_default_header(struct seq_file *m)
4491 {
4492 	struct trace_iterator *iter = m->private;
4493 	struct trace_array *tr = iter->tr;
4494 	unsigned long trace_flags = tr->trace_flags;
4495 
4496 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4497 		return;
4498 
4499 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4500 		/* print nothing if the buffers are empty */
4501 		if (trace_empty(iter))
4502 			return;
4503 		print_trace_header(m, iter);
4504 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4505 			print_lat_help_header(m);
4506 	} else {
4507 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4508 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4509 				print_func_help_header_irq(iter->array_buffer,
4510 							   m, trace_flags);
4511 			else
4512 				print_func_help_header(iter->array_buffer, m,
4513 						       trace_flags);
4514 		}
4515 	}
4516 }
4517 
4518 static void test_ftrace_alive(struct seq_file *m)
4519 {
4520 	if (!ftrace_is_dead())
4521 		return;
4522 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4523 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4524 }
4525 
4526 #ifdef CONFIG_TRACER_MAX_TRACE
4527 static void show_snapshot_main_help(struct seq_file *m)
4528 {
4529 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4530 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4531 		    "#                      Takes a snapshot of the main buffer.\n"
4532 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4533 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4534 		    "#                       is not a '0' or '1')\n");
4535 }
4536 
4537 static void show_snapshot_percpu_help(struct seq_file *m)
4538 {
4539 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4540 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4541 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4542 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4543 #else
4544 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4545 		    "#                     Must use main snapshot file to allocate.\n");
4546 #endif
4547 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4548 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4549 		    "#                       is not a '0' or '1')\n");
4550 }
4551 
4552 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4553 {
4554 	if (iter->tr->allocated_snapshot)
4555 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4556 	else
4557 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4558 
4559 	seq_puts(m, "# Snapshot commands:\n");
4560 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4561 		show_snapshot_main_help(m);
4562 	else
4563 		show_snapshot_percpu_help(m);
4564 }
4565 #else
4566 /* Should never be called */
4567 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4568 #endif
4569 
4570 static int s_show(struct seq_file *m, void *v)
4571 {
4572 	struct trace_iterator *iter = v;
4573 	int ret;
4574 
4575 	if (iter->ent == NULL) {
4576 		if (iter->tr) {
4577 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4578 			seq_puts(m, "#\n");
4579 			test_ftrace_alive(m);
4580 		}
4581 		if (iter->snapshot && trace_empty(iter))
4582 			print_snapshot_help(m, iter);
4583 		else if (iter->trace && iter->trace->print_header)
4584 			iter->trace->print_header(m);
4585 		else
4586 			trace_default_header(m);
4587 
4588 	} else if (iter->leftover) {
4589 		/*
4590 		 * If we filled the seq_file buffer earlier, we
4591 		 * want to just show it now.
4592 		 */
4593 		ret = trace_print_seq(m, &iter->seq);
4594 
4595 		/* ret should this time be zero, but you never know */
4596 		iter->leftover = ret;
4597 
4598 	} else {
4599 		ret = print_trace_line(iter);
4600 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4601 			iter->seq.full = 0;
4602 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4603 		}
4604 		ret = trace_print_seq(m, &iter->seq);
4605 		/*
4606 		 * If we overflow the seq_file buffer, then it will
4607 		 * ask us for this data again at start up.
4608 		 * Use that instead.
4609 		 *  ret is 0 if seq_file write succeeded.
4610 		 *        -1 otherwise.
4611 		 */
4612 		iter->leftover = ret;
4613 	}
4614 
4615 	return 0;
4616 }
4617 
4618 /*
4619  * Should be used after trace_array_get(); trace_types_lock
4620  * ensures that i_cdev was already initialized.
4621  */
4622 static inline int tracing_get_cpu(struct inode *inode)
4623 {
4624 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4625 		return (long)inode->i_cdev - 1;
4626 	return RING_BUFFER_ALL_CPUS;
4627 }
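
/*
 * Sketch of the encoding (assumed from the "- 1" above and the
 * trace_create_cpu_file() reference): per-CPU files store cpu + 1 in
 * i_cdev so that NULL can mean "no CPU selected".  A per_cpu/cpu2/trace
 * file would therefore carry i_cdev == (void *)3 and this helper
 * returns 2, while a top-level file (i_cdev == NULL) returns
 * RING_BUFFER_ALL_CPUS.
 */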
4628 
4629 static const struct seq_operations tracer_seq_ops = {
4630 	.start		= s_start,
4631 	.next		= s_next,
4632 	.stop		= s_stop,
4633 	.show		= s_show,
4634 };
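
/*
 * Minimal sketch of how the seq_file core drives these callbacks when
 * the "trace" file is read (generic seq_file behaviour, simplified):
 *
 *   p = s_start(m, &pos);            // take locks, sync iter with *pos
 *   while (p) {
 *           s_show(m, p);            // format one entry into m
 *           p = s_next(m, p, &pos);  // advance to the next entry
 *   }
 *   s_stop(m, p);                    // drop the locks taken in s_start()
 *
 * The real seq_read() also handles buffer overflow and restarting at
 * the saved position, which is what iter->leftover deals with above.
 */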
4635 
4636 /*
4637  * Note, as iter itself can be allocated and freed in different
4638  * ways, this function is only used to free its content, and not
4639  * the iterator itself. The only requirement for all the allocations
4640  * is that they must zero all fields (kzalloc), as freeing works with
4641  * either allocated content or NULL.
4642  */
4643 static void free_trace_iter_content(struct trace_iterator *iter)
4644 {
4645 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4646 	if (iter->fmt != static_fmt_buf)
4647 		kfree(iter->fmt);
4648 
4649 	kfree(iter->temp);
4650 	kfree(iter->buffer_iter);
4651 	mutex_destroy(&iter->mutex);
4652 	free_cpumask_var(iter->started);
4653 }
4654 
4655 static struct trace_iterator *
4656 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4657 {
4658 	struct trace_array *tr = inode->i_private;
4659 	struct trace_iterator *iter;
4660 	int cpu;
4661 
4662 	if (tracing_disabled)
4663 		return ERR_PTR(-ENODEV);
4664 
4665 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4666 	if (!iter)
4667 		return ERR_PTR(-ENOMEM);
4668 
4669 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4670 				    GFP_KERNEL);
4671 	if (!iter->buffer_iter)
4672 		goto release;
4673 
4674 	/*
4675 	 * trace_find_next_entry() may need to save off iter->ent.
4676 	 * It will place it into the iter->temp buffer. As most
4677 	 * events are less than 128 bytes, allocate a buffer of that size.
4678 	 * If one is greater, then trace_find_next_entry() will
4679 	 * allocate a new buffer to adjust for the bigger iter->ent.
4680 	 * It's not critical if it fails to get allocated here.
4681 	 */
4682 	iter->temp = kmalloc(128, GFP_KERNEL);
4683 	if (iter->temp)
4684 		iter->temp_size = 128;
4685 
4686 	/*
4687 	 * trace_event_printf() may need to modify the given format
4688 	 * string to replace %p with %px so that it shows the real address
4689 	 * instead of a hash value. However, that is only for event
4690 	 * tracing; other tracers may not need it. Defer the allocation
4691 	 * until it is needed.
4692 	 */
4693 	iter->fmt = NULL;
4694 	iter->fmt_size = 0;
4695 
4696 	mutex_lock(&trace_types_lock);
4697 	iter->trace = tr->current_trace;
4698 
4699 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4700 		goto fail;
4701 
4702 	iter->tr = tr;
4703 
4704 #ifdef CONFIG_TRACER_MAX_TRACE
4705 	/* Currently only the top directory has a snapshot */
4706 	if (tr->current_trace->print_max || snapshot)
4707 		iter->array_buffer = &tr->max_buffer;
4708 	else
4709 #endif
4710 		iter->array_buffer = &tr->array_buffer;
4711 	iter->snapshot = snapshot;
4712 	iter->pos = -1;
4713 	iter->cpu_file = tracing_get_cpu(inode);
4714 	mutex_init(&iter->mutex);
4715 
4716 	/* Notify the tracer early; before we stop tracing. */
4717 	if (iter->trace->open)
4718 		iter->trace->open(iter);
4719 
4720 	/* Annotate start of buffers if we had overruns */
4721 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4722 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4723 
4724 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4725 	if (trace_clocks[tr->clock_id].in_ns)
4726 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4727 
4728 	/*
4729 	 * If pause-on-trace is enabled, then stop the trace while
4730 	 * dumping, unless this is the "snapshot" file
4731 	 */
4732 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4733 		tracing_stop_tr(tr);
4734 
4735 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4736 		for_each_tracing_cpu(cpu) {
4737 			iter->buffer_iter[cpu] =
4738 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4739 							 cpu, GFP_KERNEL);
4740 		}
4741 		ring_buffer_read_prepare_sync();
4742 		for_each_tracing_cpu(cpu) {
4743 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4744 			tracing_iter_reset(iter, cpu);
4745 		}
4746 	} else {
4747 		cpu = iter->cpu_file;
4748 		iter->buffer_iter[cpu] =
4749 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4750 						 cpu, GFP_KERNEL);
4751 		ring_buffer_read_prepare_sync();
4752 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4753 		tracing_iter_reset(iter, cpu);
4754 	}
4755 
4756 	mutex_unlock(&trace_types_lock);
4757 
4758 	return iter;
4759 
4760  fail:
4761 	mutex_unlock(&trace_types_lock);
4762 	free_trace_iter_content(iter);
4763 release:
4764 	seq_release_private(inode, file);
4765 	return ERR_PTR(-ENOMEM);
4766 }
4767 
4768 int tracing_open_generic(struct inode *inode, struct file *filp)
4769 {
4770 	int ret;
4771 
4772 	ret = tracing_check_open_get_tr(NULL);
4773 	if (ret)
4774 		return ret;
4775 
4776 	filp->private_data = inode->i_private;
4777 	return 0;
4778 }
4779 
4780 bool tracing_is_disabled(void)
4781 {
4782 	return (tracing_disabled) ? true : false;
4783 }
4784 
4785 /*
4786  * Open and update trace_array ref count.
4787  * Must have the current trace_array passed to it.
4788  */
4789 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4790 {
4791 	struct trace_array *tr = inode->i_private;
4792 	int ret;
4793 
4794 	ret = tracing_check_open_get_tr(tr);
4795 	if (ret)
4796 		return ret;
4797 
4798 	filp->private_data = inode->i_private;
4799 
4800 	return 0;
4801 }
4802 
4803 /*
4804  * The private pointer of the inode is the trace_event_file.
4805  * Update the tr ref count associated with it.
4806  */
4807 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4808 {
4809 	struct trace_event_file *file = inode->i_private;
4810 	int ret;
4811 
4812 	ret = tracing_check_open_get_tr(file->tr);
4813 	if (ret)
4814 		return ret;
4815 
4816 	mutex_lock(&event_mutex);
4817 
4818 	/* Fail if the file is marked for removal */
4819 	if (file->flags & EVENT_FILE_FL_FREED) {
4820 		trace_array_put(file->tr);
4821 		ret = -ENODEV;
4822 	} else {
4823 		event_file_get(file);
4824 	}
4825 
4826 	mutex_unlock(&event_mutex);
4827 	if (ret)
4828 		return ret;
4829 
4830 	filp->private_data = inode->i_private;
4831 
4832 	return 0;
4833 }
4834 
4835 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4836 {
4837 	struct trace_event_file *file = inode->i_private;
4838 
4839 	trace_array_put(file->tr);
4840 	event_file_put(file);
4841 
4842 	return 0;
4843 }
4844 
4845 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4846 {
4847 	tracing_release_file_tr(inode, filp);
4848 	return single_release(inode, filp);
4849 }
4850 
4851 static int tracing_mark_open(struct inode *inode, struct file *filp)
4852 {
4853 	stream_open(inode, filp);
4854 	return tracing_open_generic_tr(inode, filp);
4855 }
4856 
4857 static int tracing_release(struct inode *inode, struct file *file)
4858 {
4859 	struct trace_array *tr = inode->i_private;
4860 	struct seq_file *m = file->private_data;
4861 	struct trace_iterator *iter;
4862 	int cpu;
4863 
4864 	if (!(file->f_mode & FMODE_READ)) {
4865 		trace_array_put(tr);
4866 		return 0;
4867 	}
4868 
4869 	/* Writes do not use seq_file */
4870 	iter = m->private;
4871 	mutex_lock(&trace_types_lock);
4872 
4873 	for_each_tracing_cpu(cpu) {
4874 		if (iter->buffer_iter[cpu])
4875 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4876 	}
4877 
4878 	if (iter->trace && iter->trace->close)
4879 		iter->trace->close(iter);
4880 
4881 	if (!iter->snapshot && tr->stop_count)
4882 		/* reenable tracing if it was previously enabled */
4883 		tracing_start_tr(tr);
4884 
4885 	__trace_array_put(tr);
4886 
4887 	mutex_unlock(&trace_types_lock);
4888 
4889 	free_trace_iter_content(iter);
4890 	seq_release_private(inode, file);
4891 
4892 	return 0;
4893 }
4894 
4895 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4896 {
4897 	struct trace_array *tr = inode->i_private;
4898 
4899 	trace_array_put(tr);
4900 	return 0;
4901 }
4902 
4903 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4904 {
4905 	struct trace_array *tr = inode->i_private;
4906 
4907 	trace_array_put(tr);
4908 
4909 	return single_release(inode, file);
4910 }
4911 
4912 static int tracing_open(struct inode *inode, struct file *file)
4913 {
4914 	struct trace_array *tr = inode->i_private;
4915 	struct trace_iterator *iter;
4916 	int ret;
4917 
4918 	ret = tracing_check_open_get_tr(tr);
4919 	if (ret)
4920 		return ret;
4921 
4922 	/* If this file was open for write, then erase contents */
4923 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4924 		int cpu = tracing_get_cpu(inode);
4925 		struct array_buffer *trace_buf = &tr->array_buffer;
4926 
4927 #ifdef CONFIG_TRACER_MAX_TRACE
4928 		if (tr->current_trace->print_max)
4929 			trace_buf = &tr->max_buffer;
4930 #endif
4931 
4932 		if (cpu == RING_BUFFER_ALL_CPUS)
4933 			tracing_reset_online_cpus(trace_buf);
4934 		else
4935 			tracing_reset_cpu(trace_buf, cpu);
4936 	}
4937 
4938 	if (file->f_mode & FMODE_READ) {
4939 		iter = __tracing_open(inode, file, false);
4940 		if (IS_ERR(iter))
4941 			ret = PTR_ERR(iter);
4942 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4943 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4944 	}
4945 
4946 	if (ret < 0)
4947 		trace_array_put(tr);
4948 
4949 	return ret;
4950 }
4951 
4952 /*
4953  * Some tracers are not suitable for instance buffers.
4954  * A tracer is always available for the global array (toplevel)
4955  * or if it explicitly states that it is.
4956  */
4957 static bool
4958 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4959 {
4960 #ifdef CONFIG_TRACER_SNAPSHOT
4961 	/* arrays with mapped buffer range do not have snapshots */
4962 	if (tr->range_addr_start && t->use_max_tr)
4963 		return false;
4964 #endif
4965 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4966 }
4967 
4968 /* Find the next tracer that this trace array may use */
4969 static struct tracer *
4970 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4971 {
4972 	while (t && !trace_ok_for_array(t, tr))
4973 		t = t->next;
4974 
4975 	return t;
4976 }
4977 
4978 static void *
4979 t_next(struct seq_file *m, void *v, loff_t *pos)
4980 {
4981 	struct trace_array *tr = m->private;
4982 	struct tracer *t = v;
4983 
4984 	(*pos)++;
4985 
4986 	if (t)
4987 		t = get_tracer_for_array(tr, t->next);
4988 
4989 	return t;
4990 }
4991 
4992 static void *t_start(struct seq_file *m, loff_t *pos)
4993 {
4994 	struct trace_array *tr = m->private;
4995 	struct tracer *t;
4996 	loff_t l = 0;
4997 
4998 	mutex_lock(&trace_types_lock);
4999 
5000 	t = get_tracer_for_array(tr, trace_types);
5001 	for (; t && l < *pos; t = t_next(m, t, &l))
5002 			;
5003 
5004 	return t;
5005 }
5006 
5007 static void t_stop(struct seq_file *m, void *p)
5008 {
5009 	mutex_unlock(&trace_types_lock);
5010 }
5011 
5012 static int t_show(struct seq_file *m, void *v)
5013 {
5014 	struct tracer *t = v;
5015 
5016 	if (!t)
5017 		return 0;
5018 
5019 	seq_puts(m, t->name);
5020 	if (t->next)
5021 		seq_putc(m, ' ');
5022 	else
5023 		seq_putc(m, '\n');
5024 
5025 	return 0;
5026 }
5027 
5028 static const struct seq_operations show_traces_seq_ops = {
5029 	.start		= t_start,
5030 	.next		= t_next,
5031 	.stop		= t_stop,
5032 	.show		= t_show,
5033 };
5034 
5035 static int show_traces_open(struct inode *inode, struct file *file)
5036 {
5037 	struct trace_array *tr = inode->i_private;
5038 	struct seq_file *m;
5039 	int ret;
5040 
5041 	ret = tracing_check_open_get_tr(tr);
5042 	if (ret)
5043 		return ret;
5044 
5045 	ret = seq_open(file, &show_traces_seq_ops);
5046 	if (ret) {
5047 		trace_array_put(tr);
5048 		return ret;
5049 	}
5050 
5051 	m = file->private_data;
5052 	m->private = tr;
5053 
5054 	return 0;
5055 }
5056 
5057 static int tracing_seq_release(struct inode *inode, struct file *file)
5058 {
5059 	struct trace_array *tr = inode->i_private;
5060 
5061 	trace_array_put(tr);
5062 	return seq_release(inode, file);
5063 }
5064 
5065 static ssize_t
5066 tracing_write_stub(struct file *filp, const char __user *ubuf,
5067 		   size_t count, loff_t *ppos)
5068 {
5069 	return count;
5070 }
5071 
5072 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5073 {
5074 	int ret;
5075 
5076 	if (file->f_mode & FMODE_READ)
5077 		ret = seq_lseek(file, offset, whence);
5078 	else
5079 		file->f_pos = ret = 0;
5080 
5081 	return ret;
5082 }
5083 
5084 static const struct file_operations tracing_fops = {
5085 	.open		= tracing_open,
5086 	.read		= seq_read,
5087 	.read_iter	= seq_read_iter,
5088 	.splice_read	= copy_splice_read,
5089 	.write		= tracing_write_stub,
5090 	.llseek		= tracing_lseek,
5091 	.release	= tracing_release,
5092 };
5093 
5094 static const struct file_operations show_traces_fops = {
5095 	.open		= show_traces_open,
5096 	.read		= seq_read,
5097 	.llseek		= seq_lseek,
5098 	.release	= tracing_seq_release,
5099 };
5100 
5101 static ssize_t
5102 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5103 		     size_t count, loff_t *ppos)
5104 {
5105 	struct trace_array *tr = file_inode(filp)->i_private;
5106 	char *mask_str;
5107 	int len;
5108 
5109 	len = snprintf(NULL, 0, "%*pb\n",
5110 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5111 	mask_str = kmalloc(len, GFP_KERNEL);
5112 	if (!mask_str)
5113 		return -ENOMEM;
5114 
5115 	len = snprintf(mask_str, len, "%*pb\n",
5116 		       cpumask_pr_args(tr->tracing_cpumask));
5117 	if (len >= count) {
5118 		count = -EINVAL;
5119 		goto out_err;
5120 	}
5121 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5122 
5123 out_err:
5124 	kfree(mask_str);
5125 
5126 	return count;
5127 }
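
/*
 * Example output (hypothetical system): with 8 possible CPUs and all of
 * them set in tr->tracing_cpumask, the "%*pb" above prints the mask as
 * a hex bitmap, so reading tracing_cpumask returns "ff\n".  Clearing
 * CPUs 4-7 would make it read "0f\n".
 */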
5128 
5129 int tracing_set_cpumask(struct trace_array *tr,
5130 			cpumask_var_t tracing_cpumask_new)
5131 {
5132 	int cpu;
5133 
5134 	if (!tr)
5135 		return -EINVAL;
5136 
5137 	local_irq_disable();
5138 	arch_spin_lock(&tr->max_lock);
5139 	for_each_tracing_cpu(cpu) {
5140 		/*
5141 		 * Increase/decrease the disabled counter if we are
5142 		 * about to flip a bit in the cpumask:
5143 		 */
5144 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5145 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5146 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5147 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5148 #ifdef CONFIG_TRACER_MAX_TRACE
5149 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5150 #endif
5151 		}
5152 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5153 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5154 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5155 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5156 #ifdef CONFIG_TRACER_MAX_TRACE
5157 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5158 #endif
5159 		}
5160 	}
5161 	arch_spin_unlock(&tr->max_lock);
5162 	local_irq_enable();
5163 
5164 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5165 
5166 	return 0;
5167 }
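
/*
 * Usage sketch (tracefs path assumed to be mounted at the usual
 * location): the write handler below parses a user-supplied mask and
 * hands it to tracing_set_cpumask(), so from user space something like
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * keeps CPUs 0 and 1 recording and disables recording on every other
 * CPU, flipping the per-CPU disabled counters exactly as above.
 */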
5168 
5169 static ssize_t
5170 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5171 		      size_t count, loff_t *ppos)
5172 {
5173 	struct trace_array *tr = file_inode(filp)->i_private;
5174 	cpumask_var_t tracing_cpumask_new;
5175 	int err;
5176 
5177 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5178 		return -ENOMEM;
5179 
5180 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5181 	if (err)
5182 		goto err_free;
5183 
5184 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5185 	if (err)
5186 		goto err_free;
5187 
5188 	free_cpumask_var(tracing_cpumask_new);
5189 
5190 	return count;
5191 
5192 err_free:
5193 	free_cpumask_var(tracing_cpumask_new);
5194 
5195 	return err;
5196 }
5197 
5198 static const struct file_operations tracing_cpumask_fops = {
5199 	.open		= tracing_open_generic_tr,
5200 	.read		= tracing_cpumask_read,
5201 	.write		= tracing_cpumask_write,
5202 	.release	= tracing_release_generic_tr,
5203 	.llseek		= generic_file_llseek,
5204 };
5205 
5206 static int tracing_trace_options_show(struct seq_file *m, void *v)
5207 {
5208 	struct tracer_opt *trace_opts;
5209 	struct trace_array *tr = m->private;
5210 	u32 tracer_flags;
5211 	int i;
5212 
5213 	mutex_lock(&trace_types_lock);
5214 	tracer_flags = tr->current_trace->flags->val;
5215 	trace_opts = tr->current_trace->flags->opts;
5216 
5217 	for (i = 0; trace_options[i]; i++) {
5218 		if (tr->trace_flags & (1 << i))
5219 			seq_printf(m, "%s\n", trace_options[i]);
5220 		else
5221 			seq_printf(m, "no%s\n", trace_options[i]);
5222 	}
5223 
5224 	for (i = 0; trace_opts[i].name; i++) {
5225 		if (tracer_flags & trace_opts[i].bit)
5226 			seq_printf(m, "%s\n", trace_opts[i].name);
5227 		else
5228 			seq_printf(m, "no%s\n", trace_opts[i].name);
5229 	}
5230 	mutex_unlock(&trace_types_lock);
5231 
5232 	return 0;
5233 }
5234 
5235 static int __set_tracer_option(struct trace_array *tr,
5236 			       struct tracer_flags *tracer_flags,
5237 			       struct tracer_opt *opts, int neg)
5238 {
5239 	struct tracer *trace = tracer_flags->trace;
5240 	int ret;
5241 
5242 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5243 	if (ret)
5244 		return ret;
5245 
5246 	if (neg)
5247 		tracer_flags->val &= ~opts->bit;
5248 	else
5249 		tracer_flags->val |= opts->bit;
5250 	return 0;
5251 }
5252 
5253 /* Try to assign a tracer specific option */
5254 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5255 {
5256 	struct tracer *trace = tr->current_trace;
5257 	struct tracer_flags *tracer_flags = trace->flags;
5258 	struct tracer_opt *opts = NULL;
5259 	int i;
5260 
5261 	for (i = 0; tracer_flags->opts[i].name; i++) {
5262 		opts = &tracer_flags->opts[i];
5263 
5264 		if (strcmp(cmp, opts->name) == 0)
5265 			return __set_tracer_option(tr, trace->flags, opts, neg);
5266 	}
5267 
5268 	return -EINVAL;
5269 }
5270 
5271 /* Some tracers require overwrite to stay enabled */
5272 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5273 {
5274 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5275 		return -1;
5276 
5277 	return 0;
5278 }
5279 
5280 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5281 {
5282 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5283 	    (mask == TRACE_ITER_RECORD_CMD))
5284 		lockdep_assert_held(&event_mutex);
5285 
5286 	/* do nothing if flag is already set */
5287 	if (!!(tr->trace_flags & mask) == !!enabled)
5288 		return 0;
5289 
5290 	/* Give the tracer a chance to approve the change */
5291 	if (tr->current_trace->flag_changed)
5292 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5293 			return -EINVAL;
5294 
5295 	if (enabled)
5296 		tr->trace_flags |= mask;
5297 	else
5298 		tr->trace_flags &= ~mask;
5299 
5300 	if (mask == TRACE_ITER_RECORD_CMD)
5301 		trace_event_enable_cmd_record(enabled);
5302 
5303 	if (mask == TRACE_ITER_RECORD_TGID) {
5304 
5305 		if (trace_alloc_tgid_map() < 0) {
5306 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5307 			return -ENOMEM;
5308 		}
5309 
5310 		trace_event_enable_tgid_record(enabled);
5311 	}
5312 
5313 	if (mask == TRACE_ITER_EVENT_FORK)
5314 		trace_event_follow_fork(tr, enabled);
5315 
5316 	if (mask == TRACE_ITER_FUNC_FORK)
5317 		ftrace_pid_follow_fork(tr, enabled);
5318 
5319 	if (mask == TRACE_ITER_OVERWRITE) {
5320 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5321 #ifdef CONFIG_TRACER_MAX_TRACE
5322 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5323 #endif
5324 	}
5325 
5326 	if (mask == TRACE_ITER_PRINTK) {
5327 		trace_printk_start_stop_comm(enabled);
5328 		trace_printk_control(enabled);
5329 	}
5330 
5331 	return 0;
5332 }
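
/*
 * Caller sketch (illustrative only): trace_set_options() below takes
 * event_mutex and trace_types_lock before calling in here, and the
 * lockdep assertion above requires event_mutex for the RECORD_CMD and
 * RECORD_TGID bits.  A comparable caller would look like:
 *
 *	mutex_lock(&event_mutex);
 *	mutex_lock(&trace_types_lock);
 *	ret = set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 0);
 *	mutex_unlock(&trace_types_lock);
 *	mutex_unlock(&event_mutex);
 *
 * On success this also propagates the change to the ring buffer via
 * ring_buffer_change_overwrite() as shown above.
 */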
5333 
5334 int trace_set_options(struct trace_array *tr, char *option)
5335 {
5336 	char *cmp;
5337 	int neg = 0;
5338 	int ret;
5339 	size_t orig_len = strlen(option);
5340 	int len;
5341 
5342 	cmp = strstrip(option);
5343 
5344 	len = str_has_prefix(cmp, "no");
5345 	if (len)
5346 		neg = 1;
5347 
5348 	cmp += len;
5349 
5350 	mutex_lock(&event_mutex);
5351 	mutex_lock(&trace_types_lock);
5352 
5353 	ret = match_string(trace_options, -1, cmp);
5354 	/* If no generic option matched, try the tracer-specific options */
5355 	if (ret < 0)
5356 		ret = set_tracer_option(tr, cmp, neg);
5357 	else
5358 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5359 
5360 	mutex_unlock(&trace_types_lock);
5361 	mutex_unlock(&event_mutex);
5362 
5363 	/*
5364 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5365 	 * turn it back into a space.
5366 	 */
5367 	if (orig_len > strlen(option))
5368 		option[strlen(option)] = ' ';
5369 
5370 	return ret;
5371 }
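
/*
 * Worked example (assuming the flag is listed as "irq-info" in
 * trace_options[]): passing "noirq-info" makes str_has_prefix() set
 * neg, match_string() finds "irq-info", and the call becomes
 * set_tracer_flag(tr, TRACE_ITER_IRQ_INFO, 0).  A string matching
 * neither the generic list nor the tracer-specific options ends up
 * returning -EINVAL.
 */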
5372 
5373 static void __init apply_trace_boot_options(void)
5374 {
5375 	char *buf = trace_boot_options_buf;
5376 	char *option;
5377 
5378 	while (true) {
5379 		option = strsep(&buf, ",");
5380 
5381 		if (!option)
5382 			break;
5383 
5384 		if (*option)
5385 			trace_set_options(&global_trace, option);
5386 
5387 		/* Put back the comma to allow this to be called again */
5388 		if (buf)
5389 			*(buf - 1) = ',';
5390 	}
5391 }
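
/*
 * Boot-time sketch: trace_boot_options_buf is filled from the
 * "trace_options=" kernel command line parameter, so booting with e.g.
 *
 *   trace_options=sym-offset,noirq-info
 *
 * (option names assumed) results in one trace_set_options() call per
 * comma-separated token, with the commas restored afterwards so the
 * buffer can be parsed again later.
 */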
5392 
5393 static ssize_t
5394 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5395 			size_t cnt, loff_t *ppos)
5396 {
5397 	struct seq_file *m = filp->private_data;
5398 	struct trace_array *tr = m->private;
5399 	char buf[64];
5400 	int ret;
5401 
5402 	if (cnt >= sizeof(buf))
5403 		return -EINVAL;
5404 
5405 	if (copy_from_user(buf, ubuf, cnt))
5406 		return -EFAULT;
5407 
5408 	buf[cnt] = 0;
5409 
5410 	ret = trace_set_options(tr, buf);
5411 	if (ret < 0)
5412 		return ret;
5413 
5414 	*ppos += cnt;
5415 
5416 	return cnt;
5417 }
5418 
5419 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5420 {
5421 	struct trace_array *tr = inode->i_private;
5422 	int ret;
5423 
5424 	ret = tracing_check_open_get_tr(tr);
5425 	if (ret)
5426 		return ret;
5427 
5428 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5429 	if (ret < 0)
5430 		trace_array_put(tr);
5431 
5432 	return ret;
5433 }
5434 
5435 static const struct file_operations tracing_iter_fops = {
5436 	.open		= tracing_trace_options_open,
5437 	.read		= seq_read,
5438 	.llseek		= seq_lseek,
5439 	.release	= tracing_single_release_tr,
5440 	.write		= tracing_trace_options_write,
5441 };
5442 
5443 static const char readme_msg[] =
5444 	"tracing mini-HOWTO:\n\n"
5445 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5446 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5447 	" Important files:\n"
5448 	"  trace\t\t\t- The static contents of the buffer\n"
5449 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5450 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5451 	"  current_tracer\t- function and latency tracers\n"
5452 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5453 	"  error_log\t- error log for failed commands (that support it)\n"
5454 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5455 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5456 	"  trace_clock\t\t- change the clock used to order events\n"
5457 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5458 	"      global:   Synced across CPUs but slows tracing down.\n"
5459 	"     counter:   Not a clock, but just an increment\n"
5460 	"      uptime:   Jiffy counter from time of boot\n"
5461 	"        perf:   Same clock that perf events use\n"
5462 #ifdef CONFIG_X86_64
5463 	"     x86-tsc:   TSC cycle counter\n"
5464 #endif
5465 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5466 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5467 	"    absolute:   Absolute (standalone) timestamp\n"
5468 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5469 	"\n  trace_marker_raw\t\t- Writes into this file write binary data into the kernel buffer\n"
5470 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5471 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5472 	"\t\t\t  Remove sub-buffer with rmdir\n"
5473 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5474 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5475 	"\t\t\t  option name\n"
5476 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5477 #ifdef CONFIG_DYNAMIC_FTRACE
5478 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5479 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5480 	"\t\t\t  functions\n"
5481 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5482 	"\t     modules: Can select a group via module\n"
5483 	"\t      Format: :mod:<module-name>\n"
5484 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5485 	"\t    triggers: a command to perform when function is hit\n"
5486 	"\t      Format: <function>:<trigger>[:count]\n"
5487 	"\t     trigger: traceon, traceoff\n"
5488 	"\t\t      enable_event:<system>:<event>\n"
5489 	"\t\t      disable_event:<system>:<event>\n"
5490 #ifdef CONFIG_STACKTRACE
5491 	"\t\t      stacktrace\n"
5492 #endif
5493 #ifdef CONFIG_TRACER_SNAPSHOT
5494 	"\t\t      snapshot\n"
5495 #endif
5496 	"\t\t      dump\n"
5497 	"\t\t      cpudump\n"
5498 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5499 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5500 	"\t     The first one will disable tracing every time do_fault is hit\n"
5501 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5502 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5503 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5504 	"\t       the counter will not decrement. It only decrements when the\n"
5505 	"\t       trigger did work\n"
5506 	"\t     To remove trigger without count:\n"
5507 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5508 	"\t     To remove trigger with a count:\n"
5509 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5510 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5511 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5512 	"\t    modules: Can select a group via module command :mod:\n"
5513 	"\t    Does not accept triggers\n"
5514 #endif /* CONFIG_DYNAMIC_FTRACE */
5515 #ifdef CONFIG_FUNCTION_TRACER
5516 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5517 	"\t\t    (function)\n"
5518 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5519 	"\t\t    (function)\n"
5520 #endif
5521 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5522 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5523 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5524 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5525 #endif
5526 #ifdef CONFIG_TRACER_SNAPSHOT
5527 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5528 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5529 	"\t\t\t  information\n"
5530 #endif
5531 #ifdef CONFIG_STACK_TRACER
5532 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5533 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5534 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5535 	"\t\t\t  new trace)\n"
5536 #ifdef CONFIG_DYNAMIC_FTRACE
5537 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5538 	"\t\t\t  traces\n"
5539 #endif
5540 #endif /* CONFIG_STACK_TRACER */
5541 #ifdef CONFIG_DYNAMIC_EVENTS
5542 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5543 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5544 #endif
5545 #ifdef CONFIG_KPROBE_EVENTS
5546 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5547 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5548 #endif
5549 #ifdef CONFIG_UPROBE_EVENTS
5550 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5551 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5552 #endif
5553 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5554     defined(CONFIG_FPROBE_EVENTS)
5555 	"\t  accepts: event-definitions (one definition per line)\n"
5556 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5557 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5558 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5559 #endif
5560 #ifdef CONFIG_FPROBE_EVENTS
5561 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5562 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5563 #endif
5564 #ifdef CONFIG_HIST_TRIGGERS
5565 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5566 #endif
5567 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5568 	"\t           -:[<group>/][<event>]\n"
5569 #ifdef CONFIG_KPROBE_EVENTS
5570 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5571   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5572 #endif
5573 #ifdef CONFIG_UPROBE_EVENTS
5574   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5575 #endif
5576 	"\t     args: <name>=fetcharg[:type]\n"
5577 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5578 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5579 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5580 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5581 	"\t           <argname>[->field[->field|.field...]],\n"
5582 #endif
5583 #else
5584 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5585 #endif
5586 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5587 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5588 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5589 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5590 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5591 #ifdef CONFIG_HIST_TRIGGERS
5592 	"\t    field: <stype> <name>;\n"
5593 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5594 	"\t           [unsigned] char/int/long\n"
5595 #endif
5596 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5597 	"\t            of the <attached-group>/<attached-event>.\n"
5598 #endif
5599 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5600 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5601 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5602 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5603 	"\t\t\t  events\n"
5604 	"      filter\t\t- If set, only events passing filter are traced\n"
5605 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5606 	"\t\t\t  <event>:\n"
5607 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5608 	"      filter\t\t- If set, only events passing filter are traced\n"
5609 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5610 	"\t    Format: <trigger>[:count][if <filter>]\n"
5611 	"\t   trigger: traceon, traceoff\n"
5612 	"\t            enable_event:<system>:<event>\n"
5613 	"\t            disable_event:<system>:<event>\n"
5614 #ifdef CONFIG_HIST_TRIGGERS
5615 	"\t            enable_hist:<system>:<event>\n"
5616 	"\t            disable_hist:<system>:<event>\n"
5617 #endif
5618 #ifdef CONFIG_STACKTRACE
5619 	"\t\t    stacktrace\n"
5620 #endif
5621 #ifdef CONFIG_TRACER_SNAPSHOT
5622 	"\t\t    snapshot\n"
5623 #endif
5624 #ifdef CONFIG_HIST_TRIGGERS
5625 	"\t\t    hist (see below)\n"
5626 #endif
5627 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5628 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5629 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5630 	"\t                  events/block/block_unplug/trigger\n"
5631 	"\t   The first disables tracing every time block_unplug is hit.\n"
5632 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5633 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5634 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5635 	"\t   Like function triggers, the counter is only decremented if it\n"
5636 	"\t    enabled or disabled tracing.\n"
5637 	"\t   To remove a trigger without a count:\n"
5638 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5639 	"\t   To remove a trigger with a count:\n"
5640 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5641 	"\t   Filters can be ignored when removing a trigger.\n"
5642 #ifdef CONFIG_HIST_TRIGGERS
5643 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5644 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5645 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5646 	"\t            [:values=<field1[,field2,...]>]\n"
5647 	"\t            [:sort=<field1[,field2,...]>]\n"
5648 	"\t            [:size=#entries]\n"
5649 	"\t            [:pause][:continue][:clear]\n"
5650 	"\t            [:name=histname1]\n"
5651 	"\t            [:nohitcount]\n"
5652 	"\t            [:<handler>.<action>]\n"
5653 	"\t            [if <filter>]\n\n"
5654 	"\t    Note, special fields can be used as well:\n"
5655 	"\t            common_timestamp - to record current timestamp\n"
5656 	"\t            common_cpu - to record the CPU the event happened on\n"
5657 	"\n"
5658 	"\t    A hist trigger variable can be:\n"
5659 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5660 	"\t        - a reference to another variable e.g. y=$x,\n"
5661 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5662 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5663 	"\n"
5664 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5665 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5666 	"\t    variable reference, field or numeric literal.\n"
5667 	"\n"
5668 	"\t    When a matching event is hit, an entry is added to a hash\n"
5669 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5670 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5671 	"\t    correspond to fields in the event's format description.  Keys\n"
5672 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5673 	"\t    Compound keys consisting of up to two fields can be specified\n"
5674 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5675 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5676 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5677 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5678 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5679 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5680 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5681 	"\t    its histogram data will be shared with other triggers of the\n"
5682 	"\t    same name, and trigger hits will update this common data.\n\n"
5683 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5684 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5685 	"\t    triggers attached to an event, there will be a table for each\n"
5686 	"\t    trigger in the output.  The table displayed for a named\n"
5687 	"\t    trigger will be the same as any other instance having the\n"
5688 	"\t    same name.  The default format used to display a given field\n"
5689 	"\t    can be modified by appending any of the following modifiers\n"
5690 	"\t    to the field name, as applicable:\n\n"
5691 	"\t            .hex        display a number as a hex value\n"
5692 	"\t            .sym        display an address as a symbol\n"
5693 	"\t            .sym-offset display an address as a symbol and offset\n"
5694 	"\t            .execname   display a common_pid as a program name\n"
5695 	"\t            .syscall    display a syscall id as a syscall name\n"
5696 	"\t            .log2       display log2 value rather than raw number\n"
5697 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5698 	"\t            .usecs      display a common_timestamp in microseconds\n"
5699 	"\t            .percent    display a number as a percentage value\n"
5700 	"\t            .graph      display a bar-graph of a value\n\n"
5701 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5702 	"\t    trigger or to start a hist trigger but not log any events\n"
5703 	"\t    until told to do so.  'continue' can be used to start or\n"
5704 	"\t    restart a paused hist trigger.\n\n"
5705 	"\t    The 'clear' parameter will clear the contents of a running\n"
5706 	"\t    hist trigger and leave its current paused/active state\n"
5707 	"\t    unchanged.\n\n"
5708 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5709 	"\t    raw hitcount in the histogram.\n\n"
5710 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5711 	"\t    have one event conditionally start and stop another event's\n"
5712 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5713 	"\t    the enable_event and disable_event triggers.\n\n"
5714 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5715 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5716 	"\t        <handler>.<action>\n\n"
5717 	"\t    The available handlers are:\n\n"
5718 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5719 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5720 	"\t        onchange(var)            - invoke action if var changes\n\n"
5721 	"\t    The available actions are:\n\n"
5722 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5723 	"\t        save(field,...)                      - save current event fields\n"
5724 #ifdef CONFIG_TRACER_SNAPSHOT
5725 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5726 #endif
5727 #ifdef CONFIG_SYNTH_EVENTS
5728 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5729 	"\t  Write into this file to define/undefine new synthetic events.\n"
5730 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5731 #endif
5732 #endif
5733 ;
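
/*
 * Illustrative examples of the formats documented above (typical usage;
 * they assume CONFIG_KPROBE_EVENTS, CONFIG_HIST_TRIGGERS and $arg<N>
 * support are available on this kernel):
 *
 *   # echo 'p:myprobe do_sys_open dfd=$arg1 filename=$arg2 flags=$arg3 mode=$arg4' >> kprobe_events
 *
 * defines a kprobe event on do_sys_open() fetching its four arguments, and
 *
 *   # echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' > events/kmem/kmalloc/trigger
 *
 * attaches a hist trigger that aggregates kmalloc request sizes per task
 * and sorts the table by the summed size when the 'hist' file is read.
 */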
5734 
5735 static ssize_t
5736 tracing_readme_read(struct file *filp, char __user *ubuf,
5737 		       size_t cnt, loff_t *ppos)
5738 {
5739 	return simple_read_from_buffer(ubuf, cnt, ppos,
5740 					readme_msg, strlen(readme_msg));
5741 }
5742 
5743 static const struct file_operations tracing_readme_fops = {
5744 	.open		= tracing_open_generic,
5745 	.read		= tracing_readme_read,
5746 	.llseek		= generic_file_llseek,
5747 };
5748 
5749 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5750 static union trace_eval_map_item *
5751 update_eval_map(union trace_eval_map_item *ptr)
5752 {
5753 	if (!ptr->map.eval_string) {
5754 		if (ptr->tail.next) {
5755 			ptr = ptr->tail.next;
5756 			/* Set ptr to the next real item (skip head) */
5757 			ptr++;
5758 		} else
5759 			return NULL;
5760 	}
5761 	return ptr;
5762 }
5763 
5764 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5765 {
5766 	union trace_eval_map_item *ptr = v;
5767 
5768 	/*
5769 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5770 	 * This really should never happen.
5771 	 */
5772 	(*pos)++;
5773 	ptr = update_eval_map(ptr);
5774 	if (WARN_ON_ONCE(!ptr))
5775 		return NULL;
5776 
5777 	ptr++;
5778 	ptr = update_eval_map(ptr);
5779 
5780 	return ptr;
5781 }
5782 
5783 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5784 {
5785 	union trace_eval_map_item *v;
5786 	loff_t l = 0;
5787 
5788 	mutex_lock(&trace_eval_mutex);
5789 
5790 	v = trace_eval_maps;
5791 	if (v)
5792 		v++;
5793 
5794 	while (v && l < *pos) {
5795 		v = eval_map_next(m, v, &l);
5796 	}
5797 
5798 	return v;
5799 }
5800 
5801 static void eval_map_stop(struct seq_file *m, void *v)
5802 {
5803 	mutex_unlock(&trace_eval_mutex);
5804 }
5805 
5806 static int eval_map_show(struct seq_file *m, void *v)
5807 {
5808 	union trace_eval_map_item *ptr = v;
5809 
5810 	seq_printf(m, "%s %ld (%s)\n",
5811 		   ptr->map.eval_string, ptr->map.eval_value,
5812 		   ptr->map.system);
5813 
5814 	return 0;
5815 }
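
/*
 * For illustration: each line of the resulting 'eval_map' file has the form
 *
 *   <eval_string> <eval_value> (<system>)
 *
 * e.g. "HI_SOFTIRQ 0 (irq)", matching the seq_printf() format above.
 */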
5816 
5817 static const struct seq_operations tracing_eval_map_seq_ops = {
5818 	.start		= eval_map_start,
5819 	.next		= eval_map_next,
5820 	.stop		= eval_map_stop,
5821 	.show		= eval_map_show,
5822 };
5823 
5824 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5825 {
5826 	int ret;
5827 
5828 	ret = tracing_check_open_get_tr(NULL);
5829 	if (ret)
5830 		return ret;
5831 
5832 	return seq_open(filp, &tracing_eval_map_seq_ops);
5833 }
5834 
5835 static const struct file_operations tracing_eval_map_fops = {
5836 	.open		= tracing_eval_map_open,
5837 	.read		= seq_read,
5838 	.llseek		= seq_lseek,
5839 	.release	= seq_release,
5840 };
5841 
5842 static inline union trace_eval_map_item *
5843 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5844 {
5845 	/* Return tail of array given the head */
5846 	return ptr + ptr->head.length + 1;
5847 }
5848 
5849 static void
5850 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5851 			   int len)
5852 {
5853 	struct trace_eval_map **stop;
5854 	struct trace_eval_map **map;
5855 	union trace_eval_map_item *map_array;
5856 	union trace_eval_map_item *ptr;
5857 
5858 	stop = start + len;
5859 
5860 	/*
5861 	 * The trace_eval_maps list contains the maps plus a head and a tail
5862 	 * item, where the head holds the module and the length of the array,
5863 	 * and the tail holds a pointer to the next array in the list.
5864 	 */
5865 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5866 	if (!map_array) {
5867 		pr_warn("Unable to allocate trace eval mapping\n");
5868 		return;
5869 	}
5870 
5871 	mutex_lock(&trace_eval_mutex);
5872 
5873 	if (!trace_eval_maps)
5874 		trace_eval_maps = map_array;
5875 	else {
5876 		ptr = trace_eval_maps;
5877 		for (;;) {
5878 			ptr = trace_eval_jmp_to_tail(ptr);
5879 			if (!ptr->tail.next)
5880 				break;
5881 			ptr = ptr->tail.next;
5882 
5883 		}
5884 		ptr->tail.next = map_array;
5885 	}
5886 	map_array->head.mod = mod;
5887 	map_array->head.length = len;
5888 	map_array++;
5889 
5890 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5891 		map_array->map = **map;
5892 		map_array++;
5893 	}
5894 	memset(map_array, 0, sizeof(*map_array));
5895 
5896 	mutex_unlock(&trace_eval_mutex);
5897 }
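
/*
 * For illustration, with len == 3 the array allocated above (len + 2 items)
 * is laid out as:
 *
 *   [0] head  (head.mod, head.length = 3)
 *   [1] map   (copy of *start[0])
 *   [2] map   (copy of *start[1])
 *   [3] map   (copy of *start[2])
 *   [4] tail  (zeroed; tail.next chains to the next array, if any)
 *
 * which is why trace_eval_jmp_to_tail() returns ptr + ptr->head.length + 1.
 */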
5898 
5899 static void trace_create_eval_file(struct dentry *d_tracer)
5900 {
5901 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5902 			  NULL, &tracing_eval_map_fops);
5903 }
5904 
5905 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5906 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5907 static inline void trace_insert_eval_map_file(struct module *mod,
5908 			      struct trace_eval_map **start, int len) { }
5909 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5910 
5911 static void trace_insert_eval_map(struct module *mod,
5912 				  struct trace_eval_map **start, int len)
5913 {
5914 	struct trace_eval_map **map;
5915 
5916 	if (len <= 0)
5917 		return;
5918 
5919 	map = start;
5920 
5921 	trace_event_eval_update(map, len);
5922 
5923 	trace_insert_eval_map_file(mod, start, len);
5924 }
5925 
5926 static ssize_t
5927 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5928 		       size_t cnt, loff_t *ppos)
5929 {
5930 	struct trace_array *tr = filp->private_data;
5931 	char buf[MAX_TRACER_SIZE+2];
5932 	int r;
5933 
5934 	mutex_lock(&trace_types_lock);
5935 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5936 	mutex_unlock(&trace_types_lock);
5937 
5938 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5939 }
5940 
5941 int tracer_init(struct tracer *t, struct trace_array *tr)
5942 {
5943 	tracing_reset_online_cpus(&tr->array_buffer);
5944 	return t->init(tr);
5945 }
5946 
5947 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5948 {
5949 	int cpu;
5950 
5951 	for_each_tracing_cpu(cpu)
5952 		per_cpu_ptr(buf->data, cpu)->entries = val;
5953 }
5954 
5955 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5956 {
5957 	if (cpu == RING_BUFFER_ALL_CPUS) {
5958 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5959 	} else {
5960 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5961 	}
5962 }
5963 
5964 #ifdef CONFIG_TRACER_MAX_TRACE
5965 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5966 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5967 					struct array_buffer *size_buf, int cpu_id)
5968 {
5969 	int cpu, ret = 0;
5970 
5971 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5972 		for_each_tracing_cpu(cpu) {
5973 			ret = ring_buffer_resize(trace_buf->buffer,
5974 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5975 			if (ret < 0)
5976 				break;
5977 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5978 				per_cpu_ptr(size_buf->data, cpu)->entries;
5979 		}
5980 	} else {
5981 		ret = ring_buffer_resize(trace_buf->buffer,
5982 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5983 		if (ret == 0)
5984 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5985 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5986 	}
5987 
5988 	return ret;
5989 }
5990 #endif /* CONFIG_TRACER_MAX_TRACE */
5991 
5992 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5993 					unsigned long size, int cpu)
5994 {
5995 	int ret;
5996 
5997 	/*
5998 	 * If kernel or user changes the size of the ring buffer
5999 	 * we use the size that was given, and we can forget about
6000 	 * expanding it later.
6001 	 */
6002 	trace_set_ring_buffer_expanded(tr);
6003 
6004 	/* May be called before buffers are initialized */
6005 	if (!tr->array_buffer.buffer)
6006 		return 0;
6007 
6008 	/* Do not allow tracing while resizing ring buffer */
6009 	tracing_stop_tr(tr);
6010 
6011 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6012 	if (ret < 0)
6013 		goto out_start;
6014 
6015 #ifdef CONFIG_TRACER_MAX_TRACE
6016 	if (!tr->allocated_snapshot)
6017 		goto out;
6018 
6019 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6020 	if (ret < 0) {
6021 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6022 						     &tr->array_buffer, cpu);
6023 		if (r < 0) {
6024 			/*
6025 			 * AARGH! We are left with different
6026 			 * size max buffer!!!!
6027 			 * The max buffer is our "snapshot" buffer.
6028 			 * When a tracer needs a snapshot (one of the
6029 			 * latency tracers), it swaps the max buffer
6030 			 * with the saved snapshot. We succeeded in updating
6031 			 * the size of the main buffer, but failed to
6032 			 * update the size of the max buffer. But when we tried
6033 			 * to reset the main buffer to the original size, we
6034 			 * failed there too. This is very unlikely to
6035 			 * happen, but if it does, warn and kill all
6036 			 * tracing.
6037 			 */
6038 			WARN_ON(1);
6039 			tracing_disabled = 1;
6040 		}
6041 		goto out_start;
6042 	}
6043 
6044 	update_buffer_entries(&tr->max_buffer, cpu);
6045 
6046  out:
6047 #endif /* CONFIG_TRACER_MAX_TRACE */
6048 
6049 	update_buffer_entries(&tr->array_buffer, cpu);
6050  out_start:
6051 	tracing_start_tr(tr);
6052 	return ret;
6053 }
6054 
6055 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6056 				  unsigned long size, int cpu_id)
6057 {
6058 	int ret;
6059 
6060 	mutex_lock(&trace_types_lock);
6061 
6062 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6063 		/* make sure this CPU is enabled in the mask */
6064 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6065 			ret = -EINVAL;
6066 			goto out;
6067 		}
6068 	}
6069 
6070 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6071 	if (ret < 0)
6072 		ret = -ENOMEM;
6073 
6074 out:
6075 	mutex_unlock(&trace_types_lock);
6076 
6077 	return ret;
6078 }
6079 
6080 static void update_last_data(struct trace_array *tr)
6081 {
6082 	if (!tr->text_delta && !tr->data_delta)
6083 		return;
6084 
6085 	/* Clear old data */
6086 	tracing_reset_online_cpus(&tr->array_buffer);
6087 
6088 	/* Using current data now */
6089 	tr->text_delta = 0;
6090 	tr->data_delta = 0;
6091 }
6092 
6093 /**
6094  * tracing_update_buffers - used by tracing facility to expand ring buffers
6095  * @tr: The tracing instance
6096  *
6097  * To save memory when tracing is never used on a system that has it
6098  * configured in, the ring buffers are initially set to a minimum size.
6099  * Once a user starts to use the tracing facility, they need to grow
6100  * to their default size.
6101  *
6102  * This function is to be called when a tracer is about to be used.
6103  */
6104 int tracing_update_buffers(struct trace_array *tr)
6105 {
6106 	int ret = 0;
6107 
6108 	mutex_lock(&trace_types_lock);
6109 
6110 	update_last_data(tr);
6111 
6112 	if (!tr->ring_buffer_expanded)
6113 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6114 						RING_BUFFER_ALL_CPUS);
6115 	mutex_unlock(&trace_types_lock);
6116 
6117 	return ret;
6118 }
6119 
6120 struct trace_option_dentry;
6121 
6122 static void
6123 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6124 
6125 /*
6126  * Used to clear out the tracer before deletion of an instance.
6127  * Must have trace_types_lock held.
6128  */
6129 static void tracing_set_nop(struct trace_array *tr)
6130 {
6131 	if (tr->current_trace == &nop_trace)
6132 		return;
6133 
6134 	tr->current_trace->enabled--;
6135 
6136 	if (tr->current_trace->reset)
6137 		tr->current_trace->reset(tr);
6138 
6139 	tr->current_trace = &nop_trace;
6140 }
6141 
6142 static bool tracer_options_updated;
6143 
6144 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6145 {
6146 	/* Only enable if the directory has been created already. */
6147 	if (!tr->dir)
6148 		return;
6149 
6150 	/* Only create trace option files after update_tracer_options finish */
6151 	if (!tracer_options_updated)
6152 		return;
6153 
6154 	create_trace_option_files(tr, t);
6155 }
6156 
6157 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6158 {
6159 	struct tracer *t;
6160 #ifdef CONFIG_TRACER_MAX_TRACE
6161 	bool had_max_tr;
6162 #endif
6163 	int ret = 0;
6164 
6165 	mutex_lock(&trace_types_lock);
6166 
6167 	update_last_data(tr);
6168 
6169 	if (!tr->ring_buffer_expanded) {
6170 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6171 						RING_BUFFER_ALL_CPUS);
6172 		if (ret < 0)
6173 			goto out;
6174 		ret = 0;
6175 	}
6176 
6177 	for (t = trace_types; t; t = t->next) {
6178 		if (strcmp(t->name, buf) == 0)
6179 			break;
6180 	}
6181 	if (!t) {
6182 		ret = -EINVAL;
6183 		goto out;
6184 	}
6185 	if (t == tr->current_trace)
6186 		goto out;
6187 
6188 #ifdef CONFIG_TRACER_SNAPSHOT
6189 	if (t->use_max_tr) {
6190 		local_irq_disable();
6191 		arch_spin_lock(&tr->max_lock);
6192 		if (tr->cond_snapshot)
6193 			ret = -EBUSY;
6194 		arch_spin_unlock(&tr->max_lock);
6195 		local_irq_enable();
6196 		if (ret)
6197 			goto out;
6198 	}
6199 #endif
6200 	/* Some tracers won't work on kernel command line */
6201 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6202 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6203 			t->name);
6204 		goto out;
6205 	}
6206 
6207 	/* Some tracers are only allowed for the top level buffer */
6208 	if (!trace_ok_for_array(t, tr)) {
6209 		ret = -EINVAL;
6210 		goto out;
6211 	}
6212 
6213 	/* If trace pipe files are being read, we can't change the tracer */
6214 	if (tr->trace_ref) {
6215 		ret = -EBUSY;
6216 		goto out;
6217 	}
6218 
6219 	trace_branch_disable();
6220 
6221 	tr->current_trace->enabled--;
6222 
6223 	if (tr->current_trace->reset)
6224 		tr->current_trace->reset(tr);
6225 
6226 #ifdef CONFIG_TRACER_MAX_TRACE
6227 	had_max_tr = tr->current_trace->use_max_tr;
6228 
6229 	/* Current trace needs to be nop_trace before synchronize_rcu */
6230 	tr->current_trace = &nop_trace;
6231 
6232 	if (had_max_tr && !t->use_max_tr) {
6233 		/*
6234 		 * We need to make sure that the update_max_tr sees that
6235 		 * current_trace changed to nop_trace to keep it from
6236 		 * swapping the buffers after we resize it.
6237 		 * update_max_tr() is called with interrupts disabled,
6238 		 * so a synchronize_rcu() is sufficient.
6239 		 */
6240 		synchronize_rcu();
6241 		free_snapshot(tr);
6242 		tracing_disarm_snapshot(tr);
6243 	}
6244 
6245 	if (!had_max_tr && t->use_max_tr) {
6246 		ret = tracing_arm_snapshot_locked(tr);
6247 		if (ret)
6248 			goto out;
6249 	}
6250 #else
6251 	tr->current_trace = &nop_trace;
6252 #endif
6253 
6254 	if (t->init) {
6255 		ret = tracer_init(t, tr);
6256 		if (ret) {
6257 #ifdef CONFIG_TRACER_MAX_TRACE
6258 			if (t->use_max_tr)
6259 				tracing_disarm_snapshot(tr);
6260 #endif
6261 			goto out;
6262 		}
6263 	}
6264 
6265 	tr->current_trace = t;
6266 	tr->current_trace->enabled++;
6267 	trace_branch_enable(tr);
6268  out:
6269 	mutex_unlock(&trace_types_lock);
6270 
6271 	return ret;
6272 }
6273 
6274 static ssize_t
6275 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6276 			size_t cnt, loff_t *ppos)
6277 {
6278 	struct trace_array *tr = filp->private_data;
6279 	char buf[MAX_TRACER_SIZE+1];
6280 	char *name;
6281 	size_t ret;
6282 	int err;
6283 
6284 	ret = cnt;
6285 
6286 	if (cnt > MAX_TRACER_SIZE)
6287 		cnt = MAX_TRACER_SIZE;
6288 
6289 	if (copy_from_user(buf, ubuf, cnt))
6290 		return -EFAULT;
6291 
6292 	buf[cnt] = 0;
6293 
6294 	name = strim(buf);
6295 
6296 	err = tracing_set_tracer(tr, name);
6297 	if (err)
6298 		return err;
6299 
6300 	*ppos += ret;
6301 
6302 	return ret;
6303 }
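
/*
 * For illustration (assuming the function_graph tracer is configured in):
 *
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *
 * Trailing whitespace and the newline are stripped by strim() above before
 * the name is matched against the registered trace_types list.
 */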
6304 
6305 static ssize_t
6306 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6307 		   size_t cnt, loff_t *ppos)
6308 {
6309 	char buf[64];
6310 	int r;
6311 
6312 	r = snprintf(buf, sizeof(buf), "%ld\n",
6313 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6314 	if (r > sizeof(buf))
6315 		r = sizeof(buf);
6316 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6317 }
6318 
6319 static ssize_t
6320 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6321 		    size_t cnt, loff_t *ppos)
6322 {
6323 	unsigned long val;
6324 	int ret;
6325 
6326 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6327 	if (ret)
6328 		return ret;
6329 
6330 	*ptr = val * 1000;
6331 
6332 	return cnt;
6333 }
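
/*
 * For illustration: these files are presented to user space in microseconds
 * but stored in nanoseconds, so
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * stores 100000 (100 * 1000) in tracing_thresh.
 */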
6334 
6335 static ssize_t
6336 tracing_thresh_read(struct file *filp, char __user *ubuf,
6337 		    size_t cnt, loff_t *ppos)
6338 {
6339 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6340 }
6341 
6342 static ssize_t
6343 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6344 		     size_t cnt, loff_t *ppos)
6345 {
6346 	struct trace_array *tr = filp->private_data;
6347 	int ret;
6348 
6349 	mutex_lock(&trace_types_lock);
6350 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6351 	if (ret < 0)
6352 		goto out;
6353 
6354 	if (tr->current_trace->update_thresh) {
6355 		ret = tr->current_trace->update_thresh(tr);
6356 		if (ret < 0)
6357 			goto out;
6358 	}
6359 
6360 	ret = cnt;
6361 out:
6362 	mutex_unlock(&trace_types_lock);
6363 
6364 	return ret;
6365 }
6366 
6367 #ifdef CONFIG_TRACER_MAX_TRACE
6368 
6369 static ssize_t
6370 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6371 		     size_t cnt, loff_t *ppos)
6372 {
6373 	struct trace_array *tr = filp->private_data;
6374 
6375 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6376 }
6377 
6378 static ssize_t
6379 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6380 		      size_t cnt, loff_t *ppos)
6381 {
6382 	struct trace_array *tr = filp->private_data;
6383 
6384 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6385 }
6386 
6387 #endif
6388 
6389 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6390 {
6391 	if (cpu == RING_BUFFER_ALL_CPUS) {
6392 		if (cpumask_empty(tr->pipe_cpumask)) {
6393 			cpumask_setall(tr->pipe_cpumask);
6394 			return 0;
6395 		}
6396 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6397 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6398 		return 0;
6399 	}
6400 	return -EBUSY;
6401 }
6402 
6403 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6404 {
6405 	if (cpu == RING_BUFFER_ALL_CPUS) {
6406 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6407 		cpumask_clear(tr->pipe_cpumask);
6408 	} else {
6409 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6410 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6411 	}
6412 }
6413 
6414 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6415 {
6416 	struct trace_array *tr = inode->i_private;
6417 	struct trace_iterator *iter;
6418 	int cpu;
6419 	int ret;
6420 
6421 	ret = tracing_check_open_get_tr(tr);
6422 	if (ret)
6423 		return ret;
6424 
6425 	mutex_lock(&trace_types_lock);
6426 	cpu = tracing_get_cpu(inode);
6427 	ret = open_pipe_on_cpu(tr, cpu);
6428 	if (ret)
6429 		goto fail_pipe_on_cpu;
6430 
6431 	/* create a buffer to store the information to pass to userspace */
6432 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6433 	if (!iter) {
6434 		ret = -ENOMEM;
6435 		goto fail_alloc_iter;
6436 	}
6437 
6438 	trace_seq_init(&iter->seq);
6439 	iter->trace = tr->current_trace;
6440 
6441 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6442 		ret = -ENOMEM;
6443 		goto fail;
6444 	}
6445 
6446 	/* trace pipe does not show start of buffer */
6447 	cpumask_setall(iter->started);
6448 
6449 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6450 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6451 
6452 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6453 	if (trace_clocks[tr->clock_id].in_ns)
6454 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6455 
6456 	iter->tr = tr;
6457 	iter->array_buffer = &tr->array_buffer;
6458 	iter->cpu_file = cpu;
6459 	mutex_init(&iter->mutex);
6460 	filp->private_data = iter;
6461 
6462 	if (iter->trace->pipe_open)
6463 		iter->trace->pipe_open(iter);
6464 
6465 	nonseekable_open(inode, filp);
6466 
6467 	tr->trace_ref++;
6468 
6469 	mutex_unlock(&trace_types_lock);
6470 	return ret;
6471 
6472 fail:
6473 	kfree(iter);
6474 fail_alloc_iter:
6475 	close_pipe_on_cpu(tr, cpu);
6476 fail_pipe_on_cpu:
6477 	__trace_array_put(tr);
6478 	mutex_unlock(&trace_types_lock);
6479 	return ret;
6480 }
6481 
6482 static int tracing_release_pipe(struct inode *inode, struct file *file)
6483 {
6484 	struct trace_iterator *iter = file->private_data;
6485 	struct trace_array *tr = inode->i_private;
6486 
6487 	mutex_lock(&trace_types_lock);
6488 
6489 	tr->trace_ref--;
6490 
6491 	if (iter->trace->pipe_close)
6492 		iter->trace->pipe_close(iter);
6493 	close_pipe_on_cpu(tr, iter->cpu_file);
6494 	mutex_unlock(&trace_types_lock);
6495 
6496 	free_trace_iter_content(iter);
6497 	kfree(iter);
6498 
6499 	trace_array_put(tr);
6500 
6501 	return 0;
6502 }
6503 
6504 static __poll_t
6505 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6506 {
6507 	struct trace_array *tr = iter->tr;
6508 
6509 	/* Iterators are static, they should be filled or empty */
6510 	if (trace_buffer_iter(iter, iter->cpu_file))
6511 		return EPOLLIN | EPOLLRDNORM;
6512 
6513 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6514 		/*
6515 		 * Always select as readable when in blocking mode
6516 		 */
6517 		return EPOLLIN | EPOLLRDNORM;
6518 	else
6519 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6520 					     filp, poll_table, iter->tr->buffer_percent);
6521 }
6522 
6523 static __poll_t
6524 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6525 {
6526 	struct trace_iterator *iter = filp->private_data;
6527 
6528 	return trace_poll(iter, filp, poll_table);
6529 }
6530 
6531 /* Must be called with iter->mutex held. */
6532 static int tracing_wait_pipe(struct file *filp)
6533 {
6534 	struct trace_iterator *iter = filp->private_data;
6535 	int ret;
6536 
6537 	while (trace_empty(iter)) {
6538 
6539 		if ((filp->f_flags & O_NONBLOCK)) {
6540 			return -EAGAIN;
6541 		}
6542 
6543 		/*
6544 		 * We only stop blocking once we have read something and
6545 		 * tracing is disabled. We still block if tracing is disabled
6546 		 * but we have never read anything; this allows a user to cat
6547 		 * this file and then enable tracing. But after we have read
6548 		 * something, we give an EOF when tracing is disabled again.
6549 		 *
6550 		 * iter->pos will be 0 if we haven't read anything.
6551 		 */
6552 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6553 			break;
6554 
6555 		mutex_unlock(&iter->mutex);
6556 
6557 		ret = wait_on_pipe(iter, 0);
6558 
6559 		mutex_lock(&iter->mutex);
6560 
6561 		if (ret)
6562 			return ret;
6563 	}
6564 
6565 	return 1;
6566 }
6567 
6568 /*
6569  * Consumer reader.
6570  */
6571 static ssize_t
6572 tracing_read_pipe(struct file *filp, char __user *ubuf,
6573 		  size_t cnt, loff_t *ppos)
6574 {
6575 	struct trace_iterator *iter = filp->private_data;
6576 	ssize_t sret;
6577 
6578 	/*
6579 	 * Avoid more than one consumer on a single file descriptor.
6580 	 * This is just a matter of trace coherency; the ring buffer itself
6581 	 * is protected.
6582 	 */
6583 	mutex_lock(&iter->mutex);
6584 
6585 	/* return any leftover data */
6586 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6587 	if (sret != -EBUSY)
6588 		goto out;
6589 
6590 	trace_seq_init(&iter->seq);
6591 
6592 	if (iter->trace->read) {
6593 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6594 		if (sret)
6595 			goto out;
6596 	}
6597 
6598 waitagain:
6599 	sret = tracing_wait_pipe(filp);
6600 	if (sret <= 0)
6601 		goto out;
6602 
6603 	/* stop when tracing is finished */
6604 	if (trace_empty(iter)) {
6605 		sret = 0;
6606 		goto out;
6607 	}
6608 
6609 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6610 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6611 
6612 	/* reset all but tr, trace, and overruns */
6613 	trace_iterator_reset(iter);
6614 	cpumask_clear(iter->started);
6615 	trace_seq_init(&iter->seq);
6616 
6617 	trace_event_read_lock();
6618 	trace_access_lock(iter->cpu_file);
6619 	while (trace_find_next_entry_inc(iter) != NULL) {
6620 		enum print_line_t ret;
6621 		int save_len = iter->seq.seq.len;
6622 
6623 		ret = print_trace_line(iter);
6624 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6625 			/*
6626 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6627 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6628 			 * In this case, we need to consume the event; otherwise, the loop
6629 			 * will peek this event again next time, resulting in an infinite loop.
6630 			 */
6631 			if (save_len == 0) {
6632 				iter->seq.full = 0;
6633 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6634 				trace_consume(iter);
6635 				break;
6636 			}
6637 
6638 			/* In other cases, don't print partial lines */
6639 			iter->seq.seq.len = save_len;
6640 			break;
6641 		}
6642 		if (ret != TRACE_TYPE_NO_CONSUME)
6643 			trace_consume(iter);
6644 
6645 		if (trace_seq_used(&iter->seq) >= cnt)
6646 			break;
6647 
6648 		/*
6649 		 * Setting the full flag means we reached the trace_seq buffer
6650 		 * size and we should leave by partial output condition above.
6651 		 * One of the trace_seq_* functions is not used properly.
6652 		 */
6653 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6654 			  iter->ent->type);
6655 	}
6656 	trace_access_unlock(iter->cpu_file);
6657 	trace_event_read_unlock();
6658 
6659 	/* Now copy what we have to the user */
6660 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6661 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6662 		trace_seq_init(&iter->seq);
6663 
6664 	/*
6665 	 * If there was nothing to send to user, in spite of consuming trace
6666 	 * entries, go back to wait for more entries.
6667 	 */
6668 	if (sret == -EBUSY)
6669 		goto waitagain;
6670 
6671 out:
6672 	mutex_unlock(&iter->mutex);
6673 
6674 	return sret;
6675 }
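
/*
 * For illustration: unlike the 'trace' file, reads from 'trace_pipe' consume
 * the events they return, so
 *
 *   # cat /sys/kernel/tracing/trace_pipe
 *
 * blocks waiting for new entries and reports each entry only once.
 */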
6676 
6677 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6678 				     unsigned int idx)
6679 {
6680 	__free_page(spd->pages[idx]);
6681 }
6682 
6683 static size_t
6684 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6685 {
6686 	size_t count;
6687 	int save_len;
6688 	int ret;
6689 
6690 	/* Seq buffer is page-sized, exactly what we need. */
6691 	for (;;) {
6692 		save_len = iter->seq.seq.len;
6693 		ret = print_trace_line(iter);
6694 
6695 		if (trace_seq_has_overflowed(&iter->seq)) {
6696 			iter->seq.seq.len = save_len;
6697 			break;
6698 		}
6699 
6700 		/*
6701 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6702 		 * should only be returned if iter->seq overflowed. But check
6703 		 * it anyway to be safe.
6704 		 */
6705 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6706 			iter->seq.seq.len = save_len;
6707 			break;
6708 		}
6709 
6710 		count = trace_seq_used(&iter->seq) - save_len;
6711 		if (rem < count) {
6712 			rem = 0;
6713 			iter->seq.seq.len = save_len;
6714 			break;
6715 		}
6716 
6717 		if (ret != TRACE_TYPE_NO_CONSUME)
6718 			trace_consume(iter);
6719 		rem -= count;
6720 		if (!trace_find_next_entry_inc(iter))	{
6721 			rem = 0;
6722 			iter->ent = NULL;
6723 			break;
6724 		}
6725 	}
6726 
6727 	return rem;
6728 }
6729 
6730 static ssize_t tracing_splice_read_pipe(struct file *filp,
6731 					loff_t *ppos,
6732 					struct pipe_inode_info *pipe,
6733 					size_t len,
6734 					unsigned int flags)
6735 {
6736 	struct page *pages_def[PIPE_DEF_BUFFERS];
6737 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6738 	struct trace_iterator *iter = filp->private_data;
6739 	struct splice_pipe_desc spd = {
6740 		.pages		= pages_def,
6741 		.partial	= partial_def,
6742 		.nr_pages	= 0, /* This gets updated below. */
6743 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6744 		.ops		= &default_pipe_buf_ops,
6745 		.spd_release	= tracing_spd_release_pipe,
6746 	};
6747 	ssize_t ret;
6748 	size_t rem;
6749 	unsigned int i;
6750 
6751 	if (splice_grow_spd(pipe, &spd))
6752 		return -ENOMEM;
6753 
6754 	mutex_lock(&iter->mutex);
6755 
6756 	if (iter->trace->splice_read) {
6757 		ret = iter->trace->splice_read(iter, filp,
6758 					       ppos, pipe, len, flags);
6759 		if (ret)
6760 			goto out_err;
6761 	}
6762 
6763 	ret = tracing_wait_pipe(filp);
6764 	if (ret <= 0)
6765 		goto out_err;
6766 
6767 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6768 		ret = -EFAULT;
6769 		goto out_err;
6770 	}
6771 
6772 	trace_event_read_lock();
6773 	trace_access_lock(iter->cpu_file);
6774 
6775 	/* Fill as many pages as possible. */
6776 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6777 		spd.pages[i] = alloc_page(GFP_KERNEL);
6778 		if (!spd.pages[i])
6779 			break;
6780 
6781 		rem = tracing_fill_pipe_page(rem, iter);
6782 
6783 		/* Copy the data into the page, so we can start over. */
6784 		ret = trace_seq_to_buffer(&iter->seq,
6785 					  page_address(spd.pages[i]),
6786 					  trace_seq_used(&iter->seq));
6787 		if (ret < 0) {
6788 			__free_page(spd.pages[i]);
6789 			break;
6790 		}
6791 		spd.partial[i].offset = 0;
6792 		spd.partial[i].len = trace_seq_used(&iter->seq);
6793 
6794 		trace_seq_init(&iter->seq);
6795 	}
6796 
6797 	trace_access_unlock(iter->cpu_file);
6798 	trace_event_read_unlock();
6799 	mutex_unlock(&iter->mutex);
6800 
6801 	spd.nr_pages = i;
6802 
6803 	if (i)
6804 		ret = splice_to_pipe(pipe, &spd);
6805 	else
6806 		ret = 0;
6807 out:
6808 	splice_shrink_spd(&spd);
6809 	return ret;
6810 
6811 out_err:
6812 	mutex_unlock(&iter->mutex);
6813 	goto out;
6814 }
6815 
6816 static ssize_t
6817 tracing_entries_read(struct file *filp, char __user *ubuf,
6818 		     size_t cnt, loff_t *ppos)
6819 {
6820 	struct inode *inode = file_inode(filp);
6821 	struct trace_array *tr = inode->i_private;
6822 	int cpu = tracing_get_cpu(inode);
6823 	char buf[64];
6824 	int r = 0;
6825 	ssize_t ret;
6826 
6827 	mutex_lock(&trace_types_lock);
6828 
6829 	if (cpu == RING_BUFFER_ALL_CPUS) {
6830 		int cpu, buf_size_same;
6831 		unsigned long size;
6832 
6833 		size = 0;
6834 		buf_size_same = 1;
6835 		/* check if all cpu sizes are same */
6836 		for_each_tracing_cpu(cpu) {
6837 			/* fill in the size from first enabled cpu */
6838 			if (size == 0)
6839 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6840 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6841 				buf_size_same = 0;
6842 				break;
6843 			}
6844 		}
6845 
6846 		if (buf_size_same) {
6847 			if (!tr->ring_buffer_expanded)
6848 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6849 					    size >> 10,
6850 					    trace_buf_size >> 10);
6851 			else
6852 				r = sprintf(buf, "%lu\n", size >> 10);
6853 		} else
6854 			r = sprintf(buf, "X\n");
6855 	} else
6856 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6857 
6858 	mutex_unlock(&trace_types_lock);
6859 
6860 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6861 	return ret;
6862 }
6863 
6864 static ssize_t
6865 tracing_entries_write(struct file *filp, const char __user *ubuf,
6866 		      size_t cnt, loff_t *ppos)
6867 {
6868 	struct inode *inode = file_inode(filp);
6869 	struct trace_array *tr = inode->i_private;
6870 	unsigned long val;
6871 	int ret;
6872 
6873 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6874 	if (ret)
6875 		return ret;
6876 
6877 	/* must have at least 1 entry */
6878 	if (!val)
6879 		return -EINVAL;
6880 
6881 	/* value is in KB */
6882 	val <<= 10;
6883 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6884 	if (ret < 0)
6885 		return ret;
6886 
6887 	*ppos += cnt;
6888 
6889 	return cnt;
6890 }
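
/*
 * For illustration: writes are interpreted in KiB (val <<= 10 above), so
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes every per-CPU buffer to 4 MiB, while
 *
 *   # echo 1024 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * resizes only CPU 1's buffer.
 */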
6891 
6892 static ssize_t
6893 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6894 				size_t cnt, loff_t *ppos)
6895 {
6896 	struct trace_array *tr = filp->private_data;
6897 	char buf[64];
6898 	int r, cpu;
6899 	unsigned long size = 0, expanded_size = 0;
6900 
6901 	mutex_lock(&trace_types_lock);
6902 	for_each_tracing_cpu(cpu) {
6903 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6904 		if (!tr->ring_buffer_expanded)
6905 			expanded_size += trace_buf_size >> 10;
6906 	}
6907 	if (tr->ring_buffer_expanded)
6908 		r = sprintf(buf, "%lu\n", size);
6909 	else
6910 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6911 	mutex_unlock(&trace_types_lock);
6912 
6913 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6914 }
6915 
6916 static ssize_t
6917 tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6918 {
6919 	struct trace_array *tr = filp->private_data;
6920 	struct seq_buf seq;
6921 	char buf[64];
6922 
6923 	seq_buf_init(&seq, buf, 64);
6924 
6925 	seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
6926 	seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
6927 
6928 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
6929 }
6930 
6931 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6932 {
6933 	struct trace_array *tr = inode->i_private;
6934 	int cpu = tracing_get_cpu(inode);
6935 	int ret;
6936 
6937 	ret = tracing_check_open_get_tr(tr);
6938 	if (ret)
6939 		return ret;
6940 
6941 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6942 	if (ret < 0)
6943 		__trace_array_put(tr);
6944 	return ret;
6945 }
6946 
6947 static ssize_t
6948 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6949 			  size_t cnt, loff_t *ppos)
6950 {
6951 	/*
6952 	 * There is no need to read what the user has written; this function
6953 	 * just makes sure that there is no error when "echo" is used.
6954 	 */
6955 
6956 	*ppos += cnt;
6957 
6958 	return cnt;
6959 }
6960 
6961 static int
6962 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6963 {
6964 	struct trace_array *tr = inode->i_private;
6965 
6966 	/* disable tracing ? */
6967 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6968 		tracer_tracing_off(tr);
6969 	/* resize the ring buffer to 0 */
6970 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6971 
6972 	trace_array_put(tr);
6973 
6974 	return 0;
6975 }
6976 
6977 #define TRACE_MARKER_MAX_SIZE		4096
6978 
6979 static ssize_t
6980 tracing_mark_write(struct file *filp, const char __user *ubuf,
6981 					size_t cnt, loff_t *fpos)
6982 {
6983 	struct trace_array *tr = filp->private_data;
6984 	struct ring_buffer_event *event;
6985 	enum event_trigger_type tt = ETT_NONE;
6986 	struct trace_buffer *buffer;
6987 	struct print_entry *entry;
6988 	int meta_size;
6989 	ssize_t written;
6990 	size_t size;
6991 	int len;
6992 
6993 /* Used in tracing_mark_raw_write() as well */
6994 #define FAULTED_STR "<faulted>"
6995 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6996 
6997 	if (tracing_disabled)
6998 		return -EINVAL;
6999 
7000 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7001 		return -EINVAL;
7002 
7003 	if ((ssize_t)cnt < 0)
7004 		return -EINVAL;
7005 
7006 	if (cnt > TRACE_MARKER_MAX_SIZE)
7007 		cnt = TRACE_MARKER_MAX_SIZE;
7008 
7009 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7010  again:
7011 	size = cnt + meta_size;
7012 
7013 	/* If less than "<faulted>", then make sure we can still add that */
7014 	if (cnt < FAULTED_SIZE)
7015 		size += FAULTED_SIZE - cnt;
7016 
7017 	buffer = tr->array_buffer.buffer;
7018 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7019 					    tracing_gen_ctx());
7020 	if (unlikely(!event)) {
7021 		/*
7022 		 * If the size was greater than what was allowed, then
7023 		 * make it smaller and try again.
7024 		 */
7025 		if (size > ring_buffer_max_event_size(buffer)) {
7026 			/* With cnt < FAULTED_SIZE, size should never be bigger than max */
7027 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7028 				return -EBADF;
7029 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7030 			/* The above should only happen once */
7031 			if (WARN_ON_ONCE(cnt + meta_size == size))
7032 				return -EBADF;
7033 			goto again;
7034 		}
7035 
7036 		/* Ring buffer disabled, return as if not open for write */
7037 		return -EBADF;
7038 	}
7039 
7040 	entry = ring_buffer_event_data(event);
7041 	entry->ip = _THIS_IP_;
7042 
7043 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7044 	if (len) {
7045 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7046 		cnt = FAULTED_SIZE;
7047 		written = -EFAULT;
7048 	} else
7049 		written = cnt;
7050 
7051 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7052 		/* do not add \n before testing triggers, but add \0 */
7053 		entry->buf[cnt] = '\0';
7054 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7055 	}
7056 
7057 	if (entry->buf[cnt - 1] != '\n') {
7058 		entry->buf[cnt] = '\n';
7059 		entry->buf[cnt + 1] = '\0';
7060 	} else
7061 		entry->buf[cnt] = '\0';
7062 
7063 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7064 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7065 	__buffer_unlock_commit(buffer, event);
7066 
7067 	if (tt)
7068 		event_triggers_post_call(tr->trace_marker_file, tt);
7069 
7070 	return written;
7071 }
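
/*
 * For illustration: user space can annotate the trace with
 *
 *   # echo "hello world" > /sys/kernel/tracing/trace_marker
 *
 * which is recorded as a TRACE_PRINT event; writes larger than
 * TRACE_MARKER_MAX_SIZE are truncated above.
 */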
7072 
7073 static ssize_t
7074 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7075 					size_t cnt, loff_t *fpos)
7076 {
7077 	struct trace_array *tr = filp->private_data;
7078 	struct ring_buffer_event *event;
7079 	struct trace_buffer *buffer;
7080 	struct raw_data_entry *entry;
7081 	ssize_t written;
7082 	int size;
7083 	int len;
7084 
7085 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7086 
7087 	if (tracing_disabled)
7088 		return -EINVAL;
7089 
7090 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7091 		return -EINVAL;
7092 
7093 	/* The marker must at least have a tag id */
7094 	if (cnt < sizeof(unsigned int))
7095 		return -EINVAL;
7096 
7097 	size = sizeof(*entry) + cnt;
7098 	if (cnt < FAULT_SIZE_ID)
7099 		size += FAULT_SIZE_ID - cnt;
7100 
7101 	buffer = tr->array_buffer.buffer;
7102 
7103 	if (size > ring_buffer_max_event_size(buffer))
7104 		return -EINVAL;
7105 
7106 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7107 					    tracing_gen_ctx());
7108 	if (!event)
7109 		/* Ring buffer disabled, return as if not open for write */
7110 		return -EBADF;
7111 
7112 	entry = ring_buffer_event_data(event);
7113 
7114 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7115 	if (len) {
7116 		entry->id = -1;
7117 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7118 		written = -EFAULT;
7119 	} else
7120 		written = cnt;
7121 
7122 	__buffer_unlock_commit(buffer, event);
7123 
7124 	return written;
7125 }
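
/*
 * For illustration: a raw marker write must begin with a 4-byte id, e.g.
 * from user space (names and values here are only examples):
 *
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   struct { unsigned int id; char buf[8]; } rec = { 42, "payload" };
 *   write(fd, &rec, sizeof(rec));
 *
 * Anything shorter than sizeof(unsigned int) is rejected with -EINVAL.
 */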
7126 
7127 static int tracing_clock_show(struct seq_file *m, void *v)
7128 {
7129 	struct trace_array *tr = m->private;
7130 	int i;
7131 
7132 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7133 		seq_printf(m,
7134 			"%s%s%s%s", i ? " " : "",
7135 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7136 			i == tr->clock_id ? "]" : "");
7137 	seq_putc(m, '\n');
7138 
7139 	return 0;
7140 }
7141 
7142 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7143 {
7144 	int i;
7145 
7146 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7147 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7148 			break;
7149 	}
7150 	if (i == ARRAY_SIZE(trace_clocks))
7151 		return -EINVAL;
7152 
7153 	mutex_lock(&trace_types_lock);
7154 
7155 	tr->clock_id = i;
7156 
7157 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7158 
7159 	/*
7160 	 * New clock may not be consistent with the previous clock.
7161 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7162 	 */
7163 	tracing_reset_online_cpus(&tr->array_buffer);
7164 
7165 #ifdef CONFIG_TRACER_MAX_TRACE
7166 	if (tr->max_buffer.buffer)
7167 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7168 	tracing_reset_online_cpus(&tr->max_buffer);
7169 #endif
7170 
7171 	mutex_unlock(&trace_types_lock);
7172 
7173 	return 0;
7174 }
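
/*
 * For illustration: selecting a different trace clock from user space,
 *
 *   # echo mono > /sys/kernel/tracing/trace_clock
 *
 * resets the buffers, as noted above, because timestamps taken with
 * different clocks cannot be compared.
 */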
7175 
7176 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7177 				   size_t cnt, loff_t *fpos)
7178 {
7179 	struct seq_file *m = filp->private_data;
7180 	struct trace_array *tr = m->private;
7181 	char buf[64];
7182 	const char *clockstr;
7183 	int ret;
7184 
7185 	if (cnt >= sizeof(buf))
7186 		return -EINVAL;
7187 
7188 	if (copy_from_user(buf, ubuf, cnt))
7189 		return -EFAULT;
7190 
7191 	buf[cnt] = 0;
7192 
7193 	clockstr = strstrip(buf);
7194 
7195 	ret = tracing_set_clock(tr, clockstr);
7196 	if (ret)
7197 		return ret;
7198 
7199 	*fpos += cnt;
7200 
7201 	return cnt;
7202 }
7203 
7204 static int tracing_clock_open(struct inode *inode, struct file *file)
7205 {
7206 	struct trace_array *tr = inode->i_private;
7207 	int ret;
7208 
7209 	ret = tracing_check_open_get_tr(tr);
7210 	if (ret)
7211 		return ret;
7212 
7213 	ret = single_open(file, tracing_clock_show, inode->i_private);
7214 	if (ret < 0)
7215 		trace_array_put(tr);
7216 
7217 	return ret;
7218 }
7219 
7220 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7221 {
7222 	struct trace_array *tr = m->private;
7223 
7224 	mutex_lock(&trace_types_lock);
7225 
7226 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7227 		seq_puts(m, "delta [absolute]\n");
7228 	else
7229 		seq_puts(m, "[delta] absolute\n");
7230 
7231 	mutex_unlock(&trace_types_lock);
7232 
7233 	return 0;
7234 }
7235 
7236 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7237 {
7238 	struct trace_array *tr = inode->i_private;
7239 	int ret;
7240 
7241 	ret = tracing_check_open_get_tr(tr);
7242 	if (ret)
7243 		return ret;
7244 
7245 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7246 	if (ret < 0)
7247 		trace_array_put(tr);
7248 
7249 	return ret;
7250 }
7251 
7252 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7253 {
7254 	if (rbe == this_cpu_read(trace_buffered_event))
7255 		return ring_buffer_time_stamp(buffer);
7256 
7257 	return ring_buffer_event_time_stamp(buffer, rbe);
7258 }
7259 
7260 /*
7261  * Set or disable using the per CPU trace_buffer_event when possible.
7262  */
7263 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7264 {
7265 	int ret = 0;
7266 
7267 	mutex_lock(&trace_types_lock);
7268 
7269 	if (set && tr->no_filter_buffering_ref++)
7270 		goto out;
7271 
7272 	if (!set) {
7273 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7274 			ret = -EINVAL;
7275 			goto out;
7276 		}
7277 
7278 		--tr->no_filter_buffering_ref;
7279 	}
7280  out:
7281 	mutex_unlock(&trace_types_lock);
7282 
7283 	return ret;
7284 }
7285 
7286 struct ftrace_buffer_info {
7287 	struct trace_iterator	iter;
7288 	void			*spare;
7289 	unsigned int		spare_cpu;
7290 	unsigned int		spare_size;
7291 	unsigned int		read;
7292 };
7293 
7294 #ifdef CONFIG_TRACER_SNAPSHOT
7295 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7296 {
7297 	struct trace_array *tr = inode->i_private;
7298 	struct trace_iterator *iter;
7299 	struct seq_file *m;
7300 	int ret;
7301 
7302 	ret = tracing_check_open_get_tr(tr);
7303 	if (ret)
7304 		return ret;
7305 
7306 	if (file->f_mode & FMODE_READ) {
7307 		iter = __tracing_open(inode, file, true);
7308 		if (IS_ERR(iter))
7309 			ret = PTR_ERR(iter);
7310 	} else {
7311 		/* Writes still need the seq_file to hold the private data */
7312 		ret = -ENOMEM;
7313 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7314 		if (!m)
7315 			goto out;
7316 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7317 		if (!iter) {
7318 			kfree(m);
7319 			goto out;
7320 		}
7321 		ret = 0;
7322 
7323 		iter->tr = tr;
7324 		iter->array_buffer = &tr->max_buffer;
7325 		iter->cpu_file = tracing_get_cpu(inode);
7326 		m->private = iter;
7327 		file->private_data = m;
7328 	}
7329 out:
7330 	if (ret < 0)
7331 		trace_array_put(tr);
7332 
7333 	return ret;
7334 }
7335 
7336 static void tracing_swap_cpu_buffer(void *tr)
7337 {
7338 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7339 }
7340 
7341 static ssize_t
7342 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7343 		       loff_t *ppos)
7344 {
7345 	struct seq_file *m = filp->private_data;
7346 	struct trace_iterator *iter = m->private;
7347 	struct trace_array *tr = iter->tr;
7348 	unsigned long val;
7349 	int ret;
7350 
7351 	ret = tracing_update_buffers(tr);
7352 	if (ret < 0)
7353 		return ret;
7354 
7355 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7356 	if (ret)
7357 		return ret;
7358 
7359 	mutex_lock(&trace_types_lock);
7360 
7361 	if (tr->current_trace->use_max_tr) {
7362 		ret = -EBUSY;
7363 		goto out;
7364 	}
7365 
7366 	local_irq_disable();
7367 	arch_spin_lock(&tr->max_lock);
7368 	if (tr->cond_snapshot)
7369 		ret = -EBUSY;
7370 	arch_spin_unlock(&tr->max_lock);
7371 	local_irq_enable();
7372 	if (ret)
7373 		goto out;
7374 
7375 	switch (val) {
7376 	case 0:
7377 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7378 			ret = -EINVAL;
7379 			break;
7380 		}
7381 		if (tr->allocated_snapshot)
7382 			free_snapshot(tr);
7383 		break;
7384 	case 1:
7385 /* Only allow per-cpu swap if the ring buffer supports it */
7386 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7387 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7388 			ret = -EINVAL;
7389 			break;
7390 		}
7391 #endif
7392 		if (tr->allocated_snapshot)
7393 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7394 					&tr->array_buffer, iter->cpu_file);
7395 
7396 		ret = tracing_arm_snapshot_locked(tr);
7397 		if (ret)
7398 			break;
7399 
7400 		/* Now, we're going to swap */
7401 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7402 			local_irq_disable();
7403 			update_max_tr(tr, current, smp_processor_id(), NULL);
7404 			local_irq_enable();
7405 		} else {
7406 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7407 						 (void *)tr, 1);
7408 		}
7409 		tracing_disarm_snapshot(tr);
7410 		break;
7411 	default:
7412 		if (tr->allocated_snapshot) {
7413 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7414 				tracing_reset_online_cpus(&tr->max_buffer);
7415 			else
7416 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7417 		}
7418 		break;
7419 	}
7420 
7421 	if (ret >= 0) {
7422 		*ppos += cnt;
7423 		ret = cnt;
7424 	}
7425 out:
7426 	mutex_unlock(&trace_types_lock);
7427 	return ret;
7428 }
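
/*
 * For illustration, the values handled above map to:
 *
 *   # echo 0 > /sys/kernel/tracing/snapshot    free the snapshot buffer
 *   # echo 1 > /sys/kernel/tracing/snapshot    allocate (if needed) and swap
 *   # echo 2 > /sys/kernel/tracing/snapshot    (any other value) clear it
 *                                              without freeing
 */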
7429 
7430 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7431 {
7432 	struct seq_file *m = file->private_data;
7433 	int ret;
7434 
7435 	ret = tracing_release(inode, file);
7436 
7437 	if (file->f_mode & FMODE_READ)
7438 		return ret;
7439 
7440 	/* If write only, the seq_file is just a stub */
7441 	if (m)
7442 		kfree(m->private);
7443 	kfree(m);
7444 
7445 	return 0;
7446 }
7447 
7448 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7449 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7450 				    size_t count, loff_t *ppos);
7451 static int tracing_buffers_release(struct inode *inode, struct file *file);
7452 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7453 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7454 
7455 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7456 {
7457 	struct ftrace_buffer_info *info;
7458 	int ret;
7459 
7460 	/* The following checks for tracefs lockdown */
7461 	ret = tracing_buffers_open(inode, filp);
7462 	if (ret < 0)
7463 		return ret;
7464 
7465 	info = filp->private_data;
7466 
7467 	if (info->iter.trace->use_max_tr) {
7468 		tracing_buffers_release(inode, filp);
7469 		return -EBUSY;
7470 	}
7471 
7472 	info->iter.snapshot = true;
7473 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7474 
7475 	return ret;
7476 }
7477 
7478 #endif /* CONFIG_TRACER_SNAPSHOT */
7479 
7480 
7481 static const struct file_operations tracing_thresh_fops = {
7482 	.open		= tracing_open_generic,
7483 	.read		= tracing_thresh_read,
7484 	.write		= tracing_thresh_write,
7485 	.llseek		= generic_file_llseek,
7486 };
7487 
7488 #ifdef CONFIG_TRACER_MAX_TRACE
7489 static const struct file_operations tracing_max_lat_fops = {
7490 	.open		= tracing_open_generic_tr,
7491 	.read		= tracing_max_lat_read,
7492 	.write		= tracing_max_lat_write,
7493 	.llseek		= generic_file_llseek,
7494 	.release	= tracing_release_generic_tr,
7495 };
7496 #endif
7497 
7498 static const struct file_operations set_tracer_fops = {
7499 	.open		= tracing_open_generic_tr,
7500 	.read		= tracing_set_trace_read,
7501 	.write		= tracing_set_trace_write,
7502 	.llseek		= generic_file_llseek,
7503 	.release	= tracing_release_generic_tr,
7504 };
7505 
7506 static const struct file_operations tracing_pipe_fops = {
7507 	.open		= tracing_open_pipe,
7508 	.poll		= tracing_poll_pipe,
7509 	.read		= tracing_read_pipe,
7510 	.splice_read	= tracing_splice_read_pipe,
7511 	.release	= tracing_release_pipe,
7512 	.llseek		= no_llseek,
7513 };
7514 
7515 static const struct file_operations tracing_entries_fops = {
7516 	.open		= tracing_open_generic_tr,
7517 	.read		= tracing_entries_read,
7518 	.write		= tracing_entries_write,
7519 	.llseek		= generic_file_llseek,
7520 	.release	= tracing_release_generic_tr,
7521 };
7522 
7523 static const struct file_operations tracing_buffer_meta_fops = {
7524 	.open		= tracing_buffer_meta_open,
7525 	.read		= seq_read,
7526 	.llseek		= seq_lseek,
7527 	.release	= tracing_seq_release,
7528 };
7529 
7530 static const struct file_operations tracing_total_entries_fops = {
7531 	.open		= tracing_open_generic_tr,
7532 	.read		= tracing_total_entries_read,
7533 	.llseek		= generic_file_llseek,
7534 	.release	= tracing_release_generic_tr,
7535 };
7536 
7537 static const struct file_operations tracing_free_buffer_fops = {
7538 	.open		= tracing_open_generic_tr,
7539 	.write		= tracing_free_buffer_write,
7540 	.release	= tracing_free_buffer_release,
7541 };
7542 
7543 static const struct file_operations tracing_mark_fops = {
7544 	.open		= tracing_mark_open,
7545 	.write		= tracing_mark_write,
7546 	.release	= tracing_release_generic_tr,
7547 };
7548 
7549 static const struct file_operations tracing_mark_raw_fops = {
7550 	.open		= tracing_mark_open,
7551 	.write		= tracing_mark_raw_write,
7552 	.release	= tracing_release_generic_tr,
7553 };
7554 
7555 static const struct file_operations trace_clock_fops = {
7556 	.open		= tracing_clock_open,
7557 	.read		= seq_read,
7558 	.llseek		= seq_lseek,
7559 	.release	= tracing_single_release_tr,
7560 	.write		= tracing_clock_write,
7561 };
7562 
7563 static const struct file_operations trace_time_stamp_mode_fops = {
7564 	.open		= tracing_time_stamp_mode_open,
7565 	.read		= seq_read,
7566 	.llseek		= seq_lseek,
7567 	.release	= tracing_single_release_tr,
7568 };
7569 
7570 static const struct file_operations last_boot_fops = {
7571 	.open		= tracing_open_generic_tr,
7572 	.read		= tracing_last_boot_read,
7573 	.llseek		= generic_file_llseek,
7574 	.release	= tracing_release_generic_tr,
7575 };
7576 
7577 #ifdef CONFIG_TRACER_SNAPSHOT
7578 static const struct file_operations snapshot_fops = {
7579 	.open		= tracing_snapshot_open,
7580 	.read		= seq_read,
7581 	.write		= tracing_snapshot_write,
7582 	.llseek		= tracing_lseek,
7583 	.release	= tracing_snapshot_release,
7584 };
7585 
7586 static const struct file_operations snapshot_raw_fops = {
7587 	.open		= snapshot_raw_open,
7588 	.read		= tracing_buffers_read,
7589 	.release	= tracing_buffers_release,
7590 	.splice_read	= tracing_buffers_splice_read,
7591 	.llseek		= no_llseek,
7592 };
7593 
7594 #endif /* CONFIG_TRACER_SNAPSHOT */
7595 
7596 /*
7597  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7598  * @filp: The active open file structure
7599  * @ubuf: The userspace provided buffer holding the value to write
7600  * @cnt: The number of bytes available to read from @ubuf
7601  * @ppos: The current "file" position
7602  *
7603  * This function implements the write interface for a struct trace_min_max_param.
7604  * The filp->private_data must point to a trace_min_max_param structure that
7605  * defines where to write the value, the min and the max acceptable values,
7606  * and a lock to protect the write.
7607  */
7608 static ssize_t
7609 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7610 {
7611 	struct trace_min_max_param *param = filp->private_data;
7612 	u64 val;
7613 	int err;
7614 
7615 	if (!param)
7616 		return -EFAULT;
7617 
7618 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7619 	if (err)
7620 		return err;
7621 
7622 	if (param->lock)
7623 		mutex_lock(param->lock);
7624 
7625 	if (param->min && val < *param->min)
7626 		err = -EINVAL;
7627 
7628 	if (param->max && val > *param->max)
7629 		err = -EINVAL;
7630 
7631 	if (!err)
7632 		*param->val = val;
7633 
7634 	if (param->lock)
7635 		mutex_unlock(param->lock);
7636 
7637 	if (err)
7638 		return err;
7639 
7640 	return cnt;
7641 }
7642 
7643 /*
7644  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7645  * @filp: The active open file structure
7646  * @ubuf: The userspace provided buffer to read value into
7647  * @cnt: The maximum number of bytes to read
7648  * @ppos: The current "file" position
7649  *
7650  * This function implements the read interface for a struct trace_min_max_param.
7651  * The filp->private_data must point to a trace_min_max_param struct with valid
7652  * data.
7653  */
7654 static ssize_t
7655 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7656 {
7657 	struct trace_min_max_param *param = filp->private_data;
7658 	char buf[U64_STR_SIZE];
7659 	int len;
7660 	u64 val;
7661 
7662 	if (!param)
7663 		return -EFAULT;
7664 
7665 	val = *param->val;
7666 
7667 	if (cnt > sizeof(buf))
7668 		cnt = sizeof(buf);
7669 
7670 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7671 
7672 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7673 }
7674 
7675 const struct file_operations trace_min_max_fops = {
7676 	.open		= tracing_open_generic,
7677 	.read		= trace_min_max_read,
7678 	.write		= trace_min_max_write,
7679 };
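
/*
 * Illustrative sketch (not part of the original file): how a hypothetical
 * tracer could expose a clamped u64 knob through trace_min_max_fops.  All
 * example_* names below are made up; the trace_min_max_param fields are the
 * ones consumed by trace_min_max_read()/trace_min_max_write() above.
 */
#if 0
static u64 example_val = 50;
static u64 example_min = 1;
static u64 example_max = 100;
static DEFINE_MUTEX(example_lock);

static struct trace_min_max_param example_param = {
	.lock	= &example_lock,
	.val	= &example_val,
	.min	= &example_min,
	.max	= &example_max,
};

/* Writes outside [1, 100] are rejected with -EINVAL; reads show the value */
static void example_create_knob(struct dentry *parent)
{
	trace_create_file("example_knob", TRACE_MODE_WRITE, parent,
			  &example_param, &trace_min_max_fops);
}
#endif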
7680 
7681 #define TRACING_LOG_ERRS_MAX	8
7682 #define TRACING_LOG_LOC_MAX	128
7683 
7684 #define CMD_PREFIX "  Command: "
7685 
7686 struct err_info {
7687 	const char	**errs;	/* ptr to loc-specific array of err strings */
7688 	u8		type;	/* index into errs -> specific err string */
7689 	u16		pos;	/* caret position */
7690 	u64		ts;
7691 };
7692 
7693 struct tracing_log_err {
7694 	struct list_head	list;
7695 	struct err_info		info;
7696 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7697 	char			*cmd;                     /* what caused err */
7698 };
7699 
7700 static DEFINE_MUTEX(tracing_err_log_lock);
7701 
7702 static struct tracing_log_err *alloc_tracing_log_err(int len)
7703 {
7704 	struct tracing_log_err *err;
7705 
7706 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7707 	if (!err)
7708 		return ERR_PTR(-ENOMEM);
7709 
7710 	err->cmd = kzalloc(len, GFP_KERNEL);
7711 	if (!err->cmd) {
7712 		kfree(err);
7713 		return ERR_PTR(-ENOMEM);
7714 	}
7715 
7716 	return err;
7717 }
7718 
7719 static void free_tracing_log_err(struct tracing_log_err *err)
7720 {
7721 	kfree(err->cmd);
7722 	kfree(err);
7723 }
7724 
7725 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7726 						   int len)
7727 {
7728 	struct tracing_log_err *err;
7729 	char *cmd;
7730 
7731 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7732 		err = alloc_tracing_log_err(len);
7733 		if (PTR_ERR(err) != -ENOMEM)
7734 			tr->n_err_log_entries++;
7735 
7736 		return err;
7737 	}
7738 	cmd = kzalloc(len, GFP_KERNEL);
7739 	if (!cmd)
7740 		return ERR_PTR(-ENOMEM);
7741 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7742 	kfree(err->cmd);
7743 	err->cmd = cmd;
7744 	list_del(&err->list);
7745 
7746 	return err;
7747 }
7748 
7749 /**
7750  * err_pos - find the position of a string within a command for error careting
7751  * @cmd: The tracing command that caused the error
7752  * @str: The string to position the caret at within @cmd
7753  *
7754  * Finds the position of the first occurrence of @str within @cmd.  The
7755  * return value can be passed to tracing_log_err() for caret placement
7756  * within @cmd.
7757  *
7758  * Returns the index within @cmd of the first occurrence of @str or 0
7759  * if @str was not found.
7760  */
7761 unsigned int err_pos(char *cmd, const char *str)
7762 {
7763 	char *found;
7764 
7765 	if (WARN_ON(!strlen(cmd)))
7766 		return 0;
7767 
7768 	found = strstr(cmd, str);
7769 	if (found)
7770 		return found - cmd;
7771 
7772 	return 0;
7773 }
7774 
7775 /**
7776  * tracing_log_err - write an error to the tracing error log
7777  * @tr: The associated trace array for the error (NULL for top level array)
7778  * @loc: A string describing where the error occurred
7779  * @cmd: The tracing command that caused the error
7780  * @errs: The array of loc-specific static error strings
7781  * @type: The index into errs[], which produces the specific static err string
7782  * @pos: The position the caret should be placed in the cmd
7783  *
7784  * Writes an error into tracing/error_log of the form:
7785  *
7786  * <loc>: error: <text>
7787  *   Command: <cmd>
7788  *              ^
7789  *
7790  * tracing/error_log is a small log file containing the last
7791  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7792  * unless there has been a tracing error, and the error log can be
7793  * cleared and have its memory freed by writing the empty string in
7794  * truncation mode to it, i.e. echo > tracing/error_log.
7795  *
7796  * NOTE: the @errs array along with the @type param are used to
7797  * produce a static error string - this string is not copied and saved
7798  * when the error is logged - only a pointer to it is saved.  See
7799  * existing callers for examples of how static strings are typically
7800  * defined for use with tracing_log_err().
7801  */
7802 void tracing_log_err(struct trace_array *tr,
7803 		     const char *loc, const char *cmd,
7804 		     const char **errs, u8 type, u16 pos)
7805 {
7806 	struct tracing_log_err *err;
7807 	int len = 0;
7808 
7809 	if (!tr)
7810 		tr = &global_trace;
7811 
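	/*
	 * Room for the leading newline, CMD_PREFIX, the command itself, the
	 * trailing newline and the NUL terminator (the sizeof()s slightly
	 * over-count, which is harmless).
	 */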
7812 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7813 
7814 	mutex_lock(&tracing_err_log_lock);
7815 	err = get_tracing_log_err(tr, len);
7816 	if (PTR_ERR(err) == -ENOMEM) {
7817 		mutex_unlock(&tracing_err_log_lock);
7818 		return;
7819 	}
7820 
7821 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7822 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7823 
7824 	err->info.errs = errs;
7825 	err->info.type = type;
7826 	err->info.pos = pos;
7827 	err->info.ts = local_clock();
7828 
7829 	list_add_tail(&err->list, &tr->err_log);
7830 	mutex_unlock(&tracing_err_log_lock);
7831 }
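
/*
 * Illustrative sketch (not a real caller): how a command handler would
 * typically report a parse error through tracing_log_err().  The
 * example_errs[] array and the "bad" token are made up; real callers keep
 * their own static error-string arrays, as described above.
 */
#if 0
static const char *example_errs[] = { "Unrecognized token" };

static void example_report_error(struct trace_array *tr, char *cmd)
{
	/* Place the caret under the first occurrence of "bad" in cmd */
	tracing_log_err(tr, "example_cmd", cmd, example_errs,
			0, err_pos(cmd, "bad"));
}
#endif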
7832 
7833 static void clear_tracing_err_log(struct trace_array *tr)
7834 {
7835 	struct tracing_log_err *err, *next;
7836 
7837 	mutex_lock(&tracing_err_log_lock);
7838 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7839 		list_del(&err->list);
7840 		free_tracing_log_err(err);
7841 	}
7842 
7843 	tr->n_err_log_entries = 0;
7844 	mutex_unlock(&tracing_err_log_lock);
7845 }
7846 
7847 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7848 {
7849 	struct trace_array *tr = m->private;
7850 
7851 	mutex_lock(&tracing_err_log_lock);
7852 
7853 	return seq_list_start(&tr->err_log, *pos);
7854 }
7855 
7856 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7857 {
7858 	struct trace_array *tr = m->private;
7859 
7860 	return seq_list_next(v, &tr->err_log, pos);
7861 }
7862 
7863 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7864 {
7865 	mutex_unlock(&tracing_err_log_lock);
7866 }
7867 
7868 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7869 {
7870 	u16 i;
7871 
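	/* Skip the width of CMD_PREFIX, then @pos more columns, then print the caret */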
7872 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7873 		seq_putc(m, ' ');
7874 	for (i = 0; i < pos; i++)
7875 		seq_putc(m, ' ');
7876 	seq_puts(m, "^\n");
7877 }
7878 
7879 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7880 {
7881 	struct tracing_log_err *err = v;
7882 
7883 	if (err) {
7884 		const char *err_text = err->info.errs[err->info.type];
7885 		u64 sec = err->info.ts;
7886 		u32 nsec;
7887 
7888 		nsec = do_div(sec, NSEC_PER_SEC);
7889 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7890 			   err->loc, err_text);
7891 		seq_printf(m, "%s", err->cmd);
7892 		tracing_err_log_show_pos(m, err->info.pos);
7893 	}
7894 
7895 	return 0;
7896 }
7897 
7898 static const struct seq_operations tracing_err_log_seq_ops = {
7899 	.start  = tracing_err_log_seq_start,
7900 	.next   = tracing_err_log_seq_next,
7901 	.stop   = tracing_err_log_seq_stop,
7902 	.show   = tracing_err_log_seq_show
7903 };
7904 
7905 static int tracing_err_log_open(struct inode *inode, struct file *file)
7906 {
7907 	struct trace_array *tr = inode->i_private;
7908 	int ret = 0;
7909 
7910 	ret = tracing_check_open_get_tr(tr);
7911 	if (ret)
7912 		return ret;
7913 
7914 	/* If this file was opened for write, then erase contents */
7915 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7916 		clear_tracing_err_log(tr);
7917 
7918 	if (file->f_mode & FMODE_READ) {
7919 		ret = seq_open(file, &tracing_err_log_seq_ops);
7920 		if (!ret) {
7921 			struct seq_file *m = file->private_data;
7922 			m->private = tr;
7923 		} else {
7924 			trace_array_put(tr);
7925 		}
7926 	}
7927 	return ret;
7928 }
7929 
7930 static ssize_t tracing_err_log_write(struct file *file,
7931 				     const char __user *buffer,
7932 				     size_t count, loff_t *ppos)
7933 {
7934 	return count;
7935 }
7936 
7937 static int tracing_err_log_release(struct inode *inode, struct file *file)
7938 {
7939 	struct trace_array *tr = inode->i_private;
7940 
7941 	trace_array_put(tr);
7942 
7943 	if (file->f_mode & FMODE_READ)
7944 		seq_release(inode, file);
7945 
7946 	return 0;
7947 }
7948 
7949 static const struct file_operations tracing_err_log_fops = {
7950 	.open           = tracing_err_log_open,
7951 	.write		= tracing_err_log_write,
7952 	.read           = seq_read,
7953 	.llseek         = tracing_lseek,
7954 	.release        = tracing_err_log_release,
7955 };
7956 
7957 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7958 {
7959 	struct trace_array *tr = inode->i_private;
7960 	struct ftrace_buffer_info *info;
7961 	int ret;
7962 
7963 	ret = tracing_check_open_get_tr(tr);
7964 	if (ret)
7965 		return ret;
7966 
7967 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7968 	if (!info) {
7969 		trace_array_put(tr);
7970 		return -ENOMEM;
7971 	}
7972 
7973 	mutex_lock(&trace_types_lock);
7974 
7975 	info->iter.tr		= tr;
7976 	info->iter.cpu_file	= tracing_get_cpu(inode);
7977 	info->iter.trace	= tr->current_trace;
7978 	info->iter.array_buffer = &tr->array_buffer;
7979 	info->spare		= NULL;
7980 	/* Force reading ring buffer for first read */
7981 	info->read		= (unsigned int)-1;
7982 
7983 	filp->private_data = info;
7984 
7985 	tr->trace_ref++;
7986 
7987 	mutex_unlock(&trace_types_lock);
7988 
7989 	ret = nonseekable_open(inode, filp);
7990 	if (ret < 0)
7991 		trace_array_put(tr);
7992 
7993 	return ret;
7994 }
7995 
7996 static __poll_t
7997 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7998 {
7999 	struct ftrace_buffer_info *info = filp->private_data;
8000 	struct trace_iterator *iter = &info->iter;
8001 
8002 	return trace_poll(iter, filp, poll_table);
8003 }
8004 
8005 static ssize_t
8006 tracing_buffers_read(struct file *filp, char __user *ubuf,
8007 		     size_t count, loff_t *ppos)
8008 {
8009 	struct ftrace_buffer_info *info = filp->private_data;
8010 	struct trace_iterator *iter = &info->iter;
8011 	void *trace_data;
8012 	int page_size;
8013 	ssize_t ret = 0;
8014 	ssize_t size;
8015 
8016 	if (!count)
8017 		return 0;
8018 
8019 #ifdef CONFIG_TRACER_MAX_TRACE
8020 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8021 		return -EBUSY;
8022 #endif
8023 
8024 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8025 
8026 	/* Make sure the spare matches the current sub buffer size */
8027 	if (info->spare) {
8028 		if (page_size != info->spare_size) {
8029 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8030 						   info->spare_cpu, info->spare);
8031 			info->spare = NULL;
8032 		}
8033 	}
8034 
8035 	if (!info->spare) {
8036 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8037 							  iter->cpu_file);
8038 		if (IS_ERR(info->spare)) {
8039 			ret = PTR_ERR(info->spare);
8040 			info->spare = NULL;
8041 		} else {
8042 			info->spare_cpu = iter->cpu_file;
8043 			info->spare_size = page_size;
8044 		}
8045 	}
8046 	if (!info->spare)
8047 		return ret;
8048 
8049 	/* Do we have previous read data to read? */
8050 	if (info->read < page_size)
8051 		goto read;
8052 
8053  again:
8054 	trace_access_lock(iter->cpu_file);
8055 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8056 				    info->spare,
8057 				    count,
8058 				    iter->cpu_file, 0);
8059 	trace_access_unlock(iter->cpu_file);
8060 
8061 	if (ret < 0) {
8062 		if (trace_empty(iter)) {
8063 			if ((filp->f_flags & O_NONBLOCK))
8064 				return -EAGAIN;
8065 
8066 			ret = wait_on_pipe(iter, 0);
8067 			if (ret)
8068 				return ret;
8069 
8070 			goto again;
8071 		}
8072 		return 0;
8073 	}
8074 
8075 	info->read = 0;
8076  read:
8077 	size = page_size - info->read;
8078 	if (size > count)
8079 		size = count;
8080 	trace_data = ring_buffer_read_page_data(info->spare);
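	/*
	 * copy_to_user() returns the number of bytes that could NOT be
	 * copied: fail only if nothing was copied at all, otherwise report
	 * the short copy below.
	 */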
8081 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8082 	if (ret == size)
8083 		return -EFAULT;
8084 
8085 	size -= ret;
8086 
8087 	*ppos += size;
8088 	info->read += size;
8089 
8090 	return size;
8091 }
8092 
8093 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8094 {
8095 	struct ftrace_buffer_info *info = file->private_data;
8096 	struct trace_iterator *iter = &info->iter;
8097 
8098 	iter->closed = true;
8099 	/* Make sure the waiters see the new wait_index */
8100 	(void)atomic_fetch_inc_release(&iter->wait_index);
8101 
8102 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8103 
8104 	return 0;
8105 }
8106 
8107 static int tracing_buffers_release(struct inode *inode, struct file *file)
8108 {
8109 	struct ftrace_buffer_info *info = file->private_data;
8110 	struct trace_iterator *iter = &info->iter;
8111 
8112 	mutex_lock(&trace_types_lock);
8113 
8114 	iter->tr->trace_ref--;
8115 
8116 	__trace_array_put(iter->tr);
8117 
8118 	if (info->spare)
8119 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8120 					   info->spare_cpu, info->spare);
8121 	kvfree(info);
8122 
8123 	mutex_unlock(&trace_types_lock);
8124 
8125 	return 0;
8126 }
8127 
8128 struct buffer_ref {
8129 	struct trace_buffer	*buffer;
8130 	void			*page;
8131 	int			cpu;
8132 	refcount_t		refcount;
8133 };
8134 
8135 static void buffer_ref_release(struct buffer_ref *ref)
8136 {
8137 	if (!refcount_dec_and_test(&ref->refcount))
8138 		return;
8139 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8140 	kfree(ref);
8141 }
8142 
8143 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8144 				    struct pipe_buffer *buf)
8145 {
8146 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8147 
8148 	buffer_ref_release(ref);
8149 	buf->private = 0;
8150 }
8151 
8152 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8153 				struct pipe_buffer *buf)
8154 {
8155 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8156 
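	/* Guard against refcount overflow from repeated pipe buffer gets */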
8157 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8158 		return false;
8159 
8160 	refcount_inc(&ref->refcount);
8161 	return true;
8162 }
8163 
8164 /* Pipe buffer operations for a buffer. */
8165 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8166 	.release		= buffer_pipe_buf_release,
8167 	.get			= buffer_pipe_buf_get,
8168 };
8169 
8170 /*
8171  * Callback from splice_to_pipe(): release any pages left in the spd
8172  * in case we errored out while filling the pipe.
8173  */
8174 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8175 {
8176 	struct buffer_ref *ref =
8177 		(struct buffer_ref *)spd->partial[i].private;
8178 
8179 	buffer_ref_release(ref);
8180 	spd->partial[i].private = 0;
8181 }
8182 
8183 static ssize_t
8184 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8185 			    struct pipe_inode_info *pipe, size_t len,
8186 			    unsigned int flags)
8187 {
8188 	struct ftrace_buffer_info *info = file->private_data;
8189 	struct trace_iterator *iter = &info->iter;
8190 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8191 	struct page *pages_def[PIPE_DEF_BUFFERS];
8192 	struct splice_pipe_desc spd = {
8193 		.pages		= pages_def,
8194 		.partial	= partial_def,
8195 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8196 		.ops		= &buffer_pipe_buf_ops,
8197 		.spd_release	= buffer_spd_release,
8198 	};
8199 	struct buffer_ref *ref;
8200 	bool woken = false;
8201 	int page_size;
8202 	int entries, i;
8203 	ssize_t ret = 0;
8204 
8205 #ifdef CONFIG_TRACER_MAX_TRACE
8206 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8207 		return -EBUSY;
8208 #endif
8209 
8210 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
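	/*
	 * Splice reads must be sub-buffer aligned: reject unaligned offsets
	 * and round len down to a whole number of sub-buffers (at least one).
	 */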
8211 	if (*ppos & (page_size - 1))
8212 		return -EINVAL;
8213 
8214 	if (len & (page_size - 1)) {
8215 		if (len < page_size)
8216 			return -EINVAL;
8217 		len &= (~(page_size - 1));
8218 	}
8219 
8220 	if (splice_grow_spd(pipe, &spd))
8221 		return -ENOMEM;
8222 
8223  again:
8224 	trace_access_lock(iter->cpu_file);
8225 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8226 
8227 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8228 		struct page *page;
8229 		int r;
8230 
8231 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8232 		if (!ref) {
8233 			ret = -ENOMEM;
8234 			break;
8235 		}
8236 
8237 		refcount_set(&ref->refcount, 1);
8238 		ref->buffer = iter->array_buffer->buffer;
8239 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8240 		if (IS_ERR(ref->page)) {
8241 			ret = PTR_ERR(ref->page);
8242 			ref->page = NULL;
8243 			kfree(ref);
8244 			break;
8245 		}
8246 		ref->cpu = iter->cpu_file;
8247 
8248 		r = ring_buffer_read_page(ref->buffer, ref->page,
8249 					  len, iter->cpu_file, 1);
8250 		if (r < 0) {
8251 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8252 						   ref->page);
8253 			kfree(ref);
8254 			break;
8255 		}
8256 
8257 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8258 
8259 		spd.pages[i] = page;
8260 		spd.partial[i].len = page_size;
8261 		spd.partial[i].offset = 0;
8262 		spd.partial[i].private = (unsigned long)ref;
8263 		spd.nr_pages++;
8264 		*ppos += page_size;
8265 
8266 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8267 	}
8268 
8269 	trace_access_unlock(iter->cpu_file);
8270 	spd.nr_pages = i;
8271 
8272 	/* did we read anything? */
8273 	if (!spd.nr_pages) {
8274 
8275 		if (ret)
8276 			goto out;
8277 
8278 		if (woken)
8279 			goto out;
8280 
8281 		ret = -EAGAIN;
8282 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8283 			goto out;
8284 
8285 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8286 		if (ret)
8287 			goto out;
8288 
8289 		/* No need to wait after waking up when tracing is off */
8290 		if (!tracer_tracing_is_on(iter->tr))
8291 			goto out;
8292 
8293 		/* Iterate one more time to collect any new data, then exit */
8294 		woken = true;
8295 
8296 		goto again;
8297 	}
8298 
8299 	ret = splice_to_pipe(pipe, &spd);
8300 out:
8301 	splice_shrink_spd(&spd);
8302 
8303 	return ret;
8304 }
8305 
8306 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8307 {
8308 	struct ftrace_buffer_info *info = file->private_data;
8309 	struct trace_iterator *iter = &info->iter;
8310 	int err;
8311 
8312 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
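		/*
		 * Unless the file was opened non-blocking, wait until the
		 * buffer reaches buffer_percent full before advancing the
		 * mmap()ed reader page.
		 */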
8313 		if (!(file->f_flags & O_NONBLOCK)) {
8314 			err = ring_buffer_wait(iter->array_buffer->buffer,
8315 					       iter->cpu_file,
8316 					       iter->tr->buffer_percent,
8317 					       NULL, NULL);
8318 			if (err)
8319 				return err;
8320 		}
8321 
8322 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8323 						  iter->cpu_file);
8324 	} else if (cmd) {
8325 		return -ENOTTY;
8326 	}
8327 
8328 	/*
8329 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8330 	 * waiters
8331 	 */
8332 	mutex_lock(&trace_types_lock);
8333 
8334 	/* Make sure the waiters see the new wait_index */
8335 	(void)atomic_fetch_inc_release(&iter->wait_index);
8336 
8337 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8338 
8339 	mutex_unlock(&trace_types_lock);
8340 	return 0;
8341 }
8342 
8343 #ifdef CONFIG_TRACER_MAX_TRACE
8344 static int get_snapshot_map(struct trace_array *tr)
8345 {
8346 	int err = 0;
8347 
8348 	/*
8349 	 * Called with mmap_lock held. lockdep would be unhappy if we took
8350 	 * trace_types_lock here, so use the more specific
8351 	 * snapshot_trigger_lock instead.
8352 	 */
8353 	spin_lock(&tr->snapshot_trigger_lock);
8354 
8355 	if (tr->snapshot || tr->mapped == UINT_MAX)
8356 		err = -EBUSY;
8357 	else
8358 		tr->mapped++;
8359 
8360 	spin_unlock(&tr->snapshot_trigger_lock);
8361 
8362 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8363 	if (tr->mapped == 1)
8364 		synchronize_rcu();
8365 
8366 	return err;
8367 
8368 }
8369 static void put_snapshot_map(struct trace_array *tr)
8370 {
8371 	spin_lock(&tr->snapshot_trigger_lock);
8372 	if (!WARN_ON(!tr->mapped))
8373 		tr->mapped--;
8374 	spin_unlock(&tr->snapshot_trigger_lock);
8375 }
8376 #else
8377 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8378 static inline void put_snapshot_map(struct trace_array *tr) { }
8379 #endif
8380 
8381 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8382 {
8383 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8384 	struct trace_iterator *iter = &info->iter;
8385 
8386 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8387 	put_snapshot_map(iter->tr);
8388 }
8389 
8390 static const struct vm_operations_struct tracing_buffers_vmops = {
8391 	.close		= tracing_buffers_mmap_close,
8392 };
8393 
8394 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8395 {
8396 	struct ftrace_buffer_info *info = filp->private_data;
8397 	struct trace_iterator *iter = &info->iter;
8398 	int ret = 0;
8399 
8400 	ret = get_snapshot_map(iter->tr);
8401 	if (ret)
8402 		return ret;
8403 
8404 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8405 	if (ret)
8406 		put_snapshot_map(iter->tr);
8407 
8408 	vma->vm_ops = &tracing_buffers_vmops;
8409 
8410 	return ret;
8411 }
8412 
8413 static const struct file_operations tracing_buffers_fops = {
8414 	.open		= tracing_buffers_open,
8415 	.read		= tracing_buffers_read,
8416 	.poll		= tracing_buffers_poll,
8417 	.release	= tracing_buffers_release,
8418 	.flush		= tracing_buffers_flush,
8419 	.splice_read	= tracing_buffers_splice_read,
8420 	.unlocked_ioctl = tracing_buffers_ioctl,
8421 	.llseek		= no_llseek,
8422 	.mmap		= tracing_buffers_mmap,
8423 };
8424 
8425 static ssize_t
8426 tracing_stats_read(struct file *filp, char __user *ubuf,
8427 		   size_t count, loff_t *ppos)
8428 {
8429 	struct inode *inode = file_inode(filp);
8430 	struct trace_array *tr = inode->i_private;
8431 	struct array_buffer *trace_buf = &tr->array_buffer;
8432 	int cpu = tracing_get_cpu(inode);
8433 	struct trace_seq *s;
8434 	unsigned long cnt;
8435 	unsigned long long t;
8436 	unsigned long usec_rem;
8437 
8438 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8439 	if (!s)
8440 		return -ENOMEM;
8441 
8442 	trace_seq_init(s);
8443 
8444 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8445 	trace_seq_printf(s, "entries: %ld\n", cnt);
8446 
8447 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8448 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8449 
8450 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8451 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8452 
8453 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8454 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8455 
8456 	if (trace_clocks[tr->clock_id].in_ns) {
8457 		/* local or global for trace_clock */
8458 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8459 		usec_rem = do_div(t, USEC_PER_SEC);
8460 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8461 								t, usec_rem);
8462 
8463 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8464 		usec_rem = do_div(t, USEC_PER_SEC);
8465 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8466 	} else {
8467 		/* counter or tsc mode for trace_clock */
8468 		trace_seq_printf(s, "oldest event ts: %llu\n",
8469 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8470 
8471 		trace_seq_printf(s, "now ts: %llu\n",
8472 				ring_buffer_time_stamp(trace_buf->buffer));
8473 	}
8474 
8475 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8476 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8477 
8478 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8479 	trace_seq_printf(s, "read events: %ld\n", cnt);
8480 
8481 	count = simple_read_from_buffer(ubuf, count, ppos,
8482 					s->buffer, trace_seq_used(s));
8483 
8484 	kfree(s);
8485 
8486 	return count;
8487 }
8488 
8489 static const struct file_operations tracing_stats_fops = {
8490 	.open		= tracing_open_generic_tr,
8491 	.read		= tracing_stats_read,
8492 	.llseek		= generic_file_llseek,
8493 	.release	= tracing_release_generic_tr,
8494 };
8495 
8496 #ifdef CONFIG_DYNAMIC_FTRACE
8497 
8498 static ssize_t
8499 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8500 		  size_t cnt, loff_t *ppos)
8501 {
8502 	ssize_t ret;
8503 	char *buf;
8504 	int r;
8505 
8506 	/* 256 should be plenty to hold the amount needed */
8507 	buf = kmalloc(256, GFP_KERNEL);
8508 	if (!buf)
8509 		return -ENOMEM;
8510 
8511 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8512 		      ftrace_update_tot_cnt,
8513 		      ftrace_number_of_pages,
8514 		      ftrace_number_of_groups);
8515 
8516 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8517 	kfree(buf);
8518 	return ret;
8519 }
8520 
8521 static const struct file_operations tracing_dyn_info_fops = {
8522 	.open		= tracing_open_generic,
8523 	.read		= tracing_read_dyn_info,
8524 	.llseek		= generic_file_llseek,
8525 };
8526 #endif /* CONFIG_DYNAMIC_FTRACE */
8527 
8528 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8529 static void
8530 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8531 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8532 		void *data)
8533 {
8534 	tracing_snapshot_instance(tr);
8535 }
8536 
8537 static void
8538 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8539 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8540 		      void *data)
8541 {
8542 	struct ftrace_func_mapper *mapper = data;
8543 	long *count = NULL;
8544 
8545 	if (mapper)
8546 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8547 
8548 	if (count) {
8549 
8550 		if (*count <= 0)
8551 			return;
8552 
8553 		(*count)--;
8554 	}
8555 
8556 	tracing_snapshot_instance(tr);
8557 }
8558 
8559 static int
8560 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8561 		      struct ftrace_probe_ops *ops, void *data)
8562 {
8563 	struct ftrace_func_mapper *mapper = data;
8564 	long *count = NULL;
8565 
8566 	seq_printf(m, "%ps:", (void *)ip);
8567 
8568 	seq_puts(m, "snapshot");
8569 
8570 	if (mapper)
8571 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8572 
8573 	if (count)
8574 		seq_printf(m, ":count=%ld\n", *count);
8575 	else
8576 		seq_puts(m, ":unlimited\n");
8577 
8578 	return 0;
8579 }
8580 
8581 static int
8582 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8583 		     unsigned long ip, void *init_data, void **data)
8584 {
8585 	struct ftrace_func_mapper *mapper = *data;
8586 
8587 	if (!mapper) {
8588 		mapper = allocate_ftrace_func_mapper();
8589 		if (!mapper)
8590 			return -ENOMEM;
8591 		*data = mapper;
8592 	}
8593 
8594 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8595 }
8596 
8597 static void
8598 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8599 		     unsigned long ip, void *data)
8600 {
8601 	struct ftrace_func_mapper *mapper = data;
8602 
8603 	if (!ip) {
8604 		if (!mapper)
8605 			return;
8606 		free_ftrace_func_mapper(mapper, NULL);
8607 		return;
8608 	}
8609 
8610 	ftrace_func_mapper_remove_ip(mapper, ip);
8611 }
8612 
8613 static struct ftrace_probe_ops snapshot_probe_ops = {
8614 	.func			= ftrace_snapshot,
8615 	.print			= ftrace_snapshot_print,
8616 };
8617 
8618 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8619 	.func			= ftrace_count_snapshot,
8620 	.print			= ftrace_snapshot_print,
8621 	.init			= ftrace_snapshot_init,
8622 	.free			= ftrace_snapshot_free,
8623 };
8624 
8625 static int
8626 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8627 			       char *glob, char *cmd, char *param, int enable)
8628 {
8629 	struct ftrace_probe_ops *ops;
8630 	void *count = (void *)-1;
8631 	char *number;
8632 	int ret;
8633 
8634 	if (!tr)
8635 		return -ENODEV;
8636 
8637 	/* hash funcs only work with set_ftrace_filter */
8638 	if (!enable)
8639 		return -EINVAL;
8640 
8641 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8642 
8643 	if (glob[0] == '!') {
8644 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8645 		if (!ret)
8646 			tracing_disarm_snapshot(tr);
8647 
8648 		return ret;
8649 	}
8650 
8651 	if (!param)
8652 		goto out_reg;
8653 
8654 	number = strsep(&param, ":");
8655 
8656 	if (!strlen(number))
8657 		goto out_reg;
8658 
8659 	/*
8660 	 * We use the callback data field (which is a pointer)
8661 	 * as our counter.
8662 	 */
8663 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8664 	if (ret)
8665 		return ret;
8666 
8667  out_reg:
8668 	ret = tracing_arm_snapshot(tr);
8669 	if (ret < 0)
8670 		goto out;
8671 
8672 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8673 	if (ret < 0)
8674 		tracing_disarm_snapshot(tr);
8675  out:
8676 	return ret < 0 ? ret : 0;
8677 }
8678 
8679 static struct ftrace_func_command ftrace_snapshot_cmd = {
8680 	.name			= "snapshot",
8681 	.func			= ftrace_trace_snapshot_callback,
8682 };
8683 
8684 static __init int register_snapshot_cmd(void)
8685 {
8686 	return register_ftrace_command(&ftrace_snapshot_cmd);
8687 }
8688 #else
8689 static inline __init int register_snapshot_cmd(void) { return 0; }
8690 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8691 
8692 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8693 {
8694 	if (WARN_ON(!tr->dir))
8695 		return ERR_PTR(-ENODEV);
8696 
8697 	/* Top directory uses NULL as the parent */
8698 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8699 		return NULL;
8700 
8701 	/* All sub buffers (instances) have their own descriptor (dentry) */
8702 	return tr->dir;
8703 }
8704 
8705 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8706 {
8707 	struct dentry *d_tracer;
8708 
8709 	if (tr->percpu_dir)
8710 		return tr->percpu_dir;
8711 
8712 	d_tracer = tracing_get_dentry(tr);
8713 	if (IS_ERR(d_tracer))
8714 		return NULL;
8715 
8716 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8717 
8718 	MEM_FAIL(!tr->percpu_dir,
8719 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8720 
8721 	return tr->percpu_dir;
8722 }
8723 
8724 static struct dentry *
8725 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8726 		      void *data, long cpu, const struct file_operations *fops)
8727 {
8728 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8729 
8730 	if (ret) /* See tracing_get_cpu() */
8731 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8732 	return ret;
8733 }
8734 
8735 static void
8736 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8737 {
8738 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8739 	struct dentry *d_cpu;
8740 	char cpu_dir[30]; /* 30 characters should be more than enough */
8741 
8742 	if (!d_percpu)
8743 		return;
8744 
8745 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8746 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8747 	if (!d_cpu) {
8748 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8749 		return;
8750 	}
8751 
8752 	/* per cpu trace_pipe */
8753 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8754 				tr, cpu, &tracing_pipe_fops);
8755 
8756 	/* per cpu trace */
8757 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8758 				tr, cpu, &tracing_fops);
8759 
8760 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8761 				tr, cpu, &tracing_buffers_fops);
8762 
8763 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8764 				tr, cpu, &tracing_stats_fops);
8765 
8766 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8767 				tr, cpu, &tracing_entries_fops);
8768 
8769 	if (tr->range_addr_start)
8770 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8771 				      tr, cpu, &tracing_buffer_meta_fops);
8772 #ifdef CONFIG_TRACER_SNAPSHOT
8773 	if (!tr->range_addr_start) {
8774 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8775 				      tr, cpu, &snapshot_fops);
8776 
8777 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8778 				      tr, cpu, &snapshot_raw_fops);
8779 	}
8780 #endif
8781 }
8782 
8783 #ifdef CONFIG_FTRACE_SELFTEST
8784 /* Let selftest have access to static functions in this file */
8785 #include "trace_selftest.c"
8786 #endif
8787 
8788 static ssize_t
8789 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8790 			loff_t *ppos)
8791 {
8792 	struct trace_option_dentry *topt = filp->private_data;
8793 	char *buf;
8794 
8795 	if (topt->flags->val & topt->opt->bit)
8796 		buf = "1\n";
8797 	else
8798 		buf = "0\n";
8799 
8800 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8801 }
8802 
8803 static ssize_t
8804 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8805 			 loff_t *ppos)
8806 {
8807 	struct trace_option_dentry *topt = filp->private_data;
8808 	unsigned long val;
8809 	int ret;
8810 
8811 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8812 	if (ret)
8813 		return ret;
8814 
8815 	if (val != 0 && val != 1)
8816 		return -EINVAL;
8817 
8818 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8819 		mutex_lock(&trace_types_lock);
8820 		ret = __set_tracer_option(topt->tr, topt->flags,
8821 					  topt->opt, !val);
8822 		mutex_unlock(&trace_types_lock);
8823 		if (ret)
8824 			return ret;
8825 	}
8826 
8827 	*ppos += cnt;
8828 
8829 	return cnt;
8830 }
8831 
8832 static int tracing_open_options(struct inode *inode, struct file *filp)
8833 {
8834 	struct trace_option_dentry *topt = inode->i_private;
8835 	int ret;
8836 
8837 	ret = tracing_check_open_get_tr(topt->tr);
8838 	if (ret)
8839 		return ret;
8840 
8841 	filp->private_data = inode->i_private;
8842 	return 0;
8843 }
8844 
8845 static int tracing_release_options(struct inode *inode, struct file *file)
8846 {
8847 	struct trace_option_dentry *topt = file->private_data;
8848 
8849 	trace_array_put(topt->tr);
8850 	return 0;
8851 }
8852 
8853 static const struct file_operations trace_options_fops = {
8854 	.open = tracing_open_options,
8855 	.read = trace_options_read,
8856 	.write = trace_options_write,
8857 	.llseek	= generic_file_llseek,
8858 	.release = tracing_release_options,
8859 };
8860 
8861 /*
8862  * In order to pass in both the trace_array descriptor as well as the index
8863  * to the flag that the trace option file represents, the trace_array
8864  * has a character array of trace_flags_index[], which holds the index
8865  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8866  * The address of this character array is passed to the flag option file
8867  * read/write callbacks.
8868  *
8869  * In order to extract both the index and the trace_array descriptor,
8870  * get_tr_index() uses the following algorithm.
8871  *
8872  *   idx = *ptr;
8873  *
8874  * because the pointer itself points at the index entry, so dereferencing
8875  * it yields the index value (remember index[1] == 1).
8876  *
8877  * Then, to get the trace_array descriptor, subtracting that index
8878  * from the pointer gets us back to the start of the index array:
8879  *
8880  *   ptr - idx == &index[0]
8881  *
8882  * Then a simple container_of() from that pointer gets us to the
8883  * trace_array descriptor.
8884  */
8885 static void get_tr_index(void *data, struct trace_array **ptr,
8886 			 unsigned int *pindex)
8887 {
8888 	*pindex = *(unsigned char *)data;
8889 
8890 	*ptr = container_of(data - *pindex, struct trace_array,
8891 			    trace_flags_index);
8892 }
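
/*
 * Worked example (illustrative only): after init_trace_flags_index(), the
 * pointer handed to the option file for flag bit 3 is
 * &tr->trace_flags_index[3], so *data == 3, data - 3 == &index[0], and
 * container_of() recovers the trace_array:
 */
#if 0
static void get_tr_index_example(struct trace_array *tr)
{
	struct trace_array *found;
	unsigned int index;

	get_tr_index(&tr->trace_flags_index[3], &found, &index);
	/* Here index == 3 and found == tr */
}
#endif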
8893 
8894 static ssize_t
8895 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8896 			loff_t *ppos)
8897 {
8898 	void *tr_index = filp->private_data;
8899 	struct trace_array *tr;
8900 	unsigned int index;
8901 	char *buf;
8902 
8903 	get_tr_index(tr_index, &tr, &index);
8904 
8905 	if (tr->trace_flags & (1 << index))
8906 		buf = "1\n";
8907 	else
8908 		buf = "0\n";
8909 
8910 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8911 }
8912 
8913 static ssize_t
8914 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8915 			 loff_t *ppos)
8916 {
8917 	void *tr_index = filp->private_data;
8918 	struct trace_array *tr;
8919 	unsigned int index;
8920 	unsigned long val;
8921 	int ret;
8922 
8923 	get_tr_index(tr_index, &tr, &index);
8924 
8925 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8926 	if (ret)
8927 		return ret;
8928 
8929 	if (val != 0 && val != 1)
8930 		return -EINVAL;
8931 
8932 	mutex_lock(&event_mutex);
8933 	mutex_lock(&trace_types_lock);
8934 	ret = set_tracer_flag(tr, 1 << index, val);
8935 	mutex_unlock(&trace_types_lock);
8936 	mutex_unlock(&event_mutex);
8937 
8938 	if (ret < 0)
8939 		return ret;
8940 
8941 	*ppos += cnt;
8942 
8943 	return cnt;
8944 }
8945 
8946 static const struct file_operations trace_options_core_fops = {
8947 	.open = tracing_open_generic,
8948 	.read = trace_options_core_read,
8949 	.write = trace_options_core_write,
8950 	.llseek = generic_file_llseek,
8951 };
8952 
8953 struct dentry *trace_create_file(const char *name,
8954 				 umode_t mode,
8955 				 struct dentry *parent,
8956 				 void *data,
8957 				 const struct file_operations *fops)
8958 {
8959 	struct dentry *ret;
8960 
8961 	ret = tracefs_create_file(name, mode, parent, data, fops);
8962 	if (!ret)
8963 		pr_warn("Could not create tracefs '%s' entry\n", name);
8964 
8965 	return ret;
8966 }
8967 
8968 
8969 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8970 {
8971 	struct dentry *d_tracer;
8972 
8973 	if (tr->options)
8974 		return tr->options;
8975 
8976 	d_tracer = tracing_get_dentry(tr);
8977 	if (IS_ERR(d_tracer))
8978 		return NULL;
8979 
8980 	tr->options = tracefs_create_dir("options", d_tracer);
8981 	if (!tr->options) {
8982 		pr_warn("Could not create tracefs directory 'options'\n");
8983 		return NULL;
8984 	}
8985 
8986 	return tr->options;
8987 }
8988 
8989 static void
8990 create_trace_option_file(struct trace_array *tr,
8991 			 struct trace_option_dentry *topt,
8992 			 struct tracer_flags *flags,
8993 			 struct tracer_opt *opt)
8994 {
8995 	struct dentry *t_options;
8996 
8997 	t_options = trace_options_init_dentry(tr);
8998 	if (!t_options)
8999 		return;
9000 
9001 	topt->flags = flags;
9002 	topt->opt = opt;
9003 	topt->tr = tr;
9004 
9005 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9006 					t_options, topt, &trace_options_fops);
9007 
9008 }
9009 
9010 static void
9011 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9012 {
9013 	struct trace_option_dentry *topts;
9014 	struct trace_options *tr_topts;
9015 	struct tracer_flags *flags;
9016 	struct tracer_opt *opts;
9017 	int cnt;
9018 	int i;
9019 
9020 	if (!tracer)
9021 		return;
9022 
9023 	flags = tracer->flags;
9024 
9025 	if (!flags || !flags->opts)
9026 		return;
9027 
9028 	/*
9029 	 * If this is an instance, only create flags for tracers
9030 	 * the instance may have.
9031 	 */
9032 	if (!trace_ok_for_array(tracer, tr))
9033 		return;
9034 
9035 	for (i = 0; i < tr->nr_topts; i++) {
9036 		/* Make sure there are no duplicate flags. */
9037 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9038 			return;
9039 	}
9040 
9041 	opts = flags->opts;
9042 
9043 	for (cnt = 0; opts[cnt].name; cnt++)
9044 		;
9045 
9046 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9047 	if (!topts)
9048 		return;
9049 
9050 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9051 			    GFP_KERNEL);
9052 	if (!tr_topts) {
9053 		kfree(topts);
9054 		return;
9055 	}
9056 
9057 	tr->topts = tr_topts;
9058 	tr->topts[tr->nr_topts].tracer = tracer;
9059 	tr->topts[tr->nr_topts].topts = topts;
9060 	tr->nr_topts++;
9061 
9062 	for (cnt = 0; opts[cnt].name; cnt++) {
9063 		create_trace_option_file(tr, &topts[cnt], flags,
9064 					 &opts[cnt]);
9065 		MEM_FAIL(topts[cnt].entry == NULL,
9066 			  "Failed to create trace option: %s",
9067 			  opts[cnt].name);
9068 	}
9069 }
9070 
9071 static struct dentry *
9072 create_trace_option_core_file(struct trace_array *tr,
9073 			      const char *option, long index)
9074 {
9075 	struct dentry *t_options;
9076 
9077 	t_options = trace_options_init_dentry(tr);
9078 	if (!t_options)
9079 		return NULL;
9080 
9081 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9082 				 (void *)&tr->trace_flags_index[index],
9083 				 &trace_options_core_fops);
9084 }
9085 
9086 static void create_trace_options_dir(struct trace_array *tr)
9087 {
9088 	struct dentry *t_options;
9089 	bool top_level = tr == &global_trace;
9090 	int i;
9091 
9092 	t_options = trace_options_init_dentry(tr);
9093 	if (!t_options)
9094 		return;
9095 
9096 	for (i = 0; trace_options[i]; i++) {
9097 		if (top_level ||
9098 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9099 			create_trace_option_core_file(tr, trace_options[i], i);
9100 	}
9101 }
9102 
9103 static ssize_t
9104 rb_simple_read(struct file *filp, char __user *ubuf,
9105 	       size_t cnt, loff_t *ppos)
9106 {
9107 	struct trace_array *tr = filp->private_data;
9108 	char buf[64];
9109 	int r;
9110 
9111 	r = tracer_tracing_is_on(tr);
9112 	r = sprintf(buf, "%d\n", r);
9113 
9114 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9115 }
9116 
9117 static ssize_t
9118 rb_simple_write(struct file *filp, const char __user *ubuf,
9119 		size_t cnt, loff_t *ppos)
9120 {
9121 	struct trace_array *tr = filp->private_data;
9122 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9123 	unsigned long val;
9124 	int ret;
9125 
9126 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9127 	if (ret)
9128 		return ret;
9129 
9130 	if (buffer) {
9131 		mutex_lock(&trace_types_lock);
9132 		if (!!val == tracer_tracing_is_on(tr)) {
9133 			val = 0; /* do nothing */
9134 		} else if (val) {
9135 			tracer_tracing_on(tr);
9136 			if (tr->current_trace->start)
9137 				tr->current_trace->start(tr);
9138 		} else {
9139 			tracer_tracing_off(tr);
9140 			if (tr->current_trace->stop)
9141 				tr->current_trace->stop(tr);
9142 			/* Wake up any waiters */
9143 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9144 		}
9145 		mutex_unlock(&trace_types_lock);
9146 	}
9147 
9148 	(*ppos)++;
9149 
9150 	return cnt;
9151 }
9152 
9153 static const struct file_operations rb_simple_fops = {
9154 	.open		= tracing_open_generic_tr,
9155 	.read		= rb_simple_read,
9156 	.write		= rb_simple_write,
9157 	.release	= tracing_release_generic_tr,
9158 	.llseek		= default_llseek,
9159 };
9160 
9161 static ssize_t
9162 buffer_percent_read(struct file *filp, char __user *ubuf,
9163 		    size_t cnt, loff_t *ppos)
9164 {
9165 	struct trace_array *tr = filp->private_data;
9166 	char buf[64];
9167 	int r;
9168 
9169 	r = tr->buffer_percent;
9170 	r = sprintf(buf, "%d\n", r);
9171 
9172 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9173 }
9174 
9175 static ssize_t
9176 buffer_percent_write(struct file *filp, const char __user *ubuf,
9177 		     size_t cnt, loff_t *ppos)
9178 {
9179 	struct trace_array *tr = filp->private_data;
9180 	unsigned long val;
9181 	int ret;
9182 
9183 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9184 	if (ret)
9185 		return ret;
9186 
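	/*
	 * buffer_percent is how full the ring buffer must be before blocked
	 * readers are woken up; 0 means wake up on any data.
	 */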
9187 	if (val > 100)
9188 		return -EINVAL;
9189 
9190 	tr->buffer_percent = val;
9191 
9192 	(*ppos)++;
9193 
9194 	return cnt;
9195 }
9196 
9197 static const struct file_operations buffer_percent_fops = {
9198 	.open		= tracing_open_generic_tr,
9199 	.read		= buffer_percent_read,
9200 	.write		= buffer_percent_write,
9201 	.release	= tracing_release_generic_tr,
9202 	.llseek		= default_llseek,
9203 };
9204 
9205 static ssize_t
9206 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9207 {
9208 	struct trace_array *tr = filp->private_data;
9209 	size_t size;
9210 	char buf[64];
9211 	int order;
9212 	int r;
9213 
9214 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9215 	size = (PAGE_SIZE << order) / 1024;
9216 
9217 	r = sprintf(buf, "%zd\n", size);
9218 
9219 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9220 }
9221 
9222 static ssize_t
9223 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9224 			 size_t cnt, loff_t *ppos)
9225 {
9226 	struct trace_array *tr = filp->private_data;
9227 	unsigned long val;
9228 	int old_order;
9229 	int order;
9230 	int pages;
9231 	int ret;
9232 
9233 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9234 	if (ret)
9235 		return ret;
9236 
9237 	val *= 1024; /* value passed in is in KB */
9238 
9239 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9240 	order = fls(pages - 1);
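	/*
	 * e.g. with 4K pages, writing "8" gives val = 8192, pages = 2 and
	 * order = 1 (an 8K sub-buffer); writing "1" through "4" all map to
	 * order 0.
	 */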
9241 
9242 	/* limit between 1 and 128 system pages */
9243 	if (order < 0 || order > 7)
9244 		return -EINVAL;
9245 
9246 	/* Do not allow tracing while changing the order of the ring buffer */
9247 	tracing_stop_tr(tr);
9248 
9249 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9250 	if (old_order == order)
9251 		goto out;
9252 
9253 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9254 	if (ret)
9255 		goto out;
9256 
9257 #ifdef CONFIG_TRACER_MAX_TRACE
9258 
9259 	if (!tr->allocated_snapshot)
9260 		goto out_max;
9261 
9262 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9263 	if (ret) {
9264 		/* Put back the old order */
9265 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9266 		if (WARN_ON_ONCE(cnt)) {
9267 			/*
9268 			 * AARGH! We are left with different orders!
9269 			 * The max buffer is our "snapshot" buffer.
9270 			 * When a tracer needs a snapshot (one of the
9271 			 * latency tracers), it swaps the max buffer
9272 			 * with the saved snapshot. We succeeded in updating
9273 			 * the order of the main buffer, but failed to update
9274 			 * the order of the max buffer. Then, when we tried
9275 			 * to reset the main buffer to its original order, we
9276 			 * failed there too. This is very unlikely to
9277 			 * happen, but if it does, warn and kill all
9278 			 * tracing.
9279 			 */
9280 			tracing_disabled = 1;
9281 		}
9282 		goto out;
9283 	}
9284  out_max:
9285 #endif
9286 	(*ppos)++;
9287  out:
9288 	if (ret)
9289 		cnt = ret;
9290 	tracing_start_tr(tr);
9291 	return cnt;
9292 }
9293 
9294 static const struct file_operations buffer_subbuf_size_fops = {
9295 	.open		= tracing_open_generic_tr,
9296 	.read		= buffer_subbuf_size_read,
9297 	.write		= buffer_subbuf_size_write,
9298 	.release	= tracing_release_generic_tr,
9299 	.llseek		= default_llseek,
9300 };
9301 
9302 static struct dentry *trace_instance_dir;
9303 
9304 static void
9305 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9306 
9307 static int
9308 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9309 {
9310 	enum ring_buffer_flags rb_flags;
9311 
9312 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9313 
9314 	buf->tr = tr;
9315 
9316 	if (tr->range_addr_start && tr->range_addr_size) {
9317 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9318 						      tr->range_addr_start,
9319 						      tr->range_addr_size);
9320 
9321 		ring_buffer_last_boot_delta(buf->buffer,
9322 					    &tr->text_delta, &tr->data_delta);
9323 		/*
9324 		 * This is basically the same as a mapped buffer,
9325 		 * with the same restrictions.
9326 		 */
9327 		tr->mapped++;
9328 	} else {
9329 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9330 	}
9331 	if (!buf->buffer)
9332 		return -ENOMEM;
9333 
9334 	buf->data = alloc_percpu(struct trace_array_cpu);
9335 	if (!buf->data) {
9336 		ring_buffer_free(buf->buffer);
9337 		buf->buffer = NULL;
9338 		return -ENOMEM;
9339 	}
9340 
9341 	/* Allocate the first page for all buffers */
9342 	set_buffer_entries(&tr->array_buffer,
9343 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9344 
9345 	return 0;
9346 }
9347 
9348 static void free_trace_buffer(struct array_buffer *buf)
9349 {
9350 	if (buf->buffer) {
9351 		ring_buffer_free(buf->buffer);
9352 		buf->buffer = NULL;
9353 		free_percpu(buf->data);
9354 		buf->data = NULL;
9355 	}
9356 }
9357 
9358 static int allocate_trace_buffers(struct trace_array *tr, int size)
9359 {
9360 	int ret;
9361 
9362 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9363 	if (ret)
9364 		return ret;
9365 
9366 #ifdef CONFIG_TRACER_MAX_TRACE
9367 	/* Fixed (boot mapped) buffer trace arrays do not have snapshot buffers */
9368 	if (tr->range_addr_start)
9369 		return 0;
9370 
9371 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9372 				    allocate_snapshot ? size : 1);
9373 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9374 		free_trace_buffer(&tr->array_buffer);
9375 		return -ENOMEM;
9376 	}
9377 	tr->allocated_snapshot = allocate_snapshot;
9378 
9379 	allocate_snapshot = false;
9380 #endif
9381 
9382 	return 0;
9383 }
9384 
9385 static void free_trace_buffers(struct trace_array *tr)
9386 {
9387 	if (!tr)
9388 		return;
9389 
9390 	free_trace_buffer(&tr->array_buffer);
9391 
9392 #ifdef CONFIG_TRACER_MAX_TRACE
9393 	free_trace_buffer(&tr->max_buffer);
9394 #endif
9395 }
9396 
9397 static void init_trace_flags_index(struct trace_array *tr)
9398 {
9399 	int i;
9400 
9401 	/* Used by the trace options files */
9402 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9403 		tr->trace_flags_index[i] = i;
9404 }
9405 
9406 static void __update_tracer_options(struct trace_array *tr)
9407 {
9408 	struct tracer *t;
9409 
9410 	for (t = trace_types; t; t = t->next)
9411 		add_tracer_options(tr, t);
9412 }
9413 
9414 static void update_tracer_options(struct trace_array *tr)
9415 {
9416 	mutex_lock(&trace_types_lock);
9417 	tracer_options_updated = true;
9418 	__update_tracer_options(tr);
9419 	mutex_unlock(&trace_types_lock);
9420 }
9421 
9422 /* Must have trace_types_lock held */
9423 struct trace_array *trace_array_find(const char *instance)
9424 {
9425 	struct trace_array *tr, *found = NULL;
9426 
9427 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9428 		if (tr->name && strcmp(tr->name, instance) == 0) {
9429 			found = tr;
9430 			break;
9431 		}
9432 	}
9433 
9434 	return found;
9435 }
9436 
9437 struct trace_array *trace_array_find_get(const char *instance)
9438 {
9439 	struct trace_array *tr;
9440 
9441 	mutex_lock(&trace_types_lock);
9442 	tr = trace_array_find(instance);
9443 	if (tr)
9444 		tr->ref++;
9445 	mutex_unlock(&trace_types_lock);
9446 
9447 	return tr;
9448 }
9449 
9450 static int trace_array_create_dir(struct trace_array *tr)
9451 {
9452 	int ret;
9453 
9454 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9455 	if (!tr->dir)
9456 		return -EINVAL;
9457 
9458 	ret = event_trace_add_tracer(tr->dir, tr);
9459 	if (ret) {
9460 		tracefs_remove(tr->dir);
9461 		return ret;
9462 	}
9463 
9464 	init_tracer_tracefs(tr, tr->dir);
9465 	__update_tracer_options(tr);
9466 
9467 	return ret;
9468 }
9469 
9470 static struct trace_array *
9471 trace_array_create_systems(const char *name, const char *systems,
9472 			   unsigned long range_addr_start,
9473 			   unsigned long range_addr_size)
9474 {
9475 	struct trace_array *tr;
9476 	int ret;
9477 
9478 	ret = -ENOMEM;
9479 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9480 	if (!tr)
9481 		return ERR_PTR(ret);
9482 
9483 	tr->name = kstrdup(name, GFP_KERNEL);
9484 	if (!tr->name)
9485 		goto out_free_tr;
9486 
9487 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9488 		goto out_free_tr;
9489 
9490 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9491 		goto out_free_tr;
9492 
9493 	if (systems) {
9494 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9495 		if (!tr->system_names)
9496 			goto out_free_tr;
9497 	}
9498 
9499 	/* Only for boot up memory mapped ring buffers */
9500 	tr->range_addr_start = range_addr_start;
9501 	tr->range_addr_size = range_addr_size;
9502 
9503 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9504 
9505 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9506 
9507 	raw_spin_lock_init(&tr->start_lock);
9508 
9509 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9510 #ifdef CONFIG_TRACER_MAX_TRACE
9511 	spin_lock_init(&tr->snapshot_trigger_lock);
9512 #endif
9513 	tr->current_trace = &nop_trace;
9514 
9515 	INIT_LIST_HEAD(&tr->systems);
9516 	INIT_LIST_HEAD(&tr->events);
9517 	INIT_LIST_HEAD(&tr->hist_vars);
9518 	INIT_LIST_HEAD(&tr->err_log);
9519 
9520 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9521 		goto out_free_tr;
9522 
9523 	/* The ring buffer is expanded by default */
9524 	trace_set_ring_buffer_expanded(tr);
9525 
9526 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9527 		goto out_free_tr;
9528 
9529 	ftrace_init_trace_array(tr);
9530 
9531 	init_trace_flags_index(tr);
9532 
9533 	if (trace_instance_dir) {
9534 		ret = trace_array_create_dir(tr);
9535 		if (ret)
9536 			goto out_free_tr;
9537 	} else
9538 		__trace_early_add_events(tr);
9539 
9540 	list_add(&tr->list, &ftrace_trace_arrays);
9541 
9542 	tr->ref++;
9543 
9544 	return tr;
9545 
9546  out_free_tr:
9547 	ftrace_free_ftrace_ops(tr);
9548 	free_trace_buffers(tr);
9549 	free_cpumask_var(tr->pipe_cpumask);
9550 	free_cpumask_var(tr->tracing_cpumask);
9551 	kfree_const(tr->system_names);
9552 	kfree(tr->name);
9553 	kfree(tr);
9554 
9555 	return ERR_PTR(ret);
9556 }
9557 
9558 static struct trace_array *trace_array_create(const char *name)
9559 {
9560 	return trace_array_create_systems(name, NULL, 0, 0);
9561 }
9562 
9563 static int instance_mkdir(const char *name)
9564 {
9565 	struct trace_array *tr;
9566 	int ret;
9567 
9568 	mutex_lock(&event_mutex);
9569 	mutex_lock(&trace_types_lock);
9570 
9571 	ret = -EEXIST;
9572 	if (trace_array_find(name))
9573 		goto out_unlock;
9574 
9575 	tr = trace_array_create(name);
9576 
9577 	ret = PTR_ERR_OR_ZERO(tr);
9578 
9579 out_unlock:
9580 	mutex_unlock(&trace_types_lock);
9581 	mutex_unlock(&event_mutex);
9582 	return ret;
9583 }
9584 
9585 static u64 map_pages(u64 start, u64 size)
9586 {
9587 	struct page **pages;
9588 	phys_addr_t page_start;
9589 	unsigned int page_count;
9590 	unsigned int i;
9591 	void *vaddr;
9592 
9593 	page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9594 
9595 	page_start = start;
9596 	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9597 	if (!pages)
9598 		return 0;
9599 
9600 	for (i = 0; i < page_count; i++) {
9601 		phys_addr_t addr = page_start + i * PAGE_SIZE;
9602 		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9603 	}
9604 	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9605 	kfree(pages);
9606 
9607 	return (u64)(unsigned long)vaddr;
9608 }
9609 
9610 /**
9611  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9612  * @name: The name of the trace array to be looked up/created.
9613  * @systems: A list of systems to create event directories for (NULL for all)
9614  *
9615  * Returns a pointer to the trace array with the given name, or NULL if
9616  * it cannot be found or created.
9617  *
9618  * NOTE: This function increments the reference counter associated with the
9619  * trace array returned. This makes sure it cannot be freed while in use.
9620  * Use trace_array_put() once the trace array is no longer needed.
9621  * If the trace_array is to be freed, trace_array_destroy() needs to
9622  * be called after the trace_array_put(), or simply let user space delete
9623  * it from the tracefs instances directory. But until the
9624  * trace_array_put() is called, user space cannot delete it.
9625  *
9626  */
9627 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9628 {
9629 	struct trace_array *tr;
9630 
9631 	mutex_lock(&event_mutex);
9632 	mutex_lock(&trace_types_lock);
9633 
9634 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9635 		if (tr->name && strcmp(tr->name, name) == 0)
9636 			goto out_unlock;
9637 	}
9638 
9639 	tr = trace_array_create_systems(name, systems, 0, 0);
9640 
9641 	if (IS_ERR(tr))
9642 		tr = NULL;
9643 out_unlock:
9644 	if (tr)
9645 		tr->ref++;
9646 
9647 	mutex_unlock(&trace_types_lock);
9648 	mutex_unlock(&event_mutex);
9649 	return tr;
9650 }
9651 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
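
/*
 * Illustrative sketch (an assumption of typical use, not code from this
 * file): a module that wants its own buffer would pair this with the
 * other exported trace_array_*() helpers, roughly as follows, where the
 * instance name "example" and the sched:sched_switch event are only
 * placeholders:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("example", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	trace_array_set_clr_event(tr, "sched", "sched_switch", true);
 *	trace_array_printk(tr, _THIS_IP_, "hello from example\n");
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 *
 * trace_array_destroy() is only needed when the caller created the
 * instance and wants it removed; otherwise trace_array_put() alone is
 * enough.
 */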
9652 
9653 static int __remove_instance(struct trace_array *tr)
9654 {
9655 	int i;
9656 
9657 	/* Reference counter for a newly created trace array = 1. */
9658 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9659 		return -EBUSY;
9660 
9661 	list_del(&tr->list);
9662 
9663 	/* Disable all the flags that were enabled coming in */
9664 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9665 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9666 			set_tracer_flag(tr, 1 << i, 0);
9667 	}
9668 
9669 	tracing_set_nop(tr);
9670 	clear_ftrace_function_probes(tr);
9671 	event_trace_del_tracer(tr);
9672 	ftrace_clear_pids(tr);
9673 	ftrace_destroy_function_files(tr);
9674 	tracefs_remove(tr->dir);
9675 	free_percpu(tr->last_func_repeats);
9676 	free_trace_buffers(tr);
9677 	clear_tracing_err_log(tr);
9678 
9679 	for (i = 0; i < tr->nr_topts; i++) {
9680 		kfree(tr->topts[i].topts);
9681 	}
9682 	kfree(tr->topts);
9683 
9684 	free_cpumask_var(tr->pipe_cpumask);
9685 	free_cpumask_var(tr->tracing_cpumask);
9686 	kfree_const(tr->system_names);
9687 	kfree(tr->name);
9688 	kfree(tr);
9689 
9690 	return 0;
9691 }
9692 
9693 int trace_array_destroy(struct trace_array *this_tr)
9694 {
9695 	struct trace_array *tr;
9696 	int ret;
9697 
9698 	if (!this_tr)
9699 		return -EINVAL;
9700 
9701 	mutex_lock(&event_mutex);
9702 	mutex_lock(&trace_types_lock);
9703 
9704 	ret = -ENODEV;
9705 
9706 	/* Make sure the trace array exists before destroying it. */
9707 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9708 		if (tr == this_tr) {
9709 			ret = __remove_instance(tr);
9710 			break;
9711 		}
9712 	}
9713 
9714 	mutex_unlock(&trace_types_lock);
9715 	mutex_unlock(&event_mutex);
9716 
9717 	return ret;
9718 }
9719 EXPORT_SYMBOL_GPL(trace_array_destroy);
9720 
9721 static int instance_rmdir(const char *name)
9722 {
9723 	struct trace_array *tr;
9724 	int ret;
9725 
9726 	mutex_lock(&event_mutex);
9727 	mutex_lock(&trace_types_lock);
9728 
9729 	ret = -ENODEV;
9730 	tr = trace_array_find(name);
9731 	if (tr)
9732 		ret = __remove_instance(tr);
9733 
9734 	mutex_unlock(&trace_types_lock);
9735 	mutex_unlock(&event_mutex);
9736 
9737 	return ret;
9738 }
9739 
9740 static __init void create_trace_instances(struct dentry *d_tracer)
9741 {
9742 	struct trace_array *tr;
9743 
9744 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9745 							 instance_mkdir,
9746 							 instance_rmdir);
9747 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9748 		return;
9749 
9750 	mutex_lock(&event_mutex);
9751 	mutex_lock(&trace_types_lock);
9752 
9753 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9754 		if (!tr->name)
9755 			continue;
9756 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9757 			     "Failed to create instance directory\n"))
9758 			break;
9759 	}
9760 
9761 	mutex_unlock(&trace_types_lock);
9762 	mutex_unlock(&event_mutex);
9763 }
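
/*
 * What the directory registered above looks like from user space is a
 * plain tracefs mkdir/rmdir interface; assuming the usual mount point,
 * something like:
 *
 *	mkdir /sys/kernel/tracing/instances/foo    (calls instance_mkdir("foo"))
 *	rmdir /sys/kernel/tracing/instances/foo    (calls instance_rmdir("foo"))
 *
 * Each instance created this way gets its own ring buffer plus the
 * per-instance control files set up by init_tracer_tracefs() below.
 */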
9764 
9765 static void
9766 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9767 {
9768 	int cpu;
9769 
9770 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9771 			tr, &show_traces_fops);
9772 
9773 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9774 			tr, &set_tracer_fops);
9775 
9776 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9777 			  tr, &tracing_cpumask_fops);
9778 
9779 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9780 			  tr, &tracing_iter_fops);
9781 
9782 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9783 			  tr, &tracing_fops);
9784 
9785 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9786 			  tr, &tracing_pipe_fops);
9787 
9788 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9789 			  tr, &tracing_entries_fops);
9790 
9791 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9792 			  tr, &tracing_total_entries_fops);
9793 
9794 	trace_create_file("free_buffer", 0200, d_tracer,
9795 			  tr, &tracing_free_buffer_fops);
9796 
9797 	trace_create_file("trace_marker", 0220, d_tracer,
9798 			  tr, &tracing_mark_fops);
9799 
9800 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9801 
9802 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9803 			  tr, &tracing_mark_raw_fops);
9804 
9805 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9806 			  &trace_clock_fops);
9807 
9808 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9809 			  tr, &rb_simple_fops);
9810 
9811 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9812 			  &trace_time_stamp_mode_fops);
9813 
9814 	tr->buffer_percent = 50;
9815 
9816 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9817 			tr, &buffer_percent_fops);
9818 
9819 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9820 			  tr, &buffer_subbuf_size_fops);
9821 
9822 	create_trace_options_dir(tr);
9823 
9824 #ifdef CONFIG_TRACER_MAX_TRACE
9825 	trace_create_maxlat_file(tr, d_tracer);
9826 #endif
9827 
9828 	if (ftrace_create_function_files(tr, d_tracer))
9829 		MEM_FAIL(1, "Could not allocate function filter files");
9830 
9831 	if (tr->range_addr_start) {
9832 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9833 				  tr, &last_boot_fops);
9834 #ifdef CONFIG_TRACER_SNAPSHOT
9835 	} else {
9836 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9837 				  tr, &snapshot_fops);
9838 #endif
9839 	}
9840 
9841 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9842 			  tr, &tracing_err_log_fops);
9843 
9844 	for_each_tracing_cpu(cpu)
9845 		tracing_init_tracefs_percpu(tr, cpu);
9846 
9847 	ftrace_init_tracefs(tr, d_tracer);
9848 }
9849 
9850 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9851 {
9852 	struct vfsmount *mnt;
9853 	struct file_system_type *type;
9854 
9855 	/*
9856 	 * To maintain backward compatibility for tools that mount
9857 	 * debugfs to get to the tracing facility, tracefs is automatically
9858 	 * mounted to the debugfs/tracing directory.
9859 	 */
9860 	type = get_fs_type("tracefs");
9861 	if (!type)
9862 		return NULL;
9863 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9864 	put_filesystem(type);
9865 	if (IS_ERR(mnt))
9866 		return NULL;
9867 	mntget(mnt);
9868 
9869 	return mnt;
9870 }
9871 
9872 /**
9873  * tracing_init_dentry - initialize top level trace array
9874  *
9875  * This is called when creating files or directories in the tracing
9876  * directory. It is called via fs_initcall() by any of the boot up code
9877  * and returns 0 on success or a negative error code on failure.
9878  */
9879 int tracing_init_dentry(void)
9880 {
9881 	struct trace_array *tr = &global_trace;
9882 
9883 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9884 		pr_warn("Tracing disabled due to lockdown\n");
9885 		return -EPERM;
9886 	}
9887 
9888 	/* The top level trace array uses NULL as parent */
9889 	if (tr->dir)
9890 		return 0;
9891 
9892 	if (WARN_ON(!tracefs_initialized()))
9893 		return -ENODEV;
9894 
9895 	/*
9896 	 * As there may still be users that expect the tracing
9897 	 * files to exist in debugfs/tracing, we must automount
9898 	 * the tracefs file system there, so older tools still
9899 	 * work with the newer kernel.
9900 	 */
9901 	tr->dir = debugfs_create_automount("tracing", NULL,
9902 					   trace_automount, NULL);
9903 
9904 	return 0;
9905 }
9906 
9907 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9908 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9909 
9910 static struct workqueue_struct *eval_map_wq __initdata;
9911 static struct work_struct eval_map_work __initdata;
9912 static struct work_struct tracerfs_init_work __initdata;
9913 
9914 static void __init eval_map_work_func(struct work_struct *work)
9915 {
9916 	int len;
9917 
9918 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9919 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9920 }
9921 
9922 static int __init trace_eval_init(void)
9923 {
9924 	INIT_WORK(&eval_map_work, eval_map_work_func);
9925 
9926 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9927 	if (!eval_map_wq) {
9928 		pr_err("Unable to allocate eval_map_wq\n");
9929 		/* Fall back to doing the work synchronously */
9930 		eval_map_work_func(&eval_map_work);
9931 		return -ENOMEM;
9932 	}
9933 
9934 	queue_work(eval_map_wq, &eval_map_work);
9935 	return 0;
9936 }
9937 
9938 subsys_initcall(trace_eval_init);
9939 
9940 static int __init trace_eval_sync(void)
9941 {
9942 	/* Make sure the eval map updates are finished */
9943 	if (eval_map_wq)
9944 		destroy_workqueue(eval_map_wq);
9945 	return 0;
9946 }
9947 
9948 late_initcall_sync(trace_eval_sync);
9949 
9950 
9951 #ifdef CONFIG_MODULES
9952 static void trace_module_add_evals(struct module *mod)
9953 {
9954 	if (!mod->num_trace_evals)
9955 		return;
9956 
9957 	/*
9958 	 * Modules with bad taint do not have events created, do
9959 	 * not bother with their eval maps (enums) either.
9960 	 */
9961 	if (trace_module_has_bad_taint(mod))
9962 		return;
9963 
9964 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9965 }
9966 
9967 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9968 static void trace_module_remove_evals(struct module *mod)
9969 {
9970 	union trace_eval_map_item *map;
9971 	union trace_eval_map_item **last = &trace_eval_maps;
9972 
9973 	if (!mod->num_trace_evals)
9974 		return;
9975 
9976 	mutex_lock(&trace_eval_mutex);
9977 
9978 	map = trace_eval_maps;
9979 
9980 	while (map) {
9981 		if (map->head.mod == mod)
9982 			break;
9983 		map = trace_eval_jmp_to_tail(map);
9984 		last = &map->tail.next;
9985 		map = map->tail.next;
9986 	}
9987 	if (!map)
9988 		goto out;
9989 
9990 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9991 	kfree(map);
9992  out:
9993 	mutex_unlock(&trace_eval_mutex);
9994 }
9995 #else
9996 static inline void trace_module_remove_evals(struct module *mod) { }
9997 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9998 
9999 static int trace_module_notify(struct notifier_block *self,
10000 			       unsigned long val, void *data)
10001 {
10002 	struct module *mod = data;
10003 
10004 	switch (val) {
10005 	case MODULE_STATE_COMING:
10006 		trace_module_add_evals(mod);
10007 		break;
10008 	case MODULE_STATE_GOING:
10009 		trace_module_remove_evals(mod);
10010 		break;
10011 	}
10012 
10013 	return NOTIFY_OK;
10014 }
10015 
10016 static struct notifier_block trace_module_nb = {
10017 	.notifier_call = trace_module_notify,
10018 	.priority = 0,
10019 };
10020 #endif /* CONFIG_MODULES */
10021 
10022 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10023 {
10024 
10025 	event_trace_init();
10026 
10027 	init_tracer_tracefs(&global_trace, NULL);
10028 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10029 
10030 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10031 			&global_trace, &tracing_thresh_fops);
10032 
10033 	trace_create_file("README", TRACE_MODE_READ, NULL,
10034 			NULL, &tracing_readme_fops);
10035 
10036 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10037 			NULL, &tracing_saved_cmdlines_fops);
10038 
10039 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10040 			  NULL, &tracing_saved_cmdlines_size_fops);
10041 
10042 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10043 			NULL, &tracing_saved_tgids_fops);
10044 
10045 	trace_create_eval_file(NULL);
10046 
10047 #ifdef CONFIG_MODULES
10048 	register_module_notifier(&trace_module_nb);
10049 #endif
10050 
10051 #ifdef CONFIG_DYNAMIC_FTRACE
10052 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10053 			NULL, &tracing_dyn_info_fops);
10054 #endif
10055 
10056 	create_trace_instances(NULL);
10057 
10058 	update_tracer_options(&global_trace);
10059 }
10060 
10061 static __init int tracer_init_tracefs(void)
10062 {
10063 	int ret;
10064 
10065 	trace_access_lock_init();
10066 
10067 	ret = tracing_init_dentry();
10068 	if (ret)
10069 		return 0;
10070 
10071 	if (eval_map_wq) {
10072 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10073 		queue_work(eval_map_wq, &tracerfs_init_work);
10074 	} else {
10075 		tracer_init_tracefs_work_func(NULL);
10076 	}
10077 
10078 	rv_init_interface();
10079 
10080 	return 0;
10081 }
10082 
10083 fs_initcall(tracer_init_tracefs);
10084 
10085 static int trace_die_panic_handler(struct notifier_block *self,
10086 				unsigned long ev, void *unused);
10087 
10088 static struct notifier_block trace_panic_notifier = {
10089 	.notifier_call = trace_die_panic_handler,
10090 	.priority = INT_MAX - 1,
10091 };
10092 
10093 static struct notifier_block trace_die_notifier = {
10094 	.notifier_call = trace_die_panic_handler,
10095 	.priority = INT_MAX - 1,
10096 };
10097 
10098 /*
10099  * The idea is to execute the following die/panic callback early, in order
10100  * to avoid showing irrelevant information in the trace (like other panic
10101  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10102  * warnings get disabled (to prevent potential log flooding).
10103  */
10104 static int trace_die_panic_handler(struct notifier_block *self,
10105 				unsigned long ev, void *unused)
10106 {
10107 	if (!ftrace_dump_on_oops_enabled())
10108 		return NOTIFY_DONE;
10109 
10110 	/* The die notifier requires DIE_OOPS to trigger */
10111 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10112 		return NOTIFY_DONE;
10113 
10114 	ftrace_dump(DUMP_PARAM);
10115 
10116 	return NOTIFY_DONE;
10117 }
10118 
10119 /*
10120  * The printk buffer is capped at 1024; we really don't need it that big.
10121  * Nothing should be printing 1000 characters anyway.
10122  */
10123 #define TRACE_MAX_PRINT		1000
10124 
10125 /*
10126  * Define here KERN_TRACE so that we have one place to modify
10127  * it if we decide to change what log level the ftrace dump
10128  * should be at.
10129  */
10130 #define KERN_TRACE		KERN_EMERG
10131 
10132 void
10133 trace_printk_seq(struct trace_seq *s)
10134 {
10135 	/* Probably should print a warning here. */
10136 	if (s->seq.len >= TRACE_MAX_PRINT)
10137 		s->seq.len = TRACE_MAX_PRINT;
10138 
10139 	/*
10140 	 * More paranoid code. Although the buffer size is set to
10141 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10142 	 * an extra layer of protection.
10143 	 */
10144 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10145 		s->seq.len = s->seq.size - 1;
10146 
10147 	/* Should be NUL terminated, but we are paranoid. */
10148 	s->buffer[s->seq.len] = 0;
10149 
10150 	printk(KERN_TRACE "%s", s->buffer);
10151 
10152 	trace_seq_init(s);
10153 }
10154 
10155 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10156 {
10157 	iter->tr = tr;
10158 	iter->trace = iter->tr->current_trace;
10159 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10160 	iter->array_buffer = &tr->array_buffer;
10161 
10162 	if (iter->trace && iter->trace->open)
10163 		iter->trace->open(iter);
10164 
10165 	/* Annotate start of buffers if we had overruns */
10166 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10167 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10168 
10169 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10170 	if (trace_clocks[iter->tr->clock_id].in_ns)
10171 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10172 
10173 	/* Can not use kmalloc for iter.temp and iter.fmt */
10174 	iter->temp = static_temp_buf;
10175 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10176 	iter->fmt = static_fmt_buf;
10177 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10178 }
10179 
10180 void trace_init_global_iter(struct trace_iterator *iter)
10181 {
10182 	trace_init_iter(iter, &global_trace);
10183 }
10184 
10185 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10186 {
10187 	/* use static because iter can be a bit big for the stack */
10188 	static struct trace_iterator iter;
10189 	unsigned int old_userobj;
10190 	unsigned long flags;
10191 	int cnt = 0, cpu;
10192 
10193 	/*
10194 	 * Always turn off tracing when we dump.
10195 	 * We don't need to show trace output of what happens
10196 	 * between multiple crashes.
10197 	 *
10198 	 * If the user does a sysrq-z, then they can re-enable
10199 	 * tracing with echo 1 > tracing_on.
10200 	 */
10201 	tracer_tracing_off(tr);
10202 
10203 	local_irq_save(flags);
10204 
10205 	/* Simulate the iterator */
10206 	trace_init_iter(&iter, tr);
10207 
10208 	for_each_tracing_cpu(cpu) {
10209 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10210 	}
10211 
10212 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10213 
10214 	/* don't look at user memory in panic mode */
10215 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10216 
10217 	if (dump_mode == DUMP_ORIG)
10218 		iter.cpu_file = raw_smp_processor_id();
10219 	else
10220 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10221 
10222 	if (tr == &global_trace)
10223 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10224 	else
10225 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10226 
10227 	/* Did function tracer already get disabled? */
10228 	if (ftrace_is_dead()) {
10229 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10230 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10231 	}
10232 
10233 	/*
10234 	 * We need to stop all tracing on all CPUs to read
10235 	 * the next buffer. This is a bit expensive, but is
10236 	 * not done often. We fill in all that we can read,
10237 	 * and then release the locks again.
10238 	 */
10239 
10240 	while (!trace_empty(&iter)) {
10241 
10242 		if (!cnt)
10243 			printk(KERN_TRACE "---------------------------------\n");
10244 
10245 		cnt++;
10246 
10247 		trace_iterator_reset(&iter);
10248 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10249 
10250 		if (trace_find_next_entry_inc(&iter) != NULL) {
10251 			int ret;
10252 
10253 			ret = print_trace_line(&iter);
10254 			if (ret != TRACE_TYPE_NO_CONSUME)
10255 				trace_consume(&iter);
10256 		}
10257 		touch_nmi_watchdog();
10258 
10259 		trace_printk_seq(&iter.seq);
10260 	}
10261 
10262 	if (!cnt)
10263 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10264 	else
10265 		printk(KERN_TRACE "---------------------------------\n");
10266 
10267 	tr->trace_flags |= old_userobj;
10268 
10269 	for_each_tracing_cpu(cpu) {
10270 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10271 	}
10272 	local_irq_restore(flags);
10273 }
10274 
10275 static void ftrace_dump_by_param(void)
10276 {
10277 	bool first_param = true;
10278 	char dump_param[MAX_TRACER_SIZE];
10279 	char *buf, *token, *inst_name;
10280 	struct trace_array *tr;
10281 
10282 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10283 	buf = dump_param;
10284 
10285 	while ((token = strsep(&buf, ",")) != NULL) {
10286 		if (first_param) {
10287 			first_param = false;
10288 			if (!strcmp("0", token))
10289 				continue;
10290 			else if (!strcmp("1", token)) {
10291 				ftrace_dump_one(&global_trace, DUMP_ALL);
10292 				continue;
10293 			}
10294 			else if (!strcmp("2", token) ||
10295 			  !strcmp("orig_cpu", token)) {
10296 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10297 				continue;
10298 			}
10299 		}
10300 
10301 		inst_name = strsep(&token, "=");
10302 		tr = trace_array_find(inst_name);
10303 		if (!tr) {
10304 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10305 			continue;
10306 		}
10307 
10308 		if (token && (!strcmp("2", token) ||
10309 			  !strcmp("orig_cpu", token)))
10310 			ftrace_dump_one(tr, DUMP_ORIG);
10311 		else
10312 			ftrace_dump_one(tr, DUMP_ALL);
10313 	}
10314 }
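
/*
 * The forms accepted above, assuming the usual kernel-parameter plumbing
 * fills ftrace_dump_on_oops (instance names and values are illustrative):
 *
 *	ftrace_dump_on_oops=1                     dump all CPUs of the global buffer
 *	ftrace_dump_on_oops=orig_cpu              dump only the CPU that triggered
 *	ftrace_dump_on_oops=1,foo,bar=orig_cpu    global buffer, instance "foo"
 *	                                          (all CPUs) and instance "bar"
 *	                                          (triggering CPU only)
 */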
10315 
10316 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10317 {
10318 	static atomic_t dump_running;
10319 
10320 	/* Only allow one dump user at a time. */
10321 	if (atomic_inc_return(&dump_running) != 1) {
10322 		atomic_dec(&dump_running);
10323 		return;
10324 	}
10325 
10326 	switch (oops_dump_mode) {
10327 	case DUMP_ALL:
10328 		ftrace_dump_one(&global_trace, DUMP_ALL);
10329 		break;
10330 	case DUMP_ORIG:
10331 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10332 		break;
10333 	case DUMP_PARAM:
10334 		ftrace_dump_by_param();
10335 		break;
10336 	case DUMP_NONE:
10337 		break;
10338 	default:
10339 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10340 		ftrace_dump_one(&global_trace, DUMP_ALL);
10341 	}
10342 
10343 	atomic_dec(&dump_running);
10344 }
10345 EXPORT_SYMBOL_GPL(ftrace_dump);
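
/*
 * A minimal sketch of how a caller might use the export above from a
 * debugging path (the condition is hypothetical):
 *
 *	if (WARN_ON(unexpected_state))
 *		ftrace_dump(DUMP_ALL);
 *
 * As with the die/panic handlers, this stops tracing and spills the
 * buffer to the console, so it is meant for one-shot post-mortem use.
 */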
10346 
10347 #define WRITE_BUFSIZE  4096
10348 
10349 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10350 				size_t count, loff_t *ppos,
10351 				int (*createfn)(const char *))
10352 {
10353 	char *kbuf, *buf, *tmp;
10354 	int ret = 0;
10355 	size_t done = 0;
10356 	size_t size;
10357 
10358 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10359 	if (!kbuf)
10360 		return -ENOMEM;
10361 
10362 	while (done < count) {
10363 		size = count - done;
10364 
10365 		if (size >= WRITE_BUFSIZE)
10366 			size = WRITE_BUFSIZE - 1;
10367 
10368 		if (copy_from_user(kbuf, buffer + done, size)) {
10369 			ret = -EFAULT;
10370 			goto out;
10371 		}
10372 		kbuf[size] = '\0';
10373 		buf = kbuf;
10374 		do {
10375 			tmp = strchr(buf, '\n');
10376 			if (tmp) {
10377 				*tmp = '\0';
10378 				size = tmp - buf + 1;
10379 			} else {
10380 				size = strlen(buf);
10381 				if (done + size < count) {
10382 					if (buf != kbuf)
10383 						break;
10384 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10385 					pr_warn("Line length is too long: Should be less than %d\n",
10386 						WRITE_BUFSIZE - 2);
10387 					ret = -EINVAL;
10388 					goto out;
10389 				}
10390 			}
10391 			done += size;
10392 
10393 			/* Remove comments */
10394 			tmp = strchr(buf, '#');
10395 
10396 			if (tmp)
10397 				*tmp = '\0';
10398 
10399 			ret = createfn(buf);
10400 			if (ret)
10401 				goto out;
10402 			buf += size;
10403 
10404 		} while (done < count);
10405 	}
10406 	ret = done;
10407 
10408 out:
10409 	kfree(kbuf);
10410 
10411 	return ret;
10412 }
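
/*
 * A minimal sketch of a createfn callback for the helper above (the
 * function name is hypothetical): it is invoked once per newline-split,
 * comment-stripped command taken from the user buffer, and a non-zero
 * return aborts the rest of the write.
 *
 *	static int example_createfn(const char *raw_command)
 *	{
 *		if (!*raw_command)
 *			return 0;
 *		pr_info("got command: %s\n", raw_command);
 *		return 0;
 *	}
 *
 * The dynamic event files (kprobe_events and friends) hook their write
 * handlers up to trace_parse_run_command() in this way.
 */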
10413 
10414 #ifdef CONFIG_TRACER_MAX_TRACE
10415 __init static bool tr_needs_alloc_snapshot(const char *name)
10416 {
10417 	char *test;
10418 	int len = strlen(name);
10419 	bool ret;
10420 
10421 	if (!boot_snapshot_index)
10422 		return false;
10423 
10424 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10425 	    boot_snapshot_info[len] == '\t')
10426 		return true;
10427 
10428 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10429 	if (!test)
10430 		return false;
10431 
10432 	sprintf(test, "\t%s\t", name);
10433 	ret = strstr(boot_snapshot_info, test) != NULL;
10434 	kfree(test);
10435 	return ret;
10436 }
10437 
10438 __init static void do_allocate_snapshot(const char *name)
10439 {
10440 	if (!tr_needs_alloc_snapshot(name))
10441 		return;
10442 
10443 	/*
10444 	 * When allocate_snapshot is set, the next call to
10445 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10446 	 * will allocate the snapshot buffer. That will also clear
10447 	 * this flag.
10448 	 */
10449 	allocate_snapshot = true;
10450 }
10451 #else
10452 static inline void do_allocate_snapshot(const char *name) { }
10453 #endif
10454 
10455 __init static void enable_instances(void)
10456 {
10457 	struct trace_array *tr;
10458 	char *curr_str;
10459 	char *name;
10460 	char *str;
10461 	char *tok;
10462 
10463 	/* A tab is always appended */
10464 	boot_instance_info[boot_instance_index - 1] = '\0';
10465 	str = boot_instance_info;
10466 
10467 	while ((curr_str = strsep(&str, "\t"))) {
10468 		phys_addr_t start = 0;
10469 		phys_addr_t size = 0;
10470 		unsigned long addr = 0;
10471 
10472 		tok = strsep(&curr_str, ",");
10473 		name = strsep(&tok, "@");
10474 
10475 		if (tok && isdigit(*tok)) {
10476 			start = memparse(tok, &tok);
10477 			if (!start) {
10478 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10479 					name);
10480 				continue;
10481 			}
10482 			if (*tok != ':') {
10483 				pr_warn("Tracing: No size specified for instance %s\n", name);
10484 				continue;
10485 			}
10486 			tok++;
10487 			size = memparse(tok, &tok);
10488 			if (!size) {
10489 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10490 					name);
10491 				continue;
10492 			}
10493 		} else if (tok) {
10494 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10495 				start = 0;
10496 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10497 				continue;
10498 			}
10499 		}
10500 
10501 		if (start) {
10502 			addr = map_pages(start, size);
10503 			if (addr) {
10504 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10505 					name, &start, (unsigned long)size);
10506 			} else {
10507 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10508 				continue;
10509 			}
10510 		} else {
10511 			/* Only non mapped buffers have snapshot buffers */
10512 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10513 				do_allocate_snapshot(name);
10514 		}
10515 
10516 		tr = trace_array_create_systems(name, NULL, addr, size);
10517 		if (IS_ERR(tr)) {
10518 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10519 			continue;
10520 		}
10521 
10522 		/* Only allow non mapped buffers to be deleted */
10523 		if (!start)
10524 			trace_array_put(tr);
10525 
10526 		while ((tok = strsep(&curr_str, ","))) {
10527 			early_enable_events(tr, tok, true);
10528 		}
10529 	}
10530 }
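
/*
 * The boot_instance_info string parsed above is built from the
 * trace_instance= kernel command line option; the forms handled here
 * look roughly like this (names, addresses and sizes are illustrative):
 *
 *	trace_instance=foo
 *	trace_instance=foo,sched:sched_switch,irq
 *	trace_instance=boot_map@0x285400000:12M
 *	trace_instance=boot_map@my_reserved_region
 *
 * The last two create memory-mapped buffers, either at a fixed physical
 * address and size or in a region set up earlier with reserve_mem=.
 */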
10531 
10532 __init static int tracer_alloc_buffers(void)
10533 {
10534 	int ring_buf_size;
10535 	int ret = -ENOMEM;
10536 
10537 
10538 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10539 		pr_warn("Tracing disabled due to lockdown\n");
10540 		return -EPERM;
10541 	}
10542 
10543 	/*
10544 	 * Make sure we don't accidentally add more trace options
10545 	 * than we have bits for.
10546 	 */
10547 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10548 
10549 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10550 		goto out;
10551 
10552 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10553 		goto out_free_buffer_mask;
10554 
10555 	/* Only allocate trace_printk buffers if a trace_printk exists */
10556 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10557 		/* Must be called before global_trace.buffer is allocated */
10558 		trace_printk_init_buffers();
10559 
10560 	/* To save memory, keep the ring buffer size to its minimum */
10561 	if (global_trace.ring_buffer_expanded)
10562 		ring_buf_size = trace_buf_size;
10563 	else
10564 		ring_buf_size = 1;
10565 
10566 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10567 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10568 
10569 	raw_spin_lock_init(&global_trace.start_lock);
10570 
10571 	/*
10572 	 * The prepare callback allocates some memory for the ring buffer. We
10573 	 * don't free the buffer if the CPU goes down. If we were to free
10574 	 * the buffer, then the user would lose any trace that was in the
10575 	 * buffer. The memory will be removed once the "instance" is removed.
10576 	 */
10577 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10578 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10579 				      NULL);
10580 	if (ret < 0)
10581 		goto out_free_cpumask;
10582 	/* Used for event triggers */
10583 	ret = -ENOMEM;
10584 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10585 	if (!temp_buffer)
10586 		goto out_rm_hp_state;
10587 
10588 	if (trace_create_savedcmd() < 0)
10589 		goto out_free_temp_buffer;
10590 
10591 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10592 		goto out_free_savedcmd;
10593 
10594 	/* TODO: make the number of buffers hot pluggable with CPUs */
10595 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10596 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10597 		goto out_free_pipe_cpumask;
10598 	}
10599 	if (global_trace.buffer_disabled)
10600 		tracing_off();
10601 
10602 	if (trace_boot_clock) {
10603 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10604 		if (ret < 0)
10605 			pr_warn("Trace clock %s not defined, going back to default\n",
10606 				trace_boot_clock);
10607 	}
10608 
10609 	/*
10610 	 * register_tracer() might reference current_trace, so it
10611 	 * needs to be set before we register anything. This is
10612 	 * just a bootstrap of current_trace anyway.
10613 	 */
10614 	global_trace.current_trace = &nop_trace;
10615 
10616 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10617 #ifdef CONFIG_TRACER_MAX_TRACE
10618 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10619 #endif
10620 	ftrace_init_global_array_ops(&global_trace);
10621 
10622 	init_trace_flags_index(&global_trace);
10623 
10624 	register_tracer(&nop_trace);
10625 
10626 	/* Function tracing may start here (via kernel command line) */
10627 	init_function_trace();
10628 
10629 	/* All seems OK, enable tracing */
10630 	tracing_disabled = 0;
10631 
10632 	atomic_notifier_chain_register(&panic_notifier_list,
10633 				       &trace_panic_notifier);
10634 
10635 	register_die_notifier(&trace_die_notifier);
10636 
10637 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10638 
10639 	INIT_LIST_HEAD(&global_trace.systems);
10640 	INIT_LIST_HEAD(&global_trace.events);
10641 	INIT_LIST_HEAD(&global_trace.hist_vars);
10642 	INIT_LIST_HEAD(&global_trace.err_log);
10643 	list_add(&global_trace.list, &ftrace_trace_arrays);
10644 
10645 	apply_trace_boot_options();
10646 
10647 	register_snapshot_cmd();
10648 
10649 	test_can_verify();
10650 
10651 	return 0;
10652 
10653 out_free_pipe_cpumask:
10654 	free_cpumask_var(global_trace.pipe_cpumask);
10655 out_free_savedcmd:
10656 	trace_free_saved_cmdlines_buffer();
10657 out_free_temp_buffer:
10658 	ring_buffer_free(temp_buffer);
10659 out_rm_hp_state:
10660 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10661 out_free_cpumask:
10662 	free_cpumask_var(global_trace.tracing_cpumask);
10663 out_free_buffer_mask:
10664 	free_cpumask_var(tracing_buffer_mask);
10665 out:
10666 	return ret;
10667 }
10668 
10669 void __init ftrace_boot_snapshot(void)
10670 {
10671 #ifdef CONFIG_TRACER_MAX_TRACE
10672 	struct trace_array *tr;
10673 
10674 	if (!snapshot_at_boot)
10675 		return;
10676 
10677 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10678 		if (!tr->allocated_snapshot)
10679 			continue;
10680 
10681 		tracing_snapshot_instance(tr);
10682 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10683 	}
10684 #endif
10685 }
10686 
10687 void __init early_trace_init(void)
10688 {
10689 	if (tracepoint_printk) {
10690 		tracepoint_print_iter =
10691 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10692 		if (MEM_FAIL(!tracepoint_print_iter,
10693 			     "Failed to allocate trace iterator\n"))
10694 			tracepoint_printk = 0;
10695 		else
10696 			static_key_enable(&tracepoint_printk_key.key);
10697 	}
10698 	tracer_alloc_buffers();
10699 
10700 	init_events();
10701 }
10702 
10703 void __init trace_init(void)
10704 {
10705 	trace_event_init();
10706 
10707 	if (boot_instance_index)
10708 		enable_instances();
10709 }
10710 
10711 __init static void clear_boot_tracer(void)
10712 {
10713 	/*
10714 	 * The default bootup tracer name points into an init section.
10715 	 * This function is called from a late initcall. If we did not
10716 	 * find the boot tracer by then, clear it out to prevent a
10717 	 * later registration from accessing the buffer that is
10718 	 * about to be freed.
10719 	 */
10720 	if (!default_bootup_tracer)
10721 		return;
10722 
10723 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10724 	       default_bootup_tracer);
10725 	default_bootup_tracer = NULL;
10726 }
10727 
10728 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10729 __init static void tracing_set_default_clock(void)
10730 {
10731 	/* sched_clock_stable() is determined in late_initcall */
10732 	if (!trace_boot_clock && !sched_clock_stable()) {
10733 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10734 			pr_warn("Can not set tracing clock due to lockdown\n");
10735 			return;
10736 		}
10737 
10738 		printk(KERN_WARNING
10739 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10740 		       "If you want to keep using the local clock, then add:\n"
10741 		       "  \"trace_clock=local\"\n"
10742 		       "on the kernel command line\n");
10743 		tracing_set_clock(&global_trace, "global");
10744 	}
10745 }
10746 #else
10747 static inline void tracing_set_default_clock(void) { }
10748 #endif
10749 
10750 __init static int late_trace_init(void)
10751 {
10752 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10753 		static_key_disable(&tracepoint_printk_key.key);
10754 		tracepoint_printk = 0;
10755 	}
10756 
10757 	tracing_set_default_clock();
10758 	clear_boot_tracer();
10759 	return 0;
10760 }
10761 
10762 late_initcall_sync(late_trace_init);
10763