xref: /linux/kernel/trace/trace.c (revision a48b0872e69428d3d02994dcfad3519f01def7fa)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer, such as from trace_printk(),
66  * could occur at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75 
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #endif
85 
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 and is set back to zero only when the
112  * initialization of the tracer succeeds; that is the only place
113  * that clears it.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs, or to 2 to dump
132  * only the buffer of the CPU that triggered the oops.
133  */
134 
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136 
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139 
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 	struct module			*mod;
144 	unsigned long			length;
145 };
146 
147 union trace_eval_map_item;
148 
149 struct trace_eval_map_tail {
150 	/*
151 	 * "end" is first and points to NULL as it must be different
152 	 * from "mod" or "eval_string"
153 	 */
154 	union trace_eval_map_item	*next;
155 	const char			*end;	/* points to NULL */
156 };
157 
158 static DEFINE_MUTEX(trace_eval_mutex);
159 
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168 	struct trace_eval_map		map;
169 	struct trace_eval_map_head	head;
170 	struct trace_eval_map_tail	tail;
171 };
172 
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175 
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 				   struct trace_buffer *buffer,
179 				   unsigned int trace_ctx);
180 
181 #define MAX_TRACER_SIZE		100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184 
185 static bool allocate_snapshot;
186 
187 static int __init set_cmdline_ftrace(char *str)
188 {
189 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 	default_bootup_tracer = bootup_tracer_buf;
191 	/* We are using ftrace early, expand it */
192 	ring_buffer_expanded = true;
193 	return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196 
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199 	if (*str++ != '=' || !*str) {
200 		ftrace_dump_on_oops = DUMP_ALL;
201 		return 1;
202 	}
203 
204 	if (!strcmp("orig_cpu", str)) {
205 		ftrace_dump_on_oops = DUMP_ORIG;
206 		return 1;
207 	}
208 
209 	return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212 
213 static int __init stop_trace_on_warning(char *str)
214 {
215 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216 		__disable_trace_on_warning = 1;
217 	return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220 
221 static int __init boot_alloc_snapshot(char *str)
222 {
223 	allocate_snapshot = true;
224 	/* We also need the main ring buffer expanded */
225 	ring_buffer_expanded = true;
226 	return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229 
230 
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232 
233 static int __init set_trace_boot_options(char *str)
234 {
235 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236 	return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239 
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242 
243 static int __init set_trace_boot_clock(char *str)
244 {
245 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 	trace_boot_clock = trace_boot_clock_buf;
247 	return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250 
251 static int __init set_tracepoint_printk(char *str)
252 {
253 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254 		tracepoint_printk = 1;
255 	return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
258 
259 unsigned long long ns2usecs(u64 nsec)
260 {
261 	nsec += 500;
262 	do_div(nsec, 1000);
263 	return nsec;
264 }
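
/*
 * Illustrative arithmetic for the helper above (not part of the original
 * file): the "+ 500" rounds to the nearest microsecond rather than
 * truncating, e.g.:
 *
 *	ns2usecs(999)  == 1	(999  + 500 = 1499, / 1000 -> 1)
 *	ns2usecs(1499) == 1	(1499 + 500 = 1999, / 1000 -> 1)
 *	ns2usecs(1500) == 2	(1500 + 500 = 2000, / 1000 -> 2)
 */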
265 
266 static void
267 trace_process_export(struct trace_export *export,
268 	       struct ring_buffer_event *event, int flag)
269 {
270 	struct trace_entry *entry;
271 	unsigned int size = 0;
272 
273 	if (export->flags & flag) {
274 		entry = ring_buffer_event_data(event);
275 		size = ring_buffer_event_length(event);
276 		export->write(export, entry, size);
277 	}
278 }
279 
280 static DEFINE_MUTEX(ftrace_export_lock);
281 
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283 
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287 
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290 	if (export->flags & TRACE_EXPORT_FUNCTION)
291 		static_branch_inc(&trace_function_exports_enabled);
292 
293 	if (export->flags & TRACE_EXPORT_EVENT)
294 		static_branch_inc(&trace_event_exports_enabled);
295 
296 	if (export->flags & TRACE_EXPORT_MARKER)
297 		static_branch_inc(&trace_marker_exports_enabled);
298 }
299 
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302 	if (export->flags & TRACE_EXPORT_FUNCTION)
303 		static_branch_dec(&trace_function_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_EVENT)
306 		static_branch_dec(&trace_event_exports_enabled);
307 
308 	if (export->flags & TRACE_EXPORT_MARKER)
309 		static_branch_dec(&trace_marker_exports_enabled);
310 }
311 
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314 	struct trace_export *export;
315 
316 	preempt_disable_notrace();
317 
318 	export = rcu_dereference_raw_check(ftrace_exports_list);
319 	while (export) {
320 		trace_process_export(export, event, flag);
321 		export = rcu_dereference_raw_check(export->next);
322 	}
323 
324 	preempt_enable_notrace();
325 }
326 
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330 	rcu_assign_pointer(export->next, *list);
331 	/*
332 	 * We are inserting export into the list, but another
333 	 * CPU might be walking that list. We need to make sure
334 	 * the export->next pointer is valid before another CPU sees
335 	 * the export pointer added to the list.
336 	 */
337 	rcu_assign_pointer(*list, export);
338 }
339 
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343 	struct trace_export **p;
344 
345 	for (p = list; *p != NULL; p = &(*p)->next)
346 		if (*p == export)
347 			break;
348 
349 	if (*p != export)
350 		return -1;
351 
352 	rcu_assign_pointer(*p, (*p)->next);
353 
354 	return 0;
355 }
356 
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360 	ftrace_exports_enable(export);
361 
362 	add_trace_export(list, export);
363 }
364 
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368 	int ret;
369 
370 	ret = rm_trace_export(list, export);
371 	ftrace_exports_disable(export);
372 
373 	return ret;
374 }
375 
376 int register_ftrace_export(struct trace_export *export)
377 {
378 	if (WARN_ON_ONCE(!export->write))
379 		return -1;
380 
381 	mutex_lock(&ftrace_export_lock);
382 
383 	add_ftrace_export(&ftrace_exports_list, export);
384 
385 	mutex_unlock(&ftrace_export_lock);
386 
387 	return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390 
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393 	int ret;
394 
395 	mutex_lock(&ftrace_export_lock);
396 
397 	ret = rm_ftrace_export(&ftrace_exports_list, export);
398 
399 	mutex_unlock(&ftrace_export_lock);
400 
401 	return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
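
/*
 * Illustrative sketch (not part of this file; names are hypothetical) of
 * how a module could use the two exported functions above to forward
 * function trace events to its own sink. struct trace_export and the
 * TRACE_EXPORT_* flags come from <linux/trace.h>.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_debug("exporting %u bytes of trace data\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */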
404 
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS						\
407 	(FUNCTION_DEFAULT_FLAGS |					\
408 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
409 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
410 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
411 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
412 	 TRACE_ITER_HASH_PTR)
413 
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
416 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
417 
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
421 
422 /*
423  * The global_trace is the descriptor that holds the top-level tracing
424  * buffers for the live tracing.
425  */
426 static struct trace_array global_trace = {
427 	.trace_flags = TRACE_DEFAULT_FLAGS,
428 };
429 
430 LIST_HEAD(ftrace_trace_arrays);
431 
432 int trace_array_get(struct trace_array *this_tr)
433 {
434 	struct trace_array *tr;
435 	int ret = -ENODEV;
436 
437 	mutex_lock(&trace_types_lock);
438 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
439 		if (tr == this_tr) {
440 			tr->ref++;
441 			ret = 0;
442 			break;
443 		}
444 	}
445 	mutex_unlock(&trace_types_lock);
446 
447 	return ret;
448 }
449 
450 static void __trace_array_put(struct trace_array *this_tr)
451 {
452 	WARN_ON(!this_tr->ref);
453 	this_tr->ref--;
454 }
455 
456 /**
457  * trace_array_put - Decrement the reference counter for this trace array.
458  * @this_tr : pointer to the trace array
459  *
460  * NOTE: Use this when we no longer need the trace array returned by
461  * trace_array_get_by_name(). This ensures the trace array can be later
462  * destroyed.
463  *
464  */
465 void trace_array_put(struct trace_array *this_tr)
466 {
467 	if (!this_tr)
468 		return;
469 
470 	mutex_lock(&trace_types_lock);
471 	__trace_array_put(this_tr);
472 	mutex_unlock(&trace_types_lock);
473 }
474 EXPORT_SYMBOL_GPL(trace_array_put);
475 
476 int tracing_check_open_get_tr(struct trace_array *tr)
477 {
478 	int ret;
479 
480 	ret = security_locked_down(LOCKDOWN_TRACEFS);
481 	if (ret)
482 		return ret;
483 
484 	if (tracing_disabled)
485 		return -ENODEV;
486 
487 	if (tr && trace_array_get(tr) < 0)
488 		return -ENODEV;
489 
490 	return 0;
491 }
492 
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494 			      struct trace_buffer *buffer,
495 			      struct ring_buffer_event *event)
496 {
497 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498 	    !filter_match_preds(call->filter, rec)) {
499 		__trace_event_discard_commit(buffer, event);
500 		return 1;
501 	}
502 
503 	return 0;
504 }
505 
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
507 {
508 	vfree(pid_list->pids);
509 	kfree(pid_list);
510 }
511 
512 /**
513  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514  * @filtered_pids: The list of pids to check
515  * @search_pid: The PID to find in @filtered_pids
516  *
517  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
518  */
519 bool
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
521 {
522 	/*
523 	 * If pid_max changed after filtered_pids was created, we
524 	 * by default ignore all pids greater than the previous pid_max.
525 	 */
526 	if (search_pid >= filtered_pids->pid_max)
527 		return false;
528 
529 	return test_bit(search_pid, filtered_pids->pids);
530 }
531 
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544 		       struct trace_pid_list *filtered_no_pids,
545 		       struct task_struct *task)
546 {
547 	/*
548 	 * If filtered_no_pids is not empty, and the task's pid is listed
549 	 * in filtered_no_pids, then return true.
550 	 * Otherwise, if filtered_pids is empty, that means we can
551 	 * trace all tasks. If it has content, then only trace pids
552 	 * within filtered_pids.
553 	 */
554 
555 	return (filtered_pids &&
556 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
557 		(filtered_no_pids &&
558 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
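
/*
 * Worked examples of the check above (pid values are hypothetical):
 *
 *   filtered_pids = {42}, filtered_no_pids = NULL:
 *	only the task with pid 42 is traced; every other task is ignored.
 *   filtered_pids = NULL, filtered_no_pids = {42}:
 *	the task with pid 42 is ignored; every other task is traced.
 *   both lists NULL:
 *	nothing is ignored, all tasks are traced.
 */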
560 
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * When adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574 				  struct task_struct *self,
575 				  struct task_struct *task)
576 {
577 	if (!pid_list)
578 		return;
579 
580 	/* For forks, we only add if the forking task is listed */
581 	if (self) {
582 		if (!trace_find_filtered_pid(pid_list, self->pid))
583 			return;
584 	}
585 
586 	/* Sorry, but we don't support pid_max changing after setting */
587 	if (task->pid >= pid_list->pid_max)
588 		return;
589 
590 	/* "self" is set for forks, and NULL for exits */
591 	if (self)
592 		set_bit(task->pid, pid_list->pids);
593 	else
594 		clear_bit(task->pid, pid_list->pids);
595 }
596 
597 /**
598  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599  * @pid_list: The pid list to show
600  * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
601  * @pos: The position of the file
602  *
603  * This is used by the seq_file "next" operation to iterate the pids
604  * listed in a trace_pid_list structure.
605  *
606  * Returns the pid+1, since we want to be able to display a pid of zero,
607  * while NULL would stop the iteration.
608  */
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
610 {
611 	unsigned long pid = (unsigned long)v;
612 
613 	(*pos)++;
614 
615 	/* pid already is +1 of the actual previous bit */
616 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
617 
618 	/* Return pid + 1 to allow zero to be represented */
619 	if (pid < pid_list->pid_max)
620 		return (void *)(pid + 1);
621 
622 	return NULL;
623 }
624 
625 /**
626  * trace_pid_start - Used for seq_file to start reading pid lists
627  * @pid_list: The pid list to show
628  * @pos: The position of the file
629  *
630  * This is used by seq_file "start" operation to start the iteration
631  * of listing pids.
632  *
633  * Returns the pid+1, since we want to be able to display a pid of zero,
634  * while NULL would stop the iteration.
635  */
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
637 {
638 	unsigned long pid;
639 	loff_t l = 0;
640 
641 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642 	if (pid >= pid_list->pid_max)
643 		return NULL;
644 
645 	/* Return pid + 1 so that zero can be the exit value */
646 	for (pid++; pid && l < *pos;
647 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
648 		;
649 	return (void *)pid;
650 }
651 
652 /**
653  * trace_pid_show - show the current pid in seq_file processing
654  * @m: The seq_file structure to write into
655  * @v: A void pointer of the pid (+1) value to display
656  *
657  * Can be directly used by seq_file operations to display the current
658  * pid value.
659  */
660 int trace_pid_show(struct seq_file *m, void *v)
661 {
662 	unsigned long pid = (unsigned long)v - 1;
663 
664 	seq_printf(m, "%lu\n", pid);
665 	return 0;
666 }
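
/*
 * Minimal sketch (an assumption, not code from this file) of how the three
 * helpers above are typically wired into a seq_file; the pid filter files
 * in ftrace and the event subsystem follow this pattern, with their own
 * locking and RCU handling around the pid_list lookup (elided here):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = get_filtered_pid_list(m);
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = get_filtered_pid_list(m);
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,	// drops whatever p_start took
 *		.show	= trace_pid_show,
 *	};
 *
 * get_filtered_pid_list() and p_stop() are hypothetical placeholders.
 */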
667 
668 /* 128 should be much more than enough */
669 #define PID_BUF_SIZE		127
670 
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672 		    struct trace_pid_list **new_pid_list,
673 		    const char __user *ubuf, size_t cnt)
674 {
675 	struct trace_pid_list *pid_list;
676 	struct trace_parser parser;
677 	unsigned long val;
678 	int nr_pids = 0;
679 	ssize_t read = 0;
680 	ssize_t ret = 0;
681 	loff_t pos;
682 	pid_t pid;
683 
684 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
685 		return -ENOMEM;
686 
687 	/*
688 	 * Always create a new array: the write is an all-or-nothing
689 	 * operation, so when the user adds new pids the work is done
690 	 * on a fresh copy. If the operation fails, the current list is
691 	 * not modified.
692 	 */
693 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
694 	if (!pid_list) {
695 		trace_parser_put(&parser);
696 		return -ENOMEM;
697 	}
698 
699 	pid_list->pid_max = READ_ONCE(pid_max);
700 
701 	/* Only truncating will shrink pid_max */
702 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703 		pid_list->pid_max = filtered_pids->pid_max;
704 
705 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706 	if (!pid_list->pids) {
707 		trace_parser_put(&parser);
708 		kfree(pid_list);
709 		return -ENOMEM;
710 	}
711 
712 	if (filtered_pids) {
713 		/* copy the current bits to the new max */
714 		for_each_set_bit(pid, filtered_pids->pids,
715 				 filtered_pids->pid_max) {
716 			set_bit(pid, pid_list->pids);
717 			nr_pids++;
718 		}
719 	}
720 
721 	while (cnt > 0) {
722 
723 		pos = 0;
724 
725 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
726 		if (ret < 0 || !trace_parser_loaded(&parser))
727 			break;
728 
729 		read += ret;
730 		ubuf += ret;
731 		cnt -= ret;
732 
733 		ret = -EINVAL;
734 		if (kstrtoul(parser.buffer, 0, &val))
735 			break;
736 		if (val >= pid_list->pid_max)
737 			break;
738 
739 		pid = (pid_t)val;
740 
741 		set_bit(pid, pid_list->pids);
742 		nr_pids++;
743 
744 		trace_parser_clear(&parser);
745 		ret = 0;
746 	}
747 	trace_parser_put(&parser);
748 
749 	if (ret < 0) {
750 		trace_free_pid_list(pid_list);
751 		return ret;
752 	}
753 
754 	if (!nr_pids) {
755 		/* Cleared the list of pids */
756 		trace_free_pid_list(pid_list);
757 		read = ret;
758 		pid_list = NULL;
759 	}
760 
761 	*new_pid_list = pid_list;
762 
763 	return read;
764 }
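
/*
 * Minimal sketch (an assumption; the field and lock names are illustrative)
 * of how a write handler consumes trace_pid_write(): build the replacement
 * list, publish it with rcu_assign_pointer(), then free the old list after
 * a grace period. The ftrace and event pid filter files follow roughly
 * this pattern.
 *
 *	struct trace_pid_list *new_list = NULL, *old_list;
 *	ssize_t ret;
 *
 *	old_list = rcu_dereference_protected(tr->filtered_pids,
 *					     lockdep_is_held(&event_mutex));
 *	ret = trace_pid_write(old_list, &new_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(tr->filtered_pids, new_list);
 *	if (old_list) {
 *		synchronize_rcu();
 *		trace_free_pid_list(old_list);
 *	}
 */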
765 
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
767 {
768 	u64 ts;
769 
770 	/* Early boot up does not have a buffer yet */
771 	if (!buf->buffer)
772 		return trace_clock_local();
773 
774 	ts = ring_buffer_time_stamp(buf->buffer);
775 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
776 
777 	return ts;
778 }
779 
780 u64 ftrace_now(int cpu)
781 {
782 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
783 }
784 
785 /**
786  * tracing_is_enabled - Show if global_trace has been enabled
787  *
788  * Shows if the global trace has been enabled or not. It uses the
789  * mirror flag "buffer_disabled", which is meant for fast paths such as
790  * the irqsoff tracer. But it may be inaccurate due to races. If you
791  * need to know the accurate state, use tracing_is_on() which is a little
792  * slower, but accurate.
793  */
794 int tracing_is_enabled(void)
795 {
796 	/*
797 	 * For quick access (irqsoff uses this in fast path), just
798 	 * return the mirror variable of the state of the ring buffer.
799 	 * It's a little racy, but we don't really care.
800 	 */
801 	smp_rmb();
802 	return !global_trace.buffer_disabled;
803 }
804 
805 /*
806  * trace_buf_size is the size in bytes that is allocated
807  * for a buffer. Note, the number of bytes is always rounded
808  * to page size.
809  *
810  * This number is purposely set to a low number of 16384.
811  * If a dump on oops happens, it is much appreciated not to have
812  * to wait for all that output. In any case, this is configurable
813  * both at boot time and at run time.
814  */
815 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
816 
817 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818 
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer		*trace_types __read_mostly;
821 
822 /*
823  * trace_types_lock is used to protect the trace_types list.
824  */
825 DEFINE_MUTEX(trace_types_lock);
826 
827 /*
828  * serialize access to the ring buffer
829  *
830  * The ring buffer serializes readers, but that is only low-level protection.
831  * The validity of the events (returned by ring_buffer_peek() etc.)
832  * is not protected by the ring buffer.
833  *
834  * The content of events may become garbage if we allow other processes to
835  * consume these events concurrently:
836  *   A) the page of the consumed events may become a normal page
837  *      (not a reader page) in the ring buffer, and this page will be
838  *      rewritten by the event producer.
839  *   B) the page of the consumed events may become a page for splice_read,
840  *      and this page will be returned to the system.
841  *
842  * These primitives allow multiple processes to access different cpu
843  * ring buffers concurrently.
844  *
845  * These primitives don't distinguish between read-only and read-consume
846  * access. Multiple read-only accesses are also serialized.
847  */
848 
849 #ifdef CONFIG_SMP
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852 
853 static inline void trace_access_lock(int cpu)
854 {
855 	if (cpu == RING_BUFFER_ALL_CPUS) {
856 		/* gain it for accessing the whole ring buffer. */
857 		down_write(&all_cpu_access_lock);
858 	} else {
859 		/* gain it for accessing a cpu ring buffer. */
860 
861 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862 		down_read(&all_cpu_access_lock);
863 
864 		/* Secondly block other access to this @cpu ring buffer. */
865 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
866 	}
867 }
868 
869 static inline void trace_access_unlock(int cpu)
870 {
871 	if (cpu == RING_BUFFER_ALL_CPUS) {
872 		up_write(&all_cpu_access_lock);
873 	} else {
874 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875 		up_read(&all_cpu_access_lock);
876 	}
877 }
878 
879 static inline void trace_access_lock_init(void)
880 {
881 	int cpu;
882 
883 	for_each_possible_cpu(cpu)
884 		mutex_init(&per_cpu(cpu_access_lock, cpu));
885 }
886 
887 #else
888 
889 static DEFINE_MUTEX(access_lock);
890 
891 static inline void trace_access_lock(int cpu)
892 {
893 	(void)cpu;
894 	mutex_lock(&access_lock);
895 }
896 
897 static inline void trace_access_unlock(int cpu)
898 {
899 	(void)cpu;
900 	mutex_unlock(&access_lock);
901 }
902 
903 static inline void trace_access_lock_init(void)
904 {
905 }
906 
907 #endif
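
/*
 * Illustrative usage of the access primitives above (a sketch, not code
 * from this file): a reader consuming events from one cpu buffer brackets
 * the consume with the per-cpu lock, while a reader touching all cpu
 * buffers passes RING_BUFFER_ALL_CPUS instead:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	trace_access_unlock(cpu);
 */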
908 
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911 				 unsigned int trace_ctx,
912 				 int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914 				      struct trace_buffer *buffer,
915 				      unsigned int trace_ctx,
916 				      int skip, struct pt_regs *regs);
917 
918 #else
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920 					unsigned int trace_ctx,
921 					int skip, struct pt_regs *regs)
922 {
923 }
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925 				      struct trace_buffer *buffer,
926 				      unsigned int trace_ctx,
927 				      int skip, struct pt_regs *regs)
928 {
929 }
930 
931 #endif
932 
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935 		  int type, unsigned int trace_ctx)
936 {
937 	struct trace_entry *ent = ring_buffer_event_data(event);
938 
939 	tracing_generic_entry_update(ent, type, trace_ctx);
940 }
941 
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
944 			  int type,
945 			  unsigned long len,
946 			  unsigned int trace_ctx)
947 {
948 	struct ring_buffer_event *event;
949 
950 	event = ring_buffer_lock_reserve(buffer, len);
951 	if (event != NULL)
952 		trace_event_setup(event, type, trace_ctx);
953 
954 	return event;
955 }
956 
957 void tracer_tracing_on(struct trace_array *tr)
958 {
959 	if (tr->array_buffer.buffer)
960 		ring_buffer_record_on(tr->array_buffer.buffer);
961 	/*
962 	 * This flag is looked at when buffers haven't been allocated
963 	 * yet, or by some tracers (like irqsoff), that just want to
964 	 * know if the ring buffer has been disabled, but it can handle
965 	 * races where it gets disabled but we still do a record.
966 	 * As the check is in the fast path of the tracers, it is more
967 	 * important to be fast than accurate.
968 	 */
969 	tr->buffer_disabled = 0;
970 	/* Make the flag seen by readers */
971 	smp_wmb();
972 }
973 
974 /**
975  * tracing_on - enable tracing buffers
976  *
977  * This function enables tracing buffers that may have been
978  * disabled with tracing_off.
979  */
980 void tracing_on(void)
981 {
982 	tracer_tracing_on(&global_trace);
983 }
984 EXPORT_SYMBOL_GPL(tracing_on);
985 
986 
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 {
990 	__this_cpu_write(trace_taskinfo_save, true);
991 
992 	/* If this is the temp buffer, we need to commit fully */
993 	if (this_cpu_read(trace_buffered_event) == event) {
994 		/* Length is in event->array[0] */
995 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
996 		/* Release the temp buffer */
997 		this_cpu_dec(trace_buffered_event_cnt);
998 	} else
999 		ring_buffer_unlock_commit(buffer, event);
1000 }
1001 
1002 /**
1003  * __trace_puts - write a constant string into the trace buffer.
1004  * @ip:	   The address of the caller
1005  * @str:   The constant string to write
1006  * @size:  The size of the string.
1007  */
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1009 {
1010 	struct ring_buffer_event *event;
1011 	struct trace_buffer *buffer;
1012 	struct print_entry *entry;
1013 	unsigned int trace_ctx;
1014 	int alloc;
1015 
1016 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1017 		return 0;
1018 
1019 	if (unlikely(tracing_selftest_running || tracing_disabled))
1020 		return 0;
1021 
1022 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023 
1024 	trace_ctx = tracing_gen_ctx();
1025 	buffer = global_trace.array_buffer.buffer;
1026 	ring_buffer_nest_start(buffer);
1027 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028 					    trace_ctx);
1029 	if (!event) {
1030 		size = 0;
1031 		goto out;
1032 	}
1033 
1034 	entry = ring_buffer_event_data(event);
1035 	entry->ip = ip;
1036 
1037 	memcpy(&entry->buf, str, size);
1038 
1039 	/* Add a newline if necessary */
1040 	if (entry->buf[size - 1] != '\n') {
1041 		entry->buf[size] = '\n';
1042 		entry->buf[size + 1] = '\0';
1043 	} else
1044 		entry->buf[size] = '\0';
1045 
1046 	__buffer_unlock_commit(buffer, event);
1047 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1048  out:
1049 	ring_buffer_nest_end(buffer);
1050 	return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
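
/*
 * Callers normally reach __trace_puts() through the trace_puts() macro,
 * which picks __trace_bputs() for compile-time constant strings and
 * __trace_puts() otherwise. A hedged usage sketch:
 *
 *	trace_puts("hit the slow path\n");
 */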
1053 
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:	   The address of the caller
1057  * @str:   The constant string to write to the buffer to
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061 	struct ring_buffer_event *event;
1062 	struct trace_buffer *buffer;
1063 	struct bputs_entry *entry;
1064 	unsigned int trace_ctx;
1065 	int size = sizeof(struct bputs_entry);
1066 	int ret = 0;
1067 
1068 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069 		return 0;
1070 
1071 	if (unlikely(tracing_selftest_running || tracing_disabled))
1072 		return 0;
1073 
1074 	trace_ctx = tracing_gen_ctx();
1075 	buffer = global_trace.array_buffer.buffer;
1076 
1077 	ring_buffer_nest_start(buffer);
1078 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079 					    trace_ctx);
1080 	if (!event)
1081 		goto out;
1082 
1083 	entry = ring_buffer_event_data(event);
1084 	entry->ip			= ip;
1085 	entry->str			= str;
1086 
1087 	__buffer_unlock_commit(buffer, event);
1088 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089 
1090 	ret = 1;
1091  out:
1092 	ring_buffer_nest_end(buffer);
1093 	return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1096 
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099 					   void *cond_data)
1100 {
1101 	struct tracer *tracer = tr->current_trace;
1102 	unsigned long flags;
1103 
1104 	if (in_nmi()) {
1105 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1107 		return;
1108 	}
1109 
1110 	if (!tr->allocated_snapshot) {
1111 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112 		internal_trace_puts("*** stopping trace here!   ***\n");
1113 		tracing_off();
1114 		return;
1115 	}
1116 
1117 	/* Note, snapshot can not be used when the tracer uses it */
1118 	if (tracer->use_max_tr) {
1119 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1121 		return;
1122 	}
1123 
1124 	local_irq_save(flags);
1125 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1126 	local_irq_restore(flags);
1127 }
1128 
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131 	tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133 
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot with either
1142  * a tracing_snapshot_alloc(), or by doing it manually
1143  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, it will stop tracing.
1146  * Basically making a permanent snapshot.
1147  */
1148 void tracing_snapshot(void)
1149 {
1150 	struct trace_array *tr = &global_trace;
1151 
1152 	tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155 
1156 /**
1157  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158  * @tr:		The tracing instance to snapshot
1159  * @cond_data:	The data to be tested conditionally, and possibly saved
1160  *
1161  * This is the same as tracing_snapshot() except that the snapshot is
1162  * conditional - the snapshot will only happen if the
1163  * cond_snapshot.update() implementation receiving the cond_data
1164  * returns true, which means that the trace array's cond_snapshot
1165  * update() operation used the cond_data to determine whether the
1166  * snapshot should be taken, and if it was, presumably saved it along
1167  * with the snapshot.
1168  */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171 	tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174 
1175 /**
1176  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177  * @tr:		The tracing instance
1178  *
1179  * When the user enables a conditional snapshot using
1180  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181  * with the snapshot.  This accessor is used to retrieve it.
1182  *
1183  * Should not be called from cond_snapshot.update(), since it takes
1184  * the tr->max_lock lock, which the code calling
1185  * cond_snapshot.update() has already taken.
1186  *
1187  * Returns the cond_data associated with the trace array's snapshot.
1188  */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191 	void *cond_data = NULL;
1192 
1193 	arch_spin_lock(&tr->max_lock);
1194 
1195 	if (tr->cond_snapshot)
1196 		cond_data = tr->cond_snapshot->cond_data;
1197 
1198 	arch_spin_unlock(&tr->max_lock);
1199 
1200 	return cond_data;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203 
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205 					struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207 
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1209 {
1210 	int ret;
1211 
1212 	if (!tr->allocated_snapshot) {
1213 
1214 		/* allocate spare buffer */
1215 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1217 		if (ret < 0)
1218 			return ret;
1219 
1220 		tr->allocated_snapshot = true;
1221 	}
1222 
1223 	return 0;
1224 }
1225 
1226 static void free_snapshot(struct trace_array *tr)
1227 {
1228 	/*
1229 	 * We don't free the ring buffer; instead, we resize it because
1230 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1231 	 * we want to preserve it.
1232 	 */
1233 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234 	set_buffer_entries(&tr->max_buffer, 1);
1235 	tracing_reset_online_cpus(&tr->max_buffer);
1236 	tr->allocated_snapshot = false;
1237 }
1238 
1239 /**
1240  * tracing_alloc_snapshot - allocate snapshot buffer.
1241  *
1242  * This only allocates the snapshot buffer if it isn't already
1243  * allocated - it doesn't also take a snapshot.
1244  *
1245  * This is meant to be used in cases where the snapshot buffer needs
1246  * to be set up for events that can't sleep but need to be able to
1247  * trigger a snapshot.
1248  */
1249 int tracing_alloc_snapshot(void)
1250 {
1251 	struct trace_array *tr = &global_trace;
1252 	int ret;
1253 
1254 	ret = tracing_alloc_snapshot_instance(tr);
1255 	WARN_ON(ret < 0);
1256 
1257 	return ret;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1260 
1261 /**
1262  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263  *
1264  * This is similar to tracing_snapshot(), but it will allocate the
1265  * snapshot buffer if it isn't already allocated. Use this only
1266  * where it is safe to sleep, as the allocation may sleep.
1267  *
1268  * This causes a swap between the snapshot buffer and the current live
1269  * tracing buffer. You can use this to take snapshots of the live
1270  * trace when some condition is triggered, but continue to trace.
1271  */
1272 void tracing_snapshot_alloc(void)
1273 {
1274 	int ret;
1275 
1276 	ret = tracing_alloc_snapshot();
1277 	if (ret < 0)
1278 		return;
1279 
1280 	tracing_snapshot();
1281 }
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
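
/*
 * Illustrative usage (a sketch, not from this file): code that wants a
 * one-shot capture of the live trace around a rare event can either let
 * the allocation happen at snapshot time, when sleeping is allowed:
 *
 *	tracing_snapshot_alloc();
 *
 * or allocate ahead of time (e.g. during initialization) and later take
 * the snapshot from a context that cannot sleep:
 *
 *	tracing_alloc_snapshot();
 *	...
 *	tracing_snapshot();
 */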
1283 
1284 /**
1285  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286  * @tr:		The tracing instance
1287  * @cond_data:	User data to associate with the snapshot
1288  * @update:	Implementation of the cond_snapshot update function
1289  *
1290  * Check whether the conditional snapshot for the given instance has
1291  * already been enabled, or if the current tracer is already using a
1292  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293  * save the cond_data and update function inside.
1294  *
1295  * Returns 0 if successful, error otherwise.
1296  */
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298 				 cond_update_fn_t update)
1299 {
1300 	struct cond_snapshot *cond_snapshot;
1301 	int ret = 0;
1302 
1303 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1304 	if (!cond_snapshot)
1305 		return -ENOMEM;
1306 
1307 	cond_snapshot->cond_data = cond_data;
1308 	cond_snapshot->update = update;
1309 
1310 	mutex_lock(&trace_types_lock);
1311 
1312 	ret = tracing_alloc_snapshot_instance(tr);
1313 	if (ret)
1314 		goto fail_unlock;
1315 
1316 	if (tr->current_trace->use_max_tr) {
1317 		ret = -EBUSY;
1318 		goto fail_unlock;
1319 	}
1320 
1321 	/*
1322 	 * The cond_snapshot can only change to NULL without the
1323 	 * trace_types_lock. We don't care if we race with it going
1324 	 * to NULL, but we want to make sure that it's not set to
1325 	 * something other than NULL when we get here, which we can
1326 	 * do safely with only holding the trace_types_lock and not
1327 	 * having to take the max_lock.
1328 	 */
1329 	if (tr->cond_snapshot) {
1330 		ret = -EBUSY;
1331 		goto fail_unlock;
1332 	}
1333 
1334 	arch_spin_lock(&tr->max_lock);
1335 	tr->cond_snapshot = cond_snapshot;
1336 	arch_spin_unlock(&tr->max_lock);
1337 
1338 	mutex_unlock(&trace_types_lock);
1339 
1340 	return ret;
1341 
1342  fail_unlock:
1343 	mutex_unlock(&trace_types_lock);
1344 	kfree(cond_snapshot);
1345 	return ret;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
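
/*
 * Minimal sketch (an assumption; my_update(), observed_latency() and the
 * threshold are hypothetical) of the conditional snapshot API enabled
 * above, paired with tracing_snapshot_cond() and
 * tracing_snapshot_cond_disable():
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		unsigned long *threshold = cond_data;
 *
 *		return observed_latency() > *threshold;
 *	}
 *
 *	static unsigned long my_threshold = 500;
 *
 *	tracing_snapshot_cond_enable(tr, &my_threshold, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_threshold);	// snapshots only if my_update() returns true
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */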
1348 
1349 /**
1350  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351  * @tr:		The tracing instance
1352  *
1353  * Check whether the conditional snapshot for the given instance is
1354  * enabled; if so, free the cond_snapshot associated with it,
1355  * otherwise return -EINVAL.
1356  *
1357  * Returns 0 if successful, error otherwise.
1358  */
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1360 {
1361 	int ret = 0;
1362 
1363 	arch_spin_lock(&tr->max_lock);
1364 
1365 	if (!tr->cond_snapshot)
1366 		ret = -EINVAL;
1367 	else {
1368 		kfree(tr->cond_snapshot);
1369 		tr->cond_snapshot = NULL;
1370 	}
1371 
1372 	arch_spin_unlock(&tr->max_lock);
1373 
1374 	return ret;
1375 }
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1377 #else
1378 void tracing_snapshot(void)
1379 {
1380 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1384 {
1385 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1386 }
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1389 {
1390 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1391 	return -ENODEV;
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1395 {
1396 	/* Give warning */
1397 	tracing_snapshot();
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1401 {
1402 	return NULL;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1406 {
1407 	return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1411 {
1412 	return false;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1416 
1417 void tracer_tracing_off(struct trace_array *tr)
1418 {
1419 	if (tr->array_buffer.buffer)
1420 		ring_buffer_record_off(tr->array_buffer.buffer);
1421 	/*
1422 	 * This flag is looked at when buffers haven't been allocated
1423 	 * yet, or by some tracers (like irqsoff), that just want to
1424 	 * know if the ring buffer has been disabled, but it can handle
1425 	 * races where it gets disabled but we still do a record.
1426 	 * As the check is in the fast path of the tracers, it is more
1427 	 * important to be fast than accurate.
1428 	 */
1429 	tr->buffer_disabled = 1;
1430 	/* Make the flag seen by readers */
1431 	smp_wmb();
1432 }
1433 
1434 /**
1435  * tracing_off - turn off tracing buffers
1436  *
1437  * This function stops the tracing buffers from recording data.
1438  * It does not disable any overhead the tracers themselves may
1439  * be causing. This function simply causes all recording to
1440  * the ring buffers to fail.
1441  */
1442 void tracing_off(void)
1443 {
1444 	tracer_tracing_off(&global_trace);
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_off);
1447 
1448 void disable_trace_on_warning(void)
1449 {
1450 	if (__disable_trace_on_warning) {
1451 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452 			"Disabling tracing due to warning\n");
1453 		tracing_off();
1454 	}
1455 }
1456 
1457 /**
1458  * tracer_tracing_is_on - show real state of ring buffer enabled
1459  * @tr : the trace array to know if ring buffer is enabled
1460  *
1461  * Shows real state of the ring buffer if it is enabled or not.
1462  */
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1464 {
1465 	if (tr->array_buffer.buffer)
1466 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467 	return !tr->buffer_disabled;
1468 }
1469 
1470 /**
1471  * tracing_is_on - show state of ring buffers enabled
1472  */
1473 int tracing_is_on(void)
1474 {
1475 	return tracer_tracing_is_on(&global_trace);
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
1478 
1479 static int __init set_buf_size(char *str)
1480 {
1481 	unsigned long buf_size;
1482 
1483 	if (!str)
1484 		return 0;
1485 	buf_size = memparse(str, &str);
1486 	/* nr_entries can not be zero */
1487 	if (buf_size == 0)
1488 		return 0;
1489 	trace_buf_size = buf_size;
1490 	return 1;
1491 }
1492 __setup("trace_buf_size=", set_buf_size);
1493 
1494 static int __init set_tracing_thresh(char *str)
1495 {
1496 	unsigned long threshold;
1497 	int ret;
1498 
1499 	if (!str)
1500 		return 0;
1501 	ret = kstrtoul(str, 0, &threshold);
1502 	if (ret < 0)
1503 		return 0;
1504 	tracing_thresh = threshold * 1000;
1505 	return 1;
1506 }
1507 __setup("tracing_thresh=", set_tracing_thresh);
1508 
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1510 {
1511 	return nsecs / 1000;
1512 }
1513 
1514 /*
1515  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518  * of strings in the order that the evals (enum) were defined.
1519  */
1520 #undef C
1521 #define C(a, b) b
1522 
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1525 	TRACE_FLAGS
1526 	NULL
1527 };
1528 
1529 static struct {
1530 	u64 (*func)(void);
1531 	const char *name;
1532 	int in_ns;		/* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534 	{ trace_clock_local,		"local",	1 },
1535 	{ trace_clock_global,		"global",	1 },
1536 	{ trace_clock_counter,		"counter",	0 },
1537 	{ trace_clock_jiffies,		"uptime",	0 },
1538 	{ trace_clock,			"perf",		1 },
1539 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1540 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1541 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1542 	ARCH_TRACE_CLOCKS
1543 };
1544 
1545 bool trace_clock_in_ns(struct trace_array *tr)
1546 {
1547 	if (trace_clocks[tr->clock_id].in_ns)
1548 		return true;
1549 
1550 	return false;
1551 }
1552 
1553 /*
1554  * trace_parser_get_init - gets the buffer for trace parser
1555  */
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1557 {
1558 	memset(parser, 0, sizeof(*parser));
1559 
1560 	parser->buffer = kmalloc(size, GFP_KERNEL);
1561 	if (!parser->buffer)
1562 		return 1;
1563 
1564 	parser->size = size;
1565 	return 0;
1566 }
1567 
1568 /*
1569  * trace_parser_put - frees the buffer for trace parser
1570  */
1571 void trace_parser_put(struct trace_parser *parser)
1572 {
1573 	kfree(parser->buffer);
1574 	parser->buffer = NULL;
1575 }
1576 
1577 /*
1578  * trace_get_user - reads the user input string separated by  space
1579  * (matched by isspace(ch))
1580  *
1581  * For each string found the 'struct trace_parser' is updated,
1582  * and the function returns.
1583  *
1584  * Returns number of bytes read.
1585  *
1586  * See kernel/trace/trace.h for 'struct trace_parser' details.
1587  */
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589 	size_t cnt, loff_t *ppos)
1590 {
1591 	char ch;
1592 	size_t read = 0;
1593 	ssize_t ret;
1594 
1595 	if (!*ppos)
1596 		trace_parser_clear(parser);
1597 
1598 	ret = get_user(ch, ubuf++);
1599 	if (ret)
1600 		goto out;
1601 
1602 	read++;
1603 	cnt--;
1604 
1605 	/*
1606 	 * The parser is not finished with the last write,
1607 	 * continue reading the user input without skipping spaces.
1608 	 */
1609 	if (!parser->cont) {
1610 		/* skip white space */
1611 		while (cnt && isspace(ch)) {
1612 			ret = get_user(ch, ubuf++);
1613 			if (ret)
1614 				goto out;
1615 			read++;
1616 			cnt--;
1617 		}
1618 
1619 		parser->idx = 0;
1620 
1621 		/* only spaces were written */
1622 		if (isspace(ch) || !ch) {
1623 			*ppos += read;
1624 			ret = read;
1625 			goto out;
1626 		}
1627 	}
1628 
1629 	/* read the non-space input */
1630 	while (cnt && !isspace(ch) && ch) {
1631 		if (parser->idx < parser->size - 1)
1632 			parser->buffer[parser->idx++] = ch;
1633 		else {
1634 			ret = -EINVAL;
1635 			goto out;
1636 		}
1637 		ret = get_user(ch, ubuf++);
1638 		if (ret)
1639 			goto out;
1640 		read++;
1641 		cnt--;
1642 	}
1643 
1644 	/* We either got finished input or we have to wait for another call. */
1645 	if (isspace(ch) || !ch) {
1646 		parser->buffer[parser->idx] = 0;
1647 		parser->cont = false;
1648 	} else if (parser->idx < parser->size - 1) {
1649 		parser->cont = true;
1650 		parser->buffer[parser->idx++] = ch;
1651 		/* Make sure the parsed string always terminates with '\0'. */
1652 		parser->buffer[parser->idx] = 0;
1653 	} else {
1654 		ret = -EINVAL;
1655 		goto out;
1656 	}
1657 
1658 	*ppos += read;
1659 	ret = read;
1660 
1661 out:
1662 	return ret;
1663 }
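
/*
 * Minimal sketch of the usual calling pattern for the parser helpers
 * above, mirroring what trace_pid_write() does earlier in this file:
 *
 *	struct trace_parser parser;
 *	loff_t pos;
 *	ssize_t ret;
 *
 *	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 *		return -ENOMEM;
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *
 *		ubuf += ret;
 *		cnt  -= ret;
 *
 *		// parser.buffer now holds one NUL-terminated token
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */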
1664 
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1667 {
1668 	int len;
1669 
1670 	if (trace_seq_used(s) <= s->seq.readpos)
1671 		return -EBUSY;
1672 
1673 	len = trace_seq_used(s) - s->seq.readpos;
1674 	if (cnt > len)
1675 		cnt = len;
1676 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1677 
1678 	s->seq.readpos += cnt;
1679 	return cnt;
1680 }
1681 
1682 unsigned long __read_mostly	tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1684 
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686 	defined(CONFIG_FSNOTIFY)
1687 
1688 static struct workqueue_struct *fsnotify_wq;
1689 
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1691 {
1692 	struct trace_array *tr = container_of(work, struct trace_array,
1693 					      fsnotify_work);
1694 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1695 }
1696 
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1698 {
1699 	struct trace_array *tr = container_of(iwork, struct trace_array,
1700 					      fsnotify_irqwork);
1701 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1702 }
1703 
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705 				     struct dentry *d_tracer)
1706 {
1707 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710 					      d_tracer, &tr->max_latency,
1711 					      &tracing_max_lat_fops);
1712 }
1713 
1714 __init static int latency_fsnotify_init(void)
1715 {
1716 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1718 	if (!fsnotify_wq) {
1719 		pr_err("Unable to allocate tr_max_lat_wq\n");
1720 		return -ENOMEM;
1721 	}
1722 	return 0;
1723 }
1724 
1725 late_initcall_sync(latency_fsnotify_init);
1726 
1727 void latency_fsnotify(struct trace_array *tr)
1728 {
1729 	if (!fsnotify_wq)
1730 		return;
1731 	/*
1732 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733 	 * possible that we are called from __schedule() or do_idle(), which
1734 	 * could cause a deadlock.
1735 	 */
1736 	irq_work_queue(&tr->fsnotify_irqwork);
1737 }
1738 
1739 /*
1740  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741  *  defined(CONFIG_FSNOTIFY)
1742  */
1743 #else
1744 
1745 #define trace_create_maxlat_file(tr, d_tracer)				\
1746 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1747 			  &tr->max_latency, &tracing_max_lat_fops)
1748 
1749 #endif
1750 
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1752 /*
1753  * Copy the new maximum trace into the separate maximum-trace
1754  * structure. (this way the maximum trace is permanently saved,
1755  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1756  */
1757 static void
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1759 {
1760 	struct array_buffer *trace_buf = &tr->array_buffer;
1761 	struct array_buffer *max_buf = &tr->max_buffer;
1762 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1764 
1765 	max_buf->cpu = cpu;
1766 	max_buf->time_start = data->preempt_timestamp;
1767 
1768 	max_data->saved_latency = tr->max_latency;
1769 	max_data->critical_start = data->critical_start;
1770 	max_data->critical_end = data->critical_end;
1771 
1772 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773 	max_data->pid = tsk->pid;
1774 	/*
1775 	 * If tsk == current, then use current_uid(), as that does not use
1776 	 * RCU. The irq tracer can be called out of RCU scope.
1777 	 */
1778 	if (tsk == current)
1779 		max_data->uid = current_uid();
1780 	else
1781 		max_data->uid = task_uid(tsk);
1782 
1783 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784 	max_data->policy = tsk->policy;
1785 	max_data->rt_priority = tsk->rt_priority;
1786 
1787 	/* record this tasks comm */
1788 	tracing_record_cmdline(tsk);
1789 	latency_fsnotify(tr);
1790 }
1791 
1792 /**
1793  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1794  * @tr: tracer
1795  * @tsk: the task with the latency
1796  * @cpu: The cpu that initiated the trace.
1797  * @cond_data: User data associated with a conditional snapshot
1798  *
1799  * Flip the buffers between the @tr and the max_tr and record information
1800  * about which task was the cause of this latency.
1801  */
1802 void
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1804 	      void *cond_data)
1805 {
1806 	if (tr->stop_count)
1807 		return;
1808 
1809 	WARN_ON_ONCE(!irqs_disabled());
1810 
1811 	if (!tr->allocated_snapshot) {
1812 		/* Only the nop tracer should hit this when disabling */
1813 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1814 		return;
1815 	}
1816 
1817 	arch_spin_lock(&tr->max_lock);
1818 
1819 	/* Inherit the recordable setting from array_buffer */
1820 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821 		ring_buffer_record_on(tr->max_buffer.buffer);
1822 	else
1823 		ring_buffer_record_off(tr->max_buffer.buffer);
1824 
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1827 		goto out_unlock;
1828 #endif
1829 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1830 
1831 	__update_max_tr(tr, tsk, cpu);
1832 
1833  out_unlock:
1834 	arch_spin_unlock(&tr->max_lock);
1835 }
1836 
1837 /**
1838  * update_max_tr_single - only copy one trace over, and reset the rest
1839  * @tr: tracer
1840  * @tsk: task with the latency
1841  * @cpu: the cpu of the buffer to copy.
1842  *
1843  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1844  */
1845 void
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1847 {
1848 	int ret;
1849 
1850 	if (tr->stop_count)
1851 		return;
1852 
1853 	WARN_ON_ONCE(!irqs_disabled());
1854 	if (!tr->allocated_snapshot) {
1855 		/* Only the nop tracer should hit this when disabling */
1856 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1857 		return;
1858 	}
1859 
1860 	arch_spin_lock(&tr->max_lock);
1861 
1862 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1863 
1864 	if (ret == -EBUSY) {
1865 		/*
1866 		 * We failed to swap the buffer due to a commit taking
1867 		 * place on this CPU. We fail to record, but we reset
1868 		 * the max trace buffer (no one writes directly to it)
1869 		 * and flag that it failed.
1870 		 */
1871 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872 			"Failed to swap buffers due to commit in progress\n");
1873 	}
1874 
1875 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1876 
1877 	__update_max_tr(tr, tsk, cpu);
1878 	arch_spin_unlock(&tr->max_lock);
1879 }
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1881 
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1883 {
1884 	/* Iterators are static, they should be filled or empty */
1885 	if (trace_buffer_iter(iter, iter->cpu_file))
1886 		return 0;
1887 
1888 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1889 				full);
1890 }
1891 
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1894 
1895 struct trace_selftests {
1896 	struct list_head		list;
1897 	struct tracer			*type;
1898 };
1899 
1900 static LIST_HEAD(postponed_selftests);
1901 
1902 static int save_selftest(struct tracer *type)
1903 {
1904 	struct trace_selftests *selftest;
1905 
1906 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1907 	if (!selftest)
1908 		return -ENOMEM;
1909 
1910 	selftest->type = type;
1911 	list_add(&selftest->list, &postponed_selftests);
1912 	return 0;
1913 }
1914 
1915 static int run_tracer_selftest(struct tracer *type)
1916 {
1917 	struct trace_array *tr = &global_trace;
1918 	struct tracer *saved_tracer = tr->current_trace;
1919 	int ret;
1920 
1921 	if (!type->selftest || tracing_selftest_disabled)
1922 		return 0;
1923 
1924 	/*
1925 	 * If a tracer registers early in boot up (before scheduling is
1926 	 * initialized and such), then do not run its selftests yet.
1927 	 * Instead, run it a little later in the boot process.
1928 	 */
1929 	if (!selftests_can_run)
1930 		return save_selftest(type);
1931 
1932 	if (!tracing_is_on()) {
1933 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1934 			type->name);
1935 		return 0;
1936 	}
1937 
1938 	/*
1939 	 * Run a selftest on this tracer.
1940 	 * Here we reset the trace buffer, and set the current
1941 	 * tracer to be this tracer. The tracer can then run some
1942 	 * internal tracing to verify that everything is in order.
1943 	 * If we fail, we do not register this tracer.
1944 	 */
1945 	tracing_reset_online_cpus(&tr->array_buffer);
1946 
1947 	tr->current_trace = type;
1948 
1949 #ifdef CONFIG_TRACER_MAX_TRACE
1950 	if (type->use_max_tr) {
1951 		/* If we expanded the buffers, make sure the max is expanded too */
1952 		if (ring_buffer_expanded)
1953 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1954 					   RING_BUFFER_ALL_CPUS);
1955 		tr->allocated_snapshot = true;
1956 	}
1957 #endif
1958 
1959 	/* the test is responsible for initializing and enabling */
1960 	pr_info("Testing tracer %s: ", type->name);
1961 	ret = type->selftest(type, tr);
1962 	/* the test is responsible for resetting too */
1963 	tr->current_trace = saved_tracer;
1964 	if (ret) {
1965 		printk(KERN_CONT "FAILED!\n");
1966 		/* Add the warning after printing 'FAILED' */
1967 		WARN_ON(1);
1968 		return -1;
1969 	}
1970 	/* Only reset on passing, to avoid touching corrupted buffers */
1971 	tracing_reset_online_cpus(&tr->array_buffer);
1972 
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974 	if (type->use_max_tr) {
1975 		tr->allocated_snapshot = false;
1976 
1977 		/* Shrink the max buffer again */
1978 		if (ring_buffer_expanded)
1979 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1980 					   RING_BUFFER_ALL_CPUS);
1981 	}
1982 #endif
1983 
1984 	printk(KERN_CONT "PASSED\n");
1985 	return 0;
1986 }
1987 
1988 static __init int init_trace_selftests(void)
1989 {
1990 	struct trace_selftests *p, *n;
1991 	struct tracer *t, **last;
1992 	int ret;
1993 
1994 	selftests_can_run = true;
1995 
1996 	mutex_lock(&trace_types_lock);
1997 
1998 	if (list_empty(&postponed_selftests))
1999 		goto out;
2000 
2001 	pr_info("Running postponed tracer tests:\n");
2002 
2003 	tracing_selftest_running = true;
2004 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2005 		/* This loop can take minutes when sanitizers are enabled, so
2006 		 * let's make sure we allow RCU processing.
2007 		 */
2008 		cond_resched();
2009 		ret = run_tracer_selftest(p->type);
2010 		/* If the test fails, then warn and remove from available_tracers */
2011 		if (ret < 0) {
2012 			WARN(1, "tracer: %s failed selftest, disabling\n",
2013 			     p->type->name);
2014 			last = &trace_types;
2015 			for (t = trace_types; t; t = t->next) {
2016 				if (t == p->type) {
2017 					*last = t->next;
2018 					break;
2019 				}
2020 				last = &t->next;
2021 			}
2022 		}
2023 		list_del(&p->list);
2024 		kfree(p);
2025 	}
2026 	tracing_selftest_running = false;
2027 
2028  out:
2029 	mutex_unlock(&trace_types_lock);
2030 
2031 	return 0;
2032 }
2033 core_initcall(init_trace_selftests);
2034 #else
2035 static inline int run_tracer_selftest(struct tracer *type)
2036 {
2037 	return 0;
2038 }
2039 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2040 
2041 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2042 
2043 static void __init apply_trace_boot_options(void);
2044 
2045 /**
2046  * register_tracer - register a tracer with the ftrace system.
2047  * @type: the plugin for the tracer
2048  *
2049  * Register a new plugin tracer.
2050  */
2051 int __init register_tracer(struct tracer *type)
2052 {
2053 	struct tracer *t;
2054 	int ret = 0;
2055 
2056 	if (!type->name) {
2057 		pr_info("Tracer must have a name\n");
2058 		return -1;
2059 	}
2060 
2061 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2062 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2063 		return -1;
2064 	}
2065 
2066 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2067 		pr_warn("Can not register tracer %s due to lockdown\n",
2068 			   type->name);
2069 		return -EPERM;
2070 	}
2071 
2072 	mutex_lock(&trace_types_lock);
2073 
2074 	tracing_selftest_running = true;
2075 
2076 	for (t = trace_types; t; t = t->next) {
2077 		if (strcmp(type->name, t->name) == 0) {
2078 			/* already found */
2079 			pr_info("Tracer %s already registered\n",
2080 				type->name);
2081 			ret = -1;
2082 			goto out;
2083 		}
2084 	}
2085 
2086 	if (!type->set_flag)
2087 		type->set_flag = &dummy_set_flag;
2088 	if (!type->flags) {
2089 		/* allocate a dummy tracer_flags */
2090 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2091 		if (!type->flags) {
2092 			ret = -ENOMEM;
2093 			goto out;
2094 		}
2095 		type->flags->val = 0;
2096 		type->flags->opts = dummy_tracer_opt;
2097 	} else
2098 		if (!type->flags->opts)
2099 			type->flags->opts = dummy_tracer_opt;
2100 
2101 	/* store the tracer for __set_tracer_option */
2102 	type->flags->trace = type;
2103 
2104 	ret = run_tracer_selftest(type);
2105 	if (ret < 0)
2106 		goto out;
2107 
2108 	type->next = trace_types;
2109 	trace_types = type;
2110 	add_tracer_options(&global_trace, type);
2111 
2112  out:
2113 	tracing_selftest_running = false;
2114 	mutex_unlock(&trace_types_lock);
2115 
2116 	if (ret || !default_bootup_tracer)
2117 		goto out_unlock;
2118 
2119 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2120 		goto out_unlock;
2121 
2122 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2123 	/* Do we want this tracer to start on bootup? */
2124 	tracing_set_tracer(&global_trace, type->name);
2125 	default_bootup_tracer = NULL;
2126 
2127 	apply_trace_boot_options();
2128 
2129 	/* Disable other selftests, since running this tracer will break them. */
2130 	disable_tracing_selftest("running a tracer");
2131 
2132  out_unlock:
2133 	return ret;
2134 }
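
/*
 * Illustrative sketch (hypothetical tracer, names made up): since
 * register_tracer() is __init, a tracer must be built in and register
 * itself from its own initcall, roughly:
 *
 *	static struct tracer mytrace = {
 *		.name	= "mytrace",
 *		.init	= mytrace_init,
 *		.reset	= mytrace_reset,
 *	};
 *
 *	static __init int init_mytrace(void)
 *	{
 *		return register_tracer(&mytrace);
 *	}
 *	core_initcall(init_mytrace);
 */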
2135 
2136 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2137 {
2138 	struct trace_buffer *buffer = buf->buffer;
2139 
2140 	if (!buffer)
2141 		return;
2142 
2143 	ring_buffer_record_disable(buffer);
2144 
2145 	/* Make sure all commits have finished */
2146 	synchronize_rcu();
2147 	ring_buffer_reset_cpu(buffer, cpu);
2148 
2149 	ring_buffer_record_enable(buffer);
2150 }
2151 
2152 void tracing_reset_online_cpus(struct array_buffer *buf)
2153 {
2154 	struct trace_buffer *buffer = buf->buffer;
2155 
2156 	if (!buffer)
2157 		return;
2158 
2159 	ring_buffer_record_disable(buffer);
2160 
2161 	/* Make sure all commits have finished */
2162 	synchronize_rcu();
2163 
2164 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2165 
2166 	ring_buffer_reset_online_cpus(buffer);
2167 
2168 	ring_buffer_record_enable(buffer);
2169 }
2170 
2171 /* Must have trace_types_lock held */
2172 void tracing_reset_all_online_cpus(void)
2173 {
2174 	struct trace_array *tr;
2175 
2176 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2177 		if (!tr->clear_trace)
2178 			continue;
2179 		tr->clear_trace = false;
2180 		tracing_reset_online_cpus(&tr->array_buffer);
2181 #ifdef CONFIG_TRACER_MAX_TRACE
2182 		tracing_reset_online_cpus(&tr->max_buffer);
2183 #endif
2184 	}
2185 }
2186 
2187 static int *tgid_map;
2188 
2189 #define SAVED_CMDLINES_DEFAULT 128
2190 #define NO_CMDLINE_MAP UINT_MAX
2191 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2192 struct saved_cmdlines_buffer {
2193 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2194 	unsigned *map_cmdline_to_pid;
2195 	unsigned cmdline_num;
2196 	int cmdline_idx;
2197 	char *saved_cmdlines;
2198 };
2199 static struct saved_cmdlines_buffer *savedcmd;
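
/*
 * Layout note (added for clarity): map_pid_to_cmdline[] maps a masked
 * PID to a slot in saved_cmdlines[], and map_cmdline_to_pid[] maps that
 * slot back to the owning PID so a recycled slot can be detected.
 * Roughly:
 *
 *	idx  = savedcmd->map_pid_to_cmdline[pid & (PID_MAX_DEFAULT - 1)];
 *	comm = &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
 */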
2200 
2201 /* temporarily disable recording */
2202 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2203 
2204 static inline char *get_saved_cmdlines(int idx)
2205 {
2206 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2207 }
2208 
2209 static inline void set_cmdline(int idx, const char *cmdline)
2210 {
2211 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2212 }
2213 
2214 static int allocate_cmdlines_buffer(unsigned int val,
2215 				    struct saved_cmdlines_buffer *s)
2216 {
2217 	s->map_cmdline_to_pid = kmalloc_array(val,
2218 					      sizeof(*s->map_cmdline_to_pid),
2219 					      GFP_KERNEL);
2220 	if (!s->map_cmdline_to_pid)
2221 		return -ENOMEM;
2222 
2223 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2224 	if (!s->saved_cmdlines) {
2225 		kfree(s->map_cmdline_to_pid);
2226 		return -ENOMEM;
2227 	}
2228 
2229 	s->cmdline_idx = 0;
2230 	s->cmdline_num = val;
2231 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2232 	       sizeof(s->map_pid_to_cmdline));
2233 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2234 	       val * sizeof(*s->map_cmdline_to_pid));
2235 
2236 	return 0;
2237 }
2238 
2239 static int trace_create_savedcmd(void)
2240 {
2241 	int ret;
2242 
2243 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2244 	if (!savedcmd)
2245 		return -ENOMEM;
2246 
2247 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2248 	if (ret < 0) {
2249 		kfree(savedcmd);
2250 		savedcmd = NULL;
2251 		return -ENOMEM;
2252 	}
2253 
2254 	return 0;
2255 }
2256 
2257 int is_tracing_stopped(void)
2258 {
2259 	return global_trace.stop_count;
2260 }
2261 
2262 /**
2263  * tracing_start - quick start of the tracer
2264  *
2265  * If tracing is enabled but was stopped by tracing_stop,
2266  * this will start the tracer back up.
2267  */
2268 void tracing_start(void)
2269 {
2270 	struct trace_buffer *buffer;
2271 	unsigned long flags;
2272 
2273 	if (tracing_disabled)
2274 		return;
2275 
2276 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2277 	if (--global_trace.stop_count) {
2278 		if (global_trace.stop_count < 0) {
2279 			/* Someone screwed up their debugging */
2280 			WARN_ON_ONCE(1);
2281 			global_trace.stop_count = 0;
2282 		}
2283 		goto out;
2284 	}
2285 
2286 	/* Prevent the buffers from switching */
2287 	arch_spin_lock(&global_trace.max_lock);
2288 
2289 	buffer = global_trace.array_buffer.buffer;
2290 	if (buffer)
2291 		ring_buffer_record_enable(buffer);
2292 
2293 #ifdef CONFIG_TRACER_MAX_TRACE
2294 	buffer = global_trace.max_buffer.buffer;
2295 	if (buffer)
2296 		ring_buffer_record_enable(buffer);
2297 #endif
2298 
2299 	arch_spin_unlock(&global_trace.max_lock);
2300 
2301  out:
2302 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2303 }
2304 
2305 static void tracing_start_tr(struct trace_array *tr)
2306 {
2307 	struct trace_buffer *buffer;
2308 	unsigned long flags;
2309 
2310 	if (tracing_disabled)
2311 		return;
2312 
2313 	/* If global, we need to also start the max tracer */
2314 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2315 		return tracing_start();
2316 
2317 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2318 
2319 	if (--tr->stop_count) {
2320 		if (tr->stop_count < 0) {
2321 			/* Someone screwed up their debugging */
2322 			WARN_ON_ONCE(1);
2323 			tr->stop_count = 0;
2324 		}
2325 		goto out;
2326 	}
2327 
2328 	buffer = tr->array_buffer.buffer;
2329 	if (buffer)
2330 		ring_buffer_record_enable(buffer);
2331 
2332  out:
2333 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2334 }
2335 
2336 /**
2337  * tracing_stop - quick stop of the tracer
2338  *
2339  * Light weight way to stop tracing. Use in conjunction with
2340  * tracing_start.
2341  */
2342 void tracing_stop(void)
2343 {
2344 	struct trace_buffer *buffer;
2345 	unsigned long flags;
2346 
2347 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2348 	if (global_trace.stop_count++)
2349 		goto out;
2350 
2351 	/* Prevent the buffers from switching */
2352 	arch_spin_lock(&global_trace.max_lock);
2353 
2354 	buffer = global_trace.array_buffer.buffer;
2355 	if (buffer)
2356 		ring_buffer_record_disable(buffer);
2357 
2358 #ifdef CONFIG_TRACER_MAX_TRACE
2359 	buffer = global_trace.max_buffer.buffer;
2360 	if (buffer)
2361 		ring_buffer_record_disable(buffer);
2362 #endif
2363 
2364 	arch_spin_unlock(&global_trace.max_lock);
2365 
2366  out:
2367 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2368 }
2369 
2370 static void tracing_stop_tr(struct trace_array *tr)
2371 {
2372 	struct trace_buffer *buffer;
2373 	unsigned long flags;
2374 
2375 	/* If global, we need to also stop the max tracer */
2376 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2377 		return tracing_stop();
2378 
2379 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2380 	if (tr->stop_count++)
2381 		goto out;
2382 
2383 	buffer = tr->array_buffer.buffer;
2384 	if (buffer)
2385 		ring_buffer_record_disable(buffer);
2386 
2387  out:
2388 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2389 }
2390 
2391 static int trace_save_cmdline(struct task_struct *tsk)
2392 {
2393 	unsigned tpid, idx;
2394 
2395 	/* treat recording of idle task as a success */
2396 	if (!tsk->pid)
2397 		return 1;
2398 
2399 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2400 
2401 	/*
2402 	 * It's not the end of the world if we don't get
2403 	 * the lock, but we also don't want to spin
2404 	 * nor do we want to disable interrupts,
2405 	 * so if we miss here, then better luck next time.
2406 	 */
2407 	if (!arch_spin_trylock(&trace_cmdline_lock))
2408 		return 0;
2409 
2410 	idx = savedcmd->map_pid_to_cmdline[tpid];
2411 	if (idx == NO_CMDLINE_MAP) {
2412 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2413 
2414 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2415 		savedcmd->cmdline_idx = idx;
2416 	}
2417 
2418 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2419 	set_cmdline(idx, tsk->comm);
2420 
2421 	arch_spin_unlock(&trace_cmdline_lock);
2422 
2423 	return 1;
2424 }
2425 
2426 static void __trace_find_cmdline(int pid, char comm[])
2427 {
2428 	unsigned map;
2429 	int tpid;
2430 
2431 	if (!pid) {
2432 		strcpy(comm, "<idle>");
2433 		return;
2434 	}
2435 
2436 	if (WARN_ON_ONCE(pid < 0)) {
2437 		strcpy(comm, "<XXX>");
2438 		return;
2439 	}
2440 
2441 	tpid = pid & (PID_MAX_DEFAULT - 1);
2442 	map = savedcmd->map_pid_to_cmdline[tpid];
2443 	if (map != NO_CMDLINE_MAP) {
2444 		tpid = savedcmd->map_cmdline_to_pid[map];
2445 		if (tpid == pid) {
2446 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2447 			return;
2448 		}
2449 	}
2450 	strcpy(comm, "<...>");
2451 }
2452 
2453 void trace_find_cmdline(int pid, char comm[])
2454 {
2455 	preempt_disable();
2456 	arch_spin_lock(&trace_cmdline_lock);
2457 
2458 	__trace_find_cmdline(pid, comm);
2459 
2460 	arch_spin_unlock(&trace_cmdline_lock);
2461 	preempt_enable();
2462 }
2463 
2464 int trace_find_tgid(int pid)
2465 {
2466 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2467 		return 0;
2468 
2469 	return tgid_map[pid];
2470 }
2471 
2472 static int trace_save_tgid(struct task_struct *tsk)
2473 {
2474 	/* treat recording of idle task as a success */
2475 	if (!tsk->pid)
2476 		return 1;
2477 
2478 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2479 		return 0;
2480 
2481 	tgid_map[tsk->pid] = tsk->tgid;
2482 	return 1;
2483 }
2484 
2485 static bool tracing_record_taskinfo_skip(int flags)
2486 {
2487 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2488 		return true;
2489 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2490 		return true;
2491 	if (!__this_cpu_read(trace_taskinfo_save))
2492 		return true;
2493 	return false;
2494 }
2495 
2496 /**
2497  * tracing_record_taskinfo - record the task info of a task
2498  *
2499  * @task:  task to record
2500  * @flags: TRACE_RECORD_CMDLINE for recording comm
2501  *         TRACE_RECORD_TGID for recording tgid
2502  */
2503 void tracing_record_taskinfo(struct task_struct *task, int flags)
2504 {
2505 	bool done;
2506 
2507 	if (tracing_record_taskinfo_skip(flags))
2508 		return;
2509 
2510 	/*
2511 	 * Record as much task information as possible. If some fail, continue
2512 	 * to try to record the others.
2513 	 */
2514 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2515 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2516 
2517 	/* If recording any information failed, retry again soon. */
2518 	if (!done)
2519 		return;
2520 
2521 	__this_cpu_write(trace_taskinfo_save, false);
2522 }
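
/*
 * Example (illustrative): a caller that wants both the comm and the
 * tgid recorded for the current task can OR the flags together:
 *
 *	tracing_record_taskinfo(current,
 *				TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
 */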
2523 
2524 /**
2525  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2526  *
2527  * @prev: previous task during sched_switch
2528  * @next: next task during sched_switch
2529  * @flags: TRACE_RECORD_CMDLINE for recording comm
2530  *         TRACE_RECORD_TGID for recording tgid
2531  */
2532 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2533 					  struct task_struct *next, int flags)
2534 {
2535 	bool done;
2536 
2537 	if (tracing_record_taskinfo_skip(flags))
2538 		return;
2539 
2540 	/*
2541 	 * Record as much task information as possible. If some fail, continue
2542 	 * to try to record the others.
2543 	 */
2544 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2545 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2546 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2547 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2548 
2549 	/* If recording any information failed, retry again soon. */
2550 	if (!done)
2551 		return;
2552 
2553 	__this_cpu_write(trace_taskinfo_save, false);
2554 }
2555 
2556 /* Helpers to record a specific task information */
2557 void tracing_record_cmdline(struct task_struct *task)
2558 {
2559 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2560 }
2561 
2562 void tracing_record_tgid(struct task_struct *task)
2563 {
2564 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2565 }
2566 
2567 /*
2568  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570  * simplifies those functions and keeps them in sync.
2571  */
2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574 	return trace_seq_has_overflowed(s) ?
2575 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
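
/*
 * Typical use (illustrative): an event's output callback finishes with
 *
 *	trace_seq_printf(s, "...", ...);
 *	return trace_handle_return(s);
 *
 * so that an overflowed trace_seq is reported as a partial line.
 */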
2578 
2579 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2580 {
2581 	unsigned int trace_flags = irqs_status;
2582 	unsigned int pc;
2583 
2584 	pc = preempt_count();
2585 
2586 	if (pc & NMI_MASK)
2587 		trace_flags |= TRACE_FLAG_NMI;
2588 	if (pc & HARDIRQ_MASK)
2589 		trace_flags |= TRACE_FLAG_HARDIRQ;
2590 	if (in_serving_softirq())
2591 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2592 
2593 	if (tif_need_resched())
2594 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2595 	if (test_preempt_need_resched())
2596 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2597 	return (trace_flags << 16) | (pc & 0xff);
2598 }
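
/*
 * Layout note (added for clarity): the returned context packs the
 * preempt count into the low byte and the TRACE_FLAG_* bits starting
 * at bit 16. For example, a hard interrupt with need-resched set
 * yields roughly:
 *
 *	((TRACE_FLAG_HARDIRQ | TRACE_FLAG_NEED_RESCHED) << 16) | (pc & 0xff)
 */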
2599 
2600 struct ring_buffer_event *
2601 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2602 			  int type,
2603 			  unsigned long len,
2604 			  unsigned int trace_ctx)
2605 {
2606 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2607 }
2608 
2609 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2610 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2611 static int trace_buffered_event_ref;
2612 
2613 /**
2614  * trace_buffered_event_enable - enable buffering events
2615  *
2616  * When events are being filtered, it is quicker to write the event
2617  * data into a temporary buffer if there is a good chance that it
2618  * will not be committed. Discarding an event from the ring buffer
2619  * is not as fast as committing one, and is much slower than copying
2620  * the data and then committing it.
2621  *
2622  * When an event is to be filtered, allocate per-CPU buffers to
2623  * write the event data into. If the event is filtered and discarded,
2624  * it is simply dropped; otherwise, the entire data is committed
2625  * in one shot.
2626  */
2627 void trace_buffered_event_enable(void)
2628 {
2629 	struct ring_buffer_event *event;
2630 	struct page *page;
2631 	int cpu;
2632 
2633 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2634 
2635 	if (trace_buffered_event_ref++)
2636 		return;
2637 
2638 	for_each_tracing_cpu(cpu) {
2639 		page = alloc_pages_node(cpu_to_node(cpu),
2640 					GFP_KERNEL | __GFP_NORETRY, 0);
2641 		if (!page)
2642 			goto failed;
2643 
2644 		event = page_address(page);
2645 		memset(event, 0, sizeof(*event));
2646 
2647 		per_cpu(trace_buffered_event, cpu) = event;
2648 
2649 		preempt_disable();
2650 		if (cpu == smp_processor_id() &&
2651 		    __this_cpu_read(trace_buffered_event) !=
2652 		    per_cpu(trace_buffered_event, cpu))
2653 			WARN_ON_ONCE(1);
2654 		preempt_enable();
2655 	}
2656 
2657 	return;
2658  failed:
2659 	trace_buffered_event_disable();
2660 }
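
/*
 * Illustrative pairing (sketch): the calls are reference counted and
 * must be made with event_mutex held. A filter installed on an event
 * bumps the count once, and drops it when the filter is removed,
 * roughly:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();	// filter added
 *	mutex_unlock(&event_mutex);
 *	...
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();	// filter removed
 *	mutex_unlock(&event_mutex);
 */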
2661 
2662 static void enable_trace_buffered_event(void *data)
2663 {
2664 	/* Probably not needed, but do it anyway */
2665 	smp_rmb();
2666 	this_cpu_dec(trace_buffered_event_cnt);
2667 }
2668 
2669 static void disable_trace_buffered_event(void *data)
2670 {
2671 	this_cpu_inc(trace_buffered_event_cnt);
2672 }
2673 
2674 /**
2675  * trace_buffered_event_disable - disable buffering events
2676  *
2677  * When a filter is removed, it is faster to not use the buffered
2678  * events, and to commit directly into the ring buffer. Free up
2679  * the temp buffers when there are no more users. This requires
2680  * special synchronization with current events.
2681  */
2682 void trace_buffered_event_disable(void)
2683 {
2684 	int cpu;
2685 
2686 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2687 
2688 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2689 		return;
2690 
2691 	if (--trace_buffered_event_ref)
2692 		return;
2693 
2694 	preempt_disable();
2695 	/* For each CPU, set the buffer as used. */
2696 	smp_call_function_many(tracing_buffer_mask,
2697 			       disable_trace_buffered_event, NULL, 1);
2698 	preempt_enable();
2699 
2700 	/* Wait for all current users to finish */
2701 	synchronize_rcu();
2702 
2703 	for_each_tracing_cpu(cpu) {
2704 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2705 		per_cpu(trace_buffered_event, cpu) = NULL;
2706 	}
2707 	/*
2708 	 * Make sure trace_buffered_event is NULL before clearing
2709 	 * trace_buffered_event_cnt.
2710 	 */
2711 	smp_wmb();
2712 
2713 	preempt_disable();
2714 	/* Do the work on each cpu */
2715 	smp_call_function_many(tracing_buffer_mask,
2716 			       enable_trace_buffered_event, NULL, 1);
2717 	preempt_enable();
2718 }
2719 
2720 static struct trace_buffer *temp_buffer;
2721 
2722 struct ring_buffer_event *
2723 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2724 			  struct trace_event_file *trace_file,
2725 			  int type, unsigned long len,
2726 			  unsigned int trace_ctx)
2727 {
2728 	struct ring_buffer_event *entry;
2729 	struct trace_array *tr = trace_file->tr;
2730 	int val;
2731 
2732 	*current_rb = tr->array_buffer.buffer;
2733 
2734 	if (!tr->no_filter_buffering_ref &&
2735 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2736 	    (entry = this_cpu_read(trace_buffered_event))) {
2737 		/* Try to use the per cpu buffer first */
2738 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2739 		if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
2740 			trace_event_setup(entry, type, trace_ctx);
2741 			entry->array[0] = len;
2742 			return entry;
2743 		}
2744 		this_cpu_dec(trace_buffered_event_cnt);
2745 	}
2746 
2747 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2748 					    trace_ctx);
2749 	/*
2750 	 * If tracing is off, but we have triggers enabled
2751 	 * If tracing is off, but we have triggers enabled,
2752 	 * we still need to look at the event data. Use the temp_buffer
2753 	 * to store the trace event for the trigger to use. It's recursion
2754 	 * safe and will not be recorded anywhere.
2755 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2756 		*current_rb = temp_buffer;
2757 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2758 						    trace_ctx);
2759 	}
2760 	return entry;
2761 }
2762 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2763 
2764 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2765 static DEFINE_MUTEX(tracepoint_printk_mutex);
2766 
2767 static void output_printk(struct trace_event_buffer *fbuffer)
2768 {
2769 	struct trace_event_call *event_call;
2770 	struct trace_event_file *file;
2771 	struct trace_event *event;
2772 	unsigned long flags;
2773 	struct trace_iterator *iter = tracepoint_print_iter;
2774 
2775 	/* We should never get here if iter is NULL */
2776 	if (WARN_ON_ONCE(!iter))
2777 		return;
2778 
2779 	event_call = fbuffer->trace_file->event_call;
2780 	if (!event_call || !event_call->event.funcs ||
2781 	    !event_call->event.funcs->trace)
2782 		return;
2783 
2784 	file = fbuffer->trace_file;
2785 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2786 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2787 	     !filter_match_preds(file->filter, fbuffer->entry)))
2788 		return;
2789 
2790 	event = &fbuffer->trace_file->event_call->event;
2791 
2792 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2793 	trace_seq_init(&iter->seq);
2794 	iter->ent = fbuffer->entry;
2795 	event_call->event.funcs->trace(iter, 0, event);
2796 	trace_seq_putc(&iter->seq, 0);
2797 	printk("%s", iter->seq.buffer);
2798 
2799 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2800 }
2801 
2802 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2803 			     void *buffer, size_t *lenp,
2804 			     loff_t *ppos)
2805 {
2806 	int save_tracepoint_printk;
2807 	int ret;
2808 
2809 	mutex_lock(&tracepoint_printk_mutex);
2810 	save_tracepoint_printk = tracepoint_printk;
2811 
2812 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2813 
2814 	/*
2815 	 * This will force exiting early, as tracepoint_printk
2816 	 * is always zero when tracepoint_print_iter is not allocated.
2817 	 */
2818 	if (!tracepoint_print_iter)
2819 		tracepoint_printk = 0;
2820 
2821 	if (save_tracepoint_printk == tracepoint_printk)
2822 		goto out;
2823 
2824 	if (tracepoint_printk)
2825 		static_key_enable(&tracepoint_printk_key.key);
2826 	else
2827 		static_key_disable(&tracepoint_printk_key.key);
2828 
2829  out:
2830 	mutex_unlock(&tracepoint_printk_mutex);
2831 
2832 	return ret;
2833 }
2834 
2835 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2836 {
2837 	if (static_key_false(&tracepoint_printk_key.key))
2838 		output_printk(fbuffer);
2839 
2840 	if (static_branch_unlikely(&trace_event_exports_enabled))
2841 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2842 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2843 				    fbuffer->event, fbuffer->entry,
2844 				    fbuffer->trace_ctx, fbuffer->regs);
2845 }
2846 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2847 
2848 /*
2849  * Skip 3:
2850  *
2851  *   trace_buffer_unlock_commit_regs()
2852  *   trace_event_buffer_commit()
2853  *   trace_event_raw_event_xxx()
2854  */
2855 # define STACK_SKIP 3
2856 
2857 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2858 				     struct trace_buffer *buffer,
2859 				     struct ring_buffer_event *event,
2860 				     unsigned int trace_ctx,
2861 				     struct pt_regs *regs)
2862 {
2863 	__buffer_unlock_commit(buffer, event);
2864 
2865 	/*
2866 	 * If regs is not set, then skip the necessary functions.
2867 	 * Note, we can still get here via blktrace, wakeup tracer
2868 	 * and mmiotrace, but that's ok if they lose a function or
2869 	 * two. They are not that meaningful.
2870 	 */
2871 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2872 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2873 }
2874 
2875 /*
2876  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2877  */
2878 void
2879 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2880 				   struct ring_buffer_event *event)
2881 {
2882 	__buffer_unlock_commit(buffer, event);
2883 }
2884 
2885 void
2886 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2887 	       parent_ip, unsigned int trace_ctx)
2888 {
2889 	struct trace_event_call *call = &event_function;
2890 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2891 	struct ring_buffer_event *event;
2892 	struct ftrace_entry *entry;
2893 
2894 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2895 					    trace_ctx);
2896 	if (!event)
2897 		return;
2898 	entry	= ring_buffer_event_data(event);
2899 	entry->ip			= ip;
2900 	entry->parent_ip		= parent_ip;
2901 
2902 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2903 		if (static_branch_unlikely(&trace_function_exports_enabled))
2904 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2905 		__buffer_unlock_commit(buffer, event);
2906 	}
2907 }
2908 
2909 #ifdef CONFIG_STACKTRACE
2910 
2911 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2912 #define FTRACE_KSTACK_NESTING	4
2913 
2914 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2915 
2916 struct ftrace_stack {
2917 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2918 };
2919 
2920 
2921 struct ftrace_stacks {
2922 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2923 };
2924 
2925 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2926 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
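
/*
 * Sizing note (added for clarity): with 4 KiB pages on a 64-bit kernel,
 * FTRACE_KSTACK_ENTRIES is 4096 / 4 = 1024 entries per context, so each
 * struct ftrace_stack is 8 KiB and each per-CPU struct ftrace_stacks is
 * 4 * 8 KiB = 32 KiB.
 */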
2927 
2928 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2929 				 unsigned int trace_ctx,
2930 				 int skip, struct pt_regs *regs)
2931 {
2932 	struct trace_event_call *call = &event_kernel_stack;
2933 	struct ring_buffer_event *event;
2934 	unsigned int size, nr_entries;
2935 	struct ftrace_stack *fstack;
2936 	struct stack_entry *entry;
2937 	int stackidx;
2938 
2939 	/*
2940 	 * Add one for this function and the call to stack_trace_save().
2941 	 * If regs is set, then these functions will not be in the way.
2942 	 */
2943 #ifndef CONFIG_UNWINDER_ORC
2944 	if (!regs)
2945 		skip++;
2946 #endif
2947 
2948 	preempt_disable_notrace();
2949 
2950 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2951 
2952 	/* This should never happen. If it does, yell once and skip */
2953 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2954 		goto out;
2955 
2956 	/*
2957 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2958 	 * interrupt will either see the value pre-increment or post-
2959 	 * increment. If the interrupt happens pre-increment, it will have
2960 	 * restored the counter when it returns. We just need a barrier to
2961 	 * keep gcc from moving things around.
2962 	 */
2963 	barrier();
2964 
2965 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2966 	size = ARRAY_SIZE(fstack->calls);
2967 
2968 	if (regs) {
2969 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2970 						   size, skip);
2971 	} else {
2972 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2973 	}
2974 
2975 	size = nr_entries * sizeof(unsigned long);
2976 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2977 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
2978 				    trace_ctx);
2979 	if (!event)
2980 		goto out;
2981 	entry = ring_buffer_event_data(event);
2982 
2983 	memcpy(&entry->caller, fstack->calls, size);
2984 	entry->size = nr_entries;
2985 
2986 	if (!call_filter_check_discard(call, entry, buffer, event))
2987 		__buffer_unlock_commit(buffer, event);
2988 
2989  out:
2990 	/* Again, don't let gcc optimize things here */
2991 	barrier();
2992 	__this_cpu_dec(ftrace_stack_reserve);
2993 	preempt_enable_notrace();
2994 
2995 }
2996 
2997 static inline void ftrace_trace_stack(struct trace_array *tr,
2998 				      struct trace_buffer *buffer,
2999 				      unsigned int trace_ctx,
3000 				      int skip, struct pt_regs *regs)
3001 {
3002 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3003 		return;
3004 
3005 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3006 }
3007 
3008 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3009 		   int skip)
3010 {
3011 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3012 
3013 	if (rcu_is_watching()) {
3014 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3015 		return;
3016 	}
3017 
3018 	/*
3019 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3020 	 * but if the above rcu_is_watching() failed, then the NMI
3021 	 * triggered someplace critical, and rcu_irq_enter() should
3022 	 * not be called from NMI.
3023 	 */
3024 	if (unlikely(in_nmi()))
3025 		return;
3026 
3027 	rcu_irq_enter_irqson();
3028 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3029 	rcu_irq_exit_irqson();
3030 }
3031 
3032 /**
3033  * trace_dump_stack - record a stack back trace in the trace buffer
3034  * @skip: Number of functions to skip (helper handlers)
3035  */
3036 void trace_dump_stack(int skip)
3037 {
3038 	if (tracing_disabled || tracing_selftest_running)
3039 		return;
3040 
3041 #ifndef CONFIG_UNWINDER_ORC
3042 	/* Skip 1 to skip this function. */
3043 	skip++;
3044 #endif
3045 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3046 			     tracing_gen_ctx(), skip, NULL);
3047 }
3048 EXPORT_SYMBOL_GPL(trace_dump_stack);
3049 
3050 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3051 static DEFINE_PER_CPU(int, user_stack_count);
3052 
3053 static void
3054 ftrace_trace_userstack(struct trace_array *tr,
3055 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3056 {
3057 	struct trace_event_call *call = &event_user_stack;
3058 	struct ring_buffer_event *event;
3059 	struct userstack_entry *entry;
3060 
3061 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3062 		return;
3063 
3064 	/*
3065 	 * NMIs can not handle page faults, even with fixups.
3066 	 * Saving the user stack can (and often does) fault.
3067 	 */
3068 	if (unlikely(in_nmi()))
3069 		return;
3070 
3071 	/*
3072 	 * prevent recursion, since the user stack tracing may
3073 	 * trigger other kernel events.
3074 	 */
3075 	preempt_disable();
3076 	if (__this_cpu_read(user_stack_count))
3077 		goto out;
3078 
3079 	__this_cpu_inc(user_stack_count);
3080 
3081 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3082 					    sizeof(*entry), trace_ctx);
3083 	if (!event)
3084 		goto out_drop_count;
3085 	entry	= ring_buffer_event_data(event);
3086 
3087 	entry->tgid		= current->tgid;
3088 	memset(&entry->caller, 0, sizeof(entry->caller));
3089 
3090 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3091 	if (!call_filter_check_discard(call, entry, buffer, event))
3092 		__buffer_unlock_commit(buffer, event);
3093 
3094  out_drop_count:
3095 	__this_cpu_dec(user_stack_count);
3096  out:
3097 	preempt_enable();
3098 }
3099 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3100 static void ftrace_trace_userstack(struct trace_array *tr,
3101 				   struct trace_buffer *buffer,
3102 				   unsigned int trace_ctx)
3103 {
3104 }
3105 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3106 
3107 #endif /* CONFIG_STACKTRACE */
3108 
3109 static inline void
3110 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3111 			  unsigned long long delta)
3112 {
3113 	entry->bottom_delta_ts = delta & U32_MAX;
3114 	entry->top_delta_ts = (delta >> 32);
3115 }
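
/*
 * The reader side (illustrative) reassembles the 64-bit delta from the
 * two 32-bit halves stored above:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */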
3116 
3117 void trace_last_func_repeats(struct trace_array *tr,
3118 			     struct trace_func_repeats *last_info,
3119 			     unsigned int trace_ctx)
3120 {
3121 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3122 	struct func_repeats_entry *entry;
3123 	struct ring_buffer_event *event;
3124 	u64 delta;
3125 
3126 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3127 					    sizeof(*entry), trace_ctx);
3128 	if (!event)
3129 		return;
3130 
3131 	delta = ring_buffer_event_time_stamp(buffer, event) -
3132 		last_info->ts_last_call;
3133 
3134 	entry = ring_buffer_event_data(event);
3135 	entry->ip = last_info->ip;
3136 	entry->parent_ip = last_info->parent_ip;
3137 	entry->count = last_info->count;
3138 	func_repeats_set_delta_ts(entry, delta);
3139 
3140 	__buffer_unlock_commit(buffer, event);
3141 }
3142 
3143 /* created for use with alloc_percpu */
3144 struct trace_buffer_struct {
3145 	int nesting;
3146 	char buffer[4][TRACE_BUF_SIZE];
3147 };
3148 
3149 static struct trace_buffer_struct *trace_percpu_buffer;
3150 
3151 /*
3152  * This allows for lockless recording.  If we're nested too deeply, then
3153  * this returns NULL.
3154  */
3155 static char *get_trace_buf(void)
3156 {
3157 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3158 
3159 	if (!buffer || buffer->nesting >= 4)
3160 		return NULL;
3161 
3162 	buffer->nesting++;
3163 
3164 	/* Interrupts must see nesting incremented before we use the buffer */
3165 	barrier();
3166 	return &buffer->buffer[buffer->nesting - 1][0];
3167 }
3168 
3169 static void put_trace_buf(void)
3170 {
3171 	/* Don't let the decrement of nesting leak before this */
3172 	barrier();
3173 	this_cpu_dec(trace_percpu_buffer->nesting);
3174 }
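
/*
 * Usage pattern (sketch): callers disable preemption and pair the two
 * helpers around use of the buffer, as trace_vbprintk() below does:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format up to TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */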
3175 
3176 static int alloc_percpu_trace_buffer(void)
3177 {
3178 	struct trace_buffer_struct *buffers;
3179 
3180 	if (trace_percpu_buffer)
3181 		return 0;
3182 
3183 	buffers = alloc_percpu(struct trace_buffer_struct);
3184 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3185 		return -ENOMEM;
3186 
3187 	trace_percpu_buffer = buffers;
3188 	return 0;
3189 }
3190 
3191 static int buffers_allocated;
3192 
3193 void trace_printk_init_buffers(void)
3194 {
3195 	if (buffers_allocated)
3196 		return;
3197 
3198 	if (alloc_percpu_trace_buffer())
3199 		return;
3200 
3201 	/* trace_printk() is for debug use only. Don't use it in production. */
3202 
3203 	pr_warn("\n");
3204 	pr_warn("**********************************************************\n");
3205 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3206 	pr_warn("**                                                      **\n");
3207 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3208 	pr_warn("**                                                      **\n");
3209 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3210 	pr_warn("** unsafe for production use.                           **\n");
3211 	pr_warn("**                                                      **\n");
3212 	pr_warn("** If you see this message and you are not debugging    **\n");
3213 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3214 	pr_warn("**                                                      **\n");
3215 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3216 	pr_warn("**********************************************************\n");
3217 
3218 	/* Expand the buffers to set size */
3219 	tracing_update_buffers();
3220 
3221 	buffers_allocated = 1;
3222 
3223 	/*
3224 	 * trace_printk_init_buffers() can be called by modules.
3225 	 * If that happens, then we need to start cmdline recording
3226 	 * directly here. If the global_trace.buffer is already
3227 	 * allocated here, then this was called by module code.
3228 	 */
3229 	if (global_trace.array_buffer.buffer)
3230 		tracing_start_cmdline_record();
3231 }
3232 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3233 
3234 void trace_printk_start_comm(void)
3235 {
3236 	/* Start tracing comms if trace printk is set */
3237 	if (!buffers_allocated)
3238 		return;
3239 	tracing_start_cmdline_record();
3240 }
3241 
3242 static void trace_printk_start_stop_comm(int enabled)
3243 {
3244 	if (!buffers_allocated)
3245 		return;
3246 
3247 	if (enabled)
3248 		tracing_start_cmdline_record();
3249 	else
3250 		tracing_stop_cmdline_record();
3251 }
3252 
3253 /**
3254  * trace_vbprintk - write binary msg to tracing buffer
3255  * @ip:    The address of the caller
3256  * @fmt:   The string format to write to the buffer
3257  * @args:  Arguments for @fmt
3258  */
3259 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3260 {
3261 	struct trace_event_call *call = &event_bprint;
3262 	struct ring_buffer_event *event;
3263 	struct trace_buffer *buffer;
3264 	struct trace_array *tr = &global_trace;
3265 	struct bprint_entry *entry;
3266 	unsigned int trace_ctx;
3267 	char *tbuffer;
3268 	int len = 0, size;
3269 
3270 	if (unlikely(tracing_selftest_running || tracing_disabled))
3271 		return 0;
3272 
3273 	/* Don't pollute graph traces with trace_vprintk internals */
3274 	pause_graph_tracing();
3275 
3276 	trace_ctx = tracing_gen_ctx();
3277 	preempt_disable_notrace();
3278 
3279 	tbuffer = get_trace_buf();
3280 	if (!tbuffer) {
3281 		len = 0;
3282 		goto out_nobuffer;
3283 	}
3284 
3285 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3286 
3287 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3288 		goto out_put;
3289 
3290 	size = sizeof(*entry) + sizeof(u32) * len;
3291 	buffer = tr->array_buffer.buffer;
3292 	ring_buffer_nest_start(buffer);
3293 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3294 					    trace_ctx);
3295 	if (!event)
3296 		goto out;
3297 	entry = ring_buffer_event_data(event);
3298 	entry->ip			= ip;
3299 	entry->fmt			= fmt;
3300 
3301 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3302 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3303 		__buffer_unlock_commit(buffer, event);
3304 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3305 	}
3306 
3307 out:
3308 	ring_buffer_nest_end(buffer);
3309 out_put:
3310 	put_trace_buf();
3311 
3312 out_nobuffer:
3313 	preempt_enable_notrace();
3314 	unpause_graph_tracing();
3315 
3316 	return len;
3317 }
3318 EXPORT_SYMBOL_GPL(trace_vbprintk);
3319 
3320 __printf(3, 0)
3321 static int
3322 __trace_array_vprintk(struct trace_buffer *buffer,
3323 		      unsigned long ip, const char *fmt, va_list args)
3324 {
3325 	struct trace_event_call *call = &event_print;
3326 	struct ring_buffer_event *event;
3327 	int len = 0, size;
3328 	struct print_entry *entry;
3329 	unsigned int trace_ctx;
3330 	char *tbuffer;
3331 
3332 	if (tracing_disabled || tracing_selftest_running)
3333 		return 0;
3334 
3335 	/* Don't pollute graph traces with trace_vprintk internals */
3336 	pause_graph_tracing();
3337 
3338 	trace_ctx = tracing_gen_ctx();
3339 	preempt_disable_notrace();
3340 
3341 
3342 	tbuffer = get_trace_buf();
3343 	if (!tbuffer) {
3344 		len = 0;
3345 		goto out_nobuffer;
3346 	}
3347 
3348 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3349 
3350 	size = sizeof(*entry) + len + 1;
3351 	ring_buffer_nest_start(buffer);
3352 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3353 					    trace_ctx);
3354 	if (!event)
3355 		goto out;
3356 	entry = ring_buffer_event_data(event);
3357 	entry->ip = ip;
3358 
3359 	memcpy(&entry->buf, tbuffer, len + 1);
3360 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3361 		__buffer_unlock_commit(buffer, event);
3362 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3363 	}
3364 
3365 out:
3366 	ring_buffer_nest_end(buffer);
3367 	put_trace_buf();
3368 
3369 out_nobuffer:
3370 	preempt_enable_notrace();
3371 	unpause_graph_tracing();
3372 
3373 	return len;
3374 }
3375 
3376 __printf(3, 0)
3377 int trace_array_vprintk(struct trace_array *tr,
3378 			unsigned long ip, const char *fmt, va_list args)
3379 {
3380 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3381 }
3382 
3383 /**
3384  * trace_array_printk - Print a message to a specific instance
3385  * @tr: The instance trace_array descriptor
3386  * @ip: The instruction pointer that this is called from.
3387  * @fmt: The format to print (printf format)
3388  *
3389  * If a subsystem sets up its own instance, it has the right to
3390  * printk strings into its tracing instance buffer using this
3391  * function. Note, this function will not write into the top level
3392  * buffer (use trace_printk() for that), as the top level buffer
3393  * should only contain events that can be individually disabled.
3394  * trace_printk() is only meant for debugging a kernel, and should
3395  * never be incorporated into normal use.
3396  *
3397  * trace_array_printk() can be used, as it will not add noise to the
3398  * top level tracing buffer.
3399  *
3400  * Note, trace_array_init_printk() must be called on @tr before this
3401  * can be used.
3402  */
3403 __printf(3, 0)
3404 int trace_array_printk(struct trace_array *tr,
3405 		       unsigned long ip, const char *fmt, ...)
3406 {
3407 	int ret;
3408 	va_list ap;
3409 
3410 	if (!tr)
3411 		return -ENOENT;
3412 
3413 	/* This is only allowed for created instances */
3414 	if (tr == &global_trace)
3415 		return 0;
3416 
3417 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3418 		return 0;
3419 
3420 	va_start(ap, fmt);
3421 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3422 	va_end(ap);
3423 	return ret;
3424 }
3425 EXPORT_SYMBOL_GPL(trace_array_printk);
3426 
3427 /**
3428  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3429  * @tr: The trace array to initialize the buffers for
3430  *
3431  * As trace_array_printk() only writes into instances, they are OK to
3432  * have in the kernel (unlike trace_printk()). This needs to be called
3433  * before trace_array_printk() can be used on a trace_array.
3434  */
3435 int trace_array_init_printk(struct trace_array *tr)
3436 {
3437 	if (!tr)
3438 		return -ENOENT;
3439 
3440 	/* This is only allowed for created instances */
3441 	if (tr == &global_trace)
3442 		return -EINVAL;
3443 
3444 	return alloc_percpu_trace_buffer();
3445 }
3446 EXPORT_SYMBOL_GPL(trace_array_init_printk);
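
/*
 * Illustrative usage (sketch, with a made-up instance name): a subsystem
 * that owns an instance would look it up, initialize the printk buffers
 * once, and then print into it:
 *
 *	struct trace_array *tr = trace_array_get_by_name("myinst");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 */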
3447 
3448 __printf(3, 4)
3449 int trace_array_printk_buf(struct trace_buffer *buffer,
3450 			   unsigned long ip, const char *fmt, ...)
3451 {
3452 	int ret;
3453 	va_list ap;
3454 
3455 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3456 		return 0;
3457 
3458 	va_start(ap, fmt);
3459 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3460 	va_end(ap);
3461 	return ret;
3462 }
3463 
3464 __printf(2, 0)
3465 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3466 {
3467 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3468 }
3469 EXPORT_SYMBOL_GPL(trace_vprintk);
3470 
3471 static void trace_iterator_increment(struct trace_iterator *iter)
3472 {
3473 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3474 
3475 	iter->idx++;
3476 	if (buf_iter)
3477 		ring_buffer_iter_advance(buf_iter);
3478 }
3479 
3480 static struct trace_entry *
3481 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3482 		unsigned long *lost_events)
3483 {
3484 	struct ring_buffer_event *event;
3485 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3486 
3487 	if (buf_iter) {
3488 		event = ring_buffer_iter_peek(buf_iter, ts);
3489 		if (lost_events)
3490 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3491 				(unsigned long)-1 : 0;
3492 	} else {
3493 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3494 					 lost_events);
3495 	}
3496 
3497 	if (event) {
3498 		iter->ent_size = ring_buffer_event_length(event);
3499 		return ring_buffer_event_data(event);
3500 	}
3501 	iter->ent_size = 0;
3502 	return NULL;
3503 }
3504 
3505 static struct trace_entry *
3506 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3507 		  unsigned long *missing_events, u64 *ent_ts)
3508 {
3509 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3510 	struct trace_entry *ent, *next = NULL;
3511 	unsigned long lost_events = 0, next_lost = 0;
3512 	int cpu_file = iter->cpu_file;
3513 	u64 next_ts = 0, ts;
3514 	int next_cpu = -1;
3515 	int next_size = 0;
3516 	int cpu;
3517 
3518 	/*
3519 	 * If we are in a per_cpu trace file, don't bother iterating over
3520 	 * all CPUs; peek at that CPU directly.
3521 	 */
3522 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3523 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3524 			return NULL;
3525 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3526 		if (ent_cpu)
3527 			*ent_cpu = cpu_file;
3528 
3529 		return ent;
3530 	}
3531 
3532 	for_each_tracing_cpu(cpu) {
3533 
3534 		if (ring_buffer_empty_cpu(buffer, cpu))
3535 			continue;
3536 
3537 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3538 
3539 		/*
3540 		 * Pick the entry with the smallest timestamp:
3541 		 */
3542 		if (ent && (!next || ts < next_ts)) {
3543 			next = ent;
3544 			next_cpu = cpu;
3545 			next_ts = ts;
3546 			next_lost = lost_events;
3547 			next_size = iter->ent_size;
3548 		}
3549 	}
3550 
3551 	iter->ent_size = next_size;
3552 
3553 	if (ent_cpu)
3554 		*ent_cpu = next_cpu;
3555 
3556 	if (ent_ts)
3557 		*ent_ts = next_ts;
3558 
3559 	if (missing_events)
3560 		*missing_events = next_lost;
3561 
3562 	return next;
3563 }
3564 
3565 #define STATIC_FMT_BUF_SIZE	128
3566 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3567 
3568 static char *trace_iter_expand_format(struct trace_iterator *iter)
3569 {
3570 	char *tmp;
3571 
3572 	/*
3573 	 * iter->tr is NULL when used with tp_printk, which means this
3574 	 * can get called where it is not safe to call krealloc().
3575 	 */
3576 	if (!iter->tr || iter->fmt == static_fmt_buf)
3577 		return NULL;
3578 
3579 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3580 		       GFP_KERNEL);
3581 	if (tmp) {
3582 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3583 		iter->fmt = tmp;
3584 	}
3585 
3586 	return tmp;
3587 }
3588 
3589 /* Returns true if the string is safe to dereference from an event */
3590 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3591 {
3592 	unsigned long addr = (unsigned long)str;
3593 	struct trace_event *trace_event;
3594 	struct trace_event_call *event;
3595 
3596 	/* OK if part of the event data */
3597 	if ((addr >= (unsigned long)iter->ent) &&
3598 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3599 		return true;
3600 
3601 	/* OK if part of the temp seq buffer */
3602 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3603 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3604 		return true;
3605 
3606 	/* Core rodata can not be freed */
3607 	if (is_kernel_rodata(addr))
3608 		return true;
3609 
3610 	if (trace_is_tracepoint_string(str))
3611 		return true;
3612 
3613 	/*
3614 	 * Now this could be a module event, referencing core module
3615 	 * data, which is OK.
3616 	 */
3617 	if (!iter->ent)
3618 		return false;
3619 
3620 	trace_event = ftrace_find_event(iter->ent->type);
3621 	if (!trace_event)
3622 		return false;
3623 
3624 	event = container_of(trace_event, struct trace_event_call, event);
3625 	if (!event->mod)
3626 		return false;
3627 
3628 	/* Would rather have rodata, but this will suffice */
3629 	if (within_module_core(addr, event->mod))
3630 		return true;
3631 
3632 	return false;
3633 }
3634 
3635 static const char *show_buffer(struct trace_seq *s)
3636 {
3637 	struct seq_buf *seq = &s->seq;
3638 
3639 	seq_buf_terminate(seq);
3640 
3641 	return seq->buffer;
3642 }
3643 
3644 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3645 
3646 static int test_can_verify_check(const char *fmt, ...)
3647 {
3648 	char buf[16];
3649 	va_list ap;
3650 	int ret;
3651 
3652 	/*
3653 	 * The verifier depends on vsnprintf() modifying the va_list
3654 	 * passed to it, i.e. on it being passed by reference. Some
3655 	 * architectures (like x86_32) pass it by value, which means that
3656 	 * vsnprintf() does not modify the va_list passed to it, and the
3657 	 * verifier would then need to be able to understand all the values
3658 	 * that vsnprintf can use. If it is passed by value, then the
3659 	 * verifier is disabled.
3660 	 */
3661 	va_start(ap, fmt);
3662 	vsnprintf(buf, 16, "%d", ap);
3663 	ret = va_arg(ap, int);
3664 	va_end(ap);
3665 
3666 	return ret;
3667 }
3668 
3669 static void test_can_verify(void)
3670 {
3671 	if (!test_can_verify_check("%d %d", 0, 1)) {
3672 		pr_info("trace event string verifier disabled\n");
3673 		static_branch_inc(&trace_no_verify);
3674 	}
3675 }
3676 
3677 /**
3678  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3679  * @iter: The iterator that holds the seq buffer and the event being printed
3680  * @fmt: The format used to print the event
3681  * @ap: The va_list holding the data to print from @fmt.
3682  *
3683  * This writes the data into the @iter->seq buffer using the data from
3684  * @fmt and @ap. If the format has a %s, then the source of the string
3685  * is examined to make sure it is safe to print, otherwise it will
3686  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3687  * pointer.
3688  */
3689 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3690 			 va_list ap)
3691 {
3692 	const char *p = fmt;
3693 	const char *str;
3694 	int i, j;
3695 
3696 	if (WARN_ON_ONCE(!fmt))
3697 		return;
3698 
3699 	if (static_branch_unlikely(&trace_no_verify))
3700 		goto print;
3701 
3702 	/* Don't bother checking when doing a ftrace_dump() */
3703 	if (iter->fmt == static_fmt_buf)
3704 		goto print;
3705 
3706 	while (*p) {
3707 		j = 0;
3708 
3709 		/* We only care about %s and variants */
3710 		for (i = 0; p[i]; i++) {
3711 			if (i + 1 >= iter->fmt_size) {
3712 				/*
3713 				 * If we can't expand the copy buffer,
3714 				 * just print it.
3715 				 */
3716 				if (!trace_iter_expand_format(iter))
3717 					goto print;
3718 			}
3719 
3720 			if (p[i] == '\\' && p[i+1]) {
3721 				i++;
3722 				continue;
3723 			}
3724 			if (p[i] == '%') {
3725 				/* Need to test cases like %08.*s */
3726 				for (j = 1; p[i+j]; j++) {
3727 					if (isdigit(p[i+j]) ||
3728 					    p[i+j] == '*' ||
3729 					    p[i+j] == '.')
3730 						continue;
3731 					break;
3732 				}
3733 				if (p[i+j] == 's')
3734 					break;
3735 			}
3736 			j = 0;
3737 		}
3738 		/* If no %s found then just print normally */
3739 		if (!p[i])
3740 			break;
3741 
3742 		/* Copy up to the %s, and print that */
3743 		strncpy(iter->fmt, p, i);
3744 		iter->fmt[i] = '\0';
3745 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3746 
3747 		/* The ap now points to the string data of the %s */
3748 		str = va_arg(ap, const char *);
3749 
3750 		/*
3751 		 * If you hit this warning, it is likely that the
3752 		 * trace event in question used %s on a string that
3753 		 * was saved at the time of the event, but may not be
3754 		 * around when the trace is read. Use __string(),
3755 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3756 		 * instead. See samples/trace_events/trace-events-sample.h
3757 		 * for reference.
3758 		 */
3759 		if (WARN_ONCE(!trace_safe_str(iter, str),
3760 			      "fmt: '%s' current_buffer: '%s'",
3761 			      fmt, show_buffer(&iter->seq))) {
3762 			int ret;
3763 
3764 			/* Try to safely read the string */
3765 			ret = strncpy_from_kernel_nofault(iter->fmt, str,
3766 							  iter->fmt_size);
3767 			if (ret < 0)
3768 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3769 			else
3770 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3771 						 str, iter->fmt);
3772 			str = "[UNSAFE-MEMORY]";
3773 			strcpy(iter->fmt, "%s");
3774 		} else {
3775 			strncpy(iter->fmt, p + i, j + 1);
3776 			iter->fmt[j+1] = '\0';
3777 		}
3778 		trace_seq_printf(&iter->seq, iter->fmt, str);
3779 
3780 		p += i + j + 1;
3781 	}
3782  print:
3783 	if (*p)
3784 		trace_seq_vprintf(&iter->seq, p, ap);
3785 }
3786 
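/*
 * Illustrative sketch (editor's note, not part of the original source):
 * the warning above points at the __string()/__assign_str()/__get_str()
 * helpers. A TRACE_EVENT() written with them copies the string into the
 * ring buffer at event time, so the %s in TP_printk() always dereferences
 * memory that trace_safe_str() accepts. The event and field names below
 * are hypothetical.
 *
 *	TRACE_EVENT(sample_str_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */
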
3787 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3788 {
3789 	const char *p, *new_fmt;
3790 	char *q;
3791 
3792 	if (WARN_ON_ONCE(!fmt))
3793 		return fmt;
3794 
3795 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3796 		return fmt;
3797 
3798 	p = fmt;
3799 	new_fmt = q = iter->fmt;
3800 	while (*p) {
3801 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3802 			if (!trace_iter_expand_format(iter))
3803 				return fmt;
3804 
3805 			q += iter->fmt - new_fmt;
3806 			new_fmt = iter->fmt;
3807 		}
3808 
3809 		*q++ = *p++;
3810 
3811 		/* Replace %p with %px */
3812 		if (p[-1] == '%') {
3813 			if (p[0] == '%') {
3814 				*q++ = *p++;
3815 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3816 				*q++ = *p++;
3817 				*q++ = 'x';
3818 			}
3819 		}
3820 	}
3821 	*q = '\0';
3822 
3823 	return new_fmt;
3824 }
3825 
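/*
 * Worked example (editor's note, not part of the original source),
 * using a hypothetical event format. With the "hash-ptr" trace option
 * cleared, trace_event_format() copies
 *
 *	"dev=%s ptr=%p pct=%d%%"
 *
 * into iter->fmt as
 *
 *	"dev=%s ptr=%px pct=%d%%"
 *
 * A bare %p (not followed by an alphanumeric extension such as %pS)
 * becomes %px so the real address is printed, while "%%" is copied
 * through untouched.
 */
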
3826 #define STATIC_TEMP_BUF_SIZE	128
3827 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3828 
3829 /* Find the next real entry, without updating the iterator itself */
3830 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3831 					  int *ent_cpu, u64 *ent_ts)
3832 {
3833 	/* __find_next_entry will reset ent_size */
3834 	int ent_size = iter->ent_size;
3835 	struct trace_entry *entry;
3836 
3837 	/*
3838 	 * If called from ftrace_dump(), then the iter->temp buffer
3839 	 * will be the static_temp_buf and not created from kmalloc.
3840 	 * If the entry size is greater than the buffer, we cannot
3841 	 * save it. Just return NULL in that case. This is only
3842 	 * used to add markers when two consecutive events' time
3843 	 * stamps have a large delta. See trace_print_lat_context().
3844 	 */
3845 	if (iter->temp == static_temp_buf &&
3846 	    STATIC_TEMP_BUF_SIZE < ent_size)
3847 		return NULL;
3848 
3849 	/*
3850 	 * __find_next_entry() may call peek_next_entry(), which may
3851 	 * call ring_buffer_peek(), and that may make the contents of
3852 	 * iter->ent undefined. Need to copy iter->ent now.
3853 	 */
3854 	if (iter->ent && iter->ent != iter->temp) {
3855 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3856 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3857 			void *temp;
3858 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3859 			if (!temp)
3860 				return NULL;
3861 			kfree(iter->temp);
3862 			iter->temp = temp;
3863 			iter->temp_size = iter->ent_size;
3864 		}
3865 		memcpy(iter->temp, iter->ent, iter->ent_size);
3866 		iter->ent = iter->temp;
3867 	}
3868 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3869 	/* Put back the original ent_size */
3870 	iter->ent_size = ent_size;
3871 
3872 	return entry;
3873 }
3874 
3875 /* Find the next real entry, and increment the iterator to the next entry */
3876 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3877 {
3878 	iter->ent = __find_next_entry(iter, &iter->cpu,
3879 				      &iter->lost_events, &iter->ts);
3880 
3881 	if (iter->ent)
3882 		trace_iterator_increment(iter);
3883 
3884 	return iter->ent ? iter : NULL;
3885 }
3886 
3887 static void trace_consume(struct trace_iterator *iter)
3888 {
3889 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3890 			    &iter->lost_events);
3891 }
3892 
3893 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3894 {
3895 	struct trace_iterator *iter = m->private;
3896 	int i = (int)*pos;
3897 	void *ent;
3898 
3899 	WARN_ON_ONCE(iter->leftover);
3900 
3901 	(*pos)++;
3902 
3903 	/* can't go backwards */
3904 	if (iter->idx > i)
3905 		return NULL;
3906 
3907 	if (iter->idx < 0)
3908 		ent = trace_find_next_entry_inc(iter);
3909 	else
3910 		ent = iter;
3911 
3912 	while (ent && iter->idx < i)
3913 		ent = trace_find_next_entry_inc(iter);
3914 
3915 	iter->pos = *pos;
3916 
3917 	return ent;
3918 }
3919 
3920 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3921 {
3922 	struct ring_buffer_iter *buf_iter;
3923 	unsigned long entries = 0;
3924 	u64 ts;
3925 
3926 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3927 
3928 	buf_iter = trace_buffer_iter(iter, cpu);
3929 	if (!buf_iter)
3930 		return;
3931 
3932 	ring_buffer_iter_reset(buf_iter);
3933 
3934 	/*
3935 	 * With the max latency tracers, it is possible that a reset
3936 	 * never took place on a CPU. This is evident from the
3937 	 * timestamp being before the start of the buffer.
3938 	 */
3939 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3940 		if (ts >= iter->array_buffer->time_start)
3941 			break;
3942 		entries++;
3943 		ring_buffer_iter_advance(buf_iter);
3944 	}
3945 
3946 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3947 }
3948 
3949 /*
3950  * The current tracer is copied to avoid taking a global lock
3951  * all around.
3952  */
3953 static void *s_start(struct seq_file *m, loff_t *pos)
3954 {
3955 	struct trace_iterator *iter = m->private;
3956 	struct trace_array *tr = iter->tr;
3957 	int cpu_file = iter->cpu_file;
3958 	void *p = NULL;
3959 	loff_t l = 0;
3960 	int cpu;
3961 
3962 	/*
3963 	 * Copy the tracer to avoid using a global lock all around.
3964 	 * iter->trace is a copy of current_trace; the name pointer may
3965 	 * be compared instead of using strcmp(), as iter->trace->name
3966 	 * will point to the same string as current_trace->name.
3967 	 */
3968 	mutex_lock(&trace_types_lock);
3969 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3970 		*iter->trace = *tr->current_trace;
3971 	mutex_unlock(&trace_types_lock);
3972 
3973 #ifdef CONFIG_TRACER_MAX_TRACE
3974 	if (iter->snapshot && iter->trace->use_max_tr)
3975 		return ERR_PTR(-EBUSY);
3976 #endif
3977 
3978 	if (!iter->snapshot)
3979 		atomic_inc(&trace_record_taskinfo_disabled);
3980 
3981 	if (*pos != iter->pos) {
3982 		iter->ent = NULL;
3983 		iter->cpu = 0;
3984 		iter->idx = -1;
3985 
3986 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3987 			for_each_tracing_cpu(cpu)
3988 				tracing_iter_reset(iter, cpu);
3989 		} else
3990 			tracing_iter_reset(iter, cpu_file);
3991 
3992 		iter->leftover = 0;
3993 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3994 			;
3995 
3996 	} else {
3997 		/*
3998 		 * If we overflowed the seq_file before, then we want
3999 		 * to just reuse the trace_seq buffer again.
4000 		 */
4001 		if (iter->leftover)
4002 			p = iter;
4003 		else {
4004 			l = *pos - 1;
4005 			p = s_next(m, p, &l);
4006 		}
4007 	}
4008 
4009 	trace_event_read_lock();
4010 	trace_access_lock(cpu_file);
4011 	return p;
4012 }
4013 
4014 static void s_stop(struct seq_file *m, void *p)
4015 {
4016 	struct trace_iterator *iter = m->private;
4017 
4018 #ifdef CONFIG_TRACER_MAX_TRACE
4019 	if (iter->snapshot && iter->trace->use_max_tr)
4020 		return;
4021 #endif
4022 
4023 	if (!iter->snapshot)
4024 		atomic_dec(&trace_record_taskinfo_disabled);
4025 
4026 	trace_access_unlock(iter->cpu_file);
4027 	trace_event_read_unlock();
4028 }
4029 
4030 static void
4031 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4032 		      unsigned long *entries, int cpu)
4033 {
4034 	unsigned long count;
4035 
4036 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4037 	/*
4038 	 * If this buffer has skipped entries, then we hold all
4039 	 * entries for the trace and we need to ignore the
4040 	 * ones before the time stamp.
4041 	 */
4042 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4043 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4044 		/* total is the same as the entries */
4045 		*total = count;
4046 	} else
4047 		*total = count +
4048 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4049 	*entries = count;
4050 }
4051 
4052 static void
4053 get_total_entries(struct array_buffer *buf,
4054 		  unsigned long *total, unsigned long *entries)
4055 {
4056 	unsigned long t, e;
4057 	int cpu;
4058 
4059 	*total = 0;
4060 	*entries = 0;
4061 
4062 	for_each_tracing_cpu(cpu) {
4063 		get_total_entries_cpu(buf, &t, &e, cpu);
4064 		*total += t;
4065 		*entries += e;
4066 	}
4067 }
4068 
4069 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4070 {
4071 	unsigned long total, entries;
4072 
4073 	if (!tr)
4074 		tr = &global_trace;
4075 
4076 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4077 
4078 	return entries;
4079 }
4080 
4081 unsigned long trace_total_entries(struct trace_array *tr)
4082 {
4083 	unsigned long total, entries;
4084 
4085 	if (!tr)
4086 		tr = &global_trace;
4087 
4088 	get_total_entries(&tr->array_buffer, &total, &entries);
4089 
4090 	return entries;
4091 }
4092 
4093 static void print_lat_help_header(struct seq_file *m)
4094 {
4095 	seq_puts(m, "#                    _------=> CPU#            \n"
4096 		    "#                   / _-----=> irqs-off        \n"
4097 		    "#                  | / _----=> need-resched    \n"
4098 		    "#                  || / _---=> hardirq/softirq \n"
4099 		    "#                  ||| / _--=> preempt-depth   \n"
4100 		    "#                  |||| /     delay            \n"
4101 		    "#  cmd     pid     ||||| time  |   caller      \n"
4102 		    "#     \\   /        |||||  \\    |   /         \n");
4103 }
4104 
4105 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4106 {
4107 	unsigned long total;
4108 	unsigned long entries;
4109 
4110 	get_total_entries(buf, &total, &entries);
4111 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4112 		   entries, total, num_online_cpus());
4113 	seq_puts(m, "#\n");
4114 }
4115 
4116 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4117 				   unsigned int flags)
4118 {
4119 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4120 
4121 	print_event_info(buf, m);
4122 
4123 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4124 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4125 }
4126 
4127 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4128 				       unsigned int flags)
4129 {
4130 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4131 	const char *space = "            ";
4132 	int prec = tgid ? 12 : 2;
4133 
4134 	print_event_info(buf, m);
4135 
4136 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4137 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4138 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4139 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4140 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4141 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4142 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4143 }
4144 
4145 void
4146 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4147 {
4148 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4149 	struct array_buffer *buf = iter->array_buffer;
4150 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4151 	struct tracer *type = iter->trace;
4152 	unsigned long entries;
4153 	unsigned long total;
4154 	const char *name = type->name;
4157 
4158 	get_total_entries(buf, &total, &entries);
4159 
4160 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4161 		   name, UTS_RELEASE);
4162 	seq_puts(m, "# -----------------------------------"
4163 		 "---------------------------------\n");
4164 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4165 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4166 		   nsecs_to_usecs(data->saved_latency),
4167 		   entries,
4168 		   total,
4169 		   buf->cpu,
4170 #if defined(CONFIG_PREEMPT_NONE)
4171 		   "server",
4172 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4173 		   "desktop",
4174 #elif defined(CONFIG_PREEMPT)
4175 		   "preempt",
4176 #elif defined(CONFIG_PREEMPT_RT)
4177 		   "preempt_rt",
4178 #else
4179 		   "unknown",
4180 #endif
4181 		   /* These are reserved for later use */
4182 		   0, 0, 0, 0);
4183 #ifdef CONFIG_SMP
4184 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4185 #else
4186 	seq_puts(m, ")\n");
4187 #endif
4188 	seq_puts(m, "#    -----------------\n");
4189 	seq_printf(m, "#    | task: %.16s-%d "
4190 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4191 		   data->comm, data->pid,
4192 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4193 		   data->policy, data->rt_priority);
4194 	seq_puts(m, "#    -----------------\n");
4195 
4196 	if (data->critical_start) {
4197 		seq_puts(m, "#  => started at: ");
4198 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4199 		trace_print_seq(m, &iter->seq);
4200 		seq_puts(m, "\n#  => ended at:   ");
4201 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4202 		trace_print_seq(m, &iter->seq);
4203 		seq_puts(m, "\n#\n");
4204 	}
4205 
4206 	seq_puts(m, "#\n");
4207 }
4208 
4209 static void test_cpu_buff_start(struct trace_iterator *iter)
4210 {
4211 	struct trace_seq *s = &iter->seq;
4212 	struct trace_array *tr = iter->tr;
4213 
4214 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4215 		return;
4216 
4217 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4218 		return;
4219 
4220 	if (cpumask_available(iter->started) &&
4221 	    cpumask_test_cpu(iter->cpu, iter->started))
4222 		return;
4223 
4224 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4225 		return;
4226 
4227 	if (cpumask_available(iter->started))
4228 		cpumask_set_cpu(iter->cpu, iter->started);
4229 
4230 	/* Don't print started cpu buffer for the first entry of the trace */
4231 	if (iter->idx > 1)
4232 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4233 				iter->cpu);
4234 }
4235 
4236 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4237 {
4238 	struct trace_array *tr = iter->tr;
4239 	struct trace_seq *s = &iter->seq;
4240 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4241 	struct trace_entry *entry;
4242 	struct trace_event *event;
4243 
4244 	entry = iter->ent;
4245 
4246 	test_cpu_buff_start(iter);
4247 
4248 	event = ftrace_find_event(entry->type);
4249 
4250 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4251 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4252 			trace_print_lat_context(iter);
4253 		else
4254 			trace_print_context(iter);
4255 	}
4256 
4257 	if (trace_seq_has_overflowed(s))
4258 		return TRACE_TYPE_PARTIAL_LINE;
4259 
4260 	if (event)
4261 		return event->funcs->trace(iter, sym_flags, event);
4262 
4263 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4264 
4265 	return trace_handle_return(s);
4266 }
4267 
4268 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4269 {
4270 	struct trace_array *tr = iter->tr;
4271 	struct trace_seq *s = &iter->seq;
4272 	struct trace_entry *entry;
4273 	struct trace_event *event;
4274 
4275 	entry = iter->ent;
4276 
4277 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4278 		trace_seq_printf(s, "%d %d %llu ",
4279 				 entry->pid, iter->cpu, iter->ts);
4280 
4281 	if (trace_seq_has_overflowed(s))
4282 		return TRACE_TYPE_PARTIAL_LINE;
4283 
4284 	event = ftrace_find_event(entry->type);
4285 	if (event)
4286 		return event->funcs->raw(iter, 0, event);
4287 
4288 	trace_seq_printf(s, "%d ?\n", entry->type);
4289 
4290 	return trace_handle_return(s);
4291 }
4292 
4293 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4294 {
4295 	struct trace_array *tr = iter->tr;
4296 	struct trace_seq *s = &iter->seq;
4297 	unsigned char newline = '\n';
4298 	struct trace_entry *entry;
4299 	struct trace_event *event;
4300 
4301 	entry = iter->ent;
4302 
4303 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4304 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4305 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4306 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4307 		if (trace_seq_has_overflowed(s))
4308 			return TRACE_TYPE_PARTIAL_LINE;
4309 	}
4310 
4311 	event = ftrace_find_event(entry->type);
4312 	if (event) {
4313 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4314 		if (ret != TRACE_TYPE_HANDLED)
4315 			return ret;
4316 	}
4317 
4318 	SEQ_PUT_FIELD(s, newline);
4319 
4320 	return trace_handle_return(s);
4321 }
4322 
4323 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4324 {
4325 	struct trace_array *tr = iter->tr;
4326 	struct trace_seq *s = &iter->seq;
4327 	struct trace_entry *entry;
4328 	struct trace_event *event;
4329 
4330 	entry = iter->ent;
4331 
4332 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4333 		SEQ_PUT_FIELD(s, entry->pid);
4334 		SEQ_PUT_FIELD(s, iter->cpu);
4335 		SEQ_PUT_FIELD(s, iter->ts);
4336 		if (trace_seq_has_overflowed(s))
4337 			return TRACE_TYPE_PARTIAL_LINE;
4338 	}
4339 
4340 	event = ftrace_find_event(entry->type);
4341 	return event ? event->funcs->binary(iter, 0, event) :
4342 		TRACE_TYPE_HANDLED;
4343 }
4344 
4345 int trace_empty(struct trace_iterator *iter)
4346 {
4347 	struct ring_buffer_iter *buf_iter;
4348 	int cpu;
4349 
4350 	/* If we are looking at one CPU buffer, only check that one */
4351 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4352 		cpu = iter->cpu_file;
4353 		buf_iter = trace_buffer_iter(iter, cpu);
4354 		if (buf_iter) {
4355 			if (!ring_buffer_iter_empty(buf_iter))
4356 				return 0;
4357 		} else {
4358 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4359 				return 0;
4360 		}
4361 		return 1;
4362 	}
4363 
4364 	for_each_tracing_cpu(cpu) {
4365 		buf_iter = trace_buffer_iter(iter, cpu);
4366 		if (buf_iter) {
4367 			if (!ring_buffer_iter_empty(buf_iter))
4368 				return 0;
4369 		} else {
4370 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4371 				return 0;
4372 		}
4373 	}
4374 
4375 	return 1;
4376 }
4377 
4378 /*  Called with trace_event_read_lock() held. */
4379 enum print_line_t print_trace_line(struct trace_iterator *iter)
4380 {
4381 	struct trace_array *tr = iter->tr;
4382 	unsigned long trace_flags = tr->trace_flags;
4383 	enum print_line_t ret;
4384 
4385 	if (iter->lost_events) {
4386 		if (iter->lost_events == (unsigned long)-1)
4387 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4388 					 iter->cpu);
4389 		else
4390 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4391 					 iter->cpu, iter->lost_events);
4392 		if (trace_seq_has_overflowed(&iter->seq))
4393 			return TRACE_TYPE_PARTIAL_LINE;
4394 	}
4395 
4396 	if (iter->trace && iter->trace->print_line) {
4397 		ret = iter->trace->print_line(iter);
4398 		if (ret != TRACE_TYPE_UNHANDLED)
4399 			return ret;
4400 	}
4401 
4402 	if (iter->ent->type == TRACE_BPUTS &&
4403 			trace_flags & TRACE_ITER_PRINTK &&
4404 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4405 		return trace_print_bputs_msg_only(iter);
4406 
4407 	if (iter->ent->type == TRACE_BPRINT &&
4408 			trace_flags & TRACE_ITER_PRINTK &&
4409 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4410 		return trace_print_bprintk_msg_only(iter);
4411 
4412 	if (iter->ent->type == TRACE_PRINT &&
4413 			trace_flags & TRACE_ITER_PRINTK &&
4414 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4415 		return trace_print_printk_msg_only(iter);
4416 
4417 	if (trace_flags & TRACE_ITER_BIN)
4418 		return print_bin_fmt(iter);
4419 
4420 	if (trace_flags & TRACE_ITER_HEX)
4421 		return print_hex_fmt(iter);
4422 
4423 	if (trace_flags & TRACE_ITER_RAW)
4424 		return print_raw_fmt(iter);
4425 
4426 	return print_trace_fmt(iter);
4427 }
4428 
4429 void trace_latency_header(struct seq_file *m)
4430 {
4431 	struct trace_iterator *iter = m->private;
4432 	struct trace_array *tr = iter->tr;
4433 
4434 	/* print nothing if the buffers are empty */
4435 	if (trace_empty(iter))
4436 		return;
4437 
4438 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4439 		print_trace_header(m, iter);
4440 
4441 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4442 		print_lat_help_header(m);
4443 }
4444 
4445 void trace_default_header(struct seq_file *m)
4446 {
4447 	struct trace_iterator *iter = m->private;
4448 	struct trace_array *tr = iter->tr;
4449 	unsigned long trace_flags = tr->trace_flags;
4450 
4451 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4452 		return;
4453 
4454 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4455 		/* print nothing if the buffers are empty */
4456 		if (trace_empty(iter))
4457 			return;
4458 		print_trace_header(m, iter);
4459 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4460 			print_lat_help_header(m);
4461 	} else {
4462 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4463 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4464 				print_func_help_header_irq(iter->array_buffer,
4465 							   m, trace_flags);
4466 			else
4467 				print_func_help_header(iter->array_buffer, m,
4468 						       trace_flags);
4469 		}
4470 	}
4471 }
4472 
4473 static void test_ftrace_alive(struct seq_file *m)
4474 {
4475 	if (!ftrace_is_dead())
4476 		return;
4477 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4478 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4479 }
4480 
4481 #ifdef CONFIG_TRACER_MAX_TRACE
4482 static void show_snapshot_main_help(struct seq_file *m)
4483 {
4484 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4485 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4486 		    "#                      Takes a snapshot of the main buffer.\n"
4487 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4488 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4489 		    "#                       is not a '0' or '1')\n");
4490 }
4491 
4492 static void show_snapshot_percpu_help(struct seq_file *m)
4493 {
4494 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4495 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4496 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4497 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4498 #else
4499 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4500 		    "#                     Must use main snapshot file to allocate.\n");
4501 #endif
4502 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4503 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4504 		    "#                       is not a '0' or '1')\n");
4505 }
4506 
4507 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4508 {
4509 	if (iter->tr->allocated_snapshot)
4510 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4511 	else
4512 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4513 
4514 	seq_puts(m, "# Snapshot commands:\n");
4515 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4516 		show_snapshot_main_help(m);
4517 	else
4518 		show_snapshot_percpu_help(m);
4519 }
4520 #else
4521 /* Should never be called */
4522 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4523 #endif
4524 
4525 static int s_show(struct seq_file *m, void *v)
4526 {
4527 	struct trace_iterator *iter = v;
4528 	int ret;
4529 
4530 	if (iter->ent == NULL) {
4531 		if (iter->tr) {
4532 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4533 			seq_puts(m, "#\n");
4534 			test_ftrace_alive(m);
4535 		}
4536 		if (iter->snapshot && trace_empty(iter))
4537 			print_snapshot_help(m, iter);
4538 		else if (iter->trace && iter->trace->print_header)
4539 			iter->trace->print_header(m);
4540 		else
4541 			trace_default_header(m);
4542 
4543 	} else if (iter->leftover) {
4544 		/*
4545 		 * If we filled the seq_file buffer earlier, we
4546 		 * want to just show it now.
4547 		 */
4548 		ret = trace_print_seq(m, &iter->seq);
4549 
4550 		/* ret should this time be zero, but you never know */
4551 		iter->leftover = ret;
4552 
4553 	} else {
4554 		print_trace_line(iter);
4555 		ret = trace_print_seq(m, &iter->seq);
4556 		/*
4557 		 * If we overflow the seq_file buffer, then it will
4558 		 * ask us for this data again at start up.
4559 		 * Use that instead.
4560 		 *  ret is 0 if seq_file write succeeded.
4561 		 *        -1 otherwise.
4562 		 */
4563 		iter->leftover = ret;
4564 	}
4565 
4566 	return 0;
4567 }
4568 
4569 /*
4570  * Should be used after trace_array_get(); trace_types_lock
4571  * ensures that i_cdev was already initialized.
4572  */
4573 static inline int tracing_get_cpu(struct inode *inode)
4574 {
4575 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4576 		return (long)inode->i_cdev - 1;
4577 	return RING_BUFFER_ALL_CPUS;
4578 }
4579 
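/*
 * Illustrative sketch (editor's note, not part of the original source):
 * the encoding that tracing_get_cpu() undoes. The per-CPU file creation
 * side (trace_create_cpu_file(), later in this file) is expected to
 * stash the CPU number biased by one in i_cdev, roughly:
 *
 *	d_inode(dentry)->i_cdev = (void *)(cpu + 1);
 *
 * so that an untouched i_cdev (NULL) still means RING_BUFFER_ALL_CPUS.
 */
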
4580 static const struct seq_operations tracer_seq_ops = {
4581 	.start		= s_start,
4582 	.next		= s_next,
4583 	.stop		= s_stop,
4584 	.show		= s_show,
4585 };
4586 
4587 static struct trace_iterator *
4588 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4589 {
4590 	struct trace_array *tr = inode->i_private;
4591 	struct trace_iterator *iter;
4592 	int cpu;
4593 
4594 	if (tracing_disabled)
4595 		return ERR_PTR(-ENODEV);
4596 
4597 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4598 	if (!iter)
4599 		return ERR_PTR(-ENOMEM);
4600 
4601 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4602 				    GFP_KERNEL);
4603 	if (!iter->buffer_iter)
4604 		goto release;
4605 
4606 	/*
4607 	 * trace_find_next_entry() may need to save off iter->ent.
4608 	 * It will place it into the iter->temp buffer. As most
4609 	 * events are less than 128 bytes, allocate a buffer of that size.
4610 	 * If one is greater, then trace_find_next_entry() will
4611 	 * allocate a new buffer to adjust for the bigger iter->ent.
4612 	 * It's not critical if it fails to get allocated here.
4613 	 */
4614 	iter->temp = kmalloc(128, GFP_KERNEL);
4615 	if (iter->temp)
4616 		iter->temp_size = 128;
4617 
4618 	/*
4619 	 * trace_event_printf() may need to modify the given format
4620 	 * string to replace %p with %px so that it shows the real address
4621 	 * instead of a hashed value. However, that is only needed for
4622 	 * event tracing; other tracers may not need it. Defer the
4623 	 * allocation until it is needed.
4624 	 */
4625 	iter->fmt = NULL;
4626 	iter->fmt_size = 0;
4627 
4628 	/*
4629 	 * We make a copy of the current tracer to avoid concurrent
4630 	 * changes on it while we are reading.
4631 	 */
4632 	mutex_lock(&trace_types_lock);
4633 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4634 	if (!iter->trace)
4635 		goto fail;
4636 
4637 	*iter->trace = *tr->current_trace;
4638 
4639 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4640 		goto fail;
4641 
4642 	iter->tr = tr;
4643 
4644 #ifdef CONFIG_TRACER_MAX_TRACE
4645 	/* Currently only the top directory has a snapshot */
4646 	if (tr->current_trace->print_max || snapshot)
4647 		iter->array_buffer = &tr->max_buffer;
4648 	else
4649 #endif
4650 		iter->array_buffer = &tr->array_buffer;
4651 	iter->snapshot = snapshot;
4652 	iter->pos = -1;
4653 	iter->cpu_file = tracing_get_cpu(inode);
4654 	mutex_init(&iter->mutex);
4655 
4656 	/* Notify the tracer early; before we stop tracing. */
4657 	if (iter->trace->open)
4658 		iter->trace->open(iter);
4659 
4660 	/* Annotate start of buffers if we had overruns */
4661 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4662 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4663 
4664 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4665 	if (trace_clocks[tr->clock_id].in_ns)
4666 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4667 
4668 	/*
4669 	 * If pause-on-trace is enabled, then stop the trace while
4670 	 * dumping, unless this is the "snapshot" file
4671 	 */
4672 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4673 		tracing_stop_tr(tr);
4674 
4675 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4676 		for_each_tracing_cpu(cpu) {
4677 			iter->buffer_iter[cpu] =
4678 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4679 							 cpu, GFP_KERNEL);
4680 		}
4681 		ring_buffer_read_prepare_sync();
4682 		for_each_tracing_cpu(cpu) {
4683 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4684 			tracing_iter_reset(iter, cpu);
4685 		}
4686 	} else {
4687 		cpu = iter->cpu_file;
4688 		iter->buffer_iter[cpu] =
4689 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4690 						 cpu, GFP_KERNEL);
4691 		ring_buffer_read_prepare_sync();
4692 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4693 		tracing_iter_reset(iter, cpu);
4694 	}
4695 
4696 	mutex_unlock(&trace_types_lock);
4697 
4698 	return iter;
4699 
4700  fail:
4701 	mutex_unlock(&trace_types_lock);
4702 	kfree(iter->trace);
4703 	kfree(iter->temp);
4704 	kfree(iter->buffer_iter);
4705 release:
4706 	seq_release_private(inode, file);
4707 	return ERR_PTR(-ENOMEM);
4708 }
4709 
4710 int tracing_open_generic(struct inode *inode, struct file *filp)
4711 {
4712 	int ret;
4713 
4714 	ret = tracing_check_open_get_tr(NULL);
4715 	if (ret)
4716 		return ret;
4717 
4718 	filp->private_data = inode->i_private;
4719 	return 0;
4720 }
4721 
4722 bool tracing_is_disabled(void)
4723 {
4724 	return tracing_disabled;
4725 }
4726 
4727 /*
4728  * Open and update trace_array ref count.
4729  * Must have the current trace_array passed to it.
4730  */
4731 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4732 {
4733 	struct trace_array *tr = inode->i_private;
4734 	int ret;
4735 
4736 	ret = tracing_check_open_get_tr(tr);
4737 	if (ret)
4738 		return ret;
4739 
4740 	filp->private_data = inode->i_private;
4741 
4742 	return 0;
4743 }
4744 
4745 static int tracing_release(struct inode *inode, struct file *file)
4746 {
4747 	struct trace_array *tr = inode->i_private;
4748 	struct seq_file *m = file->private_data;
4749 	struct trace_iterator *iter;
4750 	int cpu;
4751 
4752 	if (!(file->f_mode & FMODE_READ)) {
4753 		trace_array_put(tr);
4754 		return 0;
4755 	}
4756 
4757 	/* Writes do not use seq_file */
4758 	iter = m->private;
4759 	mutex_lock(&trace_types_lock);
4760 
4761 	for_each_tracing_cpu(cpu) {
4762 		if (iter->buffer_iter[cpu])
4763 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4764 	}
4765 
4766 	if (iter->trace && iter->trace->close)
4767 		iter->trace->close(iter);
4768 
4769 	if (!iter->snapshot && tr->stop_count)
4770 		/* reenable tracing if it was previously enabled */
4771 		tracing_start_tr(tr);
4772 
4773 	__trace_array_put(tr);
4774 
4775 	mutex_unlock(&trace_types_lock);
4776 
4777 	mutex_destroy(&iter->mutex);
4778 	free_cpumask_var(iter->started);
4779 	kfree(iter->fmt);
4780 	kfree(iter->temp);
4781 	kfree(iter->trace);
4782 	kfree(iter->buffer_iter);
4783 	seq_release_private(inode, file);
4784 
4785 	return 0;
4786 }
4787 
4788 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4789 {
4790 	struct trace_array *tr = inode->i_private;
4791 
4792 	trace_array_put(tr);
4793 	return 0;
4794 }
4795 
4796 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4797 {
4798 	struct trace_array *tr = inode->i_private;
4799 
4800 	trace_array_put(tr);
4801 
4802 	return single_release(inode, file);
4803 }
4804 
4805 static int tracing_open(struct inode *inode, struct file *file)
4806 {
4807 	struct trace_array *tr = inode->i_private;
4808 	struct trace_iterator *iter;
4809 	int ret;
4810 
4811 	ret = tracing_check_open_get_tr(tr);
4812 	if (ret)
4813 		return ret;
4814 
4815 	/* If this file was open for write, then erase contents */
4816 	/* If this file was opened for write, then erase its contents */
4817 		int cpu = tracing_get_cpu(inode);
4818 		struct array_buffer *trace_buf = &tr->array_buffer;
4819 
4820 #ifdef CONFIG_TRACER_MAX_TRACE
4821 		if (tr->current_trace->print_max)
4822 			trace_buf = &tr->max_buffer;
4823 #endif
4824 
4825 		if (cpu == RING_BUFFER_ALL_CPUS)
4826 			tracing_reset_online_cpus(trace_buf);
4827 		else
4828 			tracing_reset_cpu(trace_buf, cpu);
4829 	}
4830 
4831 	if (file->f_mode & FMODE_READ) {
4832 		iter = __tracing_open(inode, file, false);
4833 		if (IS_ERR(iter))
4834 			ret = PTR_ERR(iter);
4835 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4836 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4837 	}
4838 
4839 	if (ret < 0)
4840 		trace_array_put(tr);
4841 
4842 	return ret;
4843 }
4844 
4845 /*
4846  * Some tracers are not suitable for instance buffers.
4847  * A tracer is always available for the global array (toplevel)
4848  * or if it explicitly states that it is.
4849  */
4850 static bool
4851 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4852 {
4853 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4854 }
4855 
4856 /* Find the next tracer that this trace array may use */
4857 static struct tracer *
4858 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4859 {
4860 	while (t && !trace_ok_for_array(t, tr))
4861 		t = t->next;
4862 
4863 	return t;
4864 }
4865 
4866 static void *
4867 t_next(struct seq_file *m, void *v, loff_t *pos)
4868 {
4869 	struct trace_array *tr = m->private;
4870 	struct tracer *t = v;
4871 
4872 	(*pos)++;
4873 
4874 	if (t)
4875 		t = get_tracer_for_array(tr, t->next);
4876 
4877 	return t;
4878 }
4879 
4880 static void *t_start(struct seq_file *m, loff_t *pos)
4881 {
4882 	struct trace_array *tr = m->private;
4883 	struct tracer *t;
4884 	loff_t l = 0;
4885 
4886 	mutex_lock(&trace_types_lock);
4887 
4888 	t = get_tracer_for_array(tr, trace_types);
4889 	for (; t && l < *pos; t = t_next(m, t, &l))
4890 			;
4891 
4892 	return t;
4893 }
4894 
4895 static void t_stop(struct seq_file *m, void *p)
4896 {
4897 	mutex_unlock(&trace_types_lock);
4898 }
4899 
4900 static int t_show(struct seq_file *m, void *v)
4901 {
4902 	struct tracer *t = v;
4903 
4904 	if (!t)
4905 		return 0;
4906 
4907 	seq_puts(m, t->name);
4908 	if (t->next)
4909 		seq_putc(m, ' ');
4910 	else
4911 		seq_putc(m, '\n');
4912 
4913 	return 0;
4914 }
4915 
4916 static const struct seq_operations show_traces_seq_ops = {
4917 	.start		= t_start,
4918 	.next		= t_next,
4919 	.stop		= t_stop,
4920 	.show		= t_show,
4921 };
4922 
4923 static int show_traces_open(struct inode *inode, struct file *file)
4924 {
4925 	struct trace_array *tr = inode->i_private;
4926 	struct seq_file *m;
4927 	int ret;
4928 
4929 	ret = tracing_check_open_get_tr(tr);
4930 	if (ret)
4931 		return ret;
4932 
4933 	ret = seq_open(file, &show_traces_seq_ops);
4934 	if (ret) {
4935 		trace_array_put(tr);
4936 		return ret;
4937 	}
4938 
4939 	m = file->private_data;
4940 	m->private = tr;
4941 
4942 	return 0;
4943 }
4944 
4945 static int show_traces_release(struct inode *inode, struct file *file)
4946 {
4947 	struct trace_array *tr = inode->i_private;
4948 
4949 	trace_array_put(tr);
4950 	return seq_release(inode, file);
4951 }
4952 
4953 static ssize_t
4954 tracing_write_stub(struct file *filp, const char __user *ubuf,
4955 		   size_t count, loff_t *ppos)
4956 {
4957 	return count;
4958 }
4959 
4960 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4961 {
4962 	int ret;
4963 
4964 	if (file->f_mode & FMODE_READ)
4965 		ret = seq_lseek(file, offset, whence);
4966 	else
4967 		file->f_pos = ret = 0;
4968 
4969 	return ret;
4970 }
4971 
4972 static const struct file_operations tracing_fops = {
4973 	.open		= tracing_open,
4974 	.read		= seq_read,
4975 	.write		= tracing_write_stub,
4976 	.llseek		= tracing_lseek,
4977 	.release	= tracing_release,
4978 };
4979 
4980 static const struct file_operations show_traces_fops = {
4981 	.open		= show_traces_open,
4982 	.read		= seq_read,
4983 	.llseek		= seq_lseek,
4984 	.release	= show_traces_release,
4985 };
4986 
4987 static ssize_t
4988 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4989 		     size_t count, loff_t *ppos)
4990 {
4991 	struct trace_array *tr = file_inode(filp)->i_private;
4992 	char *mask_str;
4993 	int len;
4994 
4995 	len = snprintf(NULL, 0, "%*pb\n",
4996 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4997 	mask_str = kmalloc(len, GFP_KERNEL);
4998 	if (!mask_str)
4999 		return -ENOMEM;
5000 
5001 	len = snprintf(mask_str, len, "%*pb\n",
5002 		       cpumask_pr_args(tr->tracing_cpumask));
5003 	if (len >= count) {
5004 		count = -EINVAL;
5005 		goto out_err;
5006 	}
5007 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5008 
5009 out_err:
5010 	kfree(mask_str);
5011 
5012 	return count;
5013 }
5014 
5015 int tracing_set_cpumask(struct trace_array *tr,
5016 			cpumask_var_t tracing_cpumask_new)
5017 {
5018 	int cpu;
5019 
5020 	if (!tr)
5021 		return -EINVAL;
5022 
5023 	local_irq_disable();
5024 	arch_spin_lock(&tr->max_lock);
5025 	for_each_tracing_cpu(cpu) {
5026 		/*
5027 		 * Increase/decrease the disabled counter if we are
5028 		 * about to flip a bit in the cpumask:
5029 		 */
5030 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5031 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5032 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5033 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5034 		}
5035 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5036 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5037 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5038 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5039 		}
5040 	}
5041 	arch_spin_unlock(&tr->max_lock);
5042 	local_irq_enable();
5043 
5044 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5045 
5046 	return 0;
5047 }
5048 
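/*
 * Usage sketch (editor's note, not part of the original source): this
 * is normally reached through the tracefs "tracing_cpumask" file via
 * tracing_cpumask_write() below. For example, on a 4-CPU system:
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * parses "3" as a hex mask (CPUs 0 and 1), disables recording on
 * CPUs 2 and 3, and enables it on CPUs 0 and 1.
 */
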
5049 static ssize_t
5050 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5051 		      size_t count, loff_t *ppos)
5052 {
5053 	struct trace_array *tr = file_inode(filp)->i_private;
5054 	cpumask_var_t tracing_cpumask_new;
5055 	int err;
5056 
5057 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5058 		return -ENOMEM;
5059 
5060 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5061 	if (err)
5062 		goto err_free;
5063 
5064 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5065 	if (err)
5066 		goto err_free;
5067 
5068 	free_cpumask_var(tracing_cpumask_new);
5069 
5070 	return count;
5071 
5072 err_free:
5073 	free_cpumask_var(tracing_cpumask_new);
5074 
5075 	return err;
5076 }
5077 
5078 static const struct file_operations tracing_cpumask_fops = {
5079 	.open		= tracing_open_generic_tr,
5080 	.read		= tracing_cpumask_read,
5081 	.write		= tracing_cpumask_write,
5082 	.release	= tracing_release_generic_tr,
5083 	.llseek		= generic_file_llseek,
5084 };
5085 
5086 static int tracing_trace_options_show(struct seq_file *m, void *v)
5087 {
5088 	struct tracer_opt *trace_opts;
5089 	struct trace_array *tr = m->private;
5090 	u32 tracer_flags;
5091 	int i;
5092 
5093 	mutex_lock(&trace_types_lock);
5094 	tracer_flags = tr->current_trace->flags->val;
5095 	trace_opts = tr->current_trace->flags->opts;
5096 
5097 	for (i = 0; trace_options[i]; i++) {
5098 		if (tr->trace_flags & (1 << i))
5099 			seq_printf(m, "%s\n", trace_options[i]);
5100 		else
5101 			seq_printf(m, "no%s\n", trace_options[i]);
5102 	}
5103 
5104 	for (i = 0; trace_opts[i].name; i++) {
5105 		if (tracer_flags & trace_opts[i].bit)
5106 			seq_printf(m, "%s\n", trace_opts[i].name);
5107 		else
5108 			seq_printf(m, "no%s\n", trace_opts[i].name);
5109 	}
5110 	mutex_unlock(&trace_types_lock);
5111 
5112 	return 0;
5113 }
5114 
5115 static int __set_tracer_option(struct trace_array *tr,
5116 			       struct tracer_flags *tracer_flags,
5117 			       struct tracer_opt *opts, int neg)
5118 {
5119 	struct tracer *trace = tracer_flags->trace;
5120 	int ret;
5121 
5122 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5123 	if (ret)
5124 		return ret;
5125 
5126 	if (neg)
5127 		tracer_flags->val &= ~opts->bit;
5128 	else
5129 		tracer_flags->val |= opts->bit;
5130 	return 0;
5131 }
5132 
5133 /* Try to assign a tracer specific option */
5134 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5135 {
5136 	struct tracer *trace = tr->current_trace;
5137 	struct tracer_flags *tracer_flags = trace->flags;
5138 	struct tracer_opt *opts = NULL;
5139 	int i;
5140 
5141 	for (i = 0; tracer_flags->opts[i].name; i++) {
5142 		opts = &tracer_flags->opts[i];
5143 
5144 		if (strcmp(cmp, opts->name) == 0)
5145 			return __set_tracer_option(tr, trace->flags, opts, neg);
5146 	}
5147 
5148 	return -EINVAL;
5149 }
5150 
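/*
 * Illustrative sketch (editor's note, not part of the original source):
 * the shape of the tracer-specific flags that set_tracer_option() walks.
 * A tracer (hypothetical names below) typically declares its options
 * with TRACER_OPT() and terminates the array with an empty entry, which
 * is what stops the name walk above:
 *
 *	#define MY_OPT_VERBOSE	0x1
 *
 *	static struct tracer_opt my_tracer_opts[] = {
 *		{ TRACER_OPT(verbose, MY_OPT_VERBOSE) },
 *		{ }
 *	};
 *
 *	static struct tracer_flags my_tracer_flags = {
 *		.val  = 0,
 *		.opts = my_tracer_opts,
 *	};
 *
 * Writing "verbose" or "noverbose" to trace_options then reaches
 * __set_tracer_option(), which calls the tracer's ->set_flag() before
 * flipping the bit.
 */
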
5151 /* Some tracers require overwrite to stay enabled */
5152 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5153 {
5154 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5155 		return -1;
5156 
5157 	return 0;
5158 }
5159 
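/*
 * Illustrative sketch (editor's note, not part of the original source):
 * how a latency tracer might wire trace_keep_overwrite() in, by pointing
 * its ->flag_changed callback (called from set_tracer_flag() below) at a
 * handler that defers to it. The tracer and handler names are made up.
 *
 *	static int my_flag_changed(struct trace_array *tr, u32 mask, int set)
 *	{
 *		return trace_keep_overwrite(tr->current_trace, mask, set);
 *	}
 *
 *	static struct tracer my_latency_tracer __read_mostly = {
 *		.name		= "my_latency",
 *		.flag_changed	= my_flag_changed,
 *	};
 */
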
5160 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5161 {
5162 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5163 	    (mask == TRACE_ITER_RECORD_CMD))
5164 		lockdep_assert_held(&event_mutex);
5165 
5166 	/* do nothing if the flag is already in the requested state */
5167 	if (!!(tr->trace_flags & mask) == !!enabled)
5168 		return 0;
5169 
5170 	/* Give the tracer a chance to approve the change */
5171 	if (tr->current_trace->flag_changed)
5172 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5173 			return -EINVAL;
5174 
5175 	if (enabled)
5176 		tr->trace_flags |= mask;
5177 	else
5178 		tr->trace_flags &= ~mask;
5179 
5180 	if (mask == TRACE_ITER_RECORD_CMD)
5181 		trace_event_enable_cmd_record(enabled);
5182 
5183 	if (mask == TRACE_ITER_RECORD_TGID) {
5184 		if (!tgid_map)
5185 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5186 					   sizeof(*tgid_map),
5187 					   GFP_KERNEL);
5188 		if (!tgid_map) {
5189 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5190 			return -ENOMEM;
5191 		}
5192 
5193 		trace_event_enable_tgid_record(enabled);
5194 	}
5195 
5196 	if (mask == TRACE_ITER_EVENT_FORK)
5197 		trace_event_follow_fork(tr, enabled);
5198 
5199 	if (mask == TRACE_ITER_FUNC_FORK)
5200 		ftrace_pid_follow_fork(tr, enabled);
5201 
5202 	if (mask == TRACE_ITER_OVERWRITE) {
5203 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5204 #ifdef CONFIG_TRACER_MAX_TRACE
5205 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5206 #endif
5207 	}
5208 
5209 	if (mask == TRACE_ITER_PRINTK) {
5210 		trace_printk_start_stop_comm(enabled);
5211 		trace_printk_control(enabled);
5212 	}
5213 
5214 	return 0;
5215 }
5216 
5217 int trace_set_options(struct trace_array *tr, char *option)
5218 {
5219 	char *cmp;
5220 	int neg = 0;
5221 	int ret;
5222 	size_t orig_len = strlen(option);
5223 	int len;
5224 
5225 	cmp = strstrip(option);
5226 
5227 	len = str_has_prefix(cmp, "no");
5228 	if (len)
5229 		neg = 1;
5230 
5231 	cmp += len;
5232 
5233 	mutex_lock(&event_mutex);
5234 	mutex_lock(&trace_types_lock);
5235 
5236 	ret = match_string(trace_options, -1, cmp);
5237 	/* If it is not a core option, test the tracer-specific options */
5238 	if (ret < 0)
5239 		ret = set_tracer_option(tr, cmp, neg);
5240 	else
5241 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5242 
5243 	mutex_unlock(&trace_types_lock);
5244 	mutex_unlock(&event_mutex);
5245 
5246 	/*
5247 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5248 	 * turn it back into a space.
5249 	 */
5250 	if (orig_len > strlen(option))
5251 		option[strlen(option)] = ' ';
5252 
5253 	return ret;
5254 }
5255 
5256 static void __init apply_trace_boot_options(void)
5257 {
5258 	char *buf = trace_boot_options_buf;
5259 	char *option;
5260 
5261 	while (true) {
5262 		option = strsep(&buf, ",");
5263 
5264 		if (!option)
5265 			break;
5266 
5267 		if (*option)
5268 			trace_set_options(&global_trace, option);
5269 
5270 		/* Put back the comma to allow this to be called again */
5271 		if (buf)
5272 			*(buf - 1) = ',';
5273 	}
5274 }
5275 
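/*
 * Usage sketch (editor's note, not part of the original source):
 * trace_boot_options_buf is filled from the "trace_options=" kernel
 * command-line parameter, so booting with, for example,
 *
 *	trace_options=sym-addr,nooverwrite
 *
 * walks each comma-separated token through trace_set_options() for the
 * global trace instance, exactly as if it had been written to the
 * trace_options file after boot. (The option names above are examples;
 * any name accepted by the trace_options file works.)
 */
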
5276 static ssize_t
5277 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5278 			size_t cnt, loff_t *ppos)
5279 {
5280 	struct seq_file *m = filp->private_data;
5281 	struct trace_array *tr = m->private;
5282 	char buf[64];
5283 	int ret;
5284 
5285 	if (cnt >= sizeof(buf))
5286 		return -EINVAL;
5287 
5288 	if (copy_from_user(buf, ubuf, cnt))
5289 		return -EFAULT;
5290 
5291 	buf[cnt] = 0;
5292 
5293 	ret = trace_set_options(tr, buf);
5294 	if (ret < 0)
5295 		return ret;
5296 
5297 	*ppos += cnt;
5298 
5299 	return cnt;
5300 }
5301 
5302 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5303 {
5304 	struct trace_array *tr = inode->i_private;
5305 	int ret;
5306 
5307 	ret = tracing_check_open_get_tr(tr);
5308 	if (ret)
5309 		return ret;
5310 
5311 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5312 	if (ret < 0)
5313 		trace_array_put(tr);
5314 
5315 	return ret;
5316 }
5317 
5318 static const struct file_operations tracing_iter_fops = {
5319 	.open		= tracing_trace_options_open,
5320 	.read		= seq_read,
5321 	.llseek		= seq_lseek,
5322 	.release	= tracing_single_release_tr,
5323 	.write		= tracing_trace_options_write,
5324 };
5325 
5326 static const char readme_msg[] =
5327 	"tracing mini-HOWTO:\n\n"
5328 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5329 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5330 	" Important files:\n"
5331 	"  trace\t\t\t- The static contents of the buffer\n"
5332 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5333 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5334 	"  current_tracer\t- function and latency tracers\n"
5335 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5336 	"  error_log\t- error log for failed commands (that support it)\n"
5337 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5338 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5339 	"  trace_clock\t\t- change the clock used to order events\n"
5340 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5341 	"      global:   Synced across CPUs but slows tracing down.\n"
5342 	"     counter:   Not a clock, but just an increment\n"
5343 	"      uptime:   Jiffy counter from time of boot\n"
5344 	"        perf:   Same clock that perf events use\n"
5345 #ifdef CONFIG_X86_64
5346 	"     x86-tsc:   TSC cycle counter\n"
5347 #endif
5348 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5349 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5350 	"    absolute:   Absolute (standalone) timestamp\n"
5351 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5352 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5353 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5354 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5355 	"\t\t\t  Remove sub-buffer with rmdir\n"
5356 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5357 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5358 	"\t\t\t  option name\n"
5359 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5360 #ifdef CONFIG_DYNAMIC_FTRACE
5361 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5362 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5363 	"\t\t\t  functions\n"
5364 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5365 	"\t     modules: Can select a group via module\n"
5366 	"\t      Format: :mod:<module-name>\n"
5367 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5368 	"\t    triggers: a command to perform when function is hit\n"
5369 	"\t      Format: <function>:<trigger>[:count]\n"
5370 	"\t     trigger: traceon, traceoff\n"
5371 	"\t\t      enable_event:<system>:<event>\n"
5372 	"\t\t      disable_event:<system>:<event>\n"
5373 #ifdef CONFIG_STACKTRACE
5374 	"\t\t      stacktrace\n"
5375 #endif
5376 #ifdef CONFIG_TRACER_SNAPSHOT
5377 	"\t\t      snapshot\n"
5378 #endif
5379 	"\t\t      dump\n"
5380 	"\t\t      cpudump\n"
5381 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5382 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5383 	"\t     The first one will disable tracing every time do_fault is hit\n"
5384 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5385 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5386 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5387 	"\t       the counter will not decrement. It only decrements when the\n"
5388 	"\t       trigger did work\n"
5389 	"\t     To remove trigger without count:\n"
5390 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5391 	"\t     To remove trigger with a count:\n"
5392 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5393 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5394 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5395 	"\t    modules: Can select a group via module command :mod:\n"
5396 	"\t    Does not accept triggers\n"
5397 #endif /* CONFIG_DYNAMIC_FTRACE */
5398 #ifdef CONFIG_FUNCTION_TRACER
5399 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5400 	"\t\t    (function)\n"
5401 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5402 	"\t\t    (function)\n"
5403 #endif
5404 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5405 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5406 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5407 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5408 #endif
5409 #ifdef CONFIG_TRACER_SNAPSHOT
5410 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5411 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5412 	"\t\t\t  information\n"
5413 #endif
5414 #ifdef CONFIG_STACK_TRACER
5415 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5416 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5417 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5418 	"\t\t\t  new trace)\n"
5419 #ifdef CONFIG_DYNAMIC_FTRACE
5420 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5421 	"\t\t\t  traces\n"
5422 #endif
5423 #endif /* CONFIG_STACK_TRACER */
5424 #ifdef CONFIG_DYNAMIC_EVENTS
5425 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5426 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5427 #endif
5428 #ifdef CONFIG_KPROBE_EVENTS
5429 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5430 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5431 #endif
5432 #ifdef CONFIG_UPROBE_EVENTS
5433 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5434 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5435 #endif
5436 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5437 	"\t  accepts: event-definitions (one definition per line)\n"
5438 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5439 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5440 #ifdef CONFIG_HIST_TRIGGERS
5441 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5442 #endif
5443 	"\t           -:[<group>/]<event>\n"
5444 #ifdef CONFIG_KPROBE_EVENTS
5445 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5446 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5447 #endif
5448 #ifdef CONFIG_UPROBE_EVENTS
5449 	"\t    place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5450 #endif
5451 	"\t     args: <name>=fetcharg[:type]\n"
5452 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5453 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5454 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5455 #else
5456 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5457 #endif
5458 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5459 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5460 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5461 	"\t           <type>\\[<array-size>\\]\n"
5462 #ifdef CONFIG_HIST_TRIGGERS
5463 	"\t    field: <stype> <name>;\n"
5464 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5465 	"\t           [unsigned] char/int/long\n"
5466 #endif
5467 #endif
5468 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5469 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5470 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5471 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5472 	"\t\t\t  events\n"
5473 	"      filter\t\t- If set, only events passing filter are traced\n"
5474 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5475 	"\t\t\t  <event>:\n"
5476 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5477 	"      filter\t\t- If set, only events passing filter are traced\n"
5478 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5479 	"\t    Format: <trigger>[:count][if <filter>]\n"
5480 	"\t   trigger: traceon, traceoff\n"
5481 	"\t            enable_event:<system>:<event>\n"
5482 	"\t            disable_event:<system>:<event>\n"
5483 #ifdef CONFIG_HIST_TRIGGERS
5484 	"\t            enable_hist:<system>:<event>\n"
5485 	"\t            disable_hist:<system>:<event>\n"
5486 #endif
5487 #ifdef CONFIG_STACKTRACE
5488 	"\t\t    stacktrace\n"
5489 #endif
5490 #ifdef CONFIG_TRACER_SNAPSHOT
5491 	"\t\t    snapshot\n"
5492 #endif
5493 #ifdef CONFIG_HIST_TRIGGERS
5494 	"\t\t    hist (see below)\n"
5495 #endif
5496 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5497 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5498 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5499 	"\t                  events/block/block_unplug/trigger\n"
5500 	"\t   The first disables tracing every time block_unplug is hit.\n"
5501 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5502 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5503 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5504 	"\t   Like function triggers, the counter is only decremented if it\n"
5505 	"\t    enabled or disabled tracing.\n"
5506 	"\t   To remove a trigger without a count:\n"
5507 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5508 	"\t   To remove a trigger with a count:\n"
5509 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5510 	"\t   Filters can be ignored when removing a trigger.\n"
5511 #ifdef CONFIG_HIST_TRIGGERS
5512 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5513 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5514 	"\t            [:values=<field1[,field2,...]>]\n"
5515 	"\t            [:sort=<field1[,field2,...]>]\n"
5516 	"\t            [:size=#entries]\n"
5517 	"\t            [:pause][:continue][:clear]\n"
5518 	"\t            [:name=histname1]\n"
5519 	"\t            [:<handler>.<action>]\n"
5520 	"\t            [if <filter>]\n\n"
5521 	"\t    When a matching event is hit, an entry is added to a hash\n"
5522 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5523 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5524 	"\t    correspond to fields in the event's format description.  Keys\n"
5525 	"\t    can be any field, or the special string 'stacktrace'.\n"
5526 	"\t    Compound keys consisting of up to two fields can be specified\n"
5527 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5528 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5529 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5530 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5531 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5532 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5533 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5534 	"\t    its histogram data will be shared with other triggers of the\n"
5535 	"\t    same name, and trigger hits will update this common data.\n\n"
5536 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5537 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5538 	"\t    triggers attached to an event, there will be a table for each\n"
5539 	"\t    trigger in the output.  The table displayed for a named\n"
5540 	"\t    trigger will be the same as any other instance having the\n"
5541 	"\t    same name.  The default format used to display a given field\n"
5542 	"\t    can be modified by appending any of the following modifiers\n"
5543 	"\t    to the field name, as applicable:\n\n"
5544 	"\t            .hex        display a number as a hex value\n"
5545 	"\t            .sym        display an address as a symbol\n"
5546 	"\t            .sym-offset display an address as a symbol and offset\n"
5547 	"\t            .execname   display a common_pid as a program name\n"
5548 	"\t            .syscall    display a syscall id as a syscall name\n"
5549 	"\t            .log2       display log2 value rather than raw number\n"
5550 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5551 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5552 	"\t    trigger or to start a hist trigger but not log any events\n"
5553 	"\t    until told to do so.  'continue' can be used to start or\n"
5554 	"\t    restart a paused hist trigger.\n\n"
5555 	"\t    The 'clear' parameter will clear the contents of a running\n"
5556 	"\t    hist trigger and leave its current paused/active state\n"
5557 	"\t    unchanged.\n\n"
5558 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5559 	"\t    have one event conditionally start and stop another event's\n"
5560 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5561 	"\t    the enable_event and disable_event triggers.\n\n"
5562 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5563 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5564 	"\t        <handler>.<action>\n\n"
5565 	"\t    The available handlers are:\n\n"
5566 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5567 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5568 	"\t        onchange(var)            - invoke action if var changes\n\n"
5569 	"\t    The available actions are:\n\n"
5570 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5571 	"\t        save(field,...)                      - save current event fields\n"
5572 #ifdef CONFIG_TRACER_SNAPSHOT
5573 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5574 #endif
5575 #ifdef CONFIG_SYNTH_EVENTS
5576 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5577 	"\t  Write into this file to define/undefine new synthetic events.\n"
5578 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5579 #endif
5580 #endif
5581 ;
5582 
5583 static ssize_t
5584 tracing_readme_read(struct file *filp, char __user *ubuf,
5585 		       size_t cnt, loff_t *ppos)
5586 {
5587 	return simple_read_from_buffer(ubuf, cnt, ppos,
5588 					readme_msg, strlen(readme_msg));
5589 }
5590 
5591 static const struct file_operations tracing_readme_fops = {
5592 	.open		= tracing_open_generic,
5593 	.read		= tracing_readme_read,
5594 	.llseek		= generic_file_llseek,
5595 };
5596 
5597 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5598 {
5599 	int *ptr = v;
5600 
5601 	if (*pos || m->count)
5602 		ptr++;
5603 
5604 	(*pos)++;
5605 
5606 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5607 		if (trace_find_tgid(*ptr))
5608 			return ptr;
5609 	}
5610 
5611 	return NULL;
5612 }
5613 
5614 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5615 {
5616 	void *v;
5617 	loff_t l = 0;
5618 
5619 	if (!tgid_map)
5620 		return NULL;
5621 
5622 	v = &tgid_map[0];
5623 	while (l <= *pos) {
5624 		v = saved_tgids_next(m, v, &l);
5625 		if (!v)
5626 			return NULL;
5627 	}
5628 
5629 	return v;
5630 }
5631 
5632 static void saved_tgids_stop(struct seq_file *m, void *v)
5633 {
5634 }
5635 
5636 static int saved_tgids_show(struct seq_file *m, void *v)
5637 {
5638 	int pid = (int *)v - tgid_map;
5639 
5640 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5641 	return 0;
5642 }
5643 
5644 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5645 	.start		= saved_tgids_start,
5646 	.stop		= saved_tgids_stop,
5647 	.next		= saved_tgids_next,
5648 	.show		= saved_tgids_show,
5649 };
5650 
5651 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5652 {
5653 	int ret;
5654 
5655 	ret = tracing_check_open_get_tr(NULL);
5656 	if (ret)
5657 		return ret;
5658 
5659 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5660 }
5661 
5662 
5663 static const struct file_operations tracing_saved_tgids_fops = {
5664 	.open		= tracing_saved_tgids_open,
5665 	.read		= seq_read,
5666 	.llseek		= seq_lseek,
5667 	.release	= seq_release,
5668 };
5669 
5670 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5671 {
5672 	unsigned int *ptr = v;
5673 
5674 	if (*pos || m->count)
5675 		ptr++;
5676 
5677 	(*pos)++;
5678 
5679 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5680 	     ptr++) {
5681 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5682 			continue;
5683 
5684 		return ptr;
5685 	}
5686 
5687 	return NULL;
5688 }
5689 
5690 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5691 {
5692 	void *v;
5693 	loff_t l = 0;
5694 
5695 	preempt_disable();
5696 	arch_spin_lock(&trace_cmdline_lock);
5697 
5698 	v = &savedcmd->map_cmdline_to_pid[0];
5699 	while (l <= *pos) {
5700 		v = saved_cmdlines_next(m, v, &l);
5701 		if (!v)
5702 			return NULL;
5703 	}
5704 
5705 	return v;
5706 }
5707 
5708 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5709 {
5710 	arch_spin_unlock(&trace_cmdline_lock);
5711 	preempt_enable();
5712 }
5713 
5714 static int saved_cmdlines_show(struct seq_file *m, void *v)
5715 {
5716 	char buf[TASK_COMM_LEN];
5717 	unsigned int *pid = v;
5718 
5719 	__trace_find_cmdline(*pid, buf);
5720 	seq_printf(m, "%d %s\n", *pid, buf);
5721 	return 0;
5722 }
5723 
5724 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5725 	.start		= saved_cmdlines_start,
5726 	.next		= saved_cmdlines_next,
5727 	.stop		= saved_cmdlines_stop,
5728 	.show		= saved_cmdlines_show,
5729 };
5730 
5731 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5732 {
5733 	int ret;
5734 
5735 	ret = tracing_check_open_get_tr(NULL);
5736 	if (ret)
5737 		return ret;
5738 
5739 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5740 }
5741 
5742 static const struct file_operations tracing_saved_cmdlines_fops = {
5743 	.open		= tracing_saved_cmdlines_open,
5744 	.read		= seq_read,
5745 	.llseek		= seq_lseek,
5746 	.release	= seq_release,
5747 };
5748 
5749 static ssize_t
5750 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5751 				 size_t cnt, loff_t *ppos)
5752 {
5753 	char buf[64];
5754 	int r;
5755 
5756 	arch_spin_lock(&trace_cmdline_lock);
5757 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5758 	arch_spin_unlock(&trace_cmdline_lock);
5759 
5760 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5761 }
5762 
5763 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5764 {
5765 	kfree(s->saved_cmdlines);
5766 	kfree(s->map_cmdline_to_pid);
5767 	kfree(s);
5768 }
5769 
5770 static int tracing_resize_saved_cmdlines(unsigned int val)
5771 {
5772 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5773 
5774 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5775 	if (!s)
5776 		return -ENOMEM;
5777 
5778 	if (allocate_cmdlines_buffer(val, s) < 0) {
5779 		kfree(s);
5780 		return -ENOMEM;
5781 	}
5782 
5783 	arch_spin_lock(&trace_cmdline_lock);
5784 	savedcmd_temp = savedcmd;
5785 	savedcmd = s;
5786 	arch_spin_unlock(&trace_cmdline_lock);
5787 	free_saved_cmdlines_buffer(savedcmd_temp);
5788 
5789 	return 0;
5790 }
5791 
5792 static ssize_t
5793 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5794 				  size_t cnt, loff_t *ppos)
5795 {
5796 	unsigned long val;
5797 	int ret;
5798 
5799 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5800 	if (ret)
5801 		return ret;
5802 
5803 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5804 	if (!val || val > PID_MAX_DEFAULT)
5805 		return -EINVAL;
5806 
5807 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5808 	if (ret < 0)
5809 		return ret;
5810 
5811 	*ppos += cnt;
5812 
5813 	return cnt;
5814 }
5815 
5816 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5817 	.open		= tracing_open_generic,
5818 	.read		= tracing_saved_cmdlines_size_read,
5819 	.write		= tracing_saved_cmdlines_size_write,
5820 };
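
/*
 * Illustrative usage of the file backed by the fops above (typically
 * created as "saved_cmdlines_size" in tracefs).  The default value shown
 * is only an example; accepted values are 1..PID_MAX_DEFAULT:
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   128
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */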
5821 
5822 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5823 static union trace_eval_map_item *
5824 update_eval_map(union trace_eval_map_item *ptr)
5825 {
5826 	if (!ptr->map.eval_string) {
5827 		if (ptr->tail.next) {
5828 			ptr = ptr->tail.next;
5829 			/* Set ptr to the next real item (skip head) */
5830 			ptr++;
5831 		} else
5832 			return NULL;
5833 	}
5834 	return ptr;
5835 }
5836 
5837 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5838 {
5839 	union trace_eval_map_item *ptr = v;
5840 
5841 	/*
5842 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5843 	 * This really should never happen.
5844 	 */
5845 	(*pos)++;
5846 	ptr = update_eval_map(ptr);
5847 	if (WARN_ON_ONCE(!ptr))
5848 		return NULL;
5849 
5850 	ptr++;
5851 	ptr = update_eval_map(ptr);
5852 
5853 	return ptr;
5854 }
5855 
5856 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5857 {
5858 	union trace_eval_map_item *v;
5859 	loff_t l = 0;
5860 
5861 	mutex_lock(&trace_eval_mutex);
5862 
5863 	v = trace_eval_maps;
5864 	if (v)
5865 		v++;
5866 
5867 	while (v && l < *pos) {
5868 		v = eval_map_next(m, v, &l);
5869 	}
5870 
5871 	return v;
5872 }
5873 
5874 static void eval_map_stop(struct seq_file *m, void *v)
5875 {
5876 	mutex_unlock(&trace_eval_mutex);
5877 }
5878 
5879 static int eval_map_show(struct seq_file *m, void *v)
5880 {
5881 	union trace_eval_map_item *ptr = v;
5882 
5883 	seq_printf(m, "%s %ld (%s)\n",
5884 		   ptr->map.eval_string, ptr->map.eval_value,
5885 		   ptr->map.system);
5886 
5887 	return 0;
5888 }
5889 
5890 static const struct seq_operations tracing_eval_map_seq_ops = {
5891 	.start		= eval_map_start,
5892 	.next		= eval_map_next,
5893 	.stop		= eval_map_stop,
5894 	.show		= eval_map_show,
5895 };
5896 
5897 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5898 {
5899 	int ret;
5900 
5901 	ret = tracing_check_open_get_tr(NULL);
5902 	if (ret)
5903 		return ret;
5904 
5905 	return seq_open(filp, &tracing_eval_map_seq_ops);
5906 }
5907 
5908 static const struct file_operations tracing_eval_map_fops = {
5909 	.open		= tracing_eval_map_open,
5910 	.read		= seq_read,
5911 	.llseek		= seq_lseek,
5912 	.release	= seq_release,
5913 };
5914 
5915 static inline union trace_eval_map_item *
5916 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5917 {
5918 	/* Return tail of array given the head */
5919 	return ptr + ptr->head.length + 1;
5920 }
5921 
5922 static void
5923 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5924 			   int len)
5925 {
5926 	struct trace_eval_map **stop;
5927 	struct trace_eval_map **map;
5928 	union trace_eval_map_item *map_array;
5929 	union trace_eval_map_item *ptr;
5930 
5931 	stop = start + len;
5932 
5933 	/*
5934 	 * The trace_eval_maps contains the map plus a head and tail item,
5935 	 * where the head holds the module and length of array, and the
5936 	 * tail holds a pointer to the next list.
5937 	 */
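	/*
	 * For illustration, with len == N the resulting layout is:
	 *
	 *   map_array[0]       head  (mod, length = N)
	 *   map_array[1..N]    map   (copies of the trace_eval_map entries)
	 *   map_array[N + 1]   tail  (zeroed; tail.next links the next array)
	 */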
5938 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5939 	if (!map_array) {
5940 		pr_warn("Unable to allocate trace eval mapping\n");
5941 		return;
5942 	}
5943 
5944 	mutex_lock(&trace_eval_mutex);
5945 
5946 	if (!trace_eval_maps)
5947 		trace_eval_maps = map_array;
5948 	else {
5949 		ptr = trace_eval_maps;
5950 		for (;;) {
5951 			ptr = trace_eval_jmp_to_tail(ptr);
5952 			if (!ptr->tail.next)
5953 				break;
5954 			ptr = ptr->tail.next;
5955 
5956 		}
5957 		ptr->tail.next = map_array;
5958 	}
5959 	map_array->head.mod = mod;
5960 	map_array->head.length = len;
5961 	map_array++;
5962 
5963 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5964 		map_array->map = **map;
5965 		map_array++;
5966 	}
5967 	memset(map_array, 0, sizeof(*map_array));
5968 
5969 	mutex_unlock(&trace_eval_mutex);
5970 }
5971 
5972 static void trace_create_eval_file(struct dentry *d_tracer)
5973 {
5974 	trace_create_file("eval_map", 0444, d_tracer,
5975 			  NULL, &tracing_eval_map_fops);
5976 }
5977 
5978 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5979 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5980 static inline void trace_insert_eval_map_file(struct module *mod,
5981 			      struct trace_eval_map **start, int len) { }
5982 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5983 
5984 static void trace_insert_eval_map(struct module *mod,
5985 				  struct trace_eval_map **start, int len)
5986 {
5987 	struct trace_eval_map **map;
5988 
5989 	if (len <= 0)
5990 		return;
5991 
5992 	map = start;
5993 
5994 	trace_event_eval_update(map, len);
5995 
5996 	trace_insert_eval_map_file(mod, start, len);
5997 }
5998 
5999 static ssize_t
6000 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6001 		       size_t cnt, loff_t *ppos)
6002 {
6003 	struct trace_array *tr = filp->private_data;
6004 	char buf[MAX_TRACER_SIZE+2];
6005 	int r;
6006 
6007 	mutex_lock(&trace_types_lock);
6008 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6009 	mutex_unlock(&trace_types_lock);
6010 
6011 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6012 }
6013 
6014 int tracer_init(struct tracer *t, struct trace_array *tr)
6015 {
6016 	tracing_reset_online_cpus(&tr->array_buffer);
6017 	return t->init(tr);
6018 }
6019 
6020 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6021 {
6022 	int cpu;
6023 
6024 	for_each_tracing_cpu(cpu)
6025 		per_cpu_ptr(buf->data, cpu)->entries = val;
6026 }
6027 
6028 #ifdef CONFIG_TRACER_MAX_TRACE
6029 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6030 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6031 					struct array_buffer *size_buf, int cpu_id)
6032 {
6033 	int cpu, ret = 0;
6034 
6035 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6036 		for_each_tracing_cpu(cpu) {
6037 			ret = ring_buffer_resize(trace_buf->buffer,
6038 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6039 			if (ret < 0)
6040 				break;
6041 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6042 				per_cpu_ptr(size_buf->data, cpu)->entries;
6043 		}
6044 	} else {
6045 		ret = ring_buffer_resize(trace_buf->buffer,
6046 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6047 		if (ret == 0)
6048 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6049 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6050 	}
6051 
6052 	return ret;
6053 }
6054 #endif /* CONFIG_TRACER_MAX_TRACE */
6055 
6056 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6057 					unsigned long size, int cpu)
6058 {
6059 	int ret;
6060 
6061 	/*
6062 	 * If kernel or user changes the size of the ring buffer
6063 	 * we use the size that was given, and we can forget about
6064 	 * expanding it later.
6065 	 */
6066 	ring_buffer_expanded = true;
6067 
6068 	/* May be called before buffers are initialized */
6069 	if (!tr->array_buffer.buffer)
6070 		return 0;
6071 
6072 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6073 	if (ret < 0)
6074 		return ret;
6075 
6076 #ifdef CONFIG_TRACER_MAX_TRACE
6077 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6078 	    !tr->current_trace->use_max_tr)
6079 		goto out;
6080 
6081 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6082 	if (ret < 0) {
6083 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6084 						     &tr->array_buffer, cpu);
6085 		if (r < 0) {
6086 			/*
6087 			 * AARGH! We are left with different
6088 			 * size max buffer!!!!
6089 			 * The max buffer is our "snapshot" buffer.
6090 			 * When a tracer needs a snapshot (one of the
6091 			 * latency tracers), it swaps the max buffer
6092 			 * with the saved snapshot. We succeeded in
6093 			 * updating the size of the main buffer, but failed to
6094 			 * update the size of the max buffer. But when we tried
6095 			 * to reset the main buffer to the original size, we
6096 			 * failed there too. This is very unlikely to
6097 			 * happen, but if it does, warn and kill all
6098 			 * tracing.
6099 			 */
6100 			WARN_ON(1);
6101 			tracing_disabled = 1;
6102 		}
6103 		return ret;
6104 	}
6105 
6106 	if (cpu == RING_BUFFER_ALL_CPUS)
6107 		set_buffer_entries(&tr->max_buffer, size);
6108 	else
6109 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6110 
6111  out:
6112 #endif /* CONFIG_TRACER_MAX_TRACE */
6113 
6114 	if (cpu == RING_BUFFER_ALL_CPUS)
6115 		set_buffer_entries(&tr->array_buffer, size);
6116 	else
6117 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6118 
6119 	return ret;
6120 }
6121 
6122 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6123 				  unsigned long size, int cpu_id)
6124 {
6125 	int ret = size;
6126 
6127 	mutex_lock(&trace_types_lock);
6128 
6129 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6130 		/* make sure this cpu is enabled in the mask */
6131 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6132 			ret = -EINVAL;
6133 			goto out;
6134 		}
6135 	}
6136 
6137 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6138 	if (ret < 0)
6139 		ret = -ENOMEM;
6140 
6141 out:
6142 	mutex_unlock(&trace_types_lock);
6143 
6144 	return ret;
6145 }
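
/*
 * For illustration only: this resize path is what backs writes to the
 * tracefs "buffer_size_kb" files (see tracing_entries_write() below,
 * which converts the KiB value written by the user into bytes), e.g.:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # echo 4096 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *
 * The first form resizes all per-CPU buffers, the second only cpu0's.
 */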
6146 
6147 
6148 /**
6149  * tracing_update_buffers - used by tracing facility to expand ring buffers
6150  *
6151  * To save memory when tracing is never used on a system that has it
6152  * configured in, the ring buffers are initially set to a minimum size.
6153  * Once a user starts to use the tracing facility, the buffers need to
6154  * grow to their default size.
6155  *
6156  * This function is to be called when a tracer is about to be used.
6157  */
6158 int tracing_update_buffers(void)
6159 {
6160 	int ret = 0;
6161 
6162 	mutex_lock(&trace_types_lock);
6163 	if (!ring_buffer_expanded)
6164 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6165 						RING_BUFFER_ALL_CPUS);
6166 	mutex_unlock(&trace_types_lock);
6167 
6168 	return ret;
6169 }
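
/*
 * A small illustration of the expand-on-first-use behaviour described
 * above: before the ring buffer is expanded, reading buffer_size_kb
 * reports both the current minimal size and the size it will expand to.
 * The numbers below are only an example (they depend on trace_buf_size):
 *
 *   # cat /sys/kernel/tracing/buffer_size_kb
 *   7 (expanded: 1408)
 */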
6170 
6171 struct trace_option_dentry;
6172 
6173 static void
6174 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6175 
6176 /*
6177  * Used to clear out the tracer before deletion of an instance.
6178  * Must have trace_types_lock held.
6179  */
6180 static void tracing_set_nop(struct trace_array *tr)
6181 {
6182 	if (tr->current_trace == &nop_trace)
6183 		return;
6184 
6185 	tr->current_trace->enabled--;
6186 
6187 	if (tr->current_trace->reset)
6188 		tr->current_trace->reset(tr);
6189 
6190 	tr->current_trace = &nop_trace;
6191 }
6192 
6193 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6194 {
6195 	/* Only enable if the directory has been created already. */
6196 	if (!tr->dir)
6197 		return;
6198 
6199 	create_trace_option_files(tr, t);
6200 }
6201 
6202 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6203 {
6204 	struct tracer *t;
6205 #ifdef CONFIG_TRACER_MAX_TRACE
6206 	bool had_max_tr;
6207 #endif
6208 	int ret = 0;
6209 
6210 	mutex_lock(&trace_types_lock);
6211 
6212 	if (!ring_buffer_expanded) {
6213 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6214 						RING_BUFFER_ALL_CPUS);
6215 		if (ret < 0)
6216 			goto out;
6217 		ret = 0;
6218 	}
6219 
6220 	for (t = trace_types; t; t = t->next) {
6221 		if (strcmp(t->name, buf) == 0)
6222 			break;
6223 	}
6224 	if (!t) {
6225 		ret = -EINVAL;
6226 		goto out;
6227 	}
6228 	if (t == tr->current_trace)
6229 		goto out;
6230 
6231 #ifdef CONFIG_TRACER_SNAPSHOT
6232 	if (t->use_max_tr) {
6233 		arch_spin_lock(&tr->max_lock);
6234 		if (tr->cond_snapshot)
6235 			ret = -EBUSY;
6236 		arch_spin_unlock(&tr->max_lock);
6237 		if (ret)
6238 			goto out;
6239 	}
6240 #endif
6241 	/* Some tracers won't work on kernel command line */
6242 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6243 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6244 			t->name);
6245 		goto out;
6246 	}
6247 
6248 	/* Some tracers are only allowed for the top level buffer */
6249 	if (!trace_ok_for_array(t, tr)) {
6250 		ret = -EINVAL;
6251 		goto out;
6252 	}
6253 
6254 	/* If trace pipe files are being read, we can't change the tracer */
6255 	if (tr->trace_ref) {
6256 		ret = -EBUSY;
6257 		goto out;
6258 	}
6259 
6260 	trace_branch_disable();
6261 
6262 	tr->current_trace->enabled--;
6263 
6264 	if (tr->current_trace->reset)
6265 		tr->current_trace->reset(tr);
6266 
6267 	/* Current trace needs to be nop_trace before synchronize_rcu */
6268 	tr->current_trace = &nop_trace;
6269 
6270 #ifdef CONFIG_TRACER_MAX_TRACE
6271 	had_max_tr = tr->allocated_snapshot;
6272 
6273 	if (had_max_tr && !t->use_max_tr) {
6274 		/*
6275 		 * We need to make sure that the update_max_tr sees that
6276 		 * current_trace changed to nop_trace to keep it from
6277 		 * swapping the buffers after we resize it.
6278 		 * update_max_tr() is called with interrupts disabled, so
6279 		 * a synchronize_rcu() is sufficient.
6280 		 */
6281 		synchronize_rcu();
6282 		free_snapshot(tr);
6283 	}
6284 #endif
6285 
6286 #ifdef CONFIG_TRACER_MAX_TRACE
6287 	if (t->use_max_tr && !had_max_tr) {
6288 		ret = tracing_alloc_snapshot_instance(tr);
6289 		if (ret < 0)
6290 			goto out;
6291 	}
6292 #endif
6293 
6294 	if (t->init) {
6295 		ret = tracer_init(t, tr);
6296 		if (ret)
6297 			goto out;
6298 	}
6299 
6300 	tr->current_trace = t;
6301 	tr->current_trace->enabled++;
6302 	trace_branch_enable(tr);
6303  out:
6304 	mutex_unlock(&trace_types_lock);
6305 
6306 	return ret;
6307 }
6308 
6309 static ssize_t
6310 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6311 			size_t cnt, loff_t *ppos)
6312 {
6313 	struct trace_array *tr = filp->private_data;
6314 	char buf[MAX_TRACER_SIZE+1];
6315 	int i;
6316 	size_t ret;
6317 	int err;
6318 
6319 	ret = cnt;
6320 
6321 	if (cnt > MAX_TRACER_SIZE)
6322 		cnt = MAX_TRACER_SIZE;
6323 
6324 	if (copy_from_user(buf, ubuf, cnt))
6325 		return -EFAULT;
6326 
6327 	buf[cnt] = 0;
6328 
6329 	/* strip ending whitespace. */
6330 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6331 		buf[i] = 0;
6332 
6333 	err = tracing_set_tracer(tr, buf);
6334 	if (err)
6335 		return err;
6336 
6337 	*ppos += ret;
6338 
6339 	return ret;
6340 }
6341 
6342 static ssize_t
6343 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6344 		   size_t cnt, loff_t *ppos)
6345 {
6346 	char buf[64];
6347 	int r;
6348 
6349 	r = snprintf(buf, sizeof(buf), "%ld\n",
6350 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6351 	if (r > sizeof(buf))
6352 		r = sizeof(buf);
6353 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6354 }
6355 
6356 static ssize_t
6357 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6358 		    size_t cnt, loff_t *ppos)
6359 {
6360 	unsigned long val;
6361 	int ret;
6362 
6363 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6364 	if (ret)
6365 		return ret;
6366 
6367 	*ptr = val * 1000;
6368 
6369 	return cnt;
6370 }
6371 
6372 static ssize_t
6373 tracing_thresh_read(struct file *filp, char __user *ubuf,
6374 		    size_t cnt, loff_t *ppos)
6375 {
6376 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6377 }
6378 
6379 static ssize_t
6380 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6381 		     size_t cnt, loff_t *ppos)
6382 {
6383 	struct trace_array *tr = filp->private_data;
6384 	int ret;
6385 
6386 	mutex_lock(&trace_types_lock);
6387 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6388 	if (ret < 0)
6389 		goto out;
6390 
6391 	if (tr->current_trace->update_thresh) {
6392 		ret = tr->current_trace->update_thresh(tr);
6393 		if (ret < 0)
6394 			goto out;
6395 	}
6396 
6397 	ret = cnt;
6398 out:
6399 	mutex_unlock(&trace_types_lock);
6400 
6401 	return ret;
6402 }
6403 
6404 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6405 
6406 static ssize_t
6407 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6408 		     size_t cnt, loff_t *ppos)
6409 {
6410 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6411 }
6412 
6413 static ssize_t
6414 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6415 		      size_t cnt, loff_t *ppos)
6416 {
6417 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6418 }
6419 
6420 #endif
6421 
6422 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6423 {
6424 	struct trace_array *tr = inode->i_private;
6425 	struct trace_iterator *iter;
6426 	int ret;
6427 
6428 	ret = tracing_check_open_get_tr(tr);
6429 	if (ret)
6430 		return ret;
6431 
6432 	mutex_lock(&trace_types_lock);
6433 
6434 	/* create a buffer to store the information to pass to userspace */
6435 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6436 	if (!iter) {
6437 		ret = -ENOMEM;
6438 		__trace_array_put(tr);
6439 		goto out;
6440 	}
6441 
6442 	trace_seq_init(&iter->seq);
6443 	iter->trace = tr->current_trace;
6444 
6445 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6446 		ret = -ENOMEM;
6447 		goto fail;
6448 	}
6449 
6450 	/* trace pipe does not show start of buffer */
6451 	cpumask_setall(iter->started);
6452 
6453 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6454 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6455 
6456 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6457 	if (trace_clocks[tr->clock_id].in_ns)
6458 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6459 
6460 	iter->tr = tr;
6461 	iter->array_buffer = &tr->array_buffer;
6462 	iter->cpu_file = tracing_get_cpu(inode);
6463 	mutex_init(&iter->mutex);
6464 	filp->private_data = iter;
6465 
6466 	if (iter->trace->pipe_open)
6467 		iter->trace->pipe_open(iter);
6468 
6469 	nonseekable_open(inode, filp);
6470 
6471 	tr->trace_ref++;
6472 out:
6473 	mutex_unlock(&trace_types_lock);
6474 	return ret;
6475 
6476 fail:
6477 	kfree(iter);
6478 	__trace_array_put(tr);
6479 	mutex_unlock(&trace_types_lock);
6480 	return ret;
6481 }
6482 
6483 static int tracing_release_pipe(struct inode *inode, struct file *file)
6484 {
6485 	struct trace_iterator *iter = file->private_data;
6486 	struct trace_array *tr = inode->i_private;
6487 
6488 	mutex_lock(&trace_types_lock);
6489 
6490 	tr->trace_ref--;
6491 
6492 	if (iter->trace->pipe_close)
6493 		iter->trace->pipe_close(iter);
6494 
6495 	mutex_unlock(&trace_types_lock);
6496 
6497 	free_cpumask_var(iter->started);
6498 	mutex_destroy(&iter->mutex);
6499 	kfree(iter);
6500 
6501 	trace_array_put(tr);
6502 
6503 	return 0;
6504 }
6505 
6506 static __poll_t
6507 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6508 {
6509 	struct trace_array *tr = iter->tr;
6510 
6511 	/* Iterators are static; they should be either filled or empty */
6512 	if (trace_buffer_iter(iter, iter->cpu_file))
6513 		return EPOLLIN | EPOLLRDNORM;
6514 
6515 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6516 		/*
6517 		 * Always select as readable when in blocking mode
6518 		 */
6519 		return EPOLLIN | EPOLLRDNORM;
6520 	else
6521 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6522 					     filp, poll_table);
6523 }
6524 
6525 static __poll_t
6526 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6527 {
6528 	struct trace_iterator *iter = filp->private_data;
6529 
6530 	return trace_poll(iter, filp, poll_table);
6531 }
6532 
6533 /* Must be called with iter->mutex held. */
6534 static int tracing_wait_pipe(struct file *filp)
6535 {
6536 	struct trace_iterator *iter = filp->private_data;
6537 	int ret;
6538 
6539 	while (trace_empty(iter)) {
6540 
6541 		if ((filp->f_flags & O_NONBLOCK)) {
6542 			return -EAGAIN;
6543 		}
6544 
6545 		/*
6546 		 * We keep blocking until something has been read and tracing
6547 		 * is disabled.  If tracing is disabled but nothing has been
6548 		 * read yet, we still block; this allows a user to cat this
6549 		 * file and then enable tracing.  Once something has been
6550 		 * read, we give an EOF when tracing is disabled again.
6551 		 *
6552 		 * iter->pos will be 0 if we haven't read anything.
6553 		 */
6554 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6555 			break;
6556 
6557 		mutex_unlock(&iter->mutex);
6558 
6559 		ret = wait_on_pipe(iter, 0);
6560 
6561 		mutex_lock(&iter->mutex);
6562 
6563 		if (ret)
6564 			return ret;
6565 	}
6566 
6567 	return 1;
6568 }
6569 
6570 /*
6571  * Consumer reader.
6572  */
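/*
 * For illustration: this is the read side of the tracefs "trace_pipe"
 * file.  Unlike the "trace" file, reads here consume events from the
 * ring buffer, and (without O_NONBLOCK) block in tracing_wait_pipe()
 * while the buffer is empty, e.g.:
 *
 *   # cat /sys/kernel/tracing/trace_pipe &      <- blocks, buffer empty
 *   # echo 1 > /sys/kernel/tracing/tracing_on   <- events start flowing
 *
 * Once something has been read, disabling tracing again results in EOF
 * rather than blocking (see the iter->pos check in tracing_wait_pipe()).
 */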
6573 static ssize_t
6574 tracing_read_pipe(struct file *filp, char __user *ubuf,
6575 		  size_t cnt, loff_t *ppos)
6576 {
6577 	struct trace_iterator *iter = filp->private_data;
6578 	ssize_t sret;
6579 
6580 	/*
6581 	 * Avoid more than one consumer on a single file descriptor
6582 	 * This is just a matter of trace coherency; the ring buffer itself
6583 	 * is protected.
6584 	 */
6585 	mutex_lock(&iter->mutex);
6586 
6587 	/* return any leftover data */
6588 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6589 	if (sret != -EBUSY)
6590 		goto out;
6591 
6592 	trace_seq_init(&iter->seq);
6593 
6594 	if (iter->trace->read) {
6595 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6596 		if (sret)
6597 			goto out;
6598 	}
6599 
6600 waitagain:
6601 	sret = tracing_wait_pipe(filp);
6602 	if (sret <= 0)
6603 		goto out;
6604 
6605 	/* stop when tracing is finished */
6606 	if (trace_empty(iter)) {
6607 		sret = 0;
6608 		goto out;
6609 	}
6610 
6611 	if (cnt >= PAGE_SIZE)
6612 		cnt = PAGE_SIZE - 1;
6613 
6614 	/* reset all but tr, trace, and overruns */
6615 	memset(&iter->seq, 0,
6616 	       sizeof(struct trace_iterator) -
6617 	       offsetof(struct trace_iterator, seq));
6618 	cpumask_clear(iter->started);
6619 	trace_seq_init(&iter->seq);
6620 	iter->pos = -1;
6621 
6622 	trace_event_read_lock();
6623 	trace_access_lock(iter->cpu_file);
6624 	while (trace_find_next_entry_inc(iter) != NULL) {
6625 		enum print_line_t ret;
6626 		int save_len = iter->seq.seq.len;
6627 
6628 		ret = print_trace_line(iter);
6629 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6630 			/* don't print partial lines */
6631 			iter->seq.seq.len = save_len;
6632 			break;
6633 		}
6634 		if (ret != TRACE_TYPE_NO_CONSUME)
6635 			trace_consume(iter);
6636 
6637 		if (trace_seq_used(&iter->seq) >= cnt)
6638 			break;
6639 
6640 		/*
6641 		 * Setting the full flag means we reached the trace_seq buffer
6642 		 * size and should have left via the partial-line check above;
6643 		 * one of the trace_seq_* functions is not being used properly.
6644 		 */
6645 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6646 			  iter->ent->type);
6647 	}
6648 	trace_access_unlock(iter->cpu_file);
6649 	trace_event_read_unlock();
6650 
6651 	/* Now copy what we have to the user */
6652 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6653 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6654 		trace_seq_init(&iter->seq);
6655 
6656 	/*
6657 	 * If there was nothing to send to user, in spite of consuming trace
6658 	 * entries, go back to wait for more entries.
6659 	 */
6660 	if (sret == -EBUSY)
6661 		goto waitagain;
6662 
6663 out:
6664 	mutex_unlock(&iter->mutex);
6665 
6666 	return sret;
6667 }
6668 
6669 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6670 				     unsigned int idx)
6671 {
6672 	__free_page(spd->pages[idx]);
6673 }
6674 
6675 static size_t
6676 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6677 {
6678 	size_t count;
6679 	int save_len;
6680 	int ret;
6681 
6682 	/* Seq buffer is page-sized, exactly what we need. */
6683 	for (;;) {
6684 		save_len = iter->seq.seq.len;
6685 		ret = print_trace_line(iter);
6686 
6687 		if (trace_seq_has_overflowed(&iter->seq)) {
6688 			iter->seq.seq.len = save_len;
6689 			break;
6690 		}
6691 
6692 		/*
6693 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6694 		 * should only be returned if iter->seq overflowed, which is
6695 		 * caught above.  Check it anyway to be safe.
6696 		 */
6697 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6698 			iter->seq.seq.len = save_len;
6699 			break;
6700 		}
6701 
6702 		count = trace_seq_used(&iter->seq) - save_len;
6703 		if (rem < count) {
6704 			rem = 0;
6705 			iter->seq.seq.len = save_len;
6706 			break;
6707 		}
6708 
6709 		if (ret != TRACE_TYPE_NO_CONSUME)
6710 			trace_consume(iter);
6711 		rem -= count;
6712 		if (!trace_find_next_entry_inc(iter))	{
6713 			rem = 0;
6714 			iter->ent = NULL;
6715 			break;
6716 		}
6717 	}
6718 
6719 	return rem;
6720 }
6721 
6722 static ssize_t tracing_splice_read_pipe(struct file *filp,
6723 					loff_t *ppos,
6724 					struct pipe_inode_info *pipe,
6725 					size_t len,
6726 					unsigned int flags)
6727 {
6728 	struct page *pages_def[PIPE_DEF_BUFFERS];
6729 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6730 	struct trace_iterator *iter = filp->private_data;
6731 	struct splice_pipe_desc spd = {
6732 		.pages		= pages_def,
6733 		.partial	= partial_def,
6734 		.nr_pages	= 0, /* This gets updated below. */
6735 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6736 		.ops		= &default_pipe_buf_ops,
6737 		.spd_release	= tracing_spd_release_pipe,
6738 	};
6739 	ssize_t ret;
6740 	size_t rem;
6741 	unsigned int i;
6742 
6743 	if (splice_grow_spd(pipe, &spd))
6744 		return -ENOMEM;
6745 
6746 	mutex_lock(&iter->mutex);
6747 
6748 	if (iter->trace->splice_read) {
6749 		ret = iter->trace->splice_read(iter, filp,
6750 					       ppos, pipe, len, flags);
6751 		if (ret)
6752 			goto out_err;
6753 	}
6754 
6755 	ret = tracing_wait_pipe(filp);
6756 	if (ret <= 0)
6757 		goto out_err;
6758 
6759 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6760 		ret = -EFAULT;
6761 		goto out_err;
6762 	}
6763 
6764 	trace_event_read_lock();
6765 	trace_access_lock(iter->cpu_file);
6766 
6767 	/* Fill as many pages as possible. */
6768 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6769 		spd.pages[i] = alloc_page(GFP_KERNEL);
6770 		if (!spd.pages[i])
6771 			break;
6772 
6773 		rem = tracing_fill_pipe_page(rem, iter);
6774 
6775 		/* Copy the data into the page, so we can start over. */
6776 		ret = trace_seq_to_buffer(&iter->seq,
6777 					  page_address(spd.pages[i]),
6778 					  trace_seq_used(&iter->seq));
6779 		if (ret < 0) {
6780 			__free_page(spd.pages[i]);
6781 			break;
6782 		}
6783 		spd.partial[i].offset = 0;
6784 		spd.partial[i].len = trace_seq_used(&iter->seq);
6785 
6786 		trace_seq_init(&iter->seq);
6787 	}
6788 
6789 	trace_access_unlock(iter->cpu_file);
6790 	trace_event_read_unlock();
6791 	mutex_unlock(&iter->mutex);
6792 
6793 	spd.nr_pages = i;
6794 
6795 	if (i)
6796 		ret = splice_to_pipe(pipe, &spd);
6797 	else
6798 		ret = 0;
6799 out:
6800 	splice_shrink_spd(&spd);
6801 	return ret;
6802 
6803 out_err:
6804 	mutex_unlock(&iter->mutex);
6805 	goto out;
6806 }
6807 
6808 static ssize_t
6809 tracing_entries_read(struct file *filp, char __user *ubuf,
6810 		     size_t cnt, loff_t *ppos)
6811 {
6812 	struct inode *inode = file_inode(filp);
6813 	struct trace_array *tr = inode->i_private;
6814 	int cpu = tracing_get_cpu(inode);
6815 	char buf[64];
6816 	int r = 0;
6817 	ssize_t ret;
6818 
6819 	mutex_lock(&trace_types_lock);
6820 
6821 	if (cpu == RING_BUFFER_ALL_CPUS) {
6822 		int cpu, buf_size_same;
6823 		unsigned long size;
6824 
6825 		size = 0;
6826 		buf_size_same = 1;
6827 		/* check if all cpu sizes are same */
6828 		for_each_tracing_cpu(cpu) {
6829 			/* fill in the size from first enabled cpu */
6830 			if (size == 0)
6831 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6832 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6833 				buf_size_same = 0;
6834 				break;
6835 			}
6836 		}
6837 
6838 		if (buf_size_same) {
6839 			if (!ring_buffer_expanded)
6840 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6841 					    size >> 10,
6842 					    trace_buf_size >> 10);
6843 			else
6844 				r = sprintf(buf, "%lu\n", size >> 10);
6845 		} else
6846 			r = sprintf(buf, "X\n");
6847 	} else
6848 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6849 
6850 	mutex_unlock(&trace_types_lock);
6851 
6852 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6853 	return ret;
6854 }
6855 
6856 static ssize_t
6857 tracing_entries_write(struct file *filp, const char __user *ubuf,
6858 		      size_t cnt, loff_t *ppos)
6859 {
6860 	struct inode *inode = file_inode(filp);
6861 	struct trace_array *tr = inode->i_private;
6862 	unsigned long val;
6863 	int ret;
6864 
6865 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6866 	if (ret)
6867 		return ret;
6868 
6869 	/* must have at least 1 entry */
6870 	if (!val)
6871 		return -EINVAL;
6872 
6873 	/* value is in KB */
6874 	val <<= 10;
6875 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6876 	if (ret < 0)
6877 		return ret;
6878 
6879 	*ppos += cnt;
6880 
6881 	return cnt;
6882 }
6883 
6884 static ssize_t
6885 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6886 				size_t cnt, loff_t *ppos)
6887 {
6888 	struct trace_array *tr = filp->private_data;
6889 	char buf[64];
6890 	int r, cpu;
6891 	unsigned long size = 0, expanded_size = 0;
6892 
6893 	mutex_lock(&trace_types_lock);
6894 	for_each_tracing_cpu(cpu) {
6895 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6896 		if (!ring_buffer_expanded)
6897 			expanded_size += trace_buf_size >> 10;
6898 	}
6899 	if (ring_buffer_expanded)
6900 		r = sprintf(buf, "%lu\n", size);
6901 	else
6902 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6903 	mutex_unlock(&trace_types_lock);
6904 
6905 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6906 }
6907 
6908 static ssize_t
6909 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6910 			  size_t cnt, loff_t *ppos)
6911 {
6912 	/*
6913 	 * There is no need to read what the user has written; this function
6914 	 * exists just so that using "echo" on this file does not return an error
6915 	 */
6916 
6917 	*ppos += cnt;
6918 
6919 	return cnt;
6920 }
6921 
6922 static int
6923 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6924 {
6925 	struct trace_array *tr = inode->i_private;
6926 
6927 	/* disable tracing ? */
6928 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6929 		tracer_tracing_off(tr);
6930 	/* resize the ring buffer to 0 */
6931 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6932 
6933 	trace_array_put(tr);
6934 
6935 	return 0;
6936 }
6937 
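/*
 * For illustration: this write handler backs the tracefs "trace_marker"
 * file, which lets user space inject text messages into the trace, e.g.:
 *
 *   # echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * The message shows up in the trace output as a print event at the point
 * in time the write happened.
 */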
6938 static ssize_t
6939 tracing_mark_write(struct file *filp, const char __user *ubuf,
6940 					size_t cnt, loff_t *fpos)
6941 {
6942 	struct trace_array *tr = filp->private_data;
6943 	struct ring_buffer_event *event;
6944 	enum event_trigger_type tt = ETT_NONE;
6945 	struct trace_buffer *buffer;
6946 	struct print_entry *entry;
6947 	ssize_t written;
6948 	int size;
6949 	int len;
6950 
6951 /* Used in tracing_mark_raw_write() as well */
6952 #define FAULTED_STR "<faulted>"
6953 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6954 
6955 	if (tracing_disabled)
6956 		return -EINVAL;
6957 
6958 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6959 		return -EINVAL;
6960 
6961 	if (cnt > TRACE_BUF_SIZE)
6962 		cnt = TRACE_BUF_SIZE;
6963 
6964 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6965 
6966 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6967 
6968 	/* If less than "<faulted>", then make sure we can still add that */
6969 	if (cnt < FAULTED_SIZE)
6970 		size += FAULTED_SIZE - cnt;
6971 
6972 	buffer = tr->array_buffer.buffer;
6973 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6974 					    tracing_gen_ctx());
6975 	if (unlikely(!event))
6976 		/* Ring buffer disabled, return as if not open for write */
6977 		return -EBADF;
6978 
6979 	entry = ring_buffer_event_data(event);
6980 	entry->ip = _THIS_IP_;
6981 
6982 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6983 	if (len) {
6984 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6985 		cnt = FAULTED_SIZE;
6986 		written = -EFAULT;
6987 	} else
6988 		written = cnt;
6989 
6990 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6991 		/* do not add \n before testing triggers, but add \0 */
6992 		entry->buf[cnt] = '\0';
6993 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6994 	}
6995 
6996 	if (entry->buf[cnt - 1] != '\n') {
6997 		entry->buf[cnt] = '\n';
6998 		entry->buf[cnt + 1] = '\0';
6999 	} else
7000 		entry->buf[cnt] = '\0';
7001 
7002 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7003 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7004 	__buffer_unlock_commit(buffer, event);
7005 
7006 	if (tt)
7007 		event_triggers_post_call(tr->trace_marker_file, tt);
7008 
7009 	if (written > 0)
7010 		*fpos += written;
7011 
7012 	return written;
7013 }
7014 
7015 /* Limit it for now to 3K (including tag) */
7016 #define RAW_DATA_MAX_SIZE (1024*3)
7017 
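/*
 * A hedged usage sketch for the "trace_marker_raw" file that this write
 * handler backs: the payload is binary, and its first 4 bytes must be an
 * unsigned int tag id (checked below).  Omitting headers and error
 * handling, a user-space writer might look like:
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "example" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	write(fd, &rec, sizeof(rec));
 *
 * The id value and the layout of the rest of the payload are entirely up
 * to the writer and the tool that later decodes the raw events.
 */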
7018 static ssize_t
7019 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7020 					size_t cnt, loff_t *fpos)
7021 {
7022 	struct trace_array *tr = filp->private_data;
7023 	struct ring_buffer_event *event;
7024 	struct trace_buffer *buffer;
7025 	struct raw_data_entry *entry;
7026 	ssize_t written;
7027 	int size;
7028 	int len;
7029 
7030 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7031 
7032 	if (tracing_disabled)
7033 		return -EINVAL;
7034 
7035 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7036 		return -EINVAL;
7037 
7038 	/* The marker must at least have a tag id */
7039 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7040 		return -EINVAL;
7041 
7042 	if (cnt > TRACE_BUF_SIZE)
7043 		cnt = TRACE_BUF_SIZE;
7044 
7045 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7046 
7047 	size = sizeof(*entry) + cnt;
7048 	if (cnt < FAULT_SIZE_ID)
7049 		size += FAULT_SIZE_ID - cnt;
7050 
7051 	buffer = tr->array_buffer.buffer;
7052 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7053 					    tracing_gen_ctx());
7054 	if (!event)
7055 		/* Ring buffer disabled, return as if not open for write */
7056 		return -EBADF;
7057 
7058 	entry = ring_buffer_event_data(event);
7059 
7060 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7061 	if (len) {
7062 		entry->id = -1;
7063 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7064 		written = -EFAULT;
7065 	} else
7066 		written = cnt;
7067 
7068 	__buffer_unlock_commit(buffer, event);
7069 
7070 	if (written > 0)
7071 		*fpos += written;
7072 
7073 	return written;
7074 }
7075 
7076 static int tracing_clock_show(struct seq_file *m, void *v)
7077 {
7078 	struct trace_array *tr = m->private;
7079 	int i;
7080 
7081 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7082 		seq_printf(m,
7083 			"%s%s%s%s", i ? " " : "",
7084 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7085 			i == tr->clock_id ? "]" : "");
7086 	seq_putc(m, '\n');
7087 
7088 	return 0;
7089 }
7090 
7091 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7092 {
7093 	int i;
7094 
7095 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7096 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7097 			break;
7098 	}
7099 	if (i == ARRAY_SIZE(trace_clocks))
7100 		return -EINVAL;
7101 
7102 	mutex_lock(&trace_types_lock);
7103 
7104 	tr->clock_id = i;
7105 
7106 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7107 
7108 	/*
7109 	 * New clock may not be consistent with the previous clock.
7110 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7111 	 */
7112 	tracing_reset_online_cpus(&tr->array_buffer);
7113 
7114 #ifdef CONFIG_TRACER_MAX_TRACE
7115 	if (tr->max_buffer.buffer)
7116 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7117 	tracing_reset_online_cpus(&tr->max_buffer);
7118 #endif
7119 
7120 	mutex_unlock(&trace_types_lock);
7121 
7122 	return 0;
7123 }
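
/*
 * Example (illustrative) interaction with the tracefs "trace_clock" file
 * backed by tracing_clock_show() above and tracing_clock_write() below;
 * the exact list of clocks comes from trace_clocks[] and is partly
 * architecture dependent:
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter uptime perf mono mono_raw boot
 *   # echo mono > /sys/kernel/tracing/trace_clock
 *
 * As noted above, switching clocks resets the buffers so that timestamps
 * from different clocks are never mixed.
 */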
7124 
7125 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7126 				   size_t cnt, loff_t *fpos)
7127 {
7128 	struct seq_file *m = filp->private_data;
7129 	struct trace_array *tr = m->private;
7130 	char buf[64];
7131 	const char *clockstr;
7132 	int ret;
7133 
7134 	if (cnt >= sizeof(buf))
7135 		return -EINVAL;
7136 
7137 	if (copy_from_user(buf, ubuf, cnt))
7138 		return -EFAULT;
7139 
7140 	buf[cnt] = 0;
7141 
7142 	clockstr = strstrip(buf);
7143 
7144 	ret = tracing_set_clock(tr, clockstr);
7145 	if (ret)
7146 		return ret;
7147 
7148 	*fpos += cnt;
7149 
7150 	return cnt;
7151 }
7152 
7153 static int tracing_clock_open(struct inode *inode, struct file *file)
7154 {
7155 	struct trace_array *tr = inode->i_private;
7156 	int ret;
7157 
7158 	ret = tracing_check_open_get_tr(tr);
7159 	if (ret)
7160 		return ret;
7161 
7162 	ret = single_open(file, tracing_clock_show, inode->i_private);
7163 	if (ret < 0)
7164 		trace_array_put(tr);
7165 
7166 	return ret;
7167 }
7168 
7169 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7170 {
7171 	struct trace_array *tr = m->private;
7172 
7173 	mutex_lock(&trace_types_lock);
7174 
7175 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7176 		seq_puts(m, "delta [absolute]\n");
7177 	else
7178 		seq_puts(m, "[delta] absolute\n");
7179 
7180 	mutex_unlock(&trace_types_lock);
7181 
7182 	return 0;
7183 }
7184 
7185 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7186 {
7187 	struct trace_array *tr = inode->i_private;
7188 	int ret;
7189 
7190 	ret = tracing_check_open_get_tr(tr);
7191 	if (ret)
7192 		return ret;
7193 
7194 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7195 	if (ret < 0)
7196 		trace_array_put(tr);
7197 
7198 	return ret;
7199 }
7200 
7201 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7202 {
7203 	if (rbe == this_cpu_read(trace_buffered_event))
7204 		return ring_buffer_time_stamp(buffer);
7205 
7206 	return ring_buffer_event_time_stamp(buffer, rbe);
7207 }
7208 
7209 /*
7210  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7211  */
7212 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7213 {
7214 	int ret = 0;
7215 
7216 	mutex_lock(&trace_types_lock);
7217 
7218 	if (set && tr->no_filter_buffering_ref++)
7219 		goto out;
7220 
7221 	if (!set) {
7222 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7223 			ret = -EINVAL;
7224 			goto out;
7225 		}
7226 
7227 		--tr->no_filter_buffering_ref;
7228 	}
7229  out:
7230 	mutex_unlock(&trace_types_lock);
7231 
7232 	return ret;
7233 }
7234 
7235 struct ftrace_buffer_info {
7236 	struct trace_iterator	iter;
7237 	void			*spare;
7238 	unsigned int		spare_cpu;
7239 	unsigned int		read;
7240 };
7241 
7242 #ifdef CONFIG_TRACER_SNAPSHOT
7243 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7244 {
7245 	struct trace_array *tr = inode->i_private;
7246 	struct trace_iterator *iter;
7247 	struct seq_file *m;
7248 	int ret;
7249 
7250 	ret = tracing_check_open_get_tr(tr);
7251 	if (ret)
7252 		return ret;
7253 
7254 	if (file->f_mode & FMODE_READ) {
7255 		iter = __tracing_open(inode, file, true);
7256 		if (IS_ERR(iter))
7257 			ret = PTR_ERR(iter);
7258 	} else {
7259 		/* Writes still need the seq_file to hold the private data */
7260 		ret = -ENOMEM;
7261 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7262 		if (!m)
7263 			goto out;
7264 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7265 		if (!iter) {
7266 			kfree(m);
7267 			goto out;
7268 		}
7269 		ret = 0;
7270 
7271 		iter->tr = tr;
7272 		iter->array_buffer = &tr->max_buffer;
7273 		iter->cpu_file = tracing_get_cpu(inode);
7274 		m->private = iter;
7275 		file->private_data = m;
7276 	}
7277 out:
7278 	if (ret < 0)
7279 		trace_array_put(tr);
7280 
7281 	return ret;
7282 }
7283 
7284 static ssize_t
7285 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7286 		       loff_t *ppos)
7287 {
7288 	struct seq_file *m = filp->private_data;
7289 	struct trace_iterator *iter = m->private;
7290 	struct trace_array *tr = iter->tr;
7291 	unsigned long val;
7292 	int ret;
7293 
7294 	ret = tracing_update_buffers();
7295 	if (ret < 0)
7296 		return ret;
7297 
7298 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7299 	if (ret)
7300 		return ret;
7301 
7302 	mutex_lock(&trace_types_lock);
7303 
7304 	if (tr->current_trace->use_max_tr) {
7305 		ret = -EBUSY;
7306 		goto out;
7307 	}
7308 
7309 	arch_spin_lock(&tr->max_lock);
7310 	if (tr->cond_snapshot)
7311 		ret = -EBUSY;
7312 	arch_spin_unlock(&tr->max_lock);
7313 	if (ret)
7314 		goto out;
7315 
7316 	switch (val) {
7317 	case 0:
7318 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7319 			ret = -EINVAL;
7320 			break;
7321 		}
7322 		if (tr->allocated_snapshot)
7323 			free_snapshot(tr);
7324 		break;
7325 	case 1:
7326 /* Only allow per-cpu swap if the ring buffer supports it */
7327 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7328 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7329 			ret = -EINVAL;
7330 			break;
7331 		}
7332 #endif
7333 		if (tr->allocated_snapshot)
7334 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7335 					&tr->array_buffer, iter->cpu_file);
7336 		else
7337 			ret = tracing_alloc_snapshot_instance(tr);
7338 		if (ret < 0)
7339 			break;
7340 		local_irq_disable();
7341 		/* Now, we're going to swap */
7342 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7343 			update_max_tr(tr, current, smp_processor_id(), NULL);
7344 		else
7345 			update_max_tr_single(tr, current, iter->cpu_file);
7346 		local_irq_enable();
7347 		break;
7348 	default:
7349 		if (tr->allocated_snapshot) {
7350 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7351 				tracing_reset_online_cpus(&tr->max_buffer);
7352 			else
7353 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7354 		}
7355 		break;
7356 	}
7357 
7358 	if (ret >= 0) {
7359 		*ppos += cnt;
7360 		ret = cnt;
7361 	}
7362 out:
7363 	mutex_unlock(&trace_types_lock);
7364 	return ret;
7365 }
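
/*
 * For reference, the values handled above match the documented semantics
 * of the tracefs "snapshot" file:
 *
 *   # echo 0 > /sys/kernel/tracing/snapshot    free the snapshot buffer
 *   # echo 1 > /sys/kernel/tracing/snapshot    allocate (if needed) and
 *                                              snapshot the main buffer
 *   # echo 2 > /sys/kernel/tracing/snapshot    clear the snapshot buffer
 *                                              (any other value does this)
 *   # cat /sys/kernel/tracing/snapshot         read the snapshotted trace
 */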
7366 
7367 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7368 {
7369 	struct seq_file *m = file->private_data;
7370 	int ret;
7371 
7372 	ret = tracing_release(inode, file);
7373 
7374 	if (file->f_mode & FMODE_READ)
7375 		return ret;
7376 
7377 	/* If write only, the seq_file is just a stub */
7378 	if (m)
7379 		kfree(m->private);
7380 	kfree(m);
7381 
7382 	return 0;
7383 }
7384 
7385 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7386 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7387 				    size_t count, loff_t *ppos);
7388 static int tracing_buffers_release(struct inode *inode, struct file *file);
7389 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7390 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7391 
7392 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7393 {
7394 	struct ftrace_buffer_info *info;
7395 	int ret;
7396 
7397 	/* The following checks for tracefs lockdown */
7398 	ret = tracing_buffers_open(inode, filp);
7399 	if (ret < 0)
7400 		return ret;
7401 
7402 	info = filp->private_data;
7403 
7404 	if (info->iter.trace->use_max_tr) {
7405 		tracing_buffers_release(inode, filp);
7406 		return -EBUSY;
7407 	}
7408 
7409 	info->iter.snapshot = true;
7410 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7411 
7412 	return ret;
7413 }
7414 
7415 #endif /* CONFIG_TRACER_SNAPSHOT */
7416 
7417 
7418 static const struct file_operations tracing_thresh_fops = {
7419 	.open		= tracing_open_generic,
7420 	.read		= tracing_thresh_read,
7421 	.write		= tracing_thresh_write,
7422 	.llseek		= generic_file_llseek,
7423 };
7424 
7425 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7426 static const struct file_operations tracing_max_lat_fops = {
7427 	.open		= tracing_open_generic,
7428 	.read		= tracing_max_lat_read,
7429 	.write		= tracing_max_lat_write,
7430 	.llseek		= generic_file_llseek,
7431 };
7432 #endif
7433 
7434 static const struct file_operations set_tracer_fops = {
7435 	.open		= tracing_open_generic,
7436 	.read		= tracing_set_trace_read,
7437 	.write		= tracing_set_trace_write,
7438 	.llseek		= generic_file_llseek,
7439 };
7440 
7441 static const struct file_operations tracing_pipe_fops = {
7442 	.open		= tracing_open_pipe,
7443 	.poll		= tracing_poll_pipe,
7444 	.read		= tracing_read_pipe,
7445 	.splice_read	= tracing_splice_read_pipe,
7446 	.release	= tracing_release_pipe,
7447 	.llseek		= no_llseek,
7448 };
7449 
7450 static const struct file_operations tracing_entries_fops = {
7451 	.open		= tracing_open_generic_tr,
7452 	.read		= tracing_entries_read,
7453 	.write		= tracing_entries_write,
7454 	.llseek		= generic_file_llseek,
7455 	.release	= tracing_release_generic_tr,
7456 };
7457 
7458 static const struct file_operations tracing_total_entries_fops = {
7459 	.open		= tracing_open_generic_tr,
7460 	.read		= tracing_total_entries_read,
7461 	.llseek		= generic_file_llseek,
7462 	.release	= tracing_release_generic_tr,
7463 };
7464 
7465 static const struct file_operations tracing_free_buffer_fops = {
7466 	.open		= tracing_open_generic_tr,
7467 	.write		= tracing_free_buffer_write,
7468 	.release	= tracing_free_buffer_release,
7469 };
7470 
7471 static const struct file_operations tracing_mark_fops = {
7472 	.open		= tracing_open_generic_tr,
7473 	.write		= tracing_mark_write,
7474 	.llseek		= generic_file_llseek,
7475 	.release	= tracing_release_generic_tr,
7476 };
7477 
7478 static const struct file_operations tracing_mark_raw_fops = {
7479 	.open		= tracing_open_generic_tr,
7480 	.write		= tracing_mark_raw_write,
7481 	.llseek		= generic_file_llseek,
7482 	.release	= tracing_release_generic_tr,
7483 };
7484 
7485 static const struct file_operations trace_clock_fops = {
7486 	.open		= tracing_clock_open,
7487 	.read		= seq_read,
7488 	.llseek		= seq_lseek,
7489 	.release	= tracing_single_release_tr,
7490 	.write		= tracing_clock_write,
7491 };
7492 
7493 static const struct file_operations trace_time_stamp_mode_fops = {
7494 	.open		= tracing_time_stamp_mode_open,
7495 	.read		= seq_read,
7496 	.llseek		= seq_lseek,
7497 	.release	= tracing_single_release_tr,
7498 };
7499 
7500 #ifdef CONFIG_TRACER_SNAPSHOT
7501 static const struct file_operations snapshot_fops = {
7502 	.open		= tracing_snapshot_open,
7503 	.read		= seq_read,
7504 	.write		= tracing_snapshot_write,
7505 	.llseek		= tracing_lseek,
7506 	.release	= tracing_snapshot_release,
7507 };
7508 
7509 static const struct file_operations snapshot_raw_fops = {
7510 	.open		= snapshot_raw_open,
7511 	.read		= tracing_buffers_read,
7512 	.release	= tracing_buffers_release,
7513 	.splice_read	= tracing_buffers_splice_read,
7514 	.llseek		= no_llseek,
7515 };
7516 
7517 #endif /* CONFIG_TRACER_SNAPSHOT */
7518 
7519 #define TRACING_LOG_ERRS_MAX	8
7520 #define TRACING_LOG_LOC_MAX	128
7521 
7522 #define CMD_PREFIX "  Command: "
7523 
7524 struct err_info {
7525 	const char	**errs;	/* ptr to loc-specific array of err strings */
7526 	u8		type;	/* index into errs -> specific err string */
7527 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7528 	u64		ts;
7529 };
7530 
7531 struct tracing_log_err {
7532 	struct list_head	list;
7533 	struct err_info		info;
7534 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7535 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7536 };
7537 
7538 static DEFINE_MUTEX(tracing_err_log_lock);
7539 
7540 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7541 {
7542 	struct tracing_log_err *err;
7543 
7544 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7545 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7546 		if (!err)
7547 			err = ERR_PTR(-ENOMEM);
7548 		tr->n_err_log_entries++;
7549 
7550 		return err;
7551 	}
7552 
7553 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7554 	list_del(&err->list);
7555 
7556 	return err;
7557 }
7558 
7559 /**
7560  * err_pos - find the position of a string within a command for error careting
7561  * @cmd: The tracing command that caused the error
7562  * @str: The string to position the caret at within @cmd
7563  *
7564  * Finds the position of the first occurrence of @str within @cmd.  The
7565  * return value can be passed to tracing_log_err() for caret placement
7566  * within @cmd.
7567  *
7568  * Returns the index within @cmd of the first occurrence of @str or 0
7569  * if @str was not found.
7570  */
7571 unsigned int err_pos(char *cmd, const char *str)
7572 {
7573 	char *found;
7574 
7575 	if (WARN_ON(!strlen(cmd)))
7576 		return 0;
7577 
7578 	found = strstr(cmd, str);
7579 	if (found)
7580 		return found - cmd;
7581 
7582 	return 0;
7583 }
7584 
7585 /**
7586  * tracing_log_err - write an error to the tracing error log
7587  * @tr: The associated trace array for the error (NULL for top level array)
7588  * @loc: A string describing where the error occurred
7589  * @cmd: The tracing command that caused the error
7590  * @errs: The array of loc-specific static error strings
7591  * @type: The index into errs[], which produces the specific static err string
7592  * @pos: The position the caret should be placed in the cmd
7593  *
7594  * Writes an error into tracing/error_log of the form:
7595  *
7596  * <loc>: error: <text>
7597  *   Command: <cmd>
7598  *              ^
7599  *
7600  * tracing/error_log is a small log file containing the last
7601  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7602  * unless there has been a tracing error, and the error log can be
7603  * cleared and have its memory freed by writing the empty string in
7604  * truncation mode to it i.e. echo > tracing/error_log.
7605  *
7606  * NOTE: the @errs array along with the @type param are used to
7607  * produce a static error string - this string is not copied and saved
7608  * when the error is logged - only a pointer to it is saved.  See
7609  * existing callers for examples of how static strings are typically
7610  * defined for use with tracing_log_err().
7611  */
7612 void tracing_log_err(struct trace_array *tr,
7613 		     const char *loc, const char *cmd,
7614 		     const char **errs, u8 type, u8 pos)
7615 {
7616 	struct tracing_log_err *err;
7617 
7618 	if (!tr)
7619 		tr = &global_trace;
7620 
7621 	mutex_lock(&tracing_err_log_lock);
7622 	err = get_tracing_log_err(tr);
7623 	if (PTR_ERR(err) == -ENOMEM) {
7624 		mutex_unlock(&tracing_err_log_lock);
7625 		return;
7626 	}
7627 
7628 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7629 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7630 
7631 	err->info.errs = errs;
7632 	err->info.type = type;
7633 	err->info.pos = pos;
7634 	err->info.ts = local_clock();
7635 
7636 	list_add_tail(&err->list, &tr->err_log);
7637 	mutex_unlock(&tracing_err_log_lock);
7638 }
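
/*
 * Illustrative sketch of a caller (not an actual user in this file):
 * a command parser would keep a static array of error strings indexed
 * by an enum and report a parse failure along these lines, where
 * parse_errs, PARSE_ERR_BADTOK, bad_token(), buf and tok are all
 * hypothetical names:
 *
 *	static const char *parse_errs[] = { "Unknown token", ... };
 *
 *	if (bad_token(tok))
 *		tracing_log_err(tr, "my_parser", buf, parse_errs,
 *				PARSE_ERR_BADTOK, err_pos(buf, tok));
 *
 * Only the pointer to parse_errs[] is stored, so the array must have
 * static storage; @buf is copied and shown under CMD_PREFIX with the
 * caret placed at the offset err_pos() returned.
 */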
7639 
7640 static void clear_tracing_err_log(struct trace_array *tr)
7641 {
7642 	struct tracing_log_err *err, *next;
7643 
7644 	mutex_lock(&tracing_err_log_lock);
7645 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7646 		list_del(&err->list);
7647 		kfree(err);
7648 	}
7649 
7650 	tr->n_err_log_entries = 0;
7651 	mutex_unlock(&tracing_err_log_lock);
7652 }
7653 
7654 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7655 {
7656 	struct trace_array *tr = m->private;
7657 
7658 	mutex_lock(&tracing_err_log_lock);
7659 
7660 	return seq_list_start(&tr->err_log, *pos);
7661 }
7662 
7663 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7664 {
7665 	struct trace_array *tr = m->private;
7666 
7667 	return seq_list_next(v, &tr->err_log, pos);
7668 }
7669 
7670 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7671 {
7672 	mutex_unlock(&tracing_err_log_lock);
7673 }
7674 
7675 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7676 {
7677 	u8 i;
7678 
7679 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7680 		seq_putc(m, ' ');
7681 	for (i = 0; i < pos; i++)
7682 		seq_putc(m, ' ');
7683 	seq_puts(m, "^\n");
7684 }
7685 
7686 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7687 {
7688 	struct tracing_log_err *err = v;
7689 
7690 	if (err) {
7691 		const char *err_text = err->info.errs[err->info.type];
7692 		u64 sec = err->info.ts;
7693 		u32 nsec;
7694 
7695 		nsec = do_div(sec, NSEC_PER_SEC);
7696 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7697 			   err->loc, err_text);
7698 		seq_printf(m, "%s", err->cmd);
7699 		tracing_err_log_show_pos(m, err->info.pos);
7700 	}
7701 
7702 	return 0;
7703 }
7704 
7705 static const struct seq_operations tracing_err_log_seq_ops = {
7706 	.start  = tracing_err_log_seq_start,
7707 	.next   = tracing_err_log_seq_next,
7708 	.stop   = tracing_err_log_seq_stop,
7709 	.show   = tracing_err_log_seq_show
7710 };
7711 
7712 static int tracing_err_log_open(struct inode *inode, struct file *file)
7713 {
7714 	struct trace_array *tr = inode->i_private;
7715 	int ret = 0;
7716 
7717 	ret = tracing_check_open_get_tr(tr);
7718 	if (ret)
7719 		return ret;
7720 
7721 	/* If this file was opened for write, then erase contents */
7722 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7723 		clear_tracing_err_log(tr);
7724 
7725 	if (file->f_mode & FMODE_READ) {
7726 		ret = seq_open(file, &tracing_err_log_seq_ops);
7727 		if (!ret) {
7728 			struct seq_file *m = file->private_data;
7729 			m->private = tr;
7730 		} else {
7731 			trace_array_put(tr);
7732 		}
7733 	}
7734 	return ret;
7735 }
7736 
7737 static ssize_t tracing_err_log_write(struct file *file,
7738 				     const char __user *buffer,
7739 				     size_t count, loff_t *ppos)
7740 {
7741 	return count;
7742 }
7743 
7744 static int tracing_err_log_release(struct inode *inode, struct file *file)
7745 {
7746 	struct trace_array *tr = inode->i_private;
7747 
7748 	trace_array_put(tr);
7749 
7750 	if (file->f_mode & FMODE_READ)
7751 		seq_release(inode, file);
7752 
7753 	return 0;
7754 }
7755 
7756 static const struct file_operations tracing_err_log_fops = {
7757 	.open           = tracing_err_log_open,
7758 	.write		= tracing_err_log_write,
7759 	.read           = seq_read,
7760 	.llseek         = seq_lseek,
7761 	.release        = tracing_err_log_release,
7762 };
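
/*
 * Typical interaction with the error_log file from user space, using
 * the placeholders from the tracing_log_err() comment above (paths
 * assume the usual tracefs mount point, "foo" is an example instance):
 *
 *	# cat /sys/kernel/tracing/error_log
 *	[  123.456789] <loc>: error: <text>
 *	  Command: <cmd>
 *	             ^
 *	# echo > /sys/kernel/tracing/error_log			clear the log
 *	# cat /sys/kernel/tracing/instances/foo/error_log	per-instance log
 *
 * Other writes are accepted and ignored by tracing_err_log_write(),
 * so "echo >" only clears via the O_TRUNC handling in the open above.
 */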
7763 
7764 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7765 {
7766 	struct trace_array *tr = inode->i_private;
7767 	struct ftrace_buffer_info *info;
7768 	int ret;
7769 
7770 	ret = tracing_check_open_get_tr(tr);
7771 	if (ret)
7772 		return ret;
7773 
7774 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7775 	if (!info) {
7776 		trace_array_put(tr);
7777 		return -ENOMEM;
7778 	}
7779 
7780 	mutex_lock(&trace_types_lock);
7781 
7782 	info->iter.tr		= tr;
7783 	info->iter.cpu_file	= tracing_get_cpu(inode);
7784 	info->iter.trace	= tr->current_trace;
7785 	info->iter.array_buffer = &tr->array_buffer;
7786 	info->spare		= NULL;
7787 	/* Force reading ring buffer for first read */
7788 	info->read		= (unsigned int)-1;
7789 
7790 	filp->private_data = info;
7791 
7792 	tr->trace_ref++;
7793 
7794 	mutex_unlock(&trace_types_lock);
7795 
7796 	ret = nonseekable_open(inode, filp);
7797 	if (ret < 0)
7798 		trace_array_put(tr);
7799 
7800 	return ret;
7801 }
7802 
7803 static __poll_t
7804 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7805 {
7806 	struct ftrace_buffer_info *info = filp->private_data;
7807 	struct trace_iterator *iter = &info->iter;
7808 
7809 	return trace_poll(iter, filp, poll_table);
7810 }
7811 
7812 static ssize_t
7813 tracing_buffers_read(struct file *filp, char __user *ubuf,
7814 		     size_t count, loff_t *ppos)
7815 {
7816 	struct ftrace_buffer_info *info = filp->private_data;
7817 	struct trace_iterator *iter = &info->iter;
7818 	ssize_t ret = 0;
7819 	ssize_t size;
7820 
7821 	if (!count)
7822 		return 0;
7823 
7824 #ifdef CONFIG_TRACER_MAX_TRACE
7825 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7826 		return -EBUSY;
7827 #endif
7828 
7829 	if (!info->spare) {
7830 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7831 							  iter->cpu_file);
7832 		if (IS_ERR(info->spare)) {
7833 			ret = PTR_ERR(info->spare);
7834 			info->spare = NULL;
7835 		} else {
7836 			info->spare_cpu = iter->cpu_file;
7837 		}
7838 	}
7839 	if (!info->spare)
7840 		return ret;
7841 
7842 	/* Do we have previous read data to read? */
7843 	if (info->read < PAGE_SIZE)
7844 		goto read;
7845 
7846  again:
7847 	trace_access_lock(iter->cpu_file);
7848 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7849 				    &info->spare,
7850 				    count,
7851 				    iter->cpu_file, 0);
7852 	trace_access_unlock(iter->cpu_file);
7853 
7854 	if (ret < 0) {
7855 		if (trace_empty(iter)) {
7856 			if ((filp->f_flags & O_NONBLOCK))
7857 				return -EAGAIN;
7858 
7859 			ret = wait_on_pipe(iter, 0);
7860 			if (ret)
7861 				return ret;
7862 
7863 			goto again;
7864 		}
7865 		return 0;
7866 	}
7867 
7868 	info->read = 0;
7869  read:
7870 	size = PAGE_SIZE - info->read;
7871 	if (size > count)
7872 		size = count;
7873 
7874 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7875 	if (ret == size)
7876 		return -EFAULT;
7877 
7878 	size -= ret;
7879 
7880 	*ppos += size;
7881 	info->read += size;
7882 
7883 	return size;
7884 }
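
/*
 * Sketch of how a user-space tool is expected to consume this file
 * (trace_pipe_raw): data is handed out as raw ring-buffer pages,
 * including the binary page header, so it is typically read in
 * PAGE_SIZE chunks and fed to a binary decoder such as trace-cmd.
 * decode_subbuf() below is a hypothetical decoder and the 4096 assumes
 * a 4K PAGE_SIZE:
 *
 *	int fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	char page[4096];
 *
 *	while (read(fd, page, sizeof(page)) > 0)
 *		decode_subbuf(page);
 *
 * A short read just means the remainder of the current page had
 * already been handed out (tracked by info->read above).
 */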
7885 
7886 static int tracing_buffers_release(struct inode *inode, struct file *file)
7887 {
7888 	struct ftrace_buffer_info *info = file->private_data;
7889 	struct trace_iterator *iter = &info->iter;
7890 
7891 	mutex_lock(&trace_types_lock);
7892 
7893 	iter->tr->trace_ref--;
7894 
7895 	__trace_array_put(iter->tr);
7896 
7897 	if (info->spare)
7898 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7899 					   info->spare_cpu, info->spare);
7900 	kvfree(info);
7901 
7902 	mutex_unlock(&trace_types_lock);
7903 
7904 	return 0;
7905 }
7906 
7907 struct buffer_ref {
7908 	struct trace_buffer	*buffer;
7909 	void			*page;
7910 	int			cpu;
7911 	refcount_t		refcount;
7912 };
7913 
7914 static void buffer_ref_release(struct buffer_ref *ref)
7915 {
7916 	if (!refcount_dec_and_test(&ref->refcount))
7917 		return;
7918 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7919 	kfree(ref);
7920 }
7921 
7922 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7923 				    struct pipe_buffer *buf)
7924 {
7925 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7926 
7927 	buffer_ref_release(ref);
7928 	buf->private = 0;
7929 }
7930 
7931 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7932 				struct pipe_buffer *buf)
7933 {
7934 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7935 
7936 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7937 		return false;
7938 
7939 	refcount_inc(&ref->refcount);
7940 	return true;
7941 }
7942 
7943 /* Pipe buffer operations for a buffer. */
7944 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7945 	.release		= buffer_pipe_buf_release,
7946 	.get			= buffer_pipe_buf_get,
7947 };
7948 
7949 /*
7950  * Callback from splice_to_pipe(): release the pages left in the spd
7951  * if we errored out while filling the pipe.
7952  */
7953 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7954 {
7955 	struct buffer_ref *ref =
7956 		(struct buffer_ref *)spd->partial[i].private;
7957 
7958 	buffer_ref_release(ref);
7959 	spd->partial[i].private = 0;
7960 }
7961 
7962 static ssize_t
7963 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7964 			    struct pipe_inode_info *pipe, size_t len,
7965 			    unsigned int flags)
7966 {
7967 	struct ftrace_buffer_info *info = file->private_data;
7968 	struct trace_iterator *iter = &info->iter;
7969 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7970 	struct page *pages_def[PIPE_DEF_BUFFERS];
7971 	struct splice_pipe_desc spd = {
7972 		.pages		= pages_def,
7973 		.partial	= partial_def,
7974 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7975 		.ops		= &buffer_pipe_buf_ops,
7976 		.spd_release	= buffer_spd_release,
7977 	};
7978 	struct buffer_ref *ref;
7979 	int entries, i;
7980 	ssize_t ret = 0;
7981 
7982 #ifdef CONFIG_TRACER_MAX_TRACE
7983 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7984 		return -EBUSY;
7985 #endif
7986 
7987 	if (*ppos & (PAGE_SIZE - 1))
7988 		return -EINVAL;
7989 
7990 	if (len & (PAGE_SIZE - 1)) {
7991 		if (len < PAGE_SIZE)
7992 			return -EINVAL;
7993 		len &= PAGE_MASK;
7994 	}
7995 
7996 	if (splice_grow_spd(pipe, &spd))
7997 		return -ENOMEM;
7998 
7999  again:
8000 	trace_access_lock(iter->cpu_file);
8001 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8002 
8003 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8004 		struct page *page;
8005 		int r;
8006 
8007 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8008 		if (!ref) {
8009 			ret = -ENOMEM;
8010 			break;
8011 		}
8012 
8013 		refcount_set(&ref->refcount, 1);
8014 		ref->buffer = iter->array_buffer->buffer;
8015 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8016 		if (IS_ERR(ref->page)) {
8017 			ret = PTR_ERR(ref->page);
8018 			ref->page = NULL;
8019 			kfree(ref);
8020 			break;
8021 		}
8022 		ref->cpu = iter->cpu_file;
8023 
8024 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8025 					  len, iter->cpu_file, 1);
8026 		if (r < 0) {
8027 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8028 						   ref->page);
8029 			kfree(ref);
8030 			break;
8031 		}
8032 
8033 		page = virt_to_page(ref->page);
8034 
8035 		spd.pages[i] = page;
8036 		spd.partial[i].len = PAGE_SIZE;
8037 		spd.partial[i].offset = 0;
8038 		spd.partial[i].private = (unsigned long)ref;
8039 		spd.nr_pages++;
8040 		*ppos += PAGE_SIZE;
8041 
8042 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8043 	}
8044 
8045 	trace_access_unlock(iter->cpu_file);
8046 	spd.nr_pages = i;
8047 
8048 	/* did we read anything? */
8049 	if (!spd.nr_pages) {
8050 		if (ret)
8051 			goto out;
8052 
8053 		ret = -EAGAIN;
8054 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8055 			goto out;
8056 
8057 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8058 		if (ret)
8059 			goto out;
8060 
8061 		goto again;
8062 	}
8063 
8064 	ret = splice_to_pipe(pipe, &spd);
8065 out:
8066 	splice_shrink_spd(&spd);
8067 
8068 	return ret;
8069 }
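
/*
 * Minimal user-space sketch of the zero-copy path implemented above:
 * splice ring-buffer pages from trace_pipe_raw into a pipe and then on
 * to a file or socket.  raw_fd and out_fd are assumptions of the
 * example, error handling is omitted, and 4096 assumes a 4K PAGE_SIZE:
 *
 *	int p[2];
 *
 *	pipe(p);
 *	while (splice(raw_fd, NULL, p[1], NULL, 4096, 0) > 0)
 *		splice(p[0], NULL, out_fd, NULL, 4096, 0);
 *
 * The requested length must be at least PAGE_SIZE and the file offset
 * page aligned, matching the checks on *ppos and len at the top of
 * this function.
 */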
8070 
8071 static const struct file_operations tracing_buffers_fops = {
8072 	.open		= tracing_buffers_open,
8073 	.read		= tracing_buffers_read,
8074 	.poll		= tracing_buffers_poll,
8075 	.release	= tracing_buffers_release,
8076 	.splice_read	= tracing_buffers_splice_read,
8077 	.llseek		= no_llseek,
8078 };
8079 
8080 static ssize_t
8081 tracing_stats_read(struct file *filp, char __user *ubuf,
8082 		   size_t count, loff_t *ppos)
8083 {
8084 	struct inode *inode = file_inode(filp);
8085 	struct trace_array *tr = inode->i_private;
8086 	struct array_buffer *trace_buf = &tr->array_buffer;
8087 	int cpu = tracing_get_cpu(inode);
8088 	struct trace_seq *s;
8089 	unsigned long cnt;
8090 	unsigned long long t;
8091 	unsigned long usec_rem;
8092 
8093 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8094 	if (!s)
8095 		return -ENOMEM;
8096 
8097 	trace_seq_init(s);
8098 
8099 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8100 	trace_seq_printf(s, "entries: %ld\n", cnt);
8101 
8102 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8103 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8104 
8105 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8106 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8107 
8108 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8109 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8110 
8111 	if (trace_clocks[tr->clock_id].in_ns) {
8112 		/* local or global for trace_clock */
8113 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8114 		usec_rem = do_div(t, USEC_PER_SEC);
8115 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8116 								t, usec_rem);
8117 
8118 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8119 		usec_rem = do_div(t, USEC_PER_SEC);
8120 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8121 	} else {
8122 		/* counter or tsc mode for trace_clock */
8123 		trace_seq_printf(s, "oldest event ts: %llu\n",
8124 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8125 
8126 		trace_seq_printf(s, "now ts: %llu\n",
8127 				ring_buffer_time_stamp(trace_buf->buffer));
8128 	}
8129 
8130 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8131 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8132 
8133 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8134 	trace_seq_printf(s, "read events: %ld\n", cnt);
8135 
8136 	count = simple_read_from_buffer(ubuf, count, ppos,
8137 					s->buffer, trace_seq_used(s));
8138 
8139 	kfree(s);
8140 
8141 	return count;
8142 }
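
/*
 * The resulting per_cpu/cpuN/stats file reads roughly as follows (the
 * values are made up; the two "ts" lines only use the seconds.usecs
 * form when the current trace clock counts in nanoseconds):
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 5632
 *	oldest event ts:  2745.800310
 *	now ts:  2747.052345
 *	dropped events: 0
 *	read events: 107
 */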
8143 
8144 static const struct file_operations tracing_stats_fops = {
8145 	.open		= tracing_open_generic_tr,
8146 	.read		= tracing_stats_read,
8147 	.llseek		= generic_file_llseek,
8148 	.release	= tracing_release_generic_tr,
8149 };
8150 
8151 #ifdef CONFIG_DYNAMIC_FTRACE
8152 
8153 static ssize_t
8154 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8155 		  size_t cnt, loff_t *ppos)
8156 {
8157 	ssize_t ret;
8158 	char *buf;
8159 	int r;
8160 
8161 	/* 256 should be plenty to hold the amount needed */
8162 	buf = kmalloc(256, GFP_KERNEL);
8163 	if (!buf)
8164 		return -ENOMEM;
8165 
8166 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8167 		      ftrace_update_tot_cnt,
8168 		      ftrace_number_of_pages,
8169 		      ftrace_number_of_groups);
8170 
8171 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8172 	kfree(buf);
8173 	return ret;
8174 }
8175 
8176 static const struct file_operations tracing_dyn_info_fops = {
8177 	.open		= tracing_open_generic,
8178 	.read		= tracing_read_dyn_info,
8179 	.llseek		= generic_file_llseek,
8180 };
8181 #endif /* CONFIG_DYNAMIC_FTRACE */
8182 
8183 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8184 static void
8185 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8186 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8187 		void *data)
8188 {
8189 	tracing_snapshot_instance(tr);
8190 }
8191 
8192 static void
8193 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8194 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8195 		      void *data)
8196 {
8197 	struct ftrace_func_mapper *mapper = data;
8198 	long *count = NULL;
8199 
8200 	if (mapper)
8201 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8202 
8203 	if (count) {
8204 
8205 		if (*count <= 0)
8206 			return;
8207 
8208 		(*count)--;
8209 	}
8210 
8211 	tracing_snapshot_instance(tr);
8212 }
8213 
8214 static int
8215 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8216 		      struct ftrace_probe_ops *ops, void *data)
8217 {
8218 	struct ftrace_func_mapper *mapper = data;
8219 	long *count = NULL;
8220 
8221 	seq_printf(m, "%ps:", (void *)ip);
8222 
8223 	seq_puts(m, "snapshot");
8224 
8225 	if (mapper)
8226 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8227 
8228 	if (count)
8229 		seq_printf(m, ":count=%ld\n", *count);
8230 	else
8231 		seq_puts(m, ":unlimited\n");
8232 
8233 	return 0;
8234 }
8235 
8236 static int
8237 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8238 		     unsigned long ip, void *init_data, void **data)
8239 {
8240 	struct ftrace_func_mapper *mapper = *data;
8241 
8242 	if (!mapper) {
8243 		mapper = allocate_ftrace_func_mapper();
8244 		if (!mapper)
8245 			return -ENOMEM;
8246 		*data = mapper;
8247 	}
8248 
8249 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8250 }
8251 
8252 static void
8253 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8254 		     unsigned long ip, void *data)
8255 {
8256 	struct ftrace_func_mapper *mapper = data;
8257 
8258 	if (!ip) {
8259 		if (!mapper)
8260 			return;
8261 		free_ftrace_func_mapper(mapper, NULL);
8262 		return;
8263 	}
8264 
8265 	ftrace_func_mapper_remove_ip(mapper, ip);
8266 }
8267 
8268 static struct ftrace_probe_ops snapshot_probe_ops = {
8269 	.func			= ftrace_snapshot,
8270 	.print			= ftrace_snapshot_print,
8271 };
8272 
8273 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8274 	.func			= ftrace_count_snapshot,
8275 	.print			= ftrace_snapshot_print,
8276 	.init			= ftrace_snapshot_init,
8277 	.free			= ftrace_snapshot_free,
8278 };
8279 
8280 static int
8281 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8282 			       char *glob, char *cmd, char *param, int enable)
8283 {
8284 	struct ftrace_probe_ops *ops;
8285 	void *count = (void *)-1;
8286 	char *number;
8287 	int ret;
8288 
8289 	if (!tr)
8290 		return -ENODEV;
8291 
8292 	/* hash funcs only work with set_ftrace_filter */
8293 	if (!enable)
8294 		return -EINVAL;
8295 
8296 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8297 
8298 	if (glob[0] == '!')
8299 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8300 
8301 	if (!param)
8302 		goto out_reg;
8303 
8304 	number = strsep(&param, ":");
8305 
8306 	if (!strlen(number))
8307 		goto out_reg;
8308 
8309 	/*
8310 	 * We use the callback data field (which is a pointer)
8311 	 * as our counter.
8312 	 */
8313 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8314 	if (ret)
8315 		return ret;
8316 
8317  out_reg:
8318 	ret = tracing_alloc_snapshot_instance(tr);
8319 	if (ret < 0)
8320 		goto out;
8321 
8322 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8323 
8324  out:
8325 	return ret < 0 ? ret : 0;
8326 }
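
/*
 * This callback implements the "snapshot" command of set_ftrace_filter.
 * Typical usage from user space, where schedule is just an example
 * function name:
 *
 *	# echo 'schedule:snapshot' > set_ftrace_filter	   snapshot on every hit
 *	# echo 'schedule:snapshot:3' > set_ftrace_filter   only the first 3 hits
 *	# echo '!schedule:snapshot' > set_ftrace_filter	   remove the probe
 *
 * The optional ":count" ends up in the probe's data pointer above and
 * is decremented by ftrace_count_snapshot() until it hits zero.
 */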
8327 
8328 static struct ftrace_func_command ftrace_snapshot_cmd = {
8329 	.name			= "snapshot",
8330 	.func			= ftrace_trace_snapshot_callback,
8331 };
8332 
8333 static __init int register_snapshot_cmd(void)
8334 {
8335 	return register_ftrace_command(&ftrace_snapshot_cmd);
8336 }
8337 #else
8338 static inline __init int register_snapshot_cmd(void) { return 0; }
8339 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8340 
8341 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8342 {
8343 	if (WARN_ON(!tr->dir))
8344 		return ERR_PTR(-ENODEV);
8345 
8346 	/* Top directory uses NULL as the parent */
8347 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8348 		return NULL;
8349 
8350 	/* All sub buffers have a descriptor */
8351 	return tr->dir;
8352 }
8353 
8354 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8355 {
8356 	struct dentry *d_tracer;
8357 
8358 	if (tr->percpu_dir)
8359 		return tr->percpu_dir;
8360 
8361 	d_tracer = tracing_get_dentry(tr);
8362 	if (IS_ERR(d_tracer))
8363 		return NULL;
8364 
8365 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8366 
8367 	MEM_FAIL(!tr->percpu_dir,
8368 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8369 
8370 	return tr->percpu_dir;
8371 }
8372 
8373 static struct dentry *
8374 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8375 		      void *data, long cpu, const struct file_operations *fops)
8376 {
8377 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8378 
8379 	if (ret) /* See tracing_get_cpu() */
8380 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8381 	return ret;
8382 }
8383 
8384 static void
8385 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8386 {
8387 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8388 	struct dentry *d_cpu;
8389 	char cpu_dir[30]; /* 30 characters should be more than enough */
8390 
8391 	if (!d_percpu)
8392 		return;
8393 
8394 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8395 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8396 	if (!d_cpu) {
8397 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8398 		return;
8399 	}
8400 
8401 	/* per cpu trace_pipe */
8402 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8403 				tr, cpu, &tracing_pipe_fops);
8404 
8405 	/* per cpu trace */
8406 	trace_create_cpu_file("trace", 0644, d_cpu,
8407 				tr, cpu, &tracing_fops);
8408 
8409 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8410 				tr, cpu, &tracing_buffers_fops);
8411 
8412 	trace_create_cpu_file("stats", 0444, d_cpu,
8413 				tr, cpu, &tracing_stats_fops);
8414 
8415 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8416 				tr, cpu, &tracing_entries_fops);
8417 
8418 #ifdef CONFIG_TRACER_SNAPSHOT
8419 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8420 				tr, cpu, &snapshot_fops);
8421 
8422 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8423 				tr, cpu, &snapshot_raw_fops);
8424 #endif
8425 }
8426 
8427 #ifdef CONFIG_FTRACE_SELFTEST
8428 /* Let selftest have access to static functions in this file */
8429 #include "trace_selftest.c"
8430 #endif
8431 
8432 static ssize_t
8433 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8434 			loff_t *ppos)
8435 {
8436 	struct trace_option_dentry *topt = filp->private_data;
8437 	char *buf;
8438 
8439 	if (topt->flags->val & topt->opt->bit)
8440 		buf = "1\n";
8441 	else
8442 		buf = "0\n";
8443 
8444 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8445 }
8446 
8447 static ssize_t
8448 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8449 			 loff_t *ppos)
8450 {
8451 	struct trace_option_dentry *topt = filp->private_data;
8452 	unsigned long val;
8453 	int ret;
8454 
8455 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8456 	if (ret)
8457 		return ret;
8458 
8459 	if (val != 0 && val != 1)
8460 		return -EINVAL;
8461 
8462 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8463 		mutex_lock(&trace_types_lock);
8464 		ret = __set_tracer_option(topt->tr, topt->flags,
8465 					  topt->opt, !val);
8466 		mutex_unlock(&trace_types_lock);
8467 		if (ret)
8468 			return ret;
8469 	}
8470 
8471 	*ppos += cnt;
8472 
8473 	return cnt;
8474 }
8475 
8476 
8477 static const struct file_operations trace_options_fops = {
8478 	.open = tracing_open_generic,
8479 	.read = trace_options_read,
8480 	.write = trace_options_write,
8481 	.llseek	= generic_file_llseek,
8482 };
8483 
8484 /*
8485  * In order to pass in both the trace_array descriptor as well as the index
8486  * to the flag that the trace option file represents, the trace_array
8487  * has a character array of trace_flags_index[], which holds the index
8488  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8489  * The address of this character array is passed to the flag option file
8490  * read/write callbacks.
8491  *
8492  * In order to extract both the index and the trace_array descriptor,
8493  * get_tr_index() uses the following algorithm.
8494  *
8495  *   idx = *ptr;
8496  *
8497  * This works because the pointer points at an element of the index
8498  * array whose value is its own index (remember index[1] == 1).
8499  *
8500  * Then, to get the trace_array descriptor, subtract that index from
8501  * the pointer to get back to the start of the array:
8502  *
8503  *   ptr - idx == &index[0]
8504  *
8505  * Then a simple container_of() from that pointer gets us to the
8506  * trace_array descriptor.
8507  */
8508 static void get_tr_index(void *data, struct trace_array **ptr,
8509 			 unsigned int *pindex)
8510 {
8511 	*pindex = *(unsigned char *)data;
8512 
8513 	*ptr = container_of(data - *pindex, struct trace_array,
8514 			    trace_flags_index);
8515 }
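
/*
 * Worked example of the arithmetic above (purely illustrative): if a
 * file's private_data is &tr->trace_flags_index[5], then
 *
 *	*pindex = *(unsigned char *)data  ==  5
 *	data - *pindex                    ==  &tr->trace_flags_index[0]
 *	container_of(...)                 ==  tr
 *
 * which is why init_trace_flags_index() below fills the array with
 * index[i] == i.
 */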
8516 
8517 static ssize_t
8518 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8519 			loff_t *ppos)
8520 {
8521 	void *tr_index = filp->private_data;
8522 	struct trace_array *tr;
8523 	unsigned int index;
8524 	char *buf;
8525 
8526 	get_tr_index(tr_index, &tr, &index);
8527 
8528 	if (tr->trace_flags & (1 << index))
8529 		buf = "1\n";
8530 	else
8531 		buf = "0\n";
8532 
8533 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8534 }
8535 
8536 static ssize_t
8537 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8538 			 loff_t *ppos)
8539 {
8540 	void *tr_index = filp->private_data;
8541 	struct trace_array *tr;
8542 	unsigned int index;
8543 	unsigned long val;
8544 	int ret;
8545 
8546 	get_tr_index(tr_index, &tr, &index);
8547 
8548 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8549 	if (ret)
8550 		return ret;
8551 
8552 	if (val != 0 && val != 1)
8553 		return -EINVAL;
8554 
8555 	mutex_lock(&event_mutex);
8556 	mutex_lock(&trace_types_lock);
8557 	ret = set_tracer_flag(tr, 1 << index, val);
8558 	mutex_unlock(&trace_types_lock);
8559 	mutex_unlock(&event_mutex);
8560 
8561 	if (ret < 0)
8562 		return ret;
8563 
8564 	*ppos += cnt;
8565 
8566 	return cnt;
8567 }
8568 
8569 static const struct file_operations trace_options_core_fops = {
8570 	.open = tracing_open_generic,
8571 	.read = trace_options_core_read,
8572 	.write = trace_options_core_write,
8573 	.llseek = generic_file_llseek,
8574 };
8575 
8576 struct dentry *trace_create_file(const char *name,
8577 				 umode_t mode,
8578 				 struct dentry *parent,
8579 				 void *data,
8580 				 const struct file_operations *fops)
8581 {
8582 	struct dentry *ret;
8583 
8584 	ret = tracefs_create_file(name, mode, parent, data, fops);
8585 	if (!ret)
8586 		pr_warn("Could not create tracefs '%s' entry\n", name);
8587 
8588 	return ret;
8589 }
8590 
8591 
8592 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8593 {
8594 	struct dentry *d_tracer;
8595 
8596 	if (tr->options)
8597 		return tr->options;
8598 
8599 	d_tracer = tracing_get_dentry(tr);
8600 	if (IS_ERR(d_tracer))
8601 		return NULL;
8602 
8603 	tr->options = tracefs_create_dir("options", d_tracer);
8604 	if (!tr->options) {
8605 		pr_warn("Could not create tracefs directory 'options'\n");
8606 		return NULL;
8607 	}
8608 
8609 	return tr->options;
8610 }
8611 
8612 static void
8613 create_trace_option_file(struct trace_array *tr,
8614 			 struct trace_option_dentry *topt,
8615 			 struct tracer_flags *flags,
8616 			 struct tracer_opt *opt)
8617 {
8618 	struct dentry *t_options;
8619 
8620 	t_options = trace_options_init_dentry(tr);
8621 	if (!t_options)
8622 		return;
8623 
8624 	topt->flags = flags;
8625 	topt->opt = opt;
8626 	topt->tr = tr;
8627 
8628 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8629 				    &trace_options_fops);
8630 
8631 }
8632 
8633 static void
8634 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8635 {
8636 	struct trace_option_dentry *topts;
8637 	struct trace_options *tr_topts;
8638 	struct tracer_flags *flags;
8639 	struct tracer_opt *opts;
8640 	int cnt;
8641 	int i;
8642 
8643 	if (!tracer)
8644 		return;
8645 
8646 	flags = tracer->flags;
8647 
8648 	if (!flags || !flags->opts)
8649 		return;
8650 
8651 	/*
8652 	 * If this is an instance, only create flags for tracers
8653 	 * the instance may have.
8654 	 */
8655 	if (!trace_ok_for_array(tracer, tr))
8656 		return;
8657 
8658 	for (i = 0; i < tr->nr_topts; i++) {
8659 		/* Make sure there are no duplicate flags. */
8660 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8661 			return;
8662 	}
8663 
8664 	opts = flags->opts;
8665 
8666 	for (cnt = 0; opts[cnt].name; cnt++)
8667 		;
8668 
8669 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8670 	if (!topts)
8671 		return;
8672 
8673 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8674 			    GFP_KERNEL);
8675 	if (!tr_topts) {
8676 		kfree(topts);
8677 		return;
8678 	}
8679 
8680 	tr->topts = tr_topts;
8681 	tr->topts[tr->nr_topts].tracer = tracer;
8682 	tr->topts[tr->nr_topts].topts = topts;
8683 	tr->nr_topts++;
8684 
8685 	for (cnt = 0; opts[cnt].name; cnt++) {
8686 		create_trace_option_file(tr, &topts[cnt], flags,
8687 					 &opts[cnt]);
8688 		MEM_FAIL(topts[cnt].entry == NULL,
8689 			  "Failed to create trace option: %s",
8690 			  opts[cnt].name);
8691 	}
8692 }
8693 
8694 static struct dentry *
8695 create_trace_option_core_file(struct trace_array *tr,
8696 			      const char *option, long index)
8697 {
8698 	struct dentry *t_options;
8699 
8700 	t_options = trace_options_init_dentry(tr);
8701 	if (!t_options)
8702 		return NULL;
8703 
8704 	return trace_create_file(option, 0644, t_options,
8705 				 (void *)&tr->trace_flags_index[index],
8706 				 &trace_options_core_fops);
8707 }
8708 
8709 static void create_trace_options_dir(struct trace_array *tr)
8710 {
8711 	struct dentry *t_options;
8712 	bool top_level = tr == &global_trace;
8713 	int i;
8714 
8715 	t_options = trace_options_init_dentry(tr);
8716 	if (!t_options)
8717 		return;
8718 
8719 	for (i = 0; trace_options[i]; i++) {
8720 		if (top_level ||
8721 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8722 			create_trace_option_core_file(tr, trace_options[i], i);
8723 	}
8724 }
8725 
8726 static ssize_t
8727 rb_simple_read(struct file *filp, char __user *ubuf,
8728 	       size_t cnt, loff_t *ppos)
8729 {
8730 	struct trace_array *tr = filp->private_data;
8731 	char buf[64];
8732 	int r;
8733 
8734 	r = tracer_tracing_is_on(tr);
8735 	r = sprintf(buf, "%d\n", r);
8736 
8737 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8738 }
8739 
8740 static ssize_t
8741 rb_simple_write(struct file *filp, const char __user *ubuf,
8742 		size_t cnt, loff_t *ppos)
8743 {
8744 	struct trace_array *tr = filp->private_data;
8745 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8746 	unsigned long val;
8747 	int ret;
8748 
8749 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8750 	if (ret)
8751 		return ret;
8752 
8753 	if (buffer) {
8754 		mutex_lock(&trace_types_lock);
8755 		if (!!val == tracer_tracing_is_on(tr)) {
8756 			val = 0; /* do nothing */
8757 		} else if (val) {
8758 			tracer_tracing_on(tr);
8759 			if (tr->current_trace->start)
8760 				tr->current_trace->start(tr);
8761 		} else {
8762 			tracer_tracing_off(tr);
8763 			if (tr->current_trace->stop)
8764 				tr->current_trace->stop(tr);
8765 		}
8766 		mutex_unlock(&trace_types_lock);
8767 	}
8768 
8769 	(*ppos)++;
8770 
8771 	return cnt;
8772 }
8773 
8774 static const struct file_operations rb_simple_fops = {
8775 	.open		= tracing_open_generic_tr,
8776 	.read		= rb_simple_read,
8777 	.write		= rb_simple_write,
8778 	.release	= tracing_release_generic_tr,
8779 	.llseek		= default_llseek,
8780 };
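
/*
 * These ops back the "tracing_on" file created in init_tracer_tracefs().
 * A quick sketch of its use:
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_on	stop writing to the ring
 *							buffer (and call the
 *							tracer's ->stop())
 *	# echo 1 > /sys/kernel/tracing/tracing_on	resume (->start())
 *	# cat /sys/kernel/tracing/tracing_on		read back 0 or 1
 *
 * Writing the value that is already set is a no-op, as handled above.
 */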
8781 
8782 static ssize_t
8783 buffer_percent_read(struct file *filp, char __user *ubuf,
8784 		    size_t cnt, loff_t *ppos)
8785 {
8786 	struct trace_array *tr = filp->private_data;
8787 	char buf[64];
8788 	int r;
8789 
8790 	r = tr->buffer_percent;
8791 	r = sprintf(buf, "%d\n", r);
8792 
8793 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8794 }
8795 
8796 static ssize_t
8797 buffer_percent_write(struct file *filp, const char __user *ubuf,
8798 		     size_t cnt, loff_t *ppos)
8799 {
8800 	struct trace_array *tr = filp->private_data;
8801 	unsigned long val;
8802 	int ret;
8803 
8804 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8805 	if (ret)
8806 		return ret;
8807 
8808 	if (val > 100)
8809 		return -EINVAL;
8810 
8811 	if (!val)
8812 		val = 1;
8813 
8814 	tr->buffer_percent = val;
8815 
8816 	(*ppos)++;
8817 
8818 	return cnt;
8819 }
8820 
8821 static const struct file_operations buffer_percent_fops = {
8822 	.open		= tracing_open_generic_tr,
8823 	.read		= buffer_percent_read,
8824 	.write		= buffer_percent_write,
8825 	.release	= tracing_release_generic_tr,
8826 	.llseek		= default_llseek,
8827 };
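
/*
 * "buffer_percent" controls how full the ring buffer must be before a
 * blocked reader in the splice path above (see the wait_on_pipe() call
 * in tracing_buffers_splice_read()) is woken up.  It defaults to 50,
 * and in this version a write of 0 is silently bumped to 1:
 *
 *	# echo 100 > /sys/kernel/tracing/buffer_percent	wake readers only when
 *							the buffer is full
 *	# echo 1 > /sys/kernel/tracing/buffer_percent	wake on (almost) any data
 */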
8828 
8829 static struct dentry *trace_instance_dir;
8830 
8831 static void
8832 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8833 
8834 static int
8835 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8836 {
8837 	enum ring_buffer_flags rb_flags;
8838 
8839 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8840 
8841 	buf->tr = tr;
8842 
8843 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8844 	if (!buf->buffer)
8845 		return -ENOMEM;
8846 
8847 	buf->data = alloc_percpu(struct trace_array_cpu);
8848 	if (!buf->data) {
8849 		ring_buffer_free(buf->buffer);
8850 		buf->buffer = NULL;
8851 		return -ENOMEM;
8852 	}
8853 
8854 	/* Allocate the first page for all buffers */
8855 	set_buffer_entries(&tr->array_buffer,
8856 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8857 
8858 	return 0;
8859 }
8860 
8861 static int allocate_trace_buffers(struct trace_array *tr, int size)
8862 {
8863 	int ret;
8864 
8865 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8866 	if (ret)
8867 		return ret;
8868 
8869 #ifdef CONFIG_TRACER_MAX_TRACE
8870 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8871 				    allocate_snapshot ? size : 1);
8872 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8873 		ring_buffer_free(tr->array_buffer.buffer);
8874 		tr->array_buffer.buffer = NULL;
8875 		free_percpu(tr->array_buffer.data);
8876 		tr->array_buffer.data = NULL;
8877 		return -ENOMEM;
8878 	}
8879 	tr->allocated_snapshot = allocate_snapshot;
8880 
8881 	/*
8882 	 * Only the top level trace array gets its snapshot allocated
8883 	 * from the kernel command line.
8884 	 */
8885 	allocate_snapshot = false;
8886 #endif
8887 
8888 	return 0;
8889 }
8890 
8891 static void free_trace_buffer(struct array_buffer *buf)
8892 {
8893 	if (buf->buffer) {
8894 		ring_buffer_free(buf->buffer);
8895 		buf->buffer = NULL;
8896 		free_percpu(buf->data);
8897 		buf->data = NULL;
8898 	}
8899 }
8900 
8901 static void free_trace_buffers(struct trace_array *tr)
8902 {
8903 	if (!tr)
8904 		return;
8905 
8906 	free_trace_buffer(&tr->array_buffer);
8907 
8908 #ifdef CONFIG_TRACER_MAX_TRACE
8909 	free_trace_buffer(&tr->max_buffer);
8910 #endif
8911 }
8912 
8913 static void init_trace_flags_index(struct trace_array *tr)
8914 {
8915 	int i;
8916 
8917 	/* Used by the trace options files */
8918 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8919 		tr->trace_flags_index[i] = i;
8920 }
8921 
8922 static void __update_tracer_options(struct trace_array *tr)
8923 {
8924 	struct tracer *t;
8925 
8926 	for (t = trace_types; t; t = t->next)
8927 		add_tracer_options(tr, t);
8928 }
8929 
8930 static void update_tracer_options(struct trace_array *tr)
8931 {
8932 	mutex_lock(&trace_types_lock);
8933 	__update_tracer_options(tr);
8934 	mutex_unlock(&trace_types_lock);
8935 }
8936 
8937 /* Must have trace_types_lock held */
8938 struct trace_array *trace_array_find(const char *instance)
8939 {
8940 	struct trace_array *tr, *found = NULL;
8941 
8942 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8943 		if (tr->name && strcmp(tr->name, instance) == 0) {
8944 			found = tr;
8945 			break;
8946 		}
8947 	}
8948 
8949 	return found;
8950 }
8951 
8952 struct trace_array *trace_array_find_get(const char *instance)
8953 {
8954 	struct trace_array *tr;
8955 
8956 	mutex_lock(&trace_types_lock);
8957 	tr = trace_array_find(instance);
8958 	if (tr)
8959 		tr->ref++;
8960 	mutex_unlock(&trace_types_lock);
8961 
8962 	return tr;
8963 }
8964 
8965 static int trace_array_create_dir(struct trace_array *tr)
8966 {
8967 	int ret;
8968 
8969 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8970 	if (!tr->dir)
8971 		return -EINVAL;
8972 
8973 	ret = event_trace_add_tracer(tr->dir, tr);
8974 	if (ret) {
8975 		tracefs_remove(tr->dir);
8976 		return ret;
8977 	}
8976 
8977 	init_tracer_tracefs(tr, tr->dir);
8978 	__update_tracer_options(tr);
8979 
8980 	return ret;
8981 }
8982 
8983 static struct trace_array *trace_array_create(const char *name)
8984 {
8985 	struct trace_array *tr;
8986 	int ret;
8987 
8988 	ret = -ENOMEM;
8989 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8990 	if (!tr)
8991 		return ERR_PTR(ret);
8992 
8993 	tr->name = kstrdup(name, GFP_KERNEL);
8994 	if (!tr->name)
8995 		goto out_free_tr;
8996 
8997 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8998 		goto out_free_tr;
8999 
9000 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9001 
9002 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9003 
9004 	raw_spin_lock_init(&tr->start_lock);
9005 
9006 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9007 
9008 	tr->current_trace = &nop_trace;
9009 
9010 	INIT_LIST_HEAD(&tr->systems);
9011 	INIT_LIST_HEAD(&tr->events);
9012 	INIT_LIST_HEAD(&tr->hist_vars);
9013 	INIT_LIST_HEAD(&tr->err_log);
9014 
9015 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9016 		goto out_free_tr;
9017 
9018 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9019 		goto out_free_tr;
9020 
9021 	ftrace_init_trace_array(tr);
9022 
9023 	init_trace_flags_index(tr);
9024 
9025 	if (trace_instance_dir) {
9026 		ret = trace_array_create_dir(tr);
9027 		if (ret)
9028 			goto out_free_tr;
9029 	} else
9030 		__trace_early_add_events(tr);
9031 
9032 	list_add(&tr->list, &ftrace_trace_arrays);
9033 
9034 	tr->ref++;
9035 
9036 	return tr;
9037 
9038  out_free_tr:
9039 	ftrace_free_ftrace_ops(tr);
9040 	free_trace_buffers(tr);
9041 	free_cpumask_var(tr->tracing_cpumask);
9042 	kfree(tr->name);
9043 	kfree(tr);
9044 
9045 	return ERR_PTR(ret);
9046 }
9047 
9048 static int instance_mkdir(const char *name)
9049 {
9050 	struct trace_array *tr;
9051 	int ret;
9052 
9053 	mutex_lock(&event_mutex);
9054 	mutex_lock(&trace_types_lock);
9055 
9056 	ret = -EEXIST;
9057 	if (trace_array_find(name))
9058 		goto out_unlock;
9059 
9060 	tr = trace_array_create(name);
9061 
9062 	ret = PTR_ERR_OR_ZERO(tr);
9063 
9064 out_unlock:
9065 	mutex_unlock(&trace_types_lock);
9066 	mutex_unlock(&event_mutex);
9067 	return ret;
9068 }
9069 
9070 /**
9071  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9072  * @name: The name of the trace array to be looked up/created.
9073  *
9074  * Returns a pointer to the trace array with the given name, or NULL
9075  * if it cannot be created.
9076  *
9077  * NOTE: This function increments the reference counter associated with the
9078  * trace array returned. This makes sure it cannot be freed while in use.
9079  * Use trace_array_put() once the trace array is no longer needed.
9080  * If the trace_array is to be freed, trace_array_destroy() needs to
9081  * be called after the trace_array_put(), or simply let user space delete
9082  * it from the tracefs instances directory. But until the
9083  * trace_array_put() is called, user space can not delete it.
9084  *
9085  */
9086 struct trace_array *trace_array_get_by_name(const char *name)
9087 {
9088 	struct trace_array *tr;
9089 
9090 	mutex_lock(&event_mutex);
9091 	mutex_lock(&trace_types_lock);
9092 
9093 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9094 		if (tr->name && strcmp(tr->name, name) == 0)
9095 			goto out_unlock;
9096 	}
9097 
9098 	tr = trace_array_create(name);
9099 
9100 	if (IS_ERR(tr))
9101 		tr = NULL;
9102 out_unlock:
9103 	if (tr)
9104 		tr->ref++;
9105 
9106 	mutex_unlock(&trace_types_lock);
9107 	mutex_unlock(&event_mutex);
9108 	return tr;
9109 }
9110 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
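
/*
 * Sketch of the in-kernel instance API exported here, with error
 * handling trimmed; my_tr and the surrounding module are assumptions
 * of the example, not something this file defines:
 *
 *	struct trace_array *my_tr;
 *
 *	my_tr = trace_array_get_by_name("my_instance");
 *	if (!my_tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(my_tr);
 *	trace_array_destroy(my_tr);	only if the instance should
 *					also be removed
 *
 * As the kerneldoc above notes, the reference taken by
 * trace_array_get_by_name() must be dropped with trace_array_put()
 * before trace_array_destroy() can succeed.
 */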
9111 
9112 static int __remove_instance(struct trace_array *tr)
9113 {
9114 	int i;
9115 
9116 	/* Reference counter for a newly created trace array = 1. */
9117 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9118 		return -EBUSY;
9119 
9120 	list_del(&tr->list);
9121 
9122 	/* Disable all the flags that were enabled coming in */
9123 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9124 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9125 			set_tracer_flag(tr, 1 << i, 0);
9126 	}
9127 
9128 	tracing_set_nop(tr);
9129 	clear_ftrace_function_probes(tr);
9130 	event_trace_del_tracer(tr);
9131 	ftrace_clear_pids(tr);
9132 	ftrace_destroy_function_files(tr);
9133 	tracefs_remove(tr->dir);
9134 	free_percpu(tr->last_func_repeats);
9135 	free_trace_buffers(tr);
9136 
9137 	for (i = 0; i < tr->nr_topts; i++) {
9138 		kfree(tr->topts[i].topts);
9139 	}
9140 	kfree(tr->topts);
9141 
9142 	free_cpumask_var(tr->tracing_cpumask);
9143 	kfree(tr->name);
9144 	kfree(tr);
9145 
9146 	return 0;
9147 }
9148 
9149 int trace_array_destroy(struct trace_array *this_tr)
9150 {
9151 	struct trace_array *tr;
9152 	int ret;
9153 
9154 	if (!this_tr)
9155 		return -EINVAL;
9156 
9157 	mutex_lock(&event_mutex);
9158 	mutex_lock(&trace_types_lock);
9159 
9160 	ret = -ENODEV;
9161 
9162 	/* Making sure trace array exists before destroying it. */
9163 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9164 		if (tr == this_tr) {
9165 			ret = __remove_instance(tr);
9166 			break;
9167 		}
9168 	}
9169 
9170 	mutex_unlock(&trace_types_lock);
9171 	mutex_unlock(&event_mutex);
9172 
9173 	return ret;
9174 }
9175 EXPORT_SYMBOL_GPL(trace_array_destroy);
9176 
9177 static int instance_rmdir(const char *name)
9178 {
9179 	struct trace_array *tr;
9180 	int ret;
9181 
9182 	mutex_lock(&event_mutex);
9183 	mutex_lock(&trace_types_lock);
9184 
9185 	ret = -ENODEV;
9186 	tr = trace_array_find(name);
9187 	if (tr)
9188 		ret = __remove_instance(tr);
9189 
9190 	mutex_unlock(&trace_types_lock);
9191 	mutex_unlock(&event_mutex);
9192 
9193 	return ret;
9194 }
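
/*
 * These two callbacks are what make the instances directory work from
 * user space ("foo" is just an example name):
 *
 *	# mkdir /sys/kernel/tracing/instances/foo	create a new trace array
 *							with its own buffers
 *							and control files
 *	# rmdir /sys/kernel/tracing/instances/foo	tear it down (fails with
 *							EBUSY while the instance
 *							is still referenced)
 */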
9195 
9196 static __init void create_trace_instances(struct dentry *d_tracer)
9197 {
9198 	struct trace_array *tr;
9199 
9200 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9201 							 instance_mkdir,
9202 							 instance_rmdir);
9203 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9204 		return;
9205 
9206 	mutex_lock(&event_mutex);
9207 	mutex_lock(&trace_types_lock);
9208 
9209 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9210 		if (!tr->name)
9211 			continue;
9212 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9213 			     "Failed to create instance directory\n"))
9214 			break;
9215 	}
9216 
9217 	mutex_unlock(&trace_types_lock);
9218 	mutex_unlock(&event_mutex);
9219 }
9220 
9221 static void
9222 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9223 {
9224 	struct trace_event_file *file;
9225 	int cpu;
9226 
9227 	trace_create_file("available_tracers", 0444, d_tracer,
9228 			tr, &show_traces_fops);
9229 
9230 	trace_create_file("current_tracer", 0644, d_tracer,
9231 			tr, &set_tracer_fops);
9232 
9233 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9234 			  tr, &tracing_cpumask_fops);
9235 
9236 	trace_create_file("trace_options", 0644, d_tracer,
9237 			  tr, &tracing_iter_fops);
9238 
9239 	trace_create_file("trace", 0644, d_tracer,
9240 			  tr, &tracing_fops);
9241 
9242 	trace_create_file("trace_pipe", 0444, d_tracer,
9243 			  tr, &tracing_pipe_fops);
9244 
9245 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9246 			  tr, &tracing_entries_fops);
9247 
9248 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9249 			  tr, &tracing_total_entries_fops);
9250 
9251 	trace_create_file("free_buffer", 0200, d_tracer,
9252 			  tr, &tracing_free_buffer_fops);
9253 
9254 	trace_create_file("trace_marker", 0220, d_tracer,
9255 			  tr, &tracing_mark_fops);
9256 
9257 	file = __find_event_file(tr, "ftrace", "print");
9258 	if (file && file->dir)
9259 		trace_create_file("trigger", 0644, file->dir, file,
9260 				  &event_trigger_fops);
9261 	tr->trace_marker_file = file;
9262 
9263 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9264 			  tr, &tracing_mark_raw_fops);
9265 
9266 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9267 			  &trace_clock_fops);
9268 
9269 	trace_create_file("tracing_on", 0644, d_tracer,
9270 			  tr, &rb_simple_fops);
9271 
9272 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9273 			  &trace_time_stamp_mode_fops);
9274 
9275 	tr->buffer_percent = 50;
9276 
9277 	trace_create_file("buffer_percent", 0444, d_tracer,
9278 			tr, &buffer_percent_fops);
9279 
9280 	create_trace_options_dir(tr);
9281 
9282 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9283 	trace_create_maxlat_file(tr, d_tracer);
9284 #endif
9285 
9286 	if (ftrace_create_function_files(tr, d_tracer))
9287 		MEM_FAIL(1, "Could not allocate function filter files");
9288 
9289 #ifdef CONFIG_TRACER_SNAPSHOT
9290 	trace_create_file("snapshot", 0644, d_tracer,
9291 			  tr, &snapshot_fops);
9292 #endif
9293 
9294 	trace_create_file("error_log", 0644, d_tracer,
9295 			  tr, &tracing_err_log_fops);
9296 
9297 	for_each_tracing_cpu(cpu)
9298 		tracing_init_tracefs_percpu(tr, cpu);
9299 
9300 	ftrace_init_tracefs(tr, d_tracer);
9301 }
9302 
9303 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9304 {
9305 	struct vfsmount *mnt;
9306 	struct file_system_type *type;
9307 
9308 	/*
9309 	 * To maintain backward compatibility for tools that mount
9310 	 * debugfs to get to the tracing facility, tracefs is automatically
9311 	 * mounted to the debugfs/tracing directory.
9312 	 */
9313 	type = get_fs_type("tracefs");
9314 	if (!type)
9315 		return NULL;
9316 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9317 	put_filesystem(type);
9318 	if (IS_ERR(mnt))
9319 		return NULL;
9320 	mntget(mnt);
9321 
9322 	return mnt;
9323 }
9324 
9325 /**
9326  * tracing_init_dentry - initialize top level trace array
9327  *
9328  * This is called when creating files or directories in the tracing
9329  * directory. It is called via fs_initcall() by any of the boot up code,
9330  * and returns 0 on success or a negative error if tracefs is unavailable.
9331  */
9332 int tracing_init_dentry(void)
9333 {
9334 	struct trace_array *tr = &global_trace;
9335 
9336 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9337 		pr_warn("Tracing disabled due to lockdown\n");
9338 		return -EPERM;
9339 	}
9340 
9341 	/* The top level trace array uses NULL as parent */
9342 	if (tr->dir)
9343 		return 0;
9344 
9345 	if (WARN_ON(!tracefs_initialized()))
9346 		return -ENODEV;
9347 
9348 	/*
9349 	 * As there may still be users that expect the tracing
9350 	 * files to exist in debugfs/tracing, we must automount
9351 	 * the tracefs file system there, so older tools still
9352 	 * work with the newer kernel.
9353 	 */
9354 	tr->dir = debugfs_create_automount("tracing", NULL,
9355 					   trace_automount, NULL);
9356 
9357 	return 0;
9358 }
9359 
9360 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9361 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9362 
9363 static struct workqueue_struct *eval_map_wq __initdata;
9364 static struct work_struct eval_map_work __initdata;
9365 
9366 static void __init eval_map_work_func(struct work_struct *work)
9367 {
9368 	int len;
9369 
9370 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9371 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9372 }
9373 
9374 static int __init trace_eval_init(void)
9375 {
9376 	INIT_WORK(&eval_map_work, eval_map_work_func);
9377 
9378 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9379 	if (!eval_map_wq) {
9380 		pr_err("Unable to allocate eval_map_wq\n");
9381 		/* Fall back to doing the work synchronously */
9382 		eval_map_work_func(&eval_map_work);
9383 		return -ENOMEM;
9384 	}
9385 
9386 	queue_work(eval_map_wq, &eval_map_work);
9387 	return 0;
9388 }
9389 
9390 static int __init trace_eval_sync(void)
9391 {
9392 	/* Make sure the eval map updates are finished */
9393 	if (eval_map_wq)
9394 		destroy_workqueue(eval_map_wq);
9395 	return 0;
9396 }
9397 
9398 late_initcall_sync(trace_eval_sync);
9399 
9400 
9401 #ifdef CONFIG_MODULES
9402 static void trace_module_add_evals(struct module *mod)
9403 {
9404 	if (!mod->num_trace_evals)
9405 		return;
9406 
9407 	/*
9408 	 * Modules with bad taint do not have events created, do
9409 	 * not bother with enums either.
9410 	 */
9411 	if (trace_module_has_bad_taint(mod))
9412 		return;
9413 
9414 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9415 }
9416 
9417 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9418 static void trace_module_remove_evals(struct module *mod)
9419 {
9420 	union trace_eval_map_item *map;
9421 	union trace_eval_map_item **last = &trace_eval_maps;
9422 
9423 	if (!mod->num_trace_evals)
9424 		return;
9425 
9426 	mutex_lock(&trace_eval_mutex);
9427 
9428 	map = trace_eval_maps;
9429 
9430 	while (map) {
9431 		if (map->head.mod == mod)
9432 			break;
9433 		map = trace_eval_jmp_to_tail(map);
9434 		last = &map->tail.next;
9435 		map = map->tail.next;
9436 	}
9437 	if (!map)
9438 		goto out;
9439 
9440 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9441 	kfree(map);
9442  out:
9443 	mutex_unlock(&trace_eval_mutex);
9444 }
9445 #else
9446 static inline void trace_module_remove_evals(struct module *mod) { }
9447 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9448 
9449 static int trace_module_notify(struct notifier_block *self,
9450 			       unsigned long val, void *data)
9451 {
9452 	struct module *mod = data;
9453 
9454 	switch (val) {
9455 	case MODULE_STATE_COMING:
9456 		trace_module_add_evals(mod);
9457 		break;
9458 	case MODULE_STATE_GOING:
9459 		trace_module_remove_evals(mod);
9460 		break;
9461 	}
9462 
9463 	return NOTIFY_OK;
9464 }
9465 
9466 static struct notifier_block trace_module_nb = {
9467 	.notifier_call = trace_module_notify,
9468 	.priority = 0,
9469 };
9470 #endif /* CONFIG_MODULES */
9471 
9472 static __init int tracer_init_tracefs(void)
9473 {
9474 	int ret;
9475 
9476 	trace_access_lock_init();
9477 
9478 	ret = tracing_init_dentry();
9479 	if (ret)
9480 		return 0;
9481 
9482 	event_trace_init();
9483 
9484 	init_tracer_tracefs(&global_trace, NULL);
9485 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9486 
9487 	trace_create_file("tracing_thresh", 0644, NULL,
9488 			&global_trace, &tracing_thresh_fops);
9489 
9490 	trace_create_file("README", 0444, NULL,
9491 			NULL, &tracing_readme_fops);
9492 
9493 	trace_create_file("saved_cmdlines", 0444, NULL,
9494 			NULL, &tracing_saved_cmdlines_fops);
9495 
9496 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9497 			  NULL, &tracing_saved_cmdlines_size_fops);
9498 
9499 	trace_create_file("saved_tgids", 0444, NULL,
9500 			NULL, &tracing_saved_tgids_fops);
9501 
9502 	trace_eval_init();
9503 
9504 	trace_create_eval_file(NULL);
9505 
9506 #ifdef CONFIG_MODULES
9507 	register_module_notifier(&trace_module_nb);
9508 #endif
9509 
9510 #ifdef CONFIG_DYNAMIC_FTRACE
9511 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9512 			NULL, &tracing_dyn_info_fops);
9513 #endif
9514 
9515 	create_trace_instances(NULL);
9516 
9517 	update_tracer_options(&global_trace);
9518 
9519 	return 0;
9520 }
9521 
9522 static int trace_panic_handler(struct notifier_block *this,
9523 			       unsigned long event, void *unused)
9524 {
9525 	if (ftrace_dump_on_oops)
9526 		ftrace_dump(ftrace_dump_on_oops);
9527 	return NOTIFY_OK;
9528 }
9529 
9530 static struct notifier_block trace_panic_notifier = {
9531 	.notifier_call  = trace_panic_handler,
9532 	.next           = NULL,
9533 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9534 };
9535 
9536 static int trace_die_handler(struct notifier_block *self,
9537 			     unsigned long val,
9538 			     void *data)
9539 {
9540 	switch (val) {
9541 	case DIE_OOPS:
9542 		if (ftrace_dump_on_oops)
9543 			ftrace_dump(ftrace_dump_on_oops);
9544 		break;
9545 	default:
9546 		break;
9547 	}
9548 	return NOTIFY_OK;
9549 }
9550 
9551 static struct notifier_block trace_die_notifier = {
9552 	.notifier_call = trace_die_handler,
9553 	.priority = 200
9554 };
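
/*
 * Both notifiers above key off ftrace_dump_on_oops.  It is normally
 * enabled with the "ftrace_dump_on_oops" (dump all CPUs) or
 * "ftrace_dump_on_oops=orig_cpu" kernel command line options, or at
 * run time via:
 *
 *	# echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * so that a panic or oops spills the ring buffer to the console via
 * ftrace_dump() below.
 */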
9555 
9556 /*
9557  * printk is set to max of 1024, we really don't need it that big.
9558  * Nothing should be printing 1000 characters anyway.
9559  */
9560 #define TRACE_MAX_PRINT		1000
9561 
9562 /*
9563  * Define here KERN_TRACE so that we have one place to modify
9564  * it if we decide to change what log level the ftrace dump
9565  * should be at.
9566  */
9567 #define KERN_TRACE		KERN_EMERG
9568 
9569 void
9570 trace_printk_seq(struct trace_seq *s)
9571 {
9572 	/* Probably should print a warning here. */
9573 	if (s->seq.len >= TRACE_MAX_PRINT)
9574 		s->seq.len = TRACE_MAX_PRINT;
9575 
9576 	/*
9577 	 * More paranoid code. Although the buffer size is set to
9578 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9579 	 * an extra layer of protection.
9580 	 */
9581 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9582 		s->seq.len = s->seq.size - 1;
9583 
9584 	/* Should be NUL-terminated, but we are paranoid. */
9585 	s->buffer[s->seq.len] = 0;
9586 
9587 	printk(KERN_TRACE "%s", s->buffer);
9588 
9589 	trace_seq_init(s);
9590 }
9591 
9592 void trace_init_global_iter(struct trace_iterator *iter)
9593 {
9594 	iter->tr = &global_trace;
9595 	iter->trace = iter->tr->current_trace;
9596 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9597 	iter->array_buffer = &global_trace.array_buffer;
9598 
9599 	if (iter->trace && iter->trace->open)
9600 		iter->trace->open(iter);
9601 
9602 	/* Annotate start of buffers if we had overruns */
9603 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9604 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9605 
9606 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9607 	if (trace_clocks[iter->tr->clock_id].in_ns)
9608 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9609 }
9610 
9611 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9612 {
9613 	/* use static because iter can be a bit big for the stack */
9614 	static struct trace_iterator iter;
9615 	static atomic_t dump_running;
9616 	struct trace_array *tr = &global_trace;
9617 	unsigned int old_userobj;
9618 	unsigned long flags;
9619 	int cnt = 0, cpu;
9620 
9621 	/* Only allow one dump user at a time. */
9622 	if (atomic_inc_return(&dump_running) != 1) {
9623 		atomic_dec(&dump_running);
9624 		return;
9625 	}
9626 
9627 	/*
9628 	 * Always turn off tracing when we dump.
9629 	 * We don't need to show trace output of what happens
9630 	 * between multiple crashes.
9631 	 *
9632 	 * If the user does a sysrq-z, then they can re-enable
9633 	 * tracing with echo 1 > tracing_on.
9634 	 */
9635 	tracing_off();
9636 
9637 	local_irq_save(flags);
9638 	printk_nmi_direct_enter();
9639 
9640 	/* Simulate the iterator */
9641 	trace_init_global_iter(&iter);
9642 	/* Cannot use kmalloc for iter.temp and iter.fmt: we may be in NMI/panic context */
9643 	iter.temp = static_temp_buf;
9644 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9645 	iter.fmt = static_fmt_buf;
9646 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9647 
9648 	for_each_tracing_cpu(cpu) {
9649 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9650 	}
9651 
9652 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9653 
9654 	/* don't look at user memory in panic mode */
9655 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9656 
9657 	switch (oops_dump_mode) {
9658 	case DUMP_ALL:
9659 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9660 		break;
9661 	case DUMP_ORIG:
9662 		iter.cpu_file = raw_smp_processor_id();
9663 		break;
9664 	case DUMP_NONE:
9665 		goto out_enable;
9666 	default:
9667 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9668 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9669 	}
9670 
9671 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9672 
9673 	/* Did function tracer already get disabled? */
9674 	if (ftrace_is_dead()) {
9675 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9676 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9677 	}
9678 
9679 	/*
9680 	 * We need to stop all tracing on all CPUs to read
9681 	 * the next buffer. This is a bit expensive, but is
9682 	 * not done often. We print everything we can read,
9683 	 * and then release the locks again.
9684 	 */
9685 
9686 	while (!trace_empty(&iter)) {
9687 
9688 		if (!cnt)
9689 			printk(KERN_TRACE "---------------------------------\n");
9690 
9691 		cnt++;
9692 
9693 		trace_iterator_reset(&iter);
9694 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9695 
9696 		if (trace_find_next_entry_inc(&iter) != NULL) {
9697 			int ret;
9698 
9699 			ret = print_trace_line(&iter);
9700 			if (ret != TRACE_TYPE_NO_CONSUME)
9701 				trace_consume(&iter);
9702 		}
9703 		touch_nmi_watchdog();
9704 
9705 		trace_printk_seq(&iter.seq);
9706 	}
9707 
9708 	if (!cnt)
9709 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9710 	else
9711 		printk(KERN_TRACE "---------------------------------\n");
9712 
9713  out_enable:
9714 	tr->trace_flags |= old_userobj;
9715 
9716 	for_each_tracing_cpu(cpu) {
9717 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9718 	}
9719 	atomic_dec(&dump_running);
9720 	printk_nmi_direct_exit();
9721 	local_irq_restore(flags);
9722 }
9723 EXPORT_SYMBOL_GPL(ftrace_dump);
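
/*
 * Illustrative sketch (not part of this file and kept out of the build):
 * because ftrace_dump() is exported, a module can dump the ring buffer
 * from its own error path, assuming CONFIG_TRACING. The module name and
 * the failure below are hypothetical.
 */
#if 0
#include <linux/module.h>
#include <linux/kernel.h>

static int __init dump_demo_init(void)
{
	pr_err("dump_demo: fatal condition detected, dumping trace buffer\n");
	ftrace_dump(DUMP_ALL);	/* or DUMP_ORIG for the current CPU only */
	return -ENODEV;
}
module_init(dump_demo_init);

MODULE_LICENSE("GPL");
#endif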
9724 
9725 #define WRITE_BUFSIZE  4096
9726 
9727 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9728 				size_t count, loff_t *ppos,
9729 				int (*createfn)(const char *))
9730 {
9731 	char *kbuf, *buf, *tmp;
9732 	int ret = 0;
9733 	size_t done = 0;
9734 	size_t size;
9735 
9736 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9737 	if (!kbuf)
9738 		return -ENOMEM;
9739 
9740 	while (done < count) {
9741 		size = count - done;
9742 
9743 		if (size >= WRITE_BUFSIZE)
9744 			size = WRITE_BUFSIZE - 1;
9745 
9746 		if (copy_from_user(kbuf, buffer + done, size)) {
9747 			ret = -EFAULT;
9748 			goto out;
9749 		}
9750 		kbuf[size] = '\0';
9751 		buf = kbuf;
9752 		do {
9753 			tmp = strchr(buf, '\n');
9754 			if (tmp) {
9755 				*tmp = '\0';
9756 				size = tmp - buf + 1;
9757 			} else {
9758 				size = strlen(buf);
9759 				if (done + size < count) {
9760 					if (buf != kbuf)
9761 						break;
9762 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9763 					pr_warn("Line length is too long: Should be less than %d\n",
9764 						WRITE_BUFSIZE - 2);
9765 					ret = -EINVAL;
9766 					goto out;
9767 				}
9768 			}
9769 			done += size;
9770 
9771 			/* Remove comments */
9772 			tmp = strchr(buf, '#');
9773 
9774 			if (tmp)
9775 				*tmp = '\0';
9776 
9777 			ret = createfn(buf);
9778 			if (ret)
9779 				goto out;
9780 			buf += size;
9781 
9782 		} while (done < count);
9783 	}
9784 	ret = done;
9785 
9786 out:
9787 	kfree(kbuf);
9788 
9789 	return ret;
9790 }
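
/*
 * Illustrative sketch (not part of this file and kept out of the build):
 * trace_parse_run_command() is meant to back the write() handler of a
 * command-style tracefs file (kprobe_events is wired this way); createfn
 * runs once per newline-terminated, comment-stripped line. The names
 * below are hypothetical.
 */
#if 0
static int demo_create_cmd(const char *raw_command)
{
	pr_info("demo: got command '%s'\n", raw_command);
	return 0;	/* a non-zero return aborts the remaining lines */
}

static ssize_t demo_cmds_write(struct file *filp, const char __user *ubuf,
			       size_t cnt, loff_t *ppos)
{
	return trace_parse_run_command(filp, ubuf, cnt, ppos, demo_create_cmd);
}

static const struct file_operations demo_cmds_fops = {
	.open	= tracing_open_generic,
	.write	= demo_cmds_write,
};
#endif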
9791 
9792 __init static int tracer_alloc_buffers(void)
9793 {
9794 	int ring_buf_size;
9795 	int ret = -ENOMEM;
9796 
9797 
9798 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9799 		pr_warn("Tracing disabled due to lockdown\n");
9800 		return -EPERM;
9801 	}
9802 
9803 	/*
9804 	 * Make sure we don't accidentally add more trace options
9805 	 * than we have bits for.
9806 	 */
9807 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9808 
9809 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9810 		goto out;
9811 
9812 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9813 		goto out_free_buffer_mask;
9814 
9815 	/* Only allocate trace_printk buffers if a trace_printk exists */
9816 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9817 		/* Must be called before global_trace.buffer is allocated */
9818 		trace_printk_init_buffers();
9819 
9820 	/* To save memory, keep the ring buffer size at its minimum */
9821 	if (ring_buffer_expanded)
9822 		ring_buf_size = trace_buf_size;
9823 	else
9824 		ring_buf_size = 1;
9825 
9826 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9827 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9828 
9829 	raw_spin_lock_init(&global_trace.start_lock);
9830 
9831 	/*
9832 	 * The prepare callback allocates some memory for the ring buffer. We
9833 	 * don't free the buffer if the CPU goes down. If we were to free
9834 	 * the buffer, then the user would lose any trace that was in the
9835 	 * buffer. The memory will be removed once the "instance" is removed.
9836 	 */
9837 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9838 				      "trace/RB:prepare", trace_rb_cpu_prepare,
9839 				      NULL);
9840 	if (ret < 0)
9841 		goto out_free_cpumask;
9842 	/* Used for event triggers */
9843 	ret = -ENOMEM;
9844 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9845 	if (!temp_buffer)
9846 		goto out_rm_hp_state;
9847 
9848 	if (trace_create_savedcmd() < 0)
9849 		goto out_free_temp_buffer;
9850 
9851 	/* TODO: make the number of buffers hot pluggable with CPUs */
9852 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9853 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9854 		goto out_free_savedcmd;
9855 	}
9856 
9857 	if (global_trace.buffer_disabled)
9858 		tracing_off();
9859 
9860 	if (trace_boot_clock) {
9861 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9862 		if (ret < 0)
9863 			pr_warn("Trace clock %s not defined, going back to default\n",
9864 				trace_boot_clock);
9865 	}
9866 
9867 	/*
9868 	 * register_tracer() might reference current_trace, so it
9869 	 * needs to be set before we register anything. This is
9870 	 * just a bootstrap of current_trace anyway.
9871 	 */
9872 	global_trace.current_trace = &nop_trace;
9873 
9874 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9875 
9876 	ftrace_init_global_array_ops(&global_trace);
9877 
9878 	init_trace_flags_index(&global_trace);
9879 
9880 	register_tracer(&nop_trace);
9881 
9882 	/* Function tracing may start here (via kernel command line) */
9883 	init_function_trace();
9884 
9885 	/* All seems OK, enable tracing */
9886 	tracing_disabled = 0;
9887 
9888 	atomic_notifier_chain_register(&panic_notifier_list,
9889 				       &trace_panic_notifier);
9890 
9891 	register_die_notifier(&trace_die_notifier);
9892 
9893 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9894 
9895 	INIT_LIST_HEAD(&global_trace.systems);
9896 	INIT_LIST_HEAD(&global_trace.events);
9897 	INIT_LIST_HEAD(&global_trace.hist_vars);
9898 	INIT_LIST_HEAD(&global_trace.err_log);
9899 	list_add(&global_trace.list, &ftrace_trace_arrays);
9900 
9901 	apply_trace_boot_options();
9902 
9903 	register_snapshot_cmd();
9904 
9905 	test_can_verify();
9906 
9907 	return 0;
9908 
9909 out_free_savedcmd:
9910 	free_saved_cmdlines_buffer(savedcmd);
9911 out_free_temp_buffer:
9912 	ring_buffer_free(temp_buffer);
9913 out_rm_hp_state:
9914 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9915 out_free_cpumask:
9916 	free_cpumask_var(global_trace.tracing_cpumask);
9917 out_free_buffer_mask:
9918 	free_cpumask_var(tracing_buffer_mask);
9919 out:
9920 	return ret;
9921 }
9922 
9923 void __init early_trace_init(void)
9924 {
9925 	if (tracepoint_printk) {
9926 		tracepoint_print_iter =
9927 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9928 		if (MEM_FAIL(!tracepoint_print_iter,
9929 			     "Failed to allocate trace iterator\n"))
9930 			tracepoint_printk = 0;
9931 		else
9932 			static_key_enable(&tracepoint_printk_key.key);
9933 	}
9934 	tracer_alloc_buffers();
9935 }
9936 
9937 void __init trace_init(void)
9938 {
9939 	trace_event_init();
9940 }
9941 
9942 __init static int clear_boot_tracer(void)
9943 {
9944 	/*
9945 	 * The default bootup tracer name points into an init section
9946 	 * that will be freed. This function is called at late_initcall
9947 	 * time; if the boot tracer was never found and registered by
9948 	 * then, clear the pointer out, to prevent a later registration
9949 	 * from accessing memory that is about to be freed.
9950 	 */
9951 	if (!default_bootup_tracer)
9952 		return 0;
9953 
9954 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9955 	       default_bootup_tracer);
9956 	default_bootup_tracer = NULL;
9957 
9958 	return 0;
9959 }
9960 
9961 fs_initcall(tracer_init_tracefs);
9962 late_initcall_sync(clear_boot_tracer);
9963 
9964 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9965 __init static int tracing_set_default_clock(void)
9966 {
9967 	/* sched_clock_stable() is determined in late_initcall */
9968 	if (!trace_boot_clock && !sched_clock_stable()) {
9969 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9970 			pr_warn("Can not set tracing clock due to lockdown\n");
9971 			return -EPERM;
9972 		}
9973 
9974 		printk(KERN_WARNING
9975 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9976 		       "If you want to keep using the local clock, then add:\n"
9977 		       "  \"trace_clock=local\"\n"
9978 		       "on the kernel command line\n");
9979 		tracing_set_clock(&global_trace, "global");
9980 	}
9981 
9982 	return 0;
9983 }
9984 late_initcall_sync(tracing_set_default_clock);
9985 #endif
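
/*
 * Illustrative sketch (not part of this file and kept out of the build):
 * the default chosen above can be inspected, and overridden by writing a
 * clock name, through the trace_clock file; /sys/kernel/tracing is the
 * conventional tracefs mount point and may differ.
 */
#if 0
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/kernel/tracing/trace_clock", "r");

	if (!f)
		return 1;
	/* The currently selected clock is printed in brackets, e.g. "[local]". */
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
#endif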
9986