xref: /linux/kernel/trace/trace.c (revision 87c9c16317882dd6dbbc07e349bc3223e14f3244)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75 
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #endif
85 
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputing it to a
126  * serial console.
127  *
128  * It is default off, but you can enable it with either specifying
129  * "ftrace_dump_on_oops" in the kernel command line, or setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered oops
133  */
134 
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136 
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139 
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 	struct module			*mod;
144 	unsigned long			length;
145 };
146 
147 union trace_eval_map_item;
148 
149 struct trace_eval_map_tail {
150 	/*
151 	 * "end" is first and points to NULL as it must be different
152 	 * than "mod" or "eval_string"
153 	 */
154 	union trace_eval_map_item	*next;
155 	const char			*end;	/* points to NULL */
156 };
157 
158 static DEFINE_MUTEX(trace_eval_mutex);
159 
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168 	struct trace_eval_map		map;
169 	struct trace_eval_map_head	head;
170 	struct trace_eval_map_tail	tail;
171 };
172 
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175 
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 				   struct trace_buffer *buffer,
179 				   unsigned int trace_ctx);
180 
181 #define MAX_TRACER_SIZE		100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184 
185 static bool allocate_snapshot;
186 
187 static int __init set_cmdline_ftrace(char *str)
188 {
189 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 	default_bootup_tracer = bootup_tracer_buf;
191 	/* We are using ftrace early, expand it */
192 	ring_buffer_expanded = true;
193 	return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196 
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199 	if (*str++ != '=' || !*str) {
200 		ftrace_dump_on_oops = DUMP_ALL;
201 		return 1;
202 	}
203 
204 	if (!strcmp("orig_cpu", str)) {
205 		ftrace_dump_on_oops = DUMP_ORIG;
206                 return 1;
207         }
208 
209         return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212 
213 static int __init stop_trace_on_warning(char *str)
214 {
215 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216 		__disable_trace_on_warning = 1;
217 	return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220 
221 static int __init boot_alloc_snapshot(char *str)
222 {
223 	allocate_snapshot = true;
224 	/* We also need the main ring buffer expanded */
225 	ring_buffer_expanded = true;
226 	return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229 
230 
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232 
233 static int __init set_trace_boot_options(char *str)
234 {
235 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236 	return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239 
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242 
243 static int __init set_trace_boot_clock(char *str)
244 {
245 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 	trace_boot_clock = trace_boot_clock_buf;
247 	return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250 
251 static int __init set_tracepoint_printk(char *str)
252 {
253 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254 		tracepoint_printk = 1;
255 	return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
258 
259 unsigned long long ns2usecs(u64 nsec)
260 {
261 	nsec += 500;
262 	do_div(nsec, 1000);
263 	return nsec;
264 }
265 
266 static void
267 trace_process_export(struct trace_export *export,
268 	       struct ring_buffer_event *event, int flag)
269 {
270 	struct trace_entry *entry;
271 	unsigned int size = 0;
272 
273 	if (export->flags & flag) {
274 		entry = ring_buffer_event_data(event);
275 		size = ring_buffer_event_length(event);
276 		export->write(export, entry, size);
277 	}
278 }
279 
280 static DEFINE_MUTEX(ftrace_export_lock);
281 
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283 
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287 
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290 	if (export->flags & TRACE_EXPORT_FUNCTION)
291 		static_branch_inc(&trace_function_exports_enabled);
292 
293 	if (export->flags & TRACE_EXPORT_EVENT)
294 		static_branch_inc(&trace_event_exports_enabled);
295 
296 	if (export->flags & TRACE_EXPORT_MARKER)
297 		static_branch_inc(&trace_marker_exports_enabled);
298 }
299 
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302 	if (export->flags & TRACE_EXPORT_FUNCTION)
303 		static_branch_dec(&trace_function_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_EVENT)
306 		static_branch_dec(&trace_event_exports_enabled);
307 
308 	if (export->flags & TRACE_EXPORT_MARKER)
309 		static_branch_dec(&trace_marker_exports_enabled);
310 }
311 
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314 	struct trace_export *export;
315 
316 	preempt_disable_notrace();
317 
318 	export = rcu_dereference_raw_check(ftrace_exports_list);
319 	while (export) {
320 		trace_process_export(export, event, flag);
321 		export = rcu_dereference_raw_check(export->next);
322 	}
323 
324 	preempt_enable_notrace();
325 }
326 
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330 	rcu_assign_pointer(export->next, *list);
331 	/*
332 	 * We are entering export into the list but another
333 	 * CPU might be walking that list. We need to make sure
334 	 * the export->next pointer is valid before another CPU sees
335 	 * the export pointer included into the list.
336 	 */
337 	rcu_assign_pointer(*list, export);
338 }
339 
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343 	struct trace_export **p;
344 
345 	for (p = list; *p != NULL; p = &(*p)->next)
346 		if (*p == export)
347 			break;
348 
349 	if (*p != export)
350 		return -1;
351 
352 	rcu_assign_pointer(*p, (*p)->next);
353 
354 	return 0;
355 }
356 
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360 	ftrace_exports_enable(export);
361 
362 	add_trace_export(list, export);
363 }
364 
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368 	int ret;
369 
370 	ret = rm_trace_export(list, export);
371 	ftrace_exports_disable(export);
372 
373 	return ret;
374 }
375 
376 int register_ftrace_export(struct trace_export *export)
377 {
378 	if (WARN_ON_ONCE(!export->write))
379 		return -1;
380 
381 	mutex_lock(&ftrace_export_lock);
382 
383 	add_ftrace_export(&ftrace_exports_list, export);
384 
385 	mutex_unlock(&ftrace_export_lock);
386 
387 	return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390 
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393 	int ret;
394 
395 	mutex_lock(&ftrace_export_lock);
396 
397 	ret = rm_ftrace_export(&ftrace_exports_list, export);
398 
399 	mutex_unlock(&ftrace_export_lock);
400 
401 	return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
404 
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS						\
407 	(FUNCTION_DEFAULT_FLAGS |					\
408 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
409 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
410 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
411 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
412 	 TRACE_ITER_HASH_PTR)
413 
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
416 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
417 
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
421 
422 /*
423  * The global_trace is the descriptor that holds the top-level tracing
424  * buffers for the live tracing.
425  */
426 static struct trace_array global_trace = {
427 	.trace_flags = TRACE_DEFAULT_FLAGS,
428 };
429 
430 LIST_HEAD(ftrace_trace_arrays);
431 
432 int trace_array_get(struct trace_array *this_tr)
433 {
434 	struct trace_array *tr;
435 	int ret = -ENODEV;
436 
437 	mutex_lock(&trace_types_lock);
438 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
439 		if (tr == this_tr) {
440 			tr->ref++;
441 			ret = 0;
442 			break;
443 		}
444 	}
445 	mutex_unlock(&trace_types_lock);
446 
447 	return ret;
448 }
449 
450 static void __trace_array_put(struct trace_array *this_tr)
451 {
452 	WARN_ON(!this_tr->ref);
453 	this_tr->ref--;
454 }
455 
456 /**
457  * trace_array_put - Decrement the reference counter for this trace array.
458  * @this_tr : pointer to the trace array
459  *
460  * NOTE: Use this when we no longer need the trace array returned by
461  * trace_array_get_by_name(). This ensures the trace array can be later
462  * destroyed.
463  *
464  */
465 void trace_array_put(struct trace_array *this_tr)
466 {
467 	if (!this_tr)
468 		return;
469 
470 	mutex_lock(&trace_types_lock);
471 	__trace_array_put(this_tr);
472 	mutex_unlock(&trace_types_lock);
473 }
474 EXPORT_SYMBOL_GPL(trace_array_put);
475 
476 int tracing_check_open_get_tr(struct trace_array *tr)
477 {
478 	int ret;
479 
480 	ret = security_locked_down(LOCKDOWN_TRACEFS);
481 	if (ret)
482 		return ret;
483 
484 	if (tracing_disabled)
485 		return -ENODEV;
486 
487 	if (tr && trace_array_get(tr) < 0)
488 		return -ENODEV;
489 
490 	return 0;
491 }
492 
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494 			      struct trace_buffer *buffer,
495 			      struct ring_buffer_event *event)
496 {
497 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498 	    !filter_match_preds(call->filter, rec)) {
499 		__trace_event_discard_commit(buffer, event);
500 		return 1;
501 	}
502 
503 	return 0;
504 }
505 
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
507 {
508 	vfree(pid_list->pids);
509 	kfree(pid_list);
510 }
511 
512 /**
513  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514  * @filtered_pids: The list of pids to check
515  * @search_pid: The PID to find in @filtered_pids
516  *
517  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
518  */
519 bool
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
521 {
522 	/*
523 	 * If pid_max changed after filtered_pids was created, we
524 	 * by default ignore all pids greater than the previous pid_max.
525 	 */
526 	if (search_pid >= filtered_pids->pid_max)
527 		return false;
528 
529 	return test_bit(search_pid, filtered_pids->pids);
530 }
531 
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544 		       struct trace_pid_list *filtered_no_pids,
545 		       struct task_struct *task)
546 {
547 	/*
548 	 * If filtered_no_pids is not empty, and the task's pid is listed
549 	 * in filtered_no_pids, then return true.
550 	 * Otherwise, if filtered_pids is empty, that means we can
551 	 * trace all tasks. If it has content, then only trace pids
552 	 * within filtered_pids.
553 	 */
554 
555 	return (filtered_pids &&
556 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
557 		(filtered_no_pids &&
558 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
560 
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574 				  struct task_struct *self,
575 				  struct task_struct *task)
576 {
577 	if (!pid_list)
578 		return;
579 
580 	/* For forks, we only add if the forking task is listed */
581 	if (self) {
582 		if (!trace_find_filtered_pid(pid_list, self->pid))
583 			return;
584 	}
585 
586 	/* Sorry, but we don't support pid_max changing after setting */
587 	if (task->pid >= pid_list->pid_max)
588 		return;
589 
590 	/* "self" is set for forks, and NULL for exits */
591 	if (self)
592 		set_bit(task->pid, pid_list->pids);
593 	else
594 		clear_bit(task->pid, pid_list->pids);
595 }
596 
597 /**
598  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599  * @pid_list: The pid list to show
600  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
601  * @pos: The position of the file
602  *
603  * This is used by the seq_file "next" operation to iterate the pids
604  * listed in a trace_pid_list structure.
605  *
606  * Returns the pid+1 as we want to display pid of zero, but NULL would
607  * stop the iteration.
608  */
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
610 {
611 	unsigned long pid = (unsigned long)v;
612 
613 	(*pos)++;
614 
615 	/* pid already is +1 of the actual previous bit */
616 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
617 
618 	/* Return pid + 1 to allow zero to be represented */
619 	if (pid < pid_list->pid_max)
620 		return (void *)(pid + 1);
621 
622 	return NULL;
623 }
624 
625 /**
626  * trace_pid_start - Used for seq_file to start reading pid lists
627  * @pid_list: The pid list to show
628  * @pos: The position of the file
629  *
630  * This is used by seq_file "start" operation to start the iteration
631  * of listing pids.
632  *
633  * Returns the pid+1 as we want to display pid of zero, but NULL would
634  * stop the iteration.
635  */
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
637 {
638 	unsigned long pid;
639 	loff_t l = 0;
640 
641 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642 	if (pid >= pid_list->pid_max)
643 		return NULL;
644 
645 	/* Return pid + 1 so that zero can be the exit value */
646 	for (pid++; pid && l < *pos;
647 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
648 		;
649 	return (void *)pid;
650 }
651 
652 /**
653  * trace_pid_show - show the current pid in seq_file processing
654  * @m: The seq_file structure to write into
655  * @v: A void pointer of the pid (+1) value to display
656  *
657  * Can be directly used by seq_file operations to display the current
658  * pid value.
659  */
660 int trace_pid_show(struct seq_file *m, void *v)
661 {
662 	unsigned long pid = (unsigned long)v - 1;
663 
664 	seq_printf(m, "%lu\n", pid);
665 	return 0;
666 }
667 
668 /* 128 should be much more than enough */
669 #define PID_BUF_SIZE		127
670 
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672 		    struct trace_pid_list **new_pid_list,
673 		    const char __user *ubuf, size_t cnt)
674 {
675 	struct trace_pid_list *pid_list;
676 	struct trace_parser parser;
677 	unsigned long val;
678 	int nr_pids = 0;
679 	ssize_t read = 0;
680 	ssize_t ret = 0;
681 	loff_t pos;
682 	pid_t pid;
683 
684 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
685 		return -ENOMEM;
686 
687 	/*
688 	 * Always recreate a new array. The write is an all or nothing
689 	 * operation. Always create a new array when adding new pids by
690 	 * the user. If the operation fails, then the current list is
691 	 * not modified.
692 	 */
693 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
694 	if (!pid_list) {
695 		trace_parser_put(&parser);
696 		return -ENOMEM;
697 	}
698 
699 	pid_list->pid_max = READ_ONCE(pid_max);
700 
701 	/* Only truncating will shrink pid_max */
702 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703 		pid_list->pid_max = filtered_pids->pid_max;
704 
705 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706 	if (!pid_list->pids) {
707 		trace_parser_put(&parser);
708 		kfree(pid_list);
709 		return -ENOMEM;
710 	}
711 
712 	if (filtered_pids) {
713 		/* copy the current bits to the new max */
714 		for_each_set_bit(pid, filtered_pids->pids,
715 				 filtered_pids->pid_max) {
716 			set_bit(pid, pid_list->pids);
717 			nr_pids++;
718 		}
719 	}
720 
721 	while (cnt > 0) {
722 
723 		pos = 0;
724 
725 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
726 		if (ret < 0 || !trace_parser_loaded(&parser))
727 			break;
728 
729 		read += ret;
730 		ubuf += ret;
731 		cnt -= ret;
732 
733 		ret = -EINVAL;
734 		if (kstrtoul(parser.buffer, 0, &val))
735 			break;
736 		if (val >= pid_list->pid_max)
737 			break;
738 
739 		pid = (pid_t)val;
740 
741 		set_bit(pid, pid_list->pids);
742 		nr_pids++;
743 
744 		trace_parser_clear(&parser);
745 		ret = 0;
746 	}
747 	trace_parser_put(&parser);
748 
749 	if (ret < 0) {
750 		trace_free_pid_list(pid_list);
751 		return ret;
752 	}
753 
754 	if (!nr_pids) {
755 		/* Cleared the list of pids */
756 		trace_free_pid_list(pid_list);
757 		read = ret;
758 		pid_list = NULL;
759 	}
760 
761 	*new_pid_list = pid_list;
762 
763 	return read;
764 }
765 
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
767 {
768 	u64 ts;
769 
770 	/* Early boot up does not have a buffer yet */
771 	if (!buf->buffer)
772 		return trace_clock_local();
773 
774 	ts = ring_buffer_time_stamp(buf->buffer);
775 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
776 
777 	return ts;
778 }
779 
780 u64 ftrace_now(int cpu)
781 {
782 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
783 }
784 
785 /**
786  * tracing_is_enabled - Show if global_trace has been enabled
787  *
788  * Shows if the global trace has been enabled or not. It uses the
789  * mirror flag "buffer_disabled" to be used in fast paths such as for
790  * the irqsoff tracer. But it may be inaccurate due to races. If you
791  * need to know the accurate state, use tracing_is_on() which is a little
792  * slower, but accurate.
793  */
794 int tracing_is_enabled(void)
795 {
796 	/*
797 	 * For quick access (irqsoff uses this in fast path), just
798 	 * return the mirror variable of the state of the ring buffer.
799 	 * It's a little racy, but we don't really care.
800 	 */
801 	smp_rmb();
802 	return !global_trace.buffer_disabled;
803 }
804 
805 /*
806  * trace_buf_size is the size in bytes that is allocated
807  * for a buffer. Note, the number of bytes is always rounded
808  * to page size.
809  *
810  * This number is purposely set to a low number of 16384.
811  * If the dump on oops happens, it will be much appreciated
812  * to not have to wait for all that output. Anyway this can be
813  * boot time and run time configurable.
814  */
815 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
816 
817 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818 
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer		*trace_types __read_mostly;
821 
822 /*
823  * trace_types_lock is used to protect the trace_types list.
824  */
825 DEFINE_MUTEX(trace_types_lock);
826 
827 /*
828  * serialize the access of the ring buffer
829  *
830  * ring buffer serializes readers, but it is low level protection.
831  * The validity of the events (which returns by ring_buffer_peek() ..etc)
832  * are not protected by ring buffer.
833  *
834  * The content of events may become garbage if we allow other process consumes
835  * these events concurrently:
836  *   A) the page of the consumed events may become a normal page
837  *      (not reader page) in ring buffer, and this page will be rewritten
838  *      by events producer.
839  *   B) The page of the consumed events may become a page for splice_read,
840  *      and this page will be returned to system.
841  *
842  * These primitives allow multi process access to different cpu ring buffer
843  * concurrently.
844  *
845  * These primitives don't distinguish read-only and read-consume access.
846  * Multi read-only access are also serialized.
847  */
848 
849 #ifdef CONFIG_SMP
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852 
853 static inline void trace_access_lock(int cpu)
854 {
855 	if (cpu == RING_BUFFER_ALL_CPUS) {
856 		/* gain it for accessing the whole ring buffer. */
857 		down_write(&all_cpu_access_lock);
858 	} else {
859 		/* gain it for accessing a cpu ring buffer. */
860 
861 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862 		down_read(&all_cpu_access_lock);
863 
864 		/* Secondly block other access to this @cpu ring buffer. */
865 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
866 	}
867 }
868 
869 static inline void trace_access_unlock(int cpu)
870 {
871 	if (cpu == RING_BUFFER_ALL_CPUS) {
872 		up_write(&all_cpu_access_lock);
873 	} else {
874 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875 		up_read(&all_cpu_access_lock);
876 	}
877 }
878 
879 static inline void trace_access_lock_init(void)
880 {
881 	int cpu;
882 
883 	for_each_possible_cpu(cpu)
884 		mutex_init(&per_cpu(cpu_access_lock, cpu));
885 }
886 
887 #else
888 
889 static DEFINE_MUTEX(access_lock);
890 
891 static inline void trace_access_lock(int cpu)
892 {
893 	(void)cpu;
894 	mutex_lock(&access_lock);
895 }
896 
897 static inline void trace_access_unlock(int cpu)
898 {
899 	(void)cpu;
900 	mutex_unlock(&access_lock);
901 }
902 
903 static inline void trace_access_lock_init(void)
904 {
905 }
906 
907 #endif
908 
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911 				 unsigned int trace_ctx,
912 				 int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914 				      struct trace_buffer *buffer,
915 				      unsigned int trace_ctx,
916 				      int skip, struct pt_regs *regs);
917 
918 #else
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920 					unsigned int trace_ctx,
921 					int skip, struct pt_regs *regs)
922 {
923 }
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925 				      struct trace_buffer *buffer,
926 				      unsigned long trace_ctx,
927 				      int skip, struct pt_regs *regs)
928 {
929 }
930 
931 #endif
932 
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935 		  int type, unsigned int trace_ctx)
936 {
937 	struct trace_entry *ent = ring_buffer_event_data(event);
938 
939 	tracing_generic_entry_update(ent, type, trace_ctx);
940 }
941 
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
944 			  int type,
945 			  unsigned long len,
946 			  unsigned int trace_ctx)
947 {
948 	struct ring_buffer_event *event;
949 
950 	event = ring_buffer_lock_reserve(buffer, len);
951 	if (event != NULL)
952 		trace_event_setup(event, type, trace_ctx);
953 
954 	return event;
955 }
956 
957 void tracer_tracing_on(struct trace_array *tr)
958 {
959 	if (tr->array_buffer.buffer)
960 		ring_buffer_record_on(tr->array_buffer.buffer);
961 	/*
962 	 * This flag is looked at when buffers haven't been allocated
963 	 * yet, or by some tracers (like irqsoff), that just want to
964 	 * know if the ring buffer has been disabled, but it can handle
965 	 * races of where it gets disabled but we still do a record.
966 	 * As the check is in the fast path of the tracers, it is more
967 	 * important to be fast than accurate.
968 	 */
969 	tr->buffer_disabled = 0;
970 	/* Make the flag seen by readers */
971 	smp_wmb();
972 }
973 
974 /**
975  * tracing_on - enable tracing buffers
976  *
977  * This function enables tracing buffers that may have been
978  * disabled with tracing_off.
979  */
980 void tracing_on(void)
981 {
982 	tracer_tracing_on(&global_trace);
983 }
984 EXPORT_SYMBOL_GPL(tracing_on);
985 
986 
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 {
990 	__this_cpu_write(trace_taskinfo_save, true);
991 
992 	/* If this is the temp buffer, we need to commit fully */
993 	if (this_cpu_read(trace_buffered_event) == event) {
994 		/* Length is in event->array[0] */
995 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
996 		/* Release the temp buffer */
997 		this_cpu_dec(trace_buffered_event_cnt);
998 	} else
999 		ring_buffer_unlock_commit(buffer, event);
1000 }
1001 
1002 /**
1003  * __trace_puts - write a constant string into the trace buffer.
1004  * @ip:	   The address of the caller
1005  * @str:   The constant string to write
1006  * @size:  The size of the string.
1007  */
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1009 {
1010 	struct ring_buffer_event *event;
1011 	struct trace_buffer *buffer;
1012 	struct print_entry *entry;
1013 	unsigned int trace_ctx;
1014 	int alloc;
1015 
1016 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1017 		return 0;
1018 
1019 	if (unlikely(tracing_selftest_running || tracing_disabled))
1020 		return 0;
1021 
1022 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023 
1024 	trace_ctx = tracing_gen_ctx();
1025 	buffer = global_trace.array_buffer.buffer;
1026 	ring_buffer_nest_start(buffer);
1027 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028 					    trace_ctx);
1029 	if (!event) {
1030 		size = 0;
1031 		goto out;
1032 	}
1033 
1034 	entry = ring_buffer_event_data(event);
1035 	entry->ip = ip;
1036 
1037 	memcpy(&entry->buf, str, size);
1038 
1039 	/* Add a newline if necessary */
1040 	if (entry->buf[size - 1] != '\n') {
1041 		entry->buf[size] = '\n';
1042 		entry->buf[size + 1] = '\0';
1043 	} else
1044 		entry->buf[size] = '\0';
1045 
1046 	__buffer_unlock_commit(buffer, event);
1047 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1048  out:
1049 	ring_buffer_nest_end(buffer);
1050 	return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
1053 
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:	   The address of the caller
1057  * @str:   The constant string to write to the buffer to
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061 	struct ring_buffer_event *event;
1062 	struct trace_buffer *buffer;
1063 	struct bputs_entry *entry;
1064 	unsigned int trace_ctx;
1065 	int size = sizeof(struct bputs_entry);
1066 	int ret = 0;
1067 
1068 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069 		return 0;
1070 
1071 	if (unlikely(tracing_selftest_running || tracing_disabled))
1072 		return 0;
1073 
1074 	trace_ctx = tracing_gen_ctx();
1075 	buffer = global_trace.array_buffer.buffer;
1076 
1077 	ring_buffer_nest_start(buffer);
1078 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079 					    trace_ctx);
1080 	if (!event)
1081 		goto out;
1082 
1083 	entry = ring_buffer_event_data(event);
1084 	entry->ip			= ip;
1085 	entry->str			= str;
1086 
1087 	__buffer_unlock_commit(buffer, event);
1088 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089 
1090 	ret = 1;
1091  out:
1092 	ring_buffer_nest_end(buffer);
1093 	return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1096 
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099 					   void *cond_data)
1100 {
1101 	struct tracer *tracer = tr->current_trace;
1102 	unsigned long flags;
1103 
1104 	if (in_nmi()) {
1105 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1107 		return;
1108 	}
1109 
1110 	if (!tr->allocated_snapshot) {
1111 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112 		internal_trace_puts("*** stopping trace here!   ***\n");
1113 		tracing_off();
1114 		return;
1115 	}
1116 
1117 	/* Note, snapshot can not be used when the tracer uses it */
1118 	if (tracer->use_max_tr) {
1119 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1121 		return;
1122 	}
1123 
1124 	local_irq_save(flags);
1125 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1126 	local_irq_restore(flags);
1127 }
1128 
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131 	tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133 
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot with either
1142  * a tracing_snapshot_alloc(), or by doing it manually
1143  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, it will stop tracing.
1146  * Basically making a permanent snapshot.
1147  */
1148 void tracing_snapshot(void)
1149 {
1150 	struct trace_array *tr = &global_trace;
1151 
1152 	tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155 
1156 /**
1157  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158  * @tr:		The tracing instance to snapshot
1159  * @cond_data:	The data to be tested conditionally, and possibly saved
1160  *
1161  * This is the same as tracing_snapshot() except that the snapshot is
1162  * conditional - the snapshot will only happen if the
1163  * cond_snapshot.update() implementation receiving the cond_data
1164  * returns true, which means that the trace array's cond_snapshot
1165  * update() operation used the cond_data to determine whether the
1166  * snapshot should be taken, and if it was, presumably saved it along
1167  * with the snapshot.
1168  */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171 	tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174 
1175 /**
1176  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177  * @tr:		The tracing instance
1178  *
1179  * When the user enables a conditional snapshot using
1180  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181  * with the snapshot.  This accessor is used to retrieve it.
1182  *
1183  * Should not be called from cond_snapshot.update(), since it takes
1184  * the tr->max_lock lock, which the code calling
1185  * cond_snapshot.update() has already done.
1186  *
1187  * Returns the cond_data associated with the trace array's snapshot.
1188  */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191 	void *cond_data = NULL;
1192 
1193 	arch_spin_lock(&tr->max_lock);
1194 
1195 	if (tr->cond_snapshot)
1196 		cond_data = tr->cond_snapshot->cond_data;
1197 
1198 	arch_spin_unlock(&tr->max_lock);
1199 
1200 	return cond_data;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203 
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205 					struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207 
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1209 {
1210 	int ret;
1211 
1212 	if (!tr->allocated_snapshot) {
1213 
1214 		/* allocate spare buffer */
1215 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1217 		if (ret < 0)
1218 			return ret;
1219 
1220 		tr->allocated_snapshot = true;
1221 	}
1222 
1223 	return 0;
1224 }
1225 
1226 static void free_snapshot(struct trace_array *tr)
1227 {
1228 	/*
1229 	 * We don't free the ring buffer. instead, resize it because
1230 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1231 	 * we want preserve it.
1232 	 */
1233 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234 	set_buffer_entries(&tr->max_buffer, 1);
1235 	tracing_reset_online_cpus(&tr->max_buffer);
1236 	tr->allocated_snapshot = false;
1237 }
1238 
1239 /**
1240  * tracing_alloc_snapshot - allocate snapshot buffer.
1241  *
1242  * This only allocates the snapshot buffer if it isn't already
1243  * allocated - it doesn't also take a snapshot.
1244  *
1245  * This is meant to be used in cases where the snapshot buffer needs
1246  * to be set up for events that can't sleep but need to be able to
1247  * trigger a snapshot.
1248  */
1249 int tracing_alloc_snapshot(void)
1250 {
1251 	struct trace_array *tr = &global_trace;
1252 	int ret;
1253 
1254 	ret = tracing_alloc_snapshot_instance(tr);
1255 	WARN_ON(ret < 0);
1256 
1257 	return ret;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1260 
1261 /**
1262  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263  *
1264  * This is similar to tracing_snapshot(), but it will allocate the
1265  * snapshot buffer if it isn't already allocated. Use this only
1266  * where it is safe to sleep, as the allocation may sleep.
1267  *
1268  * This causes a swap between the snapshot buffer and the current live
1269  * tracing buffer. You can use this to take snapshots of the live
1270  * trace when some condition is triggered, but continue to trace.
1271  */
1272 void tracing_snapshot_alloc(void)
1273 {
1274 	int ret;
1275 
1276 	ret = tracing_alloc_snapshot();
1277 	if (ret < 0)
1278 		return;
1279 
1280 	tracing_snapshot();
1281 }
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1283 
1284 /**
1285  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286  * @tr:		The tracing instance
1287  * @cond_data:	User data to associate with the snapshot
1288  * @update:	Implementation of the cond_snapshot update function
1289  *
1290  * Check whether the conditional snapshot for the given instance has
1291  * already been enabled, or if the current tracer is already using a
1292  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293  * save the cond_data and update function inside.
1294  *
1295  * Returns 0 if successful, error otherwise.
1296  */
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298 				 cond_update_fn_t update)
1299 {
1300 	struct cond_snapshot *cond_snapshot;
1301 	int ret = 0;
1302 
1303 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1304 	if (!cond_snapshot)
1305 		return -ENOMEM;
1306 
1307 	cond_snapshot->cond_data = cond_data;
1308 	cond_snapshot->update = update;
1309 
1310 	mutex_lock(&trace_types_lock);
1311 
1312 	ret = tracing_alloc_snapshot_instance(tr);
1313 	if (ret)
1314 		goto fail_unlock;
1315 
1316 	if (tr->current_trace->use_max_tr) {
1317 		ret = -EBUSY;
1318 		goto fail_unlock;
1319 	}
1320 
1321 	/*
1322 	 * The cond_snapshot can only change to NULL without the
1323 	 * trace_types_lock. We don't care if we race with it going
1324 	 * to NULL, but we want to make sure that it's not set to
1325 	 * something other than NULL when we get here, which we can
1326 	 * do safely with only holding the trace_types_lock and not
1327 	 * having to take the max_lock.
1328 	 */
1329 	if (tr->cond_snapshot) {
1330 		ret = -EBUSY;
1331 		goto fail_unlock;
1332 	}
1333 
1334 	arch_spin_lock(&tr->max_lock);
1335 	tr->cond_snapshot = cond_snapshot;
1336 	arch_spin_unlock(&tr->max_lock);
1337 
1338 	mutex_unlock(&trace_types_lock);
1339 
1340 	return ret;
1341 
1342  fail_unlock:
1343 	mutex_unlock(&trace_types_lock);
1344 	kfree(cond_snapshot);
1345 	return ret;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1348 
1349 /**
1350  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351  * @tr:		The tracing instance
1352  *
1353  * Check whether the conditional snapshot for the given instance is
1354  * enabled; if so, free the cond_snapshot associated with it,
1355  * otherwise return -EINVAL.
1356  *
1357  * Returns 0 if successful, error otherwise.
1358  */
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1360 {
1361 	int ret = 0;
1362 
1363 	arch_spin_lock(&tr->max_lock);
1364 
1365 	if (!tr->cond_snapshot)
1366 		ret = -EINVAL;
1367 	else {
1368 		kfree(tr->cond_snapshot);
1369 		tr->cond_snapshot = NULL;
1370 	}
1371 
1372 	arch_spin_unlock(&tr->max_lock);
1373 
1374 	return ret;
1375 }
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1377 #else
1378 void tracing_snapshot(void)
1379 {
1380 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1384 {
1385 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1386 }
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1389 {
1390 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1391 	return -ENODEV;
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1395 {
1396 	/* Give warning */
1397 	tracing_snapshot();
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1401 {
1402 	return NULL;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1406 {
1407 	return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1411 {
1412 	return false;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1416 
1417 void tracer_tracing_off(struct trace_array *tr)
1418 {
1419 	if (tr->array_buffer.buffer)
1420 		ring_buffer_record_off(tr->array_buffer.buffer);
1421 	/*
1422 	 * This flag is looked at when buffers haven't been allocated
1423 	 * yet, or by some tracers (like irqsoff), that just want to
1424 	 * know if the ring buffer has been disabled, but it can handle
1425 	 * races of where it gets disabled but we still do a record.
1426 	 * As the check is in the fast path of the tracers, it is more
1427 	 * important to be fast than accurate.
1428 	 */
1429 	tr->buffer_disabled = 1;
1430 	/* Make the flag seen by readers */
1431 	smp_wmb();
1432 }
1433 
1434 /**
1435  * tracing_off - turn off tracing buffers
1436  *
1437  * This function stops the tracing buffers from recording data.
1438  * It does not disable any overhead the tracers themselves may
1439  * be causing. This function simply causes all recording to
1440  * the ring buffers to fail.
1441  */
1442 void tracing_off(void)
1443 {
1444 	tracer_tracing_off(&global_trace);
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_off);
1447 
1448 void disable_trace_on_warning(void)
1449 {
1450 	if (__disable_trace_on_warning) {
1451 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452 			"Disabling tracing due to warning\n");
1453 		tracing_off();
1454 	}
1455 }
1456 
1457 /**
1458  * tracer_tracing_is_on - show real state of ring buffer enabled
1459  * @tr : the trace array to know if ring buffer is enabled
1460  *
1461  * Shows real state of the ring buffer if it is enabled or not.
1462  */
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1464 {
1465 	if (tr->array_buffer.buffer)
1466 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467 	return !tr->buffer_disabled;
1468 }
1469 
1470 /**
1471  * tracing_is_on - show state of ring buffers enabled
1472  */
1473 int tracing_is_on(void)
1474 {
1475 	return tracer_tracing_is_on(&global_trace);
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
1478 
1479 static int __init set_buf_size(char *str)
1480 {
1481 	unsigned long buf_size;
1482 
1483 	if (!str)
1484 		return 0;
1485 	buf_size = memparse(str, &str);
1486 	/* nr_entries can not be zero */
1487 	if (buf_size == 0)
1488 		return 0;
1489 	trace_buf_size = buf_size;
1490 	return 1;
1491 }
1492 __setup("trace_buf_size=", set_buf_size);
1493 
1494 static int __init set_tracing_thresh(char *str)
1495 {
1496 	unsigned long threshold;
1497 	int ret;
1498 
1499 	if (!str)
1500 		return 0;
1501 	ret = kstrtoul(str, 0, &threshold);
1502 	if (ret < 0)
1503 		return 0;
1504 	tracing_thresh = threshold * 1000;
1505 	return 1;
1506 }
1507 __setup("tracing_thresh=", set_tracing_thresh);
1508 
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1510 {
1511 	return nsecs / 1000;
1512 }
1513 
1514 /*
1515  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518  * of strings in the order that the evals (enum) were defined.
1519  */
1520 #undef C
1521 #define C(a, b) b
1522 
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1525 	TRACE_FLAGS
1526 	NULL
1527 };
1528 
1529 static struct {
1530 	u64 (*func)(void);
1531 	const char *name;
1532 	int in_ns;		/* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534 	{ trace_clock_local,		"local",	1 },
1535 	{ trace_clock_global,		"global",	1 },
1536 	{ trace_clock_counter,		"counter",	0 },
1537 	{ trace_clock_jiffies,		"uptime",	0 },
1538 	{ trace_clock,			"perf",		1 },
1539 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1540 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1541 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1542 	ARCH_TRACE_CLOCKS
1543 };
1544 
1545 bool trace_clock_in_ns(struct trace_array *tr)
1546 {
1547 	if (trace_clocks[tr->clock_id].in_ns)
1548 		return true;
1549 
1550 	return false;
1551 }
1552 
1553 /*
1554  * trace_parser_get_init - gets the buffer for trace parser
1555  */
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1557 {
1558 	memset(parser, 0, sizeof(*parser));
1559 
1560 	parser->buffer = kmalloc(size, GFP_KERNEL);
1561 	if (!parser->buffer)
1562 		return 1;
1563 
1564 	parser->size = size;
1565 	return 0;
1566 }
1567 
1568 /*
1569  * trace_parser_put - frees the buffer for trace parser
1570  */
1571 void trace_parser_put(struct trace_parser *parser)
1572 {
1573 	kfree(parser->buffer);
1574 	parser->buffer = NULL;
1575 }
1576 
1577 /*
1578  * trace_get_user - reads the user input string separated by  space
1579  * (matched by isspace(ch))
1580  *
1581  * For each string found the 'struct trace_parser' is updated,
1582  * and the function returns.
1583  *
1584  * Returns number of bytes read.
1585  *
1586  * See kernel/trace/trace.h for 'struct trace_parser' details.
1587  */
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589 	size_t cnt, loff_t *ppos)
1590 {
1591 	char ch;
1592 	size_t read = 0;
1593 	ssize_t ret;
1594 
1595 	if (!*ppos)
1596 		trace_parser_clear(parser);
1597 
1598 	ret = get_user(ch, ubuf++);
1599 	if (ret)
1600 		goto out;
1601 
1602 	read++;
1603 	cnt--;
1604 
1605 	/*
1606 	 * The parser is not finished with the last write,
1607 	 * continue reading the user input without skipping spaces.
1608 	 */
1609 	if (!parser->cont) {
1610 		/* skip white space */
1611 		while (cnt && isspace(ch)) {
1612 			ret = get_user(ch, ubuf++);
1613 			if (ret)
1614 				goto out;
1615 			read++;
1616 			cnt--;
1617 		}
1618 
1619 		parser->idx = 0;
1620 
1621 		/* only spaces were written */
1622 		if (isspace(ch) || !ch) {
1623 			*ppos += read;
1624 			ret = read;
1625 			goto out;
1626 		}
1627 	}
1628 
1629 	/* read the non-space input */
1630 	while (cnt && !isspace(ch) && ch) {
1631 		if (parser->idx < parser->size - 1)
1632 			parser->buffer[parser->idx++] = ch;
1633 		else {
1634 			ret = -EINVAL;
1635 			goto out;
1636 		}
1637 		ret = get_user(ch, ubuf++);
1638 		if (ret)
1639 			goto out;
1640 		read++;
1641 		cnt--;
1642 	}
1643 
1644 	/* We either got finished input or we have to wait for another call. */
1645 	if (isspace(ch) || !ch) {
1646 		parser->buffer[parser->idx] = 0;
1647 		parser->cont = false;
1648 	} else if (parser->idx < parser->size - 1) {
1649 		parser->cont = true;
1650 		parser->buffer[parser->idx++] = ch;
1651 		/* Make sure the parsed string always terminates with '\0'. */
1652 		parser->buffer[parser->idx] = 0;
1653 	} else {
1654 		ret = -EINVAL;
1655 		goto out;
1656 	}
1657 
1658 	*ppos += read;
1659 	ret = read;
1660 
1661 out:
1662 	return ret;
1663 }
1664 
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1667 {
1668 	int len;
1669 
1670 	if (trace_seq_used(s) <= s->seq.readpos)
1671 		return -EBUSY;
1672 
1673 	len = trace_seq_used(s) - s->seq.readpos;
1674 	if (cnt > len)
1675 		cnt = len;
1676 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1677 
1678 	s->seq.readpos += cnt;
1679 	return cnt;
1680 }
1681 
1682 unsigned long __read_mostly	tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1684 
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686 	defined(CONFIG_FSNOTIFY)
1687 
1688 static struct workqueue_struct *fsnotify_wq;
1689 
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1691 {
1692 	struct trace_array *tr = container_of(work, struct trace_array,
1693 					      fsnotify_work);
1694 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1695 }
1696 
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1698 {
1699 	struct trace_array *tr = container_of(iwork, struct trace_array,
1700 					      fsnotify_irqwork);
1701 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1702 }
1703 
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705 				     struct dentry *d_tracer)
1706 {
1707 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710 					      d_tracer, &tr->max_latency,
1711 					      &tracing_max_lat_fops);
1712 }
1713 
1714 __init static int latency_fsnotify_init(void)
1715 {
1716 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1718 	if (!fsnotify_wq) {
1719 		pr_err("Unable to allocate tr_max_lat_wq\n");
1720 		return -ENOMEM;
1721 	}
1722 	return 0;
1723 }
1724 
1725 late_initcall_sync(latency_fsnotify_init);
1726 
1727 void latency_fsnotify(struct trace_array *tr)
1728 {
1729 	if (!fsnotify_wq)
1730 		return;
1731 	/*
1732 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733 	 * possible that we are called from __schedule() or do_idle(), which
1734 	 * could cause a deadlock.
1735 	 */
1736 	irq_work_queue(&tr->fsnotify_irqwork);
1737 }
1738 
1739 /*
1740  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741  *  defined(CONFIG_FSNOTIFY)
1742  */
1743 #else
1744 
1745 #define trace_create_maxlat_file(tr, d_tracer)				\
1746 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1747 			  &tr->max_latency, &tracing_max_lat_fops)
1748 
1749 #endif
1750 
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1752 /*
1753  * Copy the new maximum trace into the separate maximum-trace
1754  * structure. (this way the maximum trace is permanently saved,
1755  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1756  */
1757 static void
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1759 {
1760 	struct array_buffer *trace_buf = &tr->array_buffer;
1761 	struct array_buffer *max_buf = &tr->max_buffer;
1762 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1764 
1765 	max_buf->cpu = cpu;
1766 	max_buf->time_start = data->preempt_timestamp;
1767 
1768 	max_data->saved_latency = tr->max_latency;
1769 	max_data->critical_start = data->critical_start;
1770 	max_data->critical_end = data->critical_end;
1771 
1772 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773 	max_data->pid = tsk->pid;
1774 	/*
1775 	 * If tsk == current, then use current_uid(), as that does not use
1776 	 * RCU. The irq tracer can be called out of RCU scope.
1777 	 */
1778 	if (tsk == current)
1779 		max_data->uid = current_uid();
1780 	else
1781 		max_data->uid = task_uid(tsk);
1782 
1783 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784 	max_data->policy = tsk->policy;
1785 	max_data->rt_priority = tsk->rt_priority;
1786 
1787 	/* record this tasks comm */
1788 	tracing_record_cmdline(tsk);
1789 	latency_fsnotify(tr);
1790 }
1791 
1792 /**
1793  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1794  * @tr: tracer
1795  * @tsk: the task with the latency
1796  * @cpu: The cpu that initiated the trace.
1797  * @cond_data: User data associated with a conditional snapshot
1798  *
1799  * Flip the buffers between the @tr and the max_tr and record information
1800  * about which task was the cause of this latency.
1801  */
1802 void
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1804 	      void *cond_data)
1805 {
1806 	if (tr->stop_count)
1807 		return;
1808 
1809 	WARN_ON_ONCE(!irqs_disabled());
1810 
1811 	if (!tr->allocated_snapshot) {
1812 		/* Only the nop tracer should hit this when disabling */
1813 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1814 		return;
1815 	}
1816 
1817 	arch_spin_lock(&tr->max_lock);
1818 
1819 	/* Inherit the recordable setting from array_buffer */
1820 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821 		ring_buffer_record_on(tr->max_buffer.buffer);
1822 	else
1823 		ring_buffer_record_off(tr->max_buffer.buffer);
1824 
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1827 		goto out_unlock;
1828 #endif
1829 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1830 
1831 	__update_max_tr(tr, tsk, cpu);
1832 
1833  out_unlock:
1834 	arch_spin_unlock(&tr->max_lock);
1835 }
1836 
1837 /**
1838  * update_max_tr_single - only copy one trace over, and reset the rest
1839  * @tr: tracer
1840  * @tsk: task with the latency
1841  * @cpu: the cpu of the buffer to copy.
1842  *
1843  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1844  */
1845 void
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1847 {
1848 	int ret;
1849 
1850 	if (tr->stop_count)
1851 		return;
1852 
1853 	WARN_ON_ONCE(!irqs_disabled());
1854 	if (!tr->allocated_snapshot) {
1855 		/* Only the nop tracer should hit this when disabling */
1856 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1857 		return;
1858 	}
1859 
1860 	arch_spin_lock(&tr->max_lock);
1861 
1862 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1863 
1864 	if (ret == -EBUSY) {
1865 		/*
1866 		 * We failed to swap the buffer due to a commit taking
1867 		 * place on this CPU. We fail to record, but we reset
1868 		 * the max trace buffer (no one writes directly to it)
1869 		 * and flag that it failed.
1870 		 */
1871 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872 			"Failed to swap buffers due to commit in progress\n");
1873 	}
1874 
1875 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1876 
1877 	__update_max_tr(tr, tsk, cpu);
1878 	arch_spin_unlock(&tr->max_lock);
1879 }
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1881 
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1883 {
1884 	/* Iterators are static, they should be filled or empty */
1885 	if (trace_buffer_iter(iter, iter->cpu_file))
1886 		return 0;
1887 
1888 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1889 				full);
1890 }
1891 
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1894 
1895 struct trace_selftests {
1896 	struct list_head		list;
1897 	struct tracer			*type;
1898 };
1899 
1900 static LIST_HEAD(postponed_selftests);
1901 
1902 static int save_selftest(struct tracer *type)
1903 {
1904 	struct trace_selftests *selftest;
1905 
1906 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1907 	if (!selftest)
1908 		return -ENOMEM;
1909 
1910 	selftest->type = type;
1911 	list_add(&selftest->list, &postponed_selftests);
1912 	return 0;
1913 }
1914 
1915 static int run_tracer_selftest(struct tracer *type)
1916 {
1917 	struct trace_array *tr = &global_trace;
1918 	struct tracer *saved_tracer = tr->current_trace;
1919 	int ret;
1920 
1921 	if (!type->selftest || tracing_selftest_disabled)
1922 		return 0;
1923 
1924 	/*
1925 	 * If a tracer registers early in boot up (before scheduling is
1926 	 * initialized and such), then do not run its selftests yet.
1927 	 * Instead, run it a little later in the boot process.
1928 	 */
1929 	if (!selftests_can_run)
1930 		return save_selftest(type);
1931 
1932 	if (!tracing_is_on()) {
1933 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1934 			type->name);
1935 		return 0;
1936 	}
1937 
1938 	/*
1939 	 * Run a selftest on this tracer.
1940 	 * Here we reset the trace buffer, and set the current
1941 	 * tracer to be this tracer. The tracer can then run some
1942 	 * internal tracing to verify that everything is in order.
1943 	 * If we fail, we do not register this tracer.
1944 	 */
1945 	tracing_reset_online_cpus(&tr->array_buffer);
1946 
1947 	tr->current_trace = type;
1948 
1949 #ifdef CONFIG_TRACER_MAX_TRACE
1950 	if (type->use_max_tr) {
1951 		/* If we expanded the buffers, make sure the max is expanded too */
1952 		if (ring_buffer_expanded)
1953 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1954 					   RING_BUFFER_ALL_CPUS);
1955 		tr->allocated_snapshot = true;
1956 	}
1957 #endif
1958 
1959 	/* the test is responsible for initializing and enabling */
1960 	pr_info("Testing tracer %s: ", type->name);
1961 	ret = type->selftest(type, tr);
1962 	/* the test is responsible for resetting too */
1963 	tr->current_trace = saved_tracer;
1964 	if (ret) {
1965 		printk(KERN_CONT "FAILED!\n");
1966 		/* Add the warning after printing 'FAILED' */
1967 		WARN_ON(1);
1968 		return -1;
1969 	}
1970 	/* Only reset on passing, to avoid touching corrupted buffers */
1971 	tracing_reset_online_cpus(&tr->array_buffer);
1972 
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974 	if (type->use_max_tr) {
1975 		tr->allocated_snapshot = false;
1976 
1977 		/* Shrink the max buffer again */
1978 		if (ring_buffer_expanded)
1979 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1980 					   RING_BUFFER_ALL_CPUS);
1981 	}
1982 #endif
1983 
1984 	printk(KERN_CONT "PASSED\n");
1985 	return 0;
1986 }
1987 
1988 static __init int init_trace_selftests(void)
1989 {
1990 	struct trace_selftests *p, *n;
1991 	struct tracer *t, **last;
1992 	int ret;
1993 
1994 	selftests_can_run = true;
1995 
1996 	mutex_lock(&trace_types_lock);
1997 
1998 	if (list_empty(&postponed_selftests))
1999 		goto out;
2000 
2001 	pr_info("Running postponed tracer tests:\n");
2002 
2003 	tracing_selftest_running = true;
2004 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2005 		/* This loop can take minutes when sanitizers are enabled, so
2006 		 * lets make sure we allow RCU processing.
2007 		 */
2008 		cond_resched();
2009 		ret = run_tracer_selftest(p->type);
2010 		/* If the test fails, then warn and remove from available_tracers */
2011 		if (ret < 0) {
2012 			WARN(1, "tracer: %s failed selftest, disabling\n",
2013 			     p->type->name);
2014 			last = &trace_types;
2015 			for (t = trace_types; t; t = t->next) {
2016 				if (t == p->type) {
2017 					*last = t->next;
2018 					break;
2019 				}
2020 				last = &t->next;
2021 			}
2022 		}
2023 		list_del(&p->list);
2024 		kfree(p);
2025 	}
2026 	tracing_selftest_running = false;
2027 
2028  out:
2029 	mutex_unlock(&trace_types_lock);
2030 
2031 	return 0;
2032 }
2033 core_initcall(init_trace_selftests);
2034 #else
2035 static inline int run_tracer_selftest(struct tracer *type)
2036 {
2037 	return 0;
2038 }
2039 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2040 
2041 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2042 
2043 static void __init apply_trace_boot_options(void);
2044 
2045 /**
2046  * register_tracer - register a tracer with the ftrace system.
2047  * @type: the plugin for the tracer
2048  *
2049  * Register a new plugin tracer.
2050  */
2051 int __init register_tracer(struct tracer *type)
2052 {
2053 	struct tracer *t;
2054 	int ret = 0;
2055 
2056 	if (!type->name) {
2057 		pr_info("Tracer must have a name\n");
2058 		return -1;
2059 	}
2060 
2061 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2062 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2063 		return -1;
2064 	}
2065 
2066 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2067 		pr_warn("Can not register tracer %s due to lockdown\n",
2068 			   type->name);
2069 		return -EPERM;
2070 	}
2071 
2072 	mutex_lock(&trace_types_lock);
2073 
2074 	tracing_selftest_running = true;
2075 
2076 	for (t = trace_types; t; t = t->next) {
2077 		if (strcmp(type->name, t->name) == 0) {
2078 			/* already found */
2079 			pr_info("Tracer %s already registered\n",
2080 				type->name);
2081 			ret = -1;
2082 			goto out;
2083 		}
2084 	}
2085 
2086 	if (!type->set_flag)
2087 		type->set_flag = &dummy_set_flag;
2088 	if (!type->flags) {
2089 		/*allocate a dummy tracer_flags*/
2090 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2091 		if (!type->flags) {
2092 			ret = -ENOMEM;
2093 			goto out;
2094 		}
2095 		type->flags->val = 0;
2096 		type->flags->opts = dummy_tracer_opt;
2097 	} else
2098 		if (!type->flags->opts)
2099 			type->flags->opts = dummy_tracer_opt;
2100 
2101 	/* store the tracer for __set_tracer_option */
2102 	type->flags->trace = type;
2103 
2104 	ret = run_tracer_selftest(type);
2105 	if (ret < 0)
2106 		goto out;
2107 
2108 	type->next = trace_types;
2109 	trace_types = type;
2110 	add_tracer_options(&global_trace, type);
2111 
2112  out:
2113 	tracing_selftest_running = false;
2114 	mutex_unlock(&trace_types_lock);
2115 
2116 	if (ret || !default_bootup_tracer)
2117 		goto out_unlock;
2118 
2119 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2120 		goto out_unlock;
2121 
2122 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2123 	/* Do we want this tracer to start on bootup? */
2124 	tracing_set_tracer(&global_trace, type->name);
2125 	default_bootup_tracer = NULL;
2126 
2127 	apply_trace_boot_options();
2128 
2129 	/* disable other selftests, since this will break it. */
2130 	disable_tracing_selftest("running a tracer");
2131 
2132  out_unlock:
2133 	return ret;
2134 }
2135 
2136 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2137 {
2138 	struct trace_buffer *buffer = buf->buffer;
2139 
2140 	if (!buffer)
2141 		return;
2142 
2143 	ring_buffer_record_disable(buffer);
2144 
2145 	/* Make sure all commits have finished */
2146 	synchronize_rcu();
2147 	ring_buffer_reset_cpu(buffer, cpu);
2148 
2149 	ring_buffer_record_enable(buffer);
2150 }
2151 
2152 void tracing_reset_online_cpus(struct array_buffer *buf)
2153 {
2154 	struct trace_buffer *buffer = buf->buffer;
2155 
2156 	if (!buffer)
2157 		return;
2158 
2159 	ring_buffer_record_disable(buffer);
2160 
2161 	/* Make sure all commits have finished */
2162 	synchronize_rcu();
2163 
2164 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2165 
2166 	ring_buffer_reset_online_cpus(buffer);
2167 
2168 	ring_buffer_record_enable(buffer);
2169 }
2170 
2171 /* Must have trace_types_lock held */
2172 void tracing_reset_all_online_cpus(void)
2173 {
2174 	struct trace_array *tr;
2175 
2176 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2177 		if (!tr->clear_trace)
2178 			continue;
2179 		tr->clear_trace = false;
2180 		tracing_reset_online_cpus(&tr->array_buffer);
2181 #ifdef CONFIG_TRACER_MAX_TRACE
2182 		tracing_reset_online_cpus(&tr->max_buffer);
2183 #endif
2184 	}
2185 }
2186 
2187 static int *tgid_map;
2188 
2189 #define SAVED_CMDLINES_DEFAULT 128
2190 #define NO_CMDLINE_MAP UINT_MAX
2191 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2192 struct saved_cmdlines_buffer {
2193 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2194 	unsigned *map_cmdline_to_pid;
2195 	unsigned cmdline_num;
2196 	int cmdline_idx;
2197 	char *saved_cmdlines;
2198 };
2199 static struct saved_cmdlines_buffer *savedcmd;
2200 
2201 /* temporary disable recording */
2202 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2203 
2204 static inline char *get_saved_cmdlines(int idx)
2205 {
2206 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2207 }
2208 
2209 static inline void set_cmdline(int idx, const char *cmdline)
2210 {
2211 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2212 }
2213 
2214 static int allocate_cmdlines_buffer(unsigned int val,
2215 				    struct saved_cmdlines_buffer *s)
2216 {
2217 	s->map_cmdline_to_pid = kmalloc_array(val,
2218 					      sizeof(*s->map_cmdline_to_pid),
2219 					      GFP_KERNEL);
2220 	if (!s->map_cmdline_to_pid)
2221 		return -ENOMEM;
2222 
2223 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2224 	if (!s->saved_cmdlines) {
2225 		kfree(s->map_cmdline_to_pid);
2226 		return -ENOMEM;
2227 	}
2228 
2229 	s->cmdline_idx = 0;
2230 	s->cmdline_num = val;
2231 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2232 	       sizeof(s->map_pid_to_cmdline));
2233 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2234 	       val * sizeof(*s->map_cmdline_to_pid));
2235 
2236 	return 0;
2237 }
2238 
2239 static int trace_create_savedcmd(void)
2240 {
2241 	int ret;
2242 
2243 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2244 	if (!savedcmd)
2245 		return -ENOMEM;
2246 
2247 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2248 	if (ret < 0) {
2249 		kfree(savedcmd);
2250 		savedcmd = NULL;
2251 		return -ENOMEM;
2252 	}
2253 
2254 	return 0;
2255 }
2256 
2257 int is_tracing_stopped(void)
2258 {
2259 	return global_trace.stop_count;
2260 }
2261 
2262 /**
2263  * tracing_start - quick start of the tracer
2264  *
2265  * If tracing is enabled but was stopped by tracing_stop,
2266  * this will start the tracer back up.
2267  */
2268 void tracing_start(void)
2269 {
2270 	struct trace_buffer *buffer;
2271 	unsigned long flags;
2272 
2273 	if (tracing_disabled)
2274 		return;
2275 
2276 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2277 	if (--global_trace.stop_count) {
2278 		if (global_trace.stop_count < 0) {
2279 			/* Someone screwed up their debugging */
2280 			WARN_ON_ONCE(1);
2281 			global_trace.stop_count = 0;
2282 		}
2283 		goto out;
2284 	}
2285 
2286 	/* Prevent the buffers from switching */
2287 	arch_spin_lock(&global_trace.max_lock);
2288 
2289 	buffer = global_trace.array_buffer.buffer;
2290 	if (buffer)
2291 		ring_buffer_record_enable(buffer);
2292 
2293 #ifdef CONFIG_TRACER_MAX_TRACE
2294 	buffer = global_trace.max_buffer.buffer;
2295 	if (buffer)
2296 		ring_buffer_record_enable(buffer);
2297 #endif
2298 
2299 	arch_spin_unlock(&global_trace.max_lock);
2300 
2301  out:
2302 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2303 }
2304 
2305 static void tracing_start_tr(struct trace_array *tr)
2306 {
2307 	struct trace_buffer *buffer;
2308 	unsigned long flags;
2309 
2310 	if (tracing_disabled)
2311 		return;
2312 
2313 	/* If global, we need to also start the max tracer */
2314 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2315 		return tracing_start();
2316 
2317 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2318 
2319 	if (--tr->stop_count) {
2320 		if (tr->stop_count < 0) {
2321 			/* Someone screwed up their debugging */
2322 			WARN_ON_ONCE(1);
2323 			tr->stop_count = 0;
2324 		}
2325 		goto out;
2326 	}
2327 
2328 	buffer = tr->array_buffer.buffer;
2329 	if (buffer)
2330 		ring_buffer_record_enable(buffer);
2331 
2332  out:
2333 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2334 }
2335 
2336 /**
2337  * tracing_stop - quick stop of the tracer
2338  *
2339  * Light weight way to stop tracing. Use in conjunction with
2340  * tracing_start.
2341  */
2342 void tracing_stop(void)
2343 {
2344 	struct trace_buffer *buffer;
2345 	unsigned long flags;
2346 
2347 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2348 	if (global_trace.stop_count++)
2349 		goto out;
2350 
2351 	/* Prevent the buffers from switching */
2352 	arch_spin_lock(&global_trace.max_lock);
2353 
2354 	buffer = global_trace.array_buffer.buffer;
2355 	if (buffer)
2356 		ring_buffer_record_disable(buffer);
2357 
2358 #ifdef CONFIG_TRACER_MAX_TRACE
2359 	buffer = global_trace.max_buffer.buffer;
2360 	if (buffer)
2361 		ring_buffer_record_disable(buffer);
2362 #endif
2363 
2364 	arch_spin_unlock(&global_trace.max_lock);
2365 
2366  out:
2367 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2368 }
2369 
2370 static void tracing_stop_tr(struct trace_array *tr)
2371 {
2372 	struct trace_buffer *buffer;
2373 	unsigned long flags;
2374 
2375 	/* If global, we need to also stop the max tracer */
2376 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2377 		return tracing_stop();
2378 
2379 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2380 	if (tr->stop_count++)
2381 		goto out;
2382 
2383 	buffer = tr->array_buffer.buffer;
2384 	if (buffer)
2385 		ring_buffer_record_disable(buffer);
2386 
2387  out:
2388 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2389 }
2390 
2391 static int trace_save_cmdline(struct task_struct *tsk)
2392 {
2393 	unsigned tpid, idx;
2394 
2395 	/* treat recording of idle task as a success */
2396 	if (!tsk->pid)
2397 		return 1;
2398 
2399 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2400 
2401 	/*
2402 	 * It's not the end of the world if we don't get
2403 	 * the lock, but we also don't want to spin
2404 	 * nor do we want to disable interrupts,
2405 	 * so if we miss here, then better luck next time.
2406 	 */
2407 	if (!arch_spin_trylock(&trace_cmdline_lock))
2408 		return 0;
2409 
2410 	idx = savedcmd->map_pid_to_cmdline[tpid];
2411 	if (idx == NO_CMDLINE_MAP) {
2412 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2413 
2414 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2415 		savedcmd->cmdline_idx = idx;
2416 	}
2417 
2418 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2419 	set_cmdline(idx, tsk->comm);
2420 
2421 	arch_spin_unlock(&trace_cmdline_lock);
2422 
2423 	return 1;
2424 }
2425 
2426 static void __trace_find_cmdline(int pid, char comm[])
2427 {
2428 	unsigned map;
2429 	int tpid;
2430 
2431 	if (!pid) {
2432 		strcpy(comm, "<idle>");
2433 		return;
2434 	}
2435 
2436 	if (WARN_ON_ONCE(pid < 0)) {
2437 		strcpy(comm, "<XXX>");
2438 		return;
2439 	}
2440 
2441 	tpid = pid & (PID_MAX_DEFAULT - 1);
2442 	map = savedcmd->map_pid_to_cmdline[tpid];
2443 	if (map != NO_CMDLINE_MAP) {
2444 		tpid = savedcmd->map_cmdline_to_pid[map];
2445 		if (tpid == pid) {
2446 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2447 			return;
2448 		}
2449 	}
2450 	strcpy(comm, "<...>");
2451 }
2452 
2453 void trace_find_cmdline(int pid, char comm[])
2454 {
2455 	preempt_disable();
2456 	arch_spin_lock(&trace_cmdline_lock);
2457 
2458 	__trace_find_cmdline(pid, comm);
2459 
2460 	arch_spin_unlock(&trace_cmdline_lock);
2461 	preempt_enable();
2462 }
2463 
2464 int trace_find_tgid(int pid)
2465 {
2466 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2467 		return 0;
2468 
2469 	return tgid_map[pid];
2470 }
2471 
2472 static int trace_save_tgid(struct task_struct *tsk)
2473 {
2474 	/* treat recording of idle task as a success */
2475 	if (!tsk->pid)
2476 		return 1;
2477 
2478 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2479 		return 0;
2480 
2481 	tgid_map[tsk->pid] = tsk->tgid;
2482 	return 1;
2483 }
2484 
2485 static bool tracing_record_taskinfo_skip(int flags)
2486 {
2487 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2488 		return true;
2489 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2490 		return true;
2491 	if (!__this_cpu_read(trace_taskinfo_save))
2492 		return true;
2493 	return false;
2494 }
2495 
2496 /**
2497  * tracing_record_taskinfo - record the task info of a task
2498  *
2499  * @task:  task to record
2500  * @flags: TRACE_RECORD_CMDLINE for recording comm
2501  *         TRACE_RECORD_TGID for recording tgid
2502  */
2503 void tracing_record_taskinfo(struct task_struct *task, int flags)
2504 {
2505 	bool done;
2506 
2507 	if (tracing_record_taskinfo_skip(flags))
2508 		return;
2509 
2510 	/*
2511 	 * Record as much task information as possible. If some fail, continue
2512 	 * to try to record the others.
2513 	 */
2514 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2515 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2516 
2517 	/* If recording any information failed, retry again soon. */
2518 	if (!done)
2519 		return;
2520 
2521 	__this_cpu_write(trace_taskinfo_save, false);
2522 }
2523 
2524 /**
2525  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2526  *
2527  * @prev: previous task during sched_switch
2528  * @next: next task during sched_switch
2529  * @flags: TRACE_RECORD_CMDLINE for recording comm
2530  *         TRACE_RECORD_TGID for recording tgid
2531  */
2532 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2533 					  struct task_struct *next, int flags)
2534 {
2535 	bool done;
2536 
2537 	if (tracing_record_taskinfo_skip(flags))
2538 		return;
2539 
2540 	/*
2541 	 * Record as much task information as possible. If some fail, continue
2542 	 * to try to record the others.
2543 	 */
2544 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2545 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2546 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2547 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2548 
2549 	/* If recording any information failed, retry again soon. */
2550 	if (!done)
2551 		return;
2552 
2553 	__this_cpu_write(trace_taskinfo_save, false);
2554 }
2555 
2556 /* Helpers to record a specific task information */
2557 void tracing_record_cmdline(struct task_struct *task)
2558 {
2559 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2560 }
2561 
2562 void tracing_record_tgid(struct task_struct *task)
2563 {
2564 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2565 }
2566 
2567 /*
2568  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570  * simplifies those functions and keeps them in sync.
2571  */
2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574 	return trace_seq_has_overflowed(s) ?
2575 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
2578 
2579 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2580 {
2581 	unsigned int trace_flags = irqs_status;
2582 	unsigned int pc;
2583 
2584 	pc = preempt_count();
2585 
2586 	if (pc & NMI_MASK)
2587 		trace_flags |= TRACE_FLAG_NMI;
2588 	if (pc & HARDIRQ_MASK)
2589 		trace_flags |= TRACE_FLAG_HARDIRQ;
2590 	if (in_serving_softirq())
2591 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2592 
2593 	if (tif_need_resched())
2594 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2595 	if (test_preempt_need_resched())
2596 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2597 	return (trace_flags << 16) | (pc & 0xff);
2598 }
2599 
2600 struct ring_buffer_event *
2601 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2602 			  int type,
2603 			  unsigned long len,
2604 			  unsigned int trace_ctx)
2605 {
2606 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2607 }
2608 
2609 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2610 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2611 static int trace_buffered_event_ref;
2612 
2613 /**
2614  * trace_buffered_event_enable - enable buffering events
2615  *
2616  * When events are being filtered, it is quicker to use a temporary
2617  * buffer to write the event data into if there's a likely chance
2618  * that it will not be committed. The discard of the ring buffer
2619  * is not as fast as committing, and is much slower than copying
2620  * a commit.
2621  *
2622  * When an event is to be filtered, allocate per cpu buffers to
2623  * write the event data into, and if the event is filtered and discarded
2624  * it is simply dropped, otherwise, the entire data is to be committed
2625  * in one shot.
2626  */
2627 void trace_buffered_event_enable(void)
2628 {
2629 	struct ring_buffer_event *event;
2630 	struct page *page;
2631 	int cpu;
2632 
2633 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2634 
2635 	if (trace_buffered_event_ref++)
2636 		return;
2637 
2638 	for_each_tracing_cpu(cpu) {
2639 		page = alloc_pages_node(cpu_to_node(cpu),
2640 					GFP_KERNEL | __GFP_NORETRY, 0);
2641 		if (!page)
2642 			goto failed;
2643 
2644 		event = page_address(page);
2645 		memset(event, 0, sizeof(*event));
2646 
2647 		per_cpu(trace_buffered_event, cpu) = event;
2648 
2649 		preempt_disable();
2650 		if (cpu == smp_processor_id() &&
2651 		    __this_cpu_read(trace_buffered_event) !=
2652 		    per_cpu(trace_buffered_event, cpu))
2653 			WARN_ON_ONCE(1);
2654 		preempt_enable();
2655 	}
2656 
2657 	return;
2658  failed:
2659 	trace_buffered_event_disable();
2660 }
2661 
2662 static void enable_trace_buffered_event(void *data)
2663 {
2664 	/* Probably not needed, but do it anyway */
2665 	smp_rmb();
2666 	this_cpu_dec(trace_buffered_event_cnt);
2667 }
2668 
2669 static void disable_trace_buffered_event(void *data)
2670 {
2671 	this_cpu_inc(trace_buffered_event_cnt);
2672 }
2673 
2674 /**
2675  * trace_buffered_event_disable - disable buffering events
2676  *
2677  * When a filter is removed, it is faster to not use the buffered
2678  * events, and to commit directly into the ring buffer. Free up
2679  * the temp buffers when there are no more users. This requires
2680  * special synchronization with current events.
2681  */
2682 void trace_buffered_event_disable(void)
2683 {
2684 	int cpu;
2685 
2686 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2687 
2688 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2689 		return;
2690 
2691 	if (--trace_buffered_event_ref)
2692 		return;
2693 
2694 	preempt_disable();
2695 	/* For each CPU, set the buffer as used. */
2696 	smp_call_function_many(tracing_buffer_mask,
2697 			       disable_trace_buffered_event, NULL, 1);
2698 	preempt_enable();
2699 
2700 	/* Wait for all current users to finish */
2701 	synchronize_rcu();
2702 
2703 	for_each_tracing_cpu(cpu) {
2704 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2705 		per_cpu(trace_buffered_event, cpu) = NULL;
2706 	}
2707 	/*
2708 	 * Make sure trace_buffered_event is NULL before clearing
2709 	 * trace_buffered_event_cnt.
2710 	 */
2711 	smp_wmb();
2712 
2713 	preempt_disable();
2714 	/* Do the work on each cpu */
2715 	smp_call_function_many(tracing_buffer_mask,
2716 			       enable_trace_buffered_event, NULL, 1);
2717 	preempt_enable();
2718 }
2719 
2720 static struct trace_buffer *temp_buffer;
2721 
2722 struct ring_buffer_event *
2723 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2724 			  struct trace_event_file *trace_file,
2725 			  int type, unsigned long len,
2726 			  unsigned int trace_ctx)
2727 {
2728 	struct ring_buffer_event *entry;
2729 	struct trace_array *tr = trace_file->tr;
2730 	int val;
2731 
2732 	*current_rb = tr->array_buffer.buffer;
2733 
2734 	if (!tr->no_filter_buffering_ref &&
2735 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2736 	    (entry = this_cpu_read(trace_buffered_event))) {
2737 		/* Try to use the per cpu buffer first */
2738 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2739 		if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
2740 			trace_event_setup(entry, type, trace_ctx);
2741 			entry->array[0] = len;
2742 			return entry;
2743 		}
2744 		this_cpu_dec(trace_buffered_event_cnt);
2745 	}
2746 
2747 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2748 					    trace_ctx);
2749 	/*
2750 	 * If tracing is off, but we have triggers enabled
2751 	 * we still need to look at the event data. Use the temp_buffer
2752 	 * to store the trace event for the trigger to use. It's recursive
2753 	 * safe and will not be recorded anywhere.
2754 	 */
2755 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2756 		*current_rb = temp_buffer;
2757 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2758 						    trace_ctx);
2759 	}
2760 	return entry;
2761 }
2762 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2763 
2764 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2765 static DEFINE_MUTEX(tracepoint_printk_mutex);
2766 
2767 static void output_printk(struct trace_event_buffer *fbuffer)
2768 {
2769 	struct trace_event_call *event_call;
2770 	struct trace_event_file *file;
2771 	struct trace_event *event;
2772 	unsigned long flags;
2773 	struct trace_iterator *iter = tracepoint_print_iter;
2774 
2775 	/* We should never get here if iter is NULL */
2776 	if (WARN_ON_ONCE(!iter))
2777 		return;
2778 
2779 	event_call = fbuffer->trace_file->event_call;
2780 	if (!event_call || !event_call->event.funcs ||
2781 	    !event_call->event.funcs->trace)
2782 		return;
2783 
2784 	file = fbuffer->trace_file;
2785 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2786 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2787 	     !filter_match_preds(file->filter, fbuffer->entry)))
2788 		return;
2789 
2790 	event = &fbuffer->trace_file->event_call->event;
2791 
2792 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2793 	trace_seq_init(&iter->seq);
2794 	iter->ent = fbuffer->entry;
2795 	event_call->event.funcs->trace(iter, 0, event);
2796 	trace_seq_putc(&iter->seq, 0);
2797 	printk("%s", iter->seq.buffer);
2798 
2799 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2800 }
2801 
2802 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2803 			     void *buffer, size_t *lenp,
2804 			     loff_t *ppos)
2805 {
2806 	int save_tracepoint_printk;
2807 	int ret;
2808 
2809 	mutex_lock(&tracepoint_printk_mutex);
2810 	save_tracepoint_printk = tracepoint_printk;
2811 
2812 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2813 
2814 	/*
2815 	 * This will force exiting early, as tracepoint_printk
2816 	 * is always zero when tracepoint_printk_iter is not allocated
2817 	 */
2818 	if (!tracepoint_print_iter)
2819 		tracepoint_printk = 0;
2820 
2821 	if (save_tracepoint_printk == tracepoint_printk)
2822 		goto out;
2823 
2824 	if (tracepoint_printk)
2825 		static_key_enable(&tracepoint_printk_key.key);
2826 	else
2827 		static_key_disable(&tracepoint_printk_key.key);
2828 
2829  out:
2830 	mutex_unlock(&tracepoint_printk_mutex);
2831 
2832 	return ret;
2833 }
2834 
2835 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2836 {
2837 	if (static_key_false(&tracepoint_printk_key.key))
2838 		output_printk(fbuffer);
2839 
2840 	if (static_branch_unlikely(&trace_event_exports_enabled))
2841 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2842 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2843 				    fbuffer->event, fbuffer->entry,
2844 				    fbuffer->trace_ctx, fbuffer->regs);
2845 }
2846 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2847 
2848 /*
2849  * Skip 3:
2850  *
2851  *   trace_buffer_unlock_commit_regs()
2852  *   trace_event_buffer_commit()
2853  *   trace_event_raw_event_xxx()
2854  */
2855 # define STACK_SKIP 3
2856 
2857 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2858 				     struct trace_buffer *buffer,
2859 				     struct ring_buffer_event *event,
2860 				     unsigned int trace_ctx,
2861 				     struct pt_regs *regs)
2862 {
2863 	__buffer_unlock_commit(buffer, event);
2864 
2865 	/*
2866 	 * If regs is not set, then skip the necessary functions.
2867 	 * Note, we can still get here via blktrace, wakeup tracer
2868 	 * and mmiotrace, but that's ok if they lose a function or
2869 	 * two. They are not that meaningful.
2870 	 */
2871 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2872 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2873 }
2874 
2875 /*
2876  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2877  */
2878 void
2879 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2880 				   struct ring_buffer_event *event)
2881 {
2882 	__buffer_unlock_commit(buffer, event);
2883 }
2884 
2885 void
2886 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2887 	       parent_ip, unsigned int trace_ctx)
2888 {
2889 	struct trace_event_call *call = &event_function;
2890 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2891 	struct ring_buffer_event *event;
2892 	struct ftrace_entry *entry;
2893 
2894 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2895 					    trace_ctx);
2896 	if (!event)
2897 		return;
2898 	entry	= ring_buffer_event_data(event);
2899 	entry->ip			= ip;
2900 	entry->parent_ip		= parent_ip;
2901 
2902 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2903 		if (static_branch_unlikely(&trace_function_exports_enabled))
2904 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2905 		__buffer_unlock_commit(buffer, event);
2906 	}
2907 }
2908 
2909 #ifdef CONFIG_STACKTRACE
2910 
2911 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2912 #define FTRACE_KSTACK_NESTING	4
2913 
2914 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2915 
2916 struct ftrace_stack {
2917 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2918 };
2919 
2920 
2921 struct ftrace_stacks {
2922 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2923 };
2924 
2925 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2926 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2927 
2928 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2929 				 unsigned int trace_ctx,
2930 				 int skip, struct pt_regs *regs)
2931 {
2932 	struct trace_event_call *call = &event_kernel_stack;
2933 	struct ring_buffer_event *event;
2934 	unsigned int size, nr_entries;
2935 	struct ftrace_stack *fstack;
2936 	struct stack_entry *entry;
2937 	int stackidx;
2938 
2939 	/*
2940 	 * Add one, for this function and the call to save_stack_trace()
2941 	 * If regs is set, then these functions will not be in the way.
2942 	 */
2943 #ifndef CONFIG_UNWINDER_ORC
2944 	if (!regs)
2945 		skip++;
2946 #endif
2947 
2948 	preempt_disable_notrace();
2949 
2950 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2951 
2952 	/* This should never happen. If it does, yell once and skip */
2953 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2954 		goto out;
2955 
2956 	/*
2957 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2958 	 * interrupt will either see the value pre increment or post
2959 	 * increment. If the interrupt happens pre increment it will have
2960 	 * restored the counter when it returns.  We just need a barrier to
2961 	 * keep gcc from moving things around.
2962 	 */
2963 	barrier();
2964 
2965 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2966 	size = ARRAY_SIZE(fstack->calls);
2967 
2968 	if (regs) {
2969 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2970 						   size, skip);
2971 	} else {
2972 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2973 	}
2974 
2975 	size = nr_entries * sizeof(unsigned long);
2976 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2977 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
2978 				    trace_ctx);
2979 	if (!event)
2980 		goto out;
2981 	entry = ring_buffer_event_data(event);
2982 
2983 	memcpy(&entry->caller, fstack->calls, size);
2984 	entry->size = nr_entries;
2985 
2986 	if (!call_filter_check_discard(call, entry, buffer, event))
2987 		__buffer_unlock_commit(buffer, event);
2988 
2989  out:
2990 	/* Again, don't let gcc optimize things here */
2991 	barrier();
2992 	__this_cpu_dec(ftrace_stack_reserve);
2993 	preempt_enable_notrace();
2994 
2995 }
2996 
2997 static inline void ftrace_trace_stack(struct trace_array *tr,
2998 				      struct trace_buffer *buffer,
2999 				      unsigned int trace_ctx,
3000 				      int skip, struct pt_regs *regs)
3001 {
3002 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3003 		return;
3004 
3005 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3006 }
3007 
3008 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3009 		   int skip)
3010 {
3011 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3012 
3013 	if (rcu_is_watching()) {
3014 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3015 		return;
3016 	}
3017 
3018 	/*
3019 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3020 	 * but if the above rcu_is_watching() failed, then the NMI
3021 	 * triggered someplace critical, and rcu_irq_enter() should
3022 	 * not be called from NMI.
3023 	 */
3024 	if (unlikely(in_nmi()))
3025 		return;
3026 
3027 	rcu_irq_enter_irqson();
3028 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3029 	rcu_irq_exit_irqson();
3030 }
3031 
3032 /**
3033  * trace_dump_stack - record a stack back trace in the trace buffer
3034  * @skip: Number of functions to skip (helper handlers)
3035  */
3036 void trace_dump_stack(int skip)
3037 {
3038 	if (tracing_disabled || tracing_selftest_running)
3039 		return;
3040 
3041 #ifndef CONFIG_UNWINDER_ORC
3042 	/* Skip 1 to skip this function. */
3043 	skip++;
3044 #endif
3045 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3046 			     tracing_gen_ctx(), skip, NULL);
3047 }
3048 EXPORT_SYMBOL_GPL(trace_dump_stack);
3049 
3050 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3051 static DEFINE_PER_CPU(int, user_stack_count);
3052 
3053 static void
3054 ftrace_trace_userstack(struct trace_array *tr,
3055 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3056 {
3057 	struct trace_event_call *call = &event_user_stack;
3058 	struct ring_buffer_event *event;
3059 	struct userstack_entry *entry;
3060 
3061 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3062 		return;
3063 
3064 	/*
3065 	 * NMIs can not handle page faults, even with fix ups.
3066 	 * The save user stack can (and often does) fault.
3067 	 */
3068 	if (unlikely(in_nmi()))
3069 		return;
3070 
3071 	/*
3072 	 * prevent recursion, since the user stack tracing may
3073 	 * trigger other kernel events.
3074 	 */
3075 	preempt_disable();
3076 	if (__this_cpu_read(user_stack_count))
3077 		goto out;
3078 
3079 	__this_cpu_inc(user_stack_count);
3080 
3081 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3082 					    sizeof(*entry), trace_ctx);
3083 	if (!event)
3084 		goto out_drop_count;
3085 	entry	= ring_buffer_event_data(event);
3086 
3087 	entry->tgid		= current->tgid;
3088 	memset(&entry->caller, 0, sizeof(entry->caller));
3089 
3090 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3091 	if (!call_filter_check_discard(call, entry, buffer, event))
3092 		__buffer_unlock_commit(buffer, event);
3093 
3094  out_drop_count:
3095 	__this_cpu_dec(user_stack_count);
3096  out:
3097 	preempt_enable();
3098 }
3099 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3100 static void ftrace_trace_userstack(struct trace_array *tr,
3101 				   struct trace_buffer *buffer,
3102 				   unsigned int trace_ctx)
3103 {
3104 }
3105 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3106 
3107 #endif /* CONFIG_STACKTRACE */
3108 
3109 static inline void
3110 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3111 			  unsigned long long delta)
3112 {
3113 	entry->bottom_delta_ts = delta & U32_MAX;
3114 	entry->top_delta_ts = (delta >> 32);
3115 }
3116 
3117 void trace_last_func_repeats(struct trace_array *tr,
3118 			     struct trace_func_repeats *last_info,
3119 			     unsigned int trace_ctx)
3120 {
3121 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3122 	struct func_repeats_entry *entry;
3123 	struct ring_buffer_event *event;
3124 	u64 delta;
3125 
3126 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3127 					    sizeof(*entry), trace_ctx);
3128 	if (!event)
3129 		return;
3130 
3131 	delta = ring_buffer_event_time_stamp(buffer, event) -
3132 		last_info->ts_last_call;
3133 
3134 	entry = ring_buffer_event_data(event);
3135 	entry->ip = last_info->ip;
3136 	entry->parent_ip = last_info->parent_ip;
3137 	entry->count = last_info->count;
3138 	func_repeats_set_delta_ts(entry, delta);
3139 
3140 	__buffer_unlock_commit(buffer, event);
3141 }
3142 
3143 /* created for use with alloc_percpu */
3144 struct trace_buffer_struct {
3145 	int nesting;
3146 	char buffer[4][TRACE_BUF_SIZE];
3147 };
3148 
3149 static struct trace_buffer_struct *trace_percpu_buffer;
3150 
3151 /*
3152  * This allows for lockless recording.  If we're nested too deeply, then
3153  * this returns NULL.
3154  */
3155 static char *get_trace_buf(void)
3156 {
3157 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3158 
3159 	if (!buffer || buffer->nesting >= 4)
3160 		return NULL;
3161 
3162 	buffer->nesting++;
3163 
3164 	/* Interrupts must see nesting incremented before we use the buffer */
3165 	barrier();
3166 	return &buffer->buffer[buffer->nesting - 1][0];
3167 }
3168 
3169 static void put_trace_buf(void)
3170 {
3171 	/* Don't let the decrement of nesting leak before this */
3172 	barrier();
3173 	this_cpu_dec(trace_percpu_buffer->nesting);
3174 }
3175 
3176 static int alloc_percpu_trace_buffer(void)
3177 {
3178 	struct trace_buffer_struct *buffers;
3179 
3180 	if (trace_percpu_buffer)
3181 		return 0;
3182 
3183 	buffers = alloc_percpu(struct trace_buffer_struct);
3184 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3185 		return -ENOMEM;
3186 
3187 	trace_percpu_buffer = buffers;
3188 	return 0;
3189 }
3190 
3191 static int buffers_allocated;
3192 
3193 void trace_printk_init_buffers(void)
3194 {
3195 	if (buffers_allocated)
3196 		return;
3197 
3198 	if (alloc_percpu_trace_buffer())
3199 		return;
3200 
3201 	/* trace_printk() is for debug use only. Don't use it in production. */
3202 
3203 	pr_warn("\n");
3204 	pr_warn("**********************************************************\n");
3205 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3206 	pr_warn("**                                                      **\n");
3207 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3208 	pr_warn("**                                                      **\n");
3209 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3210 	pr_warn("** unsafe for production use.                           **\n");
3211 	pr_warn("**                                                      **\n");
3212 	pr_warn("** If you see this message and you are not debugging    **\n");
3213 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3214 	pr_warn("**                                                      **\n");
3215 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3216 	pr_warn("**********************************************************\n");
3217 
3218 	/* Expand the buffers to set size */
3219 	tracing_update_buffers();
3220 
3221 	buffers_allocated = 1;
3222 
3223 	/*
3224 	 * trace_printk_init_buffers() can be called by modules.
3225 	 * If that happens, then we need to start cmdline recording
3226 	 * directly here. If the global_trace.buffer is already
3227 	 * allocated here, then this was called by module code.
3228 	 */
3229 	if (global_trace.array_buffer.buffer)
3230 		tracing_start_cmdline_record();
3231 }
3232 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3233 
3234 void trace_printk_start_comm(void)
3235 {
3236 	/* Start tracing comms if trace printk is set */
3237 	if (!buffers_allocated)
3238 		return;
3239 	tracing_start_cmdline_record();
3240 }
3241 
3242 static void trace_printk_start_stop_comm(int enabled)
3243 {
3244 	if (!buffers_allocated)
3245 		return;
3246 
3247 	if (enabled)
3248 		tracing_start_cmdline_record();
3249 	else
3250 		tracing_stop_cmdline_record();
3251 }
3252 
3253 /**
3254  * trace_vbprintk - write binary msg to tracing buffer
3255  * @ip:    The address of the caller
3256  * @fmt:   The string format to write to the buffer
3257  * @args:  Arguments for @fmt
3258  */
3259 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3260 {
3261 	struct trace_event_call *call = &event_bprint;
3262 	struct ring_buffer_event *event;
3263 	struct trace_buffer *buffer;
3264 	struct trace_array *tr = &global_trace;
3265 	struct bprint_entry *entry;
3266 	unsigned int trace_ctx;
3267 	char *tbuffer;
3268 	int len = 0, size;
3269 
3270 	if (unlikely(tracing_selftest_running || tracing_disabled))
3271 		return 0;
3272 
3273 	/* Don't pollute graph traces with trace_vprintk internals */
3274 	pause_graph_tracing();
3275 
3276 	trace_ctx = tracing_gen_ctx();
3277 	preempt_disable_notrace();
3278 
3279 	tbuffer = get_trace_buf();
3280 	if (!tbuffer) {
3281 		len = 0;
3282 		goto out_nobuffer;
3283 	}
3284 
3285 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3286 
3287 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3288 		goto out_put;
3289 
3290 	size = sizeof(*entry) + sizeof(u32) * len;
3291 	buffer = tr->array_buffer.buffer;
3292 	ring_buffer_nest_start(buffer);
3293 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3294 					    trace_ctx);
3295 	if (!event)
3296 		goto out;
3297 	entry = ring_buffer_event_data(event);
3298 	entry->ip			= ip;
3299 	entry->fmt			= fmt;
3300 
3301 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3302 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3303 		__buffer_unlock_commit(buffer, event);
3304 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3305 	}
3306 
3307 out:
3308 	ring_buffer_nest_end(buffer);
3309 out_put:
3310 	put_trace_buf();
3311 
3312 out_nobuffer:
3313 	preempt_enable_notrace();
3314 	unpause_graph_tracing();
3315 
3316 	return len;
3317 }
3318 EXPORT_SYMBOL_GPL(trace_vbprintk);
3319 
3320 __printf(3, 0)
3321 static int
3322 __trace_array_vprintk(struct trace_buffer *buffer,
3323 		      unsigned long ip, const char *fmt, va_list args)
3324 {
3325 	struct trace_event_call *call = &event_print;
3326 	struct ring_buffer_event *event;
3327 	int len = 0, size;
3328 	struct print_entry *entry;
3329 	unsigned int trace_ctx;
3330 	char *tbuffer;
3331 
3332 	if (tracing_disabled || tracing_selftest_running)
3333 		return 0;
3334 
3335 	/* Don't pollute graph traces with trace_vprintk internals */
3336 	pause_graph_tracing();
3337 
3338 	trace_ctx = tracing_gen_ctx();
3339 	preempt_disable_notrace();
3340 
3341 
3342 	tbuffer = get_trace_buf();
3343 	if (!tbuffer) {
3344 		len = 0;
3345 		goto out_nobuffer;
3346 	}
3347 
3348 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3349 
3350 	size = sizeof(*entry) + len + 1;
3351 	ring_buffer_nest_start(buffer);
3352 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3353 					    trace_ctx);
3354 	if (!event)
3355 		goto out;
3356 	entry = ring_buffer_event_data(event);
3357 	entry->ip = ip;
3358 
3359 	memcpy(&entry->buf, tbuffer, len + 1);
3360 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3361 		__buffer_unlock_commit(buffer, event);
3362 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3363 	}
3364 
3365 out:
3366 	ring_buffer_nest_end(buffer);
3367 	put_trace_buf();
3368 
3369 out_nobuffer:
3370 	preempt_enable_notrace();
3371 	unpause_graph_tracing();
3372 
3373 	return len;
3374 }
3375 
3376 __printf(3, 0)
3377 int trace_array_vprintk(struct trace_array *tr,
3378 			unsigned long ip, const char *fmt, va_list args)
3379 {
3380 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3381 }
3382 
3383 /**
3384  * trace_array_printk - Print a message to a specific instance
3385  * @tr: The instance trace_array descriptor
3386  * @ip: The instruction pointer that this is called from.
3387  * @fmt: The format to print (printf format)
3388  *
3389  * If a subsystem sets up its own instance, they have the right to
3390  * printk strings into their tracing instance buffer using this
3391  * function. Note, this function will not write into the top level
3392  * buffer (use trace_printk() for that), as writing into the top level
3393  * buffer should only have events that can be individually disabled.
3394  * trace_printk() is only used for debugging a kernel, and should not
3395  * be ever incorporated in normal use.
3396  *
3397  * trace_array_printk() can be used, as it will not add noise to the
3398  * top level tracing buffer.
3399  *
3400  * Note, trace_array_init_printk() must be called on @tr before this
3401  * can be used.
3402  */
3403 __printf(3, 0)
3404 int trace_array_printk(struct trace_array *tr,
3405 		       unsigned long ip, const char *fmt, ...)
3406 {
3407 	int ret;
3408 	va_list ap;
3409 
3410 	if (!tr)
3411 		return -ENOENT;
3412 
3413 	/* This is only allowed for created instances */
3414 	if (tr == &global_trace)
3415 		return 0;
3416 
3417 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3418 		return 0;
3419 
3420 	va_start(ap, fmt);
3421 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3422 	va_end(ap);
3423 	return ret;
3424 }
3425 EXPORT_SYMBOL_GPL(trace_array_printk);
3426 
3427 /**
3428  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3429  * @tr: The trace array to initialize the buffers for
3430  *
3431  * As trace_array_printk() only writes into instances, they are OK to
3432  * have in the kernel (unlike trace_printk()). This needs to be called
3433  * before trace_array_printk() can be used on a trace_array.
3434  */
3435 int trace_array_init_printk(struct trace_array *tr)
3436 {
3437 	if (!tr)
3438 		return -ENOENT;
3439 
3440 	/* This is only allowed for created instances */
3441 	if (tr == &global_trace)
3442 		return -EINVAL;
3443 
3444 	return alloc_percpu_trace_buffer();
3445 }
3446 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3447 
3448 __printf(3, 4)
3449 int trace_array_printk_buf(struct trace_buffer *buffer,
3450 			   unsigned long ip, const char *fmt, ...)
3451 {
3452 	int ret;
3453 	va_list ap;
3454 
3455 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3456 		return 0;
3457 
3458 	va_start(ap, fmt);
3459 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3460 	va_end(ap);
3461 	return ret;
3462 }
3463 
3464 __printf(2, 0)
3465 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3466 {
3467 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3468 }
3469 EXPORT_SYMBOL_GPL(trace_vprintk);
3470 
3471 static void trace_iterator_increment(struct trace_iterator *iter)
3472 {
3473 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3474 
3475 	iter->idx++;
3476 	if (buf_iter)
3477 		ring_buffer_iter_advance(buf_iter);
3478 }
3479 
3480 static struct trace_entry *
3481 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3482 		unsigned long *lost_events)
3483 {
3484 	struct ring_buffer_event *event;
3485 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3486 
3487 	if (buf_iter) {
3488 		event = ring_buffer_iter_peek(buf_iter, ts);
3489 		if (lost_events)
3490 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3491 				(unsigned long)-1 : 0;
3492 	} else {
3493 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3494 					 lost_events);
3495 	}
3496 
3497 	if (event) {
3498 		iter->ent_size = ring_buffer_event_length(event);
3499 		return ring_buffer_event_data(event);
3500 	}
3501 	iter->ent_size = 0;
3502 	return NULL;
3503 }
3504 
3505 static struct trace_entry *
3506 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3507 		  unsigned long *missing_events, u64 *ent_ts)
3508 {
3509 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3510 	struct trace_entry *ent, *next = NULL;
3511 	unsigned long lost_events = 0, next_lost = 0;
3512 	int cpu_file = iter->cpu_file;
3513 	u64 next_ts = 0, ts;
3514 	int next_cpu = -1;
3515 	int next_size = 0;
3516 	int cpu;
3517 
3518 	/*
3519 	 * If we are in a per_cpu trace file, don't bother by iterating over
3520 	 * all cpu and peek directly.
3521 	 */
3522 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3523 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3524 			return NULL;
3525 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3526 		if (ent_cpu)
3527 			*ent_cpu = cpu_file;
3528 
3529 		return ent;
3530 	}
3531 
3532 	for_each_tracing_cpu(cpu) {
3533 
3534 		if (ring_buffer_empty_cpu(buffer, cpu))
3535 			continue;
3536 
3537 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3538 
3539 		/*
3540 		 * Pick the entry with the smallest timestamp:
3541 		 */
3542 		if (ent && (!next || ts < next_ts)) {
3543 			next = ent;
3544 			next_cpu = cpu;
3545 			next_ts = ts;
3546 			next_lost = lost_events;
3547 			next_size = iter->ent_size;
3548 		}
3549 	}
3550 
3551 	iter->ent_size = next_size;
3552 
3553 	if (ent_cpu)
3554 		*ent_cpu = next_cpu;
3555 
3556 	if (ent_ts)
3557 		*ent_ts = next_ts;
3558 
3559 	if (missing_events)
3560 		*missing_events = next_lost;
3561 
3562 	return next;
3563 }
3564 
3565 #define STATIC_FMT_BUF_SIZE	128
3566 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3567 
3568 static char *trace_iter_expand_format(struct trace_iterator *iter)
3569 {
3570 	char *tmp;
3571 
3572 	/*
3573 	 * iter->tr is NULL when used with tp_printk, which makes
3574 	 * this get called where it is not safe to call krealloc().
3575 	 */
3576 	if (!iter->tr || iter->fmt == static_fmt_buf)
3577 		return NULL;
3578 
3579 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3580 		       GFP_KERNEL);
3581 	if (tmp) {
3582 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3583 		iter->fmt = tmp;
3584 	}
3585 
3586 	return tmp;
3587 }
3588 
3589 /* Returns true if the string is safe to dereference from an event */
3590 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3591 {
3592 	unsigned long addr = (unsigned long)str;
3593 	struct trace_event *trace_event;
3594 	struct trace_event_call *event;
3595 
3596 	/* OK if part of the event data */
3597 	if ((addr >= (unsigned long)iter->ent) &&
3598 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3599 		return true;
3600 
3601 	/* OK if part of the temp seq buffer */
3602 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3603 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3604 		return true;
3605 
3606 	/* Core rodata can not be freed */
3607 	if (is_kernel_rodata(addr))
3608 		return true;
3609 
3610 	if (trace_is_tracepoint_string(str))
3611 		return true;
3612 
3613 	/*
3614 	 * Now this could be a module event, referencing core module
3615 	 * data, which is OK.
3616 	 */
3617 	if (!iter->ent)
3618 		return false;
3619 
3620 	trace_event = ftrace_find_event(iter->ent->type);
3621 	if (!trace_event)
3622 		return false;
3623 
3624 	event = container_of(trace_event, struct trace_event_call, event);
3625 	if (!event->mod)
3626 		return false;
3627 
3628 	/* Would rather have rodata, but this will suffice */
3629 	if (within_module_core(addr, event->mod))
3630 		return true;
3631 
3632 	return false;
3633 }
3634 
3635 static const char *show_buffer(struct trace_seq *s)
3636 {
3637 	struct seq_buf *seq = &s->seq;
3638 
3639 	seq_buf_terminate(seq);
3640 
3641 	return seq->buffer;
3642 }
3643 
3644 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3645 
3646 static int test_can_verify_check(const char *fmt, ...)
3647 {
3648 	char buf[16];
3649 	va_list ap;
3650 	int ret;
3651 
3652 	/*
3653 	 * The verifier is dependent on vsnprintf() modifies the va_list
3654 	 * passed to it, where it is sent as a reference. Some architectures
3655 	 * (like x86_32) passes it by value, which means that vsnprintf()
3656 	 * does not modify the va_list passed to it, and the verifier
3657 	 * would then need to be able to understand all the values that
3658 	 * vsnprintf can use. If it is passed by value, then the verifier
3659 	 * is disabled.
3660 	 */
3661 	va_start(ap, fmt);
3662 	vsnprintf(buf, 16, "%d", ap);
3663 	ret = va_arg(ap, int);
3664 	va_end(ap);
3665 
3666 	return ret;
3667 }
3668 
3669 static void test_can_verify(void)
3670 {
3671 	if (!test_can_verify_check("%d %d", 0, 1)) {
3672 		pr_info("trace event string verifier disabled\n");
3673 		static_branch_inc(&trace_no_verify);
3674 	}
3675 }
3676 
3677 /**
3678  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3679  * @iter: The iterator that holds the seq buffer and the event being printed
3680  * @fmt: The format used to print the event
3681  * @ap: The va_list holding the data to print from @fmt.
3682  *
3683  * This writes the data into the @iter->seq buffer using the data from
3684  * @fmt and @ap. If the format has a %s, then the source of the string
3685  * is examined to make sure it is safe to print, otherwise it will
3686  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3687  * pointer.
3688  */
3689 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3690 			 va_list ap)
3691 {
3692 	const char *p = fmt;
3693 	const char *str;
3694 	int i, j;
3695 
3696 	if (WARN_ON_ONCE(!fmt))
3697 		return;
3698 
3699 	if (static_branch_unlikely(&trace_no_verify))
3700 		goto print;
3701 
3702 	/* Don't bother checking when doing a ftrace_dump() */
3703 	if (iter->fmt == static_fmt_buf)
3704 		goto print;
3705 
3706 	while (*p) {
3707 		bool star = false;
3708 		int len = 0;
3709 
3710 		j = 0;
3711 
3712 		/* We only care about %s and variants */
3713 		for (i = 0; p[i]; i++) {
3714 			if (i + 1 >= iter->fmt_size) {
3715 				/*
3716 				 * If we can't expand the copy buffer,
3717 				 * just print it.
3718 				 */
3719 				if (!trace_iter_expand_format(iter))
3720 					goto print;
3721 			}
3722 
3723 			if (p[i] == '\\' && p[i+1]) {
3724 				i++;
3725 				continue;
3726 			}
3727 			if (p[i] == '%') {
3728 				/* Need to test cases like %08.*s */
3729 				for (j = 1; p[i+j]; j++) {
3730 					if (isdigit(p[i+j]) ||
3731 					    p[i+j] == '.')
3732 						continue;
3733 					if (p[i+j] == '*') {
3734 						star = true;
3735 						continue;
3736 					}
3737 					break;
3738 				}
3739 				if (p[i+j] == 's')
3740 					break;
3741 				star = false;
3742 			}
3743 			j = 0;
3744 		}
3745 		/* If no %s found then just print normally */
3746 		if (!p[i])
3747 			break;
3748 
3749 		/* Copy up to the %s, and print that */
3750 		strncpy(iter->fmt, p, i);
3751 		iter->fmt[i] = '\0';
3752 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3753 
3754 		if (star)
3755 			len = va_arg(ap, int);
3756 
3757 		/* The ap now points to the string data of the %s */
3758 		str = va_arg(ap, const char *);
3759 
3760 		/*
3761 		 * If you hit this warning, it is likely that the
3762 		 * trace event in question used %s on a string that
3763 		 * was saved at the time of the event, but may not be
3764 		 * around when the trace is read. Use __string(),
3765 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3766 		 * instead. See samples/trace_events/trace-events-sample.h
3767 		 * for reference.
3768 		 */
3769 		if (WARN_ONCE(!trace_safe_str(iter, str),
3770 			      "fmt: '%s' current_buffer: '%s'",
3771 			      fmt, show_buffer(&iter->seq))) {
3772 			int ret;
3773 
3774 			/* Try to safely read the string */
3775 			if (star) {
3776 				if (len + 1 > iter->fmt_size)
3777 					len = iter->fmt_size - 1;
3778 				if (len < 0)
3779 					len = 0;
3780 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3781 				iter->fmt[len] = 0;
3782 				star = false;
3783 			} else {
3784 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3785 								  iter->fmt_size);
3786 			}
3787 			if (ret < 0)
3788 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3789 			else
3790 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3791 						 str, iter->fmt);
3792 			str = "[UNSAFE-MEMORY]";
3793 			strcpy(iter->fmt, "%s");
3794 		} else {
3795 			strncpy(iter->fmt, p + i, j + 1);
3796 			iter->fmt[j+1] = '\0';
3797 		}
3798 		if (star)
3799 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3800 		else
3801 			trace_seq_printf(&iter->seq, iter->fmt, str);
3802 
3803 		p += i + j + 1;
3804 	}
3805  print:
3806 	if (*p)
3807 		trace_seq_vprintf(&iter->seq, p, ap);
3808 }
3809 
3810 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3811 {
3812 	const char *p, *new_fmt;
3813 	char *q;
3814 
3815 	if (WARN_ON_ONCE(!fmt))
3816 		return fmt;
3817 
3818 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3819 		return fmt;
3820 
3821 	p = fmt;
3822 	new_fmt = q = iter->fmt;
3823 	while (*p) {
3824 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3825 			if (!trace_iter_expand_format(iter))
3826 				return fmt;
3827 
3828 			q += iter->fmt - new_fmt;
3829 			new_fmt = iter->fmt;
3830 		}
3831 
3832 		*q++ = *p++;
3833 
3834 		/* Replace %p with %px */
3835 		if (p[-1] == '%') {
3836 			if (p[0] == '%') {
3837 				*q++ = *p++;
3838 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3839 				*q++ = *p++;
3840 				*q++ = 'x';
3841 			}
3842 		}
3843 	}
3844 	*q = '\0';
3845 
3846 	return new_fmt;
3847 }
3848 
3849 #define STATIC_TEMP_BUF_SIZE	128
3850 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3851 
3852 /* Find the next real entry, without updating the iterator itself */
3853 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3854 					  int *ent_cpu, u64 *ent_ts)
3855 {
3856 	/* __find_next_entry will reset ent_size */
3857 	int ent_size = iter->ent_size;
3858 	struct trace_entry *entry;
3859 
3860 	/*
3861 	 * If called from ftrace_dump(), then the iter->temp buffer
3862 	 * will be the static_temp_buf and not created from kmalloc.
3863 	 * If the entry size is greater than the buffer, we can
3864 	 * not save it. Just return NULL in that case. This is only
3865 	 * used to add markers when two consecutive events' time
3866 	 * stamps have a large delta. See trace_print_lat_context()
3867 	 */
3868 	if (iter->temp == static_temp_buf &&
3869 	    STATIC_TEMP_BUF_SIZE < ent_size)
3870 		return NULL;
3871 
3872 	/*
3873 	 * The __find_next_entry() may call peek_next_entry(), which may
3874 	 * call ring_buffer_peek() that may make the contents of iter->ent
3875 	 * undefined. Need to copy iter->ent now.
3876 	 */
3877 	if (iter->ent && iter->ent != iter->temp) {
3878 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3879 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3880 			void *temp;
3881 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3882 			if (!temp)
3883 				return NULL;
3884 			kfree(iter->temp);
3885 			iter->temp = temp;
3886 			iter->temp_size = iter->ent_size;
3887 		}
3888 		memcpy(iter->temp, iter->ent, iter->ent_size);
3889 		iter->ent = iter->temp;
3890 	}
3891 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3892 	/* Put back the original ent_size */
3893 	iter->ent_size = ent_size;
3894 
3895 	return entry;
3896 }
3897 
3898 /* Find the next real entry, and increment the iterator to the next entry */
3899 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3900 {
3901 	iter->ent = __find_next_entry(iter, &iter->cpu,
3902 				      &iter->lost_events, &iter->ts);
3903 
3904 	if (iter->ent)
3905 		trace_iterator_increment(iter);
3906 
3907 	return iter->ent ? iter : NULL;
3908 }
3909 
3910 static void trace_consume(struct trace_iterator *iter)
3911 {
3912 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3913 			    &iter->lost_events);
3914 }
3915 
3916 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3917 {
3918 	struct trace_iterator *iter = m->private;
3919 	int i = (int)*pos;
3920 	void *ent;
3921 
3922 	WARN_ON_ONCE(iter->leftover);
3923 
3924 	(*pos)++;
3925 
3926 	/* can't go backwards */
3927 	if (iter->idx > i)
3928 		return NULL;
3929 
3930 	if (iter->idx < 0)
3931 		ent = trace_find_next_entry_inc(iter);
3932 	else
3933 		ent = iter;
3934 
3935 	while (ent && iter->idx < i)
3936 		ent = trace_find_next_entry_inc(iter);
3937 
3938 	iter->pos = *pos;
3939 
3940 	return ent;
3941 }
3942 
3943 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3944 {
3945 	struct ring_buffer_iter *buf_iter;
3946 	unsigned long entries = 0;
3947 	u64 ts;
3948 
3949 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3950 
3951 	buf_iter = trace_buffer_iter(iter, cpu);
3952 	if (!buf_iter)
3953 		return;
3954 
3955 	ring_buffer_iter_reset(buf_iter);
3956 
3957 	/*
3958 	 * We could have the case with the max latency tracers
3959 	 * that a reset never took place on a cpu. This is evident
3960 	 * by the timestamp being before the start of the buffer.
3961 	 */
3962 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3963 		if (ts >= iter->array_buffer->time_start)
3964 			break;
3965 		entries++;
3966 		ring_buffer_iter_advance(buf_iter);
3967 	}
3968 
3969 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3970 }
3971 
3972 /*
3973  * The current tracer is copied to avoid a global locking
3974  * all around.
3975  */
3976 static void *s_start(struct seq_file *m, loff_t *pos)
3977 {
3978 	struct trace_iterator *iter = m->private;
3979 	struct trace_array *tr = iter->tr;
3980 	int cpu_file = iter->cpu_file;
3981 	void *p = NULL;
3982 	loff_t l = 0;
3983 	int cpu;
3984 
3985 	/*
3986 	 * copy the tracer to avoid using a global lock all around.
3987 	 * iter->trace is a copy of current_trace, the pointer to the
3988 	 * name may be used instead of a strcmp(), as iter->trace->name
3989 	 * will point to the same string as current_trace->name.
3990 	 */
3991 	mutex_lock(&trace_types_lock);
3992 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3993 		*iter->trace = *tr->current_trace;
3994 	mutex_unlock(&trace_types_lock);
3995 
3996 #ifdef CONFIG_TRACER_MAX_TRACE
3997 	if (iter->snapshot && iter->trace->use_max_tr)
3998 		return ERR_PTR(-EBUSY);
3999 #endif
4000 
4001 	if (!iter->snapshot)
4002 		atomic_inc(&trace_record_taskinfo_disabled);
4003 
4004 	if (*pos != iter->pos) {
4005 		iter->ent = NULL;
4006 		iter->cpu = 0;
4007 		iter->idx = -1;
4008 
4009 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4010 			for_each_tracing_cpu(cpu)
4011 				tracing_iter_reset(iter, cpu);
4012 		} else
4013 			tracing_iter_reset(iter, cpu_file);
4014 
4015 		iter->leftover = 0;
4016 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4017 			;
4018 
4019 	} else {
4020 		/*
4021 		 * If we overflowed the seq_file before, then we want
4022 		 * to just reuse the trace_seq buffer again.
4023 		 */
4024 		if (iter->leftover)
4025 			p = iter;
4026 		else {
4027 			l = *pos - 1;
4028 			p = s_next(m, p, &l);
4029 		}
4030 	}
4031 
4032 	trace_event_read_lock();
4033 	trace_access_lock(cpu_file);
4034 	return p;
4035 }
4036 
4037 static void s_stop(struct seq_file *m, void *p)
4038 {
4039 	struct trace_iterator *iter = m->private;
4040 
4041 #ifdef CONFIG_TRACER_MAX_TRACE
4042 	if (iter->snapshot && iter->trace->use_max_tr)
4043 		return;
4044 #endif
4045 
4046 	if (!iter->snapshot)
4047 		atomic_dec(&trace_record_taskinfo_disabled);
4048 
4049 	trace_access_unlock(iter->cpu_file);
4050 	trace_event_read_unlock();
4051 }
4052 
4053 static void
4054 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4055 		      unsigned long *entries, int cpu)
4056 {
4057 	unsigned long count;
4058 
4059 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4060 	/*
4061 	 * If this buffer has skipped entries, then we hold all
4062 	 * entries for the trace and we need to ignore the
4063 	 * ones before the time stamp.
4064 	 */
4065 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4066 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4067 		/* total is the same as the entries */
4068 		*total = count;
4069 	} else
4070 		*total = count +
4071 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4072 	*entries = count;
4073 }
4074 
4075 static void
4076 get_total_entries(struct array_buffer *buf,
4077 		  unsigned long *total, unsigned long *entries)
4078 {
4079 	unsigned long t, e;
4080 	int cpu;
4081 
4082 	*total = 0;
4083 	*entries = 0;
4084 
4085 	for_each_tracing_cpu(cpu) {
4086 		get_total_entries_cpu(buf, &t, &e, cpu);
4087 		*total += t;
4088 		*entries += e;
4089 	}
4090 }
4091 
4092 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4093 {
4094 	unsigned long total, entries;
4095 
4096 	if (!tr)
4097 		tr = &global_trace;
4098 
4099 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4100 
4101 	return entries;
4102 }
4103 
4104 unsigned long trace_total_entries(struct trace_array *tr)
4105 {
4106 	unsigned long total, entries;
4107 
4108 	if (!tr)
4109 		tr = &global_trace;
4110 
4111 	get_total_entries(&tr->array_buffer, &total, &entries);
4112 
4113 	return entries;
4114 }
4115 
4116 static void print_lat_help_header(struct seq_file *m)
4117 {
4118 	seq_puts(m, "#                    _------=> CPU#            \n"
4119 		    "#                   / _-----=> irqs-off        \n"
4120 		    "#                  | / _----=> need-resched    \n"
4121 		    "#                  || / _---=> hardirq/softirq \n"
4122 		    "#                  ||| / _--=> preempt-depth   \n"
4123 		    "#                  |||| /     delay            \n"
4124 		    "#  cmd     pid     ||||| time  |   caller      \n"
4125 		    "#     \\   /        |||||  \\    |   /         \n");
4126 }
4127 
4128 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4129 {
4130 	unsigned long total;
4131 	unsigned long entries;
4132 
4133 	get_total_entries(buf, &total, &entries);
4134 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4135 		   entries, total, num_online_cpus());
4136 	seq_puts(m, "#\n");
4137 }
4138 
4139 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4140 				   unsigned int flags)
4141 {
4142 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4143 
4144 	print_event_info(buf, m);
4145 
4146 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4147 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4148 }
4149 
4150 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4151 				       unsigned int flags)
4152 {
4153 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4154 	const char *space = "            ";
4155 	int prec = tgid ? 12 : 2;
4156 
4157 	print_event_info(buf, m);
4158 
4159 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4160 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4161 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4162 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4163 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4164 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4165 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4166 }
4167 
4168 void
4169 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4170 {
4171 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4172 	struct array_buffer *buf = iter->array_buffer;
4173 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4174 	struct tracer *type = iter->trace;
4175 	unsigned long entries;
4176 	unsigned long total;
4177 	const char *name = "preemption";
4178 
4179 	name = type->name;
4180 
4181 	get_total_entries(buf, &total, &entries);
4182 
4183 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4184 		   name, UTS_RELEASE);
4185 	seq_puts(m, "# -----------------------------------"
4186 		 "---------------------------------\n");
4187 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4188 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4189 		   nsecs_to_usecs(data->saved_latency),
4190 		   entries,
4191 		   total,
4192 		   buf->cpu,
4193 #if defined(CONFIG_PREEMPT_NONE)
4194 		   "server",
4195 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4196 		   "desktop",
4197 #elif defined(CONFIG_PREEMPT)
4198 		   "preempt",
4199 #elif defined(CONFIG_PREEMPT_RT)
4200 		   "preempt_rt",
4201 #else
4202 		   "unknown",
4203 #endif
4204 		   /* These are reserved for later use */
4205 		   0, 0, 0, 0);
4206 #ifdef CONFIG_SMP
4207 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4208 #else
4209 	seq_puts(m, ")\n");
4210 #endif
4211 	seq_puts(m, "#    -----------------\n");
4212 	seq_printf(m, "#    | task: %.16s-%d "
4213 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4214 		   data->comm, data->pid,
4215 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4216 		   data->policy, data->rt_priority);
4217 	seq_puts(m, "#    -----------------\n");
4218 
4219 	if (data->critical_start) {
4220 		seq_puts(m, "#  => started at: ");
4221 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4222 		trace_print_seq(m, &iter->seq);
4223 		seq_puts(m, "\n#  => ended at:   ");
4224 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4225 		trace_print_seq(m, &iter->seq);
4226 		seq_puts(m, "\n#\n");
4227 	}
4228 
4229 	seq_puts(m, "#\n");
4230 }
4231 
4232 static void test_cpu_buff_start(struct trace_iterator *iter)
4233 {
4234 	struct trace_seq *s = &iter->seq;
4235 	struct trace_array *tr = iter->tr;
4236 
4237 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4238 		return;
4239 
4240 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4241 		return;
4242 
4243 	if (cpumask_available(iter->started) &&
4244 	    cpumask_test_cpu(iter->cpu, iter->started))
4245 		return;
4246 
4247 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4248 		return;
4249 
4250 	if (cpumask_available(iter->started))
4251 		cpumask_set_cpu(iter->cpu, iter->started);
4252 
4253 	/* Don't print started cpu buffer for the first entry of the trace */
4254 	if (iter->idx > 1)
4255 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4256 				iter->cpu);
4257 }
4258 
4259 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4260 {
4261 	struct trace_array *tr = iter->tr;
4262 	struct trace_seq *s = &iter->seq;
4263 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4264 	struct trace_entry *entry;
4265 	struct trace_event *event;
4266 
4267 	entry = iter->ent;
4268 
4269 	test_cpu_buff_start(iter);
4270 
4271 	event = ftrace_find_event(entry->type);
4272 
4273 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4274 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4275 			trace_print_lat_context(iter);
4276 		else
4277 			trace_print_context(iter);
4278 	}
4279 
4280 	if (trace_seq_has_overflowed(s))
4281 		return TRACE_TYPE_PARTIAL_LINE;
4282 
4283 	if (event)
4284 		return event->funcs->trace(iter, sym_flags, event);
4285 
4286 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4287 
4288 	return trace_handle_return(s);
4289 }
4290 
4291 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4292 {
4293 	struct trace_array *tr = iter->tr;
4294 	struct trace_seq *s = &iter->seq;
4295 	struct trace_entry *entry;
4296 	struct trace_event *event;
4297 
4298 	entry = iter->ent;
4299 
4300 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4301 		trace_seq_printf(s, "%d %d %llu ",
4302 				 entry->pid, iter->cpu, iter->ts);
4303 
4304 	if (trace_seq_has_overflowed(s))
4305 		return TRACE_TYPE_PARTIAL_LINE;
4306 
4307 	event = ftrace_find_event(entry->type);
4308 	if (event)
4309 		return event->funcs->raw(iter, 0, event);
4310 
4311 	trace_seq_printf(s, "%d ?\n", entry->type);
4312 
4313 	return trace_handle_return(s);
4314 }
4315 
4316 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4317 {
4318 	struct trace_array *tr = iter->tr;
4319 	struct trace_seq *s = &iter->seq;
4320 	unsigned char newline = '\n';
4321 	struct trace_entry *entry;
4322 	struct trace_event *event;
4323 
4324 	entry = iter->ent;
4325 
4326 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4327 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4328 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4329 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4330 		if (trace_seq_has_overflowed(s))
4331 			return TRACE_TYPE_PARTIAL_LINE;
4332 	}
4333 
4334 	event = ftrace_find_event(entry->type);
4335 	if (event) {
4336 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4337 		if (ret != TRACE_TYPE_HANDLED)
4338 			return ret;
4339 	}
4340 
4341 	SEQ_PUT_FIELD(s, newline);
4342 
4343 	return trace_handle_return(s);
4344 }
4345 
4346 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4347 {
4348 	struct trace_array *tr = iter->tr;
4349 	struct trace_seq *s = &iter->seq;
4350 	struct trace_entry *entry;
4351 	struct trace_event *event;
4352 
4353 	entry = iter->ent;
4354 
4355 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4356 		SEQ_PUT_FIELD(s, entry->pid);
4357 		SEQ_PUT_FIELD(s, iter->cpu);
4358 		SEQ_PUT_FIELD(s, iter->ts);
4359 		if (trace_seq_has_overflowed(s))
4360 			return TRACE_TYPE_PARTIAL_LINE;
4361 	}
4362 
4363 	event = ftrace_find_event(entry->type);
4364 	return event ? event->funcs->binary(iter, 0, event) :
4365 		TRACE_TYPE_HANDLED;
4366 }
4367 
4368 int trace_empty(struct trace_iterator *iter)
4369 {
4370 	struct ring_buffer_iter *buf_iter;
4371 	int cpu;
4372 
4373 	/* If we are looking at one CPU buffer, only check that one */
4374 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4375 		cpu = iter->cpu_file;
4376 		buf_iter = trace_buffer_iter(iter, cpu);
4377 		if (buf_iter) {
4378 			if (!ring_buffer_iter_empty(buf_iter))
4379 				return 0;
4380 		} else {
4381 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4382 				return 0;
4383 		}
4384 		return 1;
4385 	}
4386 
4387 	for_each_tracing_cpu(cpu) {
4388 		buf_iter = trace_buffer_iter(iter, cpu);
4389 		if (buf_iter) {
4390 			if (!ring_buffer_iter_empty(buf_iter))
4391 				return 0;
4392 		} else {
4393 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4394 				return 0;
4395 		}
4396 	}
4397 
4398 	return 1;
4399 }
4400 
4401 /*  Called with trace_event_read_lock() held. */
4402 enum print_line_t print_trace_line(struct trace_iterator *iter)
4403 {
4404 	struct trace_array *tr = iter->tr;
4405 	unsigned long trace_flags = tr->trace_flags;
4406 	enum print_line_t ret;
4407 
4408 	if (iter->lost_events) {
4409 		if (iter->lost_events == (unsigned long)-1)
4410 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4411 					 iter->cpu);
4412 		else
4413 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4414 					 iter->cpu, iter->lost_events);
4415 		if (trace_seq_has_overflowed(&iter->seq))
4416 			return TRACE_TYPE_PARTIAL_LINE;
4417 	}
4418 
4419 	if (iter->trace && iter->trace->print_line) {
4420 		ret = iter->trace->print_line(iter);
4421 		if (ret != TRACE_TYPE_UNHANDLED)
4422 			return ret;
4423 	}
4424 
4425 	if (iter->ent->type == TRACE_BPUTS &&
4426 			trace_flags & TRACE_ITER_PRINTK &&
4427 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4428 		return trace_print_bputs_msg_only(iter);
4429 
4430 	if (iter->ent->type == TRACE_BPRINT &&
4431 			trace_flags & TRACE_ITER_PRINTK &&
4432 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4433 		return trace_print_bprintk_msg_only(iter);
4434 
4435 	if (iter->ent->type == TRACE_PRINT &&
4436 			trace_flags & TRACE_ITER_PRINTK &&
4437 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4438 		return trace_print_printk_msg_only(iter);
4439 
4440 	if (trace_flags & TRACE_ITER_BIN)
4441 		return print_bin_fmt(iter);
4442 
4443 	if (trace_flags & TRACE_ITER_HEX)
4444 		return print_hex_fmt(iter);
4445 
4446 	if (trace_flags & TRACE_ITER_RAW)
4447 		return print_raw_fmt(iter);
4448 
4449 	return print_trace_fmt(iter);
4450 }
4451 
4452 void trace_latency_header(struct seq_file *m)
4453 {
4454 	struct trace_iterator *iter = m->private;
4455 	struct trace_array *tr = iter->tr;
4456 
4457 	/* print nothing if the buffers are empty */
4458 	if (trace_empty(iter))
4459 		return;
4460 
4461 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4462 		print_trace_header(m, iter);
4463 
4464 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4465 		print_lat_help_header(m);
4466 }
4467 
4468 void trace_default_header(struct seq_file *m)
4469 {
4470 	struct trace_iterator *iter = m->private;
4471 	struct trace_array *tr = iter->tr;
4472 	unsigned long trace_flags = tr->trace_flags;
4473 
4474 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4475 		return;
4476 
4477 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4478 		/* print nothing if the buffers are empty */
4479 		if (trace_empty(iter))
4480 			return;
4481 		print_trace_header(m, iter);
4482 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4483 			print_lat_help_header(m);
4484 	} else {
4485 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4486 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4487 				print_func_help_header_irq(iter->array_buffer,
4488 							   m, trace_flags);
4489 			else
4490 				print_func_help_header(iter->array_buffer, m,
4491 						       trace_flags);
4492 		}
4493 	}
4494 }
4495 
4496 static void test_ftrace_alive(struct seq_file *m)
4497 {
4498 	if (!ftrace_is_dead())
4499 		return;
4500 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4501 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4502 }
4503 
4504 #ifdef CONFIG_TRACER_MAX_TRACE
4505 static void show_snapshot_main_help(struct seq_file *m)
4506 {
4507 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4508 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4509 		    "#                      Takes a snapshot of the main buffer.\n"
4510 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4511 		    "#                      (Doesn't have to be '2' works with any number that\n"
4512 		    "#                       is not a '0' or '1')\n");
4513 }
4514 
4515 static void show_snapshot_percpu_help(struct seq_file *m)
4516 {
4517 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4518 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4519 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4520 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4521 #else
4522 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4523 		    "#                     Must use main snapshot file to allocate.\n");
4524 #endif
4525 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4526 		    "#                      (Doesn't have to be '2' works with any number that\n"
4527 		    "#                       is not a '0' or '1')\n");
4528 }
4529 
4530 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4531 {
4532 	if (iter->tr->allocated_snapshot)
4533 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4534 	else
4535 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4536 
4537 	seq_puts(m, "# Snapshot commands:\n");
4538 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4539 		show_snapshot_main_help(m);
4540 	else
4541 		show_snapshot_percpu_help(m);
4542 }
4543 #else
4544 /* Should never be called */
4545 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4546 #endif
4547 
4548 static int s_show(struct seq_file *m, void *v)
4549 {
4550 	struct trace_iterator *iter = v;
4551 	int ret;
4552 
4553 	if (iter->ent == NULL) {
4554 		if (iter->tr) {
4555 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4556 			seq_puts(m, "#\n");
4557 			test_ftrace_alive(m);
4558 		}
4559 		if (iter->snapshot && trace_empty(iter))
4560 			print_snapshot_help(m, iter);
4561 		else if (iter->trace && iter->trace->print_header)
4562 			iter->trace->print_header(m);
4563 		else
4564 			trace_default_header(m);
4565 
4566 	} else if (iter->leftover) {
4567 		/*
4568 		 * If we filled the seq_file buffer earlier, we
4569 		 * want to just show it now.
4570 		 */
4571 		ret = trace_print_seq(m, &iter->seq);
4572 
4573 		/* ret should this time be zero, but you never know */
4574 		iter->leftover = ret;
4575 
4576 	} else {
4577 		print_trace_line(iter);
4578 		ret = trace_print_seq(m, &iter->seq);
4579 		/*
4580 		 * If we overflow the seq_file buffer, then it will
4581 		 * ask us for this data again at start up.
4582 		 * Use that instead.
4583 		 *  ret is 0 if seq_file write succeeded.
4584 		 *        -1 otherwise.
4585 		 */
4586 		iter->leftover = ret;
4587 	}
4588 
4589 	return 0;
4590 }
4591 
4592 /*
4593  * Should be used after trace_array_get(), trace_types_lock
4594  * ensures that i_cdev was already initialized.
4595  */
4596 static inline int tracing_get_cpu(struct inode *inode)
4597 {
4598 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4599 		return (long)inode->i_cdev - 1;
4600 	return RING_BUFFER_ALL_CPUS;
4601 }
4602 
4603 static const struct seq_operations tracer_seq_ops = {
4604 	.start		= s_start,
4605 	.next		= s_next,
4606 	.stop		= s_stop,
4607 	.show		= s_show,
4608 };
4609 
4610 static struct trace_iterator *
4611 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4612 {
4613 	struct trace_array *tr = inode->i_private;
4614 	struct trace_iterator *iter;
4615 	int cpu;
4616 
4617 	if (tracing_disabled)
4618 		return ERR_PTR(-ENODEV);
4619 
4620 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4621 	if (!iter)
4622 		return ERR_PTR(-ENOMEM);
4623 
4624 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4625 				    GFP_KERNEL);
4626 	if (!iter->buffer_iter)
4627 		goto release;
4628 
4629 	/*
4630 	 * trace_find_next_entry() may need to save off iter->ent.
4631 	 * It will place it into the iter->temp buffer. As most
4632 	 * events are less than 128, allocate a buffer of that size.
4633 	 * If one is greater, then trace_find_next_entry() will
4634 	 * allocate a new buffer to adjust for the bigger iter->ent.
4635 	 * It's not critical if it fails to get allocated here.
4636 	 */
4637 	iter->temp = kmalloc(128, GFP_KERNEL);
4638 	if (iter->temp)
4639 		iter->temp_size = 128;
4640 
4641 	/*
4642 	 * trace_event_printf() may need to modify given format
4643 	 * string to replace %p with %px so that it shows real address
4644 	 * instead of hash value. However, that is only for the event
4645 	 * tracing, other tracer may not need. Defer the allocation
4646 	 * until it is needed.
4647 	 */
4648 	iter->fmt = NULL;
4649 	iter->fmt_size = 0;
4650 
4651 	/*
4652 	 * We make a copy of the current tracer to avoid concurrent
4653 	 * changes on it while we are reading.
4654 	 */
4655 	mutex_lock(&trace_types_lock);
4656 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4657 	if (!iter->trace)
4658 		goto fail;
4659 
4660 	*iter->trace = *tr->current_trace;
4661 
4662 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4663 		goto fail;
4664 
4665 	iter->tr = tr;
4666 
4667 #ifdef CONFIG_TRACER_MAX_TRACE
4668 	/* Currently only the top directory has a snapshot */
4669 	if (tr->current_trace->print_max || snapshot)
4670 		iter->array_buffer = &tr->max_buffer;
4671 	else
4672 #endif
4673 		iter->array_buffer = &tr->array_buffer;
4674 	iter->snapshot = snapshot;
4675 	iter->pos = -1;
4676 	iter->cpu_file = tracing_get_cpu(inode);
4677 	mutex_init(&iter->mutex);
4678 
4679 	/* Notify the tracer early; before we stop tracing. */
4680 	if (iter->trace->open)
4681 		iter->trace->open(iter);
4682 
4683 	/* Annotate start of buffers if we had overruns */
4684 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4685 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4686 
4687 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4688 	if (trace_clocks[tr->clock_id].in_ns)
4689 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4690 
4691 	/*
4692 	 * If pause-on-trace is enabled, then stop the trace while
4693 	 * dumping, unless this is the "snapshot" file
4694 	 */
4695 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4696 		tracing_stop_tr(tr);
4697 
4698 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4699 		for_each_tracing_cpu(cpu) {
4700 			iter->buffer_iter[cpu] =
4701 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4702 							 cpu, GFP_KERNEL);
4703 		}
4704 		ring_buffer_read_prepare_sync();
4705 		for_each_tracing_cpu(cpu) {
4706 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4707 			tracing_iter_reset(iter, cpu);
4708 		}
4709 	} else {
4710 		cpu = iter->cpu_file;
4711 		iter->buffer_iter[cpu] =
4712 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4713 						 cpu, GFP_KERNEL);
4714 		ring_buffer_read_prepare_sync();
4715 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4716 		tracing_iter_reset(iter, cpu);
4717 	}
4718 
4719 	mutex_unlock(&trace_types_lock);
4720 
4721 	return iter;
4722 
4723  fail:
4724 	mutex_unlock(&trace_types_lock);
4725 	kfree(iter->trace);
4726 	kfree(iter->temp);
4727 	kfree(iter->buffer_iter);
4728 release:
4729 	seq_release_private(inode, file);
4730 	return ERR_PTR(-ENOMEM);
4731 }
4732 
4733 int tracing_open_generic(struct inode *inode, struct file *filp)
4734 {
4735 	int ret;
4736 
4737 	ret = tracing_check_open_get_tr(NULL);
4738 	if (ret)
4739 		return ret;
4740 
4741 	filp->private_data = inode->i_private;
4742 	return 0;
4743 }
4744 
4745 bool tracing_is_disabled(void)
4746 {
4747 	return (tracing_disabled) ? true: false;
4748 }
4749 
4750 /*
4751  * Open and update trace_array ref count.
4752  * Must have the current trace_array passed to it.
4753  */
4754 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4755 {
4756 	struct trace_array *tr = inode->i_private;
4757 	int ret;
4758 
4759 	ret = tracing_check_open_get_tr(tr);
4760 	if (ret)
4761 		return ret;
4762 
4763 	filp->private_data = inode->i_private;
4764 
4765 	return 0;
4766 }
4767 
4768 static int tracing_release(struct inode *inode, struct file *file)
4769 {
4770 	struct trace_array *tr = inode->i_private;
4771 	struct seq_file *m = file->private_data;
4772 	struct trace_iterator *iter;
4773 	int cpu;
4774 
4775 	if (!(file->f_mode & FMODE_READ)) {
4776 		trace_array_put(tr);
4777 		return 0;
4778 	}
4779 
4780 	/* Writes do not use seq_file */
4781 	iter = m->private;
4782 	mutex_lock(&trace_types_lock);
4783 
4784 	for_each_tracing_cpu(cpu) {
4785 		if (iter->buffer_iter[cpu])
4786 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4787 	}
4788 
4789 	if (iter->trace && iter->trace->close)
4790 		iter->trace->close(iter);
4791 
4792 	if (!iter->snapshot && tr->stop_count)
4793 		/* reenable tracing if it was previously enabled */
4794 		tracing_start_tr(tr);
4795 
4796 	__trace_array_put(tr);
4797 
4798 	mutex_unlock(&trace_types_lock);
4799 
4800 	mutex_destroy(&iter->mutex);
4801 	free_cpumask_var(iter->started);
4802 	kfree(iter->fmt);
4803 	kfree(iter->temp);
4804 	kfree(iter->trace);
4805 	kfree(iter->buffer_iter);
4806 	seq_release_private(inode, file);
4807 
4808 	return 0;
4809 }
4810 
4811 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4812 {
4813 	struct trace_array *tr = inode->i_private;
4814 
4815 	trace_array_put(tr);
4816 	return 0;
4817 }
4818 
4819 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4820 {
4821 	struct trace_array *tr = inode->i_private;
4822 
4823 	trace_array_put(tr);
4824 
4825 	return single_release(inode, file);
4826 }
4827 
4828 static int tracing_open(struct inode *inode, struct file *file)
4829 {
4830 	struct trace_array *tr = inode->i_private;
4831 	struct trace_iterator *iter;
4832 	int ret;
4833 
4834 	ret = tracing_check_open_get_tr(tr);
4835 	if (ret)
4836 		return ret;
4837 
4838 	/* If this file was open for write, then erase contents */
4839 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4840 		int cpu = tracing_get_cpu(inode);
4841 		struct array_buffer *trace_buf = &tr->array_buffer;
4842 
4843 #ifdef CONFIG_TRACER_MAX_TRACE
4844 		if (tr->current_trace->print_max)
4845 			trace_buf = &tr->max_buffer;
4846 #endif
4847 
4848 		if (cpu == RING_BUFFER_ALL_CPUS)
4849 			tracing_reset_online_cpus(trace_buf);
4850 		else
4851 			tracing_reset_cpu(trace_buf, cpu);
4852 	}
4853 
4854 	if (file->f_mode & FMODE_READ) {
4855 		iter = __tracing_open(inode, file, false);
4856 		if (IS_ERR(iter))
4857 			ret = PTR_ERR(iter);
4858 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4859 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4860 	}
4861 
4862 	if (ret < 0)
4863 		trace_array_put(tr);
4864 
4865 	return ret;
4866 }
4867 
4868 /*
4869  * Some tracers are not suitable for instance buffers.
4870  * A tracer is always available for the global array (toplevel)
4871  * or if it explicitly states that it is.
4872  */
4873 static bool
4874 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4875 {
4876 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4877 }
4878 
4879 /* Find the next tracer that this trace array may use */
4880 static struct tracer *
4881 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4882 {
4883 	while (t && !trace_ok_for_array(t, tr))
4884 		t = t->next;
4885 
4886 	return t;
4887 }
4888 
4889 static void *
4890 t_next(struct seq_file *m, void *v, loff_t *pos)
4891 {
4892 	struct trace_array *tr = m->private;
4893 	struct tracer *t = v;
4894 
4895 	(*pos)++;
4896 
4897 	if (t)
4898 		t = get_tracer_for_array(tr, t->next);
4899 
4900 	return t;
4901 }
4902 
4903 static void *t_start(struct seq_file *m, loff_t *pos)
4904 {
4905 	struct trace_array *tr = m->private;
4906 	struct tracer *t;
4907 	loff_t l = 0;
4908 
4909 	mutex_lock(&trace_types_lock);
4910 
4911 	t = get_tracer_for_array(tr, trace_types);
4912 	for (; t && l < *pos; t = t_next(m, t, &l))
4913 			;
4914 
4915 	return t;
4916 }
4917 
4918 static void t_stop(struct seq_file *m, void *p)
4919 {
4920 	mutex_unlock(&trace_types_lock);
4921 }
4922 
4923 static int t_show(struct seq_file *m, void *v)
4924 {
4925 	struct tracer *t = v;
4926 
4927 	if (!t)
4928 		return 0;
4929 
4930 	seq_puts(m, t->name);
4931 	if (t->next)
4932 		seq_putc(m, ' ');
4933 	else
4934 		seq_putc(m, '\n');
4935 
4936 	return 0;
4937 }
4938 
4939 static const struct seq_operations show_traces_seq_ops = {
4940 	.start		= t_start,
4941 	.next		= t_next,
4942 	.stop		= t_stop,
4943 	.show		= t_show,
4944 };
4945 
4946 static int show_traces_open(struct inode *inode, struct file *file)
4947 {
4948 	struct trace_array *tr = inode->i_private;
4949 	struct seq_file *m;
4950 	int ret;
4951 
4952 	ret = tracing_check_open_get_tr(tr);
4953 	if (ret)
4954 		return ret;
4955 
4956 	ret = seq_open(file, &show_traces_seq_ops);
4957 	if (ret) {
4958 		trace_array_put(tr);
4959 		return ret;
4960 	}
4961 
4962 	m = file->private_data;
4963 	m->private = tr;
4964 
4965 	return 0;
4966 }
4967 
4968 static int show_traces_release(struct inode *inode, struct file *file)
4969 {
4970 	struct trace_array *tr = inode->i_private;
4971 
4972 	trace_array_put(tr);
4973 	return seq_release(inode, file);
4974 }
4975 
4976 static ssize_t
4977 tracing_write_stub(struct file *filp, const char __user *ubuf,
4978 		   size_t count, loff_t *ppos)
4979 {
4980 	return count;
4981 }
4982 
4983 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4984 {
4985 	int ret;
4986 
4987 	if (file->f_mode & FMODE_READ)
4988 		ret = seq_lseek(file, offset, whence);
4989 	else
4990 		file->f_pos = ret = 0;
4991 
4992 	return ret;
4993 }
4994 
4995 static const struct file_operations tracing_fops = {
4996 	.open		= tracing_open,
4997 	.read		= seq_read,
4998 	.write		= tracing_write_stub,
4999 	.llseek		= tracing_lseek,
5000 	.release	= tracing_release,
5001 };
5002 
5003 static const struct file_operations show_traces_fops = {
5004 	.open		= show_traces_open,
5005 	.read		= seq_read,
5006 	.llseek		= seq_lseek,
5007 	.release	= show_traces_release,
5008 };
5009 
5010 static ssize_t
5011 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5012 		     size_t count, loff_t *ppos)
5013 {
5014 	struct trace_array *tr = file_inode(filp)->i_private;
5015 	char *mask_str;
5016 	int len;
5017 
5018 	len = snprintf(NULL, 0, "%*pb\n",
5019 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5020 	mask_str = kmalloc(len, GFP_KERNEL);
5021 	if (!mask_str)
5022 		return -ENOMEM;
5023 
5024 	len = snprintf(mask_str, len, "%*pb\n",
5025 		       cpumask_pr_args(tr->tracing_cpumask));
5026 	if (len >= count) {
5027 		count = -EINVAL;
5028 		goto out_err;
5029 	}
5030 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5031 
5032 out_err:
5033 	kfree(mask_str);
5034 
5035 	return count;
5036 }
5037 
5038 int tracing_set_cpumask(struct trace_array *tr,
5039 			cpumask_var_t tracing_cpumask_new)
5040 {
5041 	int cpu;
5042 
5043 	if (!tr)
5044 		return -EINVAL;
5045 
5046 	local_irq_disable();
5047 	arch_spin_lock(&tr->max_lock);
5048 	for_each_tracing_cpu(cpu) {
5049 		/*
5050 		 * Increase/decrease the disabled counter if we are
5051 		 * about to flip a bit in the cpumask:
5052 		 */
5053 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5054 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5055 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5056 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5057 		}
5058 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5059 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5060 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5061 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5062 		}
5063 	}
5064 	arch_spin_unlock(&tr->max_lock);
5065 	local_irq_enable();
5066 
5067 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5068 
5069 	return 0;
5070 }
5071 
5072 static ssize_t
5073 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5074 		      size_t count, loff_t *ppos)
5075 {
5076 	struct trace_array *tr = file_inode(filp)->i_private;
5077 	cpumask_var_t tracing_cpumask_new;
5078 	int err;
5079 
5080 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5081 		return -ENOMEM;
5082 
5083 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5084 	if (err)
5085 		goto err_free;
5086 
5087 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5088 	if (err)
5089 		goto err_free;
5090 
5091 	free_cpumask_var(tracing_cpumask_new);
5092 
5093 	return count;
5094 
5095 err_free:
5096 	free_cpumask_var(tracing_cpumask_new);
5097 
5098 	return err;
5099 }
5100 
5101 static const struct file_operations tracing_cpumask_fops = {
5102 	.open		= tracing_open_generic_tr,
5103 	.read		= tracing_cpumask_read,
5104 	.write		= tracing_cpumask_write,
5105 	.release	= tracing_release_generic_tr,
5106 	.llseek		= generic_file_llseek,
5107 };
5108 
5109 static int tracing_trace_options_show(struct seq_file *m, void *v)
5110 {
5111 	struct tracer_opt *trace_opts;
5112 	struct trace_array *tr = m->private;
5113 	u32 tracer_flags;
5114 	int i;
5115 
5116 	mutex_lock(&trace_types_lock);
5117 	tracer_flags = tr->current_trace->flags->val;
5118 	trace_opts = tr->current_trace->flags->opts;
5119 
5120 	for (i = 0; trace_options[i]; i++) {
5121 		if (tr->trace_flags & (1 << i))
5122 			seq_printf(m, "%s\n", trace_options[i]);
5123 		else
5124 			seq_printf(m, "no%s\n", trace_options[i]);
5125 	}
5126 
5127 	for (i = 0; trace_opts[i].name; i++) {
5128 		if (tracer_flags & trace_opts[i].bit)
5129 			seq_printf(m, "%s\n", trace_opts[i].name);
5130 		else
5131 			seq_printf(m, "no%s\n", trace_opts[i].name);
5132 	}
5133 	mutex_unlock(&trace_types_lock);
5134 
5135 	return 0;
5136 }
5137 
5138 static int __set_tracer_option(struct trace_array *tr,
5139 			       struct tracer_flags *tracer_flags,
5140 			       struct tracer_opt *opts, int neg)
5141 {
5142 	struct tracer *trace = tracer_flags->trace;
5143 	int ret;
5144 
5145 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5146 	if (ret)
5147 		return ret;
5148 
5149 	if (neg)
5150 		tracer_flags->val &= ~opts->bit;
5151 	else
5152 		tracer_flags->val |= opts->bit;
5153 	return 0;
5154 }
5155 
5156 /* Try to assign a tracer specific option */
5157 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5158 {
5159 	struct tracer *trace = tr->current_trace;
5160 	struct tracer_flags *tracer_flags = trace->flags;
5161 	struct tracer_opt *opts = NULL;
5162 	int i;
5163 
5164 	for (i = 0; tracer_flags->opts[i].name; i++) {
5165 		opts = &tracer_flags->opts[i];
5166 
5167 		if (strcmp(cmp, opts->name) == 0)
5168 			return __set_tracer_option(tr, trace->flags, opts, neg);
5169 	}
5170 
5171 	return -EINVAL;
5172 }
5173 
5174 /* Some tracers require overwrite to stay enabled */
5175 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5176 {
5177 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5178 		return -1;
5179 
5180 	return 0;
5181 }
5182 
5183 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5184 {
5185 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5186 	    (mask == TRACE_ITER_RECORD_CMD))
5187 		lockdep_assert_held(&event_mutex);
5188 
5189 	/* do nothing if flag is already set */
5190 	if (!!(tr->trace_flags & mask) == !!enabled)
5191 		return 0;
5192 
5193 	/* Give the tracer a chance to approve the change */
5194 	if (tr->current_trace->flag_changed)
5195 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5196 			return -EINVAL;
5197 
5198 	if (enabled)
5199 		tr->trace_flags |= mask;
5200 	else
5201 		tr->trace_flags &= ~mask;
5202 
5203 	if (mask == TRACE_ITER_RECORD_CMD)
5204 		trace_event_enable_cmd_record(enabled);
5205 
5206 	if (mask == TRACE_ITER_RECORD_TGID) {
5207 		if (!tgid_map)
5208 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5209 					   sizeof(*tgid_map),
5210 					   GFP_KERNEL);
5211 		if (!tgid_map) {
5212 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5213 			return -ENOMEM;
5214 		}
5215 
5216 		trace_event_enable_tgid_record(enabled);
5217 	}
5218 
5219 	if (mask == TRACE_ITER_EVENT_FORK)
5220 		trace_event_follow_fork(tr, enabled);
5221 
5222 	if (mask == TRACE_ITER_FUNC_FORK)
5223 		ftrace_pid_follow_fork(tr, enabled);
5224 
5225 	if (mask == TRACE_ITER_OVERWRITE) {
5226 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5227 #ifdef CONFIG_TRACER_MAX_TRACE
5228 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5229 #endif
5230 	}
5231 
5232 	if (mask == TRACE_ITER_PRINTK) {
5233 		trace_printk_start_stop_comm(enabled);
5234 		trace_printk_control(enabled);
5235 	}
5236 
5237 	return 0;
5238 }
5239 
5240 int trace_set_options(struct trace_array *tr, char *option)
5241 {
5242 	char *cmp;
5243 	int neg = 0;
5244 	int ret;
5245 	size_t orig_len = strlen(option);
5246 	int len;
5247 
5248 	cmp = strstrip(option);
5249 
5250 	len = str_has_prefix(cmp, "no");
5251 	if (len)
5252 		neg = 1;
5253 
5254 	cmp += len;
5255 
5256 	mutex_lock(&event_mutex);
5257 	mutex_lock(&trace_types_lock);
5258 
5259 	ret = match_string(trace_options, -1, cmp);
5260 	/* If no option could be set, test the specific tracer options */
5261 	if (ret < 0)
5262 		ret = set_tracer_option(tr, cmp, neg);
5263 	else
5264 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5265 
5266 	mutex_unlock(&trace_types_lock);
5267 	mutex_unlock(&event_mutex);
5268 
5269 	/*
5270 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5271 	 * turn it back into a space.
5272 	 */
5273 	if (orig_len > strlen(option))
5274 		option[strlen(option)] = ' ';
5275 
5276 	return ret;
5277 }
5278 
5279 static void __init apply_trace_boot_options(void)
5280 {
5281 	char *buf = trace_boot_options_buf;
5282 	char *option;
5283 
5284 	while (true) {
5285 		option = strsep(&buf, ",");
5286 
5287 		if (!option)
5288 			break;
5289 
5290 		if (*option)
5291 			trace_set_options(&global_trace, option);
5292 
5293 		/* Put back the comma to allow this to be called again */
5294 		if (buf)
5295 			*(buf - 1) = ',';
5296 	}
5297 }
5298 
5299 static ssize_t
5300 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5301 			size_t cnt, loff_t *ppos)
5302 {
5303 	struct seq_file *m = filp->private_data;
5304 	struct trace_array *tr = m->private;
5305 	char buf[64];
5306 	int ret;
5307 
5308 	if (cnt >= sizeof(buf))
5309 		return -EINVAL;
5310 
5311 	if (copy_from_user(buf, ubuf, cnt))
5312 		return -EFAULT;
5313 
5314 	buf[cnt] = 0;
5315 
5316 	ret = trace_set_options(tr, buf);
5317 	if (ret < 0)
5318 		return ret;
5319 
5320 	*ppos += cnt;
5321 
5322 	return cnt;
5323 }
5324 
5325 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5326 {
5327 	struct trace_array *tr = inode->i_private;
5328 	int ret;
5329 
5330 	ret = tracing_check_open_get_tr(tr);
5331 	if (ret)
5332 		return ret;
5333 
5334 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5335 	if (ret < 0)
5336 		trace_array_put(tr);
5337 
5338 	return ret;
5339 }
5340 
5341 static const struct file_operations tracing_iter_fops = {
5342 	.open		= tracing_trace_options_open,
5343 	.read		= seq_read,
5344 	.llseek		= seq_lseek,
5345 	.release	= tracing_single_release_tr,
5346 	.write		= tracing_trace_options_write,
5347 };
5348 
5349 static const char readme_msg[] =
5350 	"tracing mini-HOWTO:\n\n"
5351 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5352 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5353 	" Important files:\n"
5354 	"  trace\t\t\t- The static contents of the buffer\n"
5355 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5356 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5357 	"  current_tracer\t- function and latency tracers\n"
5358 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5359 	"  error_log\t- error log for failed commands (that support it)\n"
5360 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5361 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5362 	"  trace_clock\t\t-change the clock used to order events\n"
5363 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5364 	"      global:   Synced across CPUs but slows tracing down.\n"
5365 	"     counter:   Not a clock, but just an increment\n"
5366 	"      uptime:   Jiffy counter from time of boot\n"
5367 	"        perf:   Same clock that perf events use\n"
5368 #ifdef CONFIG_X86_64
5369 	"     x86-tsc:   TSC cycle counter\n"
5370 #endif
5371 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
5372 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5373 	"    absolute:   Absolute (standalone) timestamp\n"
5374 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5375 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5376 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5377 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5378 	"\t\t\t  Remove sub-buffer with rmdir\n"
5379 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5380 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5381 	"\t\t\t  option name\n"
5382 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5383 #ifdef CONFIG_DYNAMIC_FTRACE
5384 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5385 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5386 	"\t\t\t  functions\n"
5387 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5388 	"\t     modules: Can select a group via module\n"
5389 	"\t      Format: :mod:<module-name>\n"
5390 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5391 	"\t    triggers: a command to perform when function is hit\n"
5392 	"\t      Format: <function>:<trigger>[:count]\n"
5393 	"\t     trigger: traceon, traceoff\n"
5394 	"\t\t      enable_event:<system>:<event>\n"
5395 	"\t\t      disable_event:<system>:<event>\n"
5396 #ifdef CONFIG_STACKTRACE
5397 	"\t\t      stacktrace\n"
5398 #endif
5399 #ifdef CONFIG_TRACER_SNAPSHOT
5400 	"\t\t      snapshot\n"
5401 #endif
5402 	"\t\t      dump\n"
5403 	"\t\t      cpudump\n"
5404 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5405 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5406 	"\t     The first one will disable tracing every time do_fault is hit\n"
5407 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5408 	"\t       The first time do trap is hit and it disables tracing, the\n"
5409 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5410 	"\t       the counter will not decrement. It only decrements when the\n"
5411 	"\t       trigger did work\n"
5412 	"\t     To remove trigger without count:\n"
5413 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5414 	"\t     To remove trigger with a count:\n"
5415 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5416 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5417 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5418 	"\t    modules: Can select a group via module command :mod:\n"
5419 	"\t    Does not accept triggers\n"
5420 #endif /* CONFIG_DYNAMIC_FTRACE */
5421 #ifdef CONFIG_FUNCTION_TRACER
5422 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5423 	"\t\t    (function)\n"
5424 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5425 	"\t\t    (function)\n"
5426 #endif
5427 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5428 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5429 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5430 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5431 #endif
5432 #ifdef CONFIG_TRACER_SNAPSHOT
5433 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5434 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5435 	"\t\t\t  information\n"
5436 #endif
5437 #ifdef CONFIG_STACK_TRACER
5438 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5439 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5440 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5441 	"\t\t\t  new trace)\n"
5442 #ifdef CONFIG_DYNAMIC_FTRACE
5443 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5444 	"\t\t\t  traces\n"
5445 #endif
5446 #endif /* CONFIG_STACK_TRACER */
5447 #ifdef CONFIG_DYNAMIC_EVENTS
5448 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5449 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5450 #endif
5451 #ifdef CONFIG_KPROBE_EVENTS
5452 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5453 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5454 #endif
5455 #ifdef CONFIG_UPROBE_EVENTS
5456 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5457 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5458 #endif
5459 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5460 	"\t  accepts: event-definitions (one definition per line)\n"
5461 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5462 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5463 #ifdef CONFIG_HIST_TRIGGERS
5464 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5465 #endif
5466 	"\t           -:[<group>/]<event>\n"
5467 #ifdef CONFIG_KPROBE_EVENTS
5468 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5469   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5470 #endif
5471 #ifdef CONFIG_UPROBE_EVENTS
5472   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5473 #endif
5474 	"\t     args: <name>=fetcharg[:type]\n"
5475 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5476 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5477 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5478 #else
5479 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5480 #endif
5481 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5482 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5483 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5484 	"\t           <type>\\[<array-size>\\]\n"
5485 #ifdef CONFIG_HIST_TRIGGERS
5486 	"\t    field: <stype> <name>;\n"
5487 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5488 	"\t           [unsigned] char/int/long\n"
5489 #endif
5490 #endif
5491 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5492 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5493 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5494 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5495 	"\t\t\t  events\n"
5496 	"      filter\t\t- If set, only events passing filter are traced\n"
5497 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5498 	"\t\t\t  <event>:\n"
5499 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5500 	"      filter\t\t- If set, only events passing filter are traced\n"
5501 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5502 	"\t    Format: <trigger>[:count][if <filter>]\n"
5503 	"\t   trigger: traceon, traceoff\n"
5504 	"\t            enable_event:<system>:<event>\n"
5505 	"\t            disable_event:<system>:<event>\n"
5506 #ifdef CONFIG_HIST_TRIGGERS
5507 	"\t            enable_hist:<system>:<event>\n"
5508 	"\t            disable_hist:<system>:<event>\n"
5509 #endif
5510 #ifdef CONFIG_STACKTRACE
5511 	"\t\t    stacktrace\n"
5512 #endif
5513 #ifdef CONFIG_TRACER_SNAPSHOT
5514 	"\t\t    snapshot\n"
5515 #endif
5516 #ifdef CONFIG_HIST_TRIGGERS
5517 	"\t\t    hist (see below)\n"
5518 #endif
5519 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5520 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5521 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5522 	"\t                  events/block/block_unplug/trigger\n"
5523 	"\t   The first disables tracing every time block_unplug is hit.\n"
5524 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5525 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5526 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5527 	"\t   Like function triggers, the counter is only decremented if it\n"
5528 	"\t    enabled or disabled tracing.\n"
5529 	"\t   To remove a trigger without a count:\n"
5530 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5531 	"\t   To remove a trigger with a count:\n"
5532 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5533 	"\t   Filters can be ignored when removing a trigger.\n"
5534 #ifdef CONFIG_HIST_TRIGGERS
5535 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5536 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5537 	"\t            [:values=<field1[,field2,...]>]\n"
5538 	"\t            [:sort=<field1[,field2,...]>]\n"
5539 	"\t            [:size=#entries]\n"
5540 	"\t            [:pause][:continue][:clear]\n"
5541 	"\t            [:name=histname1]\n"
5542 	"\t            [:<handler>.<action>]\n"
5543 	"\t            [if <filter>]\n\n"
5544 	"\t    When a matching event is hit, an entry is added to a hash\n"
5545 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5546 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5547 	"\t    correspond to fields in the event's format description.  Keys\n"
5548 	"\t    can be any field, or the special string 'stacktrace'.\n"
5549 	"\t    Compound keys consisting of up to two fields can be specified\n"
5550 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5551 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5552 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5553 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5554 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5555 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5556 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5557 	"\t    its histogram data will be shared with other triggers of the\n"
5558 	"\t    same name, and trigger hits will update this common data.\n\n"
5559 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5560 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5561 	"\t    triggers attached to an event, there will be a table for each\n"
5562 	"\t    trigger in the output.  The table displayed for a named\n"
5563 	"\t    trigger will be the same as any other instance having the\n"
5564 	"\t    same name.  The default format used to display a given field\n"
5565 	"\t    can be modified by appending any of the following modifiers\n"
5566 	"\t    to the field name, as applicable:\n\n"
5567 	"\t            .hex        display a number as a hex value\n"
5568 	"\t            .sym        display an address as a symbol\n"
5569 	"\t            .sym-offset display an address as a symbol and offset\n"
5570 	"\t            .execname   display a common_pid as a program name\n"
5571 	"\t            .syscall    display a syscall id as a syscall name\n"
5572 	"\t            .log2       display log2 value rather than raw number\n"
5573 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5574 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5575 	"\t    trigger or to start a hist trigger but not log any events\n"
5576 	"\t    until told to do so.  'continue' can be used to start or\n"
5577 	"\t    restart a paused hist trigger.\n\n"
5578 	"\t    The 'clear' parameter will clear the contents of a running\n"
5579 	"\t    hist trigger and leave its current paused/active state\n"
5580 	"\t    unchanged.\n\n"
5581 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5582 	"\t    have one event conditionally start and stop another event's\n"
5583 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5584 	"\t    the enable_event and disable_event triggers.\n\n"
5585 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5586 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5587 	"\t        <handler>.<action>\n\n"
5588 	"\t    The available handlers are:\n\n"
5589 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5590 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5591 	"\t        onchange(var)            - invoke action if var changes\n\n"
5592 	"\t    The available actions are:\n\n"
5593 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5594 	"\t        save(field,...)                      - save current event fields\n"
5595 #ifdef CONFIG_TRACER_SNAPSHOT
5596 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5597 #endif
5598 #ifdef CONFIG_SYNTH_EVENTS
5599 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5600 	"\t  Write into this file to define/undefine new synthetic events.\n"
5601 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5602 #endif
5603 #endif
5604 ;
5605 
5606 static ssize_t
5607 tracing_readme_read(struct file *filp, char __user *ubuf,
5608 		       size_t cnt, loff_t *ppos)
5609 {
5610 	return simple_read_from_buffer(ubuf, cnt, ppos,
5611 					readme_msg, strlen(readme_msg));
5612 }
5613 
5614 static const struct file_operations tracing_readme_fops = {
5615 	.open		= tracing_open_generic,
5616 	.read		= tracing_readme_read,
5617 	.llseek		= generic_file_llseek,
5618 };
5619 
5620 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5621 {
5622 	int *ptr = v;
5623 
5624 	if (*pos || m->count)
5625 		ptr++;
5626 
5627 	(*pos)++;
5628 
5629 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5630 		if (trace_find_tgid(*ptr))
5631 			return ptr;
5632 	}
5633 
5634 	return NULL;
5635 }
5636 
5637 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5638 {
5639 	void *v;
5640 	loff_t l = 0;
5641 
5642 	if (!tgid_map)
5643 		return NULL;
5644 
5645 	v = &tgid_map[0];
5646 	while (l <= *pos) {
5647 		v = saved_tgids_next(m, v, &l);
5648 		if (!v)
5649 			return NULL;
5650 	}
5651 
5652 	return v;
5653 }
5654 
5655 static void saved_tgids_stop(struct seq_file *m, void *v)
5656 {
5657 }
5658 
5659 static int saved_tgids_show(struct seq_file *m, void *v)
5660 {
5661 	int pid = (int *)v - tgid_map;
5662 
5663 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5664 	return 0;
5665 }
5666 
5667 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5668 	.start		= saved_tgids_start,
5669 	.stop		= saved_tgids_stop,
5670 	.next		= saved_tgids_next,
5671 	.show		= saved_tgids_show,
5672 };
5673 
5674 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5675 {
5676 	int ret;
5677 
5678 	ret = tracing_check_open_get_tr(NULL);
5679 	if (ret)
5680 		return ret;
5681 
5682 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5683 }
5684 
5685 
5686 static const struct file_operations tracing_saved_tgids_fops = {
5687 	.open		= tracing_saved_tgids_open,
5688 	.read		= seq_read,
5689 	.llseek		= seq_lseek,
5690 	.release	= seq_release,
5691 };
5692 
5693 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5694 {
5695 	unsigned int *ptr = v;
5696 
5697 	if (*pos || m->count)
5698 		ptr++;
5699 
5700 	(*pos)++;
5701 
5702 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5703 	     ptr++) {
5704 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5705 			continue;
5706 
5707 		return ptr;
5708 	}
5709 
5710 	return NULL;
5711 }
5712 
5713 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5714 {
5715 	void *v;
5716 	loff_t l = 0;
5717 
5718 	preempt_disable();
5719 	arch_spin_lock(&trace_cmdline_lock);
5720 
5721 	v = &savedcmd->map_cmdline_to_pid[0];
5722 	while (l <= *pos) {
5723 		v = saved_cmdlines_next(m, v, &l);
5724 		if (!v)
5725 			return NULL;
5726 	}
5727 
5728 	return v;
5729 }
5730 
5731 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5732 {
5733 	arch_spin_unlock(&trace_cmdline_lock);
5734 	preempt_enable();
5735 }
5736 
5737 static int saved_cmdlines_show(struct seq_file *m, void *v)
5738 {
5739 	char buf[TASK_COMM_LEN];
5740 	unsigned int *pid = v;
5741 
5742 	__trace_find_cmdline(*pid, buf);
5743 	seq_printf(m, "%d %s\n", *pid, buf);
5744 	return 0;
5745 }
5746 
5747 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5748 	.start		= saved_cmdlines_start,
5749 	.next		= saved_cmdlines_next,
5750 	.stop		= saved_cmdlines_stop,
5751 	.show		= saved_cmdlines_show,
5752 };
5753 
5754 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5755 {
5756 	int ret;
5757 
5758 	ret = tracing_check_open_get_tr(NULL);
5759 	if (ret)
5760 		return ret;
5761 
5762 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5763 }
5764 
5765 static const struct file_operations tracing_saved_cmdlines_fops = {
5766 	.open		= tracing_saved_cmdlines_open,
5767 	.read		= seq_read,
5768 	.llseek		= seq_lseek,
5769 	.release	= seq_release,
5770 };
5771 
5772 static ssize_t
5773 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5774 				 size_t cnt, loff_t *ppos)
5775 {
5776 	char buf[64];
5777 	int r;
5778 
5779 	arch_spin_lock(&trace_cmdline_lock);
5780 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5781 	arch_spin_unlock(&trace_cmdline_lock);
5782 
5783 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5784 }
5785 
5786 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5787 {
5788 	kfree(s->saved_cmdlines);
5789 	kfree(s->map_cmdline_to_pid);
5790 	kfree(s);
5791 }
5792 
5793 static int tracing_resize_saved_cmdlines(unsigned int val)
5794 {
5795 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5796 
5797 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5798 	if (!s)
5799 		return -ENOMEM;
5800 
5801 	if (allocate_cmdlines_buffer(val, s) < 0) {
5802 		kfree(s);
5803 		return -ENOMEM;
5804 	}
5805 
5806 	arch_spin_lock(&trace_cmdline_lock);
5807 	savedcmd_temp = savedcmd;
5808 	savedcmd = s;
5809 	arch_spin_unlock(&trace_cmdline_lock);
5810 	free_saved_cmdlines_buffer(savedcmd_temp);
5811 
5812 	return 0;
5813 }
5814 
5815 static ssize_t
5816 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5817 				  size_t cnt, loff_t *ppos)
5818 {
5819 	unsigned long val;
5820 	int ret;
5821 
5822 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5823 	if (ret)
5824 		return ret;
5825 
5826 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5827 	if (!val || val > PID_MAX_DEFAULT)
5828 		return -EINVAL;
5829 
5830 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5831 	if (ret < 0)
5832 		return ret;
5833 
5834 	*ppos += cnt;
5835 
5836 	return cnt;
5837 }
5838 
5839 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5840 	.open		= tracing_open_generic,
5841 	.read		= tracing_saved_cmdlines_size_read,
5842 	.write		= tracing_saved_cmdlines_size_write,
5843 };
5844 
5845 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5846 static union trace_eval_map_item *
5847 update_eval_map(union trace_eval_map_item *ptr)
5848 {
5849 	if (!ptr->map.eval_string) {
5850 		if (ptr->tail.next) {
5851 			ptr = ptr->tail.next;
5852 			/* Set ptr to the next real item (skip head) */
5853 			ptr++;
5854 		} else
5855 			return NULL;
5856 	}
5857 	return ptr;
5858 }
5859 
5860 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5861 {
5862 	union trace_eval_map_item *ptr = v;
5863 
5864 	/*
5865 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5866 	 * This really should never happen.
5867 	 */
5868 	(*pos)++;
5869 	ptr = update_eval_map(ptr);
5870 	if (WARN_ON_ONCE(!ptr))
5871 		return NULL;
5872 
5873 	ptr++;
5874 	ptr = update_eval_map(ptr);
5875 
5876 	return ptr;
5877 }
5878 
5879 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5880 {
5881 	union trace_eval_map_item *v;
5882 	loff_t l = 0;
5883 
5884 	mutex_lock(&trace_eval_mutex);
5885 
5886 	v = trace_eval_maps;
5887 	if (v)
5888 		v++;
5889 
5890 	while (v && l < *pos) {
5891 		v = eval_map_next(m, v, &l);
5892 	}
5893 
5894 	return v;
5895 }
5896 
5897 static void eval_map_stop(struct seq_file *m, void *v)
5898 {
5899 	mutex_unlock(&trace_eval_mutex);
5900 }
5901 
5902 static int eval_map_show(struct seq_file *m, void *v)
5903 {
5904 	union trace_eval_map_item *ptr = v;
5905 
5906 	seq_printf(m, "%s %ld (%s)\n",
5907 		   ptr->map.eval_string, ptr->map.eval_value,
5908 		   ptr->map.system);
5909 
5910 	return 0;
5911 }
5912 
5913 static const struct seq_operations tracing_eval_map_seq_ops = {
5914 	.start		= eval_map_start,
5915 	.next		= eval_map_next,
5916 	.stop		= eval_map_stop,
5917 	.show		= eval_map_show,
5918 };
5919 
5920 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5921 {
5922 	int ret;
5923 
5924 	ret = tracing_check_open_get_tr(NULL);
5925 	if (ret)
5926 		return ret;
5927 
5928 	return seq_open(filp, &tracing_eval_map_seq_ops);
5929 }
5930 
5931 static const struct file_operations tracing_eval_map_fops = {
5932 	.open		= tracing_eval_map_open,
5933 	.read		= seq_read,
5934 	.llseek		= seq_lseek,
5935 	.release	= seq_release,
5936 };
5937 
5938 static inline union trace_eval_map_item *
5939 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5940 {
5941 	/* Return tail of array given the head */
5942 	return ptr + ptr->head.length + 1;
5943 }
5944 
5945 static void
5946 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5947 			   int len)
5948 {
5949 	struct trace_eval_map **stop;
5950 	struct trace_eval_map **map;
5951 	union trace_eval_map_item *map_array;
5952 	union trace_eval_map_item *ptr;
5953 
5954 	stop = start + len;
5955 
5956 	/*
5957 	 * The trace_eval_maps contains the map plus a head and tail item,
5958 	 * where the head holds the module and length of array, and the
5959 	 * tail holds a pointer to the next list.
5960 	 */
5961 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5962 	if (!map_array) {
5963 		pr_warn("Unable to allocate trace eval mapping\n");
5964 		return;
5965 	}
5966 
5967 	mutex_lock(&trace_eval_mutex);
5968 
5969 	if (!trace_eval_maps)
5970 		trace_eval_maps = map_array;
5971 	else {
5972 		ptr = trace_eval_maps;
5973 		for (;;) {
5974 			ptr = trace_eval_jmp_to_tail(ptr);
5975 			if (!ptr->tail.next)
5976 				break;
5977 			ptr = ptr->tail.next;
5978 
5979 		}
5980 		ptr->tail.next = map_array;
5981 	}
5982 	map_array->head.mod = mod;
5983 	map_array->head.length = len;
5984 	map_array++;
5985 
5986 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5987 		map_array->map = **map;
5988 		map_array++;
5989 	}
5990 	memset(map_array, 0, sizeof(*map_array));
5991 
5992 	mutex_unlock(&trace_eval_mutex);
5993 }
5994 
5995 static void trace_create_eval_file(struct dentry *d_tracer)
5996 {
5997 	trace_create_file("eval_map", 0444, d_tracer,
5998 			  NULL, &tracing_eval_map_fops);
5999 }
6000 
6001 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6002 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6003 static inline void trace_insert_eval_map_file(struct module *mod,
6004 			      struct trace_eval_map **start, int len) { }
6005 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6006 
6007 static void trace_insert_eval_map(struct module *mod,
6008 				  struct trace_eval_map **start, int len)
6009 {
6010 	struct trace_eval_map **map;
6011 
6012 	if (len <= 0)
6013 		return;
6014 
6015 	map = start;
6016 
6017 	trace_event_eval_update(map, len);
6018 
6019 	trace_insert_eval_map_file(mod, start, len);
6020 }
6021 
6022 static ssize_t
6023 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6024 		       size_t cnt, loff_t *ppos)
6025 {
6026 	struct trace_array *tr = filp->private_data;
6027 	char buf[MAX_TRACER_SIZE+2];
6028 	int r;
6029 
6030 	mutex_lock(&trace_types_lock);
6031 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6032 	mutex_unlock(&trace_types_lock);
6033 
6034 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6035 }
6036 
6037 int tracer_init(struct tracer *t, struct trace_array *tr)
6038 {
6039 	tracing_reset_online_cpus(&tr->array_buffer);
6040 	return t->init(tr);
6041 }
6042 
6043 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6044 {
6045 	int cpu;
6046 
6047 	for_each_tracing_cpu(cpu)
6048 		per_cpu_ptr(buf->data, cpu)->entries = val;
6049 }
6050 
6051 #ifdef CONFIG_TRACER_MAX_TRACE
6052 /* resize @tr's buffer to the size of @size_tr's entries */
6053 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6054 					struct array_buffer *size_buf, int cpu_id)
6055 {
6056 	int cpu, ret = 0;
6057 
6058 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6059 		for_each_tracing_cpu(cpu) {
6060 			ret = ring_buffer_resize(trace_buf->buffer,
6061 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6062 			if (ret < 0)
6063 				break;
6064 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6065 				per_cpu_ptr(size_buf->data, cpu)->entries;
6066 		}
6067 	} else {
6068 		ret = ring_buffer_resize(trace_buf->buffer,
6069 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6070 		if (ret == 0)
6071 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6072 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6073 	}
6074 
6075 	return ret;
6076 }
6077 #endif /* CONFIG_TRACER_MAX_TRACE */
6078 
6079 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6080 					unsigned long size, int cpu)
6081 {
6082 	int ret;
6083 
6084 	/*
6085 	 * If kernel or user changes the size of the ring buffer
6086 	 * we use the size that was given, and we can forget about
6087 	 * expanding it later.
6088 	 */
6089 	ring_buffer_expanded = true;
6090 
6091 	/* May be called before buffers are initialized */
6092 	if (!tr->array_buffer.buffer)
6093 		return 0;
6094 
6095 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6096 	if (ret < 0)
6097 		return ret;
6098 
6099 #ifdef CONFIG_TRACER_MAX_TRACE
6100 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6101 	    !tr->current_trace->use_max_tr)
6102 		goto out;
6103 
6104 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6105 	if (ret < 0) {
6106 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6107 						     &tr->array_buffer, cpu);
6108 		if (r < 0) {
6109 			/*
6110 			 * AARGH! We are left with different
6111 			 * size max buffer!!!!
6112 			 * The max buffer is our "snapshot" buffer.
6113 			 * When a tracer needs a snapshot (one of the
6114 			 * latency tracers), it swaps the max buffer
6115 			 * with the saved snap shot. We succeeded to
6116 			 * update the size of the main buffer, but failed to
6117 			 * update the size of the max buffer. But when we tried
6118 			 * to reset the main buffer to the original size, we
6119 			 * failed there too. This is very unlikely to
6120 			 * happen, but if it does, warn and kill all
6121 			 * tracing.
6122 			 */
6123 			WARN_ON(1);
6124 			tracing_disabled = 1;
6125 		}
6126 		return ret;
6127 	}
6128 
6129 	if (cpu == RING_BUFFER_ALL_CPUS)
6130 		set_buffer_entries(&tr->max_buffer, size);
6131 	else
6132 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6133 
6134  out:
6135 #endif /* CONFIG_TRACER_MAX_TRACE */
6136 
6137 	if (cpu == RING_BUFFER_ALL_CPUS)
6138 		set_buffer_entries(&tr->array_buffer, size);
6139 	else
6140 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6141 
6142 	return ret;
6143 }
6144 
6145 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6146 				  unsigned long size, int cpu_id)
6147 {
6148 	int ret = size;
6149 
6150 	mutex_lock(&trace_types_lock);
6151 
6152 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6153 		/* make sure, this cpu is enabled in the mask */
6154 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6155 			ret = -EINVAL;
6156 			goto out;
6157 		}
6158 	}
6159 
6160 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6161 	if (ret < 0)
6162 		ret = -ENOMEM;
6163 
6164 out:
6165 	mutex_unlock(&trace_types_lock);
6166 
6167 	return ret;
6168 }
6169 
6170 
6171 /**
6172  * tracing_update_buffers - used by tracing facility to expand ring buffers
6173  *
6174  * To save on memory when the tracing is never used on a system with it
6175  * configured in. The ring buffers are set to a minimum size. But once
6176  * a user starts to use the tracing facility, then they need to grow
6177  * to their default size.
6178  *
6179  * This function is to be called when a tracer is about to be used.
6180  */
6181 int tracing_update_buffers(void)
6182 {
6183 	int ret = 0;
6184 
6185 	mutex_lock(&trace_types_lock);
6186 	if (!ring_buffer_expanded)
6187 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6188 						RING_BUFFER_ALL_CPUS);
6189 	mutex_unlock(&trace_types_lock);
6190 
6191 	return ret;
6192 }
6193 
6194 struct trace_option_dentry;
6195 
6196 static void
6197 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6198 
6199 /*
6200  * Used to clear out the tracer before deletion of an instance.
6201  * Must have trace_types_lock held.
6202  */
6203 static void tracing_set_nop(struct trace_array *tr)
6204 {
6205 	if (tr->current_trace == &nop_trace)
6206 		return;
6207 
6208 	tr->current_trace->enabled--;
6209 
6210 	if (tr->current_trace->reset)
6211 		tr->current_trace->reset(tr);
6212 
6213 	tr->current_trace = &nop_trace;
6214 }
6215 
6216 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6217 {
6218 	/* Only enable if the directory has been created already. */
6219 	if (!tr->dir)
6220 		return;
6221 
6222 	create_trace_option_files(tr, t);
6223 }
6224 
6225 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6226 {
6227 	struct tracer *t;
6228 #ifdef CONFIG_TRACER_MAX_TRACE
6229 	bool had_max_tr;
6230 #endif
6231 	int ret = 0;
6232 
6233 	mutex_lock(&trace_types_lock);
6234 
6235 	if (!ring_buffer_expanded) {
6236 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6237 						RING_BUFFER_ALL_CPUS);
6238 		if (ret < 0)
6239 			goto out;
6240 		ret = 0;
6241 	}
6242 
6243 	for (t = trace_types; t; t = t->next) {
6244 		if (strcmp(t->name, buf) == 0)
6245 			break;
6246 	}
6247 	if (!t) {
6248 		ret = -EINVAL;
6249 		goto out;
6250 	}
6251 	if (t == tr->current_trace)
6252 		goto out;
6253 
6254 #ifdef CONFIG_TRACER_SNAPSHOT
6255 	if (t->use_max_tr) {
6256 		arch_spin_lock(&tr->max_lock);
6257 		if (tr->cond_snapshot)
6258 			ret = -EBUSY;
6259 		arch_spin_unlock(&tr->max_lock);
6260 		if (ret)
6261 			goto out;
6262 	}
6263 #endif
6264 	/* Some tracers won't work on kernel command line */
6265 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6266 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6267 			t->name);
6268 		goto out;
6269 	}
6270 
6271 	/* Some tracers are only allowed for the top level buffer */
6272 	if (!trace_ok_for_array(t, tr)) {
6273 		ret = -EINVAL;
6274 		goto out;
6275 	}
6276 
6277 	/* If trace pipe files are being read, we can't change the tracer */
6278 	if (tr->trace_ref) {
6279 		ret = -EBUSY;
6280 		goto out;
6281 	}
6282 
6283 	trace_branch_disable();
6284 
6285 	tr->current_trace->enabled--;
6286 
6287 	if (tr->current_trace->reset)
6288 		tr->current_trace->reset(tr);
6289 
6290 	/* Current trace needs to be nop_trace before synchronize_rcu */
6291 	tr->current_trace = &nop_trace;
6292 
6293 #ifdef CONFIG_TRACER_MAX_TRACE
6294 	had_max_tr = tr->allocated_snapshot;
6295 
6296 	if (had_max_tr && !t->use_max_tr) {
6297 		/*
6298 		 * We need to make sure that the update_max_tr sees that
6299 		 * current_trace changed to nop_trace to keep it from
6300 		 * swapping the buffers after we resize it.
6301 		 * The update_max_tr is called from interrupts disabled
6302 		 * so a synchronized_sched() is sufficient.
6303 		 */
6304 		synchronize_rcu();
6305 		free_snapshot(tr);
6306 	}
6307 #endif
6308 
6309 #ifdef CONFIG_TRACER_MAX_TRACE
6310 	if (t->use_max_tr && !had_max_tr) {
6311 		ret = tracing_alloc_snapshot_instance(tr);
6312 		if (ret < 0)
6313 			goto out;
6314 	}
6315 #endif
6316 
6317 	if (t->init) {
6318 		ret = tracer_init(t, tr);
6319 		if (ret)
6320 			goto out;
6321 	}
6322 
6323 	tr->current_trace = t;
6324 	tr->current_trace->enabled++;
6325 	trace_branch_enable(tr);
6326  out:
6327 	mutex_unlock(&trace_types_lock);
6328 
6329 	return ret;
6330 }
6331 
6332 static ssize_t
6333 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6334 			size_t cnt, loff_t *ppos)
6335 {
6336 	struct trace_array *tr = filp->private_data;
6337 	char buf[MAX_TRACER_SIZE+1];
6338 	int i;
6339 	size_t ret;
6340 	int err;
6341 
6342 	ret = cnt;
6343 
6344 	if (cnt > MAX_TRACER_SIZE)
6345 		cnt = MAX_TRACER_SIZE;
6346 
6347 	if (copy_from_user(buf, ubuf, cnt))
6348 		return -EFAULT;
6349 
6350 	buf[cnt] = 0;
6351 
6352 	/* strip ending whitespace. */
6353 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6354 		buf[i] = 0;
6355 
6356 	err = tracing_set_tracer(tr, buf);
6357 	if (err)
6358 		return err;
6359 
6360 	*ppos += ret;
6361 
6362 	return ret;
6363 }
6364 
6365 static ssize_t
6366 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6367 		   size_t cnt, loff_t *ppos)
6368 {
6369 	char buf[64];
6370 	int r;
6371 
6372 	r = snprintf(buf, sizeof(buf), "%ld\n",
6373 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6374 	if (r > sizeof(buf))
6375 		r = sizeof(buf);
6376 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6377 }
6378 
6379 static ssize_t
6380 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6381 		    size_t cnt, loff_t *ppos)
6382 {
6383 	unsigned long val;
6384 	int ret;
6385 
6386 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6387 	if (ret)
6388 		return ret;
6389 
6390 	*ptr = val * 1000;
6391 
6392 	return cnt;
6393 }
6394 
6395 static ssize_t
6396 tracing_thresh_read(struct file *filp, char __user *ubuf,
6397 		    size_t cnt, loff_t *ppos)
6398 {
6399 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6400 }
6401 
6402 static ssize_t
6403 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6404 		     size_t cnt, loff_t *ppos)
6405 {
6406 	struct trace_array *tr = filp->private_data;
6407 	int ret;
6408 
6409 	mutex_lock(&trace_types_lock);
6410 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6411 	if (ret < 0)
6412 		goto out;
6413 
6414 	if (tr->current_trace->update_thresh) {
6415 		ret = tr->current_trace->update_thresh(tr);
6416 		if (ret < 0)
6417 			goto out;
6418 	}
6419 
6420 	ret = cnt;
6421 out:
6422 	mutex_unlock(&trace_types_lock);
6423 
6424 	return ret;
6425 }
6426 
6427 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6428 
6429 static ssize_t
6430 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6431 		     size_t cnt, loff_t *ppos)
6432 {
6433 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6434 }
6435 
6436 static ssize_t
6437 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6438 		      size_t cnt, loff_t *ppos)
6439 {
6440 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6441 }
6442 
6443 #endif
6444 
6445 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6446 {
6447 	struct trace_array *tr = inode->i_private;
6448 	struct trace_iterator *iter;
6449 	int ret;
6450 
6451 	ret = tracing_check_open_get_tr(tr);
6452 	if (ret)
6453 		return ret;
6454 
6455 	mutex_lock(&trace_types_lock);
6456 
6457 	/* create a buffer to store the information to pass to userspace */
6458 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6459 	if (!iter) {
6460 		ret = -ENOMEM;
6461 		__trace_array_put(tr);
6462 		goto out;
6463 	}
6464 
6465 	trace_seq_init(&iter->seq);
6466 	iter->trace = tr->current_trace;
6467 
6468 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6469 		ret = -ENOMEM;
6470 		goto fail;
6471 	}
6472 
6473 	/* trace pipe does not show start of buffer */
6474 	cpumask_setall(iter->started);
6475 
6476 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6477 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6478 
6479 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6480 	if (trace_clocks[tr->clock_id].in_ns)
6481 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6482 
6483 	iter->tr = tr;
6484 	iter->array_buffer = &tr->array_buffer;
6485 	iter->cpu_file = tracing_get_cpu(inode);
6486 	mutex_init(&iter->mutex);
6487 	filp->private_data = iter;
6488 
6489 	if (iter->trace->pipe_open)
6490 		iter->trace->pipe_open(iter);
6491 
6492 	nonseekable_open(inode, filp);
6493 
6494 	tr->trace_ref++;
6495 out:
6496 	mutex_unlock(&trace_types_lock);
6497 	return ret;
6498 
6499 fail:
6500 	kfree(iter);
6501 	__trace_array_put(tr);
6502 	mutex_unlock(&trace_types_lock);
6503 	return ret;
6504 }
6505 
6506 static int tracing_release_pipe(struct inode *inode, struct file *file)
6507 {
6508 	struct trace_iterator *iter = file->private_data;
6509 	struct trace_array *tr = inode->i_private;
6510 
6511 	mutex_lock(&trace_types_lock);
6512 
6513 	tr->trace_ref--;
6514 
6515 	if (iter->trace->pipe_close)
6516 		iter->trace->pipe_close(iter);
6517 
6518 	mutex_unlock(&trace_types_lock);
6519 
6520 	free_cpumask_var(iter->started);
6521 	mutex_destroy(&iter->mutex);
6522 	kfree(iter);
6523 
6524 	trace_array_put(tr);
6525 
6526 	return 0;
6527 }
6528 
6529 static __poll_t
6530 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6531 {
6532 	struct trace_array *tr = iter->tr;
6533 
6534 	/* Iterators are static, they should be filled or empty */
6535 	if (trace_buffer_iter(iter, iter->cpu_file))
6536 		return EPOLLIN | EPOLLRDNORM;
6537 
6538 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6539 		/*
6540 		 * Always select as readable when in blocking mode
6541 		 */
6542 		return EPOLLIN | EPOLLRDNORM;
6543 	else
6544 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6545 					     filp, poll_table);
6546 }
6547 
6548 static __poll_t
6549 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6550 {
6551 	struct trace_iterator *iter = filp->private_data;
6552 
6553 	return trace_poll(iter, filp, poll_table);
6554 }
6555 
6556 /* Must be called with iter->mutex held. */
6557 static int tracing_wait_pipe(struct file *filp)
6558 {
6559 	struct trace_iterator *iter = filp->private_data;
6560 	int ret;
6561 
6562 	while (trace_empty(iter)) {
6563 
6564 		if ((filp->f_flags & O_NONBLOCK)) {
6565 			return -EAGAIN;
6566 		}
6567 
6568 		/*
6569 		 * We block until we read something and tracing is disabled.
6570 		 * We still block if tracing is disabled, but we have never
6571 		 * read anything. This allows a user to cat this file, and
6572 		 * then enable tracing. But after we have read something,
6573 		 * we give an EOF when tracing is again disabled.
6574 		 *
6575 		 * iter->pos will be 0 if we haven't read anything.
6576 		 */
6577 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6578 			break;
6579 
6580 		mutex_unlock(&iter->mutex);
6581 
6582 		ret = wait_on_pipe(iter, 0);
6583 
6584 		mutex_lock(&iter->mutex);
6585 
6586 		if (ret)
6587 			return ret;
6588 	}
6589 
6590 	return 1;
6591 }
6592 
6593 /*
6594  * Consumer reader.
6595  */
6596 static ssize_t
6597 tracing_read_pipe(struct file *filp, char __user *ubuf,
6598 		  size_t cnt, loff_t *ppos)
6599 {
6600 	struct trace_iterator *iter = filp->private_data;
6601 	ssize_t sret;
6602 
6603 	/*
6604 	 * Avoid more than one consumer on a single file descriptor
6605 	 * This is just a matter of traces coherency, the ring buffer itself
6606 	 * is protected.
6607 	 */
6608 	mutex_lock(&iter->mutex);
6609 
6610 	/* return any leftover data */
6611 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6612 	if (sret != -EBUSY)
6613 		goto out;
6614 
6615 	trace_seq_init(&iter->seq);
6616 
6617 	if (iter->trace->read) {
6618 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6619 		if (sret)
6620 			goto out;
6621 	}
6622 
6623 waitagain:
6624 	sret = tracing_wait_pipe(filp);
6625 	if (sret <= 0)
6626 		goto out;
6627 
6628 	/* stop when tracing is finished */
6629 	if (trace_empty(iter)) {
6630 		sret = 0;
6631 		goto out;
6632 	}
6633 
6634 	if (cnt >= PAGE_SIZE)
6635 		cnt = PAGE_SIZE - 1;
6636 
6637 	/* reset all but tr, trace, and overruns */
6638 	memset(&iter->seq, 0,
6639 	       sizeof(struct trace_iterator) -
6640 	       offsetof(struct trace_iterator, seq));
6641 	cpumask_clear(iter->started);
6642 	trace_seq_init(&iter->seq);
6643 	iter->pos = -1;
6644 
6645 	trace_event_read_lock();
6646 	trace_access_lock(iter->cpu_file);
6647 	while (trace_find_next_entry_inc(iter) != NULL) {
6648 		enum print_line_t ret;
6649 		int save_len = iter->seq.seq.len;
6650 
6651 		ret = print_trace_line(iter);
6652 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6653 			/* don't print partial lines */
6654 			iter->seq.seq.len = save_len;
6655 			break;
6656 		}
6657 		if (ret != TRACE_TYPE_NO_CONSUME)
6658 			trace_consume(iter);
6659 
6660 		if (trace_seq_used(&iter->seq) >= cnt)
6661 			break;
6662 
6663 		/*
6664 		 * Setting the full flag means we reached the trace_seq buffer
6665 		 * size and we should leave by partial output condition above.
6666 		 * One of the trace_seq_* functions is not used properly.
6667 		 */
6668 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6669 			  iter->ent->type);
6670 	}
6671 	trace_access_unlock(iter->cpu_file);
6672 	trace_event_read_unlock();
6673 
6674 	/* Now copy what we have to the user */
6675 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6676 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6677 		trace_seq_init(&iter->seq);
6678 
6679 	/*
6680 	 * If there was nothing to send to user, in spite of consuming trace
6681 	 * entries, go back to wait for more entries.
6682 	 */
6683 	if (sret == -EBUSY)
6684 		goto waitagain;
6685 
6686 out:
6687 	mutex_unlock(&iter->mutex);
6688 
6689 	return sret;
6690 }
6691 
6692 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6693 				     unsigned int idx)
6694 {
6695 	__free_page(spd->pages[idx]);
6696 }
6697 
6698 static size_t
6699 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6700 {
6701 	size_t count;
6702 	int save_len;
6703 	int ret;
6704 
6705 	/* Seq buffer is page-sized, exactly what we need. */
6706 	for (;;) {
6707 		save_len = iter->seq.seq.len;
6708 		ret = print_trace_line(iter);
6709 
6710 		if (trace_seq_has_overflowed(&iter->seq)) {
6711 			iter->seq.seq.len = save_len;
6712 			break;
6713 		}
6714 
6715 		/*
6716 		 * This should not be hit, because it should only
6717 		 * be set if the iter->seq overflowed. But check it
6718 		 * anyway to be safe.
6719 		 */
6720 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6721 			iter->seq.seq.len = save_len;
6722 			break;
6723 		}
6724 
6725 		count = trace_seq_used(&iter->seq) - save_len;
6726 		if (rem < count) {
6727 			rem = 0;
6728 			iter->seq.seq.len = save_len;
6729 			break;
6730 		}
6731 
6732 		if (ret != TRACE_TYPE_NO_CONSUME)
6733 			trace_consume(iter);
6734 		rem -= count;
6735 		if (!trace_find_next_entry_inc(iter))	{
6736 			rem = 0;
6737 			iter->ent = NULL;
6738 			break;
6739 		}
6740 	}
6741 
6742 	return rem;
6743 }
6744 
6745 static ssize_t tracing_splice_read_pipe(struct file *filp,
6746 					loff_t *ppos,
6747 					struct pipe_inode_info *pipe,
6748 					size_t len,
6749 					unsigned int flags)
6750 {
6751 	struct page *pages_def[PIPE_DEF_BUFFERS];
6752 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6753 	struct trace_iterator *iter = filp->private_data;
6754 	struct splice_pipe_desc spd = {
6755 		.pages		= pages_def,
6756 		.partial	= partial_def,
6757 		.nr_pages	= 0, /* This gets updated below. */
6758 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6759 		.ops		= &default_pipe_buf_ops,
6760 		.spd_release	= tracing_spd_release_pipe,
6761 	};
6762 	ssize_t ret;
6763 	size_t rem;
6764 	unsigned int i;
6765 
6766 	if (splice_grow_spd(pipe, &spd))
6767 		return -ENOMEM;
6768 
6769 	mutex_lock(&iter->mutex);
6770 
6771 	if (iter->trace->splice_read) {
6772 		ret = iter->trace->splice_read(iter, filp,
6773 					       ppos, pipe, len, flags);
6774 		if (ret)
6775 			goto out_err;
6776 	}
6777 
6778 	ret = tracing_wait_pipe(filp);
6779 	if (ret <= 0)
6780 		goto out_err;
6781 
6782 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6783 		ret = -EFAULT;
6784 		goto out_err;
6785 	}
6786 
6787 	trace_event_read_lock();
6788 	trace_access_lock(iter->cpu_file);
6789 
6790 	/* Fill as many pages as possible. */
6791 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6792 		spd.pages[i] = alloc_page(GFP_KERNEL);
6793 		if (!spd.pages[i])
6794 			break;
6795 
6796 		rem = tracing_fill_pipe_page(rem, iter);
6797 
6798 		/* Copy the data into the page, so we can start over. */
6799 		ret = trace_seq_to_buffer(&iter->seq,
6800 					  page_address(spd.pages[i]),
6801 					  trace_seq_used(&iter->seq));
6802 		if (ret < 0) {
6803 			__free_page(spd.pages[i]);
6804 			break;
6805 		}
6806 		spd.partial[i].offset = 0;
6807 		spd.partial[i].len = trace_seq_used(&iter->seq);
6808 
6809 		trace_seq_init(&iter->seq);
6810 	}
6811 
6812 	trace_access_unlock(iter->cpu_file);
6813 	trace_event_read_unlock();
6814 	mutex_unlock(&iter->mutex);
6815 
6816 	spd.nr_pages = i;
6817 
6818 	if (i)
6819 		ret = splice_to_pipe(pipe, &spd);
6820 	else
6821 		ret = 0;
6822 out:
6823 	splice_shrink_spd(&spd);
6824 	return ret;
6825 
6826 out_err:
6827 	mutex_unlock(&iter->mutex);
6828 	goto out;
6829 }
6830 
6831 static ssize_t
6832 tracing_entries_read(struct file *filp, char __user *ubuf,
6833 		     size_t cnt, loff_t *ppos)
6834 {
6835 	struct inode *inode = file_inode(filp);
6836 	struct trace_array *tr = inode->i_private;
6837 	int cpu = tracing_get_cpu(inode);
6838 	char buf[64];
6839 	int r = 0;
6840 	ssize_t ret;
6841 
6842 	mutex_lock(&trace_types_lock);
6843 
6844 	if (cpu == RING_BUFFER_ALL_CPUS) {
6845 		int cpu, buf_size_same;
6846 		unsigned long size;
6847 
6848 		size = 0;
6849 		buf_size_same = 1;
6850 		/* check if all cpu sizes are same */
6851 		for_each_tracing_cpu(cpu) {
6852 			/* fill in the size from first enabled cpu */
6853 			if (size == 0)
6854 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6855 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6856 				buf_size_same = 0;
6857 				break;
6858 			}
6859 		}
6860 
6861 		if (buf_size_same) {
6862 			if (!ring_buffer_expanded)
6863 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6864 					    size >> 10,
6865 					    trace_buf_size >> 10);
6866 			else
6867 				r = sprintf(buf, "%lu\n", size >> 10);
6868 		} else
6869 			r = sprintf(buf, "X\n");
6870 	} else
6871 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6872 
6873 	mutex_unlock(&trace_types_lock);
6874 
6875 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6876 	return ret;
6877 }
6878 
6879 static ssize_t
6880 tracing_entries_write(struct file *filp, const char __user *ubuf,
6881 		      size_t cnt, loff_t *ppos)
6882 {
6883 	struct inode *inode = file_inode(filp);
6884 	struct trace_array *tr = inode->i_private;
6885 	unsigned long val;
6886 	int ret;
6887 
6888 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6889 	if (ret)
6890 		return ret;
6891 
6892 	/* must have at least 1 entry */
6893 	if (!val)
6894 		return -EINVAL;
6895 
6896 	/* value is in KB */
6897 	val <<= 10;
6898 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6899 	if (ret < 0)
6900 		return ret;
6901 
6902 	*ppos += cnt;
6903 
6904 	return cnt;
6905 }
6906 
6907 static ssize_t
6908 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6909 				size_t cnt, loff_t *ppos)
6910 {
6911 	struct trace_array *tr = filp->private_data;
6912 	char buf[64];
6913 	int r, cpu;
6914 	unsigned long size = 0, expanded_size = 0;
6915 
6916 	mutex_lock(&trace_types_lock);
6917 	for_each_tracing_cpu(cpu) {
6918 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6919 		if (!ring_buffer_expanded)
6920 			expanded_size += trace_buf_size >> 10;
6921 	}
6922 	if (ring_buffer_expanded)
6923 		r = sprintf(buf, "%lu\n", size);
6924 	else
6925 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6926 	mutex_unlock(&trace_types_lock);
6927 
6928 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6929 }
6930 
6931 static ssize_t
6932 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6933 			  size_t cnt, loff_t *ppos)
6934 {
6935 	/*
6936 	 * There is no need to read what the user has written, this function
6937 	 * is just to make sure that there is no error when "echo" is used
6938 	 */
6939 
6940 	*ppos += cnt;
6941 
6942 	return cnt;
6943 }
6944 
6945 static int
6946 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6947 {
6948 	struct trace_array *tr = inode->i_private;
6949 
6950 	/* disable tracing ? */
6951 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6952 		tracer_tracing_off(tr);
6953 	/* resize the ring buffer to 0 */
6954 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6955 
6956 	trace_array_put(tr);
6957 
6958 	return 0;
6959 }
6960 
6961 static ssize_t
6962 tracing_mark_write(struct file *filp, const char __user *ubuf,
6963 					size_t cnt, loff_t *fpos)
6964 {
6965 	struct trace_array *tr = filp->private_data;
6966 	struct ring_buffer_event *event;
6967 	enum event_trigger_type tt = ETT_NONE;
6968 	struct trace_buffer *buffer;
6969 	struct print_entry *entry;
6970 	ssize_t written;
6971 	int size;
6972 	int len;
6973 
6974 /* Used in tracing_mark_raw_write() as well */
6975 #define FAULTED_STR "<faulted>"
6976 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6977 
6978 	if (tracing_disabled)
6979 		return -EINVAL;
6980 
6981 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6982 		return -EINVAL;
6983 
6984 	if (cnt > TRACE_BUF_SIZE)
6985 		cnt = TRACE_BUF_SIZE;
6986 
6987 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6988 
6989 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6990 
6991 	/* If less than "<faulted>", then make sure we can still add that */
6992 	if (cnt < FAULTED_SIZE)
6993 		size += FAULTED_SIZE - cnt;
6994 
6995 	buffer = tr->array_buffer.buffer;
6996 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6997 					    tracing_gen_ctx());
6998 	if (unlikely(!event))
6999 		/* Ring buffer disabled, return as if not open for write */
7000 		return -EBADF;
7001 
7002 	entry = ring_buffer_event_data(event);
7003 	entry->ip = _THIS_IP_;
7004 
7005 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7006 	if (len) {
7007 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7008 		cnt = FAULTED_SIZE;
7009 		written = -EFAULT;
7010 	} else
7011 		written = cnt;
7012 
7013 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7014 		/* do not add \n before testing triggers, but add \0 */
7015 		entry->buf[cnt] = '\0';
7016 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7017 	}
7018 
7019 	if (entry->buf[cnt - 1] != '\n') {
7020 		entry->buf[cnt] = '\n';
7021 		entry->buf[cnt + 1] = '\0';
7022 	} else
7023 		entry->buf[cnt] = '\0';
7024 
7025 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7026 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7027 	__buffer_unlock_commit(buffer, event);
7028 
7029 	if (tt)
7030 		event_triggers_post_call(tr->trace_marker_file, tt);
7031 
7032 	if (written > 0)
7033 		*fpos += written;
7034 
7035 	return written;
7036 }
7037 
7038 /* Limit it for now to 3K (including tag) */
7039 #define RAW_DATA_MAX_SIZE (1024*3)
7040 
7041 static ssize_t
7042 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7043 					size_t cnt, loff_t *fpos)
7044 {
7045 	struct trace_array *tr = filp->private_data;
7046 	struct ring_buffer_event *event;
7047 	struct trace_buffer *buffer;
7048 	struct raw_data_entry *entry;
7049 	ssize_t written;
7050 	int size;
7051 	int len;
7052 
7053 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7054 
7055 	if (tracing_disabled)
7056 		return -EINVAL;
7057 
7058 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7059 		return -EINVAL;
7060 
7061 	/* The marker must at least have a tag id */
7062 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7063 		return -EINVAL;
7064 
7065 	if (cnt > TRACE_BUF_SIZE)
7066 		cnt = TRACE_BUF_SIZE;
7067 
7068 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7069 
7070 	size = sizeof(*entry) + cnt;
7071 	if (cnt < FAULT_SIZE_ID)
7072 		size += FAULT_SIZE_ID - cnt;
7073 
7074 	buffer = tr->array_buffer.buffer;
7075 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7076 					    tracing_gen_ctx());
7077 	if (!event)
7078 		/* Ring buffer disabled, return as if not open for write */
7079 		return -EBADF;
7080 
7081 	entry = ring_buffer_event_data(event);
7082 
7083 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7084 	if (len) {
7085 		entry->id = -1;
7086 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7087 		written = -EFAULT;
7088 	} else
7089 		written = cnt;
7090 
7091 	__buffer_unlock_commit(buffer, event);
7092 
7093 	if (written > 0)
7094 		*fpos += written;
7095 
7096 	return written;
7097 }
7098 
7099 static int tracing_clock_show(struct seq_file *m, void *v)
7100 {
7101 	struct trace_array *tr = m->private;
7102 	int i;
7103 
7104 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7105 		seq_printf(m,
7106 			"%s%s%s%s", i ? " " : "",
7107 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7108 			i == tr->clock_id ? "]" : "");
7109 	seq_putc(m, '\n');
7110 
7111 	return 0;
7112 }
7113 
7114 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7115 {
7116 	int i;
7117 
7118 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7119 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7120 			break;
7121 	}
7122 	if (i == ARRAY_SIZE(trace_clocks))
7123 		return -EINVAL;
7124 
7125 	mutex_lock(&trace_types_lock);
7126 
7127 	tr->clock_id = i;
7128 
7129 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7130 
7131 	/*
7132 	 * New clock may not be consistent with the previous clock.
7133 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7134 	 */
7135 	tracing_reset_online_cpus(&tr->array_buffer);
7136 
7137 #ifdef CONFIG_TRACER_MAX_TRACE
7138 	if (tr->max_buffer.buffer)
7139 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7140 	tracing_reset_online_cpus(&tr->max_buffer);
7141 #endif
7142 
7143 	mutex_unlock(&trace_types_lock);
7144 
7145 	return 0;
7146 }
7147 
7148 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7149 				   size_t cnt, loff_t *fpos)
7150 {
7151 	struct seq_file *m = filp->private_data;
7152 	struct trace_array *tr = m->private;
7153 	char buf[64];
7154 	const char *clockstr;
7155 	int ret;
7156 
7157 	if (cnt >= sizeof(buf))
7158 		return -EINVAL;
7159 
7160 	if (copy_from_user(buf, ubuf, cnt))
7161 		return -EFAULT;
7162 
7163 	buf[cnt] = 0;
7164 
7165 	clockstr = strstrip(buf);
7166 
7167 	ret = tracing_set_clock(tr, clockstr);
7168 	if (ret)
7169 		return ret;
7170 
7171 	*fpos += cnt;
7172 
7173 	return cnt;
7174 }
7175 
7176 static int tracing_clock_open(struct inode *inode, struct file *file)
7177 {
7178 	struct trace_array *tr = inode->i_private;
7179 	int ret;
7180 
7181 	ret = tracing_check_open_get_tr(tr);
7182 	if (ret)
7183 		return ret;
7184 
7185 	ret = single_open(file, tracing_clock_show, inode->i_private);
7186 	if (ret < 0)
7187 		trace_array_put(tr);
7188 
7189 	return ret;
7190 }
7191 
7192 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7193 {
7194 	struct trace_array *tr = m->private;
7195 
7196 	mutex_lock(&trace_types_lock);
7197 
7198 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7199 		seq_puts(m, "delta [absolute]\n");
7200 	else
7201 		seq_puts(m, "[delta] absolute\n");
7202 
7203 	mutex_unlock(&trace_types_lock);
7204 
7205 	return 0;
7206 }
7207 
7208 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7209 {
7210 	struct trace_array *tr = inode->i_private;
7211 	int ret;
7212 
7213 	ret = tracing_check_open_get_tr(tr);
7214 	if (ret)
7215 		return ret;
7216 
7217 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7218 	if (ret < 0)
7219 		trace_array_put(tr);
7220 
7221 	return ret;
7222 }
7223 
7224 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7225 {
7226 	if (rbe == this_cpu_read(trace_buffered_event))
7227 		return ring_buffer_time_stamp(buffer);
7228 
7229 	return ring_buffer_event_time_stamp(buffer, rbe);
7230 }
7231 
7232 /*
7233  * Set or disable using the per CPU trace_buffer_event when possible.
7234  */
7235 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7236 {
7237 	int ret = 0;
7238 
7239 	mutex_lock(&trace_types_lock);
7240 
7241 	if (set && tr->no_filter_buffering_ref++)
7242 		goto out;
7243 
7244 	if (!set) {
7245 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7246 			ret = -EINVAL;
7247 			goto out;
7248 		}
7249 
7250 		--tr->no_filter_buffering_ref;
7251 	}
7252  out:
7253 	mutex_unlock(&trace_types_lock);
7254 
7255 	return ret;
7256 }
7257 
7258 struct ftrace_buffer_info {
7259 	struct trace_iterator	iter;
7260 	void			*spare;
7261 	unsigned int		spare_cpu;
7262 	unsigned int		read;
7263 };
7264 
7265 #ifdef CONFIG_TRACER_SNAPSHOT
7266 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7267 {
7268 	struct trace_array *tr = inode->i_private;
7269 	struct trace_iterator *iter;
7270 	struct seq_file *m;
7271 	int ret;
7272 
7273 	ret = tracing_check_open_get_tr(tr);
7274 	if (ret)
7275 		return ret;
7276 
7277 	if (file->f_mode & FMODE_READ) {
7278 		iter = __tracing_open(inode, file, true);
7279 		if (IS_ERR(iter))
7280 			ret = PTR_ERR(iter);
7281 	} else {
7282 		/* Writes still need the seq_file to hold the private data */
7283 		ret = -ENOMEM;
7284 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7285 		if (!m)
7286 			goto out;
7287 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7288 		if (!iter) {
7289 			kfree(m);
7290 			goto out;
7291 		}
7292 		ret = 0;
7293 
7294 		iter->tr = tr;
7295 		iter->array_buffer = &tr->max_buffer;
7296 		iter->cpu_file = tracing_get_cpu(inode);
7297 		m->private = iter;
7298 		file->private_data = m;
7299 	}
7300 out:
7301 	if (ret < 0)
7302 		trace_array_put(tr);
7303 
7304 	return ret;
7305 }
7306 
7307 static ssize_t
7308 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7309 		       loff_t *ppos)
7310 {
7311 	struct seq_file *m = filp->private_data;
7312 	struct trace_iterator *iter = m->private;
7313 	struct trace_array *tr = iter->tr;
7314 	unsigned long val;
7315 	int ret;
7316 
7317 	ret = tracing_update_buffers();
7318 	if (ret < 0)
7319 		return ret;
7320 
7321 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7322 	if (ret)
7323 		return ret;
7324 
7325 	mutex_lock(&trace_types_lock);
7326 
7327 	if (tr->current_trace->use_max_tr) {
7328 		ret = -EBUSY;
7329 		goto out;
7330 	}
7331 
7332 	arch_spin_lock(&tr->max_lock);
7333 	if (tr->cond_snapshot)
7334 		ret = -EBUSY;
7335 	arch_spin_unlock(&tr->max_lock);
7336 	if (ret)
7337 		goto out;
7338 
7339 	switch (val) {
7340 	case 0:
7341 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7342 			ret = -EINVAL;
7343 			break;
7344 		}
7345 		if (tr->allocated_snapshot)
7346 			free_snapshot(tr);
7347 		break;
7348 	case 1:
7349 /* Only allow per-cpu swap if the ring buffer supports it */
7350 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7351 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7352 			ret = -EINVAL;
7353 			break;
7354 		}
7355 #endif
7356 		if (tr->allocated_snapshot)
7357 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7358 					&tr->array_buffer, iter->cpu_file);
7359 		else
7360 			ret = tracing_alloc_snapshot_instance(tr);
7361 		if (ret < 0)
7362 			break;
7363 		local_irq_disable();
7364 		/* Now, we're going to swap */
7365 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7366 			update_max_tr(tr, current, smp_processor_id(), NULL);
7367 		else
7368 			update_max_tr_single(tr, current, iter->cpu_file);
7369 		local_irq_enable();
7370 		break;
7371 	default:
7372 		if (tr->allocated_snapshot) {
7373 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7374 				tracing_reset_online_cpus(&tr->max_buffer);
7375 			else
7376 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7377 		}
7378 		break;
7379 	}
7380 
7381 	if (ret >= 0) {
7382 		*ppos += cnt;
7383 		ret = cnt;
7384 	}
7385 out:
7386 	mutex_unlock(&trace_types_lock);
7387 	return ret;
7388 }
7389 
7390 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7391 {
7392 	struct seq_file *m = file->private_data;
7393 	int ret;
7394 
7395 	ret = tracing_release(inode, file);
7396 
7397 	if (file->f_mode & FMODE_READ)
7398 		return ret;
7399 
7400 	/* If write only, the seq_file is just a stub */
7401 	if (m)
7402 		kfree(m->private);
7403 	kfree(m);
7404 
7405 	return 0;
7406 }
7407 
7408 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7409 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7410 				    size_t count, loff_t *ppos);
7411 static int tracing_buffers_release(struct inode *inode, struct file *file);
7412 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7413 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7414 
7415 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7416 {
7417 	struct ftrace_buffer_info *info;
7418 	int ret;
7419 
7420 	/* The following checks for tracefs lockdown */
7421 	ret = tracing_buffers_open(inode, filp);
7422 	if (ret < 0)
7423 		return ret;
7424 
7425 	info = filp->private_data;
7426 
7427 	if (info->iter.trace->use_max_tr) {
7428 		tracing_buffers_release(inode, filp);
7429 		return -EBUSY;
7430 	}
7431 
7432 	info->iter.snapshot = true;
7433 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7434 
7435 	return ret;
7436 }
7437 
7438 #endif /* CONFIG_TRACER_SNAPSHOT */
7439 
7440 
7441 static const struct file_operations tracing_thresh_fops = {
7442 	.open		= tracing_open_generic,
7443 	.read		= tracing_thresh_read,
7444 	.write		= tracing_thresh_write,
7445 	.llseek		= generic_file_llseek,
7446 };
7447 
7448 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7449 static const struct file_operations tracing_max_lat_fops = {
7450 	.open		= tracing_open_generic,
7451 	.read		= tracing_max_lat_read,
7452 	.write		= tracing_max_lat_write,
7453 	.llseek		= generic_file_llseek,
7454 };
7455 #endif
7456 
7457 static const struct file_operations set_tracer_fops = {
7458 	.open		= tracing_open_generic,
7459 	.read		= tracing_set_trace_read,
7460 	.write		= tracing_set_trace_write,
7461 	.llseek		= generic_file_llseek,
7462 };
7463 
7464 static const struct file_operations tracing_pipe_fops = {
7465 	.open		= tracing_open_pipe,
7466 	.poll		= tracing_poll_pipe,
7467 	.read		= tracing_read_pipe,
7468 	.splice_read	= tracing_splice_read_pipe,
7469 	.release	= tracing_release_pipe,
7470 	.llseek		= no_llseek,
7471 };
7472 
7473 static const struct file_operations tracing_entries_fops = {
7474 	.open		= tracing_open_generic_tr,
7475 	.read		= tracing_entries_read,
7476 	.write		= tracing_entries_write,
7477 	.llseek		= generic_file_llseek,
7478 	.release	= tracing_release_generic_tr,
7479 };
7480 
7481 static const struct file_operations tracing_total_entries_fops = {
7482 	.open		= tracing_open_generic_tr,
7483 	.read		= tracing_total_entries_read,
7484 	.llseek		= generic_file_llseek,
7485 	.release	= tracing_release_generic_tr,
7486 };
7487 
7488 static const struct file_operations tracing_free_buffer_fops = {
7489 	.open		= tracing_open_generic_tr,
7490 	.write		= tracing_free_buffer_write,
7491 	.release	= tracing_free_buffer_release,
7492 };
7493 
7494 static const struct file_operations tracing_mark_fops = {
7495 	.open		= tracing_open_generic_tr,
7496 	.write		= tracing_mark_write,
7497 	.llseek		= generic_file_llseek,
7498 	.release	= tracing_release_generic_tr,
7499 };
7500 
7501 static const struct file_operations tracing_mark_raw_fops = {
7502 	.open		= tracing_open_generic_tr,
7503 	.write		= tracing_mark_raw_write,
7504 	.llseek		= generic_file_llseek,
7505 	.release	= tracing_release_generic_tr,
7506 };
7507 
7508 static const struct file_operations trace_clock_fops = {
7509 	.open		= tracing_clock_open,
7510 	.read		= seq_read,
7511 	.llseek		= seq_lseek,
7512 	.release	= tracing_single_release_tr,
7513 	.write		= tracing_clock_write,
7514 };
7515 
7516 static const struct file_operations trace_time_stamp_mode_fops = {
7517 	.open		= tracing_time_stamp_mode_open,
7518 	.read		= seq_read,
7519 	.llseek		= seq_lseek,
7520 	.release	= tracing_single_release_tr,
7521 };
7522 
7523 #ifdef CONFIG_TRACER_SNAPSHOT
7524 static const struct file_operations snapshot_fops = {
7525 	.open		= tracing_snapshot_open,
7526 	.read		= seq_read,
7527 	.write		= tracing_snapshot_write,
7528 	.llseek		= tracing_lseek,
7529 	.release	= tracing_snapshot_release,
7530 };
7531 
7532 static const struct file_operations snapshot_raw_fops = {
7533 	.open		= snapshot_raw_open,
7534 	.read		= tracing_buffers_read,
7535 	.release	= tracing_buffers_release,
7536 	.splice_read	= tracing_buffers_splice_read,
7537 	.llseek		= no_llseek,
7538 };
7539 
7540 #endif /* CONFIG_TRACER_SNAPSHOT */
7541 
7542 #define TRACING_LOG_ERRS_MAX	8
7543 #define TRACING_LOG_LOC_MAX	128
7544 
7545 #define CMD_PREFIX "  Command: "
7546 
7547 struct err_info {
7548 	const char	**errs;	/* ptr to loc-specific array of err strings */
7549 	u8		type;	/* index into errs -> specific err string */
7550 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7551 	u64		ts;
7552 };
7553 
7554 struct tracing_log_err {
7555 	struct list_head	list;
7556 	struct err_info		info;
7557 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7558 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7559 };
7560 
7561 static DEFINE_MUTEX(tracing_err_log_lock);
7562 
7563 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7564 {
7565 	struct tracing_log_err *err;
7566 
7567 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7568 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7569 		if (!err)
7570 			err = ERR_PTR(-ENOMEM);
7571 		tr->n_err_log_entries++;
7572 
7573 		return err;
7574 	}
7575 
7576 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7577 	list_del(&err->list);
7578 
7579 	return err;
7580 }
7581 
7582 /**
7583  * err_pos - find the position of a string within a command for error careting
7584  * @cmd: The tracing command that caused the error
7585  * @str: The string to position the caret at within @cmd
7586  *
7587  * Finds the position of the first occurrence of @str within @cmd.  The
7588  * return value can be passed to tracing_log_err() for caret placement
7589  * within @cmd.
7590  *
7591  * Returns the index within @cmd of the first occurrence of @str or 0
7592  * if @str was not found.
7593  */
7594 unsigned int err_pos(char *cmd, const char *str)
7595 {
7596 	char *found;
7597 
7598 	if (WARN_ON(!strlen(cmd)))
7599 		return 0;
7600 
7601 	found = strstr(cmd, str);
7602 	if (found)
7603 		return found - cmd;
7604 
7605 	return 0;
7606 }
7607 
7608 /**
7609  * tracing_log_err - write an error to the tracing error log
7610  * @tr: The associated trace array for the error (NULL for top level array)
7611  * @loc: A string describing where the error occurred
7612  * @cmd: The tracing command that caused the error
7613  * @errs: The array of loc-specific static error strings
7614  * @type: The index into errs[], which produces the specific static err string
7615  * @pos: The position the caret should be placed in the cmd
7616  *
7617  * Writes an error into tracing/error_log of the form:
7618  *
7619  * <loc>: error: <text>
7620  *   Command: <cmd>
7621  *              ^
7622  *
7623  * tracing/error_log is a small log file containing the last
7624  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7625  * unless there has been a tracing error, and the error log can be
7626  * cleared and have its memory freed by writing the empty string in
7627  * truncation mode to it i.e. echo > tracing/error_log.
7628  *
7629  * NOTE: the @errs array along with the @type param are used to
7630  * produce a static error string - this string is not copied and saved
7631  * when the error is logged - only a pointer to it is saved.  See
7632  * existing callers for examples of how static strings are typically
7633  * defined for use with tracing_log_err().
7634  */
7635 void tracing_log_err(struct trace_array *tr,
7636 		     const char *loc, const char *cmd,
7637 		     const char **errs, u8 type, u8 pos)
7638 {
7639 	struct tracing_log_err *err;
7640 
7641 	if (!tr)
7642 		tr = &global_trace;
7643 
7644 	mutex_lock(&tracing_err_log_lock);
7645 	err = get_tracing_log_err(tr);
7646 	if (PTR_ERR(err) == -ENOMEM) {
7647 		mutex_unlock(&tracing_err_log_lock);
7648 		return;
7649 	}
7650 
7651 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7652 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7653 
7654 	err->info.errs = errs;
7655 	err->info.type = type;
7656 	err->info.pos = pos;
7657 	err->info.ts = local_clock();
7658 
7659 	list_add_tail(&err->list, &tr->err_log);
7660 	mutex_unlock(&tracing_err_log_lock);
7661 }
7662 
7663 static void clear_tracing_err_log(struct trace_array *tr)
7664 {
7665 	struct tracing_log_err *err, *next;
7666 
7667 	mutex_lock(&tracing_err_log_lock);
7668 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7669 		list_del(&err->list);
7670 		kfree(err);
7671 	}
7672 
7673 	tr->n_err_log_entries = 0;
7674 	mutex_unlock(&tracing_err_log_lock);
7675 }
7676 
7677 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7678 {
7679 	struct trace_array *tr = m->private;
7680 
7681 	mutex_lock(&tracing_err_log_lock);
7682 
7683 	return seq_list_start(&tr->err_log, *pos);
7684 }
7685 
7686 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7687 {
7688 	struct trace_array *tr = m->private;
7689 
7690 	return seq_list_next(v, &tr->err_log, pos);
7691 }
7692 
7693 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7694 {
7695 	mutex_unlock(&tracing_err_log_lock);
7696 }
7697 
7698 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7699 {
7700 	u8 i;
7701 
7702 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7703 		seq_putc(m, ' ');
7704 	for (i = 0; i < pos; i++)
7705 		seq_putc(m, ' ');
7706 	seq_puts(m, "^\n");
7707 }
7708 
7709 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7710 {
7711 	struct tracing_log_err *err = v;
7712 
7713 	if (err) {
7714 		const char *err_text = err->info.errs[err->info.type];
7715 		u64 sec = err->info.ts;
7716 		u32 nsec;
7717 
7718 		nsec = do_div(sec, NSEC_PER_SEC);
7719 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7720 			   err->loc, err_text);
7721 		seq_printf(m, "%s", err->cmd);
7722 		tracing_err_log_show_pos(m, err->info.pos);
7723 	}
7724 
7725 	return 0;
7726 }
7727 
7728 static const struct seq_operations tracing_err_log_seq_ops = {
7729 	.start  = tracing_err_log_seq_start,
7730 	.next   = tracing_err_log_seq_next,
7731 	.stop   = tracing_err_log_seq_stop,
7732 	.show   = tracing_err_log_seq_show
7733 };
7734 
7735 static int tracing_err_log_open(struct inode *inode, struct file *file)
7736 {
7737 	struct trace_array *tr = inode->i_private;
7738 	int ret = 0;
7739 
7740 	ret = tracing_check_open_get_tr(tr);
7741 	if (ret)
7742 		return ret;
7743 
7744 	/* If this file was opened for write, then erase contents */
7745 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7746 		clear_tracing_err_log(tr);
7747 
7748 	if (file->f_mode & FMODE_READ) {
7749 		ret = seq_open(file, &tracing_err_log_seq_ops);
7750 		if (!ret) {
7751 			struct seq_file *m = file->private_data;
7752 			m->private = tr;
7753 		} else {
7754 			trace_array_put(tr);
7755 		}
7756 	}
7757 	return ret;
7758 }
7759 
7760 static ssize_t tracing_err_log_write(struct file *file,
7761 				     const char __user *buffer,
7762 				     size_t count, loff_t *ppos)
7763 {
7764 	return count;
7765 }
7766 
7767 static int tracing_err_log_release(struct inode *inode, struct file *file)
7768 {
7769 	struct trace_array *tr = inode->i_private;
7770 
7771 	trace_array_put(tr);
7772 
7773 	if (file->f_mode & FMODE_READ)
7774 		seq_release(inode, file);
7775 
7776 	return 0;
7777 }
7778 
7779 static const struct file_operations tracing_err_log_fops = {
7780 	.open           = tracing_err_log_open,
7781 	.write		= tracing_err_log_write,
7782 	.read           = seq_read,
7783 	.llseek         = seq_lseek,
7784 	.release        = tracing_err_log_release,
7785 };
7786 
7787 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7788 {
7789 	struct trace_array *tr = inode->i_private;
7790 	struct ftrace_buffer_info *info;
7791 	int ret;
7792 
7793 	ret = tracing_check_open_get_tr(tr);
7794 	if (ret)
7795 		return ret;
7796 
7797 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7798 	if (!info) {
7799 		trace_array_put(tr);
7800 		return -ENOMEM;
7801 	}
7802 
7803 	mutex_lock(&trace_types_lock);
7804 
7805 	info->iter.tr		= tr;
7806 	info->iter.cpu_file	= tracing_get_cpu(inode);
7807 	info->iter.trace	= tr->current_trace;
7808 	info->iter.array_buffer = &tr->array_buffer;
7809 	info->spare		= NULL;
7810 	/* Force reading ring buffer for first read */
7811 	info->read		= (unsigned int)-1;
7812 
7813 	filp->private_data = info;
7814 
7815 	tr->trace_ref++;
7816 
7817 	mutex_unlock(&trace_types_lock);
7818 
7819 	ret = nonseekable_open(inode, filp);
7820 	if (ret < 0)
7821 		trace_array_put(tr);
7822 
7823 	return ret;
7824 }
7825 
7826 static __poll_t
7827 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7828 {
7829 	struct ftrace_buffer_info *info = filp->private_data;
7830 	struct trace_iterator *iter = &info->iter;
7831 
7832 	return trace_poll(iter, filp, poll_table);
7833 }
7834 
7835 static ssize_t
7836 tracing_buffers_read(struct file *filp, char __user *ubuf,
7837 		     size_t count, loff_t *ppos)
7838 {
7839 	struct ftrace_buffer_info *info = filp->private_data;
7840 	struct trace_iterator *iter = &info->iter;
7841 	ssize_t ret = 0;
7842 	ssize_t size;
7843 
7844 	if (!count)
7845 		return 0;
7846 
7847 #ifdef CONFIG_TRACER_MAX_TRACE
7848 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7849 		return -EBUSY;
7850 #endif
7851 
7852 	if (!info->spare) {
7853 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7854 							  iter->cpu_file);
7855 		if (IS_ERR(info->spare)) {
7856 			ret = PTR_ERR(info->spare);
7857 			info->spare = NULL;
7858 		} else {
7859 			info->spare_cpu = iter->cpu_file;
7860 		}
7861 	}
7862 	if (!info->spare)
7863 		return ret;
7864 
7865 	/* Do we have previous read data to read? */
7866 	if (info->read < PAGE_SIZE)
7867 		goto read;
7868 
7869  again:
7870 	trace_access_lock(iter->cpu_file);
7871 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7872 				    &info->spare,
7873 				    count,
7874 				    iter->cpu_file, 0);
7875 	trace_access_unlock(iter->cpu_file);
7876 
7877 	if (ret < 0) {
7878 		if (trace_empty(iter)) {
7879 			if ((filp->f_flags & O_NONBLOCK))
7880 				return -EAGAIN;
7881 
7882 			ret = wait_on_pipe(iter, 0);
7883 			if (ret)
7884 				return ret;
7885 
7886 			goto again;
7887 		}
7888 		return 0;
7889 	}
7890 
7891 	info->read = 0;
7892  read:
7893 	size = PAGE_SIZE - info->read;
7894 	if (size > count)
7895 		size = count;
7896 
7897 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7898 	if (ret == size)
7899 		return -EFAULT;
7900 
7901 	size -= ret;
7902 
7903 	*ppos += size;
7904 	info->read += size;
7905 
7906 	return size;
7907 }
7908 
7909 static int tracing_buffers_release(struct inode *inode, struct file *file)
7910 {
7911 	struct ftrace_buffer_info *info = file->private_data;
7912 	struct trace_iterator *iter = &info->iter;
7913 
7914 	mutex_lock(&trace_types_lock);
7915 
7916 	iter->tr->trace_ref--;
7917 
7918 	__trace_array_put(iter->tr);
7919 
7920 	if (info->spare)
7921 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7922 					   info->spare_cpu, info->spare);
7923 	kvfree(info);
7924 
7925 	mutex_unlock(&trace_types_lock);
7926 
7927 	return 0;
7928 }
7929 
7930 struct buffer_ref {
7931 	struct trace_buffer	*buffer;
7932 	void			*page;
7933 	int			cpu;
7934 	refcount_t		refcount;
7935 };
7936 
7937 static void buffer_ref_release(struct buffer_ref *ref)
7938 {
7939 	if (!refcount_dec_and_test(&ref->refcount))
7940 		return;
7941 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7942 	kfree(ref);
7943 }
7944 
7945 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7946 				    struct pipe_buffer *buf)
7947 {
7948 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7949 
7950 	buffer_ref_release(ref);
7951 	buf->private = 0;
7952 }
7953 
7954 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7955 				struct pipe_buffer *buf)
7956 {
7957 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7958 
7959 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7960 		return false;
7961 
7962 	refcount_inc(&ref->refcount);
7963 	return true;
7964 }
7965 
7966 /* Pipe buffer operations for a buffer. */
7967 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7968 	.release		= buffer_pipe_buf_release,
7969 	.get			= buffer_pipe_buf_get,
7970 };
7971 
7972 /*
7973  * Callback from splice_to_pipe(), if we need to release some pages
7974  * at the end of the spd in case we error'ed out in filling the pipe.
7975  */
7976 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7977 {
7978 	struct buffer_ref *ref =
7979 		(struct buffer_ref *)spd->partial[i].private;
7980 
7981 	buffer_ref_release(ref);
7982 	spd->partial[i].private = 0;
7983 }
7984 
7985 static ssize_t
7986 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7987 			    struct pipe_inode_info *pipe, size_t len,
7988 			    unsigned int flags)
7989 {
7990 	struct ftrace_buffer_info *info = file->private_data;
7991 	struct trace_iterator *iter = &info->iter;
7992 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7993 	struct page *pages_def[PIPE_DEF_BUFFERS];
7994 	struct splice_pipe_desc spd = {
7995 		.pages		= pages_def,
7996 		.partial	= partial_def,
7997 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7998 		.ops		= &buffer_pipe_buf_ops,
7999 		.spd_release	= buffer_spd_release,
8000 	};
8001 	struct buffer_ref *ref;
8002 	int entries, i;
8003 	ssize_t ret = 0;
8004 
8005 #ifdef CONFIG_TRACER_MAX_TRACE
8006 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8007 		return -EBUSY;
8008 #endif
8009 
8010 	if (*ppos & (PAGE_SIZE - 1))
8011 		return -EINVAL;
8012 
8013 	if (len & (PAGE_SIZE - 1)) {
8014 		if (len < PAGE_SIZE)
8015 			return -EINVAL;
8016 		len &= PAGE_MASK;
8017 	}
8018 
8019 	if (splice_grow_spd(pipe, &spd))
8020 		return -ENOMEM;
8021 
8022  again:
8023 	trace_access_lock(iter->cpu_file);
8024 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8025 
8026 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8027 		struct page *page;
8028 		int r;
8029 
8030 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8031 		if (!ref) {
8032 			ret = -ENOMEM;
8033 			break;
8034 		}
8035 
8036 		refcount_set(&ref->refcount, 1);
8037 		ref->buffer = iter->array_buffer->buffer;
8038 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8039 		if (IS_ERR(ref->page)) {
8040 			ret = PTR_ERR(ref->page);
8041 			ref->page = NULL;
8042 			kfree(ref);
8043 			break;
8044 		}
8045 		ref->cpu = iter->cpu_file;
8046 
8047 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8048 					  len, iter->cpu_file, 1);
8049 		if (r < 0) {
8050 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8051 						   ref->page);
8052 			kfree(ref);
8053 			break;
8054 		}
8055 
8056 		page = virt_to_page(ref->page);
8057 
8058 		spd.pages[i] = page;
8059 		spd.partial[i].len = PAGE_SIZE;
8060 		spd.partial[i].offset = 0;
8061 		spd.partial[i].private = (unsigned long)ref;
8062 		spd.nr_pages++;
8063 		*ppos += PAGE_SIZE;
8064 
8065 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8066 	}
8067 
8068 	trace_access_unlock(iter->cpu_file);
8069 	spd.nr_pages = i;
8070 
8071 	/* did we read anything? */
8072 	if (!spd.nr_pages) {
8073 		if (ret)
8074 			goto out;
8075 
8076 		ret = -EAGAIN;
8077 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8078 			goto out;
8079 
8080 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8081 		if (ret)
8082 			goto out;
8083 
8084 		goto again;
8085 	}
8086 
8087 	ret = splice_to_pipe(pipe, &spd);
8088 out:
8089 	splice_shrink_spd(&spd);
8090 
8091 	return ret;
8092 }
8093 
8094 static const struct file_operations tracing_buffers_fops = {
8095 	.open		= tracing_buffers_open,
8096 	.read		= tracing_buffers_read,
8097 	.poll		= tracing_buffers_poll,
8098 	.release	= tracing_buffers_release,
8099 	.splice_read	= tracing_buffers_splice_read,
8100 	.llseek		= no_llseek,
8101 };
8102 
8103 static ssize_t
8104 tracing_stats_read(struct file *filp, char __user *ubuf,
8105 		   size_t count, loff_t *ppos)
8106 {
8107 	struct inode *inode = file_inode(filp);
8108 	struct trace_array *tr = inode->i_private;
8109 	struct array_buffer *trace_buf = &tr->array_buffer;
8110 	int cpu = tracing_get_cpu(inode);
8111 	struct trace_seq *s;
8112 	unsigned long cnt;
8113 	unsigned long long t;
8114 	unsigned long usec_rem;
8115 
8116 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8117 	if (!s)
8118 		return -ENOMEM;
8119 
8120 	trace_seq_init(s);
8121 
8122 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8123 	trace_seq_printf(s, "entries: %ld\n", cnt);
8124 
8125 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8126 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8127 
8128 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8129 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8130 
8131 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8132 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8133 
8134 	if (trace_clocks[tr->clock_id].in_ns) {
8135 		/* local or global for trace_clock */
8136 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8137 		usec_rem = do_div(t, USEC_PER_SEC);
8138 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8139 								t, usec_rem);
8140 
8141 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8142 		usec_rem = do_div(t, USEC_PER_SEC);
8143 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8144 	} else {
8145 		/* counter or tsc mode for trace_clock */
8146 		trace_seq_printf(s, "oldest event ts: %llu\n",
8147 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8148 
8149 		trace_seq_printf(s, "now ts: %llu\n",
8150 				ring_buffer_time_stamp(trace_buf->buffer));
8151 	}
8152 
8153 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8154 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8155 
8156 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8157 	trace_seq_printf(s, "read events: %ld\n", cnt);
8158 
8159 	count = simple_read_from_buffer(ubuf, count, ppos,
8160 					s->buffer, trace_seq_used(s));
8161 
8162 	kfree(s);
8163 
8164 	return count;
8165 }
8166 
8167 static const struct file_operations tracing_stats_fops = {
8168 	.open		= tracing_open_generic_tr,
8169 	.read		= tracing_stats_read,
8170 	.llseek		= generic_file_llseek,
8171 	.release	= tracing_release_generic_tr,
8172 };
8173 
8174 #ifdef CONFIG_DYNAMIC_FTRACE
8175 
8176 static ssize_t
8177 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8178 		  size_t cnt, loff_t *ppos)
8179 {
8180 	ssize_t ret;
8181 	char *buf;
8182 	int r;
8183 
8184 	/* 256 should be plenty to hold the amount needed */
8185 	buf = kmalloc(256, GFP_KERNEL);
8186 	if (!buf)
8187 		return -ENOMEM;
8188 
8189 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8190 		      ftrace_update_tot_cnt,
8191 		      ftrace_number_of_pages,
8192 		      ftrace_number_of_groups);
8193 
8194 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8195 	kfree(buf);
8196 	return ret;
8197 }
8198 
8199 static const struct file_operations tracing_dyn_info_fops = {
8200 	.open		= tracing_open_generic,
8201 	.read		= tracing_read_dyn_info,
8202 	.llseek		= generic_file_llseek,
8203 };
8204 #endif /* CONFIG_DYNAMIC_FTRACE */
8205 
8206 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8207 static void
8208 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8209 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8210 		void *data)
8211 {
8212 	tracing_snapshot_instance(tr);
8213 }
8214 
8215 static void
8216 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8217 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8218 		      void *data)
8219 {
8220 	struct ftrace_func_mapper *mapper = data;
8221 	long *count = NULL;
8222 
8223 	if (mapper)
8224 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8225 
8226 	if (count) {
8227 
8228 		if (*count <= 0)
8229 			return;
8230 
8231 		(*count)--;
8232 	}
8233 
8234 	tracing_snapshot_instance(tr);
8235 }
8236 
8237 static int
8238 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8239 		      struct ftrace_probe_ops *ops, void *data)
8240 {
8241 	struct ftrace_func_mapper *mapper = data;
8242 	long *count = NULL;
8243 
8244 	seq_printf(m, "%ps:", (void *)ip);
8245 
8246 	seq_puts(m, "snapshot");
8247 
8248 	if (mapper)
8249 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8250 
8251 	if (count)
8252 		seq_printf(m, ":count=%ld\n", *count);
8253 	else
8254 		seq_puts(m, ":unlimited\n");
8255 
8256 	return 0;
8257 }
8258 
8259 static int
8260 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8261 		     unsigned long ip, void *init_data, void **data)
8262 {
8263 	struct ftrace_func_mapper *mapper = *data;
8264 
8265 	if (!mapper) {
8266 		mapper = allocate_ftrace_func_mapper();
8267 		if (!mapper)
8268 			return -ENOMEM;
8269 		*data = mapper;
8270 	}
8271 
8272 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8273 }
8274 
8275 static void
8276 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8277 		     unsigned long ip, void *data)
8278 {
8279 	struct ftrace_func_mapper *mapper = data;
8280 
8281 	if (!ip) {
8282 		if (!mapper)
8283 			return;
8284 		free_ftrace_func_mapper(mapper, NULL);
8285 		return;
8286 	}
8287 
8288 	ftrace_func_mapper_remove_ip(mapper, ip);
8289 }
8290 
8291 static struct ftrace_probe_ops snapshot_probe_ops = {
8292 	.func			= ftrace_snapshot,
8293 	.print			= ftrace_snapshot_print,
8294 };
8295 
8296 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8297 	.func			= ftrace_count_snapshot,
8298 	.print			= ftrace_snapshot_print,
8299 	.init			= ftrace_snapshot_init,
8300 	.free			= ftrace_snapshot_free,
8301 };
8302 
8303 static int
8304 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8305 			       char *glob, char *cmd, char *param, int enable)
8306 {
8307 	struct ftrace_probe_ops *ops;
8308 	void *count = (void *)-1;
8309 	char *number;
8310 	int ret;
8311 
8312 	if (!tr)
8313 		return -ENODEV;
8314 
8315 	/* hash funcs only work with set_ftrace_filter */
8316 	if (!enable)
8317 		return -EINVAL;
8318 
8319 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8320 
8321 	if (glob[0] == '!')
8322 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8323 
8324 	if (!param)
8325 		goto out_reg;
8326 
8327 	number = strsep(&param, ":");
8328 
8329 	if (!strlen(number))
8330 		goto out_reg;
8331 
8332 	/*
8333 	 * We use the callback data field (which is a pointer)
8334 	 * as our counter.
8335 	 */
8336 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8337 	if (ret)
8338 		return ret;
8339 
8340  out_reg:
8341 	ret = tracing_alloc_snapshot_instance(tr);
8342 	if (ret < 0)
8343 		goto out;
8344 
8345 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8346 
8347  out:
8348 	return ret < 0 ? ret : 0;
8349 }
8350 
8351 static struct ftrace_func_command ftrace_snapshot_cmd = {
8352 	.name			= "snapshot",
8353 	.func			= ftrace_trace_snapshot_callback,
8354 };
8355 
8356 static __init int register_snapshot_cmd(void)
8357 {
8358 	return register_ftrace_command(&ftrace_snapshot_cmd);
8359 }
8360 #else
8361 static inline __init int register_snapshot_cmd(void) { return 0; }
8362 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8363 
8364 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8365 {
8366 	if (WARN_ON(!tr->dir))
8367 		return ERR_PTR(-ENODEV);
8368 
8369 	/* Top directory uses NULL as the parent */
8370 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8371 		return NULL;
8372 
8373 	/* All sub buffers have a descriptor */
8374 	return tr->dir;
8375 }
8376 
8377 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8378 {
8379 	struct dentry *d_tracer;
8380 
8381 	if (tr->percpu_dir)
8382 		return tr->percpu_dir;
8383 
8384 	d_tracer = tracing_get_dentry(tr);
8385 	if (IS_ERR(d_tracer))
8386 		return NULL;
8387 
8388 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8389 
8390 	MEM_FAIL(!tr->percpu_dir,
8391 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8392 
8393 	return tr->percpu_dir;
8394 }
8395 
8396 static struct dentry *
8397 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8398 		      void *data, long cpu, const struct file_operations *fops)
8399 {
8400 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8401 
8402 	if (ret) /* See tracing_get_cpu() */
8403 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8404 	return ret;
8405 }
8406 
8407 static void
8408 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8409 {
8410 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8411 	struct dentry *d_cpu;
8412 	char cpu_dir[30]; /* 30 characters should be more than enough */
8413 
8414 	if (!d_percpu)
8415 		return;
8416 
8417 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8418 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8419 	if (!d_cpu) {
8420 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8421 		return;
8422 	}
8423 
8424 	/* per cpu trace_pipe */
8425 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8426 				tr, cpu, &tracing_pipe_fops);
8427 
8428 	/* per cpu trace */
8429 	trace_create_cpu_file("trace", 0644, d_cpu,
8430 				tr, cpu, &tracing_fops);
8431 
8432 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8433 				tr, cpu, &tracing_buffers_fops);
8434 
8435 	trace_create_cpu_file("stats", 0444, d_cpu,
8436 				tr, cpu, &tracing_stats_fops);
8437 
8438 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8439 				tr, cpu, &tracing_entries_fops);
8440 
8441 #ifdef CONFIG_TRACER_SNAPSHOT
8442 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8443 				tr, cpu, &snapshot_fops);
8444 
8445 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8446 				tr, cpu, &snapshot_raw_fops);
8447 #endif
8448 }
8449 
8450 #ifdef CONFIG_FTRACE_SELFTEST
8451 /* Let selftest have access to static functions in this file */
8452 #include "trace_selftest.c"
8453 #endif
8454 
8455 static ssize_t
8456 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8457 			loff_t *ppos)
8458 {
8459 	struct trace_option_dentry *topt = filp->private_data;
8460 	char *buf;
8461 
8462 	if (topt->flags->val & topt->opt->bit)
8463 		buf = "1\n";
8464 	else
8465 		buf = "0\n";
8466 
8467 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8468 }
8469 
8470 static ssize_t
8471 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8472 			 loff_t *ppos)
8473 {
8474 	struct trace_option_dentry *topt = filp->private_data;
8475 	unsigned long val;
8476 	int ret;
8477 
8478 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8479 	if (ret)
8480 		return ret;
8481 
8482 	if (val != 0 && val != 1)
8483 		return -EINVAL;
8484 
8485 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8486 		mutex_lock(&trace_types_lock);
8487 		ret = __set_tracer_option(topt->tr, topt->flags,
8488 					  topt->opt, !val);
8489 		mutex_unlock(&trace_types_lock);
8490 		if (ret)
8491 			return ret;
8492 	}
8493 
8494 	*ppos += cnt;
8495 
8496 	return cnt;
8497 }
8498 
8499 
8500 static const struct file_operations trace_options_fops = {
8501 	.open = tracing_open_generic,
8502 	.read = trace_options_read,
8503 	.write = trace_options_write,
8504 	.llseek	= generic_file_llseek,
8505 };
8506 
8507 /*
8508  * In order to pass in both the trace_array descriptor as well as the index
8509  * to the flag that the trace option file represents, the trace_array
8510  * has a character array of trace_flags_index[], which holds the index
8511  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8512  * The address of this character array is passed to the flag option file
8513  * read/write callbacks.
8514  *
8515  * In order to extract both the index and the trace_array descriptor,
8516  * get_tr_index() uses the following algorithm.
8517  *
8518  *   idx = *ptr;
8519  *
8520  * As the pointer itself contains the address of the index (remember
8521  * index[1] == 1).
8522  *
8523  * Then to get the trace_array descriptor, by subtracting that index
8524  * from the ptr, we get to the start of the index itself.
8525  *
8526  *   ptr - idx == &index[0]
8527  *
8528  * Then a simple container_of() from that pointer gets us to the
8529  * trace_array descriptor.
8530  */
8531 static void get_tr_index(void *data, struct trace_array **ptr,
8532 			 unsigned int *pindex)
8533 {
8534 	*pindex = *(unsigned char *)data;
8535 
8536 	*ptr = container_of(data - *pindex, struct trace_array,
8537 			    trace_flags_index);
8538 }
8539 
8540 static ssize_t
8541 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8542 			loff_t *ppos)
8543 {
8544 	void *tr_index = filp->private_data;
8545 	struct trace_array *tr;
8546 	unsigned int index;
8547 	char *buf;
8548 
8549 	get_tr_index(tr_index, &tr, &index);
8550 
8551 	if (tr->trace_flags & (1 << index))
8552 		buf = "1\n";
8553 	else
8554 		buf = "0\n";
8555 
8556 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8557 }
8558 
8559 static ssize_t
8560 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8561 			 loff_t *ppos)
8562 {
8563 	void *tr_index = filp->private_data;
8564 	struct trace_array *tr;
8565 	unsigned int index;
8566 	unsigned long val;
8567 	int ret;
8568 
8569 	get_tr_index(tr_index, &tr, &index);
8570 
8571 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8572 	if (ret)
8573 		return ret;
8574 
8575 	if (val != 0 && val != 1)
8576 		return -EINVAL;
8577 
8578 	mutex_lock(&event_mutex);
8579 	mutex_lock(&trace_types_lock);
8580 	ret = set_tracer_flag(tr, 1 << index, val);
8581 	mutex_unlock(&trace_types_lock);
8582 	mutex_unlock(&event_mutex);
8583 
8584 	if (ret < 0)
8585 		return ret;
8586 
8587 	*ppos += cnt;
8588 
8589 	return cnt;
8590 }
8591 
8592 static const struct file_operations trace_options_core_fops = {
8593 	.open = tracing_open_generic,
8594 	.read = trace_options_core_read,
8595 	.write = trace_options_core_write,
8596 	.llseek = generic_file_llseek,
8597 };
8598 
8599 struct dentry *trace_create_file(const char *name,
8600 				 umode_t mode,
8601 				 struct dentry *parent,
8602 				 void *data,
8603 				 const struct file_operations *fops)
8604 {
8605 	struct dentry *ret;
8606 
8607 	ret = tracefs_create_file(name, mode, parent, data, fops);
8608 	if (!ret)
8609 		pr_warn("Could not create tracefs '%s' entry\n", name);
8610 
8611 	return ret;
8612 }
8613 
8614 
8615 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8616 {
8617 	struct dentry *d_tracer;
8618 
8619 	if (tr->options)
8620 		return tr->options;
8621 
8622 	d_tracer = tracing_get_dentry(tr);
8623 	if (IS_ERR(d_tracer))
8624 		return NULL;
8625 
8626 	tr->options = tracefs_create_dir("options", d_tracer);
8627 	if (!tr->options) {
8628 		pr_warn("Could not create tracefs directory 'options'\n");
8629 		return NULL;
8630 	}
8631 
8632 	return tr->options;
8633 }
8634 
8635 static void
8636 create_trace_option_file(struct trace_array *tr,
8637 			 struct trace_option_dentry *topt,
8638 			 struct tracer_flags *flags,
8639 			 struct tracer_opt *opt)
8640 {
8641 	struct dentry *t_options;
8642 
8643 	t_options = trace_options_init_dentry(tr);
8644 	if (!t_options)
8645 		return;
8646 
8647 	topt->flags = flags;
8648 	topt->opt = opt;
8649 	topt->tr = tr;
8650 
8651 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8652 				    &trace_options_fops);
8653 
8654 }
8655 
8656 static void
8657 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8658 {
8659 	struct trace_option_dentry *topts;
8660 	struct trace_options *tr_topts;
8661 	struct tracer_flags *flags;
8662 	struct tracer_opt *opts;
8663 	int cnt;
8664 	int i;
8665 
8666 	if (!tracer)
8667 		return;
8668 
8669 	flags = tracer->flags;
8670 
8671 	if (!flags || !flags->opts)
8672 		return;
8673 
8674 	/*
8675 	 * If this is an instance, only create flags for tracers
8676 	 * the instance may have.
8677 	 */
8678 	if (!trace_ok_for_array(tracer, tr))
8679 		return;
8680 
8681 	for (i = 0; i < tr->nr_topts; i++) {
8682 		/* Make sure there's no duplicate flags. */
8683 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8684 			return;
8685 	}
8686 
8687 	opts = flags->opts;
8688 
8689 	for (cnt = 0; opts[cnt].name; cnt++)
8690 		;
8691 
8692 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8693 	if (!topts)
8694 		return;
8695 
8696 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8697 			    GFP_KERNEL);
8698 	if (!tr_topts) {
8699 		kfree(topts);
8700 		return;
8701 	}
8702 
8703 	tr->topts = tr_topts;
8704 	tr->topts[tr->nr_topts].tracer = tracer;
8705 	tr->topts[tr->nr_topts].topts = topts;
8706 	tr->nr_topts++;
8707 
8708 	for (cnt = 0; opts[cnt].name; cnt++) {
8709 		create_trace_option_file(tr, &topts[cnt], flags,
8710 					 &opts[cnt]);
8711 		MEM_FAIL(topts[cnt].entry == NULL,
8712 			  "Failed to create trace option: %s",
8713 			  opts[cnt].name);
8714 	}
8715 }
8716 
8717 static struct dentry *
8718 create_trace_option_core_file(struct trace_array *tr,
8719 			      const char *option, long index)
8720 {
8721 	struct dentry *t_options;
8722 
8723 	t_options = trace_options_init_dentry(tr);
8724 	if (!t_options)
8725 		return NULL;
8726 
8727 	return trace_create_file(option, 0644, t_options,
8728 				 (void *)&tr->trace_flags_index[index],
8729 				 &trace_options_core_fops);
8730 }
8731 
8732 static void create_trace_options_dir(struct trace_array *tr)
8733 {
8734 	struct dentry *t_options;
8735 	bool top_level = tr == &global_trace;
8736 	int i;
8737 
8738 	t_options = trace_options_init_dentry(tr);
8739 	if (!t_options)
8740 		return;
8741 
8742 	for (i = 0; trace_options[i]; i++) {
8743 		if (top_level ||
8744 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8745 			create_trace_option_core_file(tr, trace_options[i], i);
8746 	}
8747 }
8748 
8749 static ssize_t
8750 rb_simple_read(struct file *filp, char __user *ubuf,
8751 	       size_t cnt, loff_t *ppos)
8752 {
8753 	struct trace_array *tr = filp->private_data;
8754 	char buf[64];
8755 	int r;
8756 
8757 	r = tracer_tracing_is_on(tr);
8758 	r = sprintf(buf, "%d\n", r);
8759 
8760 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8761 }
8762 
8763 static ssize_t
8764 rb_simple_write(struct file *filp, const char __user *ubuf,
8765 		size_t cnt, loff_t *ppos)
8766 {
8767 	struct trace_array *tr = filp->private_data;
8768 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8769 	unsigned long val;
8770 	int ret;
8771 
8772 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8773 	if (ret)
8774 		return ret;
8775 
8776 	if (buffer) {
8777 		mutex_lock(&trace_types_lock);
8778 		if (!!val == tracer_tracing_is_on(tr)) {
8779 			val = 0; /* do nothing */
8780 		} else if (val) {
8781 			tracer_tracing_on(tr);
8782 			if (tr->current_trace->start)
8783 				tr->current_trace->start(tr);
8784 		} else {
8785 			tracer_tracing_off(tr);
8786 			if (tr->current_trace->stop)
8787 				tr->current_trace->stop(tr);
8788 		}
8789 		mutex_unlock(&trace_types_lock);
8790 	}
8791 
8792 	(*ppos)++;
8793 
8794 	return cnt;
8795 }
8796 
8797 static const struct file_operations rb_simple_fops = {
8798 	.open		= tracing_open_generic_tr,
8799 	.read		= rb_simple_read,
8800 	.write		= rb_simple_write,
8801 	.release	= tracing_release_generic_tr,
8802 	.llseek		= default_llseek,
8803 };
8804 
8805 static ssize_t
8806 buffer_percent_read(struct file *filp, char __user *ubuf,
8807 		    size_t cnt, loff_t *ppos)
8808 {
8809 	struct trace_array *tr = filp->private_data;
8810 	char buf[64];
8811 	int r;
8812 
8813 	r = tr->buffer_percent;
8814 	r = sprintf(buf, "%d\n", r);
8815 
8816 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8817 }
8818 
8819 static ssize_t
8820 buffer_percent_write(struct file *filp, const char __user *ubuf,
8821 		     size_t cnt, loff_t *ppos)
8822 {
8823 	struct trace_array *tr = filp->private_data;
8824 	unsigned long val;
8825 	int ret;
8826 
8827 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8828 	if (ret)
8829 		return ret;
8830 
8831 	if (val > 100)
8832 		return -EINVAL;
8833 
8834 	if (!val)
8835 		val = 1;
8836 
8837 	tr->buffer_percent = val;
8838 
8839 	(*ppos)++;
8840 
8841 	return cnt;
8842 }
8843 
8844 static const struct file_operations buffer_percent_fops = {
8845 	.open		= tracing_open_generic_tr,
8846 	.read		= buffer_percent_read,
8847 	.write		= buffer_percent_write,
8848 	.release	= tracing_release_generic_tr,
8849 	.llseek		= default_llseek,
8850 };
8851 
8852 static struct dentry *trace_instance_dir;
8853 
8854 static void
8855 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8856 
8857 static int
8858 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8859 {
8860 	enum ring_buffer_flags rb_flags;
8861 
8862 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8863 
8864 	buf->tr = tr;
8865 
8866 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8867 	if (!buf->buffer)
8868 		return -ENOMEM;
8869 
8870 	buf->data = alloc_percpu(struct trace_array_cpu);
8871 	if (!buf->data) {
8872 		ring_buffer_free(buf->buffer);
8873 		buf->buffer = NULL;
8874 		return -ENOMEM;
8875 	}
8876 
8877 	/* Allocate the first page for all buffers */
8878 	set_buffer_entries(&tr->array_buffer,
8879 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8880 
8881 	return 0;
8882 }
8883 
8884 static int allocate_trace_buffers(struct trace_array *tr, int size)
8885 {
8886 	int ret;
8887 
8888 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8889 	if (ret)
8890 		return ret;
8891 
8892 #ifdef CONFIG_TRACER_MAX_TRACE
8893 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8894 				    allocate_snapshot ? size : 1);
8895 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8896 		ring_buffer_free(tr->array_buffer.buffer);
8897 		tr->array_buffer.buffer = NULL;
8898 		free_percpu(tr->array_buffer.data);
8899 		tr->array_buffer.data = NULL;
8900 		return -ENOMEM;
8901 	}
8902 	tr->allocated_snapshot = allocate_snapshot;
8903 
8904 	/*
8905 	 * Only the top level trace array gets its snapshot allocated
8906 	 * from the kernel command line.
8907 	 */
8908 	allocate_snapshot = false;
8909 #endif
8910 
8911 	return 0;
8912 }
8913 
8914 static void free_trace_buffer(struct array_buffer *buf)
8915 {
8916 	if (buf->buffer) {
8917 		ring_buffer_free(buf->buffer);
8918 		buf->buffer = NULL;
8919 		free_percpu(buf->data);
8920 		buf->data = NULL;
8921 	}
8922 }
8923 
8924 static void free_trace_buffers(struct trace_array *tr)
8925 {
8926 	if (!tr)
8927 		return;
8928 
8929 	free_trace_buffer(&tr->array_buffer);
8930 
8931 #ifdef CONFIG_TRACER_MAX_TRACE
8932 	free_trace_buffer(&tr->max_buffer);
8933 #endif
8934 }
8935 
8936 static void init_trace_flags_index(struct trace_array *tr)
8937 {
8938 	int i;
8939 
8940 	/* Used by the trace options files */
8941 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8942 		tr->trace_flags_index[i] = i;
8943 }
8944 
8945 static void __update_tracer_options(struct trace_array *tr)
8946 {
8947 	struct tracer *t;
8948 
8949 	for (t = trace_types; t; t = t->next)
8950 		add_tracer_options(tr, t);
8951 }
8952 
8953 static void update_tracer_options(struct trace_array *tr)
8954 {
8955 	mutex_lock(&trace_types_lock);
8956 	__update_tracer_options(tr);
8957 	mutex_unlock(&trace_types_lock);
8958 }
8959 
8960 /* Must have trace_types_lock held */
8961 struct trace_array *trace_array_find(const char *instance)
8962 {
8963 	struct trace_array *tr, *found = NULL;
8964 
8965 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8966 		if (tr->name && strcmp(tr->name, instance) == 0) {
8967 			found = tr;
8968 			break;
8969 		}
8970 	}
8971 
8972 	return found;
8973 }
8974 
8975 struct trace_array *trace_array_find_get(const char *instance)
8976 {
8977 	struct trace_array *tr;
8978 
8979 	mutex_lock(&trace_types_lock);
8980 	tr = trace_array_find(instance);
8981 	if (tr)
8982 		tr->ref++;
8983 	mutex_unlock(&trace_types_lock);
8984 
8985 	return tr;
8986 }
8987 
8988 static int trace_array_create_dir(struct trace_array *tr)
8989 {
8990 	int ret;
8991 
8992 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8993 	if (!tr->dir)
8994 		return -EINVAL;
8995 
8996 	ret = event_trace_add_tracer(tr->dir, tr);
8997 	if (ret)
8998 		tracefs_remove(tr->dir);
8999 
9000 	init_tracer_tracefs(tr, tr->dir);
9001 	__update_tracer_options(tr);
9002 
9003 	return ret;
9004 }
9005 
9006 static struct trace_array *trace_array_create(const char *name)
9007 {
9008 	struct trace_array *tr;
9009 	int ret;
9010 
9011 	ret = -ENOMEM;
9012 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9013 	if (!tr)
9014 		return ERR_PTR(ret);
9015 
9016 	tr->name = kstrdup(name, GFP_KERNEL);
9017 	if (!tr->name)
9018 		goto out_free_tr;
9019 
9020 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9021 		goto out_free_tr;
9022 
9023 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9024 
9025 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9026 
9027 	raw_spin_lock_init(&tr->start_lock);
9028 
9029 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9030 
9031 	tr->current_trace = &nop_trace;
9032 
9033 	INIT_LIST_HEAD(&tr->systems);
9034 	INIT_LIST_HEAD(&tr->events);
9035 	INIT_LIST_HEAD(&tr->hist_vars);
9036 	INIT_LIST_HEAD(&tr->err_log);
9037 
9038 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9039 		goto out_free_tr;
9040 
9041 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9042 		goto out_free_tr;
9043 
9044 	ftrace_init_trace_array(tr);
9045 
9046 	init_trace_flags_index(tr);
9047 
9048 	if (trace_instance_dir) {
9049 		ret = trace_array_create_dir(tr);
9050 		if (ret)
9051 			goto out_free_tr;
9052 	} else
9053 		__trace_early_add_events(tr);
9054 
9055 	list_add(&tr->list, &ftrace_trace_arrays);
9056 
9057 	tr->ref++;
9058 
9059 	return tr;
9060 
9061  out_free_tr:
9062 	ftrace_free_ftrace_ops(tr);
9063 	free_trace_buffers(tr);
9064 	free_cpumask_var(tr->tracing_cpumask);
9065 	kfree(tr->name);
9066 	kfree(tr);
9067 
9068 	return ERR_PTR(ret);
9069 }
9070 
9071 static int instance_mkdir(const char *name)
9072 {
9073 	struct trace_array *tr;
9074 	int ret;
9075 
9076 	mutex_lock(&event_mutex);
9077 	mutex_lock(&trace_types_lock);
9078 
9079 	ret = -EEXIST;
9080 	if (trace_array_find(name))
9081 		goto out_unlock;
9082 
9083 	tr = trace_array_create(name);
9084 
9085 	ret = PTR_ERR_OR_ZERO(tr);
9086 
9087 out_unlock:
9088 	mutex_unlock(&trace_types_lock);
9089 	mutex_unlock(&event_mutex);
9090 	return ret;
9091 }
9092 
9093 /**
9094  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9095  * @name: The name of the trace array to be looked up/created.
9096  *
9097  * Returns pointer to trace array with given name.
9098  * NULL, if it cannot be created.
9099  *
9100  * NOTE: This function increments the reference counter associated with the
9101  * trace array returned. This makes sure it cannot be freed while in use.
9102  * Use trace_array_put() once the trace array is no longer needed.
9103  * If the trace_array is to be freed, trace_array_destroy() needs to
9104  * be called after the trace_array_put(), or simply let user space delete
9105  * it from the tracefs instances directory. But until the
9106  * trace_array_put() is called, user space can not delete it.
9107  *
9108  */
9109 struct trace_array *trace_array_get_by_name(const char *name)
9110 {
9111 	struct trace_array *tr;
9112 
9113 	mutex_lock(&event_mutex);
9114 	mutex_lock(&trace_types_lock);
9115 
9116 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9117 		if (tr->name && strcmp(tr->name, name) == 0)
9118 			goto out_unlock;
9119 	}
9120 
9121 	tr = trace_array_create(name);
9122 
9123 	if (IS_ERR(tr))
9124 		tr = NULL;
9125 out_unlock:
9126 	if (tr)
9127 		tr->ref++;
9128 
9129 	mutex_unlock(&trace_types_lock);
9130 	mutex_unlock(&event_mutex);
9131 	return tr;
9132 }
9133 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9134 
9135 static int __remove_instance(struct trace_array *tr)
9136 {
9137 	int i;
9138 
9139 	/* Reference counter for a newly created trace array = 1. */
9140 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9141 		return -EBUSY;
9142 
9143 	list_del(&tr->list);
9144 
9145 	/* Disable all the flags that were enabled coming in */
9146 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9147 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9148 			set_tracer_flag(tr, 1 << i, 0);
9149 	}
9150 
9151 	tracing_set_nop(tr);
9152 	clear_ftrace_function_probes(tr);
9153 	event_trace_del_tracer(tr);
9154 	ftrace_clear_pids(tr);
9155 	ftrace_destroy_function_files(tr);
9156 	tracefs_remove(tr->dir);
9157 	free_percpu(tr->last_func_repeats);
9158 	free_trace_buffers(tr);
9159 
9160 	for (i = 0; i < tr->nr_topts; i++) {
9161 		kfree(tr->topts[i].topts);
9162 	}
9163 	kfree(tr->topts);
9164 
9165 	free_cpumask_var(tr->tracing_cpumask);
9166 	kfree(tr->name);
9167 	kfree(tr);
9168 
9169 	return 0;
9170 }
9171 
9172 int trace_array_destroy(struct trace_array *this_tr)
9173 {
9174 	struct trace_array *tr;
9175 	int ret;
9176 
9177 	if (!this_tr)
9178 		return -EINVAL;
9179 
9180 	mutex_lock(&event_mutex);
9181 	mutex_lock(&trace_types_lock);
9182 
9183 	ret = -ENODEV;
9184 
9185 	/* Making sure trace array exists before destroying it. */
9186 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9187 		if (tr == this_tr) {
9188 			ret = __remove_instance(tr);
9189 			break;
9190 		}
9191 	}
9192 
9193 	mutex_unlock(&trace_types_lock);
9194 	mutex_unlock(&event_mutex);
9195 
9196 	return ret;
9197 }
9198 EXPORT_SYMBOL_GPL(trace_array_destroy);
9199 
9200 static int instance_rmdir(const char *name)
9201 {
9202 	struct trace_array *tr;
9203 	int ret;
9204 
9205 	mutex_lock(&event_mutex);
9206 	mutex_lock(&trace_types_lock);
9207 
9208 	ret = -ENODEV;
9209 	tr = trace_array_find(name);
9210 	if (tr)
9211 		ret = __remove_instance(tr);
9212 
9213 	mutex_unlock(&trace_types_lock);
9214 	mutex_unlock(&event_mutex);
9215 
9216 	return ret;
9217 }
9218 
9219 static __init void create_trace_instances(struct dentry *d_tracer)
9220 {
9221 	struct trace_array *tr;
9222 
9223 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9224 							 instance_mkdir,
9225 							 instance_rmdir);
9226 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9227 		return;
9228 
9229 	mutex_lock(&event_mutex);
9230 	mutex_lock(&trace_types_lock);
9231 
9232 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9233 		if (!tr->name)
9234 			continue;
9235 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9236 			     "Failed to create instance directory\n"))
9237 			break;
9238 	}
9239 
9240 	mutex_unlock(&trace_types_lock);
9241 	mutex_unlock(&event_mutex);
9242 }
9243 
9244 static void
9245 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9246 {
9247 	struct trace_event_file *file;
9248 	int cpu;
9249 
9250 	trace_create_file("available_tracers", 0444, d_tracer,
9251 			tr, &show_traces_fops);
9252 
9253 	trace_create_file("current_tracer", 0644, d_tracer,
9254 			tr, &set_tracer_fops);
9255 
9256 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9257 			  tr, &tracing_cpumask_fops);
9258 
9259 	trace_create_file("trace_options", 0644, d_tracer,
9260 			  tr, &tracing_iter_fops);
9261 
9262 	trace_create_file("trace", 0644, d_tracer,
9263 			  tr, &tracing_fops);
9264 
9265 	trace_create_file("trace_pipe", 0444, d_tracer,
9266 			  tr, &tracing_pipe_fops);
9267 
9268 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9269 			  tr, &tracing_entries_fops);
9270 
9271 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9272 			  tr, &tracing_total_entries_fops);
9273 
9274 	trace_create_file("free_buffer", 0200, d_tracer,
9275 			  tr, &tracing_free_buffer_fops);
9276 
9277 	trace_create_file("trace_marker", 0220, d_tracer,
9278 			  tr, &tracing_mark_fops);
9279 
9280 	file = __find_event_file(tr, "ftrace", "print");
9281 	if (file && file->dir)
9282 		trace_create_file("trigger", 0644, file->dir, file,
9283 				  &event_trigger_fops);
9284 	tr->trace_marker_file = file;
9285 
9286 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9287 			  tr, &tracing_mark_raw_fops);
9288 
9289 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9290 			  &trace_clock_fops);
9291 
9292 	trace_create_file("tracing_on", 0644, d_tracer,
9293 			  tr, &rb_simple_fops);
9294 
9295 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9296 			  &trace_time_stamp_mode_fops);
9297 
9298 	tr->buffer_percent = 50;
9299 
9300 	trace_create_file("buffer_percent", 0444, d_tracer,
9301 			tr, &buffer_percent_fops);
9302 
9303 	create_trace_options_dir(tr);
9304 
9305 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9306 	trace_create_maxlat_file(tr, d_tracer);
9307 #endif
9308 
9309 	if (ftrace_create_function_files(tr, d_tracer))
9310 		MEM_FAIL(1, "Could not allocate function filter files");
9311 
9312 #ifdef CONFIG_TRACER_SNAPSHOT
9313 	trace_create_file("snapshot", 0644, d_tracer,
9314 			  tr, &snapshot_fops);
9315 #endif
9316 
9317 	trace_create_file("error_log", 0644, d_tracer,
9318 			  tr, &tracing_err_log_fops);
9319 
9320 	for_each_tracing_cpu(cpu)
9321 		tracing_init_tracefs_percpu(tr, cpu);
9322 
9323 	ftrace_init_tracefs(tr, d_tracer);
9324 }
9325 
9326 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9327 {
9328 	struct vfsmount *mnt;
9329 	struct file_system_type *type;
9330 
9331 	/*
9332 	 * To maintain backward compatibility for tools that mount
9333 	 * debugfs to get to the tracing facility, tracefs is automatically
9334 	 * mounted to the debugfs/tracing directory.
9335 	 */
9336 	type = get_fs_type("tracefs");
9337 	if (!type)
9338 		return NULL;
9339 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9340 	put_filesystem(type);
9341 	if (IS_ERR(mnt))
9342 		return NULL;
9343 	mntget(mnt);
9344 
9345 	return mnt;
9346 }
9347 
9348 /**
9349  * tracing_init_dentry - initialize top level trace array
9350  *
9351  * This is called when creating files or directories in the tracing
9352  * directory. It is called via fs_initcall() by any of the boot up code
9353  * and expects to return the dentry of the top level tracing directory.
9354  */
9355 int tracing_init_dentry(void)
9356 {
9357 	struct trace_array *tr = &global_trace;
9358 
9359 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9360 		pr_warn("Tracing disabled due to lockdown\n");
9361 		return -EPERM;
9362 	}
9363 
9364 	/* The top level trace array uses  NULL as parent */
9365 	if (tr->dir)
9366 		return 0;
9367 
9368 	if (WARN_ON(!tracefs_initialized()))
9369 		return -ENODEV;
9370 
9371 	/*
9372 	 * As there may still be users that expect the tracing
9373 	 * files to exist in debugfs/tracing, we must automount
9374 	 * the tracefs file system there, so older tools still
9375 	 * work with the newer kernel.
9376 	 */
9377 	tr->dir = debugfs_create_automount("tracing", NULL,
9378 					   trace_automount, NULL);
9379 
9380 	return 0;
9381 }
9382 
9383 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9384 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9385 
9386 static struct workqueue_struct *eval_map_wq __initdata;
9387 static struct work_struct eval_map_work __initdata;
9388 
9389 static void __init eval_map_work_func(struct work_struct *work)
9390 {
9391 	int len;
9392 
9393 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9394 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9395 }
9396 
9397 static int __init trace_eval_init(void)
9398 {
9399 	INIT_WORK(&eval_map_work, eval_map_work_func);
9400 
9401 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9402 	if (!eval_map_wq) {
9403 		pr_err("Unable to allocate eval_map_wq\n");
9404 		/* Do work here */
9405 		eval_map_work_func(&eval_map_work);
9406 		return -ENOMEM;
9407 	}
9408 
9409 	queue_work(eval_map_wq, &eval_map_work);
9410 	return 0;
9411 }
9412 
9413 static int __init trace_eval_sync(void)
9414 {
9415 	/* Make sure the eval map updates are finished */
9416 	if (eval_map_wq)
9417 		destroy_workqueue(eval_map_wq);
9418 	return 0;
9419 }
9420 
9421 late_initcall_sync(trace_eval_sync);
9422 
9423 
9424 #ifdef CONFIG_MODULES
9425 static void trace_module_add_evals(struct module *mod)
9426 {
9427 	if (!mod->num_trace_evals)
9428 		return;
9429 
9430 	/*
9431 	 * Modules with bad taint do not have events created, do
9432 	 * not bother with enums either.
9433 	 */
9434 	if (trace_module_has_bad_taint(mod))
9435 		return;
9436 
9437 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9438 }
9439 
9440 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9441 static void trace_module_remove_evals(struct module *mod)
9442 {
9443 	union trace_eval_map_item *map;
9444 	union trace_eval_map_item **last = &trace_eval_maps;
9445 
9446 	if (!mod->num_trace_evals)
9447 		return;
9448 
9449 	mutex_lock(&trace_eval_mutex);
9450 
9451 	map = trace_eval_maps;
9452 
9453 	while (map) {
9454 		if (map->head.mod == mod)
9455 			break;
9456 		map = trace_eval_jmp_to_tail(map);
9457 		last = &map->tail.next;
9458 		map = map->tail.next;
9459 	}
9460 	if (!map)
9461 		goto out;
9462 
9463 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9464 	kfree(map);
9465  out:
9466 	mutex_unlock(&trace_eval_mutex);
9467 }
9468 #else
9469 static inline void trace_module_remove_evals(struct module *mod) { }
9470 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9471 
9472 static int trace_module_notify(struct notifier_block *self,
9473 			       unsigned long val, void *data)
9474 {
9475 	struct module *mod = data;
9476 
9477 	switch (val) {
9478 	case MODULE_STATE_COMING:
9479 		trace_module_add_evals(mod);
9480 		break;
9481 	case MODULE_STATE_GOING:
9482 		trace_module_remove_evals(mod);
9483 		break;
9484 	}
9485 
9486 	return NOTIFY_OK;
9487 }
9488 
9489 static struct notifier_block trace_module_nb = {
9490 	.notifier_call = trace_module_notify,
9491 	.priority = 0,
9492 };
9493 #endif /* CONFIG_MODULES */
9494 
9495 static __init int tracer_init_tracefs(void)
9496 {
9497 	int ret;
9498 
9499 	trace_access_lock_init();
9500 
9501 	ret = tracing_init_dentry();
9502 	if (ret)
9503 		return 0;
9504 
9505 	event_trace_init();
9506 
9507 	init_tracer_tracefs(&global_trace, NULL);
9508 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9509 
9510 	trace_create_file("tracing_thresh", 0644, NULL,
9511 			&global_trace, &tracing_thresh_fops);
9512 
9513 	trace_create_file("README", 0444, NULL,
9514 			NULL, &tracing_readme_fops);
9515 
9516 	trace_create_file("saved_cmdlines", 0444, NULL,
9517 			NULL, &tracing_saved_cmdlines_fops);
9518 
9519 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9520 			  NULL, &tracing_saved_cmdlines_size_fops);
9521 
9522 	trace_create_file("saved_tgids", 0444, NULL,
9523 			NULL, &tracing_saved_tgids_fops);
9524 
9525 	trace_eval_init();
9526 
9527 	trace_create_eval_file(NULL);
9528 
9529 #ifdef CONFIG_MODULES
9530 	register_module_notifier(&trace_module_nb);
9531 #endif
9532 
9533 #ifdef CONFIG_DYNAMIC_FTRACE
9534 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9535 			NULL, &tracing_dyn_info_fops);
9536 #endif
9537 
9538 	create_trace_instances(NULL);
9539 
9540 	update_tracer_options(&global_trace);
9541 
9542 	return 0;
9543 }
9544 
9545 static int trace_panic_handler(struct notifier_block *this,
9546 			       unsigned long event, void *unused)
9547 {
9548 	if (ftrace_dump_on_oops)
9549 		ftrace_dump(ftrace_dump_on_oops);
9550 	return NOTIFY_OK;
9551 }
9552 
9553 static struct notifier_block trace_panic_notifier = {
9554 	.notifier_call  = trace_panic_handler,
9555 	.next           = NULL,
9556 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9557 };
9558 
9559 static int trace_die_handler(struct notifier_block *self,
9560 			     unsigned long val,
9561 			     void *data)
9562 {
9563 	switch (val) {
9564 	case DIE_OOPS:
9565 		if (ftrace_dump_on_oops)
9566 			ftrace_dump(ftrace_dump_on_oops);
9567 		break;
9568 	default:
9569 		break;
9570 	}
9571 	return NOTIFY_OK;
9572 }
9573 
9574 static struct notifier_block trace_die_notifier = {
9575 	.notifier_call = trace_die_handler,
9576 	.priority = 200
9577 };
9578 
9579 /*
9580  * printk is set to max of 1024, we really don't need it that big.
9581  * Nothing should be printing 1000 characters anyway.
9582  */
9583 #define TRACE_MAX_PRINT		1000
9584 
9585 /*
9586  * Define here KERN_TRACE so that we have one place to modify
9587  * it if we decide to change what log level the ftrace dump
9588  * should be at.
9589  */
9590 #define KERN_TRACE		KERN_EMERG
9591 
9592 void
9593 trace_printk_seq(struct trace_seq *s)
9594 {
9595 	/* Probably should print a warning here. */
9596 	if (s->seq.len >= TRACE_MAX_PRINT)
9597 		s->seq.len = TRACE_MAX_PRINT;
9598 
9599 	/*
9600 	 * More paranoid code. Although the buffer size is set to
9601 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9602 	 * an extra layer of protection.
9603 	 */
9604 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9605 		s->seq.len = s->seq.size - 1;
9606 
9607 	/* should be zero ended, but we are paranoid. */
9608 	s->buffer[s->seq.len] = 0;
9609 
9610 	printk(KERN_TRACE "%s", s->buffer);
9611 
9612 	trace_seq_init(s);
9613 }
9614 
9615 void trace_init_global_iter(struct trace_iterator *iter)
9616 {
9617 	iter->tr = &global_trace;
9618 	iter->trace = iter->tr->current_trace;
9619 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9620 	iter->array_buffer = &global_trace.array_buffer;
9621 
9622 	if (iter->trace && iter->trace->open)
9623 		iter->trace->open(iter);
9624 
9625 	/* Annotate start of buffers if we had overruns */
9626 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9627 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9628 
9629 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9630 	if (trace_clocks[iter->tr->clock_id].in_ns)
9631 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9632 }
9633 
9634 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9635 {
9636 	/* use static because iter can be a bit big for the stack */
9637 	static struct trace_iterator iter;
9638 	static atomic_t dump_running;
9639 	struct trace_array *tr = &global_trace;
9640 	unsigned int old_userobj;
9641 	unsigned long flags;
9642 	int cnt = 0, cpu;
9643 
9644 	/* Only allow one dump user at a time. */
9645 	if (atomic_inc_return(&dump_running) != 1) {
9646 		atomic_dec(&dump_running);
9647 		return;
9648 	}
9649 
9650 	/*
9651 	 * Always turn off tracing when we dump.
9652 	 * We don't need to show trace output of what happens
9653 	 * between multiple crashes.
9654 	 *
9655 	 * If the user does a sysrq-z, then they can re-enable
9656 	 * tracing with echo 1 > tracing_on.
9657 	 */
9658 	tracing_off();
9659 
9660 	local_irq_save(flags);
9661 	printk_nmi_direct_enter();
9662 
9663 	/* Simulate the iterator */
9664 	trace_init_global_iter(&iter);
9665 	/* Can not use kmalloc for iter.temp and iter.fmt */
9666 	iter.temp = static_temp_buf;
9667 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9668 	iter.fmt = static_fmt_buf;
9669 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9670 
9671 	for_each_tracing_cpu(cpu) {
9672 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9673 	}
9674 
9675 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9676 
9677 	/* don't look at user memory in panic mode */
9678 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9679 
9680 	switch (oops_dump_mode) {
9681 	case DUMP_ALL:
9682 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9683 		break;
9684 	case DUMP_ORIG:
9685 		iter.cpu_file = raw_smp_processor_id();
9686 		break;
9687 	case DUMP_NONE:
9688 		goto out_enable;
9689 	default:
9690 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9691 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9692 	}
9693 
9694 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9695 
9696 	/* Did function tracer already get disabled? */
9697 	if (ftrace_is_dead()) {
9698 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9699 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9700 	}
9701 
9702 	/*
9703 	 * We need to stop all tracing on all CPUS to read
9704 	 * the next buffer. This is a bit expensive, but is
9705 	 * not done often. We fill all what we can read,
9706 	 * and then release the locks again.
9707 	 */
9708 
9709 	while (!trace_empty(&iter)) {
9710 
9711 		if (!cnt)
9712 			printk(KERN_TRACE "---------------------------------\n");
9713 
9714 		cnt++;
9715 
9716 		trace_iterator_reset(&iter);
9717 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9718 
9719 		if (trace_find_next_entry_inc(&iter) != NULL) {
9720 			int ret;
9721 
9722 			ret = print_trace_line(&iter);
9723 			if (ret != TRACE_TYPE_NO_CONSUME)
9724 				trace_consume(&iter);
9725 		}
9726 		touch_nmi_watchdog();
9727 
9728 		trace_printk_seq(&iter.seq);
9729 	}
9730 
9731 	if (!cnt)
9732 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9733 	else
9734 		printk(KERN_TRACE "---------------------------------\n");
9735 
9736  out_enable:
9737 	tr->trace_flags |= old_userobj;
9738 
9739 	for_each_tracing_cpu(cpu) {
9740 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9741 	}
9742 	atomic_dec(&dump_running);
9743 	printk_nmi_direct_exit();
9744 	local_irq_restore(flags);
9745 }
9746 EXPORT_SYMBOL_GPL(ftrace_dump);
9747 
9748 #define WRITE_BUFSIZE  4096
9749 
9750 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9751 				size_t count, loff_t *ppos,
9752 				int (*createfn)(const char *))
9753 {
9754 	char *kbuf, *buf, *tmp;
9755 	int ret = 0;
9756 	size_t done = 0;
9757 	size_t size;
9758 
9759 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9760 	if (!kbuf)
9761 		return -ENOMEM;
9762 
9763 	while (done < count) {
9764 		size = count - done;
9765 
9766 		if (size >= WRITE_BUFSIZE)
9767 			size = WRITE_BUFSIZE - 1;
9768 
9769 		if (copy_from_user(kbuf, buffer + done, size)) {
9770 			ret = -EFAULT;
9771 			goto out;
9772 		}
9773 		kbuf[size] = '\0';
9774 		buf = kbuf;
9775 		do {
9776 			tmp = strchr(buf, '\n');
9777 			if (tmp) {
9778 				*tmp = '\0';
9779 				size = tmp - buf + 1;
9780 			} else {
9781 				size = strlen(buf);
9782 				if (done + size < count) {
9783 					if (buf != kbuf)
9784 						break;
9785 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9786 					pr_warn("Line length is too long: Should be less than %d\n",
9787 						WRITE_BUFSIZE - 2);
9788 					ret = -EINVAL;
9789 					goto out;
9790 				}
9791 			}
9792 			done += size;
9793 
9794 			/* Remove comments */
9795 			tmp = strchr(buf, '#');
9796 
9797 			if (tmp)
9798 				*tmp = '\0';
9799 
9800 			ret = createfn(buf);
9801 			if (ret)
9802 				goto out;
9803 			buf += size;
9804 
9805 		} while (done < count);
9806 	}
9807 	ret = done;
9808 
9809 out:
9810 	kfree(kbuf);
9811 
9812 	return ret;
9813 }
9814 
9815 __init static int tracer_alloc_buffers(void)
9816 {
9817 	int ring_buf_size;
9818 	int ret = -ENOMEM;
9819 
9820 
9821 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9822 		pr_warn("Tracing disabled due to lockdown\n");
9823 		return -EPERM;
9824 	}
9825 
9826 	/*
9827 	 * Make sure we don't accidentally add more trace options
9828 	 * than we have bits for.
9829 	 */
9830 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9831 
9832 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9833 		goto out;
9834 
9835 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9836 		goto out_free_buffer_mask;
9837 
9838 	/* Only allocate trace_printk buffers if a trace_printk exists */
9839 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9840 		/* Must be called before global_trace.buffer is allocated */
9841 		trace_printk_init_buffers();
9842 
9843 	/* To save memory, keep the ring buffer size to its minimum */
9844 	if (ring_buffer_expanded)
9845 		ring_buf_size = trace_buf_size;
9846 	else
9847 		ring_buf_size = 1;
9848 
9849 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9850 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9851 
9852 	raw_spin_lock_init(&global_trace.start_lock);
9853 
9854 	/*
9855 	 * The prepare callbacks allocates some memory for the ring buffer. We
9856 	 * don't free the buffer if the CPU goes down. If we were to free
9857 	 * the buffer, then the user would lose any trace that was in the
9858 	 * buffer. The memory will be removed once the "instance" is removed.
9859 	 */
9860 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9861 				      "trace/RB:preapre", trace_rb_cpu_prepare,
9862 				      NULL);
9863 	if (ret < 0)
9864 		goto out_free_cpumask;
9865 	/* Used for event triggers */
9866 	ret = -ENOMEM;
9867 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9868 	if (!temp_buffer)
9869 		goto out_rm_hp_state;
9870 
9871 	if (trace_create_savedcmd() < 0)
9872 		goto out_free_temp_buffer;
9873 
9874 	/* TODO: make the number of buffers hot pluggable with CPUS */
9875 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9876 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9877 		goto out_free_savedcmd;
9878 	}
9879 
9880 	if (global_trace.buffer_disabled)
9881 		tracing_off();
9882 
9883 	if (trace_boot_clock) {
9884 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9885 		if (ret < 0)
9886 			pr_warn("Trace clock %s not defined, going back to default\n",
9887 				trace_boot_clock);
9888 	}
9889 
9890 	/*
9891 	 * register_tracer() might reference current_trace, so it
9892 	 * needs to be set before we register anything. This is
9893 	 * just a bootstrap of current_trace anyway.
9894 	 */
9895 	global_trace.current_trace = &nop_trace;
9896 
9897 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9898 
9899 	ftrace_init_global_array_ops(&global_trace);
9900 
9901 	init_trace_flags_index(&global_trace);
9902 
9903 	register_tracer(&nop_trace);
9904 
9905 	/* Function tracing may start here (via kernel command line) */
9906 	init_function_trace();
9907 
9908 	/* All seems OK, enable tracing */
9909 	tracing_disabled = 0;
9910 
9911 	atomic_notifier_chain_register(&panic_notifier_list,
9912 				       &trace_panic_notifier);
9913 
9914 	register_die_notifier(&trace_die_notifier);
9915 
9916 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9917 
9918 	INIT_LIST_HEAD(&global_trace.systems);
9919 	INIT_LIST_HEAD(&global_trace.events);
9920 	INIT_LIST_HEAD(&global_trace.hist_vars);
9921 	INIT_LIST_HEAD(&global_trace.err_log);
9922 	list_add(&global_trace.list, &ftrace_trace_arrays);
9923 
9924 	apply_trace_boot_options();
9925 
9926 	register_snapshot_cmd();
9927 
9928 	test_can_verify();
9929 
9930 	return 0;
9931 
9932 out_free_savedcmd:
9933 	free_saved_cmdlines_buffer(savedcmd);
9934 out_free_temp_buffer:
9935 	ring_buffer_free(temp_buffer);
9936 out_rm_hp_state:
9937 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9938 out_free_cpumask:
9939 	free_cpumask_var(global_trace.tracing_cpumask);
9940 out_free_buffer_mask:
9941 	free_cpumask_var(tracing_buffer_mask);
9942 out:
9943 	return ret;
9944 }
9945 
9946 void __init early_trace_init(void)
9947 {
9948 	if (tracepoint_printk) {
9949 		tracepoint_print_iter =
9950 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9951 		if (MEM_FAIL(!tracepoint_print_iter,
9952 			     "Failed to allocate trace iterator\n"))
9953 			tracepoint_printk = 0;
9954 		else
9955 			static_key_enable(&tracepoint_printk_key.key);
9956 	}
9957 	tracer_alloc_buffers();
9958 }
9959 
9960 void __init trace_init(void)
9961 {
9962 	trace_event_init();
9963 }
9964 
9965 __init static int clear_boot_tracer(void)
9966 {
9967 	/*
9968 	 * The default tracer at boot buffer is an init section.
9969 	 * This function is called in lateinit. If we did not
9970 	 * find the boot tracer, then clear it out, to prevent
9971 	 * later registration from accessing the buffer that is
9972 	 * about to be freed.
9973 	 */
9974 	if (!default_bootup_tracer)
9975 		return 0;
9976 
9977 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9978 	       default_bootup_tracer);
9979 	default_bootup_tracer = NULL;
9980 
9981 	return 0;
9982 }
9983 
9984 fs_initcall(tracer_init_tracefs);
9985 late_initcall_sync(clear_boot_tracer);
9986 
9987 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9988 __init static int tracing_set_default_clock(void)
9989 {
9990 	/* sched_clock_stable() is determined in late_initcall */
9991 	if (!trace_boot_clock && !sched_clock_stable()) {
9992 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9993 			pr_warn("Can not set tracing clock due to lockdown\n");
9994 			return -EPERM;
9995 		}
9996 
9997 		printk(KERN_WARNING
9998 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9999 		       "If you want to keep using the local clock, then add:\n"
10000 		       "  \"trace_clock=local\"\n"
10001 		       "on the kernel command line\n");
10002 		tracing_set_clock(&global_trace, "global");
10003 	}
10004 
10005 	return 0;
10006 }
10007 late_initcall_sync(tracing_set_default_clock);
10008 #endif
10009