xref: /linux/kernel/trace/trace.c (revision 372e2db7210df7c45ead46429aeb1443ba148060)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44 
45 #include "trace.h"
46 #include "trace_output.h"
47 
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53 
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will lurk into the ring-buffer to count the
57  * entries inserted during the selftest although some concurrent
58  * insertions into the ring-buffer such as trace_printk could occurred
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62 
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67 
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71 
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74 	{ }
75 };
76 
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80 	return 0;
81 }
82 
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89 
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97 
98 cpumask_var_t __read_mostly	tracing_buffer_mask;
99 
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputing it to a
107  * serial console.
108  *
109  * It is default off, but you can enable it with either specifying
110  * "ftrace_dump_on_oops" in the kernel command line, or setting
111  * /proc/sys/kernel/ftrace_dump_on_oops
112  * Set 1 if you want to dump buffers of all CPUs
113  * Set 2 if you want to dump the buffer of the CPU that triggered oops
114  */
115 
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117 
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120 
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124 	struct module			*mod;
125 	unsigned long			length;
126 };
127 
128 union trace_enum_map_item;
129 
130 struct trace_enum_map_tail {
131 	/*
132 	 * "end" is first and points to NULL as it must be different
133 	 * than "mod" or "enum_string"
134 	 */
135 	union trace_enum_map_item	*next;
136 	const char			*end;	/* points to NULL */
137 };
138 
139 static DEFINE_MUTEX(trace_enum_mutex);
140 
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149 	struct trace_enum_map		map;
150 	struct trace_enum_map_head	head;
151 	struct trace_enum_map_tail	tail;
152 };
153 
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156 
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158 
159 #define MAX_TRACER_SIZE		100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162 
163 static bool allocate_snapshot;
164 
165 static int __init set_cmdline_ftrace(char *str)
166 {
167 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168 	default_bootup_tracer = bootup_tracer_buf;
169 	/* We are using ftrace early, expand it */
170 	ring_buffer_expanded = true;
171 	return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174 
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177 	if (*str++ != '=' || !*str) {
178 		ftrace_dump_on_oops = DUMP_ALL;
179 		return 1;
180 	}
181 
182 	if (!strcmp("orig_cpu", str)) {
183 		ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186 
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190 
191 static int __init stop_trace_on_warning(char *str)
192 {
193 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194 		__disable_trace_on_warning = 1;
195 	return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198 
199 static int __init boot_alloc_snapshot(char *str)
200 {
201 	allocate_snapshot = true;
202 	/* We also need the main ring buffer expanded */
203 	ring_buffer_expanded = true;
204 	return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207 
208 
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210 
211 static int __init set_trace_boot_options(char *str)
212 {
213 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214 	return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217 
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220 
221 static int __init set_trace_boot_clock(char *str)
222 {
223 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224 	trace_boot_clock = trace_boot_clock_buf;
225 	return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228 
229 static int __init set_tracepoint_printk(char *str)
230 {
231 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232 		tracepoint_printk = 1;
233 	return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236 
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239 	nsec += 500;
240 	do_div(nsec, 1000);
241 	return nsec;
242 }
243 
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS						\
246 	(FUNCTION_DEFAULT_FLAGS |					\
247 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
248 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
249 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
250 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251 
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
254 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255 
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258 	TRACE_ITER_EVENT_FORK
259 
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a link list of pages that will store trace entries. The
264  * page descriptor of the pages in the memory is used to hold
265  * the link list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273 	.trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275 
276 LIST_HEAD(ftrace_trace_arrays);
277 
278 int trace_array_get(struct trace_array *this_tr)
279 {
280 	struct trace_array *tr;
281 	int ret = -ENODEV;
282 
283 	mutex_lock(&trace_types_lock);
284 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285 		if (tr == this_tr) {
286 			tr->ref++;
287 			ret = 0;
288 			break;
289 		}
290 	}
291 	mutex_unlock(&trace_types_lock);
292 
293 	return ret;
294 }
295 
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298 	WARN_ON(!this_tr->ref);
299 	this_tr->ref--;
300 }
301 
302 void trace_array_put(struct trace_array *this_tr)
303 {
304 	mutex_lock(&trace_types_lock);
305 	__trace_array_put(this_tr);
306 	mutex_unlock(&trace_types_lock);
307 }
308 
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310 			      struct ring_buffer *buffer,
311 			      struct ring_buffer_event *event)
312 {
313 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314 	    !filter_match_preds(call->filter, rec)) {
315 		__trace_event_discard_commit(buffer, event);
316 		return 1;
317 	}
318 
319 	return 0;
320 }
321 
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324 	vfree(pid_list->pids);
325 	kfree(pid_list);
326 }
327 
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338 	/*
339 	 * If pid_max changed after filtered_pids was created, we
340 	 * by default ignore all pids greater than the previous pid_max.
341 	 */
342 	if (search_pid >= filtered_pids->pid_max)
343 		return false;
344 
345 	return test_bit(search_pid, filtered_pids->pids);
346 }
347 
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360 	/*
361 	 * Return false, because if filtered_pids does not exist,
362 	 * all pids are good to trace.
363 	 */
364 	if (!filtered_pids)
365 		return false;
366 
367 	return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369 
370 /**
371  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * If adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383 				  struct task_struct *self,
384 				  struct task_struct *task)
385 {
386 	if (!pid_list)
387 		return;
388 
389 	/* For forks, we only add if the forking task is listed */
390 	if (self) {
391 		if (!trace_find_filtered_pid(pid_list, self->pid))
392 			return;
393 	}
394 
395 	/* Sorry, but we don't support pid_max changing after setting */
396 	if (task->pid >= pid_list->pid_max)
397 		return;
398 
399 	/* "self" is set for forks, and NULL for exits */
400 	if (self)
401 		set_bit(task->pid, pid_list->pids);
402 	else
403 		clear_bit(task->pid, pid_list->pids);
404 }
405 
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420 	unsigned long pid = (unsigned long)v;
421 
422 	(*pos)++;
423 
424 	/* pid already is +1 of the actual prevous bit */
425 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426 
427 	/* Return pid + 1 to allow zero to be represented */
428 	if (pid < pid_list->pid_max)
429 		return (void *)(pid + 1);
430 
431 	return NULL;
432 }
433 
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447 	unsigned long pid;
448 	loff_t l = 0;
449 
450 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451 	if (pid >= pid_list->pid_max)
452 		return NULL;
453 
454 	/* Return pid + 1 so that zero can be the exit value */
455 	for (pid++; pid && l < *pos;
456 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457 		;
458 	return (void *)pid;
459 }
460 
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471 	unsigned long pid = (unsigned long)v - 1;
472 
473 	seq_printf(m, "%lu\n", pid);
474 	return 0;
475 }
476 
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE		127
479 
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481 		    struct trace_pid_list **new_pid_list,
482 		    const char __user *ubuf, size_t cnt)
483 {
484 	struct trace_pid_list *pid_list;
485 	struct trace_parser parser;
486 	unsigned long val;
487 	int nr_pids = 0;
488 	ssize_t read = 0;
489 	ssize_t ret = 0;
490 	loff_t pos;
491 	pid_t pid;
492 
493 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494 		return -ENOMEM;
495 
496 	/*
497 	 * Always recreate a new array. The write is an all or nothing
498 	 * operation. Always create a new array when adding new pids by
499 	 * the user. If the operation fails, then the current list is
500 	 * not modified.
501 	 */
502 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503 	if (!pid_list)
504 		return -ENOMEM;
505 
506 	pid_list->pid_max = READ_ONCE(pid_max);
507 
508 	/* Only truncating will shrink pid_max */
509 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 		pid_list->pid_max = filtered_pids->pid_max;
511 
512 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 	if (!pid_list->pids) {
514 		kfree(pid_list);
515 		return -ENOMEM;
516 	}
517 
518 	if (filtered_pids) {
519 		/* copy the current bits to the new max */
520 		for_each_set_bit(pid, filtered_pids->pids,
521 				 filtered_pids->pid_max) {
522 			set_bit(pid, pid_list->pids);
523 			nr_pids++;
524 		}
525 	}
526 
527 	while (cnt > 0) {
528 
529 		pos = 0;
530 
531 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
532 		if (ret < 0 || !trace_parser_loaded(&parser))
533 			break;
534 
535 		read += ret;
536 		ubuf += ret;
537 		cnt -= ret;
538 
539 		parser.buffer[parser.idx] = 0;
540 
541 		ret = -EINVAL;
542 		if (kstrtoul(parser.buffer, 0, &val))
543 			break;
544 		if (val >= pid_list->pid_max)
545 			break;
546 
547 		pid = (pid_t)val;
548 
549 		set_bit(pid, pid_list->pids);
550 		nr_pids++;
551 
552 		trace_parser_clear(&parser);
553 		ret = 0;
554 	}
555 	trace_parser_put(&parser);
556 
557 	if (ret < 0) {
558 		trace_free_pid_list(pid_list);
559 		return ret;
560 	}
561 
562 	if (!nr_pids) {
563 		/* Cleared the list of pids */
564 		trace_free_pid_list(pid_list);
565 		read = ret;
566 		pid_list = NULL;
567 	}
568 
569 	*new_pid_list = pid_list;
570 
571 	return read;
572 }
573 
574 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
575 {
576 	u64 ts;
577 
578 	/* Early boot up does not have a buffer yet */
579 	if (!buf->buffer)
580 		return trace_clock_local();
581 
582 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
583 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
584 
585 	return ts;
586 }
587 
588 cycle_t ftrace_now(int cpu)
589 {
590 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
591 }
592 
593 /**
594  * tracing_is_enabled - Show if global_trace has been disabled
595  *
596  * Shows if the global trace has been enabled or not. It uses the
597  * mirror flag "buffer_disabled" to be used in fast paths such as for
598  * the irqsoff tracer. But it may be inaccurate due to races. If you
599  * need to know the accurate state, use tracing_is_on() which is a little
600  * slower, but accurate.
601  */
602 int tracing_is_enabled(void)
603 {
604 	/*
605 	 * For quick access (irqsoff uses this in fast path), just
606 	 * return the mirror variable of the state of the ring buffer.
607 	 * It's a little racy, but we don't really care.
608 	 */
609 	smp_rmb();
610 	return !global_trace.buffer_disabled;
611 }
612 
613 /*
614  * trace_buf_size is the size in bytes that is allocated
615  * for a buffer. Note, the number of bytes is always rounded
616  * to page size.
617  *
618  * This number is purposely set to a low number of 16384.
619  * If the dump on oops happens, it will be much appreciated
620  * to not have to wait for all that output. Anyway this can be
621  * boot time and run time configurable.
622  */
623 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
624 
625 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
626 
627 /* trace_types holds a link list of available tracers. */
628 static struct tracer		*trace_types __read_mostly;
629 
630 /*
631  * trace_types_lock is used to protect the trace_types list.
632  */
633 DEFINE_MUTEX(trace_types_lock);
634 
635 /*
636  * serialize the access of the ring buffer
637  *
638  * ring buffer serializes readers, but it is low level protection.
639  * The validity of the events (which returns by ring_buffer_peek() ..etc)
640  * are not protected by ring buffer.
641  *
642  * The content of events may become garbage if we allow other process consumes
643  * these events concurrently:
644  *   A) the page of the consumed events may become a normal page
645  *      (not reader page) in ring buffer, and this page will be rewrited
646  *      by events producer.
647  *   B) The page of the consumed events may become a page for splice_read,
648  *      and this page will be returned to system.
649  *
650  * These primitives allow multi process access to different cpu ring buffer
651  * concurrently.
652  *
653  * These primitives don't distinguish read-only and read-consume access.
654  * Multi read-only access are also serialized.
655  */
656 
657 #ifdef CONFIG_SMP
658 static DECLARE_RWSEM(all_cpu_access_lock);
659 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
660 
661 static inline void trace_access_lock(int cpu)
662 {
663 	if (cpu == RING_BUFFER_ALL_CPUS) {
664 		/* gain it for accessing the whole ring buffer. */
665 		down_write(&all_cpu_access_lock);
666 	} else {
667 		/* gain it for accessing a cpu ring buffer. */
668 
669 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
670 		down_read(&all_cpu_access_lock);
671 
672 		/* Secondly block other access to this @cpu ring buffer. */
673 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
674 	}
675 }
676 
677 static inline void trace_access_unlock(int cpu)
678 {
679 	if (cpu == RING_BUFFER_ALL_CPUS) {
680 		up_write(&all_cpu_access_lock);
681 	} else {
682 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
683 		up_read(&all_cpu_access_lock);
684 	}
685 }
686 
687 static inline void trace_access_lock_init(void)
688 {
689 	int cpu;
690 
691 	for_each_possible_cpu(cpu)
692 		mutex_init(&per_cpu(cpu_access_lock, cpu));
693 }
694 
695 #else
696 
697 static DEFINE_MUTEX(access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_lock(&access_lock);
703 }
704 
705 static inline void trace_access_unlock(int cpu)
706 {
707 	(void)cpu;
708 	mutex_unlock(&access_lock);
709 }
710 
711 static inline void trace_access_lock_init(void)
712 {
713 }
714 
715 #endif
716 
717 #ifdef CONFIG_STACKTRACE
718 static void __ftrace_trace_stack(struct ring_buffer *buffer,
719 				 unsigned long flags,
720 				 int skip, int pc, struct pt_regs *regs);
721 static inline void ftrace_trace_stack(struct trace_array *tr,
722 				      struct ring_buffer *buffer,
723 				      unsigned long flags,
724 				      int skip, int pc, struct pt_regs *regs);
725 
726 #else
727 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
728 					unsigned long flags,
729 					int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 static inline void ftrace_trace_stack(struct trace_array *tr,
733 				      struct ring_buffer *buffer,
734 				      unsigned long flags,
735 				      int skip, int pc, struct pt_regs *regs)
736 {
737 }
738 
739 #endif
740 
741 static void tracer_tracing_on(struct trace_array *tr)
742 {
743 	if (tr->trace_buffer.buffer)
744 		ring_buffer_record_on(tr->trace_buffer.buffer);
745 	/*
746 	 * This flag is looked at when buffers haven't been allocated
747 	 * yet, or by some tracers (like irqsoff), that just want to
748 	 * know if the ring buffer has been disabled, but it can handle
749 	 * races of where it gets disabled but we still do a record.
750 	 * As the check is in the fast path of the tracers, it is more
751 	 * important to be fast than accurate.
752 	 */
753 	tr->buffer_disabled = 0;
754 	/* Make the flag seen by readers */
755 	smp_wmb();
756 }
757 
758 /**
759  * tracing_on - enable tracing buffers
760  *
761  * This function enables tracing buffers that may have been
762  * disabled with tracing_off.
763  */
764 void tracing_on(void)
765 {
766 	tracer_tracing_on(&global_trace);
767 }
768 EXPORT_SYMBOL_GPL(tracing_on);
769 
770 /**
771  * __trace_puts - write a constant string into the trace buffer.
772  * @ip:	   The address of the caller
773  * @str:   The constant string to write
774  * @size:  The size of the string.
775  */
776 int __trace_puts(unsigned long ip, const char *str, int size)
777 {
778 	struct ring_buffer_event *event;
779 	struct ring_buffer *buffer;
780 	struct print_entry *entry;
781 	unsigned long irq_flags;
782 	int alloc;
783 	int pc;
784 
785 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
786 		return 0;
787 
788 	pc = preempt_count();
789 
790 	if (unlikely(tracing_selftest_running || tracing_disabled))
791 		return 0;
792 
793 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
794 
795 	local_save_flags(irq_flags);
796 	buffer = global_trace.trace_buffer.buffer;
797 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
798 					  irq_flags, pc);
799 	if (!event)
800 		return 0;
801 
802 	entry = ring_buffer_event_data(event);
803 	entry->ip = ip;
804 
805 	memcpy(&entry->buf, str, size);
806 
807 	/* Add a newline if necessary */
808 	if (entry->buf[size - 1] != '\n') {
809 		entry->buf[size] = '\n';
810 		entry->buf[size + 1] = '\0';
811 	} else
812 		entry->buf[size] = '\0';
813 
814 	__buffer_unlock_commit(buffer, event);
815 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
816 
817 	return size;
818 }
819 EXPORT_SYMBOL_GPL(__trace_puts);
820 
821 /**
822  * __trace_bputs - write the pointer to a constant string into trace buffer
823  * @ip:	   The address of the caller
824  * @str:   The constant string to write to the buffer to
825  */
826 int __trace_bputs(unsigned long ip, const char *str)
827 {
828 	struct ring_buffer_event *event;
829 	struct ring_buffer *buffer;
830 	struct bputs_entry *entry;
831 	unsigned long irq_flags;
832 	int size = sizeof(struct bputs_entry);
833 	int pc;
834 
835 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
836 		return 0;
837 
838 	pc = preempt_count();
839 
840 	if (unlikely(tracing_selftest_running || tracing_disabled))
841 		return 0;
842 
843 	local_save_flags(irq_flags);
844 	buffer = global_trace.trace_buffer.buffer;
845 	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
846 					  irq_flags, pc);
847 	if (!event)
848 		return 0;
849 
850 	entry = ring_buffer_event_data(event);
851 	entry->ip			= ip;
852 	entry->str			= str;
853 
854 	__buffer_unlock_commit(buffer, event);
855 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
856 
857 	return 1;
858 }
859 EXPORT_SYMBOL_GPL(__trace_bputs);
860 
861 #ifdef CONFIG_TRACER_SNAPSHOT
862 /**
863  * trace_snapshot - take a snapshot of the current buffer.
864  *
865  * This causes a swap between the snapshot buffer and the current live
866  * tracing buffer. You can use this to take snapshots of the live
867  * trace when some condition is triggered, but continue to trace.
868  *
869  * Note, make sure to allocate the snapshot with either
870  * a tracing_snapshot_alloc(), or by doing it manually
871  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
872  *
873  * If the snapshot buffer is not allocated, it will stop tracing.
874  * Basically making a permanent snapshot.
875  */
876 void tracing_snapshot(void)
877 {
878 	struct trace_array *tr = &global_trace;
879 	struct tracer *tracer = tr->current_trace;
880 	unsigned long flags;
881 
882 	if (in_nmi()) {
883 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
884 		internal_trace_puts("*** snapshot is being ignored        ***\n");
885 		return;
886 	}
887 
888 	if (!tr->allocated_snapshot) {
889 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
890 		internal_trace_puts("*** stopping trace here!   ***\n");
891 		tracing_off();
892 		return;
893 	}
894 
895 	/* Note, snapshot can not be used when the tracer uses it */
896 	if (tracer->use_max_tr) {
897 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
898 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
899 		return;
900 	}
901 
902 	local_irq_save(flags);
903 	update_max_tr(tr, current, smp_processor_id());
904 	local_irq_restore(flags);
905 }
906 EXPORT_SYMBOL_GPL(tracing_snapshot);
907 
908 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
909 					struct trace_buffer *size_buf, int cpu_id);
910 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
911 
912 static int alloc_snapshot(struct trace_array *tr)
913 {
914 	int ret;
915 
916 	if (!tr->allocated_snapshot) {
917 
918 		/* allocate spare buffer */
919 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
920 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
921 		if (ret < 0)
922 			return ret;
923 
924 		tr->allocated_snapshot = true;
925 	}
926 
927 	return 0;
928 }
929 
930 static void free_snapshot(struct trace_array *tr)
931 {
932 	/*
933 	 * We don't free the ring buffer. instead, resize it because
934 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
935 	 * we want preserve it.
936 	 */
937 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
938 	set_buffer_entries(&tr->max_buffer, 1);
939 	tracing_reset_online_cpus(&tr->max_buffer);
940 	tr->allocated_snapshot = false;
941 }
942 
943 /**
944  * tracing_alloc_snapshot - allocate snapshot buffer.
945  *
946  * This only allocates the snapshot buffer if it isn't already
947  * allocated - it doesn't also take a snapshot.
948  *
949  * This is meant to be used in cases where the snapshot buffer needs
950  * to be set up for events that can't sleep but need to be able to
951  * trigger a snapshot.
952  */
953 int tracing_alloc_snapshot(void)
954 {
955 	struct trace_array *tr = &global_trace;
956 	int ret;
957 
958 	ret = alloc_snapshot(tr);
959 	WARN_ON(ret < 0);
960 
961 	return ret;
962 }
963 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
964 
965 /**
966  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
967  *
968  * This is similar to trace_snapshot(), but it will allocate the
969  * snapshot buffer if it isn't already allocated. Use this only
970  * where it is safe to sleep, as the allocation may sleep.
971  *
972  * This causes a swap between the snapshot buffer and the current live
973  * tracing buffer. You can use this to take snapshots of the live
974  * trace when some condition is triggered, but continue to trace.
975  */
976 void tracing_snapshot_alloc(void)
977 {
978 	int ret;
979 
980 	ret = tracing_alloc_snapshot();
981 	if (ret < 0)
982 		return;
983 
984 	tracing_snapshot();
985 }
986 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
987 #else
988 void tracing_snapshot(void)
989 {
990 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
991 }
992 EXPORT_SYMBOL_GPL(tracing_snapshot);
993 int tracing_alloc_snapshot(void)
994 {
995 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
996 	return -ENODEV;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999 void tracing_snapshot_alloc(void)
1000 {
1001 	/* Give warning */
1002 	tracing_snapshot();
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1005 #endif /* CONFIG_TRACER_SNAPSHOT */
1006 
1007 static void tracer_tracing_off(struct trace_array *tr)
1008 {
1009 	if (tr->trace_buffer.buffer)
1010 		ring_buffer_record_off(tr->trace_buffer.buffer);
1011 	/*
1012 	 * This flag is looked at when buffers haven't been allocated
1013 	 * yet, or by some tracers (like irqsoff), that just want to
1014 	 * know if the ring buffer has been disabled, but it can handle
1015 	 * races of where it gets disabled but we still do a record.
1016 	 * As the check is in the fast path of the tracers, it is more
1017 	 * important to be fast than accurate.
1018 	 */
1019 	tr->buffer_disabled = 1;
1020 	/* Make the flag seen by readers */
1021 	smp_wmb();
1022 }
1023 
1024 /**
1025  * tracing_off - turn off tracing buffers
1026  *
1027  * This function stops the tracing buffers from recording data.
1028  * It does not disable any overhead the tracers themselves may
1029  * be causing. This function simply causes all recording to
1030  * the ring buffers to fail.
1031  */
1032 void tracing_off(void)
1033 {
1034 	tracer_tracing_off(&global_trace);
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_off);
1037 
1038 void disable_trace_on_warning(void)
1039 {
1040 	if (__disable_trace_on_warning)
1041 		tracing_off();
1042 }
1043 
1044 /**
1045  * tracer_tracing_is_on - show real state of ring buffer enabled
1046  * @tr : the trace array to know if ring buffer is enabled
1047  *
1048  * Shows real state of the ring buffer if it is enabled or not.
1049  */
1050 int tracer_tracing_is_on(struct trace_array *tr)
1051 {
1052 	if (tr->trace_buffer.buffer)
1053 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1054 	return !tr->buffer_disabled;
1055 }
1056 
1057 /**
1058  * tracing_is_on - show state of ring buffers enabled
1059  */
1060 int tracing_is_on(void)
1061 {
1062 	return tracer_tracing_is_on(&global_trace);
1063 }
1064 EXPORT_SYMBOL_GPL(tracing_is_on);
1065 
1066 static int __init set_buf_size(char *str)
1067 {
1068 	unsigned long buf_size;
1069 
1070 	if (!str)
1071 		return 0;
1072 	buf_size = memparse(str, &str);
1073 	/* nr_entries can not be zero */
1074 	if (buf_size == 0)
1075 		return 0;
1076 	trace_buf_size = buf_size;
1077 	return 1;
1078 }
1079 __setup("trace_buf_size=", set_buf_size);
1080 
1081 static int __init set_tracing_thresh(char *str)
1082 {
1083 	unsigned long threshold;
1084 	int ret;
1085 
1086 	if (!str)
1087 		return 0;
1088 	ret = kstrtoul(str, 0, &threshold);
1089 	if (ret < 0)
1090 		return 0;
1091 	tracing_thresh = threshold * 1000;
1092 	return 1;
1093 }
1094 __setup("tracing_thresh=", set_tracing_thresh);
1095 
1096 unsigned long nsecs_to_usecs(unsigned long nsecs)
1097 {
1098 	return nsecs / 1000;
1099 }
1100 
1101 /*
1102  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1103  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1104  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1105  * of strings in the order that the enums were defined.
1106  */
1107 #undef C
1108 #define C(a, b) b
1109 
1110 /* These must match the bit postions in trace_iterator_flags */
1111 static const char *trace_options[] = {
1112 	TRACE_FLAGS
1113 	NULL
1114 };
1115 
1116 static struct {
1117 	u64 (*func)(void);
1118 	const char *name;
1119 	int in_ns;		/* is this clock in nanoseconds? */
1120 } trace_clocks[] = {
1121 	{ trace_clock_local,		"local",	1 },
1122 	{ trace_clock_global,		"global",	1 },
1123 	{ trace_clock_counter,		"counter",	0 },
1124 	{ trace_clock_jiffies,		"uptime",	0 },
1125 	{ trace_clock,			"perf",		1 },
1126 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1127 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1128 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1129 	ARCH_TRACE_CLOCKS
1130 };
1131 
1132 /*
1133  * trace_parser_get_init - gets the buffer for trace parser
1134  */
1135 int trace_parser_get_init(struct trace_parser *parser, int size)
1136 {
1137 	memset(parser, 0, sizeof(*parser));
1138 
1139 	parser->buffer = kmalloc(size, GFP_KERNEL);
1140 	if (!parser->buffer)
1141 		return 1;
1142 
1143 	parser->size = size;
1144 	return 0;
1145 }
1146 
1147 /*
1148  * trace_parser_put - frees the buffer for trace parser
1149  */
1150 void trace_parser_put(struct trace_parser *parser)
1151 {
1152 	kfree(parser->buffer);
1153 }
1154 
1155 /*
1156  * trace_get_user - reads the user input string separated by  space
1157  * (matched by isspace(ch))
1158  *
1159  * For each string found the 'struct trace_parser' is updated,
1160  * and the function returns.
1161  *
1162  * Returns number of bytes read.
1163  *
1164  * See kernel/trace/trace.h for 'struct trace_parser' details.
1165  */
1166 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1167 	size_t cnt, loff_t *ppos)
1168 {
1169 	char ch;
1170 	size_t read = 0;
1171 	ssize_t ret;
1172 
1173 	if (!*ppos)
1174 		trace_parser_clear(parser);
1175 
1176 	ret = get_user(ch, ubuf++);
1177 	if (ret)
1178 		goto out;
1179 
1180 	read++;
1181 	cnt--;
1182 
1183 	/*
1184 	 * The parser is not finished with the last write,
1185 	 * continue reading the user input without skipping spaces.
1186 	 */
1187 	if (!parser->cont) {
1188 		/* skip white space */
1189 		while (cnt && isspace(ch)) {
1190 			ret = get_user(ch, ubuf++);
1191 			if (ret)
1192 				goto out;
1193 			read++;
1194 			cnt--;
1195 		}
1196 
1197 		/* only spaces were written */
1198 		if (isspace(ch)) {
1199 			*ppos += read;
1200 			ret = read;
1201 			goto out;
1202 		}
1203 
1204 		parser->idx = 0;
1205 	}
1206 
1207 	/* read the non-space input */
1208 	while (cnt && !isspace(ch)) {
1209 		if (parser->idx < parser->size - 1)
1210 			parser->buffer[parser->idx++] = ch;
1211 		else {
1212 			ret = -EINVAL;
1213 			goto out;
1214 		}
1215 		ret = get_user(ch, ubuf++);
1216 		if (ret)
1217 			goto out;
1218 		read++;
1219 		cnt--;
1220 	}
1221 
1222 	/* We either got finished input or we have to wait for another call. */
1223 	if (isspace(ch)) {
1224 		parser->buffer[parser->idx] = 0;
1225 		parser->cont = false;
1226 	} else if (parser->idx < parser->size - 1) {
1227 		parser->cont = true;
1228 		parser->buffer[parser->idx++] = ch;
1229 	} else {
1230 		ret = -EINVAL;
1231 		goto out;
1232 	}
1233 
1234 	*ppos += read;
1235 	ret = read;
1236 
1237 out:
1238 	return ret;
1239 }
1240 
1241 /* TODO add a seq_buf_to_buffer() */
1242 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1243 {
1244 	int len;
1245 
1246 	if (trace_seq_used(s) <= s->seq.readpos)
1247 		return -EBUSY;
1248 
1249 	len = trace_seq_used(s) - s->seq.readpos;
1250 	if (cnt > len)
1251 		cnt = len;
1252 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1253 
1254 	s->seq.readpos += cnt;
1255 	return cnt;
1256 }
1257 
1258 unsigned long __read_mostly	tracing_thresh;
1259 
1260 #ifdef CONFIG_TRACER_MAX_TRACE
1261 /*
1262  * Copy the new maximum trace into the separate maximum-trace
1263  * structure. (this way the maximum trace is permanently saved,
1264  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1265  */
1266 static void
1267 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1268 {
1269 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1270 	struct trace_buffer *max_buf = &tr->max_buffer;
1271 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1272 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1273 
1274 	max_buf->cpu = cpu;
1275 	max_buf->time_start = data->preempt_timestamp;
1276 
1277 	max_data->saved_latency = tr->max_latency;
1278 	max_data->critical_start = data->critical_start;
1279 	max_data->critical_end = data->critical_end;
1280 
1281 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1282 	max_data->pid = tsk->pid;
1283 	/*
1284 	 * If tsk == current, then use current_uid(), as that does not use
1285 	 * RCU. The irq tracer can be called out of RCU scope.
1286 	 */
1287 	if (tsk == current)
1288 		max_data->uid = current_uid();
1289 	else
1290 		max_data->uid = task_uid(tsk);
1291 
1292 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1293 	max_data->policy = tsk->policy;
1294 	max_data->rt_priority = tsk->rt_priority;
1295 
1296 	/* record this tasks comm */
1297 	tracing_record_cmdline(tsk);
1298 }
1299 
1300 /**
1301  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1302  * @tr: tracer
1303  * @tsk: the task with the latency
1304  * @cpu: The cpu that initiated the trace.
1305  *
1306  * Flip the buffers between the @tr and the max_tr and record information
1307  * about which task was the cause of this latency.
1308  */
1309 void
1310 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1311 {
1312 	struct ring_buffer *buf;
1313 
1314 	if (tr->stop_count)
1315 		return;
1316 
1317 	WARN_ON_ONCE(!irqs_disabled());
1318 
1319 	if (!tr->allocated_snapshot) {
1320 		/* Only the nop tracer should hit this when disabling */
1321 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1322 		return;
1323 	}
1324 
1325 	arch_spin_lock(&tr->max_lock);
1326 
1327 	buf = tr->trace_buffer.buffer;
1328 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1329 	tr->max_buffer.buffer = buf;
1330 
1331 	__update_max_tr(tr, tsk, cpu);
1332 	arch_spin_unlock(&tr->max_lock);
1333 }
1334 
1335 /**
1336  * update_max_tr_single - only copy one trace over, and reset the rest
1337  * @tr - tracer
1338  * @tsk - task with the latency
1339  * @cpu - the cpu of the buffer to copy.
1340  *
1341  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1342  */
1343 void
1344 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1345 {
1346 	int ret;
1347 
1348 	if (tr->stop_count)
1349 		return;
1350 
1351 	WARN_ON_ONCE(!irqs_disabled());
1352 	if (!tr->allocated_snapshot) {
1353 		/* Only the nop tracer should hit this when disabling */
1354 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1355 		return;
1356 	}
1357 
1358 	arch_spin_lock(&tr->max_lock);
1359 
1360 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1361 
1362 	if (ret == -EBUSY) {
1363 		/*
1364 		 * We failed to swap the buffer due to a commit taking
1365 		 * place on this CPU. We fail to record, but we reset
1366 		 * the max trace buffer (no one writes directly to it)
1367 		 * and flag that it failed.
1368 		 */
1369 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1370 			"Failed to swap buffers due to commit in progress\n");
1371 	}
1372 
1373 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1374 
1375 	__update_max_tr(tr, tsk, cpu);
1376 	arch_spin_unlock(&tr->max_lock);
1377 }
1378 #endif /* CONFIG_TRACER_MAX_TRACE */
1379 
1380 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1381 {
1382 	/* Iterators are static, they should be filled or empty */
1383 	if (trace_buffer_iter(iter, iter->cpu_file))
1384 		return 0;
1385 
1386 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1387 				full);
1388 }
1389 
1390 #ifdef CONFIG_FTRACE_STARTUP_TEST
1391 static int run_tracer_selftest(struct tracer *type)
1392 {
1393 	struct trace_array *tr = &global_trace;
1394 	struct tracer *saved_tracer = tr->current_trace;
1395 	int ret;
1396 
1397 	if (!type->selftest || tracing_selftest_disabled)
1398 		return 0;
1399 
1400 	/*
1401 	 * Run a selftest on this tracer.
1402 	 * Here we reset the trace buffer, and set the current
1403 	 * tracer to be this tracer. The tracer can then run some
1404 	 * internal tracing to verify that everything is in order.
1405 	 * If we fail, we do not register this tracer.
1406 	 */
1407 	tracing_reset_online_cpus(&tr->trace_buffer);
1408 
1409 	tr->current_trace = type;
1410 
1411 #ifdef CONFIG_TRACER_MAX_TRACE
1412 	if (type->use_max_tr) {
1413 		/* If we expanded the buffers, make sure the max is expanded too */
1414 		if (ring_buffer_expanded)
1415 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1416 					   RING_BUFFER_ALL_CPUS);
1417 		tr->allocated_snapshot = true;
1418 	}
1419 #endif
1420 
1421 	/* the test is responsible for initializing and enabling */
1422 	pr_info("Testing tracer %s: ", type->name);
1423 	ret = type->selftest(type, tr);
1424 	/* the test is responsible for resetting too */
1425 	tr->current_trace = saved_tracer;
1426 	if (ret) {
1427 		printk(KERN_CONT "FAILED!\n");
1428 		/* Add the warning after printing 'FAILED' */
1429 		WARN_ON(1);
1430 		return -1;
1431 	}
1432 	/* Only reset on passing, to avoid touching corrupted buffers */
1433 	tracing_reset_online_cpus(&tr->trace_buffer);
1434 
1435 #ifdef CONFIG_TRACER_MAX_TRACE
1436 	if (type->use_max_tr) {
1437 		tr->allocated_snapshot = false;
1438 
1439 		/* Shrink the max buffer again */
1440 		if (ring_buffer_expanded)
1441 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1442 					   RING_BUFFER_ALL_CPUS);
1443 	}
1444 #endif
1445 
1446 	printk(KERN_CONT "PASSED\n");
1447 	return 0;
1448 }
1449 #else
1450 static inline int run_tracer_selftest(struct tracer *type)
1451 {
1452 	return 0;
1453 }
1454 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1455 
1456 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1457 
1458 static void __init apply_trace_boot_options(void);
1459 
1460 /**
1461  * register_tracer - register a tracer with the ftrace system.
1462  * @type - the plugin for the tracer
1463  *
1464  * Register a new plugin tracer.
1465  */
1466 int __init register_tracer(struct tracer *type)
1467 {
1468 	struct tracer *t;
1469 	int ret = 0;
1470 
1471 	if (!type->name) {
1472 		pr_info("Tracer must have a name\n");
1473 		return -1;
1474 	}
1475 
1476 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1477 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1478 		return -1;
1479 	}
1480 
1481 	mutex_lock(&trace_types_lock);
1482 
1483 	tracing_selftest_running = true;
1484 
1485 	for (t = trace_types; t; t = t->next) {
1486 		if (strcmp(type->name, t->name) == 0) {
1487 			/* already found */
1488 			pr_info("Tracer %s already registered\n",
1489 				type->name);
1490 			ret = -1;
1491 			goto out;
1492 		}
1493 	}
1494 
1495 	if (!type->set_flag)
1496 		type->set_flag = &dummy_set_flag;
1497 	if (!type->flags) {
1498 		/*allocate a dummy tracer_flags*/
1499 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1500 		if (!type->flags) {
1501 			ret = -ENOMEM;
1502 			goto out;
1503 		}
1504 		type->flags->val = 0;
1505 		type->flags->opts = dummy_tracer_opt;
1506 	} else
1507 		if (!type->flags->opts)
1508 			type->flags->opts = dummy_tracer_opt;
1509 
1510 	/* store the tracer for __set_tracer_option */
1511 	type->flags->trace = type;
1512 
1513 	ret = run_tracer_selftest(type);
1514 	if (ret < 0)
1515 		goto out;
1516 
1517 	type->next = trace_types;
1518 	trace_types = type;
1519 	add_tracer_options(&global_trace, type);
1520 
1521  out:
1522 	tracing_selftest_running = false;
1523 	mutex_unlock(&trace_types_lock);
1524 
1525 	if (ret || !default_bootup_tracer)
1526 		goto out_unlock;
1527 
1528 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1529 		goto out_unlock;
1530 
1531 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1532 	/* Do we want this tracer to start on bootup? */
1533 	tracing_set_tracer(&global_trace, type->name);
1534 	default_bootup_tracer = NULL;
1535 
1536 	apply_trace_boot_options();
1537 
1538 	/* disable other selftests, since this will break it. */
1539 	tracing_selftest_disabled = true;
1540 #ifdef CONFIG_FTRACE_STARTUP_TEST
1541 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1542 	       type->name);
1543 #endif
1544 
1545  out_unlock:
1546 	return ret;
1547 }
1548 
1549 void tracing_reset(struct trace_buffer *buf, int cpu)
1550 {
1551 	struct ring_buffer *buffer = buf->buffer;
1552 
1553 	if (!buffer)
1554 		return;
1555 
1556 	ring_buffer_record_disable(buffer);
1557 
1558 	/* Make sure all commits have finished */
1559 	synchronize_sched();
1560 	ring_buffer_reset_cpu(buffer, cpu);
1561 
1562 	ring_buffer_record_enable(buffer);
1563 }
1564 
1565 void tracing_reset_online_cpus(struct trace_buffer *buf)
1566 {
1567 	struct ring_buffer *buffer = buf->buffer;
1568 	int cpu;
1569 
1570 	if (!buffer)
1571 		return;
1572 
1573 	ring_buffer_record_disable(buffer);
1574 
1575 	/* Make sure all commits have finished */
1576 	synchronize_sched();
1577 
1578 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1579 
1580 	for_each_online_cpu(cpu)
1581 		ring_buffer_reset_cpu(buffer, cpu);
1582 
1583 	ring_buffer_record_enable(buffer);
1584 }
1585 
1586 /* Must have trace_types_lock held */
1587 void tracing_reset_all_online_cpus(void)
1588 {
1589 	struct trace_array *tr;
1590 
1591 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1592 		tracing_reset_online_cpus(&tr->trace_buffer);
1593 #ifdef CONFIG_TRACER_MAX_TRACE
1594 		tracing_reset_online_cpus(&tr->max_buffer);
1595 #endif
1596 	}
1597 }
1598 
1599 #define SAVED_CMDLINES_DEFAULT 128
1600 #define NO_CMDLINE_MAP UINT_MAX
1601 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1602 struct saved_cmdlines_buffer {
1603 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1604 	unsigned *map_cmdline_to_pid;
1605 	unsigned cmdline_num;
1606 	int cmdline_idx;
1607 	char *saved_cmdlines;
1608 };
1609 static struct saved_cmdlines_buffer *savedcmd;
1610 
1611 /* temporary disable recording */
1612 static atomic_t trace_record_cmdline_disabled __read_mostly;
1613 
1614 static inline char *get_saved_cmdlines(int idx)
1615 {
1616 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1617 }
1618 
1619 static inline void set_cmdline(int idx, const char *cmdline)
1620 {
1621 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1622 }
1623 
1624 static int allocate_cmdlines_buffer(unsigned int val,
1625 				    struct saved_cmdlines_buffer *s)
1626 {
1627 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1628 					GFP_KERNEL);
1629 	if (!s->map_cmdline_to_pid)
1630 		return -ENOMEM;
1631 
1632 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1633 	if (!s->saved_cmdlines) {
1634 		kfree(s->map_cmdline_to_pid);
1635 		return -ENOMEM;
1636 	}
1637 
1638 	s->cmdline_idx = 0;
1639 	s->cmdline_num = val;
1640 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1641 	       sizeof(s->map_pid_to_cmdline));
1642 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1643 	       val * sizeof(*s->map_cmdline_to_pid));
1644 
1645 	return 0;
1646 }
1647 
1648 static int trace_create_savedcmd(void)
1649 {
1650 	int ret;
1651 
1652 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1653 	if (!savedcmd)
1654 		return -ENOMEM;
1655 
1656 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1657 	if (ret < 0) {
1658 		kfree(savedcmd);
1659 		savedcmd = NULL;
1660 		return -ENOMEM;
1661 	}
1662 
1663 	return 0;
1664 }
1665 
1666 int is_tracing_stopped(void)
1667 {
1668 	return global_trace.stop_count;
1669 }
1670 
1671 /**
1672  * tracing_start - quick start of the tracer
1673  *
1674  * If tracing is enabled but was stopped by tracing_stop,
1675  * this will start the tracer back up.
1676  */
1677 void tracing_start(void)
1678 {
1679 	struct ring_buffer *buffer;
1680 	unsigned long flags;
1681 
1682 	if (tracing_disabled)
1683 		return;
1684 
1685 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1686 	if (--global_trace.stop_count) {
1687 		if (global_trace.stop_count < 0) {
1688 			/* Someone screwed up their debugging */
1689 			WARN_ON_ONCE(1);
1690 			global_trace.stop_count = 0;
1691 		}
1692 		goto out;
1693 	}
1694 
1695 	/* Prevent the buffers from switching */
1696 	arch_spin_lock(&global_trace.max_lock);
1697 
1698 	buffer = global_trace.trace_buffer.buffer;
1699 	if (buffer)
1700 		ring_buffer_record_enable(buffer);
1701 
1702 #ifdef CONFIG_TRACER_MAX_TRACE
1703 	buffer = global_trace.max_buffer.buffer;
1704 	if (buffer)
1705 		ring_buffer_record_enable(buffer);
1706 #endif
1707 
1708 	arch_spin_unlock(&global_trace.max_lock);
1709 
1710  out:
1711 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1712 }
1713 
1714 static void tracing_start_tr(struct trace_array *tr)
1715 {
1716 	struct ring_buffer *buffer;
1717 	unsigned long flags;
1718 
1719 	if (tracing_disabled)
1720 		return;
1721 
1722 	/* If global, we need to also start the max tracer */
1723 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1724 		return tracing_start();
1725 
1726 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1727 
1728 	if (--tr->stop_count) {
1729 		if (tr->stop_count < 0) {
1730 			/* Someone screwed up their debugging */
1731 			WARN_ON_ONCE(1);
1732 			tr->stop_count = 0;
1733 		}
1734 		goto out;
1735 	}
1736 
1737 	buffer = tr->trace_buffer.buffer;
1738 	if (buffer)
1739 		ring_buffer_record_enable(buffer);
1740 
1741  out:
1742 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1743 }
1744 
1745 /**
1746  * tracing_stop - quick stop of the tracer
1747  *
1748  * Light weight way to stop tracing. Use in conjunction with
1749  * tracing_start.
1750  */
1751 void tracing_stop(void)
1752 {
1753 	struct ring_buffer *buffer;
1754 	unsigned long flags;
1755 
1756 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1757 	if (global_trace.stop_count++)
1758 		goto out;
1759 
1760 	/* Prevent the buffers from switching */
1761 	arch_spin_lock(&global_trace.max_lock);
1762 
1763 	buffer = global_trace.trace_buffer.buffer;
1764 	if (buffer)
1765 		ring_buffer_record_disable(buffer);
1766 
1767 #ifdef CONFIG_TRACER_MAX_TRACE
1768 	buffer = global_trace.max_buffer.buffer;
1769 	if (buffer)
1770 		ring_buffer_record_disable(buffer);
1771 #endif
1772 
1773 	arch_spin_unlock(&global_trace.max_lock);
1774 
1775  out:
1776 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1777 }
1778 
1779 static void tracing_stop_tr(struct trace_array *tr)
1780 {
1781 	struct ring_buffer *buffer;
1782 	unsigned long flags;
1783 
1784 	/* If global, we need to also stop the max tracer */
1785 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1786 		return tracing_stop();
1787 
1788 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1789 	if (tr->stop_count++)
1790 		goto out;
1791 
1792 	buffer = tr->trace_buffer.buffer;
1793 	if (buffer)
1794 		ring_buffer_record_disable(buffer);
1795 
1796  out:
1797 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1798 }
1799 
1800 void trace_stop_cmdline_recording(void);
1801 
1802 static int trace_save_cmdline(struct task_struct *tsk)
1803 {
1804 	unsigned pid, idx;
1805 
1806 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1807 		return 0;
1808 
1809 	/*
1810 	 * It's not the end of the world if we don't get
1811 	 * the lock, but we also don't want to spin
1812 	 * nor do we want to disable interrupts,
1813 	 * so if we miss here, then better luck next time.
1814 	 */
1815 	if (!arch_spin_trylock(&trace_cmdline_lock))
1816 		return 0;
1817 
1818 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1819 	if (idx == NO_CMDLINE_MAP) {
1820 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1821 
1822 		/*
1823 		 * Check whether the cmdline buffer at idx has a pid
1824 		 * mapped. We are going to overwrite that entry so we
1825 		 * need to clear the map_pid_to_cmdline. Otherwise we
1826 		 * would read the new comm for the old pid.
1827 		 */
1828 		pid = savedcmd->map_cmdline_to_pid[idx];
1829 		if (pid != NO_CMDLINE_MAP)
1830 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1831 
1832 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1833 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1834 
1835 		savedcmd->cmdline_idx = idx;
1836 	}
1837 
1838 	set_cmdline(idx, tsk->comm);
1839 
1840 	arch_spin_unlock(&trace_cmdline_lock);
1841 
1842 	return 1;
1843 }
1844 
1845 static void __trace_find_cmdline(int pid, char comm[])
1846 {
1847 	unsigned map;
1848 
1849 	if (!pid) {
1850 		strcpy(comm, "<idle>");
1851 		return;
1852 	}
1853 
1854 	if (WARN_ON_ONCE(pid < 0)) {
1855 		strcpy(comm, "<XXX>");
1856 		return;
1857 	}
1858 
1859 	if (pid > PID_MAX_DEFAULT) {
1860 		strcpy(comm, "<...>");
1861 		return;
1862 	}
1863 
1864 	map = savedcmd->map_pid_to_cmdline[pid];
1865 	if (map != NO_CMDLINE_MAP)
1866 		strcpy(comm, get_saved_cmdlines(map));
1867 	else
1868 		strcpy(comm, "<...>");
1869 }
1870 
1871 void trace_find_cmdline(int pid, char comm[])
1872 {
1873 	preempt_disable();
1874 	arch_spin_lock(&trace_cmdline_lock);
1875 
1876 	__trace_find_cmdline(pid, comm);
1877 
1878 	arch_spin_unlock(&trace_cmdline_lock);
1879 	preempt_enable();
1880 }
1881 
1882 void tracing_record_cmdline(struct task_struct *tsk)
1883 {
1884 	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1885 		return;
1886 
1887 	if (!__this_cpu_read(trace_cmdline_save))
1888 		return;
1889 
1890 	if (trace_save_cmdline(tsk))
1891 		__this_cpu_write(trace_cmdline_save, false);
1892 }
1893 
1894 void
1895 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1896 			     int pc)
1897 {
1898 	struct task_struct *tsk = current;
1899 
1900 	entry->preempt_count		= pc & 0xff;
1901 	entry->pid			= (tsk) ? tsk->pid : 0;
1902 	entry->flags =
1903 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1904 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1905 #else
1906 		TRACE_FLAG_IRQS_NOSUPPORT |
1907 #endif
1908 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1909 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1910 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1911 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1912 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1913 }
1914 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1915 
1916 static __always_inline void
1917 trace_event_setup(struct ring_buffer_event *event,
1918 		  int type, unsigned long flags, int pc)
1919 {
1920 	struct trace_entry *ent = ring_buffer_event_data(event);
1921 
1922 	tracing_generic_entry_update(ent, flags, pc);
1923 	ent->type = type;
1924 }
1925 
1926 struct ring_buffer_event *
1927 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1928 			  int type,
1929 			  unsigned long len,
1930 			  unsigned long flags, int pc)
1931 {
1932 	struct ring_buffer_event *event;
1933 
1934 	event = ring_buffer_lock_reserve(buffer, len);
1935 	if (event != NULL)
1936 		trace_event_setup(event, type, flags, pc);
1937 
1938 	return event;
1939 }
1940 
1941 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1942 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1943 static int trace_buffered_event_ref;
1944 
1945 /**
1946  * trace_buffered_event_enable - enable buffering events
1947  *
1948  * When events are being filtered, it is quicker to use a temporary
1949  * buffer to write the event data into if there's a likely chance
1950  * that it will not be committed. The discard of the ring buffer
1951  * is not as fast as committing, and is much slower than copying
1952  * a commit.
1953  *
1954  * When an event is to be filtered, allocate per cpu buffers to
1955  * write the event data into, and if the event is filtered and discarded
1956  * it is simply dropped, otherwise, the entire data is to be committed
1957  * in one shot.
1958  */
1959 void trace_buffered_event_enable(void)
1960 {
1961 	struct ring_buffer_event *event;
1962 	struct page *page;
1963 	int cpu;
1964 
1965 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1966 
1967 	if (trace_buffered_event_ref++)
1968 		return;
1969 
1970 	for_each_tracing_cpu(cpu) {
1971 		page = alloc_pages_node(cpu_to_node(cpu),
1972 					GFP_KERNEL | __GFP_NORETRY, 0);
1973 		if (!page)
1974 			goto failed;
1975 
1976 		event = page_address(page);
1977 		memset(event, 0, sizeof(*event));
1978 
1979 		per_cpu(trace_buffered_event, cpu) = event;
1980 
1981 		preempt_disable();
1982 		if (cpu == smp_processor_id() &&
1983 		    this_cpu_read(trace_buffered_event) !=
1984 		    per_cpu(trace_buffered_event, cpu))
1985 			WARN_ON_ONCE(1);
1986 		preempt_enable();
1987 	}
1988 
1989 	return;
1990  failed:
1991 	trace_buffered_event_disable();
1992 }
1993 
1994 static void enable_trace_buffered_event(void *data)
1995 {
1996 	/* Probably not needed, but do it anyway */
1997 	smp_rmb();
1998 	this_cpu_dec(trace_buffered_event_cnt);
1999 }
2000 
2001 static void disable_trace_buffered_event(void *data)
2002 {
2003 	this_cpu_inc(trace_buffered_event_cnt);
2004 }
2005 
2006 /**
2007  * trace_buffered_event_disable - disable buffering events
2008  *
2009  * When a filter is removed, it is faster to not use the buffered
2010  * events, and to commit directly into the ring buffer. Free up
2011  * the temp buffers when there are no more users. This requires
2012  * special synchronization with current events.
2013  */
2014 void trace_buffered_event_disable(void)
2015 {
2016 	int cpu;
2017 
2018 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2019 
2020 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2021 		return;
2022 
2023 	if (--trace_buffered_event_ref)
2024 		return;
2025 
2026 	preempt_disable();
2027 	/* For each CPU, set the buffer as used. */
2028 	smp_call_function_many(tracing_buffer_mask,
2029 			       disable_trace_buffered_event, NULL, 1);
2030 	preempt_enable();
2031 
2032 	/* Wait for all current users to finish */
2033 	synchronize_sched();
2034 
2035 	for_each_tracing_cpu(cpu) {
2036 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2037 		per_cpu(trace_buffered_event, cpu) = NULL;
2038 	}
2039 	/*
2040 	 * Make sure trace_buffered_event is NULL before clearing
2041 	 * trace_buffered_event_cnt.
2042 	 */
2043 	smp_wmb();
2044 
2045 	preempt_disable();
2046 	/* Do the work on each cpu */
2047 	smp_call_function_many(tracing_buffer_mask,
2048 			       enable_trace_buffered_event, NULL, 1);
2049 	preempt_enable();
2050 }
2051 
2052 void
2053 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2054 {
2055 	__this_cpu_write(trace_cmdline_save, true);
2056 
2057 	/* If this is the temp buffer, we need to commit fully */
2058 	if (this_cpu_read(trace_buffered_event) == event) {
2059 		/* Length is in event->array[0] */
2060 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
2061 		/* Release the temp buffer */
2062 		this_cpu_dec(trace_buffered_event_cnt);
2063 	} else
2064 		ring_buffer_unlock_commit(buffer, event);
2065 }
2066 
2067 static struct ring_buffer *temp_buffer;
2068 
2069 struct ring_buffer_event *
2070 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2071 			  struct trace_event_file *trace_file,
2072 			  int type, unsigned long len,
2073 			  unsigned long flags, int pc)
2074 {
2075 	struct ring_buffer_event *entry;
2076 	int val;
2077 
2078 	*current_rb = trace_file->tr->trace_buffer.buffer;
2079 
2080 	if ((trace_file->flags &
2081 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2082 	    (entry = this_cpu_read(trace_buffered_event))) {
2083 		/* Try to use the per cpu buffer first */
2084 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2085 		if (val == 1) {
2086 			trace_event_setup(entry, type, flags, pc);
2087 			entry->array[0] = len;
2088 			return entry;
2089 		}
2090 		this_cpu_dec(trace_buffered_event_cnt);
2091 	}
2092 
2093 	entry = trace_buffer_lock_reserve(*current_rb,
2094 					 type, len, flags, pc);
2095 	/*
2096 	 * If tracing is off, but we have triggers enabled
2097 	 * we still need to look at the event data. Use the temp_buffer
2098 	 * to store the trace event for the tigger to use. It's recusive
2099 	 * safe and will not be recorded anywhere.
2100 	 */
2101 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2102 		*current_rb = temp_buffer;
2103 		entry = trace_buffer_lock_reserve(*current_rb,
2104 						  type, len, flags, pc);
2105 	}
2106 	return entry;
2107 }
2108 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2109 
2110 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2111 				     struct ring_buffer *buffer,
2112 				     struct ring_buffer_event *event,
2113 				     unsigned long flags, int pc,
2114 				     struct pt_regs *regs)
2115 {
2116 	__buffer_unlock_commit(buffer, event);
2117 
2118 	/*
2119 	 * If regs is not set, then skip the following callers:
2120 	 *   trace_buffer_unlock_commit_regs
2121 	 *   event_trigger_unlock_commit
2122 	 *   trace_event_buffer_commit
2123 	 *   trace_event_raw_event_sched_switch
2124 	 * Note, we can still get here via blktrace, wakeup tracer
2125 	 * and mmiotrace, but that's ok if they lose a function or
2126 	 * two. They are that meaningful.
2127 	 */
2128 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2129 	ftrace_trace_userstack(buffer, flags, pc);
2130 }
2131 
2132 void
2133 trace_function(struct trace_array *tr,
2134 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2135 	       int pc)
2136 {
2137 	struct trace_event_call *call = &event_function;
2138 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2139 	struct ring_buffer_event *event;
2140 	struct ftrace_entry *entry;
2141 
2142 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2143 					  flags, pc);
2144 	if (!event)
2145 		return;
2146 	entry	= ring_buffer_event_data(event);
2147 	entry->ip			= ip;
2148 	entry->parent_ip		= parent_ip;
2149 
2150 	if (!call_filter_check_discard(call, entry, buffer, event))
2151 		__buffer_unlock_commit(buffer, event);
2152 }
2153 
2154 #ifdef CONFIG_STACKTRACE
2155 
2156 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2157 struct ftrace_stack {
2158 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2159 };
2160 
2161 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2162 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2163 
2164 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2165 				 unsigned long flags,
2166 				 int skip, int pc, struct pt_regs *regs)
2167 {
2168 	struct trace_event_call *call = &event_kernel_stack;
2169 	struct ring_buffer_event *event;
2170 	struct stack_entry *entry;
2171 	struct stack_trace trace;
2172 	int use_stack;
2173 	int size = FTRACE_STACK_ENTRIES;
2174 
2175 	trace.nr_entries	= 0;
2176 	trace.skip		= skip;
2177 
2178 	/*
2179 	 * Add two, for this function and the call to save_stack_trace()
2180 	 * If regs is set, then these functions will not be in the way.
2181 	 */
2182 	if (!regs)
2183 		trace.skip += 2;
2184 
2185 	/*
2186 	 * Since events can happen in NMIs there's no safe way to
2187 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2188 	 * or NMI comes in, it will just have to use the default
2189 	 * FTRACE_STACK_SIZE.
2190 	 */
2191 	preempt_disable_notrace();
2192 
2193 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2194 	/*
2195 	 * We don't need any atomic variables, just a barrier.
2196 	 * If an interrupt comes in, we don't care, because it would
2197 	 * have exited and put the counter back to what we want.
2198 	 * We just need a barrier to keep gcc from moving things
2199 	 * around.
2200 	 */
2201 	barrier();
2202 	if (use_stack == 1) {
2203 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2204 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2205 
2206 		if (regs)
2207 			save_stack_trace_regs(regs, &trace);
2208 		else
2209 			save_stack_trace(&trace);
2210 
2211 		if (trace.nr_entries > size)
2212 			size = trace.nr_entries;
2213 	} else
2214 		/* From now on, use_stack is a boolean */
2215 		use_stack = 0;
2216 
2217 	size *= sizeof(unsigned long);
2218 
2219 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2220 					  sizeof(*entry) + size, flags, pc);
2221 	if (!event)
2222 		goto out;
2223 	entry = ring_buffer_event_data(event);
2224 
2225 	memset(&entry->caller, 0, size);
2226 
2227 	if (use_stack)
2228 		memcpy(&entry->caller, trace.entries,
2229 		       trace.nr_entries * sizeof(unsigned long));
2230 	else {
2231 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2232 		trace.entries		= entry->caller;
2233 		if (regs)
2234 			save_stack_trace_regs(regs, &trace);
2235 		else
2236 			save_stack_trace(&trace);
2237 	}
2238 
2239 	entry->size = trace.nr_entries;
2240 
2241 	if (!call_filter_check_discard(call, entry, buffer, event))
2242 		__buffer_unlock_commit(buffer, event);
2243 
2244  out:
2245 	/* Again, don't let gcc optimize things here */
2246 	barrier();
2247 	__this_cpu_dec(ftrace_stack_reserve);
2248 	preempt_enable_notrace();
2249 
2250 }
2251 
2252 static inline void ftrace_trace_stack(struct trace_array *tr,
2253 				      struct ring_buffer *buffer,
2254 				      unsigned long flags,
2255 				      int skip, int pc, struct pt_regs *regs)
2256 {
2257 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2258 		return;
2259 
2260 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2261 }
2262 
2263 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2264 		   int pc)
2265 {
2266 	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2267 }
2268 
2269 /**
2270  * trace_dump_stack - record a stack back trace in the trace buffer
2271  * @skip: Number of functions to skip (helper handlers)
2272  */
2273 void trace_dump_stack(int skip)
2274 {
2275 	unsigned long flags;
2276 
2277 	if (tracing_disabled || tracing_selftest_running)
2278 		return;
2279 
2280 	local_save_flags(flags);
2281 
2282 	/*
2283 	 * Skip 3 more, seems to get us at the caller of
2284 	 * this function.
2285 	 */
2286 	skip += 3;
2287 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2288 			     flags, skip, preempt_count(), NULL);
2289 }
2290 
2291 static DEFINE_PER_CPU(int, user_stack_count);
2292 
2293 void
2294 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2295 {
2296 	struct trace_event_call *call = &event_user_stack;
2297 	struct ring_buffer_event *event;
2298 	struct userstack_entry *entry;
2299 	struct stack_trace trace;
2300 
2301 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2302 		return;
2303 
2304 	/*
2305 	 * NMIs can not handle page faults, even with fix ups.
2306 	 * The save user stack can (and often does) fault.
2307 	 */
2308 	if (unlikely(in_nmi()))
2309 		return;
2310 
2311 	/*
2312 	 * prevent recursion, since the user stack tracing may
2313 	 * trigger other kernel events.
2314 	 */
2315 	preempt_disable();
2316 	if (__this_cpu_read(user_stack_count))
2317 		goto out;
2318 
2319 	__this_cpu_inc(user_stack_count);
2320 
2321 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2322 					  sizeof(*entry), flags, pc);
2323 	if (!event)
2324 		goto out_drop_count;
2325 	entry	= ring_buffer_event_data(event);
2326 
2327 	entry->tgid		= current->tgid;
2328 	memset(&entry->caller, 0, sizeof(entry->caller));
2329 
2330 	trace.nr_entries	= 0;
2331 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2332 	trace.skip		= 0;
2333 	trace.entries		= entry->caller;
2334 
2335 	save_stack_trace_user(&trace);
2336 	if (!call_filter_check_discard(call, entry, buffer, event))
2337 		__buffer_unlock_commit(buffer, event);
2338 
2339  out_drop_count:
2340 	__this_cpu_dec(user_stack_count);
2341  out:
2342 	preempt_enable();
2343 }
2344 
2345 #ifdef UNUSED
2346 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2347 {
2348 	ftrace_trace_userstack(tr, flags, preempt_count());
2349 }
2350 #endif /* UNUSED */
2351 
2352 #endif /* CONFIG_STACKTRACE */
2353 
2354 /* created for use with alloc_percpu */
2355 struct trace_buffer_struct {
2356 	int nesting;
2357 	char buffer[4][TRACE_BUF_SIZE];
2358 };
2359 
2360 static struct trace_buffer_struct *trace_percpu_buffer;
2361 
2362 /*
2363  * Thise allows for lockless recording.  If we're nested too deeply, then
2364  * this returns NULL.
2365  */
2366 static char *get_trace_buf(void)
2367 {
2368 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2369 
2370 	if (!buffer || buffer->nesting >= 4)
2371 		return NULL;
2372 
2373 	return &buffer->buffer[buffer->nesting++][0];
2374 }
2375 
2376 static void put_trace_buf(void)
2377 {
2378 	this_cpu_dec(trace_percpu_buffer->nesting);
2379 }
2380 
2381 static int alloc_percpu_trace_buffer(void)
2382 {
2383 	struct trace_buffer_struct *buffers;
2384 
2385 	buffers = alloc_percpu(struct trace_buffer_struct);
2386 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2387 		return -ENOMEM;
2388 
2389 	trace_percpu_buffer = buffers;
2390 	return 0;
2391 }
2392 
2393 static int buffers_allocated;
2394 
2395 void trace_printk_init_buffers(void)
2396 {
2397 	if (buffers_allocated)
2398 		return;
2399 
2400 	if (alloc_percpu_trace_buffer())
2401 		return;
2402 
2403 	/* trace_printk() is for debug use only. Don't use it in production. */
2404 
2405 	pr_warn("\n");
2406 	pr_warn("**********************************************************\n");
2407 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2408 	pr_warn("**                                                      **\n");
2409 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2410 	pr_warn("**                                                      **\n");
2411 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2412 	pr_warn("** unsafe for production use.                           **\n");
2413 	pr_warn("**                                                      **\n");
2414 	pr_warn("** If you see this message and you are not debugging    **\n");
2415 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2416 	pr_warn("**                                                      **\n");
2417 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2418 	pr_warn("**********************************************************\n");
2419 
2420 	/* Expand the buffers to set size */
2421 	tracing_update_buffers();
2422 
2423 	buffers_allocated = 1;
2424 
2425 	/*
2426 	 * trace_printk_init_buffers() can be called by modules.
2427 	 * If that happens, then we need to start cmdline recording
2428 	 * directly here. If the global_trace.buffer is already
2429 	 * allocated here, then this was called by module code.
2430 	 */
2431 	if (global_trace.trace_buffer.buffer)
2432 		tracing_start_cmdline_record();
2433 }
2434 
2435 void trace_printk_start_comm(void)
2436 {
2437 	/* Start tracing comms if trace printk is set */
2438 	if (!buffers_allocated)
2439 		return;
2440 	tracing_start_cmdline_record();
2441 }
2442 
2443 static void trace_printk_start_stop_comm(int enabled)
2444 {
2445 	if (!buffers_allocated)
2446 		return;
2447 
2448 	if (enabled)
2449 		tracing_start_cmdline_record();
2450 	else
2451 		tracing_stop_cmdline_record();
2452 }
2453 
2454 /**
2455  * trace_vbprintk - write binary msg to tracing buffer
2456  *
2457  */
2458 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2459 {
2460 	struct trace_event_call *call = &event_bprint;
2461 	struct ring_buffer_event *event;
2462 	struct ring_buffer *buffer;
2463 	struct trace_array *tr = &global_trace;
2464 	struct bprint_entry *entry;
2465 	unsigned long flags;
2466 	char *tbuffer;
2467 	int len = 0, size, pc;
2468 
2469 	if (unlikely(tracing_selftest_running || tracing_disabled))
2470 		return 0;
2471 
2472 	/* Don't pollute graph traces with trace_vprintk internals */
2473 	pause_graph_tracing();
2474 
2475 	pc = preempt_count();
2476 	preempt_disable_notrace();
2477 
2478 	tbuffer = get_trace_buf();
2479 	if (!tbuffer) {
2480 		len = 0;
2481 		goto out_nobuffer;
2482 	}
2483 
2484 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2485 
2486 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2487 		goto out;
2488 
2489 	local_save_flags(flags);
2490 	size = sizeof(*entry) + sizeof(u32) * len;
2491 	buffer = tr->trace_buffer.buffer;
2492 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2493 					  flags, pc);
2494 	if (!event)
2495 		goto out;
2496 	entry = ring_buffer_event_data(event);
2497 	entry->ip			= ip;
2498 	entry->fmt			= fmt;
2499 
2500 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2501 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2502 		__buffer_unlock_commit(buffer, event);
2503 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2504 	}
2505 
2506 out:
2507 	put_trace_buf();
2508 
2509 out_nobuffer:
2510 	preempt_enable_notrace();
2511 	unpause_graph_tracing();
2512 
2513 	return len;
2514 }
2515 EXPORT_SYMBOL_GPL(trace_vbprintk);
2516 
2517 static int
2518 __trace_array_vprintk(struct ring_buffer *buffer,
2519 		      unsigned long ip, const char *fmt, va_list args)
2520 {
2521 	struct trace_event_call *call = &event_print;
2522 	struct ring_buffer_event *event;
2523 	int len = 0, size, pc;
2524 	struct print_entry *entry;
2525 	unsigned long flags;
2526 	char *tbuffer;
2527 
2528 	if (tracing_disabled || tracing_selftest_running)
2529 		return 0;
2530 
2531 	/* Don't pollute graph traces with trace_vprintk internals */
2532 	pause_graph_tracing();
2533 
2534 	pc = preempt_count();
2535 	preempt_disable_notrace();
2536 
2537 
2538 	tbuffer = get_trace_buf();
2539 	if (!tbuffer) {
2540 		len = 0;
2541 		goto out_nobuffer;
2542 	}
2543 
2544 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2545 
2546 	local_save_flags(flags);
2547 	size = sizeof(*entry) + len + 1;
2548 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2549 					  flags, pc);
2550 	if (!event)
2551 		goto out;
2552 	entry = ring_buffer_event_data(event);
2553 	entry->ip = ip;
2554 
2555 	memcpy(&entry->buf, tbuffer, len + 1);
2556 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2557 		__buffer_unlock_commit(buffer, event);
2558 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2559 	}
2560 
2561 out:
2562 	put_trace_buf();
2563 
2564 out_nobuffer:
2565 	preempt_enable_notrace();
2566 	unpause_graph_tracing();
2567 
2568 	return len;
2569 }
2570 
2571 int trace_array_vprintk(struct trace_array *tr,
2572 			unsigned long ip, const char *fmt, va_list args)
2573 {
2574 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2575 }
2576 
2577 int trace_array_printk(struct trace_array *tr,
2578 		       unsigned long ip, const char *fmt, ...)
2579 {
2580 	int ret;
2581 	va_list ap;
2582 
2583 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2584 		return 0;
2585 
2586 	va_start(ap, fmt);
2587 	ret = trace_array_vprintk(tr, ip, fmt, ap);
2588 	va_end(ap);
2589 	return ret;
2590 }
2591 
2592 int trace_array_printk_buf(struct ring_buffer *buffer,
2593 			   unsigned long ip, const char *fmt, ...)
2594 {
2595 	int ret;
2596 	va_list ap;
2597 
2598 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2599 		return 0;
2600 
2601 	va_start(ap, fmt);
2602 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2603 	va_end(ap);
2604 	return ret;
2605 }
2606 
2607 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2608 {
2609 	return trace_array_vprintk(&global_trace, ip, fmt, args);
2610 }
2611 EXPORT_SYMBOL_GPL(trace_vprintk);
2612 
2613 static void trace_iterator_increment(struct trace_iterator *iter)
2614 {
2615 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2616 
2617 	iter->idx++;
2618 	if (buf_iter)
2619 		ring_buffer_read(buf_iter, NULL);
2620 }
2621 
2622 static struct trace_entry *
2623 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2624 		unsigned long *lost_events)
2625 {
2626 	struct ring_buffer_event *event;
2627 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2628 
2629 	if (buf_iter)
2630 		event = ring_buffer_iter_peek(buf_iter, ts);
2631 	else
2632 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2633 					 lost_events);
2634 
2635 	if (event) {
2636 		iter->ent_size = ring_buffer_event_length(event);
2637 		return ring_buffer_event_data(event);
2638 	}
2639 	iter->ent_size = 0;
2640 	return NULL;
2641 }
2642 
2643 static struct trace_entry *
2644 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2645 		  unsigned long *missing_events, u64 *ent_ts)
2646 {
2647 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2648 	struct trace_entry *ent, *next = NULL;
2649 	unsigned long lost_events = 0, next_lost = 0;
2650 	int cpu_file = iter->cpu_file;
2651 	u64 next_ts = 0, ts;
2652 	int next_cpu = -1;
2653 	int next_size = 0;
2654 	int cpu;
2655 
2656 	/*
2657 	 * If we are in a per_cpu trace file, don't bother by iterating over
2658 	 * all cpu and peek directly.
2659 	 */
2660 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2661 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2662 			return NULL;
2663 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2664 		if (ent_cpu)
2665 			*ent_cpu = cpu_file;
2666 
2667 		return ent;
2668 	}
2669 
2670 	for_each_tracing_cpu(cpu) {
2671 
2672 		if (ring_buffer_empty_cpu(buffer, cpu))
2673 			continue;
2674 
2675 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2676 
2677 		/*
2678 		 * Pick the entry with the smallest timestamp:
2679 		 */
2680 		if (ent && (!next || ts < next_ts)) {
2681 			next = ent;
2682 			next_cpu = cpu;
2683 			next_ts = ts;
2684 			next_lost = lost_events;
2685 			next_size = iter->ent_size;
2686 		}
2687 	}
2688 
2689 	iter->ent_size = next_size;
2690 
2691 	if (ent_cpu)
2692 		*ent_cpu = next_cpu;
2693 
2694 	if (ent_ts)
2695 		*ent_ts = next_ts;
2696 
2697 	if (missing_events)
2698 		*missing_events = next_lost;
2699 
2700 	return next;
2701 }
2702 
2703 /* Find the next real entry, without updating the iterator itself */
2704 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2705 					  int *ent_cpu, u64 *ent_ts)
2706 {
2707 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2708 }
2709 
2710 /* Find the next real entry, and increment the iterator to the next entry */
2711 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2712 {
2713 	iter->ent = __find_next_entry(iter, &iter->cpu,
2714 				      &iter->lost_events, &iter->ts);
2715 
2716 	if (iter->ent)
2717 		trace_iterator_increment(iter);
2718 
2719 	return iter->ent ? iter : NULL;
2720 }
2721 
2722 static void trace_consume(struct trace_iterator *iter)
2723 {
2724 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2725 			    &iter->lost_events);
2726 }
2727 
2728 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2729 {
2730 	struct trace_iterator *iter = m->private;
2731 	int i = (int)*pos;
2732 	void *ent;
2733 
2734 	WARN_ON_ONCE(iter->leftover);
2735 
2736 	(*pos)++;
2737 
2738 	/* can't go backwards */
2739 	if (iter->idx > i)
2740 		return NULL;
2741 
2742 	if (iter->idx < 0)
2743 		ent = trace_find_next_entry_inc(iter);
2744 	else
2745 		ent = iter;
2746 
2747 	while (ent && iter->idx < i)
2748 		ent = trace_find_next_entry_inc(iter);
2749 
2750 	iter->pos = *pos;
2751 
2752 	return ent;
2753 }
2754 
2755 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2756 {
2757 	struct ring_buffer_event *event;
2758 	struct ring_buffer_iter *buf_iter;
2759 	unsigned long entries = 0;
2760 	u64 ts;
2761 
2762 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2763 
2764 	buf_iter = trace_buffer_iter(iter, cpu);
2765 	if (!buf_iter)
2766 		return;
2767 
2768 	ring_buffer_iter_reset(buf_iter);
2769 
2770 	/*
2771 	 * We could have the case with the max latency tracers
2772 	 * that a reset never took place on a cpu. This is evident
2773 	 * by the timestamp being before the start of the buffer.
2774 	 */
2775 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2776 		if (ts >= iter->trace_buffer->time_start)
2777 			break;
2778 		entries++;
2779 		ring_buffer_read(buf_iter, NULL);
2780 	}
2781 
2782 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2783 }
2784 
2785 /*
2786  * The current tracer is copied to avoid a global locking
2787  * all around.
2788  */
2789 static void *s_start(struct seq_file *m, loff_t *pos)
2790 {
2791 	struct trace_iterator *iter = m->private;
2792 	struct trace_array *tr = iter->tr;
2793 	int cpu_file = iter->cpu_file;
2794 	void *p = NULL;
2795 	loff_t l = 0;
2796 	int cpu;
2797 
2798 	/*
2799 	 * copy the tracer to avoid using a global lock all around.
2800 	 * iter->trace is a copy of current_trace, the pointer to the
2801 	 * name may be used instead of a strcmp(), as iter->trace->name
2802 	 * will point to the same string as current_trace->name.
2803 	 */
2804 	mutex_lock(&trace_types_lock);
2805 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2806 		*iter->trace = *tr->current_trace;
2807 	mutex_unlock(&trace_types_lock);
2808 
2809 #ifdef CONFIG_TRACER_MAX_TRACE
2810 	if (iter->snapshot && iter->trace->use_max_tr)
2811 		return ERR_PTR(-EBUSY);
2812 #endif
2813 
2814 	if (!iter->snapshot)
2815 		atomic_inc(&trace_record_cmdline_disabled);
2816 
2817 	if (*pos != iter->pos) {
2818 		iter->ent = NULL;
2819 		iter->cpu = 0;
2820 		iter->idx = -1;
2821 
2822 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2823 			for_each_tracing_cpu(cpu)
2824 				tracing_iter_reset(iter, cpu);
2825 		} else
2826 			tracing_iter_reset(iter, cpu_file);
2827 
2828 		iter->leftover = 0;
2829 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2830 			;
2831 
2832 	} else {
2833 		/*
2834 		 * If we overflowed the seq_file before, then we want
2835 		 * to just reuse the trace_seq buffer again.
2836 		 */
2837 		if (iter->leftover)
2838 			p = iter;
2839 		else {
2840 			l = *pos - 1;
2841 			p = s_next(m, p, &l);
2842 		}
2843 	}
2844 
2845 	trace_event_read_lock();
2846 	trace_access_lock(cpu_file);
2847 	return p;
2848 }
2849 
2850 static void s_stop(struct seq_file *m, void *p)
2851 {
2852 	struct trace_iterator *iter = m->private;
2853 
2854 #ifdef CONFIG_TRACER_MAX_TRACE
2855 	if (iter->snapshot && iter->trace->use_max_tr)
2856 		return;
2857 #endif
2858 
2859 	if (!iter->snapshot)
2860 		atomic_dec(&trace_record_cmdline_disabled);
2861 
2862 	trace_access_unlock(iter->cpu_file);
2863 	trace_event_read_unlock();
2864 }
2865 
2866 static void
2867 get_total_entries(struct trace_buffer *buf,
2868 		  unsigned long *total, unsigned long *entries)
2869 {
2870 	unsigned long count;
2871 	int cpu;
2872 
2873 	*total = 0;
2874 	*entries = 0;
2875 
2876 	for_each_tracing_cpu(cpu) {
2877 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2878 		/*
2879 		 * If this buffer has skipped entries, then we hold all
2880 		 * entries for the trace and we need to ignore the
2881 		 * ones before the time stamp.
2882 		 */
2883 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2884 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2885 			/* total is the same as the entries */
2886 			*total += count;
2887 		} else
2888 			*total += count +
2889 				ring_buffer_overrun_cpu(buf->buffer, cpu);
2890 		*entries += count;
2891 	}
2892 }
2893 
2894 static void print_lat_help_header(struct seq_file *m)
2895 {
2896 	seq_puts(m, "#                  _------=> CPU#            \n"
2897 		    "#                 / _-----=> irqs-off        \n"
2898 		    "#                | / _----=> need-resched    \n"
2899 		    "#                || / _---=> hardirq/softirq \n"
2900 		    "#                ||| / _--=> preempt-depth   \n"
2901 		    "#                |||| /     delay            \n"
2902 		    "#  cmd     pid   ||||| time  |   caller      \n"
2903 		    "#     \\   /      |||||  \\    |   /         \n");
2904 }
2905 
2906 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2907 {
2908 	unsigned long total;
2909 	unsigned long entries;
2910 
2911 	get_total_entries(buf, &total, &entries);
2912 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2913 		   entries, total, num_online_cpus());
2914 	seq_puts(m, "#\n");
2915 }
2916 
2917 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2918 {
2919 	print_event_info(buf, m);
2920 	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2921 		    "#              | |       |          |         |\n");
2922 }
2923 
2924 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2925 {
2926 	print_event_info(buf, m);
2927 	seq_puts(m, "#                              _-----=> irqs-off\n"
2928 		    "#                             / _----=> need-resched\n"
2929 		    "#                            | / _---=> hardirq/softirq\n"
2930 		    "#                            || / _--=> preempt-depth\n"
2931 		    "#                            ||| /     delay\n"
2932 		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2933 		    "#              | |       |   ||||       |         |\n");
2934 }
2935 
2936 void
2937 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2938 {
2939 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2940 	struct trace_buffer *buf = iter->trace_buffer;
2941 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2942 	struct tracer *type = iter->trace;
2943 	unsigned long entries;
2944 	unsigned long total;
2945 	const char *name = "preemption";
2946 
2947 	name = type->name;
2948 
2949 	get_total_entries(buf, &total, &entries);
2950 
2951 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2952 		   name, UTS_RELEASE);
2953 	seq_puts(m, "# -----------------------------------"
2954 		 "---------------------------------\n");
2955 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2956 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2957 		   nsecs_to_usecs(data->saved_latency),
2958 		   entries,
2959 		   total,
2960 		   buf->cpu,
2961 #if defined(CONFIG_PREEMPT_NONE)
2962 		   "server",
2963 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2964 		   "desktop",
2965 #elif defined(CONFIG_PREEMPT)
2966 		   "preempt",
2967 #else
2968 		   "unknown",
2969 #endif
2970 		   /* These are reserved for later use */
2971 		   0, 0, 0, 0);
2972 #ifdef CONFIG_SMP
2973 	seq_printf(m, " #P:%d)\n", num_online_cpus());
2974 #else
2975 	seq_puts(m, ")\n");
2976 #endif
2977 	seq_puts(m, "#    -----------------\n");
2978 	seq_printf(m, "#    | task: %.16s-%d "
2979 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2980 		   data->comm, data->pid,
2981 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2982 		   data->policy, data->rt_priority);
2983 	seq_puts(m, "#    -----------------\n");
2984 
2985 	if (data->critical_start) {
2986 		seq_puts(m, "#  => started at: ");
2987 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2988 		trace_print_seq(m, &iter->seq);
2989 		seq_puts(m, "\n#  => ended at:   ");
2990 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2991 		trace_print_seq(m, &iter->seq);
2992 		seq_puts(m, "\n#\n");
2993 	}
2994 
2995 	seq_puts(m, "#\n");
2996 }
2997 
2998 static void test_cpu_buff_start(struct trace_iterator *iter)
2999 {
3000 	struct trace_seq *s = &iter->seq;
3001 	struct trace_array *tr = iter->tr;
3002 
3003 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3004 		return;
3005 
3006 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3007 		return;
3008 
3009 	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3010 		return;
3011 
3012 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3013 		return;
3014 
3015 	if (iter->started)
3016 		cpumask_set_cpu(iter->cpu, iter->started);
3017 
3018 	/* Don't print started cpu buffer for the first entry of the trace */
3019 	if (iter->idx > 1)
3020 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3021 				iter->cpu);
3022 }
3023 
3024 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3025 {
3026 	struct trace_array *tr = iter->tr;
3027 	struct trace_seq *s = &iter->seq;
3028 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3029 	struct trace_entry *entry;
3030 	struct trace_event *event;
3031 
3032 	entry = iter->ent;
3033 
3034 	test_cpu_buff_start(iter);
3035 
3036 	event = ftrace_find_event(entry->type);
3037 
3038 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3039 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3040 			trace_print_lat_context(iter);
3041 		else
3042 			trace_print_context(iter);
3043 	}
3044 
3045 	if (trace_seq_has_overflowed(s))
3046 		return TRACE_TYPE_PARTIAL_LINE;
3047 
3048 	if (event)
3049 		return event->funcs->trace(iter, sym_flags, event);
3050 
3051 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3052 
3053 	return trace_handle_return(s);
3054 }
3055 
3056 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3057 {
3058 	struct trace_array *tr = iter->tr;
3059 	struct trace_seq *s = &iter->seq;
3060 	struct trace_entry *entry;
3061 	struct trace_event *event;
3062 
3063 	entry = iter->ent;
3064 
3065 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3066 		trace_seq_printf(s, "%d %d %llu ",
3067 				 entry->pid, iter->cpu, iter->ts);
3068 
3069 	if (trace_seq_has_overflowed(s))
3070 		return TRACE_TYPE_PARTIAL_LINE;
3071 
3072 	event = ftrace_find_event(entry->type);
3073 	if (event)
3074 		return event->funcs->raw(iter, 0, event);
3075 
3076 	trace_seq_printf(s, "%d ?\n", entry->type);
3077 
3078 	return trace_handle_return(s);
3079 }
3080 
3081 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3082 {
3083 	struct trace_array *tr = iter->tr;
3084 	struct trace_seq *s = &iter->seq;
3085 	unsigned char newline = '\n';
3086 	struct trace_entry *entry;
3087 	struct trace_event *event;
3088 
3089 	entry = iter->ent;
3090 
3091 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3092 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3093 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3094 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3095 		if (trace_seq_has_overflowed(s))
3096 			return TRACE_TYPE_PARTIAL_LINE;
3097 	}
3098 
3099 	event = ftrace_find_event(entry->type);
3100 	if (event) {
3101 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3102 		if (ret != TRACE_TYPE_HANDLED)
3103 			return ret;
3104 	}
3105 
3106 	SEQ_PUT_FIELD(s, newline);
3107 
3108 	return trace_handle_return(s);
3109 }
3110 
3111 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3112 {
3113 	struct trace_array *tr = iter->tr;
3114 	struct trace_seq *s = &iter->seq;
3115 	struct trace_entry *entry;
3116 	struct trace_event *event;
3117 
3118 	entry = iter->ent;
3119 
3120 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3121 		SEQ_PUT_FIELD(s, entry->pid);
3122 		SEQ_PUT_FIELD(s, iter->cpu);
3123 		SEQ_PUT_FIELD(s, iter->ts);
3124 		if (trace_seq_has_overflowed(s))
3125 			return TRACE_TYPE_PARTIAL_LINE;
3126 	}
3127 
3128 	event = ftrace_find_event(entry->type);
3129 	return event ? event->funcs->binary(iter, 0, event) :
3130 		TRACE_TYPE_HANDLED;
3131 }
3132 
3133 int trace_empty(struct trace_iterator *iter)
3134 {
3135 	struct ring_buffer_iter *buf_iter;
3136 	int cpu;
3137 
3138 	/* If we are looking at one CPU buffer, only check that one */
3139 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3140 		cpu = iter->cpu_file;
3141 		buf_iter = trace_buffer_iter(iter, cpu);
3142 		if (buf_iter) {
3143 			if (!ring_buffer_iter_empty(buf_iter))
3144 				return 0;
3145 		} else {
3146 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3147 				return 0;
3148 		}
3149 		return 1;
3150 	}
3151 
3152 	for_each_tracing_cpu(cpu) {
3153 		buf_iter = trace_buffer_iter(iter, cpu);
3154 		if (buf_iter) {
3155 			if (!ring_buffer_iter_empty(buf_iter))
3156 				return 0;
3157 		} else {
3158 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3159 				return 0;
3160 		}
3161 	}
3162 
3163 	return 1;
3164 }
3165 
3166 /*  Called with trace_event_read_lock() held. */
3167 enum print_line_t print_trace_line(struct trace_iterator *iter)
3168 {
3169 	struct trace_array *tr = iter->tr;
3170 	unsigned long trace_flags = tr->trace_flags;
3171 	enum print_line_t ret;
3172 
3173 	if (iter->lost_events) {
3174 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3175 				 iter->cpu, iter->lost_events);
3176 		if (trace_seq_has_overflowed(&iter->seq))
3177 			return TRACE_TYPE_PARTIAL_LINE;
3178 	}
3179 
3180 	if (iter->trace && iter->trace->print_line) {
3181 		ret = iter->trace->print_line(iter);
3182 		if (ret != TRACE_TYPE_UNHANDLED)
3183 			return ret;
3184 	}
3185 
3186 	if (iter->ent->type == TRACE_BPUTS &&
3187 			trace_flags & TRACE_ITER_PRINTK &&
3188 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3189 		return trace_print_bputs_msg_only(iter);
3190 
3191 	if (iter->ent->type == TRACE_BPRINT &&
3192 			trace_flags & TRACE_ITER_PRINTK &&
3193 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3194 		return trace_print_bprintk_msg_only(iter);
3195 
3196 	if (iter->ent->type == TRACE_PRINT &&
3197 			trace_flags & TRACE_ITER_PRINTK &&
3198 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3199 		return trace_print_printk_msg_only(iter);
3200 
3201 	if (trace_flags & TRACE_ITER_BIN)
3202 		return print_bin_fmt(iter);
3203 
3204 	if (trace_flags & TRACE_ITER_HEX)
3205 		return print_hex_fmt(iter);
3206 
3207 	if (trace_flags & TRACE_ITER_RAW)
3208 		return print_raw_fmt(iter);
3209 
3210 	return print_trace_fmt(iter);
3211 }
3212 
3213 void trace_latency_header(struct seq_file *m)
3214 {
3215 	struct trace_iterator *iter = m->private;
3216 	struct trace_array *tr = iter->tr;
3217 
3218 	/* print nothing if the buffers are empty */
3219 	if (trace_empty(iter))
3220 		return;
3221 
3222 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3223 		print_trace_header(m, iter);
3224 
3225 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3226 		print_lat_help_header(m);
3227 }
3228 
3229 void trace_default_header(struct seq_file *m)
3230 {
3231 	struct trace_iterator *iter = m->private;
3232 	struct trace_array *tr = iter->tr;
3233 	unsigned long trace_flags = tr->trace_flags;
3234 
3235 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3236 		return;
3237 
3238 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3239 		/* print nothing if the buffers are empty */
3240 		if (trace_empty(iter))
3241 			return;
3242 		print_trace_header(m, iter);
3243 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3244 			print_lat_help_header(m);
3245 	} else {
3246 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3247 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3248 				print_func_help_header_irq(iter->trace_buffer, m);
3249 			else
3250 				print_func_help_header(iter->trace_buffer, m);
3251 		}
3252 	}
3253 }
3254 
3255 static void test_ftrace_alive(struct seq_file *m)
3256 {
3257 	if (!ftrace_is_dead())
3258 		return;
3259 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3260 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3261 }
3262 
3263 #ifdef CONFIG_TRACER_MAX_TRACE
3264 static void show_snapshot_main_help(struct seq_file *m)
3265 {
3266 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3267 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3268 		    "#                      Takes a snapshot of the main buffer.\n"
3269 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3270 		    "#                      (Doesn't have to be '2' works with any number that\n"
3271 		    "#                       is not a '0' or '1')\n");
3272 }
3273 
3274 static void show_snapshot_percpu_help(struct seq_file *m)
3275 {
3276 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3277 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3278 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3279 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3280 #else
3281 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3282 		    "#                     Must use main snapshot file to allocate.\n");
3283 #endif
3284 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3285 		    "#                      (Doesn't have to be '2' works with any number that\n"
3286 		    "#                       is not a '0' or '1')\n");
3287 }
3288 
3289 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3290 {
3291 	if (iter->tr->allocated_snapshot)
3292 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3293 	else
3294 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3295 
3296 	seq_puts(m, "# Snapshot commands:\n");
3297 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3298 		show_snapshot_main_help(m);
3299 	else
3300 		show_snapshot_percpu_help(m);
3301 }
3302 #else
3303 /* Should never be called */
3304 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3305 #endif
3306 
3307 static int s_show(struct seq_file *m, void *v)
3308 {
3309 	struct trace_iterator *iter = v;
3310 	int ret;
3311 
3312 	if (iter->ent == NULL) {
3313 		if (iter->tr) {
3314 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3315 			seq_puts(m, "#\n");
3316 			test_ftrace_alive(m);
3317 		}
3318 		if (iter->snapshot && trace_empty(iter))
3319 			print_snapshot_help(m, iter);
3320 		else if (iter->trace && iter->trace->print_header)
3321 			iter->trace->print_header(m);
3322 		else
3323 			trace_default_header(m);
3324 
3325 	} else if (iter->leftover) {
3326 		/*
3327 		 * If we filled the seq_file buffer earlier, we
3328 		 * want to just show it now.
3329 		 */
3330 		ret = trace_print_seq(m, &iter->seq);
3331 
3332 		/* ret should this time be zero, but you never know */
3333 		iter->leftover = ret;
3334 
3335 	} else {
3336 		print_trace_line(iter);
3337 		ret = trace_print_seq(m, &iter->seq);
3338 		/*
3339 		 * If we overflow the seq_file buffer, then it will
3340 		 * ask us for this data again at start up.
3341 		 * Use that instead.
3342 		 *  ret is 0 if seq_file write succeeded.
3343 		 *        -1 otherwise.
3344 		 */
3345 		iter->leftover = ret;
3346 	}
3347 
3348 	return 0;
3349 }
3350 
3351 /*
3352  * Should be used after trace_array_get(), trace_types_lock
3353  * ensures that i_cdev was already initialized.
3354  */
3355 static inline int tracing_get_cpu(struct inode *inode)
3356 {
3357 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3358 		return (long)inode->i_cdev - 1;
3359 	return RING_BUFFER_ALL_CPUS;
3360 }
3361 
3362 static const struct seq_operations tracer_seq_ops = {
3363 	.start		= s_start,
3364 	.next		= s_next,
3365 	.stop		= s_stop,
3366 	.show		= s_show,
3367 };
3368 
3369 static struct trace_iterator *
3370 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3371 {
3372 	struct trace_array *tr = inode->i_private;
3373 	struct trace_iterator *iter;
3374 	int cpu;
3375 
3376 	if (tracing_disabled)
3377 		return ERR_PTR(-ENODEV);
3378 
3379 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3380 	if (!iter)
3381 		return ERR_PTR(-ENOMEM);
3382 
3383 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3384 				    GFP_KERNEL);
3385 	if (!iter->buffer_iter)
3386 		goto release;
3387 
3388 	/*
3389 	 * We make a copy of the current tracer to avoid concurrent
3390 	 * changes on it while we are reading.
3391 	 */
3392 	mutex_lock(&trace_types_lock);
3393 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3394 	if (!iter->trace)
3395 		goto fail;
3396 
3397 	*iter->trace = *tr->current_trace;
3398 
3399 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3400 		goto fail;
3401 
3402 	iter->tr = tr;
3403 
3404 #ifdef CONFIG_TRACER_MAX_TRACE
3405 	/* Currently only the top directory has a snapshot */
3406 	if (tr->current_trace->print_max || snapshot)
3407 		iter->trace_buffer = &tr->max_buffer;
3408 	else
3409 #endif
3410 		iter->trace_buffer = &tr->trace_buffer;
3411 	iter->snapshot = snapshot;
3412 	iter->pos = -1;
3413 	iter->cpu_file = tracing_get_cpu(inode);
3414 	mutex_init(&iter->mutex);
3415 
3416 	/* Notify the tracer early; before we stop tracing. */
3417 	if (iter->trace && iter->trace->open)
3418 		iter->trace->open(iter);
3419 
3420 	/* Annotate start of buffers if we had overruns */
3421 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3422 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3423 
3424 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3425 	if (trace_clocks[tr->clock_id].in_ns)
3426 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3427 
3428 	/* stop the trace while dumping if we are not opening "snapshot" */
3429 	if (!iter->snapshot)
3430 		tracing_stop_tr(tr);
3431 
3432 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3433 		for_each_tracing_cpu(cpu) {
3434 			iter->buffer_iter[cpu] =
3435 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3436 		}
3437 		ring_buffer_read_prepare_sync();
3438 		for_each_tracing_cpu(cpu) {
3439 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3440 			tracing_iter_reset(iter, cpu);
3441 		}
3442 	} else {
3443 		cpu = iter->cpu_file;
3444 		iter->buffer_iter[cpu] =
3445 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3446 		ring_buffer_read_prepare_sync();
3447 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3448 		tracing_iter_reset(iter, cpu);
3449 	}
3450 
3451 	mutex_unlock(&trace_types_lock);
3452 
3453 	return iter;
3454 
3455  fail:
3456 	mutex_unlock(&trace_types_lock);
3457 	kfree(iter->trace);
3458 	kfree(iter->buffer_iter);
3459 release:
3460 	seq_release_private(inode, file);
3461 	return ERR_PTR(-ENOMEM);
3462 }
3463 
3464 int tracing_open_generic(struct inode *inode, struct file *filp)
3465 {
3466 	if (tracing_disabled)
3467 		return -ENODEV;
3468 
3469 	filp->private_data = inode->i_private;
3470 	return 0;
3471 }
3472 
3473 bool tracing_is_disabled(void)
3474 {
3475 	return (tracing_disabled) ? true: false;
3476 }
3477 
3478 /*
3479  * Open and update trace_array ref count.
3480  * Must have the current trace_array passed to it.
3481  */
3482 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3483 {
3484 	struct trace_array *tr = inode->i_private;
3485 
3486 	if (tracing_disabled)
3487 		return -ENODEV;
3488 
3489 	if (trace_array_get(tr) < 0)
3490 		return -ENODEV;
3491 
3492 	filp->private_data = inode->i_private;
3493 
3494 	return 0;
3495 }
3496 
3497 static int tracing_release(struct inode *inode, struct file *file)
3498 {
3499 	struct trace_array *tr = inode->i_private;
3500 	struct seq_file *m = file->private_data;
3501 	struct trace_iterator *iter;
3502 	int cpu;
3503 
3504 	if (!(file->f_mode & FMODE_READ)) {
3505 		trace_array_put(tr);
3506 		return 0;
3507 	}
3508 
3509 	/* Writes do not use seq_file */
3510 	iter = m->private;
3511 	mutex_lock(&trace_types_lock);
3512 
3513 	for_each_tracing_cpu(cpu) {
3514 		if (iter->buffer_iter[cpu])
3515 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3516 	}
3517 
3518 	if (iter->trace && iter->trace->close)
3519 		iter->trace->close(iter);
3520 
3521 	if (!iter->snapshot)
3522 		/* reenable tracing if it was previously enabled */
3523 		tracing_start_tr(tr);
3524 
3525 	__trace_array_put(tr);
3526 
3527 	mutex_unlock(&trace_types_lock);
3528 
3529 	mutex_destroy(&iter->mutex);
3530 	free_cpumask_var(iter->started);
3531 	kfree(iter->trace);
3532 	kfree(iter->buffer_iter);
3533 	seq_release_private(inode, file);
3534 
3535 	return 0;
3536 }
3537 
3538 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3539 {
3540 	struct trace_array *tr = inode->i_private;
3541 
3542 	trace_array_put(tr);
3543 	return 0;
3544 }
3545 
3546 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3547 {
3548 	struct trace_array *tr = inode->i_private;
3549 
3550 	trace_array_put(tr);
3551 
3552 	return single_release(inode, file);
3553 }
3554 
3555 static int tracing_open(struct inode *inode, struct file *file)
3556 {
3557 	struct trace_array *tr = inode->i_private;
3558 	struct trace_iterator *iter;
3559 	int ret = 0;
3560 
3561 	if (trace_array_get(tr) < 0)
3562 		return -ENODEV;
3563 
3564 	/* If this file was open for write, then erase contents */
3565 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3566 		int cpu = tracing_get_cpu(inode);
3567 
3568 		if (cpu == RING_BUFFER_ALL_CPUS)
3569 			tracing_reset_online_cpus(&tr->trace_buffer);
3570 		else
3571 			tracing_reset(&tr->trace_buffer, cpu);
3572 	}
3573 
3574 	if (file->f_mode & FMODE_READ) {
3575 		iter = __tracing_open(inode, file, false);
3576 		if (IS_ERR(iter))
3577 			ret = PTR_ERR(iter);
3578 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3579 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3580 	}
3581 
3582 	if (ret < 0)
3583 		trace_array_put(tr);
3584 
3585 	return ret;
3586 }
3587 
3588 /*
3589  * Some tracers are not suitable for instance buffers.
3590  * A tracer is always available for the global array (toplevel)
3591  * or if it explicitly states that it is.
3592  */
3593 static bool
3594 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3595 {
3596 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3597 }
3598 
3599 /* Find the next tracer that this trace array may use */
3600 static struct tracer *
3601 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3602 {
3603 	while (t && !trace_ok_for_array(t, tr))
3604 		t = t->next;
3605 
3606 	return t;
3607 }
3608 
3609 static void *
3610 t_next(struct seq_file *m, void *v, loff_t *pos)
3611 {
3612 	struct trace_array *tr = m->private;
3613 	struct tracer *t = v;
3614 
3615 	(*pos)++;
3616 
3617 	if (t)
3618 		t = get_tracer_for_array(tr, t->next);
3619 
3620 	return t;
3621 }
3622 
3623 static void *t_start(struct seq_file *m, loff_t *pos)
3624 {
3625 	struct trace_array *tr = m->private;
3626 	struct tracer *t;
3627 	loff_t l = 0;
3628 
3629 	mutex_lock(&trace_types_lock);
3630 
3631 	t = get_tracer_for_array(tr, trace_types);
3632 	for (; t && l < *pos; t = t_next(m, t, &l))
3633 			;
3634 
3635 	return t;
3636 }
3637 
3638 static void t_stop(struct seq_file *m, void *p)
3639 {
3640 	mutex_unlock(&trace_types_lock);
3641 }
3642 
3643 static int t_show(struct seq_file *m, void *v)
3644 {
3645 	struct tracer *t = v;
3646 
3647 	if (!t)
3648 		return 0;
3649 
3650 	seq_puts(m, t->name);
3651 	if (t->next)
3652 		seq_putc(m, ' ');
3653 	else
3654 		seq_putc(m, '\n');
3655 
3656 	return 0;
3657 }
3658 
3659 static const struct seq_operations show_traces_seq_ops = {
3660 	.start		= t_start,
3661 	.next		= t_next,
3662 	.stop		= t_stop,
3663 	.show		= t_show,
3664 };
3665 
3666 static int show_traces_open(struct inode *inode, struct file *file)
3667 {
3668 	struct trace_array *tr = inode->i_private;
3669 	struct seq_file *m;
3670 	int ret;
3671 
3672 	if (tracing_disabled)
3673 		return -ENODEV;
3674 
3675 	ret = seq_open(file, &show_traces_seq_ops);
3676 	if (ret)
3677 		return ret;
3678 
3679 	m = file->private_data;
3680 	m->private = tr;
3681 
3682 	return 0;
3683 }
3684 
3685 static ssize_t
3686 tracing_write_stub(struct file *filp, const char __user *ubuf,
3687 		   size_t count, loff_t *ppos)
3688 {
3689 	return count;
3690 }
3691 
3692 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3693 {
3694 	int ret;
3695 
3696 	if (file->f_mode & FMODE_READ)
3697 		ret = seq_lseek(file, offset, whence);
3698 	else
3699 		file->f_pos = ret = 0;
3700 
3701 	return ret;
3702 }
3703 
3704 static const struct file_operations tracing_fops = {
3705 	.open		= tracing_open,
3706 	.read		= seq_read,
3707 	.write		= tracing_write_stub,
3708 	.llseek		= tracing_lseek,
3709 	.release	= tracing_release,
3710 };
3711 
3712 static const struct file_operations show_traces_fops = {
3713 	.open		= show_traces_open,
3714 	.read		= seq_read,
3715 	.release	= seq_release,
3716 	.llseek		= seq_lseek,
3717 };
3718 
3719 /*
3720  * The tracer itself will not take this lock, but still we want
3721  * to provide a consistent cpumask to user-space:
3722  */
3723 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3724 
3725 /*
3726  * Temporary storage for the character representation of the
3727  * CPU bitmask (and one more byte for the newline):
3728  */
3729 static char mask_str[NR_CPUS + 1];
3730 
3731 static ssize_t
3732 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3733 		     size_t count, loff_t *ppos)
3734 {
3735 	struct trace_array *tr = file_inode(filp)->i_private;
3736 	int len;
3737 
3738 	mutex_lock(&tracing_cpumask_update_lock);
3739 
3740 	len = snprintf(mask_str, count, "%*pb\n",
3741 		       cpumask_pr_args(tr->tracing_cpumask));
3742 	if (len >= count) {
3743 		count = -EINVAL;
3744 		goto out_err;
3745 	}
3746 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3747 
3748 out_err:
3749 	mutex_unlock(&tracing_cpumask_update_lock);
3750 
3751 	return count;
3752 }
3753 
3754 static ssize_t
3755 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3756 		      size_t count, loff_t *ppos)
3757 {
3758 	struct trace_array *tr = file_inode(filp)->i_private;
3759 	cpumask_var_t tracing_cpumask_new;
3760 	int err, cpu;
3761 
3762 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3763 		return -ENOMEM;
3764 
3765 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3766 	if (err)
3767 		goto err_unlock;
3768 
3769 	mutex_lock(&tracing_cpumask_update_lock);
3770 
3771 	local_irq_disable();
3772 	arch_spin_lock(&tr->max_lock);
3773 	for_each_tracing_cpu(cpu) {
3774 		/*
3775 		 * Increase/decrease the disabled counter if we are
3776 		 * about to flip a bit in the cpumask:
3777 		 */
3778 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3779 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3780 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3781 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3782 		}
3783 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3784 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3785 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3786 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3787 		}
3788 	}
3789 	arch_spin_unlock(&tr->max_lock);
3790 	local_irq_enable();
3791 
3792 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3793 
3794 	mutex_unlock(&tracing_cpumask_update_lock);
3795 	free_cpumask_var(tracing_cpumask_new);
3796 
3797 	return count;
3798 
3799 err_unlock:
3800 	free_cpumask_var(tracing_cpumask_new);
3801 
3802 	return err;
3803 }
3804 
3805 static const struct file_operations tracing_cpumask_fops = {
3806 	.open		= tracing_open_generic_tr,
3807 	.read		= tracing_cpumask_read,
3808 	.write		= tracing_cpumask_write,
3809 	.release	= tracing_release_generic_tr,
3810 	.llseek		= generic_file_llseek,
3811 };
3812 
3813 static int tracing_trace_options_show(struct seq_file *m, void *v)
3814 {
3815 	struct tracer_opt *trace_opts;
3816 	struct trace_array *tr = m->private;
3817 	u32 tracer_flags;
3818 	int i;
3819 
3820 	mutex_lock(&trace_types_lock);
3821 	tracer_flags = tr->current_trace->flags->val;
3822 	trace_opts = tr->current_trace->flags->opts;
3823 
3824 	for (i = 0; trace_options[i]; i++) {
3825 		if (tr->trace_flags & (1 << i))
3826 			seq_printf(m, "%s\n", trace_options[i]);
3827 		else
3828 			seq_printf(m, "no%s\n", trace_options[i]);
3829 	}
3830 
3831 	for (i = 0; trace_opts[i].name; i++) {
3832 		if (tracer_flags & trace_opts[i].bit)
3833 			seq_printf(m, "%s\n", trace_opts[i].name);
3834 		else
3835 			seq_printf(m, "no%s\n", trace_opts[i].name);
3836 	}
3837 	mutex_unlock(&trace_types_lock);
3838 
3839 	return 0;
3840 }
3841 
3842 static int __set_tracer_option(struct trace_array *tr,
3843 			       struct tracer_flags *tracer_flags,
3844 			       struct tracer_opt *opts, int neg)
3845 {
3846 	struct tracer *trace = tracer_flags->trace;
3847 	int ret;
3848 
3849 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3850 	if (ret)
3851 		return ret;
3852 
3853 	if (neg)
3854 		tracer_flags->val &= ~opts->bit;
3855 	else
3856 		tracer_flags->val |= opts->bit;
3857 	return 0;
3858 }
3859 
3860 /* Try to assign a tracer specific option */
3861 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3862 {
3863 	struct tracer *trace = tr->current_trace;
3864 	struct tracer_flags *tracer_flags = trace->flags;
3865 	struct tracer_opt *opts = NULL;
3866 	int i;
3867 
3868 	for (i = 0; tracer_flags->opts[i].name; i++) {
3869 		opts = &tracer_flags->opts[i];
3870 
3871 		if (strcmp(cmp, opts->name) == 0)
3872 			return __set_tracer_option(tr, trace->flags, opts, neg);
3873 	}
3874 
3875 	return -EINVAL;
3876 }
3877 
3878 /* Some tracers require overwrite to stay enabled */
3879 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3880 {
3881 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3882 		return -1;
3883 
3884 	return 0;
3885 }
3886 
3887 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3888 {
3889 	/* do nothing if flag is already set */
3890 	if (!!(tr->trace_flags & mask) == !!enabled)
3891 		return 0;
3892 
3893 	/* Give the tracer a chance to approve the change */
3894 	if (tr->current_trace->flag_changed)
3895 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3896 			return -EINVAL;
3897 
3898 	if (enabled)
3899 		tr->trace_flags |= mask;
3900 	else
3901 		tr->trace_flags &= ~mask;
3902 
3903 	if (mask == TRACE_ITER_RECORD_CMD)
3904 		trace_event_enable_cmd_record(enabled);
3905 
3906 	if (mask == TRACE_ITER_EVENT_FORK)
3907 		trace_event_follow_fork(tr, enabled);
3908 
3909 	if (mask == TRACE_ITER_OVERWRITE) {
3910 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3911 #ifdef CONFIG_TRACER_MAX_TRACE
3912 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3913 #endif
3914 	}
3915 
3916 	if (mask == TRACE_ITER_PRINTK) {
3917 		trace_printk_start_stop_comm(enabled);
3918 		trace_printk_control(enabled);
3919 	}
3920 
3921 	return 0;
3922 }
3923 
3924 static int trace_set_options(struct trace_array *tr, char *option)
3925 {
3926 	char *cmp;
3927 	int neg = 0;
3928 	int ret = -ENODEV;
3929 	int i;
3930 	size_t orig_len = strlen(option);
3931 
3932 	cmp = strstrip(option);
3933 
3934 	if (strncmp(cmp, "no", 2) == 0) {
3935 		neg = 1;
3936 		cmp += 2;
3937 	}
3938 
3939 	mutex_lock(&trace_types_lock);
3940 
3941 	for (i = 0; trace_options[i]; i++) {
3942 		if (strcmp(cmp, trace_options[i]) == 0) {
3943 			ret = set_tracer_flag(tr, 1 << i, !neg);
3944 			break;
3945 		}
3946 	}
3947 
3948 	/* If no option could be set, test the specific tracer options */
3949 	if (!trace_options[i])
3950 		ret = set_tracer_option(tr, cmp, neg);
3951 
3952 	mutex_unlock(&trace_types_lock);
3953 
3954 	/*
3955 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
3956 	 * turn it back into a space.
3957 	 */
3958 	if (orig_len > strlen(option))
3959 		option[strlen(option)] = ' ';
3960 
3961 	return ret;
3962 }
3963 
3964 static void __init apply_trace_boot_options(void)
3965 {
3966 	char *buf = trace_boot_options_buf;
3967 	char *option;
3968 
3969 	while (true) {
3970 		option = strsep(&buf, ",");
3971 
3972 		if (!option)
3973 			break;
3974 
3975 		if (*option)
3976 			trace_set_options(&global_trace, option);
3977 
3978 		/* Put back the comma to allow this to be called again */
3979 		if (buf)
3980 			*(buf - 1) = ',';
3981 	}
3982 }
3983 
3984 static ssize_t
3985 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3986 			size_t cnt, loff_t *ppos)
3987 {
3988 	struct seq_file *m = filp->private_data;
3989 	struct trace_array *tr = m->private;
3990 	char buf[64];
3991 	int ret;
3992 
3993 	if (cnt >= sizeof(buf))
3994 		return -EINVAL;
3995 
3996 	if (copy_from_user(buf, ubuf, cnt))
3997 		return -EFAULT;
3998 
3999 	buf[cnt] = 0;
4000 
4001 	ret = trace_set_options(tr, buf);
4002 	if (ret < 0)
4003 		return ret;
4004 
4005 	*ppos += cnt;
4006 
4007 	return cnt;
4008 }
4009 
4010 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4011 {
4012 	struct trace_array *tr = inode->i_private;
4013 	int ret;
4014 
4015 	if (tracing_disabled)
4016 		return -ENODEV;
4017 
4018 	if (trace_array_get(tr) < 0)
4019 		return -ENODEV;
4020 
4021 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4022 	if (ret < 0)
4023 		trace_array_put(tr);
4024 
4025 	return ret;
4026 }
4027 
4028 static const struct file_operations tracing_iter_fops = {
4029 	.open		= tracing_trace_options_open,
4030 	.read		= seq_read,
4031 	.llseek		= seq_lseek,
4032 	.release	= tracing_single_release_tr,
4033 	.write		= tracing_trace_options_write,
4034 };
4035 
4036 static const char readme_msg[] =
4037 	"tracing mini-HOWTO:\n\n"
4038 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4039 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4040 	" Important files:\n"
4041 	"  trace\t\t\t- The static contents of the buffer\n"
4042 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4043 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4044 	"  current_tracer\t- function and latency tracers\n"
4045 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4046 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4047 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4048 	"  trace_clock\t\t-change the clock used to order events\n"
4049 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4050 	"      global:   Synced across CPUs but slows tracing down.\n"
4051 	"     counter:   Not a clock, but just an increment\n"
4052 	"      uptime:   Jiffy counter from time of boot\n"
4053 	"        perf:   Same clock that perf events use\n"
4054 #ifdef CONFIG_X86_64
4055 	"     x86-tsc:   TSC cycle counter\n"
4056 #endif
4057 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4058 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4059 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4060 	"\t\t\t  Remove sub-buffer with rmdir\n"
4061 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4062 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4063 	"\t\t\t  option name\n"
4064 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4065 #ifdef CONFIG_DYNAMIC_FTRACE
4066 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4067 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4068 	"\t\t\t  functions\n"
4069 	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4070 	"\t     modules: Can select a group via module\n"
4071 	"\t      Format: :mod:<module-name>\n"
4072 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4073 	"\t    triggers: a command to perform when function is hit\n"
4074 	"\t      Format: <function>:<trigger>[:count]\n"
4075 	"\t     trigger: traceon, traceoff\n"
4076 	"\t\t      enable_event:<system>:<event>\n"
4077 	"\t\t      disable_event:<system>:<event>\n"
4078 #ifdef CONFIG_STACKTRACE
4079 	"\t\t      stacktrace\n"
4080 #endif
4081 #ifdef CONFIG_TRACER_SNAPSHOT
4082 	"\t\t      snapshot\n"
4083 #endif
4084 	"\t\t      dump\n"
4085 	"\t\t      cpudump\n"
4086 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4087 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4088 	"\t     The first one will disable tracing every time do_fault is hit\n"
4089 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4090 	"\t       The first time do trap is hit and it disables tracing, the\n"
4091 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4092 	"\t       the counter will not decrement. It only decrements when the\n"
4093 	"\t       trigger did work\n"
4094 	"\t     To remove trigger without count:\n"
4095 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4096 	"\t     To remove trigger with a count:\n"
4097 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4098 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4099 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4100 	"\t    modules: Can select a group via module command :mod:\n"
4101 	"\t    Does not accept triggers\n"
4102 #endif /* CONFIG_DYNAMIC_FTRACE */
4103 #ifdef CONFIG_FUNCTION_TRACER
4104 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4105 	"\t\t    (function)\n"
4106 #endif
4107 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4108 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4109 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4110 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4111 #endif
4112 #ifdef CONFIG_TRACER_SNAPSHOT
4113 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4114 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4115 	"\t\t\t  information\n"
4116 #endif
4117 #ifdef CONFIG_STACK_TRACER
4118 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4119 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4120 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4121 	"\t\t\t  new trace)\n"
4122 #ifdef CONFIG_DYNAMIC_FTRACE
4123 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4124 	"\t\t\t  traces\n"
4125 #endif
4126 #endif /* CONFIG_STACK_TRACER */
4127 #ifdef CONFIG_KPROBE_EVENT
4128 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4129 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4130 #endif
4131 #ifdef CONFIG_UPROBE_EVENT
4132 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4133 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4134 #endif
4135 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4136 	"\t  accepts: event-definitions (one definition per line)\n"
4137 	"\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4138 	"\t           -:[<group>/]<event>\n"
4139 #ifdef CONFIG_KPROBE_EVENT
4140 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4141 #endif
4142 #ifdef CONFIG_UPROBE_EVENT
4143 	"\t    place: <path>:<offset>\n"
4144 #endif
4145 	"\t     args: <name>=fetcharg[:type]\n"
4146 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4147 	"\t           $stack<index>, $stack, $retval, $comm\n"
4148 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4149 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4150 #endif
4151 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4152 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4153 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4154 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4155 	"\t\t\t  events\n"
4156 	"      filter\t\t- If set, only events passing filter are traced\n"
4157 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4158 	"\t\t\t  <event>:\n"
4159 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4160 	"      filter\t\t- If set, only events passing filter are traced\n"
4161 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4162 	"\t    Format: <trigger>[:count][if <filter>]\n"
4163 	"\t   trigger: traceon, traceoff\n"
4164 	"\t            enable_event:<system>:<event>\n"
4165 	"\t            disable_event:<system>:<event>\n"
4166 #ifdef CONFIG_HIST_TRIGGERS
4167 	"\t            enable_hist:<system>:<event>\n"
4168 	"\t            disable_hist:<system>:<event>\n"
4169 #endif
4170 #ifdef CONFIG_STACKTRACE
4171 	"\t\t    stacktrace\n"
4172 #endif
4173 #ifdef CONFIG_TRACER_SNAPSHOT
4174 	"\t\t    snapshot\n"
4175 #endif
4176 #ifdef CONFIG_HIST_TRIGGERS
4177 	"\t\t    hist (see below)\n"
4178 #endif
4179 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4180 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4181 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4182 	"\t                  events/block/block_unplug/trigger\n"
4183 	"\t   The first disables tracing every time block_unplug is hit.\n"
4184 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4185 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4186 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4187 	"\t   Like function triggers, the counter is only decremented if it\n"
4188 	"\t    enabled or disabled tracing.\n"
4189 	"\t   To remove a trigger without a count:\n"
4190 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4191 	"\t   To remove a trigger with a count:\n"
4192 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4193 	"\t   Filters can be ignored when removing a trigger.\n"
4194 #ifdef CONFIG_HIST_TRIGGERS
4195 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4196 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4197 	"\t            [:values=<field1[,field2,...]>]\n"
4198 	"\t            [:sort=<field1[,field2,...]>]\n"
4199 	"\t            [:size=#entries]\n"
4200 	"\t            [:pause][:continue][:clear]\n"
4201 	"\t            [:name=histname1]\n"
4202 	"\t            [if <filter>]\n\n"
4203 	"\t    When a matching event is hit, an entry is added to a hash\n"
4204 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4205 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4206 	"\t    correspond to fields in the event's format description.  Keys\n"
4207 	"\t    can be any field, or the special string 'stacktrace'.\n"
4208 	"\t    Compound keys consisting of up to two fields can be specified\n"
4209 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4210 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4211 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4212 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4213 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4214 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4215 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4216 	"\t    its histogram data will be shared with other triggers of the\n"
4217 	"\t    same name, and trigger hits will update this common data.\n\n"
4218 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4219 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4220 	"\t    triggers attached to an event, there will be a table for each\n"
4221 	"\t    trigger in the output.  The table displayed for a named\n"
4222 	"\t    trigger will be the same as any other instance having the\n"
4223 	"\t    same name.  The default format used to display a given field\n"
4224 	"\t    can be modified by appending any of the following modifiers\n"
4225 	"\t    to the field name, as applicable:\n\n"
4226 	"\t            .hex        display a number as a hex value\n"
4227 	"\t            .sym        display an address as a symbol\n"
4228 	"\t            .sym-offset display an address as a symbol and offset\n"
4229 	"\t            .execname   display a common_pid as a program name\n"
4230 	"\t            .syscall    display a syscall id as a syscall name\n\n"
4231 	"\t            .log2       display log2 value rather than raw number\n\n"
4232 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4233 	"\t    trigger or to start a hist trigger but not log any events\n"
4234 	"\t    until told to do so.  'continue' can be used to start or\n"
4235 	"\t    restart a paused hist trigger.\n\n"
4236 	"\t    The 'clear' parameter will clear the contents of a running\n"
4237 	"\t    hist trigger and leave its current paused/active state\n"
4238 	"\t    unchanged.\n\n"
4239 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4240 	"\t    have one event conditionally start and stop another event's\n"
4241 	"\t    already-attached hist trigger.  The syntax is analagous to\n"
4242 	"\t    the enable_event and disable_event triggers.\n"
4243 #endif
4244 ;
4245 
4246 static ssize_t
4247 tracing_readme_read(struct file *filp, char __user *ubuf,
4248 		       size_t cnt, loff_t *ppos)
4249 {
4250 	return simple_read_from_buffer(ubuf, cnt, ppos,
4251 					readme_msg, strlen(readme_msg));
4252 }
4253 
4254 static const struct file_operations tracing_readme_fops = {
4255 	.open		= tracing_open_generic,
4256 	.read		= tracing_readme_read,
4257 	.llseek		= generic_file_llseek,
4258 };
4259 
4260 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4261 {
4262 	unsigned int *ptr = v;
4263 
4264 	if (*pos || m->count)
4265 		ptr++;
4266 
4267 	(*pos)++;
4268 
4269 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4270 	     ptr++) {
4271 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4272 			continue;
4273 
4274 		return ptr;
4275 	}
4276 
4277 	return NULL;
4278 }
4279 
4280 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4281 {
4282 	void *v;
4283 	loff_t l = 0;
4284 
4285 	preempt_disable();
4286 	arch_spin_lock(&trace_cmdline_lock);
4287 
4288 	v = &savedcmd->map_cmdline_to_pid[0];
4289 	while (l <= *pos) {
4290 		v = saved_cmdlines_next(m, v, &l);
4291 		if (!v)
4292 			return NULL;
4293 	}
4294 
4295 	return v;
4296 }
4297 
4298 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4299 {
4300 	arch_spin_unlock(&trace_cmdline_lock);
4301 	preempt_enable();
4302 }
4303 
4304 static int saved_cmdlines_show(struct seq_file *m, void *v)
4305 {
4306 	char buf[TASK_COMM_LEN];
4307 	unsigned int *pid = v;
4308 
4309 	__trace_find_cmdline(*pid, buf);
4310 	seq_printf(m, "%d %s\n", *pid, buf);
4311 	return 0;
4312 }
4313 
4314 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4315 	.start		= saved_cmdlines_start,
4316 	.next		= saved_cmdlines_next,
4317 	.stop		= saved_cmdlines_stop,
4318 	.show		= saved_cmdlines_show,
4319 };
4320 
4321 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4322 {
4323 	if (tracing_disabled)
4324 		return -ENODEV;
4325 
4326 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4327 }
4328 
4329 static const struct file_operations tracing_saved_cmdlines_fops = {
4330 	.open		= tracing_saved_cmdlines_open,
4331 	.read		= seq_read,
4332 	.llseek		= seq_lseek,
4333 	.release	= seq_release,
4334 };
4335 
4336 static ssize_t
4337 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4338 				 size_t cnt, loff_t *ppos)
4339 {
4340 	char buf[64];
4341 	int r;
4342 
4343 	arch_spin_lock(&trace_cmdline_lock);
4344 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4345 	arch_spin_unlock(&trace_cmdline_lock);
4346 
4347 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4348 }
4349 
4350 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4351 {
4352 	kfree(s->saved_cmdlines);
4353 	kfree(s->map_cmdline_to_pid);
4354 	kfree(s);
4355 }
4356 
4357 static int tracing_resize_saved_cmdlines(unsigned int val)
4358 {
4359 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4360 
4361 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4362 	if (!s)
4363 		return -ENOMEM;
4364 
4365 	if (allocate_cmdlines_buffer(val, s) < 0) {
4366 		kfree(s);
4367 		return -ENOMEM;
4368 	}
4369 
4370 	arch_spin_lock(&trace_cmdline_lock);
4371 	savedcmd_temp = savedcmd;
4372 	savedcmd = s;
4373 	arch_spin_unlock(&trace_cmdline_lock);
4374 	free_saved_cmdlines_buffer(savedcmd_temp);
4375 
4376 	return 0;
4377 }
4378 
4379 static ssize_t
4380 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4381 				  size_t cnt, loff_t *ppos)
4382 {
4383 	unsigned long val;
4384 	int ret;
4385 
4386 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4387 	if (ret)
4388 		return ret;
4389 
4390 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
4391 	if (!val || val > PID_MAX_DEFAULT)
4392 		return -EINVAL;
4393 
4394 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4395 	if (ret < 0)
4396 		return ret;
4397 
4398 	*ppos += cnt;
4399 
4400 	return cnt;
4401 }
4402 
4403 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4404 	.open		= tracing_open_generic,
4405 	.read		= tracing_saved_cmdlines_size_read,
4406 	.write		= tracing_saved_cmdlines_size_write,
4407 };
4408 
4409 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4410 static union trace_enum_map_item *
4411 update_enum_map(union trace_enum_map_item *ptr)
4412 {
4413 	if (!ptr->map.enum_string) {
4414 		if (ptr->tail.next) {
4415 			ptr = ptr->tail.next;
4416 			/* Set ptr to the next real item (skip head) */
4417 			ptr++;
4418 		} else
4419 			return NULL;
4420 	}
4421 	return ptr;
4422 }
4423 
4424 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4425 {
4426 	union trace_enum_map_item *ptr = v;
4427 
4428 	/*
4429 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4430 	 * This really should never happen.
4431 	 */
4432 	ptr = update_enum_map(ptr);
4433 	if (WARN_ON_ONCE(!ptr))
4434 		return NULL;
4435 
4436 	ptr++;
4437 
4438 	(*pos)++;
4439 
4440 	ptr = update_enum_map(ptr);
4441 
4442 	return ptr;
4443 }
4444 
4445 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4446 {
4447 	union trace_enum_map_item *v;
4448 	loff_t l = 0;
4449 
4450 	mutex_lock(&trace_enum_mutex);
4451 
4452 	v = trace_enum_maps;
4453 	if (v)
4454 		v++;
4455 
4456 	while (v && l < *pos) {
4457 		v = enum_map_next(m, v, &l);
4458 	}
4459 
4460 	return v;
4461 }
4462 
4463 static void enum_map_stop(struct seq_file *m, void *v)
4464 {
4465 	mutex_unlock(&trace_enum_mutex);
4466 }
4467 
4468 static int enum_map_show(struct seq_file *m, void *v)
4469 {
4470 	union trace_enum_map_item *ptr = v;
4471 
4472 	seq_printf(m, "%s %ld (%s)\n",
4473 		   ptr->map.enum_string, ptr->map.enum_value,
4474 		   ptr->map.system);
4475 
4476 	return 0;
4477 }
4478 
4479 static const struct seq_operations tracing_enum_map_seq_ops = {
4480 	.start		= enum_map_start,
4481 	.next		= enum_map_next,
4482 	.stop		= enum_map_stop,
4483 	.show		= enum_map_show,
4484 };
4485 
4486 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4487 {
4488 	if (tracing_disabled)
4489 		return -ENODEV;
4490 
4491 	return seq_open(filp, &tracing_enum_map_seq_ops);
4492 }
4493 
4494 static const struct file_operations tracing_enum_map_fops = {
4495 	.open		= tracing_enum_map_open,
4496 	.read		= seq_read,
4497 	.llseek		= seq_lseek,
4498 	.release	= seq_release,
4499 };
4500 
4501 static inline union trace_enum_map_item *
4502 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4503 {
4504 	/* Return tail of array given the head */
4505 	return ptr + ptr->head.length + 1;
4506 }
4507 
4508 static void
4509 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4510 			   int len)
4511 {
4512 	struct trace_enum_map **stop;
4513 	struct trace_enum_map **map;
4514 	union trace_enum_map_item *map_array;
4515 	union trace_enum_map_item *ptr;
4516 
4517 	stop = start + len;
4518 
4519 	/*
4520 	 * The trace_enum_maps contains the map plus a head and tail item,
4521 	 * where the head holds the module and length of array, and the
4522 	 * tail holds a pointer to the next list.
4523 	 */
4524 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4525 	if (!map_array) {
4526 		pr_warn("Unable to allocate trace enum mapping\n");
4527 		return;
4528 	}
4529 
4530 	mutex_lock(&trace_enum_mutex);
4531 
4532 	if (!trace_enum_maps)
4533 		trace_enum_maps = map_array;
4534 	else {
4535 		ptr = trace_enum_maps;
4536 		for (;;) {
4537 			ptr = trace_enum_jmp_to_tail(ptr);
4538 			if (!ptr->tail.next)
4539 				break;
4540 			ptr = ptr->tail.next;
4541 
4542 		}
4543 		ptr->tail.next = map_array;
4544 	}
4545 	map_array->head.mod = mod;
4546 	map_array->head.length = len;
4547 	map_array++;
4548 
4549 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4550 		map_array->map = **map;
4551 		map_array++;
4552 	}
4553 	memset(map_array, 0, sizeof(*map_array));
4554 
4555 	mutex_unlock(&trace_enum_mutex);
4556 }
4557 
4558 static void trace_create_enum_file(struct dentry *d_tracer)
4559 {
4560 	trace_create_file("enum_map", 0444, d_tracer,
4561 			  NULL, &tracing_enum_map_fops);
4562 }
4563 
4564 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4565 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4566 static inline void trace_insert_enum_map_file(struct module *mod,
4567 			      struct trace_enum_map **start, int len) { }
4568 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4569 
4570 static void trace_insert_enum_map(struct module *mod,
4571 				  struct trace_enum_map **start, int len)
4572 {
4573 	struct trace_enum_map **map;
4574 
4575 	if (len <= 0)
4576 		return;
4577 
4578 	map = start;
4579 
4580 	trace_event_enum_update(map, len);
4581 
4582 	trace_insert_enum_map_file(mod, start, len);
4583 }
4584 
4585 static ssize_t
4586 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4587 		       size_t cnt, loff_t *ppos)
4588 {
4589 	struct trace_array *tr = filp->private_data;
4590 	char buf[MAX_TRACER_SIZE+2];
4591 	int r;
4592 
4593 	mutex_lock(&trace_types_lock);
4594 	r = sprintf(buf, "%s\n", tr->current_trace->name);
4595 	mutex_unlock(&trace_types_lock);
4596 
4597 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4598 }
4599 
4600 int tracer_init(struct tracer *t, struct trace_array *tr)
4601 {
4602 	tracing_reset_online_cpus(&tr->trace_buffer);
4603 	return t->init(tr);
4604 }
4605 
4606 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4607 {
4608 	int cpu;
4609 
4610 	for_each_tracing_cpu(cpu)
4611 		per_cpu_ptr(buf->data, cpu)->entries = val;
4612 }
4613 
4614 #ifdef CONFIG_TRACER_MAX_TRACE
4615 /* resize @tr's buffer to the size of @size_tr's entries */
4616 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4617 					struct trace_buffer *size_buf, int cpu_id)
4618 {
4619 	int cpu, ret = 0;
4620 
4621 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
4622 		for_each_tracing_cpu(cpu) {
4623 			ret = ring_buffer_resize(trace_buf->buffer,
4624 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4625 			if (ret < 0)
4626 				break;
4627 			per_cpu_ptr(trace_buf->data, cpu)->entries =
4628 				per_cpu_ptr(size_buf->data, cpu)->entries;
4629 		}
4630 	} else {
4631 		ret = ring_buffer_resize(trace_buf->buffer,
4632 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4633 		if (ret == 0)
4634 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4635 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4636 	}
4637 
4638 	return ret;
4639 }
4640 #endif /* CONFIG_TRACER_MAX_TRACE */
4641 
4642 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4643 					unsigned long size, int cpu)
4644 {
4645 	int ret;
4646 
4647 	/*
4648 	 * If kernel or user changes the size of the ring buffer
4649 	 * we use the size that was given, and we can forget about
4650 	 * expanding it later.
4651 	 */
4652 	ring_buffer_expanded = true;
4653 
4654 	/* May be called before buffers are initialized */
4655 	if (!tr->trace_buffer.buffer)
4656 		return 0;
4657 
4658 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4659 	if (ret < 0)
4660 		return ret;
4661 
4662 #ifdef CONFIG_TRACER_MAX_TRACE
4663 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4664 	    !tr->current_trace->use_max_tr)
4665 		goto out;
4666 
4667 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4668 	if (ret < 0) {
4669 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4670 						     &tr->trace_buffer, cpu);
4671 		if (r < 0) {
4672 			/*
4673 			 * AARGH! We are left with different
4674 			 * size max buffer!!!!
4675 			 * The max buffer is our "snapshot" buffer.
4676 			 * When a tracer needs a snapshot (one of the
4677 			 * latency tracers), it swaps the max buffer
4678 			 * with the saved snap shot. We succeeded to
4679 			 * update the size of the main buffer, but failed to
4680 			 * update the size of the max buffer. But when we tried
4681 			 * to reset the main buffer to the original size, we
4682 			 * failed there too. This is very unlikely to
4683 			 * happen, but if it does, warn and kill all
4684 			 * tracing.
4685 			 */
4686 			WARN_ON(1);
4687 			tracing_disabled = 1;
4688 		}
4689 		return ret;
4690 	}
4691 
4692 	if (cpu == RING_BUFFER_ALL_CPUS)
4693 		set_buffer_entries(&tr->max_buffer, size);
4694 	else
4695 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4696 
4697  out:
4698 #endif /* CONFIG_TRACER_MAX_TRACE */
4699 
4700 	if (cpu == RING_BUFFER_ALL_CPUS)
4701 		set_buffer_entries(&tr->trace_buffer, size);
4702 	else
4703 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4704 
4705 	return ret;
4706 }
4707 
4708 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4709 					  unsigned long size, int cpu_id)
4710 {
4711 	int ret = size;
4712 
4713 	mutex_lock(&trace_types_lock);
4714 
4715 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4716 		/* make sure, this cpu is enabled in the mask */
4717 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4718 			ret = -EINVAL;
4719 			goto out;
4720 		}
4721 	}
4722 
4723 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4724 	if (ret < 0)
4725 		ret = -ENOMEM;
4726 
4727 out:
4728 	mutex_unlock(&trace_types_lock);
4729 
4730 	return ret;
4731 }
4732 
4733 
4734 /**
4735  * tracing_update_buffers - used by tracing facility to expand ring buffers
4736  *
4737  * To save on memory when the tracing is never used on a system with it
4738  * configured in. The ring buffers are set to a minimum size. But once
4739  * a user starts to use the tracing facility, then they need to grow
4740  * to their default size.
4741  *
4742  * This function is to be called when a tracer is about to be used.
4743  */
4744 int tracing_update_buffers(void)
4745 {
4746 	int ret = 0;
4747 
4748 	mutex_lock(&trace_types_lock);
4749 	if (!ring_buffer_expanded)
4750 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4751 						RING_BUFFER_ALL_CPUS);
4752 	mutex_unlock(&trace_types_lock);
4753 
4754 	return ret;
4755 }
4756 
4757 struct trace_option_dentry;
4758 
4759 static void
4760 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4761 
4762 /*
4763  * Used to clear out the tracer before deletion of an instance.
4764  * Must have trace_types_lock held.
4765  */
4766 static void tracing_set_nop(struct trace_array *tr)
4767 {
4768 	if (tr->current_trace == &nop_trace)
4769 		return;
4770 
4771 	tr->current_trace->enabled--;
4772 
4773 	if (tr->current_trace->reset)
4774 		tr->current_trace->reset(tr);
4775 
4776 	tr->current_trace = &nop_trace;
4777 }
4778 
4779 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4780 {
4781 	/* Only enable if the directory has been created already. */
4782 	if (!tr->dir)
4783 		return;
4784 
4785 	create_trace_option_files(tr, t);
4786 }
4787 
4788 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4789 {
4790 	struct tracer *t;
4791 #ifdef CONFIG_TRACER_MAX_TRACE
4792 	bool had_max_tr;
4793 #endif
4794 	int ret = 0;
4795 
4796 	mutex_lock(&trace_types_lock);
4797 
4798 	if (!ring_buffer_expanded) {
4799 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4800 						RING_BUFFER_ALL_CPUS);
4801 		if (ret < 0)
4802 			goto out;
4803 		ret = 0;
4804 	}
4805 
4806 	for (t = trace_types; t; t = t->next) {
4807 		if (strcmp(t->name, buf) == 0)
4808 			break;
4809 	}
4810 	if (!t) {
4811 		ret = -EINVAL;
4812 		goto out;
4813 	}
4814 	if (t == tr->current_trace)
4815 		goto out;
4816 
4817 	/* Some tracers are only allowed for the top level buffer */
4818 	if (!trace_ok_for_array(t, tr)) {
4819 		ret = -EINVAL;
4820 		goto out;
4821 	}
4822 
4823 	/* If trace pipe files are being read, we can't change the tracer */
4824 	if (tr->current_trace->ref) {
4825 		ret = -EBUSY;
4826 		goto out;
4827 	}
4828 
4829 	trace_branch_disable();
4830 
4831 	tr->current_trace->enabled--;
4832 
4833 	if (tr->current_trace->reset)
4834 		tr->current_trace->reset(tr);
4835 
4836 	/* Current trace needs to be nop_trace before synchronize_sched */
4837 	tr->current_trace = &nop_trace;
4838 
4839 #ifdef CONFIG_TRACER_MAX_TRACE
4840 	had_max_tr = tr->allocated_snapshot;
4841 
4842 	if (had_max_tr && !t->use_max_tr) {
4843 		/*
4844 		 * We need to make sure that the update_max_tr sees that
4845 		 * current_trace changed to nop_trace to keep it from
4846 		 * swapping the buffers after we resize it.
4847 		 * The update_max_tr is called from interrupts disabled
4848 		 * so a synchronized_sched() is sufficient.
4849 		 */
4850 		synchronize_sched();
4851 		free_snapshot(tr);
4852 	}
4853 #endif
4854 
4855 #ifdef CONFIG_TRACER_MAX_TRACE
4856 	if (t->use_max_tr && !had_max_tr) {
4857 		ret = alloc_snapshot(tr);
4858 		if (ret < 0)
4859 			goto out;
4860 	}
4861 #endif
4862 
4863 	if (t->init) {
4864 		ret = tracer_init(t, tr);
4865 		if (ret)
4866 			goto out;
4867 	}
4868 
4869 	tr->current_trace = t;
4870 	tr->current_trace->enabled++;
4871 	trace_branch_enable(tr);
4872  out:
4873 	mutex_unlock(&trace_types_lock);
4874 
4875 	return ret;
4876 }
4877 
4878 static ssize_t
4879 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4880 			size_t cnt, loff_t *ppos)
4881 {
4882 	struct trace_array *tr = filp->private_data;
4883 	char buf[MAX_TRACER_SIZE+1];
4884 	int i;
4885 	size_t ret;
4886 	int err;
4887 
4888 	ret = cnt;
4889 
4890 	if (cnt > MAX_TRACER_SIZE)
4891 		cnt = MAX_TRACER_SIZE;
4892 
4893 	if (copy_from_user(buf, ubuf, cnt))
4894 		return -EFAULT;
4895 
4896 	buf[cnt] = 0;
4897 
4898 	/* strip ending whitespace. */
4899 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4900 		buf[i] = 0;
4901 
4902 	err = tracing_set_tracer(tr, buf);
4903 	if (err)
4904 		return err;
4905 
4906 	*ppos += ret;
4907 
4908 	return ret;
4909 }
4910 
4911 static ssize_t
4912 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4913 		   size_t cnt, loff_t *ppos)
4914 {
4915 	char buf[64];
4916 	int r;
4917 
4918 	r = snprintf(buf, sizeof(buf), "%ld\n",
4919 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4920 	if (r > sizeof(buf))
4921 		r = sizeof(buf);
4922 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4923 }
4924 
4925 static ssize_t
4926 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4927 		    size_t cnt, loff_t *ppos)
4928 {
4929 	unsigned long val;
4930 	int ret;
4931 
4932 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4933 	if (ret)
4934 		return ret;
4935 
4936 	*ptr = val * 1000;
4937 
4938 	return cnt;
4939 }
4940 
4941 static ssize_t
4942 tracing_thresh_read(struct file *filp, char __user *ubuf,
4943 		    size_t cnt, loff_t *ppos)
4944 {
4945 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4946 }
4947 
4948 static ssize_t
4949 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4950 		     size_t cnt, loff_t *ppos)
4951 {
4952 	struct trace_array *tr = filp->private_data;
4953 	int ret;
4954 
4955 	mutex_lock(&trace_types_lock);
4956 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4957 	if (ret < 0)
4958 		goto out;
4959 
4960 	if (tr->current_trace->update_thresh) {
4961 		ret = tr->current_trace->update_thresh(tr);
4962 		if (ret < 0)
4963 			goto out;
4964 	}
4965 
4966 	ret = cnt;
4967 out:
4968 	mutex_unlock(&trace_types_lock);
4969 
4970 	return ret;
4971 }
4972 
4973 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
4974 
4975 static ssize_t
4976 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4977 		     size_t cnt, loff_t *ppos)
4978 {
4979 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4980 }
4981 
4982 static ssize_t
4983 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4984 		      size_t cnt, loff_t *ppos)
4985 {
4986 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4987 }
4988 
4989 #endif
4990 
4991 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4992 {
4993 	struct trace_array *tr = inode->i_private;
4994 	struct trace_iterator *iter;
4995 	int ret = 0;
4996 
4997 	if (tracing_disabled)
4998 		return -ENODEV;
4999 
5000 	if (trace_array_get(tr) < 0)
5001 		return -ENODEV;
5002 
5003 	mutex_lock(&trace_types_lock);
5004 
5005 	/* create a buffer to store the information to pass to userspace */
5006 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5007 	if (!iter) {
5008 		ret = -ENOMEM;
5009 		__trace_array_put(tr);
5010 		goto out;
5011 	}
5012 
5013 	trace_seq_init(&iter->seq);
5014 	iter->trace = tr->current_trace;
5015 
5016 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5017 		ret = -ENOMEM;
5018 		goto fail;
5019 	}
5020 
5021 	/* trace pipe does not show start of buffer */
5022 	cpumask_setall(iter->started);
5023 
5024 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5025 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5026 
5027 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5028 	if (trace_clocks[tr->clock_id].in_ns)
5029 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5030 
5031 	iter->tr = tr;
5032 	iter->trace_buffer = &tr->trace_buffer;
5033 	iter->cpu_file = tracing_get_cpu(inode);
5034 	mutex_init(&iter->mutex);
5035 	filp->private_data = iter;
5036 
5037 	if (iter->trace->pipe_open)
5038 		iter->trace->pipe_open(iter);
5039 
5040 	nonseekable_open(inode, filp);
5041 
5042 	tr->current_trace->ref++;
5043 out:
5044 	mutex_unlock(&trace_types_lock);
5045 	return ret;
5046 
5047 fail:
5048 	kfree(iter->trace);
5049 	kfree(iter);
5050 	__trace_array_put(tr);
5051 	mutex_unlock(&trace_types_lock);
5052 	return ret;
5053 }
5054 
5055 static int tracing_release_pipe(struct inode *inode, struct file *file)
5056 {
5057 	struct trace_iterator *iter = file->private_data;
5058 	struct trace_array *tr = inode->i_private;
5059 
5060 	mutex_lock(&trace_types_lock);
5061 
5062 	tr->current_trace->ref--;
5063 
5064 	if (iter->trace->pipe_close)
5065 		iter->trace->pipe_close(iter);
5066 
5067 	mutex_unlock(&trace_types_lock);
5068 
5069 	free_cpumask_var(iter->started);
5070 	mutex_destroy(&iter->mutex);
5071 	kfree(iter);
5072 
5073 	trace_array_put(tr);
5074 
5075 	return 0;
5076 }
5077 
5078 static unsigned int
5079 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5080 {
5081 	struct trace_array *tr = iter->tr;
5082 
5083 	/* Iterators are static, they should be filled or empty */
5084 	if (trace_buffer_iter(iter, iter->cpu_file))
5085 		return POLLIN | POLLRDNORM;
5086 
5087 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5088 		/*
5089 		 * Always select as readable when in blocking mode
5090 		 */
5091 		return POLLIN | POLLRDNORM;
5092 	else
5093 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5094 					     filp, poll_table);
5095 }
5096 
5097 static unsigned int
5098 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5099 {
5100 	struct trace_iterator *iter = filp->private_data;
5101 
5102 	return trace_poll(iter, filp, poll_table);
5103 }
5104 
5105 /* Must be called with iter->mutex held. */
5106 static int tracing_wait_pipe(struct file *filp)
5107 {
5108 	struct trace_iterator *iter = filp->private_data;
5109 	int ret;
5110 
5111 	while (trace_empty(iter)) {
5112 
5113 		if ((filp->f_flags & O_NONBLOCK)) {
5114 			return -EAGAIN;
5115 		}
5116 
5117 		/*
5118 		 * We block until we read something and tracing is disabled.
5119 		 * We still block if tracing is disabled, but we have never
5120 		 * read anything. This allows a user to cat this file, and
5121 		 * then enable tracing. But after we have read something,
5122 		 * we give an EOF when tracing is again disabled.
5123 		 *
5124 		 * iter->pos will be 0 if we haven't read anything.
5125 		 */
5126 		if (!tracing_is_on() && iter->pos)
5127 			break;
5128 
5129 		mutex_unlock(&iter->mutex);
5130 
5131 		ret = wait_on_pipe(iter, false);
5132 
5133 		mutex_lock(&iter->mutex);
5134 
5135 		if (ret)
5136 			return ret;
5137 	}
5138 
5139 	return 1;
5140 }
5141 
5142 /*
5143  * Consumer reader.
5144  */
5145 static ssize_t
5146 tracing_read_pipe(struct file *filp, char __user *ubuf,
5147 		  size_t cnt, loff_t *ppos)
5148 {
5149 	struct trace_iterator *iter = filp->private_data;
5150 	ssize_t sret;
5151 
5152 	/*
5153 	 * Avoid more than one consumer on a single file descriptor
5154 	 * This is just a matter of traces coherency, the ring buffer itself
5155 	 * is protected.
5156 	 */
5157 	mutex_lock(&iter->mutex);
5158 
5159 	/* return any leftover data */
5160 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5161 	if (sret != -EBUSY)
5162 		goto out;
5163 
5164 	trace_seq_init(&iter->seq);
5165 
5166 	if (iter->trace->read) {
5167 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5168 		if (sret)
5169 			goto out;
5170 	}
5171 
5172 waitagain:
5173 	sret = tracing_wait_pipe(filp);
5174 	if (sret <= 0)
5175 		goto out;
5176 
5177 	/* stop when tracing is finished */
5178 	if (trace_empty(iter)) {
5179 		sret = 0;
5180 		goto out;
5181 	}
5182 
5183 	if (cnt >= PAGE_SIZE)
5184 		cnt = PAGE_SIZE - 1;
5185 
5186 	/* reset all but tr, trace, and overruns */
5187 	memset(&iter->seq, 0,
5188 	       sizeof(struct trace_iterator) -
5189 	       offsetof(struct trace_iterator, seq));
5190 	cpumask_clear(iter->started);
5191 	iter->pos = -1;
5192 
5193 	trace_event_read_lock();
5194 	trace_access_lock(iter->cpu_file);
5195 	while (trace_find_next_entry_inc(iter) != NULL) {
5196 		enum print_line_t ret;
5197 		int save_len = iter->seq.seq.len;
5198 
5199 		ret = print_trace_line(iter);
5200 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5201 			/* don't print partial lines */
5202 			iter->seq.seq.len = save_len;
5203 			break;
5204 		}
5205 		if (ret != TRACE_TYPE_NO_CONSUME)
5206 			trace_consume(iter);
5207 
5208 		if (trace_seq_used(&iter->seq) >= cnt)
5209 			break;
5210 
5211 		/*
5212 		 * Setting the full flag means we reached the trace_seq buffer
5213 		 * size and we should leave by partial output condition above.
5214 		 * One of the trace_seq_* functions is not used properly.
5215 		 */
5216 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5217 			  iter->ent->type);
5218 	}
5219 	trace_access_unlock(iter->cpu_file);
5220 	trace_event_read_unlock();
5221 
5222 	/* Now copy what we have to the user */
5223 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5224 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5225 		trace_seq_init(&iter->seq);
5226 
5227 	/*
5228 	 * If there was nothing to send to user, in spite of consuming trace
5229 	 * entries, go back to wait for more entries.
5230 	 */
5231 	if (sret == -EBUSY)
5232 		goto waitagain;
5233 
5234 out:
5235 	mutex_unlock(&iter->mutex);
5236 
5237 	return sret;
5238 }
5239 
5240 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5241 				     unsigned int idx)
5242 {
5243 	__free_page(spd->pages[idx]);
5244 }
5245 
5246 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5247 	.can_merge		= 0,
5248 	.confirm		= generic_pipe_buf_confirm,
5249 	.release		= generic_pipe_buf_release,
5250 	.steal			= generic_pipe_buf_steal,
5251 	.get			= generic_pipe_buf_get,
5252 };
5253 
5254 static size_t
5255 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5256 {
5257 	size_t count;
5258 	int save_len;
5259 	int ret;
5260 
5261 	/* Seq buffer is page-sized, exactly what we need. */
5262 	for (;;) {
5263 		save_len = iter->seq.seq.len;
5264 		ret = print_trace_line(iter);
5265 
5266 		if (trace_seq_has_overflowed(&iter->seq)) {
5267 			iter->seq.seq.len = save_len;
5268 			break;
5269 		}
5270 
5271 		/*
5272 		 * This should not be hit, because it should only
5273 		 * be set if the iter->seq overflowed. But check it
5274 		 * anyway to be safe.
5275 		 */
5276 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5277 			iter->seq.seq.len = save_len;
5278 			break;
5279 		}
5280 
5281 		count = trace_seq_used(&iter->seq) - save_len;
5282 		if (rem < count) {
5283 			rem = 0;
5284 			iter->seq.seq.len = save_len;
5285 			break;
5286 		}
5287 
5288 		if (ret != TRACE_TYPE_NO_CONSUME)
5289 			trace_consume(iter);
5290 		rem -= count;
5291 		if (!trace_find_next_entry_inc(iter))	{
5292 			rem = 0;
5293 			iter->ent = NULL;
5294 			break;
5295 		}
5296 	}
5297 
5298 	return rem;
5299 }
5300 
5301 static ssize_t tracing_splice_read_pipe(struct file *filp,
5302 					loff_t *ppos,
5303 					struct pipe_inode_info *pipe,
5304 					size_t len,
5305 					unsigned int flags)
5306 {
5307 	struct page *pages_def[PIPE_DEF_BUFFERS];
5308 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5309 	struct trace_iterator *iter = filp->private_data;
5310 	struct splice_pipe_desc spd = {
5311 		.pages		= pages_def,
5312 		.partial	= partial_def,
5313 		.nr_pages	= 0, /* This gets updated below. */
5314 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5315 		.flags		= flags,
5316 		.ops		= &tracing_pipe_buf_ops,
5317 		.spd_release	= tracing_spd_release_pipe,
5318 	};
5319 	ssize_t ret;
5320 	size_t rem;
5321 	unsigned int i;
5322 
5323 	if (splice_grow_spd(pipe, &spd))
5324 		return -ENOMEM;
5325 
5326 	mutex_lock(&iter->mutex);
5327 
5328 	if (iter->trace->splice_read) {
5329 		ret = iter->trace->splice_read(iter, filp,
5330 					       ppos, pipe, len, flags);
5331 		if (ret)
5332 			goto out_err;
5333 	}
5334 
5335 	ret = tracing_wait_pipe(filp);
5336 	if (ret <= 0)
5337 		goto out_err;
5338 
5339 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5340 		ret = -EFAULT;
5341 		goto out_err;
5342 	}
5343 
5344 	trace_event_read_lock();
5345 	trace_access_lock(iter->cpu_file);
5346 
5347 	/* Fill as many pages as possible. */
5348 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5349 		spd.pages[i] = alloc_page(GFP_KERNEL);
5350 		if (!spd.pages[i])
5351 			break;
5352 
5353 		rem = tracing_fill_pipe_page(rem, iter);
5354 
5355 		/* Copy the data into the page, so we can start over. */
5356 		ret = trace_seq_to_buffer(&iter->seq,
5357 					  page_address(spd.pages[i]),
5358 					  trace_seq_used(&iter->seq));
5359 		if (ret < 0) {
5360 			__free_page(spd.pages[i]);
5361 			break;
5362 		}
5363 		spd.partial[i].offset = 0;
5364 		spd.partial[i].len = trace_seq_used(&iter->seq);
5365 
5366 		trace_seq_init(&iter->seq);
5367 	}
5368 
5369 	trace_access_unlock(iter->cpu_file);
5370 	trace_event_read_unlock();
5371 	mutex_unlock(&iter->mutex);
5372 
5373 	spd.nr_pages = i;
5374 
5375 	if (i)
5376 		ret = splice_to_pipe(pipe, &spd);
5377 	else
5378 		ret = 0;
5379 out:
5380 	splice_shrink_spd(&spd);
5381 	return ret;
5382 
5383 out_err:
5384 	mutex_unlock(&iter->mutex);
5385 	goto out;
5386 }
5387 
5388 static ssize_t
5389 tracing_entries_read(struct file *filp, char __user *ubuf,
5390 		     size_t cnt, loff_t *ppos)
5391 {
5392 	struct inode *inode = file_inode(filp);
5393 	struct trace_array *tr = inode->i_private;
5394 	int cpu = tracing_get_cpu(inode);
5395 	char buf[64];
5396 	int r = 0;
5397 	ssize_t ret;
5398 
5399 	mutex_lock(&trace_types_lock);
5400 
5401 	if (cpu == RING_BUFFER_ALL_CPUS) {
5402 		int cpu, buf_size_same;
5403 		unsigned long size;
5404 
5405 		size = 0;
5406 		buf_size_same = 1;
5407 		/* check if all cpu sizes are same */
5408 		for_each_tracing_cpu(cpu) {
5409 			/* fill in the size from first enabled cpu */
5410 			if (size == 0)
5411 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5412 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5413 				buf_size_same = 0;
5414 				break;
5415 			}
5416 		}
5417 
5418 		if (buf_size_same) {
5419 			if (!ring_buffer_expanded)
5420 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5421 					    size >> 10,
5422 					    trace_buf_size >> 10);
5423 			else
5424 				r = sprintf(buf, "%lu\n", size >> 10);
5425 		} else
5426 			r = sprintf(buf, "X\n");
5427 	} else
5428 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5429 
5430 	mutex_unlock(&trace_types_lock);
5431 
5432 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5433 	return ret;
5434 }
5435 
5436 static ssize_t
5437 tracing_entries_write(struct file *filp, const char __user *ubuf,
5438 		      size_t cnt, loff_t *ppos)
5439 {
5440 	struct inode *inode = file_inode(filp);
5441 	struct trace_array *tr = inode->i_private;
5442 	unsigned long val;
5443 	int ret;
5444 
5445 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5446 	if (ret)
5447 		return ret;
5448 
5449 	/* must have at least 1 entry */
5450 	if (!val)
5451 		return -EINVAL;
5452 
5453 	/* value is in KB */
5454 	val <<= 10;
5455 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5456 	if (ret < 0)
5457 		return ret;
5458 
5459 	*ppos += cnt;
5460 
5461 	return cnt;
5462 }
5463 
5464 static ssize_t
5465 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5466 				size_t cnt, loff_t *ppos)
5467 {
5468 	struct trace_array *tr = filp->private_data;
5469 	char buf[64];
5470 	int r, cpu;
5471 	unsigned long size = 0, expanded_size = 0;
5472 
5473 	mutex_lock(&trace_types_lock);
5474 	for_each_tracing_cpu(cpu) {
5475 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5476 		if (!ring_buffer_expanded)
5477 			expanded_size += trace_buf_size >> 10;
5478 	}
5479 	if (ring_buffer_expanded)
5480 		r = sprintf(buf, "%lu\n", size);
5481 	else
5482 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5483 	mutex_unlock(&trace_types_lock);
5484 
5485 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5486 }
5487 
5488 static ssize_t
5489 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5490 			  size_t cnt, loff_t *ppos)
5491 {
5492 	/*
5493 	 * There is no need to read what the user has written, this function
5494 	 * is just to make sure that there is no error when "echo" is used
5495 	 */
5496 
5497 	*ppos += cnt;
5498 
5499 	return cnt;
5500 }
5501 
5502 static int
5503 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5504 {
5505 	struct trace_array *tr = inode->i_private;
5506 
5507 	/* disable tracing ? */
5508 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5509 		tracer_tracing_off(tr);
5510 	/* resize the ring buffer to 0 */
5511 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5512 
5513 	trace_array_put(tr);
5514 
5515 	return 0;
5516 }
5517 
5518 static ssize_t
5519 tracing_mark_write(struct file *filp, const char __user *ubuf,
5520 					size_t cnt, loff_t *fpos)
5521 {
5522 	unsigned long addr = (unsigned long)ubuf;
5523 	struct trace_array *tr = filp->private_data;
5524 	struct ring_buffer_event *event;
5525 	struct ring_buffer *buffer;
5526 	struct print_entry *entry;
5527 	unsigned long irq_flags;
5528 	struct page *pages[2];
5529 	void *map_page[2];
5530 	int nr_pages = 1;
5531 	ssize_t written;
5532 	int offset;
5533 	int size;
5534 	int len;
5535 	int ret;
5536 	int i;
5537 
5538 	if (tracing_disabled)
5539 		return -EINVAL;
5540 
5541 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5542 		return -EINVAL;
5543 
5544 	if (cnt > TRACE_BUF_SIZE)
5545 		cnt = TRACE_BUF_SIZE;
5546 
5547 	/*
5548 	 * Userspace is injecting traces into the kernel trace buffer.
5549 	 * We want to be as non intrusive as possible.
5550 	 * To do so, we do not want to allocate any special buffers
5551 	 * or take any locks, but instead write the userspace data
5552 	 * straight into the ring buffer.
5553 	 *
5554 	 * First we need to pin the userspace buffer into memory,
5555 	 * which, most likely it is, because it just referenced it.
5556 	 * But there's no guarantee that it is. By using get_user_pages_fast()
5557 	 * and kmap_atomic/kunmap_atomic() we can get access to the
5558 	 * pages directly. We then write the data directly into the
5559 	 * ring buffer.
5560 	 */
5561 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5562 
5563 	/* check if we cross pages */
5564 	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5565 		nr_pages = 2;
5566 
5567 	offset = addr & (PAGE_SIZE - 1);
5568 	addr &= PAGE_MASK;
5569 
5570 	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5571 	if (ret < nr_pages) {
5572 		while (--ret >= 0)
5573 			put_page(pages[ret]);
5574 		written = -EFAULT;
5575 		goto out;
5576 	}
5577 
5578 	for (i = 0; i < nr_pages; i++)
5579 		map_page[i] = kmap_atomic(pages[i]);
5580 
5581 	local_save_flags(irq_flags);
5582 	size = sizeof(*entry) + cnt + 2; /* possible \n added */
5583 	buffer = tr->trace_buffer.buffer;
5584 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5585 					  irq_flags, preempt_count());
5586 	if (!event) {
5587 		/* Ring buffer disabled, return as if not open for write */
5588 		written = -EBADF;
5589 		goto out_unlock;
5590 	}
5591 
5592 	entry = ring_buffer_event_data(event);
5593 	entry->ip = _THIS_IP_;
5594 
5595 	if (nr_pages == 2) {
5596 		len = PAGE_SIZE - offset;
5597 		memcpy(&entry->buf, map_page[0] + offset, len);
5598 		memcpy(&entry->buf[len], map_page[1], cnt - len);
5599 	} else
5600 		memcpy(&entry->buf, map_page[0] + offset, cnt);
5601 
5602 	if (entry->buf[cnt - 1] != '\n') {
5603 		entry->buf[cnt] = '\n';
5604 		entry->buf[cnt + 1] = '\0';
5605 	} else
5606 		entry->buf[cnt] = '\0';
5607 
5608 	__buffer_unlock_commit(buffer, event);
5609 
5610 	written = cnt;
5611 
5612 	*fpos += written;
5613 
5614  out_unlock:
5615 	for (i = nr_pages - 1; i >= 0; i--) {
5616 		kunmap_atomic(map_page[i]);
5617 		put_page(pages[i]);
5618 	}
5619  out:
5620 	return written;
5621 }
5622 
5623 static int tracing_clock_show(struct seq_file *m, void *v)
5624 {
5625 	struct trace_array *tr = m->private;
5626 	int i;
5627 
5628 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5629 		seq_printf(m,
5630 			"%s%s%s%s", i ? " " : "",
5631 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5632 			i == tr->clock_id ? "]" : "");
5633 	seq_putc(m, '\n');
5634 
5635 	return 0;
5636 }
5637 
5638 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5639 {
5640 	int i;
5641 
5642 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5643 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
5644 			break;
5645 	}
5646 	if (i == ARRAY_SIZE(trace_clocks))
5647 		return -EINVAL;
5648 
5649 	mutex_lock(&trace_types_lock);
5650 
5651 	tr->clock_id = i;
5652 
5653 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5654 
5655 	/*
5656 	 * New clock may not be consistent with the previous clock.
5657 	 * Reset the buffer so that it doesn't have incomparable timestamps.
5658 	 */
5659 	tracing_reset_online_cpus(&tr->trace_buffer);
5660 
5661 #ifdef CONFIG_TRACER_MAX_TRACE
5662 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5663 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5664 	tracing_reset_online_cpus(&tr->max_buffer);
5665 #endif
5666 
5667 	mutex_unlock(&trace_types_lock);
5668 
5669 	return 0;
5670 }
5671 
5672 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5673 				   size_t cnt, loff_t *fpos)
5674 {
5675 	struct seq_file *m = filp->private_data;
5676 	struct trace_array *tr = m->private;
5677 	char buf[64];
5678 	const char *clockstr;
5679 	int ret;
5680 
5681 	if (cnt >= sizeof(buf))
5682 		return -EINVAL;
5683 
5684 	if (copy_from_user(buf, ubuf, cnt))
5685 		return -EFAULT;
5686 
5687 	buf[cnt] = 0;
5688 
5689 	clockstr = strstrip(buf);
5690 
5691 	ret = tracing_set_clock(tr, clockstr);
5692 	if (ret)
5693 		return ret;
5694 
5695 	*fpos += cnt;
5696 
5697 	return cnt;
5698 }
5699 
5700 static int tracing_clock_open(struct inode *inode, struct file *file)
5701 {
5702 	struct trace_array *tr = inode->i_private;
5703 	int ret;
5704 
5705 	if (tracing_disabled)
5706 		return -ENODEV;
5707 
5708 	if (trace_array_get(tr))
5709 		return -ENODEV;
5710 
5711 	ret = single_open(file, tracing_clock_show, inode->i_private);
5712 	if (ret < 0)
5713 		trace_array_put(tr);
5714 
5715 	return ret;
5716 }
5717 
5718 struct ftrace_buffer_info {
5719 	struct trace_iterator	iter;
5720 	void			*spare;
5721 	unsigned int		read;
5722 };
5723 
5724 #ifdef CONFIG_TRACER_SNAPSHOT
5725 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5726 {
5727 	struct trace_array *tr = inode->i_private;
5728 	struct trace_iterator *iter;
5729 	struct seq_file *m;
5730 	int ret = 0;
5731 
5732 	if (trace_array_get(tr) < 0)
5733 		return -ENODEV;
5734 
5735 	if (file->f_mode & FMODE_READ) {
5736 		iter = __tracing_open(inode, file, true);
5737 		if (IS_ERR(iter))
5738 			ret = PTR_ERR(iter);
5739 	} else {
5740 		/* Writes still need the seq_file to hold the private data */
5741 		ret = -ENOMEM;
5742 		m = kzalloc(sizeof(*m), GFP_KERNEL);
5743 		if (!m)
5744 			goto out;
5745 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5746 		if (!iter) {
5747 			kfree(m);
5748 			goto out;
5749 		}
5750 		ret = 0;
5751 
5752 		iter->tr = tr;
5753 		iter->trace_buffer = &tr->max_buffer;
5754 		iter->cpu_file = tracing_get_cpu(inode);
5755 		m->private = iter;
5756 		file->private_data = m;
5757 	}
5758 out:
5759 	if (ret < 0)
5760 		trace_array_put(tr);
5761 
5762 	return ret;
5763 }
5764 
5765 static ssize_t
5766 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5767 		       loff_t *ppos)
5768 {
5769 	struct seq_file *m = filp->private_data;
5770 	struct trace_iterator *iter = m->private;
5771 	struct trace_array *tr = iter->tr;
5772 	unsigned long val;
5773 	int ret;
5774 
5775 	ret = tracing_update_buffers();
5776 	if (ret < 0)
5777 		return ret;
5778 
5779 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5780 	if (ret)
5781 		return ret;
5782 
5783 	mutex_lock(&trace_types_lock);
5784 
5785 	if (tr->current_trace->use_max_tr) {
5786 		ret = -EBUSY;
5787 		goto out;
5788 	}
5789 
5790 	switch (val) {
5791 	case 0:
5792 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5793 			ret = -EINVAL;
5794 			break;
5795 		}
5796 		if (tr->allocated_snapshot)
5797 			free_snapshot(tr);
5798 		break;
5799 	case 1:
5800 /* Only allow per-cpu swap if the ring buffer supports it */
5801 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5802 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5803 			ret = -EINVAL;
5804 			break;
5805 		}
5806 #endif
5807 		if (!tr->allocated_snapshot) {
5808 			ret = alloc_snapshot(tr);
5809 			if (ret < 0)
5810 				break;
5811 		}
5812 		local_irq_disable();
5813 		/* Now, we're going to swap */
5814 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5815 			update_max_tr(tr, current, smp_processor_id());
5816 		else
5817 			update_max_tr_single(tr, current, iter->cpu_file);
5818 		local_irq_enable();
5819 		break;
5820 	default:
5821 		if (tr->allocated_snapshot) {
5822 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5823 				tracing_reset_online_cpus(&tr->max_buffer);
5824 			else
5825 				tracing_reset(&tr->max_buffer, iter->cpu_file);
5826 		}
5827 		break;
5828 	}
5829 
5830 	if (ret >= 0) {
5831 		*ppos += cnt;
5832 		ret = cnt;
5833 	}
5834 out:
5835 	mutex_unlock(&trace_types_lock);
5836 	return ret;
5837 }
5838 
5839 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5840 {
5841 	struct seq_file *m = file->private_data;
5842 	int ret;
5843 
5844 	ret = tracing_release(inode, file);
5845 
5846 	if (file->f_mode & FMODE_READ)
5847 		return ret;
5848 
5849 	/* If write only, the seq_file is just a stub */
5850 	if (m)
5851 		kfree(m->private);
5852 	kfree(m);
5853 
5854 	return 0;
5855 }
5856 
5857 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5858 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5859 				    size_t count, loff_t *ppos);
5860 static int tracing_buffers_release(struct inode *inode, struct file *file);
5861 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5862 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5863 
5864 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5865 {
5866 	struct ftrace_buffer_info *info;
5867 	int ret;
5868 
5869 	ret = tracing_buffers_open(inode, filp);
5870 	if (ret < 0)
5871 		return ret;
5872 
5873 	info = filp->private_data;
5874 
5875 	if (info->iter.trace->use_max_tr) {
5876 		tracing_buffers_release(inode, filp);
5877 		return -EBUSY;
5878 	}
5879 
5880 	info->iter.snapshot = true;
5881 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
5882 
5883 	return ret;
5884 }
5885 
5886 #endif /* CONFIG_TRACER_SNAPSHOT */
5887 
5888 
5889 static const struct file_operations tracing_thresh_fops = {
5890 	.open		= tracing_open_generic,
5891 	.read		= tracing_thresh_read,
5892 	.write		= tracing_thresh_write,
5893 	.llseek		= generic_file_llseek,
5894 };
5895 
5896 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5897 static const struct file_operations tracing_max_lat_fops = {
5898 	.open		= tracing_open_generic,
5899 	.read		= tracing_max_lat_read,
5900 	.write		= tracing_max_lat_write,
5901 	.llseek		= generic_file_llseek,
5902 };
5903 #endif
5904 
5905 static const struct file_operations set_tracer_fops = {
5906 	.open		= tracing_open_generic,
5907 	.read		= tracing_set_trace_read,
5908 	.write		= tracing_set_trace_write,
5909 	.llseek		= generic_file_llseek,
5910 };
5911 
5912 static const struct file_operations tracing_pipe_fops = {
5913 	.open		= tracing_open_pipe,
5914 	.poll		= tracing_poll_pipe,
5915 	.read		= tracing_read_pipe,
5916 	.splice_read	= tracing_splice_read_pipe,
5917 	.release	= tracing_release_pipe,
5918 	.llseek		= no_llseek,
5919 };
5920 
5921 static const struct file_operations tracing_entries_fops = {
5922 	.open		= tracing_open_generic_tr,
5923 	.read		= tracing_entries_read,
5924 	.write		= tracing_entries_write,
5925 	.llseek		= generic_file_llseek,
5926 	.release	= tracing_release_generic_tr,
5927 };
5928 
5929 static const struct file_operations tracing_total_entries_fops = {
5930 	.open		= tracing_open_generic_tr,
5931 	.read		= tracing_total_entries_read,
5932 	.llseek		= generic_file_llseek,
5933 	.release	= tracing_release_generic_tr,
5934 };
5935 
5936 static const struct file_operations tracing_free_buffer_fops = {
5937 	.open		= tracing_open_generic_tr,
5938 	.write		= tracing_free_buffer_write,
5939 	.release	= tracing_free_buffer_release,
5940 };
5941 
5942 static const struct file_operations tracing_mark_fops = {
5943 	.open		= tracing_open_generic_tr,
5944 	.write		= tracing_mark_write,
5945 	.llseek		= generic_file_llseek,
5946 	.release	= tracing_release_generic_tr,
5947 };
5948 
5949 static const struct file_operations trace_clock_fops = {
5950 	.open		= tracing_clock_open,
5951 	.read		= seq_read,
5952 	.llseek		= seq_lseek,
5953 	.release	= tracing_single_release_tr,
5954 	.write		= tracing_clock_write,
5955 };
5956 
5957 #ifdef CONFIG_TRACER_SNAPSHOT
5958 static const struct file_operations snapshot_fops = {
5959 	.open		= tracing_snapshot_open,
5960 	.read		= seq_read,
5961 	.write		= tracing_snapshot_write,
5962 	.llseek		= tracing_lseek,
5963 	.release	= tracing_snapshot_release,
5964 };
5965 
5966 static const struct file_operations snapshot_raw_fops = {
5967 	.open		= snapshot_raw_open,
5968 	.read		= tracing_buffers_read,
5969 	.release	= tracing_buffers_release,
5970 	.splice_read	= tracing_buffers_splice_read,
5971 	.llseek		= no_llseek,
5972 };
5973 
5974 #endif /* CONFIG_TRACER_SNAPSHOT */
5975 
5976 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5977 {
5978 	struct trace_array *tr = inode->i_private;
5979 	struct ftrace_buffer_info *info;
5980 	int ret;
5981 
5982 	if (tracing_disabled)
5983 		return -ENODEV;
5984 
5985 	if (trace_array_get(tr) < 0)
5986 		return -ENODEV;
5987 
5988 	info = kzalloc(sizeof(*info), GFP_KERNEL);
5989 	if (!info) {
5990 		trace_array_put(tr);
5991 		return -ENOMEM;
5992 	}
5993 
5994 	mutex_lock(&trace_types_lock);
5995 
5996 	info->iter.tr		= tr;
5997 	info->iter.cpu_file	= tracing_get_cpu(inode);
5998 	info->iter.trace	= tr->current_trace;
5999 	info->iter.trace_buffer = &tr->trace_buffer;
6000 	info->spare		= NULL;
6001 	/* Force reading ring buffer for first read */
6002 	info->read		= (unsigned int)-1;
6003 
6004 	filp->private_data = info;
6005 
6006 	tr->current_trace->ref++;
6007 
6008 	mutex_unlock(&trace_types_lock);
6009 
6010 	ret = nonseekable_open(inode, filp);
6011 	if (ret < 0)
6012 		trace_array_put(tr);
6013 
6014 	return ret;
6015 }
6016 
6017 static unsigned int
6018 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6019 {
6020 	struct ftrace_buffer_info *info = filp->private_data;
6021 	struct trace_iterator *iter = &info->iter;
6022 
6023 	return trace_poll(iter, filp, poll_table);
6024 }
6025 
6026 static ssize_t
6027 tracing_buffers_read(struct file *filp, char __user *ubuf,
6028 		     size_t count, loff_t *ppos)
6029 {
6030 	struct ftrace_buffer_info *info = filp->private_data;
6031 	struct trace_iterator *iter = &info->iter;
6032 	ssize_t ret;
6033 	ssize_t size;
6034 
6035 	if (!count)
6036 		return 0;
6037 
6038 #ifdef CONFIG_TRACER_MAX_TRACE
6039 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6040 		return -EBUSY;
6041 #endif
6042 
6043 	if (!info->spare)
6044 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6045 							  iter->cpu_file);
6046 	if (!info->spare)
6047 		return -ENOMEM;
6048 
6049 	/* Do we have previous read data to read? */
6050 	if (info->read < PAGE_SIZE)
6051 		goto read;
6052 
6053  again:
6054 	trace_access_lock(iter->cpu_file);
6055 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6056 				    &info->spare,
6057 				    count,
6058 				    iter->cpu_file, 0);
6059 	trace_access_unlock(iter->cpu_file);
6060 
6061 	if (ret < 0) {
6062 		if (trace_empty(iter)) {
6063 			if ((filp->f_flags & O_NONBLOCK))
6064 				return -EAGAIN;
6065 
6066 			ret = wait_on_pipe(iter, false);
6067 			if (ret)
6068 				return ret;
6069 
6070 			goto again;
6071 		}
6072 		return 0;
6073 	}
6074 
6075 	info->read = 0;
6076  read:
6077 	size = PAGE_SIZE - info->read;
6078 	if (size > count)
6079 		size = count;
6080 
6081 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6082 	if (ret == size)
6083 		return -EFAULT;
6084 
6085 	size -= ret;
6086 
6087 	*ppos += size;
6088 	info->read += size;
6089 
6090 	return size;
6091 }
6092 
6093 static int tracing_buffers_release(struct inode *inode, struct file *file)
6094 {
6095 	struct ftrace_buffer_info *info = file->private_data;
6096 	struct trace_iterator *iter = &info->iter;
6097 
6098 	mutex_lock(&trace_types_lock);
6099 
6100 	iter->tr->current_trace->ref--;
6101 
6102 	__trace_array_put(iter->tr);
6103 
6104 	if (info->spare)
6105 		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6106 	kfree(info);
6107 
6108 	mutex_unlock(&trace_types_lock);
6109 
6110 	return 0;
6111 }
6112 
6113 struct buffer_ref {
6114 	struct ring_buffer	*buffer;
6115 	void			*page;
6116 	int			ref;
6117 };
6118 
6119 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6120 				    struct pipe_buffer *buf)
6121 {
6122 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6123 
6124 	if (--ref->ref)
6125 		return;
6126 
6127 	ring_buffer_free_read_page(ref->buffer, ref->page);
6128 	kfree(ref);
6129 	buf->private = 0;
6130 }
6131 
6132 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6133 				struct pipe_buffer *buf)
6134 {
6135 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6136 
6137 	ref->ref++;
6138 }
6139 
6140 /* Pipe buffer operations for a buffer. */
6141 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6142 	.can_merge		= 0,
6143 	.confirm		= generic_pipe_buf_confirm,
6144 	.release		= buffer_pipe_buf_release,
6145 	.steal			= generic_pipe_buf_steal,
6146 	.get			= buffer_pipe_buf_get,
6147 };
6148 
6149 /*
6150  * Callback from splice_to_pipe(), if we need to release some pages
6151  * at the end of the spd in case we error'ed out in filling the pipe.
6152  */
6153 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6154 {
6155 	struct buffer_ref *ref =
6156 		(struct buffer_ref *)spd->partial[i].private;
6157 
6158 	if (--ref->ref)
6159 		return;
6160 
6161 	ring_buffer_free_read_page(ref->buffer, ref->page);
6162 	kfree(ref);
6163 	spd->partial[i].private = 0;
6164 }
6165 
6166 static ssize_t
6167 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6168 			    struct pipe_inode_info *pipe, size_t len,
6169 			    unsigned int flags)
6170 {
6171 	struct ftrace_buffer_info *info = file->private_data;
6172 	struct trace_iterator *iter = &info->iter;
6173 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6174 	struct page *pages_def[PIPE_DEF_BUFFERS];
6175 	struct splice_pipe_desc spd = {
6176 		.pages		= pages_def,
6177 		.partial	= partial_def,
6178 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6179 		.flags		= flags,
6180 		.ops		= &buffer_pipe_buf_ops,
6181 		.spd_release	= buffer_spd_release,
6182 	};
6183 	struct buffer_ref *ref;
6184 	int entries, size, i;
6185 	ssize_t ret = 0;
6186 
6187 #ifdef CONFIG_TRACER_MAX_TRACE
6188 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6189 		return -EBUSY;
6190 #endif
6191 
6192 	if (*ppos & (PAGE_SIZE - 1))
6193 		return -EINVAL;
6194 
6195 	if (len & (PAGE_SIZE - 1)) {
6196 		if (len < PAGE_SIZE)
6197 			return -EINVAL;
6198 		len &= PAGE_MASK;
6199 	}
6200 
6201 	if (splice_grow_spd(pipe, &spd))
6202 		return -ENOMEM;
6203 
6204  again:
6205 	trace_access_lock(iter->cpu_file);
6206 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6207 
6208 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6209 		struct page *page;
6210 		int r;
6211 
6212 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6213 		if (!ref) {
6214 			ret = -ENOMEM;
6215 			break;
6216 		}
6217 
6218 		ref->ref = 1;
6219 		ref->buffer = iter->trace_buffer->buffer;
6220 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6221 		if (!ref->page) {
6222 			ret = -ENOMEM;
6223 			kfree(ref);
6224 			break;
6225 		}
6226 
6227 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6228 					  len, iter->cpu_file, 1);
6229 		if (r < 0) {
6230 			ring_buffer_free_read_page(ref->buffer, ref->page);
6231 			kfree(ref);
6232 			break;
6233 		}
6234 
6235 		/*
6236 		 * zero out any left over data, this is going to
6237 		 * user land.
6238 		 */
6239 		size = ring_buffer_page_len(ref->page);
6240 		if (size < PAGE_SIZE)
6241 			memset(ref->page + size, 0, PAGE_SIZE - size);
6242 
6243 		page = virt_to_page(ref->page);
6244 
6245 		spd.pages[i] = page;
6246 		spd.partial[i].len = PAGE_SIZE;
6247 		spd.partial[i].offset = 0;
6248 		spd.partial[i].private = (unsigned long)ref;
6249 		spd.nr_pages++;
6250 		*ppos += PAGE_SIZE;
6251 
6252 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6253 	}
6254 
6255 	trace_access_unlock(iter->cpu_file);
6256 	spd.nr_pages = i;
6257 
6258 	/* did we read anything? */
6259 	if (!spd.nr_pages) {
6260 		if (ret)
6261 			goto out;
6262 
6263 		ret = -EAGAIN;
6264 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6265 			goto out;
6266 
6267 		ret = wait_on_pipe(iter, true);
6268 		if (ret)
6269 			goto out;
6270 
6271 		goto again;
6272 	}
6273 
6274 	ret = splice_to_pipe(pipe, &spd);
6275 out:
6276 	splice_shrink_spd(&spd);
6277 
6278 	return ret;
6279 }
6280 
6281 static const struct file_operations tracing_buffers_fops = {
6282 	.open		= tracing_buffers_open,
6283 	.read		= tracing_buffers_read,
6284 	.poll		= tracing_buffers_poll,
6285 	.release	= tracing_buffers_release,
6286 	.splice_read	= tracing_buffers_splice_read,
6287 	.llseek		= no_llseek,
6288 };
6289 
6290 static ssize_t
6291 tracing_stats_read(struct file *filp, char __user *ubuf,
6292 		   size_t count, loff_t *ppos)
6293 {
6294 	struct inode *inode = file_inode(filp);
6295 	struct trace_array *tr = inode->i_private;
6296 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6297 	int cpu = tracing_get_cpu(inode);
6298 	struct trace_seq *s;
6299 	unsigned long cnt;
6300 	unsigned long long t;
6301 	unsigned long usec_rem;
6302 
6303 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6304 	if (!s)
6305 		return -ENOMEM;
6306 
6307 	trace_seq_init(s);
6308 
6309 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6310 	trace_seq_printf(s, "entries: %ld\n", cnt);
6311 
6312 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6313 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6314 
6315 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6316 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6317 
6318 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6319 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6320 
6321 	if (trace_clocks[tr->clock_id].in_ns) {
6322 		/* local or global for trace_clock */
6323 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6324 		usec_rem = do_div(t, USEC_PER_SEC);
6325 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6326 								t, usec_rem);
6327 
6328 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6329 		usec_rem = do_div(t, USEC_PER_SEC);
6330 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6331 	} else {
6332 		/* counter or tsc mode for trace_clock */
6333 		trace_seq_printf(s, "oldest event ts: %llu\n",
6334 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6335 
6336 		trace_seq_printf(s, "now ts: %llu\n",
6337 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6338 	}
6339 
6340 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6341 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6342 
6343 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6344 	trace_seq_printf(s, "read events: %ld\n", cnt);
6345 
6346 	count = simple_read_from_buffer(ubuf, count, ppos,
6347 					s->buffer, trace_seq_used(s));
6348 
6349 	kfree(s);
6350 
6351 	return count;
6352 }
6353 
6354 static const struct file_operations tracing_stats_fops = {
6355 	.open		= tracing_open_generic_tr,
6356 	.read		= tracing_stats_read,
6357 	.llseek		= generic_file_llseek,
6358 	.release	= tracing_release_generic_tr,
6359 };
6360 
6361 #ifdef CONFIG_DYNAMIC_FTRACE
6362 
6363 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6364 {
6365 	return 0;
6366 }
6367 
6368 static ssize_t
6369 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6370 		  size_t cnt, loff_t *ppos)
6371 {
6372 	static char ftrace_dyn_info_buffer[1024];
6373 	static DEFINE_MUTEX(dyn_info_mutex);
6374 	unsigned long *p = filp->private_data;
6375 	char *buf = ftrace_dyn_info_buffer;
6376 	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6377 	int r;
6378 
6379 	mutex_lock(&dyn_info_mutex);
6380 	r = sprintf(buf, "%ld ", *p);
6381 
6382 	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6383 	buf[r++] = '\n';
6384 
6385 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6386 
6387 	mutex_unlock(&dyn_info_mutex);
6388 
6389 	return r;
6390 }
6391 
6392 static const struct file_operations tracing_dyn_info_fops = {
6393 	.open		= tracing_open_generic,
6394 	.read		= tracing_read_dyn_info,
6395 	.llseek		= generic_file_llseek,
6396 };
6397 #endif /* CONFIG_DYNAMIC_FTRACE */
6398 
6399 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6400 static void
6401 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6402 {
6403 	tracing_snapshot();
6404 }
6405 
6406 static void
6407 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6408 {
6409 	unsigned long *count = (long *)data;
6410 
6411 	if (!*count)
6412 		return;
6413 
6414 	if (*count != -1)
6415 		(*count)--;
6416 
6417 	tracing_snapshot();
6418 }
6419 
6420 static int
6421 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6422 		      struct ftrace_probe_ops *ops, void *data)
6423 {
6424 	long count = (long)data;
6425 
6426 	seq_printf(m, "%ps:", (void *)ip);
6427 
6428 	seq_puts(m, "snapshot");
6429 
6430 	if (count == -1)
6431 		seq_puts(m, ":unlimited\n");
6432 	else
6433 		seq_printf(m, ":count=%ld\n", count);
6434 
6435 	return 0;
6436 }
6437 
6438 static struct ftrace_probe_ops snapshot_probe_ops = {
6439 	.func			= ftrace_snapshot,
6440 	.print			= ftrace_snapshot_print,
6441 };
6442 
6443 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6444 	.func			= ftrace_count_snapshot,
6445 	.print			= ftrace_snapshot_print,
6446 };
6447 
6448 static int
6449 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6450 			       char *glob, char *cmd, char *param, int enable)
6451 {
6452 	struct ftrace_probe_ops *ops;
6453 	void *count = (void *)-1;
6454 	char *number;
6455 	int ret;
6456 
6457 	/* hash funcs only work with set_ftrace_filter */
6458 	if (!enable)
6459 		return -EINVAL;
6460 
6461 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6462 
6463 	if (glob[0] == '!') {
6464 		unregister_ftrace_function_probe_func(glob+1, ops);
6465 		return 0;
6466 	}
6467 
6468 	if (!param)
6469 		goto out_reg;
6470 
6471 	number = strsep(&param, ":");
6472 
6473 	if (!strlen(number))
6474 		goto out_reg;
6475 
6476 	/*
6477 	 * We use the callback data field (which is a pointer)
6478 	 * as our counter.
6479 	 */
6480 	ret = kstrtoul(number, 0, (unsigned long *)&count);
6481 	if (ret)
6482 		return ret;
6483 
6484  out_reg:
6485 	ret = register_ftrace_function_probe(glob, ops, count);
6486 
6487 	if (ret >= 0)
6488 		alloc_snapshot(&global_trace);
6489 
6490 	return ret < 0 ? ret : 0;
6491 }
6492 
6493 static struct ftrace_func_command ftrace_snapshot_cmd = {
6494 	.name			= "snapshot",
6495 	.func			= ftrace_trace_snapshot_callback,
6496 };
6497 
6498 static __init int register_snapshot_cmd(void)
6499 {
6500 	return register_ftrace_command(&ftrace_snapshot_cmd);
6501 }
6502 #else
6503 static inline __init int register_snapshot_cmd(void) { return 0; }
6504 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6505 
6506 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6507 {
6508 	if (WARN_ON(!tr->dir))
6509 		return ERR_PTR(-ENODEV);
6510 
6511 	/* Top directory uses NULL as the parent */
6512 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6513 		return NULL;
6514 
6515 	/* All sub buffers have a descriptor */
6516 	return tr->dir;
6517 }
6518 
6519 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6520 {
6521 	struct dentry *d_tracer;
6522 
6523 	if (tr->percpu_dir)
6524 		return tr->percpu_dir;
6525 
6526 	d_tracer = tracing_get_dentry(tr);
6527 	if (IS_ERR(d_tracer))
6528 		return NULL;
6529 
6530 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6531 
6532 	WARN_ONCE(!tr->percpu_dir,
6533 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6534 
6535 	return tr->percpu_dir;
6536 }
6537 
6538 static struct dentry *
6539 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6540 		      void *data, long cpu, const struct file_operations *fops)
6541 {
6542 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6543 
6544 	if (ret) /* See tracing_get_cpu() */
6545 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
6546 	return ret;
6547 }
6548 
6549 static void
6550 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6551 {
6552 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6553 	struct dentry *d_cpu;
6554 	char cpu_dir[30]; /* 30 characters should be more than enough */
6555 
6556 	if (!d_percpu)
6557 		return;
6558 
6559 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
6560 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6561 	if (!d_cpu) {
6562 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6563 		return;
6564 	}
6565 
6566 	/* per cpu trace_pipe */
6567 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6568 				tr, cpu, &tracing_pipe_fops);
6569 
6570 	/* per cpu trace */
6571 	trace_create_cpu_file("trace", 0644, d_cpu,
6572 				tr, cpu, &tracing_fops);
6573 
6574 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6575 				tr, cpu, &tracing_buffers_fops);
6576 
6577 	trace_create_cpu_file("stats", 0444, d_cpu,
6578 				tr, cpu, &tracing_stats_fops);
6579 
6580 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6581 				tr, cpu, &tracing_entries_fops);
6582 
6583 #ifdef CONFIG_TRACER_SNAPSHOT
6584 	trace_create_cpu_file("snapshot", 0644, d_cpu,
6585 				tr, cpu, &snapshot_fops);
6586 
6587 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6588 				tr, cpu, &snapshot_raw_fops);
6589 #endif
6590 }
6591 
6592 #ifdef CONFIG_FTRACE_SELFTEST
6593 /* Let selftest have access to static functions in this file */
6594 #include "trace_selftest.c"
6595 #endif
6596 
6597 static ssize_t
6598 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6599 			loff_t *ppos)
6600 {
6601 	struct trace_option_dentry *topt = filp->private_data;
6602 	char *buf;
6603 
6604 	if (topt->flags->val & topt->opt->bit)
6605 		buf = "1\n";
6606 	else
6607 		buf = "0\n";
6608 
6609 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6610 }
6611 
6612 static ssize_t
6613 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6614 			 loff_t *ppos)
6615 {
6616 	struct trace_option_dentry *topt = filp->private_data;
6617 	unsigned long val;
6618 	int ret;
6619 
6620 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6621 	if (ret)
6622 		return ret;
6623 
6624 	if (val != 0 && val != 1)
6625 		return -EINVAL;
6626 
6627 	if (!!(topt->flags->val & topt->opt->bit) != val) {
6628 		mutex_lock(&trace_types_lock);
6629 		ret = __set_tracer_option(topt->tr, topt->flags,
6630 					  topt->opt, !val);
6631 		mutex_unlock(&trace_types_lock);
6632 		if (ret)
6633 			return ret;
6634 	}
6635 
6636 	*ppos += cnt;
6637 
6638 	return cnt;
6639 }
6640 
6641 
6642 static const struct file_operations trace_options_fops = {
6643 	.open = tracing_open_generic,
6644 	.read = trace_options_read,
6645 	.write = trace_options_write,
6646 	.llseek	= generic_file_llseek,
6647 };
6648 
6649 /*
6650  * In order to pass in both the trace_array descriptor as well as the index
6651  * to the flag that the trace option file represents, the trace_array
6652  * has a character array of trace_flags_index[], which holds the index
6653  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6654  * The address of this character array is passed to the flag option file
6655  * read/write callbacks.
6656  *
6657  * In order to extract both the index and the trace_array descriptor,
6658  * get_tr_index() uses the following algorithm.
6659  *
6660  *   idx = *ptr;
6661  *
6662  * As the pointer itself contains the address of the index (remember
6663  * index[1] == 1).
6664  *
6665  * Then to get the trace_array descriptor, by subtracting that index
6666  * from the ptr, we get to the start of the index itself.
6667  *
6668  *   ptr - idx == &index[0]
6669  *
6670  * Then a simple container_of() from that pointer gets us to the
6671  * trace_array descriptor.
6672  */
6673 static void get_tr_index(void *data, struct trace_array **ptr,
6674 			 unsigned int *pindex)
6675 {
6676 	*pindex = *(unsigned char *)data;
6677 
6678 	*ptr = container_of(data - *pindex, struct trace_array,
6679 			    trace_flags_index);
6680 }
6681 
6682 static ssize_t
6683 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6684 			loff_t *ppos)
6685 {
6686 	void *tr_index = filp->private_data;
6687 	struct trace_array *tr;
6688 	unsigned int index;
6689 	char *buf;
6690 
6691 	get_tr_index(tr_index, &tr, &index);
6692 
6693 	if (tr->trace_flags & (1 << index))
6694 		buf = "1\n";
6695 	else
6696 		buf = "0\n";
6697 
6698 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6699 }
6700 
6701 static ssize_t
6702 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6703 			 loff_t *ppos)
6704 {
6705 	void *tr_index = filp->private_data;
6706 	struct trace_array *tr;
6707 	unsigned int index;
6708 	unsigned long val;
6709 	int ret;
6710 
6711 	get_tr_index(tr_index, &tr, &index);
6712 
6713 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6714 	if (ret)
6715 		return ret;
6716 
6717 	if (val != 0 && val != 1)
6718 		return -EINVAL;
6719 
6720 	mutex_lock(&trace_types_lock);
6721 	ret = set_tracer_flag(tr, 1 << index, val);
6722 	mutex_unlock(&trace_types_lock);
6723 
6724 	if (ret < 0)
6725 		return ret;
6726 
6727 	*ppos += cnt;
6728 
6729 	return cnt;
6730 }
6731 
6732 static const struct file_operations trace_options_core_fops = {
6733 	.open = tracing_open_generic,
6734 	.read = trace_options_core_read,
6735 	.write = trace_options_core_write,
6736 	.llseek = generic_file_llseek,
6737 };
6738 
6739 struct dentry *trace_create_file(const char *name,
6740 				 umode_t mode,
6741 				 struct dentry *parent,
6742 				 void *data,
6743 				 const struct file_operations *fops)
6744 {
6745 	struct dentry *ret;
6746 
6747 	ret = tracefs_create_file(name, mode, parent, data, fops);
6748 	if (!ret)
6749 		pr_warn("Could not create tracefs '%s' entry\n", name);
6750 
6751 	return ret;
6752 }
6753 
6754 
6755 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6756 {
6757 	struct dentry *d_tracer;
6758 
6759 	if (tr->options)
6760 		return tr->options;
6761 
6762 	d_tracer = tracing_get_dentry(tr);
6763 	if (IS_ERR(d_tracer))
6764 		return NULL;
6765 
6766 	tr->options = tracefs_create_dir("options", d_tracer);
6767 	if (!tr->options) {
6768 		pr_warn("Could not create tracefs directory 'options'\n");
6769 		return NULL;
6770 	}
6771 
6772 	return tr->options;
6773 }
6774 
6775 static void
6776 create_trace_option_file(struct trace_array *tr,
6777 			 struct trace_option_dentry *topt,
6778 			 struct tracer_flags *flags,
6779 			 struct tracer_opt *opt)
6780 {
6781 	struct dentry *t_options;
6782 
6783 	t_options = trace_options_init_dentry(tr);
6784 	if (!t_options)
6785 		return;
6786 
6787 	topt->flags = flags;
6788 	topt->opt = opt;
6789 	topt->tr = tr;
6790 
6791 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6792 				    &trace_options_fops);
6793 
6794 }
6795 
6796 static void
6797 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6798 {
6799 	struct trace_option_dentry *topts;
6800 	struct trace_options *tr_topts;
6801 	struct tracer_flags *flags;
6802 	struct tracer_opt *opts;
6803 	int cnt;
6804 	int i;
6805 
6806 	if (!tracer)
6807 		return;
6808 
6809 	flags = tracer->flags;
6810 
6811 	if (!flags || !flags->opts)
6812 		return;
6813 
6814 	/*
6815 	 * If this is an instance, only create flags for tracers
6816 	 * the instance may have.
6817 	 */
6818 	if (!trace_ok_for_array(tracer, tr))
6819 		return;
6820 
6821 	for (i = 0; i < tr->nr_topts; i++) {
6822 		/* Make sure there's no duplicate flags. */
6823 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6824 			return;
6825 	}
6826 
6827 	opts = flags->opts;
6828 
6829 	for (cnt = 0; opts[cnt].name; cnt++)
6830 		;
6831 
6832 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6833 	if (!topts)
6834 		return;
6835 
6836 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6837 			    GFP_KERNEL);
6838 	if (!tr_topts) {
6839 		kfree(topts);
6840 		return;
6841 	}
6842 
6843 	tr->topts = tr_topts;
6844 	tr->topts[tr->nr_topts].tracer = tracer;
6845 	tr->topts[tr->nr_topts].topts = topts;
6846 	tr->nr_topts++;
6847 
6848 	for (cnt = 0; opts[cnt].name; cnt++) {
6849 		create_trace_option_file(tr, &topts[cnt], flags,
6850 					 &opts[cnt]);
6851 		WARN_ONCE(topts[cnt].entry == NULL,
6852 			  "Failed to create trace option: %s",
6853 			  opts[cnt].name);
6854 	}
6855 }
6856 
6857 static struct dentry *
6858 create_trace_option_core_file(struct trace_array *tr,
6859 			      const char *option, long index)
6860 {
6861 	struct dentry *t_options;
6862 
6863 	t_options = trace_options_init_dentry(tr);
6864 	if (!t_options)
6865 		return NULL;
6866 
6867 	return trace_create_file(option, 0644, t_options,
6868 				 (void *)&tr->trace_flags_index[index],
6869 				 &trace_options_core_fops);
6870 }
6871 
6872 static void create_trace_options_dir(struct trace_array *tr)
6873 {
6874 	struct dentry *t_options;
6875 	bool top_level = tr == &global_trace;
6876 	int i;
6877 
6878 	t_options = trace_options_init_dentry(tr);
6879 	if (!t_options)
6880 		return;
6881 
6882 	for (i = 0; trace_options[i]; i++) {
6883 		if (top_level ||
6884 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6885 			create_trace_option_core_file(tr, trace_options[i], i);
6886 	}
6887 }
6888 
6889 static ssize_t
6890 rb_simple_read(struct file *filp, char __user *ubuf,
6891 	       size_t cnt, loff_t *ppos)
6892 {
6893 	struct trace_array *tr = filp->private_data;
6894 	char buf[64];
6895 	int r;
6896 
6897 	r = tracer_tracing_is_on(tr);
6898 	r = sprintf(buf, "%d\n", r);
6899 
6900 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6901 }
6902 
6903 static ssize_t
6904 rb_simple_write(struct file *filp, const char __user *ubuf,
6905 		size_t cnt, loff_t *ppos)
6906 {
6907 	struct trace_array *tr = filp->private_data;
6908 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
6909 	unsigned long val;
6910 	int ret;
6911 
6912 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6913 	if (ret)
6914 		return ret;
6915 
6916 	if (buffer) {
6917 		mutex_lock(&trace_types_lock);
6918 		if (val) {
6919 			tracer_tracing_on(tr);
6920 			if (tr->current_trace->start)
6921 				tr->current_trace->start(tr);
6922 		} else {
6923 			tracer_tracing_off(tr);
6924 			if (tr->current_trace->stop)
6925 				tr->current_trace->stop(tr);
6926 		}
6927 		mutex_unlock(&trace_types_lock);
6928 	}
6929 
6930 	(*ppos)++;
6931 
6932 	return cnt;
6933 }
6934 
6935 static const struct file_operations rb_simple_fops = {
6936 	.open		= tracing_open_generic_tr,
6937 	.read		= rb_simple_read,
6938 	.write		= rb_simple_write,
6939 	.release	= tracing_release_generic_tr,
6940 	.llseek		= default_llseek,
6941 };
6942 
6943 struct dentry *trace_instance_dir;
6944 
6945 static void
6946 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6947 
6948 static int
6949 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6950 {
6951 	enum ring_buffer_flags rb_flags;
6952 
6953 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6954 
6955 	buf->tr = tr;
6956 
6957 	buf->buffer = ring_buffer_alloc(size, rb_flags);
6958 	if (!buf->buffer)
6959 		return -ENOMEM;
6960 
6961 	buf->data = alloc_percpu(struct trace_array_cpu);
6962 	if (!buf->data) {
6963 		ring_buffer_free(buf->buffer);
6964 		return -ENOMEM;
6965 	}
6966 
6967 	/* Allocate the first page for all buffers */
6968 	set_buffer_entries(&tr->trace_buffer,
6969 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
6970 
6971 	return 0;
6972 }
6973 
6974 static int allocate_trace_buffers(struct trace_array *tr, int size)
6975 {
6976 	int ret;
6977 
6978 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6979 	if (ret)
6980 		return ret;
6981 
6982 #ifdef CONFIG_TRACER_MAX_TRACE
6983 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
6984 				    allocate_snapshot ? size : 1);
6985 	if (WARN_ON(ret)) {
6986 		ring_buffer_free(tr->trace_buffer.buffer);
6987 		free_percpu(tr->trace_buffer.data);
6988 		return -ENOMEM;
6989 	}
6990 	tr->allocated_snapshot = allocate_snapshot;
6991 
6992 	/*
6993 	 * Only the top level trace array gets its snapshot allocated
6994 	 * from the kernel command line.
6995 	 */
6996 	allocate_snapshot = false;
6997 #endif
6998 	return 0;
6999 }
7000 
7001 static void free_trace_buffer(struct trace_buffer *buf)
7002 {
7003 	if (buf->buffer) {
7004 		ring_buffer_free(buf->buffer);
7005 		buf->buffer = NULL;
7006 		free_percpu(buf->data);
7007 		buf->data = NULL;
7008 	}
7009 }
7010 
7011 static void free_trace_buffers(struct trace_array *tr)
7012 {
7013 	if (!tr)
7014 		return;
7015 
7016 	free_trace_buffer(&tr->trace_buffer);
7017 
7018 #ifdef CONFIG_TRACER_MAX_TRACE
7019 	free_trace_buffer(&tr->max_buffer);
7020 #endif
7021 }
7022 
7023 static void init_trace_flags_index(struct trace_array *tr)
7024 {
7025 	int i;
7026 
7027 	/* Used by the trace options files */
7028 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7029 		tr->trace_flags_index[i] = i;
7030 }
7031 
7032 static void __update_tracer_options(struct trace_array *tr)
7033 {
7034 	struct tracer *t;
7035 
7036 	for (t = trace_types; t; t = t->next)
7037 		add_tracer_options(tr, t);
7038 }
7039 
7040 static void update_tracer_options(struct trace_array *tr)
7041 {
7042 	mutex_lock(&trace_types_lock);
7043 	__update_tracer_options(tr);
7044 	mutex_unlock(&trace_types_lock);
7045 }
7046 
7047 static int instance_mkdir(const char *name)
7048 {
7049 	struct trace_array *tr;
7050 	int ret;
7051 
7052 	mutex_lock(&trace_types_lock);
7053 
7054 	ret = -EEXIST;
7055 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7056 		if (tr->name && strcmp(tr->name, name) == 0)
7057 			goto out_unlock;
7058 	}
7059 
7060 	ret = -ENOMEM;
7061 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7062 	if (!tr)
7063 		goto out_unlock;
7064 
7065 	tr->name = kstrdup(name, GFP_KERNEL);
7066 	if (!tr->name)
7067 		goto out_free_tr;
7068 
7069 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7070 		goto out_free_tr;
7071 
7072 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7073 
7074 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7075 
7076 	raw_spin_lock_init(&tr->start_lock);
7077 
7078 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7079 
7080 	tr->current_trace = &nop_trace;
7081 
7082 	INIT_LIST_HEAD(&tr->systems);
7083 	INIT_LIST_HEAD(&tr->events);
7084 
7085 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7086 		goto out_free_tr;
7087 
7088 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7089 	if (!tr->dir)
7090 		goto out_free_tr;
7091 
7092 	ret = event_trace_add_tracer(tr->dir, tr);
7093 	if (ret) {
7094 		tracefs_remove_recursive(tr->dir);
7095 		goto out_free_tr;
7096 	}
7097 
7098 	init_tracer_tracefs(tr, tr->dir);
7099 	init_trace_flags_index(tr);
7100 	__update_tracer_options(tr);
7101 
7102 	list_add(&tr->list, &ftrace_trace_arrays);
7103 
7104 	mutex_unlock(&trace_types_lock);
7105 
7106 	return 0;
7107 
7108  out_free_tr:
7109 	free_trace_buffers(tr);
7110 	free_cpumask_var(tr->tracing_cpumask);
7111 	kfree(tr->name);
7112 	kfree(tr);
7113 
7114  out_unlock:
7115 	mutex_unlock(&trace_types_lock);
7116 
7117 	return ret;
7118 
7119 }
7120 
7121 static int instance_rmdir(const char *name)
7122 {
7123 	struct trace_array *tr;
7124 	int found = 0;
7125 	int ret;
7126 	int i;
7127 
7128 	mutex_lock(&trace_types_lock);
7129 
7130 	ret = -ENODEV;
7131 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7132 		if (tr->name && strcmp(tr->name, name) == 0) {
7133 			found = 1;
7134 			break;
7135 		}
7136 	}
7137 	if (!found)
7138 		goto out_unlock;
7139 
7140 	ret = -EBUSY;
7141 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7142 		goto out_unlock;
7143 
7144 	list_del(&tr->list);
7145 
7146 	/* Disable all the flags that were enabled coming in */
7147 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7148 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7149 			set_tracer_flag(tr, 1 << i, 0);
7150 	}
7151 
7152 	tracing_set_nop(tr);
7153 	event_trace_del_tracer(tr);
7154 	ftrace_destroy_function_files(tr);
7155 	tracefs_remove_recursive(tr->dir);
7156 	free_trace_buffers(tr);
7157 
7158 	for (i = 0; i < tr->nr_topts; i++) {
7159 		kfree(tr->topts[i].topts);
7160 	}
7161 	kfree(tr->topts);
7162 
7163 	kfree(tr->name);
7164 	kfree(tr);
7165 
7166 	ret = 0;
7167 
7168  out_unlock:
7169 	mutex_unlock(&trace_types_lock);
7170 
7171 	return ret;
7172 }
7173 
7174 static __init void create_trace_instances(struct dentry *d_tracer)
7175 {
7176 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7177 							 instance_mkdir,
7178 							 instance_rmdir);
7179 	if (WARN_ON(!trace_instance_dir))
7180 		return;
7181 }
7182 
7183 static void
7184 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7185 {
7186 	int cpu;
7187 
7188 	trace_create_file("available_tracers", 0444, d_tracer,
7189 			tr, &show_traces_fops);
7190 
7191 	trace_create_file("current_tracer", 0644, d_tracer,
7192 			tr, &set_tracer_fops);
7193 
7194 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7195 			  tr, &tracing_cpumask_fops);
7196 
7197 	trace_create_file("trace_options", 0644, d_tracer,
7198 			  tr, &tracing_iter_fops);
7199 
7200 	trace_create_file("trace", 0644, d_tracer,
7201 			  tr, &tracing_fops);
7202 
7203 	trace_create_file("trace_pipe", 0444, d_tracer,
7204 			  tr, &tracing_pipe_fops);
7205 
7206 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7207 			  tr, &tracing_entries_fops);
7208 
7209 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7210 			  tr, &tracing_total_entries_fops);
7211 
7212 	trace_create_file("free_buffer", 0200, d_tracer,
7213 			  tr, &tracing_free_buffer_fops);
7214 
7215 	trace_create_file("trace_marker", 0220, d_tracer,
7216 			  tr, &tracing_mark_fops);
7217 
7218 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7219 			  &trace_clock_fops);
7220 
7221 	trace_create_file("tracing_on", 0644, d_tracer,
7222 			  tr, &rb_simple_fops);
7223 
7224 	create_trace_options_dir(tr);
7225 
7226 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7227 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7228 			&tr->max_latency, &tracing_max_lat_fops);
7229 #endif
7230 
7231 	if (ftrace_create_function_files(tr, d_tracer))
7232 		WARN(1, "Could not allocate function filter files");
7233 
7234 #ifdef CONFIG_TRACER_SNAPSHOT
7235 	trace_create_file("snapshot", 0644, d_tracer,
7236 			  tr, &snapshot_fops);
7237 #endif
7238 
7239 	for_each_tracing_cpu(cpu)
7240 		tracing_init_tracefs_percpu(tr, cpu);
7241 
7242 	ftrace_init_tracefs(tr, d_tracer);
7243 }
7244 
7245 static struct vfsmount *trace_automount(void *ingore)
7246 {
7247 	struct vfsmount *mnt;
7248 	struct file_system_type *type;
7249 
7250 	/*
7251 	 * To maintain backward compatibility for tools that mount
7252 	 * debugfs to get to the tracing facility, tracefs is automatically
7253 	 * mounted to the debugfs/tracing directory.
7254 	 */
7255 	type = get_fs_type("tracefs");
7256 	if (!type)
7257 		return NULL;
7258 	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7259 	put_filesystem(type);
7260 	if (IS_ERR(mnt))
7261 		return NULL;
7262 	mntget(mnt);
7263 
7264 	return mnt;
7265 }
7266 
7267 /**
7268  * tracing_init_dentry - initialize top level trace array
7269  *
7270  * This is called when creating files or directories in the tracing
7271  * directory. It is called via fs_initcall() by any of the boot up code
7272  * and expects to return the dentry of the top level tracing directory.
7273  */
7274 struct dentry *tracing_init_dentry(void)
7275 {
7276 	struct trace_array *tr = &global_trace;
7277 
7278 	/* The top level trace array uses  NULL as parent */
7279 	if (tr->dir)
7280 		return NULL;
7281 
7282 	if (WARN_ON(!tracefs_initialized()) ||
7283 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
7284 		 WARN_ON(!debugfs_initialized())))
7285 		return ERR_PTR(-ENODEV);
7286 
7287 	/*
7288 	 * As there may still be users that expect the tracing
7289 	 * files to exist in debugfs/tracing, we must automount
7290 	 * the tracefs file system there, so older tools still
7291 	 * work with the newer kerenl.
7292 	 */
7293 	tr->dir = debugfs_create_automount("tracing", NULL,
7294 					   trace_automount, NULL);
7295 	if (!tr->dir) {
7296 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7297 		return ERR_PTR(-ENOMEM);
7298 	}
7299 
7300 	return NULL;
7301 }
7302 
7303 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7304 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7305 
7306 static void __init trace_enum_init(void)
7307 {
7308 	int len;
7309 
7310 	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7311 	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7312 }
7313 
7314 #ifdef CONFIG_MODULES
7315 static void trace_module_add_enums(struct module *mod)
7316 {
7317 	if (!mod->num_trace_enums)
7318 		return;
7319 
7320 	/*
7321 	 * Modules with bad taint do not have events created, do
7322 	 * not bother with enums either.
7323 	 */
7324 	if (trace_module_has_bad_taint(mod))
7325 		return;
7326 
7327 	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7328 }
7329 
7330 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7331 static void trace_module_remove_enums(struct module *mod)
7332 {
7333 	union trace_enum_map_item *map;
7334 	union trace_enum_map_item **last = &trace_enum_maps;
7335 
7336 	if (!mod->num_trace_enums)
7337 		return;
7338 
7339 	mutex_lock(&trace_enum_mutex);
7340 
7341 	map = trace_enum_maps;
7342 
7343 	while (map) {
7344 		if (map->head.mod == mod)
7345 			break;
7346 		map = trace_enum_jmp_to_tail(map);
7347 		last = &map->tail.next;
7348 		map = map->tail.next;
7349 	}
7350 	if (!map)
7351 		goto out;
7352 
7353 	*last = trace_enum_jmp_to_tail(map)->tail.next;
7354 	kfree(map);
7355  out:
7356 	mutex_unlock(&trace_enum_mutex);
7357 }
7358 #else
7359 static inline void trace_module_remove_enums(struct module *mod) { }
7360 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7361 
7362 static int trace_module_notify(struct notifier_block *self,
7363 			       unsigned long val, void *data)
7364 {
7365 	struct module *mod = data;
7366 
7367 	switch (val) {
7368 	case MODULE_STATE_COMING:
7369 		trace_module_add_enums(mod);
7370 		break;
7371 	case MODULE_STATE_GOING:
7372 		trace_module_remove_enums(mod);
7373 		break;
7374 	}
7375 
7376 	return 0;
7377 }
7378 
7379 static struct notifier_block trace_module_nb = {
7380 	.notifier_call = trace_module_notify,
7381 	.priority = 0,
7382 };
7383 #endif /* CONFIG_MODULES */
7384 
7385 static __init int tracer_init_tracefs(void)
7386 {
7387 	struct dentry *d_tracer;
7388 
7389 	trace_access_lock_init();
7390 
7391 	d_tracer = tracing_init_dentry();
7392 	if (IS_ERR(d_tracer))
7393 		return 0;
7394 
7395 	init_tracer_tracefs(&global_trace, d_tracer);
7396 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7397 
7398 	trace_create_file("tracing_thresh", 0644, d_tracer,
7399 			&global_trace, &tracing_thresh_fops);
7400 
7401 	trace_create_file("README", 0444, d_tracer,
7402 			NULL, &tracing_readme_fops);
7403 
7404 	trace_create_file("saved_cmdlines", 0444, d_tracer,
7405 			NULL, &tracing_saved_cmdlines_fops);
7406 
7407 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7408 			  NULL, &tracing_saved_cmdlines_size_fops);
7409 
7410 	trace_enum_init();
7411 
7412 	trace_create_enum_file(d_tracer);
7413 
7414 #ifdef CONFIG_MODULES
7415 	register_module_notifier(&trace_module_nb);
7416 #endif
7417 
7418 #ifdef CONFIG_DYNAMIC_FTRACE
7419 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7420 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7421 #endif
7422 
7423 	create_trace_instances(d_tracer);
7424 
7425 	update_tracer_options(&global_trace);
7426 
7427 	return 0;
7428 }
7429 
7430 static int trace_panic_handler(struct notifier_block *this,
7431 			       unsigned long event, void *unused)
7432 {
7433 	if (ftrace_dump_on_oops)
7434 		ftrace_dump(ftrace_dump_on_oops);
7435 	return NOTIFY_OK;
7436 }
7437 
7438 static struct notifier_block trace_panic_notifier = {
7439 	.notifier_call  = trace_panic_handler,
7440 	.next           = NULL,
7441 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
7442 };
7443 
7444 static int trace_die_handler(struct notifier_block *self,
7445 			     unsigned long val,
7446 			     void *data)
7447 {
7448 	switch (val) {
7449 	case DIE_OOPS:
7450 		if (ftrace_dump_on_oops)
7451 			ftrace_dump(ftrace_dump_on_oops);
7452 		break;
7453 	default:
7454 		break;
7455 	}
7456 	return NOTIFY_OK;
7457 }
7458 
7459 static struct notifier_block trace_die_notifier = {
7460 	.notifier_call = trace_die_handler,
7461 	.priority = 200
7462 };
7463 
7464 /*
7465  * printk is set to max of 1024, we really don't need it that big.
7466  * Nothing should be printing 1000 characters anyway.
7467  */
7468 #define TRACE_MAX_PRINT		1000
7469 
7470 /*
7471  * Define here KERN_TRACE so that we have one place to modify
7472  * it if we decide to change what log level the ftrace dump
7473  * should be at.
7474  */
7475 #define KERN_TRACE		KERN_EMERG
7476 
7477 void
7478 trace_printk_seq(struct trace_seq *s)
7479 {
7480 	/* Probably should print a warning here. */
7481 	if (s->seq.len >= TRACE_MAX_PRINT)
7482 		s->seq.len = TRACE_MAX_PRINT;
7483 
7484 	/*
7485 	 * More paranoid code. Although the buffer size is set to
7486 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7487 	 * an extra layer of protection.
7488 	 */
7489 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7490 		s->seq.len = s->seq.size - 1;
7491 
7492 	/* should be zero ended, but we are paranoid. */
7493 	s->buffer[s->seq.len] = 0;
7494 
7495 	printk(KERN_TRACE "%s", s->buffer);
7496 
7497 	trace_seq_init(s);
7498 }
7499 
7500 void trace_init_global_iter(struct trace_iterator *iter)
7501 {
7502 	iter->tr = &global_trace;
7503 	iter->trace = iter->tr->current_trace;
7504 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
7505 	iter->trace_buffer = &global_trace.trace_buffer;
7506 
7507 	if (iter->trace && iter->trace->open)
7508 		iter->trace->open(iter);
7509 
7510 	/* Annotate start of buffers if we had overruns */
7511 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
7512 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
7513 
7514 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
7515 	if (trace_clocks[iter->tr->clock_id].in_ns)
7516 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7517 }
7518 
7519 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7520 {
7521 	/* use static because iter can be a bit big for the stack */
7522 	static struct trace_iterator iter;
7523 	static atomic_t dump_running;
7524 	struct trace_array *tr = &global_trace;
7525 	unsigned int old_userobj;
7526 	unsigned long flags;
7527 	int cnt = 0, cpu;
7528 
7529 	/* Only allow one dump user at a time. */
7530 	if (atomic_inc_return(&dump_running) != 1) {
7531 		atomic_dec(&dump_running);
7532 		return;
7533 	}
7534 
7535 	/*
7536 	 * Always turn off tracing when we dump.
7537 	 * We don't need to show trace output of what happens
7538 	 * between multiple crashes.
7539 	 *
7540 	 * If the user does a sysrq-z, then they can re-enable
7541 	 * tracing with echo 1 > tracing_on.
7542 	 */
7543 	tracing_off();
7544 
7545 	local_irq_save(flags);
7546 
7547 	/* Simulate the iterator */
7548 	trace_init_global_iter(&iter);
7549 
7550 	for_each_tracing_cpu(cpu) {
7551 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7552 	}
7553 
7554 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7555 
7556 	/* don't look at user memory in panic mode */
7557 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7558 
7559 	switch (oops_dump_mode) {
7560 	case DUMP_ALL:
7561 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7562 		break;
7563 	case DUMP_ORIG:
7564 		iter.cpu_file = raw_smp_processor_id();
7565 		break;
7566 	case DUMP_NONE:
7567 		goto out_enable;
7568 	default:
7569 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7570 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7571 	}
7572 
7573 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
7574 
7575 	/* Did function tracer already get disabled? */
7576 	if (ftrace_is_dead()) {
7577 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7578 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7579 	}
7580 
7581 	/*
7582 	 * We need to stop all tracing on all CPUS to read the
7583 	 * the next buffer. This is a bit expensive, but is
7584 	 * not done often. We fill all what we can read,
7585 	 * and then release the locks again.
7586 	 */
7587 
7588 	while (!trace_empty(&iter)) {
7589 
7590 		if (!cnt)
7591 			printk(KERN_TRACE "---------------------------------\n");
7592 
7593 		cnt++;
7594 
7595 		/* reset all but tr, trace, and overruns */
7596 		memset(&iter.seq, 0,
7597 		       sizeof(struct trace_iterator) -
7598 		       offsetof(struct trace_iterator, seq));
7599 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
7600 		iter.pos = -1;
7601 
7602 		if (trace_find_next_entry_inc(&iter) != NULL) {
7603 			int ret;
7604 
7605 			ret = print_trace_line(&iter);
7606 			if (ret != TRACE_TYPE_NO_CONSUME)
7607 				trace_consume(&iter);
7608 		}
7609 		touch_nmi_watchdog();
7610 
7611 		trace_printk_seq(&iter.seq);
7612 	}
7613 
7614 	if (!cnt)
7615 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
7616 	else
7617 		printk(KERN_TRACE "---------------------------------\n");
7618 
7619  out_enable:
7620 	tr->trace_flags |= old_userobj;
7621 
7622 	for_each_tracing_cpu(cpu) {
7623 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7624 	}
7625  	atomic_dec(&dump_running);
7626 	local_irq_restore(flags);
7627 }
7628 EXPORT_SYMBOL_GPL(ftrace_dump);
7629 
7630 __init static int tracer_alloc_buffers(void)
7631 {
7632 	int ring_buf_size;
7633 	int ret = -ENOMEM;
7634 
7635 	/*
7636 	 * Make sure we don't accidently add more trace options
7637 	 * than we have bits for.
7638 	 */
7639 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7640 
7641 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7642 		goto out;
7643 
7644 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7645 		goto out_free_buffer_mask;
7646 
7647 	/* Only allocate trace_printk buffers if a trace_printk exists */
7648 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7649 		/* Must be called before global_trace.buffer is allocated */
7650 		trace_printk_init_buffers();
7651 
7652 	/* To save memory, keep the ring buffer size to its minimum */
7653 	if (ring_buffer_expanded)
7654 		ring_buf_size = trace_buf_size;
7655 	else
7656 		ring_buf_size = 1;
7657 
7658 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7659 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7660 
7661 	raw_spin_lock_init(&global_trace.start_lock);
7662 
7663 	/*
7664 	 * The prepare callbacks allocates some memory for the ring buffer. We
7665 	 * don't free the buffer if the if the CPU goes down. If we were to free
7666 	 * the buffer, then the user would lose any trace that was in the
7667 	 * buffer. The memory will be removed once the "instance" is removed.
7668 	 */
7669 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
7670 				      "trace/RB:preapre", trace_rb_cpu_prepare,
7671 				      NULL);
7672 	if (ret < 0)
7673 		goto out_free_cpumask;
7674 	/* Used for event triggers */
7675 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7676 	if (!temp_buffer)
7677 		goto out_rm_hp_state;
7678 
7679 	if (trace_create_savedcmd() < 0)
7680 		goto out_free_temp_buffer;
7681 
7682 	/* TODO: make the number of buffers hot pluggable with CPUS */
7683 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7684 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7685 		WARN_ON(1);
7686 		goto out_free_savedcmd;
7687 	}
7688 
7689 	if (global_trace.buffer_disabled)
7690 		tracing_off();
7691 
7692 	if (trace_boot_clock) {
7693 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
7694 		if (ret < 0)
7695 			pr_warn("Trace clock %s not defined, going back to default\n",
7696 				trace_boot_clock);
7697 	}
7698 
7699 	/*
7700 	 * register_tracer() might reference current_trace, so it
7701 	 * needs to be set before we register anything. This is
7702 	 * just a bootstrap of current_trace anyway.
7703 	 */
7704 	global_trace.current_trace = &nop_trace;
7705 
7706 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7707 
7708 	ftrace_init_global_array_ops(&global_trace);
7709 
7710 	init_trace_flags_index(&global_trace);
7711 
7712 	register_tracer(&nop_trace);
7713 
7714 	/* All seems OK, enable tracing */
7715 	tracing_disabled = 0;
7716 
7717 	atomic_notifier_chain_register(&panic_notifier_list,
7718 				       &trace_panic_notifier);
7719 
7720 	register_die_notifier(&trace_die_notifier);
7721 
7722 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7723 
7724 	INIT_LIST_HEAD(&global_trace.systems);
7725 	INIT_LIST_HEAD(&global_trace.events);
7726 	list_add(&global_trace.list, &ftrace_trace_arrays);
7727 
7728 	apply_trace_boot_options();
7729 
7730 	register_snapshot_cmd();
7731 
7732 	return 0;
7733 
7734 out_free_savedcmd:
7735 	free_saved_cmdlines_buffer(savedcmd);
7736 out_free_temp_buffer:
7737 	ring_buffer_free(temp_buffer);
7738 out_rm_hp_state:
7739 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
7740 out_free_cpumask:
7741 	free_cpumask_var(global_trace.tracing_cpumask);
7742 out_free_buffer_mask:
7743 	free_cpumask_var(tracing_buffer_mask);
7744 out:
7745 	return ret;
7746 }
7747 
7748 void __init trace_init(void)
7749 {
7750 	if (tracepoint_printk) {
7751 		tracepoint_print_iter =
7752 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7753 		if (WARN_ON(!tracepoint_print_iter))
7754 			tracepoint_printk = 0;
7755 	}
7756 	tracer_alloc_buffers();
7757 	trace_event_init();
7758 }
7759 
7760 __init static int clear_boot_tracer(void)
7761 {
7762 	/*
7763 	 * The default tracer at boot buffer is an init section.
7764 	 * This function is called in lateinit. If we did not
7765 	 * find the boot tracer, then clear it out, to prevent
7766 	 * later registration from accessing the buffer that is
7767 	 * about to be freed.
7768 	 */
7769 	if (!default_bootup_tracer)
7770 		return 0;
7771 
7772 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7773 	       default_bootup_tracer);
7774 	default_bootup_tracer = NULL;
7775 
7776 	return 0;
7777 }
7778 
7779 fs_initcall(tracer_init_tracefs);
7780 late_initcall(clear_boot_tracer);
7781