xref: /linux/kernel/trace/trace.c (revision 071bf69a0220253a44acb8b2a27f7a262b9a46bf)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44 
45 #include "trace.h"
46 #include "trace_output.h"
47 
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53 
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring-buffer to count the
 * entries inserted during the selftest, although concurrent
 * insertions into the ring-buffer (such as trace_printk) could occur
 * at the same time, giving false positive or negative results.
 */
61 static bool __read_mostly tracing_selftest_running;
62 
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67 
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71 
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74 	{ }
75 };
76 
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80 	return 0;
81 }
82 
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89 
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97 
98 cpumask_var_t __read_mostly	tracing_buffer_mask;
99 
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
108  *
109  * It is default off, but you can enable it with either specifying
110  * "ftrace_dump_on_oops" in the kernel command line, or setting
111  * /proc/sys/kernel/ftrace_dump_on_oops
112  * Set 1 if you want to dump buffers of all CPUs
113  * Set 2 if you want to dump the buffer of the CPU that triggered oops
114  */
115 
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117 
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120 
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
/*
 * Map of enums to their values, for "enum_map" file.
 *
 * This is the first element of a saved enum map array
 * (see union trace_enum_map_item).
 */
struct trace_enum_map_head {
	/* Module the saved maps belong to, if they are not built in */
	struct module			*mod;
	/* Count of the saved maps in this array */
	unsigned long			length;
};
127 
128 union trace_enum_map_item;
129 
/* Last element of a saved enum map array (see union trace_enum_map_item). */
struct trace_enum_map_tail {
	/*
	 * "end" points to NULL as it must be different
	 * than "mod" or "enum_string".
	 *
	 * NOTE(review): this comment used to say that "end" is first, but
	 * "next" is the first member of this struct. Which members of the
	 * other union variants "end" overlays should be confirmed against
	 * struct trace_enum_map.
	 */
	/* Pointer to the next array of saved enum_map items */
	union trace_enum_map_item	*next;
	const char			*end;	/* points to NULL */
};
138 
139 static DEFINE_MUTEX(trace_enum_mutex);
140 
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149 	struct trace_enum_map		map;
150 	struct trace_enum_map_head	head;
151 	struct trace_enum_map_tail	tail;
152 };
153 
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156 
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158 
159 #define MAX_TRACER_SIZE		100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162 
163 static bool allocate_snapshot;
164 
165 static int __init set_cmdline_ftrace(char *str)
166 {
167 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168 	default_bootup_tracer = bootup_tracer_buf;
169 	/* We are using ftrace early, expand it */
170 	ring_buffer_expanded = true;
171 	return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174 
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177 	if (*str++ != '=' || !*str) {
178 		ftrace_dump_on_oops = DUMP_ALL;
179 		return 1;
180 	}
181 
182 	if (!strcmp("orig_cpu", str)) {
183 		ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186 
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190 
191 static int __init stop_trace_on_warning(char *str)
192 {
193 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194 		__disable_trace_on_warning = 1;
195 	return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198 
199 static int __init boot_alloc_snapshot(char *str)
200 {
201 	allocate_snapshot = true;
202 	/* We also need the main ring buffer expanded */
203 	ring_buffer_expanded = true;
204 	return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207 
208 
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210 
211 static int __init set_trace_boot_options(char *str)
212 {
213 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214 	return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217 
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220 
221 static int __init set_trace_boot_clock(char *str)
222 {
223 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224 	trace_boot_clock = trace_boot_clock_buf;
225 	return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228 
229 static int __init set_tracepoint_printk(char *str)
230 {
231 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232 		tracepoint_printk = 1;
233 	return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236 
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239 	nsec += 500;
240 	do_div(nsec, 1000);
241 	return nsec;
242 }
243 
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS						\
246 	(FUNCTION_DEFAULT_FLAGS |					\
247 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
248 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
249 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
250 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251 
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
254 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255 
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258 	TRACE_ITER_EVENT_FORK
259 
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a link list of pages that will store trace entries. The
264  * page descriptor of the pages in the memory is used to hold
265  * the link list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273 	.trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275 
276 LIST_HEAD(ftrace_trace_arrays);
277 
278 int trace_array_get(struct trace_array *this_tr)
279 {
280 	struct trace_array *tr;
281 	int ret = -ENODEV;
282 
283 	mutex_lock(&trace_types_lock);
284 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285 		if (tr == this_tr) {
286 			tr->ref++;
287 			ret = 0;
288 			break;
289 		}
290 	}
291 	mutex_unlock(&trace_types_lock);
292 
293 	return ret;
294 }
295 
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298 	WARN_ON(!this_tr->ref);
299 	this_tr->ref--;
300 }
301 
302 void trace_array_put(struct trace_array *this_tr)
303 {
304 	mutex_lock(&trace_types_lock);
305 	__trace_array_put(this_tr);
306 	mutex_unlock(&trace_types_lock);
307 }
308 
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310 			      struct ring_buffer *buffer,
311 			      struct ring_buffer_event *event)
312 {
313 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314 	    !filter_match_preds(call->filter, rec)) {
315 		__trace_event_discard_commit(buffer, event);
316 		return 1;
317 	}
318 
319 	return 0;
320 }
321 
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324 	vfree(pid_list->pids);
325 	kfree(pid_list);
326 }
327 
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338 	/*
339 	 * If pid_max changed after filtered_pids was created, we
340 	 * by default ignore all pids greater than the previous pid_max.
341 	 */
342 	if (search_pid >= filtered_pids->pid_max)
343 		return false;
344 
345 	return test_bit(search_pid, filtered_pids->pids);
346 }
347 
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360 	/*
361 	 * Return false, because if filtered_pids does not exist,
362 	 * all pids are good to trace.
363 	 */
364 	if (!filtered_pids)
365 		return false;
366 
367 	return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369 
370 /**
371  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * If adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383 				  struct task_struct *self,
384 				  struct task_struct *task)
385 {
386 	if (!pid_list)
387 		return;
388 
389 	/* For forks, we only add if the forking task is listed */
390 	if (self) {
391 		if (!trace_find_filtered_pid(pid_list, self->pid))
392 			return;
393 	}
394 
395 	/* Sorry, but we don't support pid_max changing after setting */
396 	if (task->pid >= pid_list->pid_max)
397 		return;
398 
399 	/* "self" is set for forks, and NULL for exits */
400 	if (self)
401 		set_bit(task->pid, pid_list->pids);
402 	else
403 		clear_bit(task->pid, pid_list->pids);
404 }
405 
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420 	unsigned long pid = (unsigned long)v;
421 
422 	(*pos)++;
423 
424 	/* pid already is +1 of the actual prevous bit */
425 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426 
427 	/* Return pid + 1 to allow zero to be represented */
428 	if (pid < pid_list->pid_max)
429 		return (void *)(pid + 1);
430 
431 	return NULL;
432 }
433 
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447 	unsigned long pid;
448 	loff_t l = 0;
449 
450 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451 	if (pid >= pid_list->pid_max)
452 		return NULL;
453 
454 	/* Return pid + 1 so that zero can be the exit value */
455 	for (pid++; pid && l < *pos;
456 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457 		;
458 	return (void *)pid;
459 }
460 
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
int trace_pid_show(struct seq_file *m, void *v)
{
	/* The iterator value is the pid + 1 */
	unsigned long encoded = (unsigned long)v;

	seq_printf(m, "%lu\n", encoded - 1);

	return 0;
}
476 
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE		127
479 
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481 		    struct trace_pid_list **new_pid_list,
482 		    const char __user *ubuf, size_t cnt)
483 {
484 	struct trace_pid_list *pid_list;
485 	struct trace_parser parser;
486 	unsigned long val;
487 	int nr_pids = 0;
488 	ssize_t read = 0;
489 	ssize_t ret = 0;
490 	loff_t pos;
491 	pid_t pid;
492 
493 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494 		return -ENOMEM;
495 
496 	/*
497 	 * Always recreate a new array. The write is an all or nothing
498 	 * operation. Always create a new array when adding new pids by
499 	 * the user. If the operation fails, then the current list is
500 	 * not modified.
501 	 */
502 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503 	if (!pid_list)
504 		return -ENOMEM;
505 
506 	pid_list->pid_max = READ_ONCE(pid_max);
507 
508 	/* Only truncating will shrink pid_max */
509 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 		pid_list->pid_max = filtered_pids->pid_max;
511 
512 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 	if (!pid_list->pids) {
514 		kfree(pid_list);
515 		return -ENOMEM;
516 	}
517 
518 	if (filtered_pids) {
519 		/* copy the current bits to the new max */
520 		for_each_set_bit(pid, filtered_pids->pids,
521 				 filtered_pids->pid_max) {
522 			set_bit(pid, pid_list->pids);
523 			nr_pids++;
524 		}
525 	}
526 
527 	while (cnt > 0) {
528 
529 		pos = 0;
530 
531 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
532 		if (ret < 0 || !trace_parser_loaded(&parser))
533 			break;
534 
535 		read += ret;
536 		ubuf += ret;
537 		cnt -= ret;
538 
539 		parser.buffer[parser.idx] = 0;
540 
541 		ret = -EINVAL;
542 		if (kstrtoul(parser.buffer, 0, &val))
543 			break;
544 		if (val >= pid_list->pid_max)
545 			break;
546 
547 		pid = (pid_t)val;
548 
549 		set_bit(pid, pid_list->pids);
550 		nr_pids++;
551 
552 		trace_parser_clear(&parser);
553 		ret = 0;
554 	}
555 	trace_parser_put(&parser);
556 
557 	if (ret < 0) {
558 		trace_free_pid_list(pid_list);
559 		return ret;
560 	}
561 
562 	if (!nr_pids) {
563 		/* Cleared the list of pids */
564 		trace_free_pid_list(pid_list);
565 		read = ret;
566 		pid_list = NULL;
567 	}
568 
569 	*new_pid_list = pid_list;
570 
571 	return read;
572 }
573 
574 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
575 {
576 	u64 ts;
577 
578 	/* Early boot up does not have a buffer yet */
579 	if (!buf->buffer)
580 		return trace_clock_local();
581 
582 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
583 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
584 
585 	return ts;
586 }
587 
588 cycle_t ftrace_now(int cpu)
589 {
590 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
591 }
592 
593 /**
594  * tracing_is_enabled - Show if global_trace has been disabled
595  *
596  * Shows if the global trace has been enabled or not. It uses the
597  * mirror flag "buffer_disabled" to be used in fast paths such as for
598  * the irqsoff tracer. But it may be inaccurate due to races. If you
599  * need to know the accurate state, use tracing_is_on() which is a little
600  * slower, but accurate.
601  */
602 int tracing_is_enabled(void)
603 {
604 	/*
605 	 * For quick access (irqsoff uses this in fast path), just
606 	 * return the mirror variable of the state of the ring buffer.
607 	 * It's a little racy, but we don't really care.
608 	 */
609 	smp_rmb();
610 	return !global_trace.buffer_disabled;
611 }
612 
613 /*
614  * trace_buf_size is the size in bytes that is allocated
615  * for a buffer. Note, the number of bytes is always rounded
616  * to page size.
617  *
618  * This number is purposely set to a low number of 16384.
619  * If the dump on oops happens, it will be much appreciated
620  * to not have to wait for all that output. Anyway this can be
621  * boot time and run time configurable.
622  */
623 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
624 
625 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
626 
627 /* trace_types holds a link list of available tracers. */
628 static struct tracer		*trace_types __read_mostly;
629 
630 /*
631  * trace_types_lock is used to protect the trace_types list.
632  */
633 DEFINE_MUTEX(trace_types_lock);
634 
/*
 * serialize the access of the ring buffer
 *
 * The ring buffer serializes readers, but that is only low level protection.
 * The validity of the events (as returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not the reader page) in the ring buffer, and this page will be
 *      rewritten by the events producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different cpu ring
 * buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */
656 
657 #ifdef CONFIG_SMP
658 static DECLARE_RWSEM(all_cpu_access_lock);
659 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
660 
661 static inline void trace_access_lock(int cpu)
662 {
663 	if (cpu == RING_BUFFER_ALL_CPUS) {
664 		/* gain it for accessing the whole ring buffer. */
665 		down_write(&all_cpu_access_lock);
666 	} else {
667 		/* gain it for accessing a cpu ring buffer. */
668 
669 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
670 		down_read(&all_cpu_access_lock);
671 
672 		/* Secondly block other access to this @cpu ring buffer. */
673 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
674 	}
675 }
676 
677 static inline void trace_access_unlock(int cpu)
678 {
679 	if (cpu == RING_BUFFER_ALL_CPUS) {
680 		up_write(&all_cpu_access_lock);
681 	} else {
682 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
683 		up_read(&all_cpu_access_lock);
684 	}
685 }
686 
687 static inline void trace_access_lock_init(void)
688 {
689 	int cpu;
690 
691 	for_each_possible_cpu(cpu)
692 		mutex_init(&per_cpu(cpu_access_lock, cpu));
693 }
694 
695 #else
696 
697 static DEFINE_MUTEX(access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_lock(&access_lock);
703 }
704 
705 static inline void trace_access_unlock(int cpu)
706 {
707 	(void)cpu;
708 	mutex_unlock(&access_lock);
709 }
710 
/* !CONFIG_SMP: access_lock is statically defined, nothing to set up. */
static inline void trace_access_lock_init(void)
{
}
714 
715 #endif
716 
717 #ifdef CONFIG_STACKTRACE
718 static void __ftrace_trace_stack(struct ring_buffer *buffer,
719 				 unsigned long flags,
720 				 int skip, int pc, struct pt_regs *regs);
721 static inline void ftrace_trace_stack(struct trace_array *tr,
722 				      struct ring_buffer *buffer,
723 				      unsigned long flags,
724 				      int skip, int pc, struct pt_regs *regs);
725 
726 #else
/* Empty stub used when CONFIG_STACKTRACE is not set. */
static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
					unsigned long flags,
					int skip, int pc, struct pt_regs *regs)
{
}
/* Empty stub used when CONFIG_STACKTRACE is not set. */
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct ring_buffer *buffer,
				      unsigned long flags,
				      int skip, int pc, struct pt_regs *regs)
{
}
738 
739 #endif
740 
741 static void tracer_tracing_on(struct trace_array *tr)
742 {
743 	if (tr->trace_buffer.buffer)
744 		ring_buffer_record_on(tr->trace_buffer.buffer);
745 	/*
746 	 * This flag is looked at when buffers haven't been allocated
747 	 * yet, or by some tracers (like irqsoff), that just want to
748 	 * know if the ring buffer has been disabled, but it can handle
749 	 * races of where it gets disabled but we still do a record.
750 	 * As the check is in the fast path of the tracers, it is more
751 	 * important to be fast than accurate.
752 	 */
753 	tr->buffer_disabled = 0;
754 	/* Make the flag seen by readers */
755 	smp_wmb();
756 }
757 
758 /**
759  * tracing_on - enable tracing buffers
760  *
761  * This function enables tracing buffers that may have been
762  * disabled with tracing_off.
763  */
764 void tracing_on(void)
765 {
766 	tracer_tracing_on(&global_trace);
767 }
768 EXPORT_SYMBOL_GPL(tracing_on);
769 
770 /**
771  * __trace_puts - write a constant string into the trace buffer.
772  * @ip:	   The address of the caller
773  * @str:   The constant string to write
774  * @size:  The size of the string.
775  */
776 int __trace_puts(unsigned long ip, const char *str, int size)
777 {
778 	struct ring_buffer_event *event;
779 	struct ring_buffer *buffer;
780 	struct print_entry *entry;
781 	unsigned long irq_flags;
782 	int alloc;
783 	int pc;
784 
785 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
786 		return 0;
787 
788 	pc = preempt_count();
789 
790 	if (unlikely(tracing_selftest_running || tracing_disabled))
791 		return 0;
792 
793 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
794 
795 	local_save_flags(irq_flags);
796 	buffer = global_trace.trace_buffer.buffer;
797 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
798 					  irq_flags, pc);
799 	if (!event)
800 		return 0;
801 
802 	entry = ring_buffer_event_data(event);
803 	entry->ip = ip;
804 
805 	memcpy(&entry->buf, str, size);
806 
807 	/* Add a newline if necessary */
808 	if (entry->buf[size - 1] != '\n') {
809 		entry->buf[size] = '\n';
810 		entry->buf[size + 1] = '\0';
811 	} else
812 		entry->buf[size] = '\0';
813 
814 	__buffer_unlock_commit(buffer, event);
815 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
816 
817 	return size;
818 }
819 EXPORT_SYMBOL_GPL(__trace_puts);
820 
821 /**
822  * __trace_bputs - write the pointer to a constant string into trace buffer
823  * @ip:	   The address of the caller
824  * @str:   The constant string to write to the buffer to
825  */
826 int __trace_bputs(unsigned long ip, const char *str)
827 {
828 	struct ring_buffer_event *event;
829 	struct ring_buffer *buffer;
830 	struct bputs_entry *entry;
831 	unsigned long irq_flags;
832 	int size = sizeof(struct bputs_entry);
833 	int pc;
834 
835 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
836 		return 0;
837 
838 	pc = preempt_count();
839 
840 	if (unlikely(tracing_selftest_running || tracing_disabled))
841 		return 0;
842 
843 	local_save_flags(irq_flags);
844 	buffer = global_trace.trace_buffer.buffer;
845 	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
846 					  irq_flags, pc);
847 	if (!event)
848 		return 0;
849 
850 	entry = ring_buffer_event_data(event);
851 	entry->ip			= ip;
852 	entry->str			= str;
853 
854 	__buffer_unlock_commit(buffer, event);
855 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
856 
857 	return 1;
858 }
859 EXPORT_SYMBOL_GPL(__trace_bputs);
860 
861 #ifdef CONFIG_TRACER_SNAPSHOT
862 /**
863  * trace_snapshot - take a snapshot of the current buffer.
864  *
865  * This causes a swap between the snapshot buffer and the current live
866  * tracing buffer. You can use this to take snapshots of the live
867  * trace when some condition is triggered, but continue to trace.
868  *
869  * Note, make sure to allocate the snapshot with either
870  * a tracing_snapshot_alloc(), or by doing it manually
871  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
872  *
873  * If the snapshot buffer is not allocated, it will stop tracing.
874  * Basically making a permanent snapshot.
875  */
876 void tracing_snapshot(void)
877 {
878 	struct trace_array *tr = &global_trace;
879 	struct tracer *tracer = tr->current_trace;
880 	unsigned long flags;
881 
882 	if (in_nmi()) {
883 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
884 		internal_trace_puts("*** snapshot is being ignored        ***\n");
885 		return;
886 	}
887 
888 	if (!tr->allocated_snapshot) {
889 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
890 		internal_trace_puts("*** stopping trace here!   ***\n");
891 		tracing_off();
892 		return;
893 	}
894 
895 	/* Note, snapshot can not be used when the tracer uses it */
896 	if (tracer->use_max_tr) {
897 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
898 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
899 		return;
900 	}
901 
902 	local_irq_save(flags);
903 	update_max_tr(tr, current, smp_processor_id());
904 	local_irq_restore(flags);
905 }
906 EXPORT_SYMBOL_GPL(tracing_snapshot);
907 
908 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
909 					struct trace_buffer *size_buf, int cpu_id);
910 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
911 
912 static int alloc_snapshot(struct trace_array *tr)
913 {
914 	int ret;
915 
916 	if (!tr->allocated_snapshot) {
917 
918 		/* allocate spare buffer */
919 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
920 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
921 		if (ret < 0)
922 			return ret;
923 
924 		tr->allocated_snapshot = true;
925 	}
926 
927 	return 0;
928 }
929 
930 static void free_snapshot(struct trace_array *tr)
931 {
932 	/*
933 	 * We don't free the ring buffer. instead, resize it because
934 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
935 	 * we want preserve it.
936 	 */
937 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
938 	set_buffer_entries(&tr->max_buffer, 1);
939 	tracing_reset_online_cpus(&tr->max_buffer);
940 	tr->allocated_snapshot = false;
941 }
942 
943 /**
944  * tracing_alloc_snapshot - allocate snapshot buffer.
945  *
946  * This only allocates the snapshot buffer if it isn't already
947  * allocated - it doesn't also take a snapshot.
948  *
949  * This is meant to be used in cases where the snapshot buffer needs
950  * to be set up for events that can't sleep but need to be able to
951  * trigger a snapshot.
952  */
953 int tracing_alloc_snapshot(void)
954 {
955 	struct trace_array *tr = &global_trace;
956 	int ret;
957 
958 	ret = alloc_snapshot(tr);
959 	WARN_ON(ret < 0);
960 
961 	return ret;
962 }
963 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
964 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
	/* No snapshot is taken when the buffer could not be allocated */
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
986 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
987 #else
/* CONFIG_TRACER_SNAPSHOT is not set: warn (once) instead of taking a snapshot. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
992 EXPORT_SYMBOL_GPL(tracing_snapshot);
993 int tracing_alloc_snapshot(void)
994 {
995 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
996 	return -ENODEV;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/* CONFIG_TRACER_SNAPSHOT is not set: only the warning is produced. */
void tracing_snapshot_alloc(void)
{
	/* tracing_snapshot() gives the warning */
	tracing_snapshot();
}
1004 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1005 #endif /* CONFIG_TRACER_SNAPSHOT */
1006 
1007 static void tracer_tracing_off(struct trace_array *tr)
1008 {
1009 	if (tr->trace_buffer.buffer)
1010 		ring_buffer_record_off(tr->trace_buffer.buffer);
1011 	/*
1012 	 * This flag is looked at when buffers haven't been allocated
1013 	 * yet, or by some tracers (like irqsoff), that just want to
1014 	 * know if the ring buffer has been disabled, but it can handle
1015 	 * races of where it gets disabled but we still do a record.
1016 	 * As the check is in the fast path of the tracers, it is more
1017 	 * important to be fast than accurate.
1018 	 */
1019 	tr->buffer_disabled = 1;
1020 	/* Make the flag seen by readers */
1021 	smp_wmb();
1022 }
1023 
1024 /**
1025  * tracing_off - turn off tracing buffers
1026  *
1027  * This function stops the tracing buffers from recording data.
1028  * It does not disable any overhead the tracers themselves may
1029  * be causing. This function simply causes all recording to
1030  * the ring buffers to fail.
1031  */
1032 void tracing_off(void)
1033 {
1034 	tracer_tracing_off(&global_trace);
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_off);
1037 
1038 void disable_trace_on_warning(void)
1039 {
1040 	if (__disable_trace_on_warning)
1041 		tracing_off();
1042 }
1043 
1044 /**
1045  * tracer_tracing_is_on - show real state of ring buffer enabled
1046  * @tr : the trace array to know if ring buffer is enabled
1047  *
1048  * Shows real state of the ring buffer if it is enabled or not.
1049  */
1050 static int tracer_tracing_is_on(struct trace_array *tr)
1051 {
1052 	if (tr->trace_buffer.buffer)
1053 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1054 	return !tr->buffer_disabled;
1055 }
1056 
/**
 * tracing_is_on - show state of ring buffers enabled
 *
 * Returns the result of tracer_tracing_is_on() for the global trace
 * array (global_trace).
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
1065 
1066 static int __init set_buf_size(char *str)
1067 {
1068 	unsigned long buf_size;
1069 
1070 	if (!str)
1071 		return 0;
1072 	buf_size = memparse(str, &str);
1073 	/* nr_entries can not be zero */
1074 	if (buf_size == 0)
1075 		return 0;
1076 	trace_buf_size = buf_size;
1077 	return 1;
1078 }
1079 __setup("trace_buf_size=", set_buf_size);
1080 
1081 static int __init set_tracing_thresh(char *str)
1082 {
1083 	unsigned long threshold;
1084 	int ret;
1085 
1086 	if (!str)
1087 		return 0;
1088 	ret = kstrtoul(str, 0, &threshold);
1089 	if (ret < 0)
1090 		return 0;
1091 	tracing_thresh = threshold * 1000;
1092 	return 1;
1093 }
1094 __setup("tracing_thresh=", set_tracing_thresh);
1095 
/* Convert a nanosecond count into whole microseconds (remainder dropped). */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
1100 
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the enums were defined.
 */
#undef C
#define C(a, b) b

/*
 * Option names, NULL terminated.
 * These must match the bit positions in trace_iterator_flags.
 */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};
1115 
1116 static struct {
1117 	u64 (*func)(void);
1118 	const char *name;
1119 	int in_ns;		/* is this clock in nanoseconds? */
1120 } trace_clocks[] = {
1121 	{ trace_clock_local,		"local",	1 },
1122 	{ trace_clock_global,		"global",	1 },
1123 	{ trace_clock_counter,		"counter",	0 },
1124 	{ trace_clock_jiffies,		"uptime",	0 },
1125 	{ trace_clock,			"perf",		1 },
1126 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1127 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1128 	ARCH_TRACE_CLOCKS
1129 };
1130 
1131 /*
1132  * trace_parser_get_init - gets the buffer for trace parser
1133  */
1134 int trace_parser_get_init(struct trace_parser *parser, int size)
1135 {
1136 	memset(parser, 0, sizeof(*parser));
1137 
1138 	parser->buffer = kmalloc(size, GFP_KERNEL);
1139 	if (!parser->buffer)
1140 		return 1;
1141 
1142 	parser->size = size;
1143 	return 0;
1144 }
1145 
1146 /*
1147  * trace_parser_put - frees the buffer for trace parser
1148  */
1149 void trace_parser_put(struct trace_parser *parser)
1150 {
1151 	kfree(parser->buffer);
1152 }
1153 
1154 /*
1155  * trace_get_user - reads the user input string separated by  space
1156  * (matched by isspace(ch))
1157  *
1158  * For each string found the 'struct trace_parser' is updated,
1159  * and the function returns.
1160  *
1161  * Returns number of bytes read.
1162  *
1163  * See kernel/trace/trace.h for 'struct trace_parser' details.
1164  */
1165 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1166 	size_t cnt, loff_t *ppos)
1167 {
1168 	char ch;
1169 	size_t read = 0;
1170 	ssize_t ret;
1171 
1172 	if (!*ppos)
1173 		trace_parser_clear(parser);
1174 
1175 	ret = get_user(ch, ubuf++);
1176 	if (ret)
1177 		goto out;
1178 
1179 	read++;
1180 	cnt--;
1181 
1182 	/*
1183 	 * The parser is not finished with the last write,
1184 	 * continue reading the user input without skipping spaces.
1185 	 */
1186 	if (!parser->cont) {
1187 		/* skip white space */
1188 		while (cnt && isspace(ch)) {
1189 			ret = get_user(ch, ubuf++);
1190 			if (ret)
1191 				goto out;
1192 			read++;
1193 			cnt--;
1194 		}
1195 
1196 		/* only spaces were written */
1197 		if (isspace(ch)) {
1198 			*ppos += read;
1199 			ret = read;
1200 			goto out;
1201 		}
1202 
1203 		parser->idx = 0;
1204 	}
1205 
1206 	/* read the non-space input */
1207 	while (cnt && !isspace(ch)) {
1208 		if (parser->idx < parser->size - 1)
1209 			parser->buffer[parser->idx++] = ch;
1210 		else {
1211 			ret = -EINVAL;
1212 			goto out;
1213 		}
1214 		ret = get_user(ch, ubuf++);
1215 		if (ret)
1216 			goto out;
1217 		read++;
1218 		cnt--;
1219 	}
1220 
1221 	/* We either got finished input or we have to wait for another call. */
1222 	if (isspace(ch)) {
1223 		parser->buffer[parser->idx] = 0;
1224 		parser->cont = false;
1225 	} else if (parser->idx < parser->size - 1) {
1226 		parser->cont = true;
1227 		parser->buffer[parser->idx++] = ch;
1228 	} else {
1229 		ret = -EINVAL;
1230 		goto out;
1231 	}
1232 
1233 	*ppos += read;
1234 	ret = read;
1235 
1236 out:
1237 	return ret;
1238 }
1239 
1240 /* TODO add a seq_buf_to_buffer() */
1241 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1242 {
1243 	int len;
1244 
1245 	if (trace_seq_used(s) <= s->seq.readpos)
1246 		return -EBUSY;
1247 
1248 	len = trace_seq_used(s) - s->seq.readpos;
1249 	if (cnt > len)
1250 		cnt = len;
1251 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1252 
1253 	s->seq.readpos += cnt;
1254 	return cnt;
1255 }
1256 
/* Set from the "tracing_thresh=" boot parameter (value multiplied by 1000) */
unsigned long __read_mostly	tracing_thresh;
1258 
1259 #ifdef CONFIG_TRACER_MAX_TRACE
1260 /*
1261  * Copy the new maximum trace into the separate maximum-trace
1262  * structure. (this way the maximum trace is permanently saved,
1263  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1264  */
1265 static void
1266 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1267 {
1268 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1269 	struct trace_buffer *max_buf = &tr->max_buffer;
1270 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1271 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1272 
1273 	max_buf->cpu = cpu;
1274 	max_buf->time_start = data->preempt_timestamp;
1275 
1276 	max_data->saved_latency = tr->max_latency;
1277 	max_data->critical_start = data->critical_start;
1278 	max_data->critical_end = data->critical_end;
1279 
1280 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1281 	max_data->pid = tsk->pid;
1282 	/*
1283 	 * If tsk == current, then use current_uid(), as that does not use
1284 	 * RCU. The irq tracer can be called out of RCU scope.
1285 	 */
1286 	if (tsk == current)
1287 		max_data->uid = current_uid();
1288 	else
1289 		max_data->uid = task_uid(tsk);
1290 
1291 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1292 	max_data->policy = tsk->policy;
1293 	max_data->rt_priority = tsk->rt_priority;
1294 
1295 	/* record this tasks comm */
1296 	tracing_record_cmdline(tsk);
1297 }
1298 
1299 /**
1300  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1301  * @tr: tracer
1302  * @tsk: the task with the latency
1303  * @cpu: The cpu that initiated the trace.
1304  *
1305  * Flip the buffers between the @tr and the max_tr and record information
1306  * about which task was the cause of this latency.
1307  */
1308 void
1309 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311 	struct ring_buffer *buf;
1312 
1313 	if (tr->stop_count)
1314 		return;
1315 
1316 	WARN_ON_ONCE(!irqs_disabled());
1317 
1318 	if (!tr->allocated_snapshot) {
1319 		/* Only the nop tracer should hit this when disabling */
1320 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1321 		return;
1322 	}
1323 
1324 	arch_spin_lock(&tr->max_lock);
1325 
1326 	buf = tr->trace_buffer.buffer;
1327 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1328 	tr->max_buffer.buffer = buf;
1329 
1330 	__update_max_tr(tr, tsk, cpu);
1331 	arch_spin_unlock(&tr->max_lock);
1332 }
1333 
1334 /**
1335  * update_max_tr_single - only copy one trace over, and reset the rest
1336  * @tr - tracer
1337  * @tsk - task with the latency
1338  * @cpu - the cpu of the buffer to copy.
1339  *
1340  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1341  */
1342 void
1343 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1344 {
1345 	int ret;
1346 
1347 	if (tr->stop_count)
1348 		return;
1349 
1350 	WARN_ON_ONCE(!irqs_disabled());
1351 	if (!tr->allocated_snapshot) {
1352 		/* Only the nop tracer should hit this when disabling */
1353 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1354 		return;
1355 	}
1356 
1357 	arch_spin_lock(&tr->max_lock);
1358 
1359 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1360 
1361 	if (ret == -EBUSY) {
1362 		/*
1363 		 * We failed to swap the buffer due to a commit taking
1364 		 * place on this CPU. We fail to record, but we reset
1365 		 * the max trace buffer (no one writes directly to it)
1366 		 * and flag that it failed.
1367 		 */
1368 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1369 			"Failed to swap buffers due to commit in progress\n");
1370 	}
1371 
1372 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1373 
1374 	__update_max_tr(tr, tsk, cpu);
1375 	arch_spin_unlock(&tr->max_lock);
1376 }
1377 #endif /* CONFIG_TRACER_MAX_TRACE */
1378 
1379 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1380 {
1381 	/* Iterators are static, they should be filled or empty */
1382 	if (trace_buffer_iter(iter, iter->cpu_file))
1383 		return 0;
1384 
1385 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1386 				full);
1387 }
1388 
1389 #ifdef CONFIG_FTRACE_STARTUP_TEST
1390 static int run_tracer_selftest(struct tracer *type)
1391 {
1392 	struct trace_array *tr = &global_trace;
1393 	struct tracer *saved_tracer = tr->current_trace;
1394 	int ret;
1395 
1396 	if (!type->selftest || tracing_selftest_disabled)
1397 		return 0;
1398 
1399 	/*
1400 	 * Run a selftest on this tracer.
1401 	 * Here we reset the trace buffer, and set the current
1402 	 * tracer to be this tracer. The tracer can then run some
1403 	 * internal tracing to verify that everything is in order.
1404 	 * If we fail, we do not register this tracer.
1405 	 */
1406 	tracing_reset_online_cpus(&tr->trace_buffer);
1407 
1408 	tr->current_trace = type;
1409 
1410 #ifdef CONFIG_TRACER_MAX_TRACE
1411 	if (type->use_max_tr) {
1412 		/* If we expanded the buffers, make sure the max is expanded too */
1413 		if (ring_buffer_expanded)
1414 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1415 					   RING_BUFFER_ALL_CPUS);
1416 		tr->allocated_snapshot = true;
1417 	}
1418 #endif
1419 
1420 	/* the test is responsible for initializing and enabling */
1421 	pr_info("Testing tracer %s: ", type->name);
1422 	ret = type->selftest(type, tr);
1423 	/* the test is responsible for resetting too */
1424 	tr->current_trace = saved_tracer;
1425 	if (ret) {
1426 		printk(KERN_CONT "FAILED!\n");
1427 		/* Add the warning after printing 'FAILED' */
1428 		WARN_ON(1);
1429 		return -1;
1430 	}
1431 	/* Only reset on passing, to avoid touching corrupted buffers */
1432 	tracing_reset_online_cpus(&tr->trace_buffer);
1433 
1434 #ifdef CONFIG_TRACER_MAX_TRACE
1435 	if (type->use_max_tr) {
1436 		tr->allocated_snapshot = false;
1437 
1438 		/* Shrink the max buffer again */
1439 		if (ring_buffer_expanded)
1440 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1441 					   RING_BUFFER_ALL_CPUS);
1442 	}
1443 #endif
1444 
1445 	printk(KERN_CONT "PASSED\n");
1446 	return 0;
1447 }
1448 #else
/* Without CONFIG_FTRACE_STARTUP_TEST no selftest is run; report success. */
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
1453 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1454 
/* Forward declarations of helpers that register_tracer() calls */
static void add_tracer_options(struct trace_array *tr, struct tracer *t);

static void __init apply_trace_boot_options(void);
1458 
1459 /**
1460  * register_tracer - register a tracer with the ftrace system.
1461  * @type - the plugin for the tracer
1462  *
1463  * Register a new plugin tracer.
1464  */
1465 int __init register_tracer(struct tracer *type)
1466 {
1467 	struct tracer *t;
1468 	int ret = 0;
1469 
1470 	if (!type->name) {
1471 		pr_info("Tracer must have a name\n");
1472 		return -1;
1473 	}
1474 
1475 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1476 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1477 		return -1;
1478 	}
1479 
1480 	mutex_lock(&trace_types_lock);
1481 
1482 	tracing_selftest_running = true;
1483 
1484 	for (t = trace_types; t; t = t->next) {
1485 		if (strcmp(type->name, t->name) == 0) {
1486 			/* already found */
1487 			pr_info("Tracer %s already registered\n",
1488 				type->name);
1489 			ret = -1;
1490 			goto out;
1491 		}
1492 	}
1493 
1494 	if (!type->set_flag)
1495 		type->set_flag = &dummy_set_flag;
1496 	if (!type->flags) {
1497 		/*allocate a dummy tracer_flags*/
1498 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1499 		if (!type->flags) {
1500 			ret = -ENOMEM;
1501 			goto out;
1502 		}
1503 		type->flags->val = 0;
1504 		type->flags->opts = dummy_tracer_opt;
1505 	} else
1506 		if (!type->flags->opts)
1507 			type->flags->opts = dummy_tracer_opt;
1508 
1509 	/* store the tracer for __set_tracer_option */
1510 	type->flags->trace = type;
1511 
1512 	ret = run_tracer_selftest(type);
1513 	if (ret < 0)
1514 		goto out;
1515 
1516 	type->next = trace_types;
1517 	trace_types = type;
1518 	add_tracer_options(&global_trace, type);
1519 
1520  out:
1521 	tracing_selftest_running = false;
1522 	mutex_unlock(&trace_types_lock);
1523 
1524 	if (ret || !default_bootup_tracer)
1525 		goto out_unlock;
1526 
1527 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1528 		goto out_unlock;
1529 
1530 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1531 	/* Do we want this tracer to start on bootup? */
1532 	tracing_set_tracer(&global_trace, type->name);
1533 	default_bootup_tracer = NULL;
1534 
1535 	apply_trace_boot_options();
1536 
1537 	/* disable other selftests, since this will break it. */
1538 	tracing_selftest_disabled = true;
1539 #ifdef CONFIG_FTRACE_STARTUP_TEST
1540 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1541 	       type->name);
1542 #endif
1543 
1544  out_unlock:
1545 	return ret;
1546 }
1547 
1548 void tracing_reset(struct trace_buffer *buf, int cpu)
1549 {
1550 	struct ring_buffer *buffer = buf->buffer;
1551 
1552 	if (!buffer)
1553 		return;
1554 
1555 	ring_buffer_record_disable(buffer);
1556 
1557 	/* Make sure all commits have finished */
1558 	synchronize_sched();
1559 	ring_buffer_reset_cpu(buffer, cpu);
1560 
1561 	ring_buffer_record_enable(buffer);
1562 }
1563 
1564 void tracing_reset_online_cpus(struct trace_buffer *buf)
1565 {
1566 	struct ring_buffer *buffer = buf->buffer;
1567 	int cpu;
1568 
1569 	if (!buffer)
1570 		return;
1571 
1572 	ring_buffer_record_disable(buffer);
1573 
1574 	/* Make sure all commits have finished */
1575 	synchronize_sched();
1576 
1577 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1578 
1579 	for_each_online_cpu(cpu)
1580 		ring_buffer_reset_cpu(buffer, cpu);
1581 
1582 	ring_buffer_record_enable(buffer);
1583 }
1584 
1585 /* Must have trace_types_lock held */
1586 void tracing_reset_all_online_cpus(void)
1587 {
1588 	struct trace_array *tr;
1589 
1590 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1591 		tracing_reset_online_cpus(&tr->trace_buffer);
1592 #ifdef CONFIG_TRACER_MAX_TRACE
1593 		tracing_reset_online_cpus(&tr->max_buffer);
1594 #endif
1595 	}
1596 }
1597 
/* Number of comm slots allocated by trace_create_savedcmd() */
#define SAVED_CMDLINES_DEFAULT 128
/* Marks a pid or a slot that has no mapping */
#define NO_CMDLINE_MAP UINT_MAX
/* Protects the savedcmd mappings and the saved comms */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/*
 * Cache of task comms, so that a pid can be turned back into a name.
 * A fixed number of slots is reused round-robin.
 */
struct saved_cmdlines_buffer {
	/* pid -> slot index, or NO_CMDLINE_MAP */
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	/* slot index -> pid that owns the slot, or NO_CMDLINE_MAP */
	unsigned *map_cmdline_to_pid;
	/* number of slots */
	unsigned cmdline_num;
	/* slot that was handed out last */
	int cmdline_idx;
	/* cmdline_num slots of TASK_COMM_LEN bytes each */
	char *saved_cmdlines;
};
static struct saved_cmdlines_buffer *savedcmd;
1609 
/*
 * Temporarily disables recording: while this is non-zero,
 * tracing_record_cmdline() returns without saving anything.
 */
static atomic_t trace_record_cmdline_disabled __read_mostly;
1612 
1613 static inline char *get_saved_cmdlines(int idx)
1614 {
1615 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1616 }
1617 
1618 static inline void set_cmdline(int idx, const char *cmdline)
1619 {
1620 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1621 }
1622 
1623 static int allocate_cmdlines_buffer(unsigned int val,
1624 				    struct saved_cmdlines_buffer *s)
1625 {
1626 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1627 					GFP_KERNEL);
1628 	if (!s->map_cmdline_to_pid)
1629 		return -ENOMEM;
1630 
1631 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1632 	if (!s->saved_cmdlines) {
1633 		kfree(s->map_cmdline_to_pid);
1634 		return -ENOMEM;
1635 	}
1636 
1637 	s->cmdline_idx = 0;
1638 	s->cmdline_num = val;
1639 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1640 	       sizeof(s->map_pid_to_cmdline));
1641 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1642 	       val * sizeof(*s->map_cmdline_to_pid));
1643 
1644 	return 0;
1645 }
1646 
1647 static int trace_create_savedcmd(void)
1648 {
1649 	int ret;
1650 
1651 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1652 	if (!savedcmd)
1653 		return -ENOMEM;
1654 
1655 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1656 	if (ret < 0) {
1657 		kfree(savedcmd);
1658 		savedcmd = NULL;
1659 		return -ENOMEM;
1660 	}
1661 
1662 	return 0;
1663 }
1664 
/*
 * Returns the stop count of the global trace array: non-zero while
 * tracing_stop() calls are not yet balanced by tracing_start().
 */
int is_tracing_stopped(void)
{
	return global_trace.stop_count;
}
1669 
1670 /**
1671  * tracing_start - quick start of the tracer
1672  *
1673  * If tracing is enabled but was stopped by tracing_stop,
1674  * this will start the tracer back up.
1675  */
1676 void tracing_start(void)
1677 {
1678 	struct ring_buffer *buffer;
1679 	unsigned long flags;
1680 
1681 	if (tracing_disabled)
1682 		return;
1683 
1684 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1685 	if (--global_trace.stop_count) {
1686 		if (global_trace.stop_count < 0) {
1687 			/* Someone screwed up their debugging */
1688 			WARN_ON_ONCE(1);
1689 			global_trace.stop_count = 0;
1690 		}
1691 		goto out;
1692 	}
1693 
1694 	/* Prevent the buffers from switching */
1695 	arch_spin_lock(&global_trace.max_lock);
1696 
1697 	buffer = global_trace.trace_buffer.buffer;
1698 	if (buffer)
1699 		ring_buffer_record_enable(buffer);
1700 
1701 #ifdef CONFIG_TRACER_MAX_TRACE
1702 	buffer = global_trace.max_buffer.buffer;
1703 	if (buffer)
1704 		ring_buffer_record_enable(buffer);
1705 #endif
1706 
1707 	arch_spin_unlock(&global_trace.max_lock);
1708 
1709  out:
1710 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1711 }
1712 
1713 static void tracing_start_tr(struct trace_array *tr)
1714 {
1715 	struct ring_buffer *buffer;
1716 	unsigned long flags;
1717 
1718 	if (tracing_disabled)
1719 		return;
1720 
1721 	/* If global, we need to also start the max tracer */
1722 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1723 		return tracing_start();
1724 
1725 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1726 
1727 	if (--tr->stop_count) {
1728 		if (tr->stop_count < 0) {
1729 			/* Someone screwed up their debugging */
1730 			WARN_ON_ONCE(1);
1731 			tr->stop_count = 0;
1732 		}
1733 		goto out;
1734 	}
1735 
1736 	buffer = tr->trace_buffer.buffer;
1737 	if (buffer)
1738 		ring_buffer_record_enable(buffer);
1739 
1740  out:
1741 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1742 }
1743 
1744 /**
1745  * tracing_stop - quick stop of the tracer
1746  *
1747  * Light weight way to stop tracing. Use in conjunction with
1748  * tracing_start.
1749  */
1750 void tracing_stop(void)
1751 {
1752 	struct ring_buffer *buffer;
1753 	unsigned long flags;
1754 
1755 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1756 	if (global_trace.stop_count++)
1757 		goto out;
1758 
1759 	/* Prevent the buffers from switching */
1760 	arch_spin_lock(&global_trace.max_lock);
1761 
1762 	buffer = global_trace.trace_buffer.buffer;
1763 	if (buffer)
1764 		ring_buffer_record_disable(buffer);
1765 
1766 #ifdef CONFIG_TRACER_MAX_TRACE
1767 	buffer = global_trace.max_buffer.buffer;
1768 	if (buffer)
1769 		ring_buffer_record_disable(buffer);
1770 #endif
1771 
1772 	arch_spin_unlock(&global_trace.max_lock);
1773 
1774  out:
1775 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1776 }
1777 
1778 static void tracing_stop_tr(struct trace_array *tr)
1779 {
1780 	struct ring_buffer *buffer;
1781 	unsigned long flags;
1782 
1783 	/* If global, we need to also stop the max tracer */
1784 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1785 		return tracing_stop();
1786 
1787 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1788 	if (tr->stop_count++)
1789 		goto out;
1790 
1791 	buffer = tr->trace_buffer.buffer;
1792 	if (buffer)
1793 		ring_buffer_record_disable(buffer);
1794 
1795  out:
1796 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1797 }
1798 
/* NOTE(review): prototype only, no definition or caller is visible nearby - confirm it is still needed */
void trace_stop_cmdline_recording(void);
1800 
1801 static int trace_save_cmdline(struct task_struct *tsk)
1802 {
1803 	unsigned pid, idx;
1804 
1805 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1806 		return 0;
1807 
1808 	/*
1809 	 * It's not the end of the world if we don't get
1810 	 * the lock, but we also don't want to spin
1811 	 * nor do we want to disable interrupts,
1812 	 * so if we miss here, then better luck next time.
1813 	 */
1814 	if (!arch_spin_trylock(&trace_cmdline_lock))
1815 		return 0;
1816 
1817 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1818 	if (idx == NO_CMDLINE_MAP) {
1819 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1820 
1821 		/*
1822 		 * Check whether the cmdline buffer at idx has a pid
1823 		 * mapped. We are going to overwrite that entry so we
1824 		 * need to clear the map_pid_to_cmdline. Otherwise we
1825 		 * would read the new comm for the old pid.
1826 		 */
1827 		pid = savedcmd->map_cmdline_to_pid[idx];
1828 		if (pid != NO_CMDLINE_MAP)
1829 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1830 
1831 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1832 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1833 
1834 		savedcmd->cmdline_idx = idx;
1835 	}
1836 
1837 	set_cmdline(idx, tsk->comm);
1838 
1839 	arch_spin_unlock(&trace_cmdline_lock);
1840 
1841 	return 1;
1842 }
1843 
1844 static void __trace_find_cmdline(int pid, char comm[])
1845 {
1846 	unsigned map;
1847 
1848 	if (!pid) {
1849 		strcpy(comm, "<idle>");
1850 		return;
1851 	}
1852 
1853 	if (WARN_ON_ONCE(pid < 0)) {
1854 		strcpy(comm, "<XXX>");
1855 		return;
1856 	}
1857 
1858 	if (pid > PID_MAX_DEFAULT) {
1859 		strcpy(comm, "<...>");
1860 		return;
1861 	}
1862 
1863 	map = savedcmd->map_pid_to_cmdline[pid];
1864 	if (map != NO_CMDLINE_MAP)
1865 		strcpy(comm, get_saved_cmdlines(map));
1866 	else
1867 		strcpy(comm, "<...>");
1868 }
1869 
/*
 * Locked wrapper around __trace_find_cmdline(): copies the saved comm of
 * @pid (or a placeholder string) into @comm while holding
 * trace_cmdline_lock with preemption disabled.
 */
void trace_find_cmdline(int pid, char comm[])
{
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);

	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
1880 
1881 void tracing_record_cmdline(struct task_struct *tsk)
1882 {
1883 	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1884 		return;
1885 
1886 	if (!__this_cpu_read(trace_cmdline_save))
1887 		return;
1888 
1889 	if (trace_save_cmdline(tsk))
1890 		__this_cpu_write(trace_cmdline_save, false);
1891 }
1892 
1893 void
1894 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1895 			     int pc)
1896 {
1897 	struct task_struct *tsk = current;
1898 
1899 	entry->preempt_count		= pc & 0xff;
1900 	entry->pid			= (tsk) ? tsk->pid : 0;
1901 	entry->flags =
1902 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1903 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1904 #else
1905 		TRACE_FLAG_IRQS_NOSUPPORT |
1906 #endif
1907 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1908 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1909 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1910 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1911 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1912 }
1913 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1914 
1915 static __always_inline void
1916 trace_event_setup(struct ring_buffer_event *event,
1917 		  int type, unsigned long flags, int pc)
1918 {
1919 	struct trace_entry *ent = ring_buffer_event_data(event);
1920 
1921 	tracing_generic_entry_update(ent, flags, pc);
1922 	ent->type = type;
1923 }
1924 
1925 struct ring_buffer_event *
1926 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1927 			  int type,
1928 			  unsigned long len,
1929 			  unsigned long flags, int pc)
1930 {
1931 	struct ring_buffer_event *event;
1932 
1933 	event = ring_buffer_lock_reserve(buffer, len);
1934 	if (event != NULL)
1935 		trace_event_setup(event, type, flags, pc);
1936 
1937 	return event;
1938 }
1939 
/* Per cpu temporary event page, NULL while event buffering is disabled */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Per cpu use count of the page above; the page is only used when this goes 0 -> 1 */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of trace_buffered_event_enable() calls not yet undone by a disable */
static int trace_buffered_event_ref;
1943 
1944 /**
1945  * trace_buffered_event_enable - enable buffering events
1946  *
1947  * When events are being filtered, it is quicker to use a temporary
1948  * buffer to write the event data into if there's a likely chance
1949  * that it will not be committed. The discard of the ring buffer
1950  * is not as fast as committing, and is much slower than copying
1951  * a commit.
1952  *
1953  * When an event is to be filtered, allocate per cpu buffers to
1954  * write the event data into, and if the event is filtered and discarded
1955  * it is simply dropped, otherwise, the entire data is to be committed
1956  * in one shot.
1957  */
1958 void trace_buffered_event_enable(void)
1959 {
1960 	struct ring_buffer_event *event;
1961 	struct page *page;
1962 	int cpu;
1963 
1964 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1965 
1966 	if (trace_buffered_event_ref++)
1967 		return;
1968 
1969 	for_each_tracing_cpu(cpu) {
1970 		page = alloc_pages_node(cpu_to_node(cpu),
1971 					GFP_KERNEL | __GFP_NORETRY, 0);
1972 		if (!page)
1973 			goto failed;
1974 
1975 		event = page_address(page);
1976 		memset(event, 0, sizeof(*event));
1977 
1978 		per_cpu(trace_buffered_event, cpu) = event;
1979 
1980 		preempt_disable();
1981 		if (cpu == smp_processor_id() &&
1982 		    this_cpu_read(trace_buffered_event) !=
1983 		    per_cpu(trace_buffered_event, cpu))
1984 			WARN_ON_ONCE(1);
1985 		preempt_enable();
1986 	}
1987 
1988 	return;
1989  failed:
1990 	trace_buffered_event_disable();
1991 }
1992 
/*
 * Cross cpu callback used by trace_buffered_event_disable(): drops the
 * extra count that disable_trace_buffered_event() added on this cpu.
 * @data is unused.
 */
static void enable_trace_buffered_event(void *data)
{
	/* Probably not needed, but do it anyway */
	smp_rmb();
	this_cpu_dec(trace_buffered_event_cnt);
}
1999 
/*
 * Cross cpu callback used by trace_buffered_event_disable(): raises the
 * use count on this cpu, so that the 0 -> 1 transition needed to use the
 * per cpu event page cannot happen. @data is unused.
 */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}
2004 
/**
 * trace_buffered_event_disable - disable buffering events
 *
 * When a filter is removed, it is faster to not use the buffered
 * events, and to commit directly into the ring buffer. Free up
 * the temp buffers when there are no more users. This requires
 * special synchronization with current events.
 *
 * Calls are reference counted; the pages are only freed when the last
 * reference is dropped. event_mutex is expected to be held by the caller.
 */
void trace_buffered_event_disable(void)
{
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	/* Unbalanced disable: nothing was enabled */
	if (WARN_ON_ONCE(!trace_buffered_event_ref))
		return;

	/* Other users remain, keep the pages */
	if (--trace_buffered_event_ref)
		return;

	preempt_disable();
	/* For each CPU, set the buffer as used. */
	smp_call_function_many(tracing_buffer_mask,
			       disable_trace_buffered_event, NULL, 1);
	preempt_enable();

	/* Wait for all current users to finish */
	synchronize_sched();

	/* Nobody is using the pages any more: free them and clear the pointers */
	for_each_tracing_cpu(cpu) {
		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
		per_cpu(trace_buffered_event, cpu) = NULL;
	}
	/*
	 * Make sure trace_buffered_event is NULL before clearing
	 * trace_buffered_event_cnt.
	 */
	smp_wmb();

	preempt_disable();
	/* Do the work on each cpu */
	smp_call_function_many(tracing_buffer_mask,
			       enable_trace_buffered_event, NULL, 1);
	preempt_enable();
}
2050 
/*
 * Commit @event to @buffer.
 *
 * An event that was reserved in the ring buffer is simply committed. An
 * event that lives in this cpu's temporary event page is written to the
 * ring buffer as a whole, after which the page is released for reuse.
 */
void
__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
{
	/* Flag this cpu; tracing_record_cmdline() only saves a comm while it is set */
	__this_cpu_write(trace_cmdline_save, true);

	/* If this is the temp buffer, we need to commit fully */
	if (this_cpu_read(trace_buffered_event) == event) {
		/* Length is in event->array[0] */
		ring_buffer_write(buffer, event->array[0], &event->array[1]);
		/* Release the temp buffer */
		this_cpu_dec(trace_buffered_event_cnt);
	} else
		ring_buffer_unlock_commit(buffer, event);
}
2065 
/*
 * Fallback buffer for trace_event_buffer_lock_reserve(): used when the
 * reserve on the real buffer fails but the event has conditional triggers.
 */
static struct ring_buffer *temp_buffer;
2067 
2068 struct ring_buffer_event *
2069 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2070 			  struct trace_event_file *trace_file,
2071 			  int type, unsigned long len,
2072 			  unsigned long flags, int pc)
2073 {
2074 	struct ring_buffer_event *entry;
2075 	int val;
2076 
2077 	*current_rb = trace_file->tr->trace_buffer.buffer;
2078 
2079 	if ((trace_file->flags &
2080 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2081 	    (entry = this_cpu_read(trace_buffered_event))) {
2082 		/* Try to use the per cpu buffer first */
2083 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2084 		if (val == 1) {
2085 			trace_event_setup(entry, type, flags, pc);
2086 			entry->array[0] = len;
2087 			return entry;
2088 		}
2089 		this_cpu_dec(trace_buffered_event_cnt);
2090 	}
2091 
2092 	entry = trace_buffer_lock_reserve(*current_rb,
2093 					 type, len, flags, pc);
2094 	/*
2095 	 * If tracing is off, but we have triggers enabled
2096 	 * we still need to look at the event data. Use the temp_buffer
2097 	 * to store the trace event for the tigger to use. It's recusive
2098 	 * safe and will not be recorded anywhere.
2099 	 */
2100 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2101 		*current_rb = temp_buffer;
2102 		entry = trace_buffer_lock_reserve(*current_rb,
2103 						  type, len, flags, pc);
2104 	}
2105 	return entry;
2106 }
2107 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2108 
/*
 * Commit @event to @buffer and then record the kernel and user stack
 * traces for it (each of the two helpers is called unconditionally here).
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct ring_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned long flags, int pc,
				     struct pt_regs *regs)
{
	/*
	 * If regs is not set, then skip the following callers:
	 *   trace_buffer_unlock_commit_regs
	 *   event_trigger_unlock_commit
	 *   trace_event_buffer_commit
	 *   trace_event_raw_event_sched_switch
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	int skip = regs ? 0 : 4;

	__buffer_unlock_commit(buffer, event);

	ftrace_trace_stack(tr, buffer, flags, skip, pc, regs);
	ftrace_trace_userstack(buffer, flags, pc);
}
2130 
2131 void
2132 trace_function(struct trace_array *tr,
2133 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2134 	       int pc)
2135 {
2136 	struct trace_event_call *call = &event_function;
2137 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2138 	struct ring_buffer_event *event;
2139 	struct ftrace_entry *entry;
2140 
2141 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2142 					  flags, pc);
2143 	if (!event)
2144 		return;
2145 	entry	= ring_buffer_event_data(event);
2146 	entry->ip			= ip;
2147 	entry->parent_ip		= parent_ip;
2148 
2149 	if (!call_filter_check_discard(call, entry, buffer, event))
2150 		__buffer_unlock_commit(buffer, event);
2151 }
2152 
2153 #ifdef CONFIG_STACKTRACE
2154 
/* Number of return addresses that fit into one page */
#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
/* Page sized scratch area for saving a stack trace */
struct ftrace_stack {
	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
};

/* One scratch stack per cpu ... */
static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
/* ... and a per cpu nesting counter; only the first (outermost) user gets it */
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2162 
/*
 * Record a kernel stack trace as a TRACE_STACK event in @buffer.
 *
 * @skip is the number of leading frames to leave out. When @regs is NULL
 * the trace is taken from the current context with save_stack_trace() and
 * two more frames are skipped; otherwise save_stack_trace_regs() is used.
 *
 * The outermost call on a CPU captures into the per-cpu ftrace_stack and
 * sizes the event to fit; nested calls capture at most
 * FTRACE_STACK_ENTRIES entries straight into the reserved event.
 */
static void __ftrace_trace_stack(struct ring_buffer *buffer,
				 unsigned long flags,
				 int skip, int pc, struct pt_regs *regs)
{
	struct trace_event_call *call = &event_kernel_stack;
	struct ring_buffer_event *event;
	struct stack_entry *entry;
	struct stack_trace trace;
	int use_stack;
	/* counted in entries until it is converted to bytes below */
	int size = FTRACE_STACK_ENTRIES;

	trace.nr_entries	= 0;
	trace.skip		= skip;

	/*
	 * Add two, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
	if (!regs)
		trace.skip += 2;

	/*
	 * Since events can happen in NMIs there's no safe way to
	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
	 * or NMI comes in, it will just have to use the default
	 * FTRACE_STACK_ENTRIES.
	 */
	preempt_disable_notrace();

	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
	/*
	 * We don't need any atomic variables, just a barrier.
	 * If an interrupt comes in, we don't care, because it would
	 * have exited and put the counter back to what we want.
	 * We just need a barrier to keep gcc from moving things
	 * around.
	 */
	barrier();
	if (use_stack == 1) {
		/* outermost user on this CPU: capture into the per-cpu stack */
		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;

		if (regs)
			save_stack_trace_regs(regs, &trace);
		else
			save_stack_trace(&trace);

		/* grow the event so that every captured entry fits */
		if (trace.nr_entries > size)
			size = trace.nr_entries;
	} else
		/* From now on, use_stack is a boolean */
		use_stack = 0;

	/* entries -> bytes */
	size *= sizeof(unsigned long);

	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
					  sizeof(*entry) + size, flags, pc);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	memset(&entry->caller, 0, size);

	if (use_stack)
		memcpy(&entry->caller, trace.entries,
		       trace.nr_entries * sizeof(unsigned long));
	else {
		/* nested: capture directly into the reserved event */
		trace.max_entries	= FTRACE_STACK_ENTRIES;
		trace.entries		= entry->caller;
		if (regs)
			save_stack_trace_regs(regs, &trace);
		else
			save_stack_trace(&trace);
	}

	entry->size = trace.nr_entries;

	if (!call_filter_check_discard(call, entry, buffer, event))
		__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	__this_cpu_dec(ftrace_stack_reserve);
	preempt_enable_notrace();

}
2250 
2251 static inline void ftrace_trace_stack(struct trace_array *tr,
2252 				      struct ring_buffer *buffer,
2253 				      unsigned long flags,
2254 				      int skip, int pc, struct pt_regs *regs)
2255 {
2256 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2257 		return;
2258 
2259 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2260 }
2261 
2262 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2263 		   int pc)
2264 {
2265 	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2266 }
2267 
2268 /**
2269  * trace_dump_stack - record a stack back trace in the trace buffer
2270  * @skip: Number of functions to skip (helper handlers)
2271  */
2272 void trace_dump_stack(int skip)
2273 {
2274 	unsigned long flags;
2275 
2276 	if (tracing_disabled || tracing_selftest_running)
2277 		return;
2278 
2279 	local_save_flags(flags);
2280 
2281 	/*
2282 	 * Skip 3 more, seems to get us at the caller of
2283 	 * this function.
2284 	 */
2285 	skip += 3;
2286 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2287 			     flags, skip, preempt_count(), NULL);
2288 }
2289 
2290 static DEFINE_PER_CPU(int, user_stack_count);
2291 
2292 void
2293 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2294 {
2295 	struct trace_event_call *call = &event_user_stack;
2296 	struct ring_buffer_event *event;
2297 	struct userstack_entry *entry;
2298 	struct stack_trace trace;
2299 
2300 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2301 		return;
2302 
2303 	/*
2304 	 * NMIs can not handle page faults, even with fix ups.
2305 	 * The save user stack can (and often does) fault.
2306 	 */
2307 	if (unlikely(in_nmi()))
2308 		return;
2309 
2310 	/*
2311 	 * prevent recursion, since the user stack tracing may
2312 	 * trigger other kernel events.
2313 	 */
2314 	preempt_disable();
2315 	if (__this_cpu_read(user_stack_count))
2316 		goto out;
2317 
2318 	__this_cpu_inc(user_stack_count);
2319 
2320 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2321 					  sizeof(*entry), flags, pc);
2322 	if (!event)
2323 		goto out_drop_count;
2324 	entry	= ring_buffer_event_data(event);
2325 
2326 	entry->tgid		= current->tgid;
2327 	memset(&entry->caller, 0, sizeof(entry->caller));
2328 
2329 	trace.nr_entries	= 0;
2330 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2331 	trace.skip		= 0;
2332 	trace.entries		= entry->caller;
2333 
2334 	save_stack_trace_user(&trace);
2335 	if (!call_filter_check_discard(call, entry, buffer, event))
2336 		__buffer_unlock_commit(buffer, event);
2337 
2338  out_drop_count:
2339 	__this_cpu_dec(user_stack_count);
2340  out:
2341 	preempt_enable();
2342 }
2343 
#ifdef UNUSED
/*
 * Record a user stack trace in @tr's main ring buffer, using the current
 * preempt count. Only built when UNUSED is defined.
 */
static void __trace_userstack(struct trace_array *tr, unsigned long flags)
{
	/* ftrace_trace_userstack() takes the ring buffer, not the trace array */
	ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
}
#endif /* UNUSED */
2350 
2351 #endif /* CONFIG_STACKTRACE */
2352 
/* created for use with alloc_percpu */
struct trace_buffer_struct {
	/* number of the buffers below that are currently handed out */
	int nesting;
	/* four scratch buffers of TRACE_BUF_SIZE bytes, one per nesting level */
	char buffer[4][TRACE_BUF_SIZE];
};

/* Per-CPU scratch buffers; assigned in alloc_percpu_trace_buffer() */
static struct trace_buffer_struct *trace_percpu_buffer;
2360 
2361 /*
2362  * Thise allows for lockless recording.  If we're nested too deeply, then
2363  * this returns NULL.
2364  */
2365 static char *get_trace_buf(void)
2366 {
2367 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2368 
2369 	if (!buffer || buffer->nesting >= 4)
2370 		return NULL;
2371 
2372 	return &buffer->buffer[buffer->nesting++][0];
2373 }
2374 
/*
 * Drop this CPU's nesting count by one, giving back the buffer handed out
 * by the most recent get_trace_buf().
 */
static void put_trace_buf(void)
{
	this_cpu_dec(trace_percpu_buffer->nesting);
}
2379 
2380 static int alloc_percpu_trace_buffer(void)
2381 {
2382 	struct trace_buffer_struct *buffers;
2383 
2384 	buffers = alloc_percpu(struct trace_buffer_struct);
2385 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2386 		return -ENOMEM;
2387 
2388 	trace_percpu_buffer = buffers;
2389 	return 0;
2390 }
2391 
2392 static int buffers_allocated;
2393 
2394 void trace_printk_init_buffers(void)
2395 {
2396 	if (buffers_allocated)
2397 		return;
2398 
2399 	if (alloc_percpu_trace_buffer())
2400 		return;
2401 
2402 	/* trace_printk() is for debug use only. Don't use it in production. */
2403 
2404 	pr_warn("\n");
2405 	pr_warn("**********************************************************\n");
2406 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2407 	pr_warn("**                                                      **\n");
2408 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2409 	pr_warn("**                                                      **\n");
2410 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2411 	pr_warn("** unsafe for production use.                           **\n");
2412 	pr_warn("**                                                      **\n");
2413 	pr_warn("** If you see this message and you are not debugging    **\n");
2414 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2415 	pr_warn("**                                                      **\n");
2416 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2417 	pr_warn("**********************************************************\n");
2418 
2419 	/* Expand the buffers to set size */
2420 	tracing_update_buffers();
2421 
2422 	buffers_allocated = 1;
2423 
2424 	/*
2425 	 * trace_printk_init_buffers() can be called by modules.
2426 	 * If that happens, then we need to start cmdline recording
2427 	 * directly here. If the global_trace.buffer is already
2428 	 * allocated here, then this was called by module code.
2429 	 */
2430 	if (global_trace.trace_buffer.buffer)
2431 		tracing_start_cmdline_record();
2432 }
2433 
2434 void trace_printk_start_comm(void)
2435 {
2436 	/* Start tracing comms if trace printk is set */
2437 	if (!buffers_allocated)
2438 		return;
2439 	tracing_start_cmdline_record();
2440 }
2441 
2442 static void trace_printk_start_stop_comm(int enabled)
2443 {
2444 	if (!buffers_allocated)
2445 		return;
2446 
2447 	if (enabled)
2448 		tracing_start_cmdline_record();
2449 	else
2450 		tracing_stop_cmdline_record();
2451 }
2452 
2453 /**
2454  * trace_vbprintk - write binary msg to tracing buffer
2455  *
2456  */
2457 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2458 {
2459 	struct trace_event_call *call = &event_bprint;
2460 	struct ring_buffer_event *event;
2461 	struct ring_buffer *buffer;
2462 	struct trace_array *tr = &global_trace;
2463 	struct bprint_entry *entry;
2464 	unsigned long flags;
2465 	char *tbuffer;
2466 	int len = 0, size, pc;
2467 
2468 	if (unlikely(tracing_selftest_running || tracing_disabled))
2469 		return 0;
2470 
2471 	/* Don't pollute graph traces with trace_vprintk internals */
2472 	pause_graph_tracing();
2473 
2474 	pc = preempt_count();
2475 	preempt_disable_notrace();
2476 
2477 	tbuffer = get_trace_buf();
2478 	if (!tbuffer) {
2479 		len = 0;
2480 		goto out_nobuffer;
2481 	}
2482 
2483 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2484 
2485 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2486 		goto out;
2487 
2488 	local_save_flags(flags);
2489 	size = sizeof(*entry) + sizeof(u32) * len;
2490 	buffer = tr->trace_buffer.buffer;
2491 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2492 					  flags, pc);
2493 	if (!event)
2494 		goto out;
2495 	entry = ring_buffer_event_data(event);
2496 	entry->ip			= ip;
2497 	entry->fmt			= fmt;
2498 
2499 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2500 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2501 		__buffer_unlock_commit(buffer, event);
2502 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2503 	}
2504 
2505 out:
2506 	put_trace_buf();
2507 
2508 out_nobuffer:
2509 	preempt_enable_notrace();
2510 	unpause_graph_tracing();
2511 
2512 	return len;
2513 }
2514 EXPORT_SYMBOL_GPL(trace_vbprintk);
2515 
2516 static int
2517 __trace_array_vprintk(struct ring_buffer *buffer,
2518 		      unsigned long ip, const char *fmt, va_list args)
2519 {
2520 	struct trace_event_call *call = &event_print;
2521 	struct ring_buffer_event *event;
2522 	int len = 0, size, pc;
2523 	struct print_entry *entry;
2524 	unsigned long flags;
2525 	char *tbuffer;
2526 
2527 	if (tracing_disabled || tracing_selftest_running)
2528 		return 0;
2529 
2530 	/* Don't pollute graph traces with trace_vprintk internals */
2531 	pause_graph_tracing();
2532 
2533 	pc = preempt_count();
2534 	preempt_disable_notrace();
2535 
2536 
2537 	tbuffer = get_trace_buf();
2538 	if (!tbuffer) {
2539 		len = 0;
2540 		goto out_nobuffer;
2541 	}
2542 
2543 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2544 
2545 	local_save_flags(flags);
2546 	size = sizeof(*entry) + len + 1;
2547 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2548 					  flags, pc);
2549 	if (!event)
2550 		goto out;
2551 	entry = ring_buffer_event_data(event);
2552 	entry->ip = ip;
2553 
2554 	memcpy(&entry->buf, tbuffer, len + 1);
2555 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2556 		__buffer_unlock_commit(buffer, event);
2557 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2558 	}
2559 
2560 out:
2561 	put_trace_buf();
2562 
2563 out_nobuffer:
2564 	preempt_enable_notrace();
2565 	unpause_graph_tracing();
2566 
2567 	return len;
2568 }
2569 
2570 int trace_array_vprintk(struct trace_array *tr,
2571 			unsigned long ip, const char *fmt, va_list args)
2572 {
2573 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2574 }
2575 
2576 int trace_array_printk(struct trace_array *tr,
2577 		       unsigned long ip, const char *fmt, ...)
2578 {
2579 	int ret;
2580 	va_list ap;
2581 
2582 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2583 		return 0;
2584 
2585 	va_start(ap, fmt);
2586 	ret = trace_array_vprintk(tr, ip, fmt, ap);
2587 	va_end(ap);
2588 	return ret;
2589 }
2590 
2591 int trace_array_printk_buf(struct ring_buffer *buffer,
2592 			   unsigned long ip, const char *fmt, ...)
2593 {
2594 	int ret;
2595 	va_list ap;
2596 
2597 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2598 		return 0;
2599 
2600 	va_start(ap, fmt);
2601 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2602 	va_end(ap);
2603 	return ret;
2604 }
2605 
2606 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2607 {
2608 	return trace_array_vprintk(&global_trace, ip, fmt, args);
2609 }
2610 EXPORT_SYMBOL_GPL(trace_vprintk);
2611 
2612 static void trace_iterator_increment(struct trace_iterator *iter)
2613 {
2614 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2615 
2616 	iter->idx++;
2617 	if (buf_iter)
2618 		ring_buffer_read(buf_iter, NULL);
2619 }
2620 
2621 static struct trace_entry *
2622 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2623 		unsigned long *lost_events)
2624 {
2625 	struct ring_buffer_event *event;
2626 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2627 
2628 	if (buf_iter)
2629 		event = ring_buffer_iter_peek(buf_iter, ts);
2630 	else
2631 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2632 					 lost_events);
2633 
2634 	if (event) {
2635 		iter->ent_size = ring_buffer_event_length(event);
2636 		return ring_buffer_event_data(event);
2637 	}
2638 	iter->ent_size = 0;
2639 	return NULL;
2640 }
2641 
2642 static struct trace_entry *
2643 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2644 		  unsigned long *missing_events, u64 *ent_ts)
2645 {
2646 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2647 	struct trace_entry *ent, *next = NULL;
2648 	unsigned long lost_events = 0, next_lost = 0;
2649 	int cpu_file = iter->cpu_file;
2650 	u64 next_ts = 0, ts;
2651 	int next_cpu = -1;
2652 	int next_size = 0;
2653 	int cpu;
2654 
2655 	/*
2656 	 * If we are in a per_cpu trace file, don't bother by iterating over
2657 	 * all cpu and peek directly.
2658 	 */
2659 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2660 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2661 			return NULL;
2662 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2663 		if (ent_cpu)
2664 			*ent_cpu = cpu_file;
2665 
2666 		return ent;
2667 	}
2668 
2669 	for_each_tracing_cpu(cpu) {
2670 
2671 		if (ring_buffer_empty_cpu(buffer, cpu))
2672 			continue;
2673 
2674 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2675 
2676 		/*
2677 		 * Pick the entry with the smallest timestamp:
2678 		 */
2679 		if (ent && (!next || ts < next_ts)) {
2680 			next = ent;
2681 			next_cpu = cpu;
2682 			next_ts = ts;
2683 			next_lost = lost_events;
2684 			next_size = iter->ent_size;
2685 		}
2686 	}
2687 
2688 	iter->ent_size = next_size;
2689 
2690 	if (ent_cpu)
2691 		*ent_cpu = next_cpu;
2692 
2693 	if (ent_ts)
2694 		*ent_ts = next_ts;
2695 
2696 	if (missing_events)
2697 		*missing_events = next_lost;
2698 
2699 	return next;
2700 }
2701 
2702 /* Find the next real entry, without updating the iterator itself */
2703 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2704 					  int *ent_cpu, u64 *ent_ts)
2705 {
2706 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2707 }
2708 
2709 /* Find the next real entry, and increment the iterator to the next entry */
2710 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2711 {
2712 	iter->ent = __find_next_entry(iter, &iter->cpu,
2713 				      &iter->lost_events, &iter->ts);
2714 
2715 	if (iter->ent)
2716 		trace_iterator_increment(iter);
2717 
2718 	return iter->ent ? iter : NULL;
2719 }
2720 
2721 static void trace_consume(struct trace_iterator *iter)
2722 {
2723 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2724 			    &iter->lost_events);
2725 }
2726 
2727 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2728 {
2729 	struct trace_iterator *iter = m->private;
2730 	int i = (int)*pos;
2731 	void *ent;
2732 
2733 	WARN_ON_ONCE(iter->leftover);
2734 
2735 	(*pos)++;
2736 
2737 	/* can't go backwards */
2738 	if (iter->idx > i)
2739 		return NULL;
2740 
2741 	if (iter->idx < 0)
2742 		ent = trace_find_next_entry_inc(iter);
2743 	else
2744 		ent = iter;
2745 
2746 	while (ent && iter->idx < i)
2747 		ent = trace_find_next_entry_inc(iter);
2748 
2749 	iter->pos = *pos;
2750 
2751 	return ent;
2752 }
2753 
2754 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2755 {
2756 	struct ring_buffer_event *event;
2757 	struct ring_buffer_iter *buf_iter;
2758 	unsigned long entries = 0;
2759 	u64 ts;
2760 
2761 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2762 
2763 	buf_iter = trace_buffer_iter(iter, cpu);
2764 	if (!buf_iter)
2765 		return;
2766 
2767 	ring_buffer_iter_reset(buf_iter);
2768 
2769 	/*
2770 	 * We could have the case with the max latency tracers
2771 	 * that a reset never took place on a cpu. This is evident
2772 	 * by the timestamp being before the start of the buffer.
2773 	 */
2774 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2775 		if (ts >= iter->trace_buffer->time_start)
2776 			break;
2777 		entries++;
2778 		ring_buffer_read(buf_iter, NULL);
2779 	}
2780 
2781 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2782 }
2783 
2784 /*
2785  * The current tracer is copied to avoid a global locking
2786  * all around.
2787  */
2788 static void *s_start(struct seq_file *m, loff_t *pos)
2789 {
2790 	struct trace_iterator *iter = m->private;
2791 	struct trace_array *tr = iter->tr;
2792 	int cpu_file = iter->cpu_file;
2793 	void *p = NULL;
2794 	loff_t l = 0;
2795 	int cpu;
2796 
2797 	/*
2798 	 * copy the tracer to avoid using a global lock all around.
2799 	 * iter->trace is a copy of current_trace, the pointer to the
2800 	 * name may be used instead of a strcmp(), as iter->trace->name
2801 	 * will point to the same string as current_trace->name.
2802 	 */
2803 	mutex_lock(&trace_types_lock);
2804 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2805 		*iter->trace = *tr->current_trace;
2806 	mutex_unlock(&trace_types_lock);
2807 
2808 #ifdef CONFIG_TRACER_MAX_TRACE
2809 	if (iter->snapshot && iter->trace->use_max_tr)
2810 		return ERR_PTR(-EBUSY);
2811 #endif
2812 
2813 	if (!iter->snapshot)
2814 		atomic_inc(&trace_record_cmdline_disabled);
2815 
2816 	if (*pos != iter->pos) {
2817 		iter->ent = NULL;
2818 		iter->cpu = 0;
2819 		iter->idx = -1;
2820 
2821 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2822 			for_each_tracing_cpu(cpu)
2823 				tracing_iter_reset(iter, cpu);
2824 		} else
2825 			tracing_iter_reset(iter, cpu_file);
2826 
2827 		iter->leftover = 0;
2828 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2829 			;
2830 
2831 	} else {
2832 		/*
2833 		 * If we overflowed the seq_file before, then we want
2834 		 * to just reuse the trace_seq buffer again.
2835 		 */
2836 		if (iter->leftover)
2837 			p = iter;
2838 		else {
2839 			l = *pos - 1;
2840 			p = s_next(m, p, &l);
2841 		}
2842 	}
2843 
2844 	trace_event_read_lock();
2845 	trace_access_lock(cpu_file);
2846 	return p;
2847 }
2848 
/*
 * seq_file ->stop: undo what s_start() did, i.e. drop the cmdline-record
 * disable count for non-snapshot iterators and release the access and
 * event read locks.
 */
static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

#ifdef CONFIG_TRACER_MAX_TRACE
	/*
	 * Same condition as the -EBUSY return in s_start(), which happens
	 * before the counter is raised or any lock is taken.
	 */
	if (iter->snapshot && iter->trace->use_max_tr)
		return;
#endif

	if (!iter->snapshot)
		atomic_dec(&trace_record_cmdline_disabled);

	/* released in the reverse order of s_start() */
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}
2864 
2865 static void
2866 get_total_entries(struct trace_buffer *buf,
2867 		  unsigned long *total, unsigned long *entries)
2868 {
2869 	unsigned long count;
2870 	int cpu;
2871 
2872 	*total = 0;
2873 	*entries = 0;
2874 
2875 	for_each_tracing_cpu(cpu) {
2876 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2877 		/*
2878 		 * If this buffer has skipped entries, then we hold all
2879 		 * entries for the trace and we need to ignore the
2880 		 * ones before the time stamp.
2881 		 */
2882 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2883 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2884 			/* total is the same as the entries */
2885 			*total += count;
2886 		} else
2887 			*total += count +
2888 				ring_buffer_overrun_cpu(buf->buffer, cpu);
2889 		*entries += count;
2890 	}
2891 }
2892 
/*
 * Print the column legend used with the latency trace format. The whole
 * legend is emitted with a single seq_puts() call.
 */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "#                  _------=> CPU#            \n"
		    "#                 / _-----=> irqs-off        \n"
		    "#                | / _----=> need-resched    \n"
		    "#                || / _---=> hardirq/softirq \n"
		    "#                ||| / _--=> preempt-depth   \n"
		    "#                |||| /     delay            \n"
		    "#  cmd     pid   ||||| time  |   caller      \n"
		    "#     \\   /      |||||  \\    |   /         \n");
}
2904 
/*
 * Print the "entries-in-buffer/entries-written" summary line for @buf,
 * together with the number of online CPUs.
 */
static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
{
	unsigned long written, in_buf;

	get_total_entries(buf, &written, &in_buf);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   in_buf, written, num_online_cpus());
	seq_puts(m, "#\n");
}
2915 
/*
 * Print the entry summary for @buf followed by the column legend of the
 * default trace format (the variant without the irq-info columns).
 */
static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
{
	print_event_info(buf, m);
	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
		    "#              | |       |          |         |\n");
}
2922 
/*
 * Print the entry summary for @buf followed by the column legend of the
 * default trace format, including the irqs-off / need-resched /
 * hardirq-softirq / preempt-depth columns.
 */
static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
{
	print_event_info(buf, m);
	seq_puts(m, "#                              _-----=> irqs-off\n"
		    "#                             / _----=> need-resched\n"
		    "#                            | / _---=> hardirq/softirq\n"
		    "#                            || / _--=> preempt-depth\n"
		    "#                            ||| /     delay\n"
		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
		    "#              | |       |   ||||       |         |\n");
}
2934 
2935 void
2936 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2937 {
2938 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2939 	struct trace_buffer *buf = iter->trace_buffer;
2940 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2941 	struct tracer *type = iter->trace;
2942 	unsigned long entries;
2943 	unsigned long total;
2944 	const char *name = "preemption";
2945 
2946 	name = type->name;
2947 
2948 	get_total_entries(buf, &total, &entries);
2949 
2950 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2951 		   name, UTS_RELEASE);
2952 	seq_puts(m, "# -----------------------------------"
2953 		 "---------------------------------\n");
2954 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2955 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2956 		   nsecs_to_usecs(data->saved_latency),
2957 		   entries,
2958 		   total,
2959 		   buf->cpu,
2960 #if defined(CONFIG_PREEMPT_NONE)
2961 		   "server",
2962 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2963 		   "desktop",
2964 #elif defined(CONFIG_PREEMPT)
2965 		   "preempt",
2966 #else
2967 		   "unknown",
2968 #endif
2969 		   /* These are reserved for later use */
2970 		   0, 0, 0, 0);
2971 #ifdef CONFIG_SMP
2972 	seq_printf(m, " #P:%d)\n", num_online_cpus());
2973 #else
2974 	seq_puts(m, ")\n");
2975 #endif
2976 	seq_puts(m, "#    -----------------\n");
2977 	seq_printf(m, "#    | task: %.16s-%d "
2978 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2979 		   data->comm, data->pid,
2980 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2981 		   data->policy, data->rt_priority);
2982 	seq_puts(m, "#    -----------------\n");
2983 
2984 	if (data->critical_start) {
2985 		seq_puts(m, "#  => started at: ");
2986 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2987 		trace_print_seq(m, &iter->seq);
2988 		seq_puts(m, "\n#  => ended at:   ");
2989 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2990 		trace_print_seq(m, &iter->seq);
2991 		seq_puts(m, "\n#\n");
2992 	}
2993 
2994 	seq_puts(m, "#\n");
2995 }
2996 
2997 static void test_cpu_buff_start(struct trace_iterator *iter)
2998 {
2999 	struct trace_seq *s = &iter->seq;
3000 	struct trace_array *tr = iter->tr;
3001 
3002 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3003 		return;
3004 
3005 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3006 		return;
3007 
3008 	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3009 		return;
3010 
3011 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3012 		return;
3013 
3014 	if (iter->started)
3015 		cpumask_set_cpu(iter->cpu, iter->started);
3016 
3017 	/* Don't print started cpu buffer for the first entry of the trace */
3018 	if (iter->idx > 1)
3019 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3020 				iter->cpu);
3021 }
3022 
3023 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3024 {
3025 	struct trace_array *tr = iter->tr;
3026 	struct trace_seq *s = &iter->seq;
3027 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3028 	struct trace_entry *entry;
3029 	struct trace_event *event;
3030 
3031 	entry = iter->ent;
3032 
3033 	test_cpu_buff_start(iter);
3034 
3035 	event = ftrace_find_event(entry->type);
3036 
3037 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3038 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3039 			trace_print_lat_context(iter);
3040 		else
3041 			trace_print_context(iter);
3042 	}
3043 
3044 	if (trace_seq_has_overflowed(s))
3045 		return TRACE_TYPE_PARTIAL_LINE;
3046 
3047 	if (event)
3048 		return event->funcs->trace(iter, sym_flags, event);
3049 
3050 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3051 
3052 	return trace_handle_return(s);
3053 }
3054 
3055 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3056 {
3057 	struct trace_array *tr = iter->tr;
3058 	struct trace_seq *s = &iter->seq;
3059 	struct trace_entry *entry;
3060 	struct trace_event *event;
3061 
3062 	entry = iter->ent;
3063 
3064 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3065 		trace_seq_printf(s, "%d %d %llu ",
3066 				 entry->pid, iter->cpu, iter->ts);
3067 
3068 	if (trace_seq_has_overflowed(s))
3069 		return TRACE_TYPE_PARTIAL_LINE;
3070 
3071 	event = ftrace_find_event(entry->type);
3072 	if (event)
3073 		return event->funcs->raw(iter, 0, event);
3074 
3075 	trace_seq_printf(s, "%d ?\n", entry->type);
3076 
3077 	return trace_handle_return(s);
3078 }
3079 
3080 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3081 {
3082 	struct trace_array *tr = iter->tr;
3083 	struct trace_seq *s = &iter->seq;
3084 	unsigned char newline = '\n';
3085 	struct trace_entry *entry;
3086 	struct trace_event *event;
3087 
3088 	entry = iter->ent;
3089 
3090 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3091 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3092 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3093 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3094 		if (trace_seq_has_overflowed(s))
3095 			return TRACE_TYPE_PARTIAL_LINE;
3096 	}
3097 
3098 	event = ftrace_find_event(entry->type);
3099 	if (event) {
3100 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3101 		if (ret != TRACE_TYPE_HANDLED)
3102 			return ret;
3103 	}
3104 
3105 	SEQ_PUT_FIELD(s, newline);
3106 
3107 	return trace_handle_return(s);
3108 }
3109 
3110 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3111 {
3112 	struct trace_array *tr = iter->tr;
3113 	struct trace_seq *s = &iter->seq;
3114 	struct trace_entry *entry;
3115 	struct trace_event *event;
3116 
3117 	entry = iter->ent;
3118 
3119 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3120 		SEQ_PUT_FIELD(s, entry->pid);
3121 		SEQ_PUT_FIELD(s, iter->cpu);
3122 		SEQ_PUT_FIELD(s, iter->ts);
3123 		if (trace_seq_has_overflowed(s))
3124 			return TRACE_TYPE_PARTIAL_LINE;
3125 	}
3126 
3127 	event = ftrace_find_event(entry->type);
3128 	return event ? event->funcs->binary(iter, 0, event) :
3129 		TRACE_TYPE_HANDLED;
3130 }
3131 
3132 int trace_empty(struct trace_iterator *iter)
3133 {
3134 	struct ring_buffer_iter *buf_iter;
3135 	int cpu;
3136 
3137 	/* If we are looking at one CPU buffer, only check that one */
3138 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3139 		cpu = iter->cpu_file;
3140 		buf_iter = trace_buffer_iter(iter, cpu);
3141 		if (buf_iter) {
3142 			if (!ring_buffer_iter_empty(buf_iter))
3143 				return 0;
3144 		} else {
3145 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3146 				return 0;
3147 		}
3148 		return 1;
3149 	}
3150 
3151 	for_each_tracing_cpu(cpu) {
3152 		buf_iter = trace_buffer_iter(iter, cpu);
3153 		if (buf_iter) {
3154 			if (!ring_buffer_iter_empty(buf_iter))
3155 				return 0;
3156 		} else {
3157 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3158 				return 0;
3159 		}
3160 	}
3161 
3162 	return 1;
3163 }
3164 
3165 /*  Called with trace_event_read_lock() held. */
3166 enum print_line_t print_trace_line(struct trace_iterator *iter)
3167 {
3168 	struct trace_array *tr = iter->tr;
3169 	unsigned long trace_flags = tr->trace_flags;
3170 	enum print_line_t ret;
3171 
3172 	if (iter->lost_events) {
3173 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3174 				 iter->cpu, iter->lost_events);
3175 		if (trace_seq_has_overflowed(&iter->seq))
3176 			return TRACE_TYPE_PARTIAL_LINE;
3177 	}
3178 
3179 	if (iter->trace && iter->trace->print_line) {
3180 		ret = iter->trace->print_line(iter);
3181 		if (ret != TRACE_TYPE_UNHANDLED)
3182 			return ret;
3183 	}
3184 
3185 	if (iter->ent->type == TRACE_BPUTS &&
3186 			trace_flags & TRACE_ITER_PRINTK &&
3187 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3188 		return trace_print_bputs_msg_only(iter);
3189 
3190 	if (iter->ent->type == TRACE_BPRINT &&
3191 			trace_flags & TRACE_ITER_PRINTK &&
3192 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3193 		return trace_print_bprintk_msg_only(iter);
3194 
3195 	if (iter->ent->type == TRACE_PRINT &&
3196 			trace_flags & TRACE_ITER_PRINTK &&
3197 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3198 		return trace_print_printk_msg_only(iter);
3199 
3200 	if (trace_flags & TRACE_ITER_BIN)
3201 		return print_bin_fmt(iter);
3202 
3203 	if (trace_flags & TRACE_ITER_HEX)
3204 		return print_hex_fmt(iter);
3205 
3206 	if (trace_flags & TRACE_ITER_RAW)
3207 		return print_raw_fmt(iter);
3208 
3209 	return print_trace_fmt(iter);
3210 }
3211 
3212 void trace_latency_header(struct seq_file *m)
3213 {
3214 	struct trace_iterator *iter = m->private;
3215 	struct trace_array *tr = iter->tr;
3216 
3217 	/* print nothing if the buffers are empty */
3218 	if (trace_empty(iter))
3219 		return;
3220 
3221 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3222 		print_trace_header(m, iter);
3223 
3224 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3225 		print_lat_help_header(m);
3226 }
3227 
3228 void trace_default_header(struct seq_file *m)
3229 {
3230 	struct trace_iterator *iter = m->private;
3231 	struct trace_array *tr = iter->tr;
3232 	unsigned long trace_flags = tr->trace_flags;
3233 
3234 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3235 		return;
3236 
3237 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3238 		/* print nothing if the buffers are empty */
3239 		if (trace_empty(iter))
3240 			return;
3241 		print_trace_header(m, iter);
3242 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3243 			print_lat_help_header(m);
3244 	} else {
3245 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3246 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3247 				print_func_help_header_irq(iter->trace_buffer, m);
3248 			else
3249 				print_func_help_header(iter->trace_buffer, m);
3250 		}
3251 	}
3252 }
3253 
/* Warn in the output when ftrace_is_dead() reports function tracing is dead. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
3261 
3262 #ifdef CONFIG_TRACER_MAX_TRACE
/* Describe the values accepted by the main (all CPUs) snapshot file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char help[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, help);
}
3272 
/*
 * Describe the values accepted by a per-cpu snapshot file. What writing
 * '1' does depends on whether CONFIG_RING_BUFFER_ALLOW_SWAP is set.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	static const char take_help[] =
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer for this cpu.\n";
#else
	static const char take_help[] =
		"# echo 1 > snapshot : Not supported with this kernel.\n"
		"#                     Must use main snapshot file to allocate.\n";
#endif

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
	seq_puts(m, take_help);
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
3287 
3288 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3289 {
3290 	if (iter->tr->allocated_snapshot)
3291 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3292 	else
3293 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3294 
3295 	seq_puts(m, "# Snapshot commands:\n");
3296 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3297 		show_snapshot_main_help(m);
3298 	else
3299 		show_snapshot_percpu_help(m);
3300 }
3301 #else
/*
 * Empty stand-in used when CONFIG_TRACER_MAX_TRACE is not set.
 * Should never be called.
 */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
3304 #endif
3305 
3306 static int s_show(struct seq_file *m, void *v)
3307 {
3308 	struct trace_iterator *iter = v;
3309 	int ret;
3310 
3311 	if (iter->ent == NULL) {
3312 		if (iter->tr) {
3313 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3314 			seq_puts(m, "#\n");
3315 			test_ftrace_alive(m);
3316 		}
3317 		if (iter->snapshot && trace_empty(iter))
3318 			print_snapshot_help(m, iter);
3319 		else if (iter->trace && iter->trace->print_header)
3320 			iter->trace->print_header(m);
3321 		else
3322 			trace_default_header(m);
3323 
3324 	} else if (iter->leftover) {
3325 		/*
3326 		 * If we filled the seq_file buffer earlier, we
3327 		 * want to just show it now.
3328 		 */
3329 		ret = trace_print_seq(m, &iter->seq);
3330 
3331 		/* ret should this time be zero, but you never know */
3332 		iter->leftover = ret;
3333 
3334 	} else {
3335 		print_trace_line(iter);
3336 		ret = trace_print_seq(m, &iter->seq);
3337 		/*
3338 		 * If we overflow the seq_file buffer, then it will
3339 		 * ask us for this data again at start up.
3340 		 * Use that instead.
3341 		 *  ret is 0 if seq_file write succeeded.
3342 		 *        -1 otherwise.
3343 		 */
3344 		iter->leftover = ret;
3345 	}
3346 
3347 	return 0;
3348 }
3349 
3350 /*
3351  * Should be used after trace_array_get(), trace_types_lock
3352  * ensures that i_cdev was already initialized.
3353  */
3354 static inline int tracing_get_cpu(struct inode *inode)
3355 {
3356 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3357 		return (long)inode->i_cdev - 1;
3358 	return RING_BUFFER_ALL_CPUS;
3359 }
3360 
3361 static const struct seq_operations tracer_seq_ops = {
3362 	.start		= s_start,
3363 	.next		= s_next,
3364 	.stop		= s_stop,
3365 	.show		= s_show,
3366 };
3367 
3368 static struct trace_iterator *
3369 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3370 {
3371 	struct trace_array *tr = inode->i_private;
3372 	struct trace_iterator *iter;
3373 	int cpu;
3374 
3375 	if (tracing_disabled)
3376 		return ERR_PTR(-ENODEV);
3377 
3378 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3379 	if (!iter)
3380 		return ERR_PTR(-ENOMEM);
3381 
3382 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3383 				    GFP_KERNEL);
3384 	if (!iter->buffer_iter)
3385 		goto release;
3386 
3387 	/*
3388 	 * We make a copy of the current tracer to avoid concurrent
3389 	 * changes on it while we are reading.
3390 	 */
3391 	mutex_lock(&trace_types_lock);
3392 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3393 	if (!iter->trace)
3394 		goto fail;
3395 
3396 	*iter->trace = *tr->current_trace;
3397 
3398 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3399 		goto fail;
3400 
3401 	iter->tr = tr;
3402 
3403 #ifdef CONFIG_TRACER_MAX_TRACE
3404 	/* Currently only the top directory has a snapshot */
3405 	if (tr->current_trace->print_max || snapshot)
3406 		iter->trace_buffer = &tr->max_buffer;
3407 	else
3408 #endif
3409 		iter->trace_buffer = &tr->trace_buffer;
3410 	iter->snapshot = snapshot;
3411 	iter->pos = -1;
3412 	iter->cpu_file = tracing_get_cpu(inode);
3413 	mutex_init(&iter->mutex);
3414 
3415 	/* Notify the tracer early; before we stop tracing. */
3416 	if (iter->trace && iter->trace->open)
3417 		iter->trace->open(iter);
3418 
3419 	/* Annotate start of buffers if we had overruns */
3420 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3421 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3422 
3423 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3424 	if (trace_clocks[tr->clock_id].in_ns)
3425 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3426 
3427 	/* stop the trace while dumping if we are not opening "snapshot" */
3428 	if (!iter->snapshot)
3429 		tracing_stop_tr(tr);
3430 
3431 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3432 		for_each_tracing_cpu(cpu) {
3433 			iter->buffer_iter[cpu] =
3434 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3435 		}
3436 		ring_buffer_read_prepare_sync();
3437 		for_each_tracing_cpu(cpu) {
3438 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3439 			tracing_iter_reset(iter, cpu);
3440 		}
3441 	} else {
3442 		cpu = iter->cpu_file;
3443 		iter->buffer_iter[cpu] =
3444 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3445 		ring_buffer_read_prepare_sync();
3446 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3447 		tracing_iter_reset(iter, cpu);
3448 	}
3449 
3450 	mutex_unlock(&trace_types_lock);
3451 
3452 	return iter;
3453 
3454  fail:
3455 	mutex_unlock(&trace_types_lock);
3456 	kfree(iter->trace);
3457 	kfree(iter->buffer_iter);
3458 release:
3459 	seq_release_private(inode, file);
3460 	return ERR_PTR(-ENOMEM);
3461 }
3462 
3463 int tracing_open_generic(struct inode *inode, struct file *filp)
3464 {
3465 	if (tracing_disabled)
3466 		return -ENODEV;
3467 
3468 	filp->private_data = inode->i_private;
3469 	return 0;
3470 }
3471 
3472 bool tracing_is_disabled(void)
3473 {
3474 	return (tracing_disabled) ? true: false;
3475 }
3476 
3477 /*
3478  * Open and update trace_array ref count.
3479  * Must have the current trace_array passed to it.
3480  */
3481 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3482 {
3483 	struct trace_array *tr = inode->i_private;
3484 
3485 	if (tracing_disabled)
3486 		return -ENODEV;
3487 
3488 	if (trace_array_get(tr) < 0)
3489 		return -ENODEV;
3490 
3491 	filp->private_data = inode->i_private;
3492 
3493 	return 0;
3494 }
3495 
3496 static int tracing_release(struct inode *inode, struct file *file)
3497 {
3498 	struct trace_array *tr = inode->i_private;
3499 	struct seq_file *m = file->private_data;
3500 	struct trace_iterator *iter;
3501 	int cpu;
3502 
3503 	if (!(file->f_mode & FMODE_READ)) {
3504 		trace_array_put(tr);
3505 		return 0;
3506 	}
3507 
3508 	/* Writes do not use seq_file */
3509 	iter = m->private;
3510 	mutex_lock(&trace_types_lock);
3511 
3512 	for_each_tracing_cpu(cpu) {
3513 		if (iter->buffer_iter[cpu])
3514 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3515 	}
3516 
3517 	if (iter->trace && iter->trace->close)
3518 		iter->trace->close(iter);
3519 
3520 	if (!iter->snapshot)
3521 		/* reenable tracing if it was previously enabled */
3522 		tracing_start_tr(tr);
3523 
3524 	__trace_array_put(tr);
3525 
3526 	mutex_unlock(&trace_types_lock);
3527 
3528 	mutex_destroy(&iter->mutex);
3529 	free_cpumask_var(iter->started);
3530 	kfree(iter->trace);
3531 	kfree(iter->buffer_iter);
3532 	seq_release_private(inode, file);
3533 
3534 	return 0;
3535 }
3536 
3537 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3538 {
3539 	struct trace_array *tr = inode->i_private;
3540 
3541 	trace_array_put(tr);
3542 	return 0;
3543 }
3544 
3545 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3546 {
3547 	struct trace_array *tr = inode->i_private;
3548 
3549 	trace_array_put(tr);
3550 
3551 	return single_release(inode, file);
3552 }
3553 
3554 static int tracing_open(struct inode *inode, struct file *file)
3555 {
3556 	struct trace_array *tr = inode->i_private;
3557 	struct trace_iterator *iter;
3558 	int ret = 0;
3559 
3560 	if (trace_array_get(tr) < 0)
3561 		return -ENODEV;
3562 
3563 	/* If this file was open for write, then erase contents */
3564 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3565 		int cpu = tracing_get_cpu(inode);
3566 
3567 		if (cpu == RING_BUFFER_ALL_CPUS)
3568 			tracing_reset_online_cpus(&tr->trace_buffer);
3569 		else
3570 			tracing_reset(&tr->trace_buffer, cpu);
3571 	}
3572 
3573 	if (file->f_mode & FMODE_READ) {
3574 		iter = __tracing_open(inode, file, false);
3575 		if (IS_ERR(iter))
3576 			ret = PTR_ERR(iter);
3577 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3578 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3579 	}
3580 
3581 	if (ret < 0)
3582 		trace_array_put(tr);
3583 
3584 	return ret;
3585 }
3586 
3587 /*
3588  * Some tracers are not suitable for instance buffers.
3589  * A tracer is always available for the global array (toplevel)
3590  * or if it explicitly states that it is.
3591  */
3592 static bool
3593 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3594 {
3595 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3596 }
3597 
3598 /* Find the next tracer that this trace array may use */
3599 static struct tracer *
3600 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3601 {
3602 	while (t && !trace_ok_for_array(t, tr))
3603 		t = t->next;
3604 
3605 	return t;
3606 }
3607 
3608 static void *
3609 t_next(struct seq_file *m, void *v, loff_t *pos)
3610 {
3611 	struct trace_array *tr = m->private;
3612 	struct tracer *t = v;
3613 
3614 	(*pos)++;
3615 
3616 	if (t)
3617 		t = get_tracer_for_array(tr, t->next);
3618 
3619 	return t;
3620 }
3621 
3622 static void *t_start(struct seq_file *m, loff_t *pos)
3623 {
3624 	struct trace_array *tr = m->private;
3625 	struct tracer *t;
3626 	loff_t l = 0;
3627 
3628 	mutex_lock(&trace_types_lock);
3629 
3630 	t = get_tracer_for_array(tr, trace_types);
3631 	for (; t && l < *pos; t = t_next(m, t, &l))
3632 			;
3633 
3634 	return t;
3635 }
3636 
3637 static void t_stop(struct seq_file *m, void *p)
3638 {
3639 	mutex_unlock(&trace_types_lock);
3640 }
3641 
3642 static int t_show(struct seq_file *m, void *v)
3643 {
3644 	struct tracer *t = v;
3645 
3646 	if (!t)
3647 		return 0;
3648 
3649 	seq_puts(m, t->name);
3650 	if (t->next)
3651 		seq_putc(m, ' ');
3652 	else
3653 		seq_putc(m, '\n');
3654 
3655 	return 0;
3656 }
3657 
3658 static const struct seq_operations show_traces_seq_ops = {
3659 	.start		= t_start,
3660 	.next		= t_next,
3661 	.stop		= t_stop,
3662 	.show		= t_show,
3663 };
3664 
3665 static int show_traces_open(struct inode *inode, struct file *file)
3666 {
3667 	struct trace_array *tr = inode->i_private;
3668 	struct seq_file *m;
3669 	int ret;
3670 
3671 	if (tracing_disabled)
3672 		return -ENODEV;
3673 
3674 	ret = seq_open(file, &show_traces_seq_ops);
3675 	if (ret)
3676 		return ret;
3677 
3678 	m = file->private_data;
3679 	m->private = tr;
3680 
3681 	return 0;
3682 }
3683 
3684 static ssize_t
3685 tracing_write_stub(struct file *filp, const char __user *ubuf,
3686 		   size_t count, loff_t *ppos)
3687 {
3688 	return count;
3689 }
3690 
3691 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3692 {
3693 	int ret;
3694 
3695 	if (file->f_mode & FMODE_READ)
3696 		ret = seq_lseek(file, offset, whence);
3697 	else
3698 		file->f_pos = ret = 0;
3699 
3700 	return ret;
3701 }
3702 
3703 static const struct file_operations tracing_fops = {
3704 	.open		= tracing_open,
3705 	.read		= seq_read,
3706 	.write		= tracing_write_stub,
3707 	.llseek		= tracing_lseek,
3708 	.release	= tracing_release,
3709 };
3710 
3711 static const struct file_operations show_traces_fops = {
3712 	.open		= show_traces_open,
3713 	.read		= seq_read,
3714 	.release	= seq_release,
3715 	.llseek		= seq_lseek,
3716 };
3717 
3718 /*
3719  * The tracer itself will not take this lock, but still we want
3720  * to provide a consistent cpumask to user-space:
3721  */
3722 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3723 
3724 /*
3725  * Temporary storage for the character representation of the
3726  * CPU bitmask (and one more byte for the newline):
3727  */
3728 static char mask_str[NR_CPUS + 1];
3729 
3730 static ssize_t
3731 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3732 		     size_t count, loff_t *ppos)
3733 {
3734 	struct trace_array *tr = file_inode(filp)->i_private;
3735 	int len;
3736 
3737 	mutex_lock(&tracing_cpumask_update_lock);
3738 
3739 	len = snprintf(mask_str, count, "%*pb\n",
3740 		       cpumask_pr_args(tr->tracing_cpumask));
3741 	if (len >= count) {
3742 		count = -EINVAL;
3743 		goto out_err;
3744 	}
3745 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3746 
3747 out_err:
3748 	mutex_unlock(&tracing_cpumask_update_lock);
3749 
3750 	return count;
3751 }
3752 
3753 static ssize_t
3754 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3755 		      size_t count, loff_t *ppos)
3756 {
3757 	struct trace_array *tr = file_inode(filp)->i_private;
3758 	cpumask_var_t tracing_cpumask_new;
3759 	int err, cpu;
3760 
3761 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3762 		return -ENOMEM;
3763 
3764 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3765 	if (err)
3766 		goto err_unlock;
3767 
3768 	mutex_lock(&tracing_cpumask_update_lock);
3769 
3770 	local_irq_disable();
3771 	arch_spin_lock(&tr->max_lock);
3772 	for_each_tracing_cpu(cpu) {
3773 		/*
3774 		 * Increase/decrease the disabled counter if we are
3775 		 * about to flip a bit in the cpumask:
3776 		 */
3777 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3778 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3779 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3780 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3781 		}
3782 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3783 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3784 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3785 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3786 		}
3787 	}
3788 	arch_spin_unlock(&tr->max_lock);
3789 	local_irq_enable();
3790 
3791 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3792 
3793 	mutex_unlock(&tracing_cpumask_update_lock);
3794 	free_cpumask_var(tracing_cpumask_new);
3795 
3796 	return count;
3797 
3798 err_unlock:
3799 	free_cpumask_var(tracing_cpumask_new);
3800 
3801 	return err;
3802 }
3803 
3804 static const struct file_operations tracing_cpumask_fops = {
3805 	.open		= tracing_open_generic_tr,
3806 	.read		= tracing_cpumask_read,
3807 	.write		= tracing_cpumask_write,
3808 	.release	= tracing_release_generic_tr,
3809 	.llseek		= generic_file_llseek,
3810 };
3811 
3812 static int tracing_trace_options_show(struct seq_file *m, void *v)
3813 {
3814 	struct tracer_opt *trace_opts;
3815 	struct trace_array *tr = m->private;
3816 	u32 tracer_flags;
3817 	int i;
3818 
3819 	mutex_lock(&trace_types_lock);
3820 	tracer_flags = tr->current_trace->flags->val;
3821 	trace_opts = tr->current_trace->flags->opts;
3822 
3823 	for (i = 0; trace_options[i]; i++) {
3824 		if (tr->trace_flags & (1 << i))
3825 			seq_printf(m, "%s\n", trace_options[i]);
3826 		else
3827 			seq_printf(m, "no%s\n", trace_options[i]);
3828 	}
3829 
3830 	for (i = 0; trace_opts[i].name; i++) {
3831 		if (tracer_flags & trace_opts[i].bit)
3832 			seq_printf(m, "%s\n", trace_opts[i].name);
3833 		else
3834 			seq_printf(m, "no%s\n", trace_opts[i].name);
3835 	}
3836 	mutex_unlock(&trace_types_lock);
3837 
3838 	return 0;
3839 }
3840 
3841 static int __set_tracer_option(struct trace_array *tr,
3842 			       struct tracer_flags *tracer_flags,
3843 			       struct tracer_opt *opts, int neg)
3844 {
3845 	struct tracer *trace = tracer_flags->trace;
3846 	int ret;
3847 
3848 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3849 	if (ret)
3850 		return ret;
3851 
3852 	if (neg)
3853 		tracer_flags->val &= ~opts->bit;
3854 	else
3855 		tracer_flags->val |= opts->bit;
3856 	return 0;
3857 }
3858 
3859 /* Try to assign a tracer specific option */
3860 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3861 {
3862 	struct tracer *trace = tr->current_trace;
3863 	struct tracer_flags *tracer_flags = trace->flags;
3864 	struct tracer_opt *opts = NULL;
3865 	int i;
3866 
3867 	for (i = 0; tracer_flags->opts[i].name; i++) {
3868 		opts = &tracer_flags->opts[i];
3869 
3870 		if (strcmp(cmp, opts->name) == 0)
3871 			return __set_tracer_option(tr, trace->flags, opts, neg);
3872 	}
3873 
3874 	return -EINVAL;
3875 }
3876 
3877 /* Some tracers require overwrite to stay enabled */
3878 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3879 {
3880 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3881 		return -1;
3882 
3883 	return 0;
3884 }
3885 
3886 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3887 {
3888 	/* do nothing if flag is already set */
3889 	if (!!(tr->trace_flags & mask) == !!enabled)
3890 		return 0;
3891 
3892 	/* Give the tracer a chance to approve the change */
3893 	if (tr->current_trace->flag_changed)
3894 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3895 			return -EINVAL;
3896 
3897 	if (enabled)
3898 		tr->trace_flags |= mask;
3899 	else
3900 		tr->trace_flags &= ~mask;
3901 
3902 	if (mask == TRACE_ITER_RECORD_CMD)
3903 		trace_event_enable_cmd_record(enabled);
3904 
3905 	if (mask == TRACE_ITER_EVENT_FORK)
3906 		trace_event_follow_fork(tr, enabled);
3907 
3908 	if (mask == TRACE_ITER_OVERWRITE) {
3909 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3910 #ifdef CONFIG_TRACER_MAX_TRACE
3911 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3912 #endif
3913 	}
3914 
3915 	if (mask == TRACE_ITER_PRINTK) {
3916 		trace_printk_start_stop_comm(enabled);
3917 		trace_printk_control(enabled);
3918 	}
3919 
3920 	return 0;
3921 }
3922 
3923 static int trace_set_options(struct trace_array *tr, char *option)
3924 {
3925 	char *cmp;
3926 	int neg = 0;
3927 	int ret = -ENODEV;
3928 	int i;
3929 	size_t orig_len = strlen(option);
3930 
3931 	cmp = strstrip(option);
3932 
3933 	if (strncmp(cmp, "no", 2) == 0) {
3934 		neg = 1;
3935 		cmp += 2;
3936 	}
3937 
3938 	mutex_lock(&trace_types_lock);
3939 
3940 	for (i = 0; trace_options[i]; i++) {
3941 		if (strcmp(cmp, trace_options[i]) == 0) {
3942 			ret = set_tracer_flag(tr, 1 << i, !neg);
3943 			break;
3944 		}
3945 	}
3946 
3947 	/* If no option could be set, test the specific tracer options */
3948 	if (!trace_options[i])
3949 		ret = set_tracer_option(tr, cmp, neg);
3950 
3951 	mutex_unlock(&trace_types_lock);
3952 
3953 	/*
3954 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
3955 	 * turn it back into a space.
3956 	 */
3957 	if (orig_len > strlen(option))
3958 		option[strlen(option)] = ' ';
3959 
3960 	return ret;
3961 }
3962 
3963 static void __init apply_trace_boot_options(void)
3964 {
3965 	char *buf = trace_boot_options_buf;
3966 	char *option;
3967 
3968 	while (true) {
3969 		option = strsep(&buf, ",");
3970 
3971 		if (!option)
3972 			break;
3973 
3974 		if (*option)
3975 			trace_set_options(&global_trace, option);
3976 
3977 		/* Put back the comma to allow this to be called again */
3978 		if (buf)
3979 			*(buf - 1) = ',';
3980 	}
3981 }
3982 
3983 static ssize_t
3984 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3985 			size_t cnt, loff_t *ppos)
3986 {
3987 	struct seq_file *m = filp->private_data;
3988 	struct trace_array *tr = m->private;
3989 	char buf[64];
3990 	int ret;
3991 
3992 	if (cnt >= sizeof(buf))
3993 		return -EINVAL;
3994 
3995 	if (copy_from_user(buf, ubuf, cnt))
3996 		return -EFAULT;
3997 
3998 	buf[cnt] = 0;
3999 
4000 	ret = trace_set_options(tr, buf);
4001 	if (ret < 0)
4002 		return ret;
4003 
4004 	*ppos += cnt;
4005 
4006 	return cnt;
4007 }
4008 
4009 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4010 {
4011 	struct trace_array *tr = inode->i_private;
4012 	int ret;
4013 
4014 	if (tracing_disabled)
4015 		return -ENODEV;
4016 
4017 	if (trace_array_get(tr) < 0)
4018 		return -ENODEV;
4019 
4020 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4021 	if (ret < 0)
4022 		trace_array_put(tr);
4023 
4024 	return ret;
4025 }
4026 
4027 static const struct file_operations tracing_iter_fops = {
4028 	.open		= tracing_trace_options_open,
4029 	.read		= seq_read,
4030 	.llseek		= seq_lseek,
4031 	.release	= tracing_single_release_tr,
4032 	.write		= tracing_trace_options_write,
4033 };
4034 
/*
 * Help text returned by tracing_readme_read(). Sections are compiled
 * in or out according to the kernel configuration.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t- change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by adding a prefix 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do_trap is hit and it disables tracing, the\n"
	"\t       counter will decrement to 2. If tracing is already disabled,\n"
	"\t       the counter will not decrement. It only decrements when the\n"
	"\t       trigger did work\n"
	"\t     To remove trigger without count:\n"
	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t     To remove trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [if <filter>]\n\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
	"\t    correspond to fields in the event's format description.  Keys\n"
	"\t    can be any field, or the special string 'stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
	"\t    fields.  Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword.  The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field.  The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout.  If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output.  The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name.  The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex        display a number as a hex value\n"
	"\t            .sym        display an address as a symbol\n"
	"\t            .sym-offset display an address as a symbol and offset\n"
	"\t            .execname   display a common_pid as a program name\n"
	"\t            .syscall    display a syscall id as a syscall name\n"
	"\t            .log2       display log2 value rather than raw number\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so.  'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger.  The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n"
#endif
;
4220 
4221 static ssize_t
4222 tracing_readme_read(struct file *filp, char __user *ubuf,
4223 		       size_t cnt, loff_t *ppos)
4224 {
4225 	return simple_read_from_buffer(ubuf, cnt, ppos,
4226 					readme_msg, strlen(readme_msg));
4227 }
4228 
4229 static const struct file_operations tracing_readme_fops = {
4230 	.open		= tracing_open_generic,
4231 	.read		= tracing_readme_read,
4232 	.llseek		= generic_file_llseek,
4233 };
4234 
4235 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4236 {
4237 	unsigned int *ptr = v;
4238 
4239 	if (*pos || m->count)
4240 		ptr++;
4241 
4242 	(*pos)++;
4243 
4244 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4245 	     ptr++) {
4246 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4247 			continue;
4248 
4249 		return ptr;
4250 	}
4251 
4252 	return NULL;
4253 }
4254 
4255 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4256 {
4257 	void *v;
4258 	loff_t l = 0;
4259 
4260 	preempt_disable();
4261 	arch_spin_lock(&trace_cmdline_lock);
4262 
4263 	v = &savedcmd->map_cmdline_to_pid[0];
4264 	while (l <= *pos) {
4265 		v = saved_cmdlines_next(m, v, &l);
4266 		if (!v)
4267 			return NULL;
4268 	}
4269 
4270 	return v;
4271 }
4272 
4273 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4274 {
4275 	arch_spin_unlock(&trace_cmdline_lock);
4276 	preempt_enable();
4277 }
4278 
4279 static int saved_cmdlines_show(struct seq_file *m, void *v)
4280 {
4281 	char buf[TASK_COMM_LEN];
4282 	unsigned int *pid = v;
4283 
4284 	__trace_find_cmdline(*pid, buf);
4285 	seq_printf(m, "%d %s\n", *pid, buf);
4286 	return 0;
4287 }
4288 
4289 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4290 	.start		= saved_cmdlines_start,
4291 	.next		= saved_cmdlines_next,
4292 	.stop		= saved_cmdlines_stop,
4293 	.show		= saved_cmdlines_show,
4294 };
4295 
4296 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4297 {
4298 	if (tracing_disabled)
4299 		return -ENODEV;
4300 
4301 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4302 }
4303 
4304 static const struct file_operations tracing_saved_cmdlines_fops = {
4305 	.open		= tracing_saved_cmdlines_open,
4306 	.read		= seq_read,
4307 	.llseek		= seq_lseek,
4308 	.release	= seq_release,
4309 };
4310 
4311 static ssize_t
4312 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4313 				 size_t cnt, loff_t *ppos)
4314 {
4315 	char buf[64];
4316 	int r;
4317 
4318 	arch_spin_lock(&trace_cmdline_lock);
4319 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4320 	arch_spin_unlock(&trace_cmdline_lock);
4321 
4322 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4323 }
4324 
4325 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4326 {
4327 	kfree(s->saved_cmdlines);
4328 	kfree(s->map_cmdline_to_pid);
4329 	kfree(s);
4330 }
4331 
4332 static int tracing_resize_saved_cmdlines(unsigned int val)
4333 {
4334 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4335 
4336 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4337 	if (!s)
4338 		return -ENOMEM;
4339 
4340 	if (allocate_cmdlines_buffer(val, s) < 0) {
4341 		kfree(s);
4342 		return -ENOMEM;
4343 	}
4344 
4345 	arch_spin_lock(&trace_cmdline_lock);
4346 	savedcmd_temp = savedcmd;
4347 	savedcmd = s;
4348 	arch_spin_unlock(&trace_cmdline_lock);
4349 	free_saved_cmdlines_buffer(savedcmd_temp);
4350 
4351 	return 0;
4352 }
4353 
4354 static ssize_t
4355 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4356 				  size_t cnt, loff_t *ppos)
4357 {
4358 	unsigned long val;
4359 	int ret;
4360 
4361 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4362 	if (ret)
4363 		return ret;
4364 
4365 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
4366 	if (!val || val > PID_MAX_DEFAULT)
4367 		return -EINVAL;
4368 
4369 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4370 	if (ret < 0)
4371 		return ret;
4372 
4373 	*ppos += cnt;
4374 
4375 	return cnt;
4376 }
4377 
4378 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4379 	.open		= tracing_open_generic,
4380 	.read		= tracing_saved_cmdlines_size_read,
4381 	.write		= tracing_saved_cmdlines_size_write,
4382 };
4383 
4384 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4385 static union trace_enum_map_item *
4386 update_enum_map(union trace_enum_map_item *ptr)
4387 {
4388 	if (!ptr->map.enum_string) {
4389 		if (ptr->tail.next) {
4390 			ptr = ptr->tail.next;
4391 			/* Set ptr to the next real item (skip head) */
4392 			ptr++;
4393 		} else
4394 			return NULL;
4395 	}
4396 	return ptr;
4397 }
4398 
4399 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4400 {
4401 	union trace_enum_map_item *ptr = v;
4402 
4403 	/*
4404 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4405 	 * This really should never happen.
4406 	 */
4407 	ptr = update_enum_map(ptr);
4408 	if (WARN_ON_ONCE(!ptr))
4409 		return NULL;
4410 
4411 	ptr++;
4412 
4413 	(*pos)++;
4414 
4415 	ptr = update_enum_map(ptr);
4416 
4417 	return ptr;
4418 }
4419 
4420 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4421 {
4422 	union trace_enum_map_item *v;
4423 	loff_t l = 0;
4424 
4425 	mutex_lock(&trace_enum_mutex);
4426 
4427 	v = trace_enum_maps;
4428 	if (v)
4429 		v++;
4430 
4431 	while (v && l < *pos) {
4432 		v = enum_map_next(m, v, &l);
4433 	}
4434 
4435 	return v;
4436 }
4437 
4438 static void enum_map_stop(struct seq_file *m, void *v)
4439 {
4440 	mutex_unlock(&trace_enum_mutex);
4441 }
4442 
4443 static int enum_map_show(struct seq_file *m, void *v)
4444 {
4445 	union trace_enum_map_item *ptr = v;
4446 
4447 	seq_printf(m, "%s %ld (%s)\n",
4448 		   ptr->map.enum_string, ptr->map.enum_value,
4449 		   ptr->map.system);
4450 
4451 	return 0;
4452 }
4453 
4454 static const struct seq_operations tracing_enum_map_seq_ops = {
4455 	.start		= enum_map_start,
4456 	.next		= enum_map_next,
4457 	.stop		= enum_map_stop,
4458 	.show		= enum_map_show,
4459 };
4460 
4461 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4462 {
4463 	if (tracing_disabled)
4464 		return -ENODEV;
4465 
4466 	return seq_open(filp, &tracing_enum_map_seq_ops);
4467 }
4468 
4469 static const struct file_operations tracing_enum_map_fops = {
4470 	.open		= tracing_enum_map_open,
4471 	.read		= seq_read,
4472 	.llseek		= seq_lseek,
4473 	.release	= seq_release,
4474 };
4475 
4476 static inline union trace_enum_map_item *
4477 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4478 {
4479 	/* Return tail of array given the head */
4480 	return ptr + ptr->head.length + 1;
4481 }
4482 
4483 static void
4484 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4485 			   int len)
4486 {
4487 	struct trace_enum_map **stop;
4488 	struct trace_enum_map **map;
4489 	union trace_enum_map_item *map_array;
4490 	union trace_enum_map_item *ptr;
4491 
4492 	stop = start + len;
4493 
4494 	/*
4495 	 * The trace_enum_maps contains the map plus a head and tail item,
4496 	 * where the head holds the module and length of array, and the
4497 	 * tail holds a pointer to the next list.
4498 	 */
4499 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4500 	if (!map_array) {
4501 		pr_warn("Unable to allocate trace enum mapping\n");
4502 		return;
4503 	}
4504 
4505 	mutex_lock(&trace_enum_mutex);
4506 
4507 	if (!trace_enum_maps)
4508 		trace_enum_maps = map_array;
4509 	else {
4510 		ptr = trace_enum_maps;
4511 		for (;;) {
4512 			ptr = trace_enum_jmp_to_tail(ptr);
4513 			if (!ptr->tail.next)
4514 				break;
4515 			ptr = ptr->tail.next;
4516 
4517 		}
4518 		ptr->tail.next = map_array;
4519 	}
4520 	map_array->head.mod = mod;
4521 	map_array->head.length = len;
4522 	map_array++;
4523 
4524 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4525 		map_array->map = **map;
4526 		map_array++;
4527 	}
4528 	memset(map_array, 0, sizeof(*map_array));
4529 
4530 	mutex_unlock(&trace_enum_mutex);
4531 }
4532 
4533 static void trace_create_enum_file(struct dentry *d_tracer)
4534 {
4535 	trace_create_file("enum_map", 0444, d_tracer,
4536 			  NULL, &tracing_enum_map_fops);
4537 }
4538 
4539 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
/* No enum_map file without CONFIG_TRACE_ENUM_MAP_FILE. */
static inline void trace_create_enum_file(struct dentry *d_tracer)
{
}
/* Nothing to record without CONFIG_TRACE_ENUM_MAP_FILE. */
static inline void trace_insert_enum_map_file(struct module *mod,
			      struct trace_enum_map **start, int len)
{
}
4543 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4544 
/*
 * Register @len enum maps starting at @start: pass them to
 * trace_event_enum_update() and then to trace_insert_enum_map_file().
 * Does nothing when @len is not positive.
 */
static void trace_insert_enum_map(struct module *mod,
				  struct trace_enum_map **start, int len)
{
	if (len <= 0)
		return;

	trace_event_enum_update(start, len);
	trace_insert_enum_map_file(mod, start, len);
}
4559 
4560 static ssize_t
4561 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4562 		       size_t cnt, loff_t *ppos)
4563 {
4564 	struct trace_array *tr = filp->private_data;
4565 	char buf[MAX_TRACER_SIZE+2];
4566 	int r;
4567 
4568 	mutex_lock(&trace_types_lock);
4569 	r = sprintf(buf, "%s\n", tr->current_trace->name);
4570 	mutex_unlock(&trace_types_lock);
4571 
4572 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4573 }
4574 
4575 int tracer_init(struct tracer *t, struct trace_array *tr)
4576 {
4577 	tracing_reset_online_cpus(&tr->trace_buffer);
4578 	return t->init(tr);
4579 }
4580 
4581 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4582 {
4583 	int cpu;
4584 
4585 	for_each_tracing_cpu(cpu)
4586 		per_cpu_ptr(buf->data, cpu)->entries = val;
4587 }
4588 
4589 #ifdef CONFIG_TRACER_MAX_TRACE
4590 /* resize @tr's buffer to the size of @size_tr's entries */
4591 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4592 					struct trace_buffer *size_buf, int cpu_id)
4593 {
4594 	int cpu, ret = 0;
4595 
4596 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
4597 		for_each_tracing_cpu(cpu) {
4598 			ret = ring_buffer_resize(trace_buf->buffer,
4599 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4600 			if (ret < 0)
4601 				break;
4602 			per_cpu_ptr(trace_buf->data, cpu)->entries =
4603 				per_cpu_ptr(size_buf->data, cpu)->entries;
4604 		}
4605 	} else {
4606 		ret = ring_buffer_resize(trace_buf->buffer,
4607 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4608 		if (ret == 0)
4609 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4610 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4611 	}
4612 
4613 	return ret;
4614 }
4615 #endif /* CONFIG_TRACER_MAX_TRACE */
4616 
4617 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4618 					unsigned long size, int cpu)
4619 {
4620 	int ret;
4621 
4622 	/*
4623 	 * If kernel or user changes the size of the ring buffer
4624 	 * we use the size that was given, and we can forget about
4625 	 * expanding it later.
4626 	 */
4627 	ring_buffer_expanded = true;
4628 
4629 	/* May be called before buffers are initialized */
4630 	if (!tr->trace_buffer.buffer)
4631 		return 0;
4632 
4633 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4634 	if (ret < 0)
4635 		return ret;
4636 
4637 #ifdef CONFIG_TRACER_MAX_TRACE
4638 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4639 	    !tr->current_trace->use_max_tr)
4640 		goto out;
4641 
4642 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4643 	if (ret < 0) {
4644 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4645 						     &tr->trace_buffer, cpu);
4646 		if (r < 0) {
4647 			/*
4648 			 * AARGH! We are left with different
4649 			 * size max buffer!!!!
4650 			 * The max buffer is our "snapshot" buffer.
4651 			 * When a tracer needs a snapshot (one of the
4652 			 * latency tracers), it swaps the max buffer
4653 			 * with the saved snap shot. We succeeded to
4654 			 * update the size of the main buffer, but failed to
4655 			 * update the size of the max buffer. But when we tried
4656 			 * to reset the main buffer to the original size, we
4657 			 * failed there too. This is very unlikely to
4658 			 * happen, but if it does, warn and kill all
4659 			 * tracing.
4660 			 */
4661 			WARN_ON(1);
4662 			tracing_disabled = 1;
4663 		}
4664 		return ret;
4665 	}
4666 
4667 	if (cpu == RING_BUFFER_ALL_CPUS)
4668 		set_buffer_entries(&tr->max_buffer, size);
4669 	else
4670 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4671 
4672  out:
4673 #endif /* CONFIG_TRACER_MAX_TRACE */
4674 
4675 	if (cpu == RING_BUFFER_ALL_CPUS)
4676 		set_buffer_entries(&tr->trace_buffer, size);
4677 	else
4678 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4679 
4680 	return ret;
4681 }
4682 
4683 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4684 					  unsigned long size, int cpu_id)
4685 {
4686 	int ret = size;
4687 
4688 	mutex_lock(&trace_types_lock);
4689 
4690 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4691 		/* make sure, this cpu is enabled in the mask */
4692 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4693 			ret = -EINVAL;
4694 			goto out;
4695 		}
4696 	}
4697 
4698 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4699 	if (ret < 0)
4700 		ret = -ENOMEM;
4701 
4702 out:
4703 	mutex_unlock(&trace_types_lock);
4704 
4705 	return ret;
4706 }
4707 
4708 
4709 /**
4710  * tracing_update_buffers - used by tracing facility to expand ring buffers
4711  *
4712  * To save on memory when the tracing is never used on a system with it
4713  * configured in. The ring buffers are set to a minimum size. But once
4714  * a user starts to use the tracing facility, then they need to grow
4715  * to their default size.
4716  *
4717  * This function is to be called when a tracer is about to be used.
4718  */
4719 int tracing_update_buffers(void)
4720 {
4721 	int ret = 0;
4722 
4723 	mutex_lock(&trace_types_lock);
4724 	if (!ring_buffer_expanded)
4725 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4726 						RING_BUFFER_ALL_CPUS);
4727 	mutex_unlock(&trace_types_lock);
4728 
4729 	return ret;
4730 }
4731 
4732 struct trace_option_dentry;
4733 
4734 static void
4735 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4736 
4737 /*
4738  * Used to clear out the tracer before deletion of an instance.
4739  * Must have trace_types_lock held.
4740  */
4741 static void tracing_set_nop(struct trace_array *tr)
4742 {
4743 	if (tr->current_trace == &nop_trace)
4744 		return;
4745 
4746 	tr->current_trace->enabled--;
4747 
4748 	if (tr->current_trace->reset)
4749 		tr->current_trace->reset(tr);
4750 
4751 	tr->current_trace = &nop_trace;
4752 }
4753 
4754 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4755 {
4756 	/* Only enable if the directory has been created already. */
4757 	if (!tr->dir)
4758 		return;
4759 
4760 	create_trace_option_files(tr, t);
4761 }
4762 
4763 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4764 {
4765 	struct tracer *t;
4766 #ifdef CONFIG_TRACER_MAX_TRACE
4767 	bool had_max_tr;
4768 #endif
4769 	int ret = 0;
4770 
4771 	mutex_lock(&trace_types_lock);
4772 
4773 	if (!ring_buffer_expanded) {
4774 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4775 						RING_BUFFER_ALL_CPUS);
4776 		if (ret < 0)
4777 			goto out;
4778 		ret = 0;
4779 	}
4780 
4781 	for (t = trace_types; t; t = t->next) {
4782 		if (strcmp(t->name, buf) == 0)
4783 			break;
4784 	}
4785 	if (!t) {
4786 		ret = -EINVAL;
4787 		goto out;
4788 	}
4789 	if (t == tr->current_trace)
4790 		goto out;
4791 
4792 	/* Some tracers are only allowed for the top level buffer */
4793 	if (!trace_ok_for_array(t, tr)) {
4794 		ret = -EINVAL;
4795 		goto out;
4796 	}
4797 
4798 	/* If trace pipe files are being read, we can't change the tracer */
4799 	if (tr->current_trace->ref) {
4800 		ret = -EBUSY;
4801 		goto out;
4802 	}
4803 
4804 	trace_branch_disable();
4805 
4806 	tr->current_trace->enabled--;
4807 
4808 	if (tr->current_trace->reset)
4809 		tr->current_trace->reset(tr);
4810 
4811 	/* Current trace needs to be nop_trace before synchronize_sched */
4812 	tr->current_trace = &nop_trace;
4813 
4814 #ifdef CONFIG_TRACER_MAX_TRACE
4815 	had_max_tr = tr->allocated_snapshot;
4816 
4817 	if (had_max_tr && !t->use_max_tr) {
4818 		/*
4819 		 * We need to make sure that the update_max_tr sees that
4820 		 * current_trace changed to nop_trace to keep it from
4821 		 * swapping the buffers after we resize it.
4822 		 * The update_max_tr is called from interrupts disabled
4823 		 * so a synchronized_sched() is sufficient.
4824 		 */
4825 		synchronize_sched();
4826 		free_snapshot(tr);
4827 	}
4828 #endif
4829 
4830 #ifdef CONFIG_TRACER_MAX_TRACE
4831 	if (t->use_max_tr && !had_max_tr) {
4832 		ret = alloc_snapshot(tr);
4833 		if (ret < 0)
4834 			goto out;
4835 	}
4836 #endif
4837 
4838 	if (t->init) {
4839 		ret = tracer_init(t, tr);
4840 		if (ret)
4841 			goto out;
4842 	}
4843 
4844 	tr->current_trace = t;
4845 	tr->current_trace->enabled++;
4846 	trace_branch_enable(tr);
4847  out:
4848 	mutex_unlock(&trace_types_lock);
4849 
4850 	return ret;
4851 }
4852 
4853 static ssize_t
4854 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4855 			size_t cnt, loff_t *ppos)
4856 {
4857 	struct trace_array *tr = filp->private_data;
4858 	char buf[MAX_TRACER_SIZE+1];
4859 	int i;
4860 	size_t ret;
4861 	int err;
4862 
4863 	ret = cnt;
4864 
4865 	if (cnt > MAX_TRACER_SIZE)
4866 		cnt = MAX_TRACER_SIZE;
4867 
4868 	if (copy_from_user(buf, ubuf, cnt))
4869 		return -EFAULT;
4870 
4871 	buf[cnt] = 0;
4872 
4873 	/* strip ending whitespace. */
4874 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4875 		buf[i] = 0;
4876 
4877 	err = tracing_set_tracer(tr, buf);
4878 	if (err)
4879 		return err;
4880 
4881 	*ppos += ret;
4882 
4883 	return ret;
4884 }
4885 
4886 static ssize_t
4887 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4888 		   size_t cnt, loff_t *ppos)
4889 {
4890 	char buf[64];
4891 	int r;
4892 
4893 	r = snprintf(buf, sizeof(buf), "%ld\n",
4894 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4895 	if (r > sizeof(buf))
4896 		r = sizeof(buf);
4897 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4898 }
4899 
4900 static ssize_t
4901 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4902 		    size_t cnt, loff_t *ppos)
4903 {
4904 	unsigned long val;
4905 	int ret;
4906 
4907 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4908 	if (ret)
4909 		return ret;
4910 
4911 	*ptr = val * 1000;
4912 
4913 	return cnt;
4914 }
4915 
4916 static ssize_t
4917 tracing_thresh_read(struct file *filp, char __user *ubuf,
4918 		    size_t cnt, loff_t *ppos)
4919 {
4920 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4921 }
4922 
4923 static ssize_t
4924 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4925 		     size_t cnt, loff_t *ppos)
4926 {
4927 	struct trace_array *tr = filp->private_data;
4928 	int ret;
4929 
4930 	mutex_lock(&trace_types_lock);
4931 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4932 	if (ret < 0)
4933 		goto out;
4934 
4935 	if (tr->current_trace->update_thresh) {
4936 		ret = tr->current_trace->update_thresh(tr);
4937 		if (ret < 0)
4938 			goto out;
4939 	}
4940 
4941 	ret = cnt;
4942 out:
4943 	mutex_unlock(&trace_types_lock);
4944 
4945 	return ret;
4946 }
4947 
4948 #ifdef CONFIG_TRACER_MAX_TRACE
4949 
4950 static ssize_t
4951 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4952 		     size_t cnt, loff_t *ppos)
4953 {
4954 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4955 }
4956 
4957 static ssize_t
4958 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4959 		      size_t cnt, loff_t *ppos)
4960 {
4961 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4962 }
4963 
4964 #endif
4965 
4966 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4967 {
4968 	struct trace_array *tr = inode->i_private;
4969 	struct trace_iterator *iter;
4970 	int ret = 0;
4971 
4972 	if (tracing_disabled)
4973 		return -ENODEV;
4974 
4975 	if (trace_array_get(tr) < 0)
4976 		return -ENODEV;
4977 
4978 	mutex_lock(&trace_types_lock);
4979 
4980 	/* create a buffer to store the information to pass to userspace */
4981 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4982 	if (!iter) {
4983 		ret = -ENOMEM;
4984 		__trace_array_put(tr);
4985 		goto out;
4986 	}
4987 
4988 	trace_seq_init(&iter->seq);
4989 	iter->trace = tr->current_trace;
4990 
4991 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4992 		ret = -ENOMEM;
4993 		goto fail;
4994 	}
4995 
4996 	/* trace pipe does not show start of buffer */
4997 	cpumask_setall(iter->started);
4998 
4999 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5000 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5001 
5002 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5003 	if (trace_clocks[tr->clock_id].in_ns)
5004 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5005 
5006 	iter->tr = tr;
5007 	iter->trace_buffer = &tr->trace_buffer;
5008 	iter->cpu_file = tracing_get_cpu(inode);
5009 	mutex_init(&iter->mutex);
5010 	filp->private_data = iter;
5011 
5012 	if (iter->trace->pipe_open)
5013 		iter->trace->pipe_open(iter);
5014 
5015 	nonseekable_open(inode, filp);
5016 
5017 	tr->current_trace->ref++;
5018 out:
5019 	mutex_unlock(&trace_types_lock);
5020 	return ret;
5021 
5022 fail:
5023 	kfree(iter->trace);
5024 	kfree(iter);
5025 	__trace_array_put(tr);
5026 	mutex_unlock(&trace_types_lock);
5027 	return ret;
5028 }
5029 
5030 static int tracing_release_pipe(struct inode *inode, struct file *file)
5031 {
5032 	struct trace_iterator *iter = file->private_data;
5033 	struct trace_array *tr = inode->i_private;
5034 
5035 	mutex_lock(&trace_types_lock);
5036 
5037 	tr->current_trace->ref--;
5038 
5039 	if (iter->trace->pipe_close)
5040 		iter->trace->pipe_close(iter);
5041 
5042 	mutex_unlock(&trace_types_lock);
5043 
5044 	free_cpumask_var(iter->started);
5045 	mutex_destroy(&iter->mutex);
5046 	kfree(iter);
5047 
5048 	trace_array_put(tr);
5049 
5050 	return 0;
5051 }
5052 
5053 static unsigned int
5054 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5055 {
5056 	struct trace_array *tr = iter->tr;
5057 
5058 	/* Iterators are static, they should be filled or empty */
5059 	if (trace_buffer_iter(iter, iter->cpu_file))
5060 		return POLLIN | POLLRDNORM;
5061 
5062 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5063 		/*
5064 		 * Always select as readable when in blocking mode
5065 		 */
5066 		return POLLIN | POLLRDNORM;
5067 	else
5068 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5069 					     filp, poll_table);
5070 }
5071 
5072 static unsigned int
5073 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5074 {
5075 	struct trace_iterator *iter = filp->private_data;
5076 
5077 	return trace_poll(iter, filp, poll_table);
5078 }
5079 
5080 /* Must be called with iter->mutex held. */
5081 static int tracing_wait_pipe(struct file *filp)
5082 {
5083 	struct trace_iterator *iter = filp->private_data;
5084 	int ret;
5085 
5086 	while (trace_empty(iter)) {
5087 
5088 		if ((filp->f_flags & O_NONBLOCK)) {
5089 			return -EAGAIN;
5090 		}
5091 
5092 		/*
5093 		 * We block until we read something and tracing is disabled.
5094 		 * We still block if tracing is disabled, but we have never
5095 		 * read anything. This allows a user to cat this file, and
5096 		 * then enable tracing. But after we have read something,
5097 		 * we give an EOF when tracing is again disabled.
5098 		 *
5099 		 * iter->pos will be 0 if we haven't read anything.
5100 		 */
5101 		if (!tracing_is_on() && iter->pos)
5102 			break;
5103 
5104 		mutex_unlock(&iter->mutex);
5105 
5106 		ret = wait_on_pipe(iter, false);
5107 
5108 		mutex_lock(&iter->mutex);
5109 
5110 		if (ret)
5111 			return ret;
5112 	}
5113 
5114 	return 1;
5115 }
5116 
5117 /*
5118  * Consumer reader.
5119  */
5120 static ssize_t
5121 tracing_read_pipe(struct file *filp, char __user *ubuf,
5122 		  size_t cnt, loff_t *ppos)
5123 {
5124 	struct trace_iterator *iter = filp->private_data;
5125 	ssize_t sret;
5126 
5127 	/* return any leftover data */
5128 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5129 	if (sret != -EBUSY)
5130 		return sret;
5131 
5132 	trace_seq_init(&iter->seq);
5133 
5134 	/*
5135 	 * Avoid more than one consumer on a single file descriptor
5136 	 * This is just a matter of traces coherency, the ring buffer itself
5137 	 * is protected.
5138 	 */
5139 	mutex_lock(&iter->mutex);
5140 	if (iter->trace->read) {
5141 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5142 		if (sret)
5143 			goto out;
5144 	}
5145 
5146 waitagain:
5147 	sret = tracing_wait_pipe(filp);
5148 	if (sret <= 0)
5149 		goto out;
5150 
5151 	/* stop when tracing is finished */
5152 	if (trace_empty(iter)) {
5153 		sret = 0;
5154 		goto out;
5155 	}
5156 
5157 	if (cnt >= PAGE_SIZE)
5158 		cnt = PAGE_SIZE - 1;
5159 
5160 	/* reset all but tr, trace, and overruns */
5161 	memset(&iter->seq, 0,
5162 	       sizeof(struct trace_iterator) -
5163 	       offsetof(struct trace_iterator, seq));
5164 	cpumask_clear(iter->started);
5165 	iter->pos = -1;
5166 
5167 	trace_event_read_lock();
5168 	trace_access_lock(iter->cpu_file);
5169 	while (trace_find_next_entry_inc(iter) != NULL) {
5170 		enum print_line_t ret;
5171 		int save_len = iter->seq.seq.len;
5172 
5173 		ret = print_trace_line(iter);
5174 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5175 			/* don't print partial lines */
5176 			iter->seq.seq.len = save_len;
5177 			break;
5178 		}
5179 		if (ret != TRACE_TYPE_NO_CONSUME)
5180 			trace_consume(iter);
5181 
5182 		if (trace_seq_used(&iter->seq) >= cnt)
5183 			break;
5184 
5185 		/*
5186 		 * Setting the full flag means we reached the trace_seq buffer
5187 		 * size and we should leave by partial output condition above.
5188 		 * One of the trace_seq_* functions is not used properly.
5189 		 */
5190 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5191 			  iter->ent->type);
5192 	}
5193 	trace_access_unlock(iter->cpu_file);
5194 	trace_event_read_unlock();
5195 
5196 	/* Now copy what we have to the user */
5197 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5198 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5199 		trace_seq_init(&iter->seq);
5200 
5201 	/*
5202 	 * If there was nothing to send to user, in spite of consuming trace
5203 	 * entries, go back to wait for more entries.
5204 	 */
5205 	if (sret == -EBUSY)
5206 		goto waitagain;
5207 
5208 out:
5209 	mutex_unlock(&iter->mutex);
5210 
5211 	return sret;
5212 }
5213 
5214 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5215 				     unsigned int idx)
5216 {
5217 	__free_page(spd->pages[idx]);
5218 }
5219 
5220 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5221 	.can_merge		= 0,
5222 	.confirm		= generic_pipe_buf_confirm,
5223 	.release		= generic_pipe_buf_release,
5224 	.steal			= generic_pipe_buf_steal,
5225 	.get			= generic_pipe_buf_get,
5226 };
5227 
5228 static size_t
5229 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5230 {
5231 	size_t count;
5232 	int save_len;
5233 	int ret;
5234 
5235 	/* Seq buffer is page-sized, exactly what we need. */
5236 	for (;;) {
5237 		save_len = iter->seq.seq.len;
5238 		ret = print_trace_line(iter);
5239 
5240 		if (trace_seq_has_overflowed(&iter->seq)) {
5241 			iter->seq.seq.len = save_len;
5242 			break;
5243 		}
5244 
5245 		/*
5246 		 * This should not be hit, because it should only
5247 		 * be set if the iter->seq overflowed. But check it
5248 		 * anyway to be safe.
5249 		 */
5250 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5251 			iter->seq.seq.len = save_len;
5252 			break;
5253 		}
5254 
5255 		count = trace_seq_used(&iter->seq) - save_len;
5256 		if (rem < count) {
5257 			rem = 0;
5258 			iter->seq.seq.len = save_len;
5259 			break;
5260 		}
5261 
5262 		if (ret != TRACE_TYPE_NO_CONSUME)
5263 			trace_consume(iter);
5264 		rem -= count;
5265 		if (!trace_find_next_entry_inc(iter))	{
5266 			rem = 0;
5267 			iter->ent = NULL;
5268 			break;
5269 		}
5270 	}
5271 
5272 	return rem;
5273 }
5274 
/*
 * splice_read handler used by tracing_pipe_fops.
 *
 * Allocates pages, copies the contents of iter->seq into each of them
 * and hands the filled pages to @pipe via splice_to_pipe().
 *
 * Returns the splice_to_pipe() result when at least one page was filled,
 * 0 when none was, -ENOMEM when the splice descriptor cannot be grown,
 * -EFAULT when no entry is available after waiting, or the value
 * reported by the tracer's splice_read hook / tracing_wait_pipe().
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.flags		= flags,
		.ops		= &tracing_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	/* Must be paired with splice_shrink_spd() on every later exit. */
	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	/* Give the current tracer a chance to handle the splice itself. */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  trace_seq_used(&iter->seq));
		if (ret < 0) {
			/* Page i is dropped; pages 0..i-1 are still spliced. */
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = trace_seq_used(&iter->seq);

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	/* i is the number of pages that were completely set up. */
	spd.nr_pages = i;

	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	/* Error exits taken while only iter->mutex is held. */
	mutex_unlock(&iter->mutex);
	goto out;
}
5361 
5362 static ssize_t
5363 tracing_entries_read(struct file *filp, char __user *ubuf,
5364 		     size_t cnt, loff_t *ppos)
5365 {
5366 	struct inode *inode = file_inode(filp);
5367 	struct trace_array *tr = inode->i_private;
5368 	int cpu = tracing_get_cpu(inode);
5369 	char buf[64];
5370 	int r = 0;
5371 	ssize_t ret;
5372 
5373 	mutex_lock(&trace_types_lock);
5374 
5375 	if (cpu == RING_BUFFER_ALL_CPUS) {
5376 		int cpu, buf_size_same;
5377 		unsigned long size;
5378 
5379 		size = 0;
5380 		buf_size_same = 1;
5381 		/* check if all cpu sizes are same */
5382 		for_each_tracing_cpu(cpu) {
5383 			/* fill in the size from first enabled cpu */
5384 			if (size == 0)
5385 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5386 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5387 				buf_size_same = 0;
5388 				break;
5389 			}
5390 		}
5391 
5392 		if (buf_size_same) {
5393 			if (!ring_buffer_expanded)
5394 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5395 					    size >> 10,
5396 					    trace_buf_size >> 10);
5397 			else
5398 				r = sprintf(buf, "%lu\n", size >> 10);
5399 		} else
5400 			r = sprintf(buf, "X\n");
5401 	} else
5402 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5403 
5404 	mutex_unlock(&trace_types_lock);
5405 
5406 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5407 	return ret;
5408 }
5409 
5410 static ssize_t
5411 tracing_entries_write(struct file *filp, const char __user *ubuf,
5412 		      size_t cnt, loff_t *ppos)
5413 {
5414 	struct inode *inode = file_inode(filp);
5415 	struct trace_array *tr = inode->i_private;
5416 	unsigned long val;
5417 	int ret;
5418 
5419 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5420 	if (ret)
5421 		return ret;
5422 
5423 	/* must have at least 1 entry */
5424 	if (!val)
5425 		return -EINVAL;
5426 
5427 	/* value is in KB */
5428 	val <<= 10;
5429 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5430 	if (ret < 0)
5431 		return ret;
5432 
5433 	*ppos += cnt;
5434 
5435 	return cnt;
5436 }
5437 
5438 static ssize_t
5439 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5440 				size_t cnt, loff_t *ppos)
5441 {
5442 	struct trace_array *tr = filp->private_data;
5443 	char buf[64];
5444 	int r, cpu;
5445 	unsigned long size = 0, expanded_size = 0;
5446 
5447 	mutex_lock(&trace_types_lock);
5448 	for_each_tracing_cpu(cpu) {
5449 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5450 		if (!ring_buffer_expanded)
5451 			expanded_size += trace_buf_size >> 10;
5452 	}
5453 	if (ring_buffer_expanded)
5454 		r = sprintf(buf, "%lu\n", size);
5455 	else
5456 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5457 	mutex_unlock(&trace_types_lock);
5458 
5459 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5460 }
5461 
5462 static ssize_t
5463 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5464 			  size_t cnt, loff_t *ppos)
5465 {
5466 	/*
5467 	 * There is no need to read what the user has written, this function
5468 	 * is just to make sure that there is no error when "echo" is used
5469 	 */
5470 
5471 	*ppos += cnt;
5472 
5473 	return cnt;
5474 }
5475 
5476 static int
5477 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5478 {
5479 	struct trace_array *tr = inode->i_private;
5480 
5481 	/* disable tracing ? */
5482 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5483 		tracer_tracing_off(tr);
5484 	/* resize the ring buffer to 0 */
5485 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5486 
5487 	trace_array_put(tr);
5488 
5489 	return 0;
5490 }
5491 
5492 static ssize_t
5493 tracing_mark_write(struct file *filp, const char __user *ubuf,
5494 					size_t cnt, loff_t *fpos)
5495 {
5496 	unsigned long addr = (unsigned long)ubuf;
5497 	struct trace_array *tr = filp->private_data;
5498 	struct ring_buffer_event *event;
5499 	struct ring_buffer *buffer;
5500 	struct print_entry *entry;
5501 	unsigned long irq_flags;
5502 	struct page *pages[2];
5503 	void *map_page[2];
5504 	int nr_pages = 1;
5505 	ssize_t written;
5506 	int offset;
5507 	int size;
5508 	int len;
5509 	int ret;
5510 	int i;
5511 
5512 	if (tracing_disabled)
5513 		return -EINVAL;
5514 
5515 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5516 		return -EINVAL;
5517 
5518 	if (cnt > TRACE_BUF_SIZE)
5519 		cnt = TRACE_BUF_SIZE;
5520 
5521 	/*
5522 	 * Userspace is injecting traces into the kernel trace buffer.
5523 	 * We want to be as non intrusive as possible.
5524 	 * To do so, we do not want to allocate any special buffers
5525 	 * or take any locks, but instead write the userspace data
5526 	 * straight into the ring buffer.
5527 	 *
5528 	 * First we need to pin the userspace buffer into memory,
5529 	 * which, most likely it is, because it just referenced it.
5530 	 * But there's no guarantee that it is. By using get_user_pages_fast()
5531 	 * and kmap_atomic/kunmap_atomic() we can get access to the
5532 	 * pages directly. We then write the data directly into the
5533 	 * ring buffer.
5534 	 */
5535 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5536 
5537 	/* check if we cross pages */
5538 	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5539 		nr_pages = 2;
5540 
5541 	offset = addr & (PAGE_SIZE - 1);
5542 	addr &= PAGE_MASK;
5543 
5544 	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5545 	if (ret < nr_pages) {
5546 		while (--ret >= 0)
5547 			put_page(pages[ret]);
5548 		written = -EFAULT;
5549 		goto out;
5550 	}
5551 
5552 	for (i = 0; i < nr_pages; i++)
5553 		map_page[i] = kmap_atomic(pages[i]);
5554 
5555 	local_save_flags(irq_flags);
5556 	size = sizeof(*entry) + cnt + 2; /* possible \n added */
5557 	buffer = tr->trace_buffer.buffer;
5558 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5559 					  irq_flags, preempt_count());
5560 	if (!event) {
5561 		/* Ring buffer disabled, return as if not open for write */
5562 		written = -EBADF;
5563 		goto out_unlock;
5564 	}
5565 
5566 	entry = ring_buffer_event_data(event);
5567 	entry->ip = _THIS_IP_;
5568 
5569 	if (nr_pages == 2) {
5570 		len = PAGE_SIZE - offset;
5571 		memcpy(&entry->buf, map_page[0] + offset, len);
5572 		memcpy(&entry->buf[len], map_page[1], cnt - len);
5573 	} else
5574 		memcpy(&entry->buf, map_page[0] + offset, cnt);
5575 
5576 	if (entry->buf[cnt - 1] != '\n') {
5577 		entry->buf[cnt] = '\n';
5578 		entry->buf[cnt + 1] = '\0';
5579 	} else
5580 		entry->buf[cnt] = '\0';
5581 
5582 	__buffer_unlock_commit(buffer, event);
5583 
5584 	written = cnt;
5585 
5586 	*fpos += written;
5587 
5588  out_unlock:
5589 	for (i = nr_pages - 1; i >= 0; i--) {
5590 		kunmap_atomic(map_page[i]);
5591 		put_page(pages[i]);
5592 	}
5593  out:
5594 	return written;
5595 }
5596 
5597 static int tracing_clock_show(struct seq_file *m, void *v)
5598 {
5599 	struct trace_array *tr = m->private;
5600 	int i;
5601 
5602 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5603 		seq_printf(m,
5604 			"%s%s%s%s", i ? " " : "",
5605 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5606 			i == tr->clock_id ? "]" : "");
5607 	seq_putc(m, '\n');
5608 
5609 	return 0;
5610 }
5611 
5612 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5613 {
5614 	int i;
5615 
5616 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5617 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
5618 			break;
5619 	}
5620 	if (i == ARRAY_SIZE(trace_clocks))
5621 		return -EINVAL;
5622 
5623 	mutex_lock(&trace_types_lock);
5624 
5625 	tr->clock_id = i;
5626 
5627 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5628 
5629 	/*
5630 	 * New clock may not be consistent with the previous clock.
5631 	 * Reset the buffer so that it doesn't have incomparable timestamps.
5632 	 */
5633 	tracing_reset_online_cpus(&tr->trace_buffer);
5634 
5635 #ifdef CONFIG_TRACER_MAX_TRACE
5636 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5637 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5638 	tracing_reset_online_cpus(&tr->max_buffer);
5639 #endif
5640 
5641 	mutex_unlock(&trace_types_lock);
5642 
5643 	return 0;
5644 }
5645 
5646 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5647 				   size_t cnt, loff_t *fpos)
5648 {
5649 	struct seq_file *m = filp->private_data;
5650 	struct trace_array *tr = m->private;
5651 	char buf[64];
5652 	const char *clockstr;
5653 	int ret;
5654 
5655 	if (cnt >= sizeof(buf))
5656 		return -EINVAL;
5657 
5658 	if (copy_from_user(buf, ubuf, cnt))
5659 		return -EFAULT;
5660 
5661 	buf[cnt] = 0;
5662 
5663 	clockstr = strstrip(buf);
5664 
5665 	ret = tracing_set_clock(tr, clockstr);
5666 	if (ret)
5667 		return ret;
5668 
5669 	*fpos += cnt;
5670 
5671 	return cnt;
5672 }
5673 
5674 static int tracing_clock_open(struct inode *inode, struct file *file)
5675 {
5676 	struct trace_array *tr = inode->i_private;
5677 	int ret;
5678 
5679 	if (tracing_disabled)
5680 		return -ENODEV;
5681 
5682 	if (trace_array_get(tr))
5683 		return -ENODEV;
5684 
5685 	ret = single_open(file, tracing_clock_show, inode->i_private);
5686 	if (ret < 0)
5687 		trace_array_put(tr);
5688 
5689 	return ret;
5690 }
5691 
/*
 * Per-open state of the raw buffer files (see tracing_buffers_open()).
 */
struct ftrace_buffer_info {
	/* iterator describing which trace_array/cpu/buffer is being read */
	struct trace_iterator	iter;
	/* page obtained from ring_buffer_alloc_read_page(), NULL until needed */
	void			*spare;
	/* offset of the next unread byte in @spare; -1 forces a fresh read */
	unsigned int		read;
};
5697 
5698 #ifdef CONFIG_TRACER_SNAPSHOT
5699 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5700 {
5701 	struct trace_array *tr = inode->i_private;
5702 	struct trace_iterator *iter;
5703 	struct seq_file *m;
5704 	int ret = 0;
5705 
5706 	if (trace_array_get(tr) < 0)
5707 		return -ENODEV;
5708 
5709 	if (file->f_mode & FMODE_READ) {
5710 		iter = __tracing_open(inode, file, true);
5711 		if (IS_ERR(iter))
5712 			ret = PTR_ERR(iter);
5713 	} else {
5714 		/* Writes still need the seq_file to hold the private data */
5715 		ret = -ENOMEM;
5716 		m = kzalloc(sizeof(*m), GFP_KERNEL);
5717 		if (!m)
5718 			goto out;
5719 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5720 		if (!iter) {
5721 			kfree(m);
5722 			goto out;
5723 		}
5724 		ret = 0;
5725 
5726 		iter->tr = tr;
5727 		iter->trace_buffer = &tr->max_buffer;
5728 		iter->cpu_file = tracing_get_cpu(inode);
5729 		m->private = iter;
5730 		file->private_data = m;
5731 	}
5732 out:
5733 	if (ret < 0)
5734 		trace_array_put(tr);
5735 
5736 	return ret;
5737 }
5738 
/*
 * Write handler of the snapshot file.  The decimal value written selects
 * the action:
 *   0     - free the snapshot buffer (all-CPU file only, else -EINVAL)
 *   1     - allocate the snapshot buffer if needed and swap it with the
 *           live buffer
 *   other - clear the snapshot buffer contents if one is allocated
 *
 * Returns @cnt on success, -EBUSY when the current tracer itself uses
 * the max buffer, or a negative error from parsing/allocation.
 */
static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long val;
	int ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	mutex_lock(&trace_types_lock);

	/* The tracer owns the max buffer; user snapshots are not allowed. */
	if (tr->current_trace->use_max_tr) {
		ret = -EBUSY;
		goto out;
	}

	switch (val) {
	case 0:
		/* Freeing is only supported through the all-CPU file. */
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
		if (tr->allocated_snapshot)
			free_snapshot(tr);
		break;
	case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
#endif
		if (!tr->allocated_snapshot) {
			ret = alloc_snapshot(tr);
			if (ret < 0)
				break;
		}
		/* The swap is done with interrupts disabled on this CPU. */
		local_irq_disable();
		/* Now, we're going to swap */
		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
			update_max_tr(tr, current, smp_processor_id());
		else
			update_max_tr_single(tr, current, iter->cpu_file);
		local_irq_enable();
		break;
	default:
		/* Any other value just resets the snapshot contents. */
		if (tr->allocated_snapshot) {
			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
				tracing_reset_online_cpus(&tr->max_buffer);
			else
				tracing_reset(&tr->max_buffer, iter->cpu_file);
		}
		break;
	}

	/* ret is still 0 (from parsing) or >= 0 when the action succeeded. */
	if (ret >= 0) {
		*ppos += cnt;
		ret = cnt;
	}
out:
	mutex_unlock(&trace_types_lock);
	return ret;
}
5812 
5813 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5814 {
5815 	struct seq_file *m = file->private_data;
5816 	int ret;
5817 
5818 	ret = tracing_release(inode, file);
5819 
5820 	if (file->f_mode & FMODE_READ)
5821 		return ret;
5822 
5823 	/* If write only, the seq_file is just a stub */
5824 	if (m)
5825 		kfree(m->private);
5826 	kfree(m);
5827 
5828 	return 0;
5829 }
5830 
/*
 * Forward declarations: the raw buffer handlers are defined further down
 * but are already needed by snapshot_raw_open() and snapshot_raw_fops.
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp);
static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
				    size_t count, loff_t *ppos);
static int tracing_buffers_release(struct inode *inode, struct file *file);
static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5837 
5838 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5839 {
5840 	struct ftrace_buffer_info *info;
5841 	int ret;
5842 
5843 	ret = tracing_buffers_open(inode, filp);
5844 	if (ret < 0)
5845 		return ret;
5846 
5847 	info = filp->private_data;
5848 
5849 	if (info->iter.trace->use_max_tr) {
5850 		tracing_buffers_release(inode, filp);
5851 		return -EBUSY;
5852 	}
5853 
5854 	info->iter.snapshot = true;
5855 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
5856 
5857 	return ret;
5858 }
5859 
5860 #endif /* CONFIG_TRACER_SNAPSHOT */
5861 
5862 
/* Read/write access to the tracing threshold via tracing_thresh_{read,write}. */
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};
5869 
5870 #ifdef CONFIG_TRACER_MAX_TRACE
/* Read/write access to the max latency value; CONFIG_TRACER_MAX_TRACE only. */
static const struct file_operations tracing_max_lat_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_max_lat_read,
	.write		= tracing_max_lat_write,
	.llseek		= generic_file_llseek,
};
5877 #endif
5878 
/* Read/select the tracer via tracing_set_trace_{read,write}. */
static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
};
5885 
/* Operations of the "trace_pipe" files; not seekable. */
static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
	.llseek		= no_llseek,
};
5894 
/* Operations of the "buffer_size_kb" files: show/resize the ring buffer. */
static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
5902 
/* Read-only operations reporting the summed size of all per-cpu buffers. */
static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
5909 
/*
 * Write-only operations; the real work (shrinking the ring buffer to 0)
 * happens in the release handler when the file is closed.
 */
static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};
5915 
/* Write-only operations letting user space inject markers into the trace. */
static const struct file_operations tracing_mark_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_mark_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
5922 
/* seq_file based operations to list the trace clocks and select one. */
static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};
5930 
5931 #ifdef CONFIG_TRACER_SNAPSHOT
/* Operations of the "snapshot" files: read the snapshot, write to control it. */
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
	.llseek		= tracing_lseek,
	.release	= tracing_snapshot_release,
};
5939 
/*
 * Raw access to the snapshot buffer: same handlers as the raw buffer
 * files, except that open redirects the iterator to the max buffer.
 */
static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.read		= tracing_buffers_read,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};
5947 
5948 #endif /* CONFIG_TRACER_SNAPSHOT */
5949 
/*
 * Open handler of the raw buffer files.
 *
 * Takes a reference on the trace_array, allocates the per-open
 * ftrace_buffer_info and pins the current tracer by bumping its ref
 * count.  Returns -ENODEV when tracing is disabled or the trace_array
 * reference cannot be taken, -ENOMEM on allocation failure, otherwise
 * the result of nonseekable_open().
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct ftrace_buffer_info *info;
	int ret;

	if (tracing_disabled)
		return -ENODEV;

	if (trace_array_get(tr) < 0)
		return -ENODEV;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	mutex_lock(&trace_types_lock);

	info->iter.tr		= tr;
	info->iter.cpu_file	= tracing_get_cpu(inode);
	info->iter.trace	= tr->current_trace;
	info->iter.trace_buffer = &tr->trace_buffer;
	info->spare		= NULL;
	/* Force reading ring buffer for first read */
	info->read		= (unsigned int)-1;

	filp->private_data = info;

	/* Undone by tracing_buffers_release(). */
	tr->current_trace->ref++;

	mutex_unlock(&trace_types_lock);

	/*
	 * NOTE(review): if this ever fails, only the trace_array reference
	 * is dropped; @info and the tracer ref taken above are not unwound.
	 * Confirm nonseekable_open() cannot fail.
	 */
	ret = nonseekable_open(inode, filp);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
5990 
5991 static unsigned int
5992 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5993 {
5994 	struct ftrace_buffer_info *info = filp->private_data;
5995 	struct trace_iterator *iter = &info->iter;
5996 
5997 	return trace_poll(iter, filp, poll_table);
5998 }
5999 
/*
 * Read handler of the raw buffer files.
 *
 * A ring buffer page is read into info->spare and then handed out to
 * user space, possibly over several calls; info->read tracks how much of
 * the spare page has already been copied.
 *
 * Returns the number of bytes copied, 0 if the ring buffer read failed
 * while the buffer was not empty, -EAGAIN for an empty buffer on a
 * non-blocking file, -ENOMEM, -EBUSY, -EFAULT or the wait_on_pipe()
 * error.
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	ssize_t ret;
	ssize_t size;

	if (!count)
		return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* The snapshot buffer is off limits while the tracer uses it. */
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* The spare page is allocated lazily and kept until release. */
	if (!info->spare)
		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
							  iter->cpu_file);
	if (!info->spare)
		return -ENOMEM;

	/* Do we have previous read data to read? */
	if (info->read < PAGE_SIZE)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
				    &info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		if (trace_empty(iter)) {
			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			/* Block until data shows up, then retry the read. */
			ret = wait_on_pipe(iter, false);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	/* A fresh page was read: start handing it out from the beginning. */
	info->read = 0;
 read:
	size = PAGE_SIZE - info->read;
	if (size > count)
		size = count;

	/* copy_to_user() returns the number of bytes NOT copied. */
	ret = copy_to_user(ubuf, info->spare + info->read, size);
	if (ret == size)
		return -EFAULT;

	/* A partial copy is reported as a short read. */
	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
6066 
6067 static int tracing_buffers_release(struct inode *inode, struct file *file)
6068 {
6069 	struct ftrace_buffer_info *info = file->private_data;
6070 	struct trace_iterator *iter = &info->iter;
6071 
6072 	mutex_lock(&trace_types_lock);
6073 
6074 	iter->tr->current_trace->ref--;
6075 
6076 	__trace_array_put(iter->tr);
6077 
6078 	if (info->spare)
6079 		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6080 	kfree(info);
6081 
6082 	mutex_unlock(&trace_types_lock);
6083 
6084 	return 0;
6085 }
6086 
/*
 * Reference counted handle of a ring buffer read page that has been
 * handed to a pipe by tracing_buffers_splice_read().
 */
struct buffer_ref {
	/* ring buffer @page must be returned to */
	struct ring_buffer	*buffer;
	/* page obtained from ring_buffer_alloc_read_page() */
	void			*page;
	/* number of users; page and handle are freed when it drops to 0 */
	int			ref;
};
6092 
6093 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6094 				    struct pipe_buffer *buf)
6095 {
6096 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6097 
6098 	if (--ref->ref)
6099 		return;
6100 
6101 	ring_buffer_free_read_page(ref->buffer, ref->page);
6102 	kfree(ref);
6103 	buf->private = 0;
6104 }
6105 
6106 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6107 				struct pipe_buffer *buf)
6108 {
6109 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6110 
6111 	ref->ref++;
6112 }
6113 
/*
 * Pipe buffer operations for a buffer.
 * get/release maintain the buffer_ref reference count; the remaining
 * callbacks are the generic pipe helpers.
 */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.can_merge		= 0,
	.confirm		= generic_pipe_buf_confirm,
	.release		= buffer_pipe_buf_release,
	.steal			= generic_pipe_buf_steal,
	.get			= buffer_pipe_buf_get,
};
6122 
6123 /*
6124  * Callback from splice_to_pipe(), if we need to release some pages
6125  * at the end of the spd in case we error'ed out in filling the pipe.
6126  */
6127 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6128 {
6129 	struct buffer_ref *ref =
6130 		(struct buffer_ref *)spd->partial[i].private;
6131 
6132 	if (--ref->ref)
6133 		return;
6134 
6135 	ring_buffer_free_read_page(ref->buffer, ref->page);
6136 	kfree(ref);
6137 	spd->partial[i].private = 0;
6138 }
6139 
6140 static ssize_t
6141 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6142 			    struct pipe_inode_info *pipe, size_t len,
6143 			    unsigned int flags)
6144 {
6145 	struct ftrace_buffer_info *info = file->private_data;
6146 	struct trace_iterator *iter = &info->iter;
6147 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6148 	struct page *pages_def[PIPE_DEF_BUFFERS];
6149 	struct splice_pipe_desc spd = {
6150 		.pages		= pages_def,
6151 		.partial	= partial_def,
6152 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6153 		.flags		= flags,
6154 		.ops		= &buffer_pipe_buf_ops,
6155 		.spd_release	= buffer_spd_release,
6156 	};
6157 	struct buffer_ref *ref;
6158 	int entries, size, i;
6159 	ssize_t ret = 0;
6160 
6161 #ifdef CONFIG_TRACER_MAX_TRACE
6162 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6163 		return -EBUSY;
6164 #endif
6165 
6166 	if (splice_grow_spd(pipe, &spd))
6167 		return -ENOMEM;
6168 
6169 	if (*ppos & (PAGE_SIZE - 1))
6170 		return -EINVAL;
6171 
6172 	if (len & (PAGE_SIZE - 1)) {
6173 		if (len < PAGE_SIZE)
6174 			return -EINVAL;
6175 		len &= PAGE_MASK;
6176 	}
6177 
6178  again:
6179 	trace_access_lock(iter->cpu_file);
6180 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6181 
6182 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6183 		struct page *page;
6184 		int r;
6185 
6186 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6187 		if (!ref) {
6188 			ret = -ENOMEM;
6189 			break;
6190 		}
6191 
6192 		ref->ref = 1;
6193 		ref->buffer = iter->trace_buffer->buffer;
6194 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6195 		if (!ref->page) {
6196 			ret = -ENOMEM;
6197 			kfree(ref);
6198 			break;
6199 		}
6200 
6201 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6202 					  len, iter->cpu_file, 1);
6203 		if (r < 0) {
6204 			ring_buffer_free_read_page(ref->buffer, ref->page);
6205 			kfree(ref);
6206 			break;
6207 		}
6208 
6209 		/*
6210 		 * zero out any left over data, this is going to
6211 		 * user land.
6212 		 */
6213 		size = ring_buffer_page_len(ref->page);
6214 		if (size < PAGE_SIZE)
6215 			memset(ref->page + size, 0, PAGE_SIZE - size);
6216 
6217 		page = virt_to_page(ref->page);
6218 
6219 		spd.pages[i] = page;
6220 		spd.partial[i].len = PAGE_SIZE;
6221 		spd.partial[i].offset = 0;
6222 		spd.partial[i].private = (unsigned long)ref;
6223 		spd.nr_pages++;
6224 		*ppos += PAGE_SIZE;
6225 
6226 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6227 	}
6228 
6229 	trace_access_unlock(iter->cpu_file);
6230 	spd.nr_pages = i;
6231 
6232 	/* did we read anything? */
6233 	if (!spd.nr_pages) {
6234 		if (ret)
6235 			return ret;
6236 
6237 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6238 			return -EAGAIN;
6239 
6240 		ret = wait_on_pipe(iter, true);
6241 		if (ret)
6242 			return ret;
6243 
6244 		goto again;
6245 	}
6246 
6247 	ret = splice_to_pipe(pipe, &spd);
6248 	splice_shrink_spd(&spd);
6249 
6250 	return ret;
6251 }
6252 
/* Operations of the "trace_pipe_raw" files: raw ring buffer pages; not seekable. */
static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};
6261 
6262 static ssize_t
6263 tracing_stats_read(struct file *filp, char __user *ubuf,
6264 		   size_t count, loff_t *ppos)
6265 {
6266 	struct inode *inode = file_inode(filp);
6267 	struct trace_array *tr = inode->i_private;
6268 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6269 	int cpu = tracing_get_cpu(inode);
6270 	struct trace_seq *s;
6271 	unsigned long cnt;
6272 	unsigned long long t;
6273 	unsigned long usec_rem;
6274 
6275 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6276 	if (!s)
6277 		return -ENOMEM;
6278 
6279 	trace_seq_init(s);
6280 
6281 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6282 	trace_seq_printf(s, "entries: %ld\n", cnt);
6283 
6284 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6285 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6286 
6287 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6288 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6289 
6290 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6291 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6292 
6293 	if (trace_clocks[tr->clock_id].in_ns) {
6294 		/* local or global for trace_clock */
6295 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6296 		usec_rem = do_div(t, USEC_PER_SEC);
6297 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6298 								t, usec_rem);
6299 
6300 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6301 		usec_rem = do_div(t, USEC_PER_SEC);
6302 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6303 	} else {
6304 		/* counter or tsc mode for trace_clock */
6305 		trace_seq_printf(s, "oldest event ts: %llu\n",
6306 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6307 
6308 		trace_seq_printf(s, "now ts: %llu\n",
6309 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6310 	}
6311 
6312 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6313 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6314 
6315 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6316 	trace_seq_printf(s, "read events: %ld\n", cnt);
6317 
6318 	count = simple_read_from_buffer(ubuf, count, ppos,
6319 					s->buffer, trace_seq_used(s));
6320 
6321 	kfree(s);
6322 
6323 	return count;
6324 }
6325 
/* Read-only operations of the per-cpu "stats" files. */
static const struct file_operations tracing_stats_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_stats_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
6332 
6333 #ifdef CONFIG_DYNAMIC_FTRACE
6334 
/*
 * Weak default that contributes no text: it writes nothing into @buf and
 * reports 0 bytes.  tracing_read_dyn_info() adds the return value to its
 * output length, so an override is expected to return the number of
 * bytes it put into @buf (at most @size).
 */
int __weak ftrace_arch_read_dyn_info(char *buf, int size)
{
	return 0;
}
6339 
6340 static ssize_t
6341 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6342 		  size_t cnt, loff_t *ppos)
6343 {
6344 	static char ftrace_dyn_info_buffer[1024];
6345 	static DEFINE_MUTEX(dyn_info_mutex);
6346 	unsigned long *p = filp->private_data;
6347 	char *buf = ftrace_dyn_info_buffer;
6348 	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6349 	int r;
6350 
6351 	mutex_lock(&dyn_info_mutex);
6352 	r = sprintf(buf, "%ld ", *p);
6353 
6354 	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6355 	buf[r++] = '\n';
6356 
6357 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6358 
6359 	mutex_unlock(&dyn_info_mutex);
6360 
6361 	return r;
6362 }
6363 
/* Read-only operations exposing the dynamic ftrace info text. */
static const struct file_operations tracing_dyn_info_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_read_dyn_info,
	.llseek		= generic_file_llseek,
};
6369 #endif /* CONFIG_DYNAMIC_FTRACE */
6370 
6371 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/* Function probe callback: take a snapshot every time the probe fires. */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
{
	tracing_snapshot();
}
6377 
/*
 * Function probe callback with a hit budget: the probe's data slot is
 * used directly as an unsigned long counter.  A counter of 0 means the
 * budget is used up; (unsigned long)-1 means unlimited and is never
 * decremented.
 */
static void
ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
{
	/* Cast to the type actually used, not to its signed counterpart. */
	unsigned long *count = (unsigned long *)data;

	if (!*count)
		return;

	if (*count != (unsigned long)-1)
		(*count)--;

	tracing_snapshot();
}
6391 
/*
 * Print one snapshot probe as "<function>:snapshot:unlimited" or
 * "<function>:snapshot:count=<n>", depending on the remaining count
 * stored in @data.
 */
static int
ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
		      struct ftrace_probe_ops *ops, void *data)
{
	const long remaining = (long)data;

	seq_printf(m, "%ps:", (void *)ip);
	seq_puts(m, "snapshot");

	if (remaining != -1) {
		seq_printf(m, ":count=%ld\n", remaining);
		return 0;
	}

	seq_puts(m, ":unlimited\n");
	return 0;
}
6409 
/* Probe ops used when no count parameter is given: snapshot on every hit. */
static struct ftrace_probe_ops snapshot_probe_ops = {
	.func			= ftrace_snapshot,
	.print			= ftrace_snapshot_print,
};
6414 
/* Probe ops used when a count parameter is given: limited number of snapshots. */
static struct ftrace_probe_ops snapshot_count_probe_ops = {
	.func			= ftrace_count_snapshot,
	.print			= ftrace_snapshot_print,
};
6419 
6420 static int
6421 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6422 			       char *glob, char *cmd, char *param, int enable)
6423 {
6424 	struct ftrace_probe_ops *ops;
6425 	void *count = (void *)-1;
6426 	char *number;
6427 	int ret;
6428 
6429 	/* hash funcs only work with set_ftrace_filter */
6430 	if (!enable)
6431 		return -EINVAL;
6432 
6433 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6434 
6435 	if (glob[0] == '!') {
6436 		unregister_ftrace_function_probe_func(glob+1, ops);
6437 		return 0;
6438 	}
6439 
6440 	if (!param)
6441 		goto out_reg;
6442 
6443 	number = strsep(&param, ":");
6444 
6445 	if (!strlen(number))
6446 		goto out_reg;
6447 
6448 	/*
6449 	 * We use the callback data field (which is a pointer)
6450 	 * as our counter.
6451 	 */
6452 	ret = kstrtoul(number, 0, (unsigned long *)&count);
6453 	if (ret)
6454 		return ret;
6455 
6456  out_reg:
6457 	ret = register_ftrace_function_probe(glob, ops, count);
6458 
6459 	if (ret >= 0)
6460 		alloc_snapshot(&global_trace);
6461 
6462 	return ret < 0 ? ret : 0;
6463 }
6464 
/* Binds the command name "snapshot" to its handler. */
static struct ftrace_func_command ftrace_snapshot_cmd = {
	.name			= "snapshot",
	.func			= ftrace_trace_snapshot_callback,
};
6469 
/* Register the "snapshot" function command; returns register_ftrace_command()'s result. */
static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
6474 #else
/* Nothing to register without both snapshot and dynamic ftrace support. */
static inline __init int register_snapshot_cmd(void) { return 0; }
6476 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6477 
6478 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6479 {
6480 	if (WARN_ON(!tr->dir))
6481 		return ERR_PTR(-ENODEV);
6482 
6483 	/* Top directory uses NULL as the parent */
6484 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6485 		return NULL;
6486 
6487 	/* All sub buffers have a descriptor */
6488 	return tr->dir;
6489 }
6490 
6491 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6492 {
6493 	struct dentry *d_tracer;
6494 
6495 	if (tr->percpu_dir)
6496 		return tr->percpu_dir;
6497 
6498 	d_tracer = tracing_get_dentry(tr);
6499 	if (IS_ERR(d_tracer))
6500 		return NULL;
6501 
6502 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6503 
6504 	WARN_ONCE(!tr->percpu_dir,
6505 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6506 
6507 	return tr->percpu_dir;
6508 }
6509 
6510 static struct dentry *
6511 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6512 		      void *data, long cpu, const struct file_operations *fops)
6513 {
6514 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6515 
6516 	if (ret) /* See tracing_get_cpu() */
6517 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
6518 	return ret;
6519 }
6520 
6521 static void
6522 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6523 {
6524 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6525 	struct dentry *d_cpu;
6526 	char cpu_dir[30]; /* 30 characters should be more than enough */
6527 
6528 	if (!d_percpu)
6529 		return;
6530 
6531 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
6532 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6533 	if (!d_cpu) {
6534 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6535 		return;
6536 	}
6537 
6538 	/* per cpu trace_pipe */
6539 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6540 				tr, cpu, &tracing_pipe_fops);
6541 
6542 	/* per cpu trace */
6543 	trace_create_cpu_file("trace", 0644, d_cpu,
6544 				tr, cpu, &tracing_fops);
6545 
6546 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6547 				tr, cpu, &tracing_buffers_fops);
6548 
6549 	trace_create_cpu_file("stats", 0444, d_cpu,
6550 				tr, cpu, &tracing_stats_fops);
6551 
6552 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6553 				tr, cpu, &tracing_entries_fops);
6554 
6555 #ifdef CONFIG_TRACER_SNAPSHOT
6556 	trace_create_cpu_file("snapshot", 0644, d_cpu,
6557 				tr, cpu, &snapshot_fops);
6558 
6559 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6560 				tr, cpu, &snapshot_raw_fops);
6561 #endif
6562 }
6563 
6564 #ifdef CONFIG_FTRACE_SELFTEST
6565 /* Let selftest have access to static functions in this file */
6566 #include "trace_selftest.c"
6567 #endif
6568 
6569 static ssize_t
6570 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6571 			loff_t *ppos)
6572 {
6573 	struct trace_option_dentry *topt = filp->private_data;
6574 	char *buf;
6575 
6576 	if (topt->flags->val & topt->opt->bit)
6577 		buf = "1\n";
6578 	else
6579 		buf = "0\n";
6580 
6581 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6582 }
6583 
6584 static ssize_t
6585 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6586 			 loff_t *ppos)
6587 {
6588 	struct trace_option_dentry *topt = filp->private_data;
6589 	unsigned long val;
6590 	int ret;
6591 
6592 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6593 	if (ret)
6594 		return ret;
6595 
6596 	if (val != 0 && val != 1)
6597 		return -EINVAL;
6598 
6599 	if (!!(topt->flags->val & topt->opt->bit) != val) {
6600 		mutex_lock(&trace_types_lock);
6601 		ret = __set_tracer_option(topt->tr, topt->flags,
6602 					  topt->opt, !val);
6603 		mutex_unlock(&trace_types_lock);
6604 		if (ret)
6605 			return ret;
6606 	}
6607 
6608 	*ppos += cnt;
6609 
6610 	return cnt;
6611 }
6612 
6613 
6614 static const struct file_operations trace_options_fops = {
6615 	.open = tracing_open_generic,
6616 	.read = trace_options_read,
6617 	.write = trace_options_write,
6618 	.llseek	= generic_file_llseek,
6619 };
6620 
6621 /*
6622  * In order to pass in both the trace_array descriptor as well as the index
6623  * to the flag that the trace option file represents, the trace_array
6624  * has a character array of trace_flags_index[], which holds the index
6625  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6626  * The address of this character array is passed to the flag option file
6627  * read/write callbacks.
6628  *
6629  * In order to extract both the index and the trace_array descriptor,
6630  * get_tr_index() uses the following algorithm.
6631  *
6632  *   idx = *ptr;
6633  *
6634  * As the pointer itself contains the address of the index (remember
6635  * index[1] == 1).
6636  *
6637  * Then to get the trace_array descriptor, by subtracting that index
6638  * from the ptr, we get to the start of the index itself.
6639  *
6640  *   ptr - idx == &index[0]
6641  *
6642  * Then a simple container_of() from that pointer gets us to the
6643  * trace_array descriptor.
6644  */
6645 static void get_tr_index(void *data, struct trace_array **ptr,
6646 			 unsigned int *pindex)
6647 {
6648 	*pindex = *(unsigned char *)data;
6649 
6650 	*ptr = container_of(data - *pindex, struct trace_array,
6651 			    trace_flags_index);
6652 }
6653 
6654 static ssize_t
6655 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6656 			loff_t *ppos)
6657 {
6658 	void *tr_index = filp->private_data;
6659 	struct trace_array *tr;
6660 	unsigned int index;
6661 	char *buf;
6662 
6663 	get_tr_index(tr_index, &tr, &index);
6664 
6665 	if (tr->trace_flags & (1 << index))
6666 		buf = "1\n";
6667 	else
6668 		buf = "0\n";
6669 
6670 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6671 }
6672 
6673 static ssize_t
6674 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6675 			 loff_t *ppos)
6676 {
6677 	void *tr_index = filp->private_data;
6678 	struct trace_array *tr;
6679 	unsigned int index;
6680 	unsigned long val;
6681 	int ret;
6682 
6683 	get_tr_index(tr_index, &tr, &index);
6684 
6685 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6686 	if (ret)
6687 		return ret;
6688 
6689 	if (val != 0 && val != 1)
6690 		return -EINVAL;
6691 
6692 	mutex_lock(&trace_types_lock);
6693 	ret = set_tracer_flag(tr, 1 << index, val);
6694 	mutex_unlock(&trace_types_lock);
6695 
6696 	if (ret < 0)
6697 		return ret;
6698 
6699 	*ppos += cnt;
6700 
6701 	return cnt;
6702 }
6703 
6704 static const struct file_operations trace_options_core_fops = {
6705 	.open = tracing_open_generic,
6706 	.read = trace_options_core_read,
6707 	.write = trace_options_core_write,
6708 	.llseek = generic_file_llseek,
6709 };
6710 
6711 struct dentry *trace_create_file(const char *name,
6712 				 umode_t mode,
6713 				 struct dentry *parent,
6714 				 void *data,
6715 				 const struct file_operations *fops)
6716 {
6717 	struct dentry *ret;
6718 
6719 	ret = tracefs_create_file(name, mode, parent, data, fops);
6720 	if (!ret)
6721 		pr_warn("Could not create tracefs '%s' entry\n", name);
6722 
6723 	return ret;
6724 }
6725 
6726 
6727 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6728 {
6729 	struct dentry *d_tracer;
6730 
6731 	if (tr->options)
6732 		return tr->options;
6733 
6734 	d_tracer = tracing_get_dentry(tr);
6735 	if (IS_ERR(d_tracer))
6736 		return NULL;
6737 
6738 	tr->options = tracefs_create_dir("options", d_tracer);
6739 	if (!tr->options) {
6740 		pr_warn("Could not create tracefs directory 'options'\n");
6741 		return NULL;
6742 	}
6743 
6744 	return tr->options;
6745 }
6746 
6747 static void
6748 create_trace_option_file(struct trace_array *tr,
6749 			 struct trace_option_dentry *topt,
6750 			 struct tracer_flags *flags,
6751 			 struct tracer_opt *opt)
6752 {
6753 	struct dentry *t_options;
6754 
6755 	t_options = trace_options_init_dentry(tr);
6756 	if (!t_options)
6757 		return;
6758 
6759 	topt->flags = flags;
6760 	topt->opt = opt;
6761 	topt->tr = tr;
6762 
6763 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6764 				    &trace_options_fops);
6765 
6766 }
6767 
6768 static void
6769 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6770 {
6771 	struct trace_option_dentry *topts;
6772 	struct trace_options *tr_topts;
6773 	struct tracer_flags *flags;
6774 	struct tracer_opt *opts;
6775 	int cnt;
6776 	int i;
6777 
6778 	if (!tracer)
6779 		return;
6780 
6781 	flags = tracer->flags;
6782 
6783 	if (!flags || !flags->opts)
6784 		return;
6785 
6786 	/*
6787 	 * If this is an instance, only create flags for tracers
6788 	 * the instance may have.
6789 	 */
6790 	if (!trace_ok_for_array(tracer, tr))
6791 		return;
6792 
6793 	for (i = 0; i < tr->nr_topts; i++) {
6794 		/* Make sure there's no duplicate flags. */
6795 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6796 			return;
6797 	}
6798 
6799 	opts = flags->opts;
6800 
6801 	for (cnt = 0; opts[cnt].name; cnt++)
6802 		;
6803 
6804 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6805 	if (!topts)
6806 		return;
6807 
6808 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6809 			    GFP_KERNEL);
6810 	if (!tr_topts) {
6811 		kfree(topts);
6812 		return;
6813 	}
6814 
6815 	tr->topts = tr_topts;
6816 	tr->topts[tr->nr_topts].tracer = tracer;
6817 	tr->topts[tr->nr_topts].topts = topts;
6818 	tr->nr_topts++;
6819 
6820 	for (cnt = 0; opts[cnt].name; cnt++) {
6821 		create_trace_option_file(tr, &topts[cnt], flags,
6822 					 &opts[cnt]);
6823 		WARN_ONCE(topts[cnt].entry == NULL,
6824 			  "Failed to create trace option: %s",
6825 			  opts[cnt].name);
6826 	}
6827 }
6828 
6829 static struct dentry *
6830 create_trace_option_core_file(struct trace_array *tr,
6831 			      const char *option, long index)
6832 {
6833 	struct dentry *t_options;
6834 
6835 	t_options = trace_options_init_dentry(tr);
6836 	if (!t_options)
6837 		return NULL;
6838 
6839 	return trace_create_file(option, 0644, t_options,
6840 				 (void *)&tr->trace_flags_index[index],
6841 				 &trace_options_core_fops);
6842 }
6843 
6844 static void create_trace_options_dir(struct trace_array *tr)
6845 {
6846 	struct dentry *t_options;
6847 	bool top_level = tr == &global_trace;
6848 	int i;
6849 
6850 	t_options = trace_options_init_dentry(tr);
6851 	if (!t_options)
6852 		return;
6853 
6854 	for (i = 0; trace_options[i]; i++) {
6855 		if (top_level ||
6856 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6857 			create_trace_option_core_file(tr, trace_options[i], i);
6858 	}
6859 }
6860 
6861 static ssize_t
6862 rb_simple_read(struct file *filp, char __user *ubuf,
6863 	       size_t cnt, loff_t *ppos)
6864 {
6865 	struct trace_array *tr = filp->private_data;
6866 	char buf[64];
6867 	int r;
6868 
6869 	r = tracer_tracing_is_on(tr);
6870 	r = sprintf(buf, "%d\n", r);
6871 
6872 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6873 }
6874 
6875 static ssize_t
6876 rb_simple_write(struct file *filp, const char __user *ubuf,
6877 		size_t cnt, loff_t *ppos)
6878 {
6879 	struct trace_array *tr = filp->private_data;
6880 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
6881 	unsigned long val;
6882 	int ret;
6883 
6884 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6885 	if (ret)
6886 		return ret;
6887 
6888 	if (buffer) {
6889 		mutex_lock(&trace_types_lock);
6890 		if (val) {
6891 			tracer_tracing_on(tr);
6892 			if (tr->current_trace->start)
6893 				tr->current_trace->start(tr);
6894 		} else {
6895 			tracer_tracing_off(tr);
6896 			if (tr->current_trace->stop)
6897 				tr->current_trace->stop(tr);
6898 		}
6899 		mutex_unlock(&trace_types_lock);
6900 	}
6901 
6902 	(*ppos)++;
6903 
6904 	return cnt;
6905 }
6906 
6907 static const struct file_operations rb_simple_fops = {
6908 	.open		= tracing_open_generic_tr,
6909 	.read		= rb_simple_read,
6910 	.write		= rb_simple_write,
6911 	.release	= tracing_release_generic_tr,
6912 	.llseek		= default_llseek,
6913 };
6914 
/* The "instances" directory; assigned by create_trace_instances(). */
struct dentry *trace_instance_dir;

/* Defined further down; instance_mkdir() needs it before that point. */
static void init_tracer_tracefs(struct trace_array *tr,
				struct dentry *d_tracer);
6919 
6920 static int
6921 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6922 {
6923 	enum ring_buffer_flags rb_flags;
6924 
6925 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6926 
6927 	buf->tr = tr;
6928 
6929 	buf->buffer = ring_buffer_alloc(size, rb_flags);
6930 	if (!buf->buffer)
6931 		return -ENOMEM;
6932 
6933 	buf->data = alloc_percpu(struct trace_array_cpu);
6934 	if (!buf->data) {
6935 		ring_buffer_free(buf->buffer);
6936 		return -ENOMEM;
6937 	}
6938 
6939 	/* Allocate the first page for all buffers */
6940 	set_buffer_entries(&tr->trace_buffer,
6941 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
6942 
6943 	return 0;
6944 }
6945 
6946 static int allocate_trace_buffers(struct trace_array *tr, int size)
6947 {
6948 	int ret;
6949 
6950 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6951 	if (ret)
6952 		return ret;
6953 
6954 #ifdef CONFIG_TRACER_MAX_TRACE
6955 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
6956 				    allocate_snapshot ? size : 1);
6957 	if (WARN_ON(ret)) {
6958 		ring_buffer_free(tr->trace_buffer.buffer);
6959 		free_percpu(tr->trace_buffer.data);
6960 		return -ENOMEM;
6961 	}
6962 	tr->allocated_snapshot = allocate_snapshot;
6963 
6964 	/*
6965 	 * Only the top level trace array gets its snapshot allocated
6966 	 * from the kernel command line.
6967 	 */
6968 	allocate_snapshot = false;
6969 #endif
6970 	return 0;
6971 }
6972 
6973 static void free_trace_buffer(struct trace_buffer *buf)
6974 {
6975 	if (buf->buffer) {
6976 		ring_buffer_free(buf->buffer);
6977 		buf->buffer = NULL;
6978 		free_percpu(buf->data);
6979 		buf->data = NULL;
6980 	}
6981 }
6982 
6983 static void free_trace_buffers(struct trace_array *tr)
6984 {
6985 	if (!tr)
6986 		return;
6987 
6988 	free_trace_buffer(&tr->trace_buffer);
6989 
6990 #ifdef CONFIG_TRACER_MAX_TRACE
6991 	free_trace_buffer(&tr->max_buffer);
6992 #endif
6993 }
6994 
6995 static void init_trace_flags_index(struct trace_array *tr)
6996 {
6997 	int i;
6998 
6999 	/* Used by the trace options files */
7000 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7001 		tr->trace_flags_index[i] = i;
7002 }
7003 
7004 static void __update_tracer_options(struct trace_array *tr)
7005 {
7006 	struct tracer *t;
7007 
7008 	for (t = trace_types; t; t = t->next)
7009 		add_tracer_options(tr, t);
7010 }
7011 
7012 static void update_tracer_options(struct trace_array *tr)
7013 {
7014 	mutex_lock(&trace_types_lock);
7015 	__update_tracer_options(tr);
7016 	mutex_unlock(&trace_types_lock);
7017 }
7018 
7019 static int instance_mkdir(const char *name)
7020 {
7021 	struct trace_array *tr;
7022 	int ret;
7023 
7024 	mutex_lock(&trace_types_lock);
7025 
7026 	ret = -EEXIST;
7027 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7028 		if (tr->name && strcmp(tr->name, name) == 0)
7029 			goto out_unlock;
7030 	}
7031 
7032 	ret = -ENOMEM;
7033 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7034 	if (!tr)
7035 		goto out_unlock;
7036 
7037 	tr->name = kstrdup(name, GFP_KERNEL);
7038 	if (!tr->name)
7039 		goto out_free_tr;
7040 
7041 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7042 		goto out_free_tr;
7043 
7044 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7045 
7046 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7047 
7048 	raw_spin_lock_init(&tr->start_lock);
7049 
7050 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7051 
7052 	tr->current_trace = &nop_trace;
7053 
7054 	INIT_LIST_HEAD(&tr->systems);
7055 	INIT_LIST_HEAD(&tr->events);
7056 
7057 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7058 		goto out_free_tr;
7059 
7060 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7061 	if (!tr->dir)
7062 		goto out_free_tr;
7063 
7064 	ret = event_trace_add_tracer(tr->dir, tr);
7065 	if (ret) {
7066 		tracefs_remove_recursive(tr->dir);
7067 		goto out_free_tr;
7068 	}
7069 
7070 	init_tracer_tracefs(tr, tr->dir);
7071 	init_trace_flags_index(tr);
7072 	__update_tracer_options(tr);
7073 
7074 	list_add(&tr->list, &ftrace_trace_arrays);
7075 
7076 	mutex_unlock(&trace_types_lock);
7077 
7078 	return 0;
7079 
7080  out_free_tr:
7081 	free_trace_buffers(tr);
7082 	free_cpumask_var(tr->tracing_cpumask);
7083 	kfree(tr->name);
7084 	kfree(tr);
7085 
7086  out_unlock:
7087 	mutex_unlock(&trace_types_lock);
7088 
7089 	return ret;
7090 
7091 }
7092 
7093 static int instance_rmdir(const char *name)
7094 {
7095 	struct trace_array *tr;
7096 	int found = 0;
7097 	int ret;
7098 	int i;
7099 
7100 	mutex_lock(&trace_types_lock);
7101 
7102 	ret = -ENODEV;
7103 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7104 		if (tr->name && strcmp(tr->name, name) == 0) {
7105 			found = 1;
7106 			break;
7107 		}
7108 	}
7109 	if (!found)
7110 		goto out_unlock;
7111 
7112 	ret = -EBUSY;
7113 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7114 		goto out_unlock;
7115 
7116 	list_del(&tr->list);
7117 
7118 	/* Disable all the flags that were enabled coming in */
7119 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7120 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7121 			set_tracer_flag(tr, 1 << i, 0);
7122 	}
7123 
7124 	tracing_set_nop(tr);
7125 	event_trace_del_tracer(tr);
7126 	ftrace_destroy_function_files(tr);
7127 	tracefs_remove_recursive(tr->dir);
7128 	free_trace_buffers(tr);
7129 
7130 	for (i = 0; i < tr->nr_topts; i++) {
7131 		kfree(tr->topts[i].topts);
7132 	}
7133 	kfree(tr->topts);
7134 
7135 	kfree(tr->name);
7136 	kfree(tr);
7137 
7138 	ret = 0;
7139 
7140  out_unlock:
7141 	mutex_unlock(&trace_types_lock);
7142 
7143 	return ret;
7144 }
7145 
7146 static __init void create_trace_instances(struct dentry *d_tracer)
7147 {
7148 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7149 							 instance_mkdir,
7150 							 instance_rmdir);
7151 	if (WARN_ON(!trace_instance_dir))
7152 		return;
7153 }
7154 
7155 static void
7156 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7157 {
7158 	int cpu;
7159 
7160 	trace_create_file("available_tracers", 0444, d_tracer,
7161 			tr, &show_traces_fops);
7162 
7163 	trace_create_file("current_tracer", 0644, d_tracer,
7164 			tr, &set_tracer_fops);
7165 
7166 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7167 			  tr, &tracing_cpumask_fops);
7168 
7169 	trace_create_file("trace_options", 0644, d_tracer,
7170 			  tr, &tracing_iter_fops);
7171 
7172 	trace_create_file("trace", 0644, d_tracer,
7173 			  tr, &tracing_fops);
7174 
7175 	trace_create_file("trace_pipe", 0444, d_tracer,
7176 			  tr, &tracing_pipe_fops);
7177 
7178 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7179 			  tr, &tracing_entries_fops);
7180 
7181 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7182 			  tr, &tracing_total_entries_fops);
7183 
7184 	trace_create_file("free_buffer", 0200, d_tracer,
7185 			  tr, &tracing_free_buffer_fops);
7186 
7187 	trace_create_file("trace_marker", 0220, d_tracer,
7188 			  tr, &tracing_mark_fops);
7189 
7190 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7191 			  &trace_clock_fops);
7192 
7193 	trace_create_file("tracing_on", 0644, d_tracer,
7194 			  tr, &rb_simple_fops);
7195 
7196 	create_trace_options_dir(tr);
7197 
7198 #ifdef CONFIG_TRACER_MAX_TRACE
7199 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7200 			&tr->max_latency, &tracing_max_lat_fops);
7201 #endif
7202 
7203 	if (ftrace_create_function_files(tr, d_tracer))
7204 		WARN(1, "Could not allocate function filter files");
7205 
7206 #ifdef CONFIG_TRACER_SNAPSHOT
7207 	trace_create_file("snapshot", 0644, d_tracer,
7208 			  tr, &snapshot_fops);
7209 #endif
7210 
7211 	for_each_tracing_cpu(cpu)
7212 		tracing_init_tracefs_percpu(tr, cpu);
7213 
7214 	ftrace_init_tracefs(tr, d_tracer);
7215 }
7216 
7217 static struct vfsmount *trace_automount(void *ingore)
7218 {
7219 	struct vfsmount *mnt;
7220 	struct file_system_type *type;
7221 
7222 	/*
7223 	 * To maintain backward compatibility for tools that mount
7224 	 * debugfs to get to the tracing facility, tracefs is automatically
7225 	 * mounted to the debugfs/tracing directory.
7226 	 */
7227 	type = get_fs_type("tracefs");
7228 	if (!type)
7229 		return NULL;
7230 	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7231 	put_filesystem(type);
7232 	if (IS_ERR(mnt))
7233 		return NULL;
7234 	mntget(mnt);
7235 
7236 	return mnt;
7237 }
7238 
7239 /**
7240  * tracing_init_dentry - initialize top level trace array
7241  *
7242  * This is called when creating files or directories in the tracing
7243  * directory. It is called via fs_initcall() by any of the boot up code
7244  * and expects to return the dentry of the top level tracing directory.
7245  */
7246 struct dentry *tracing_init_dentry(void)
7247 {
7248 	struct trace_array *tr = &global_trace;
7249 
7250 	/* The top level trace array uses  NULL as parent */
7251 	if (tr->dir)
7252 		return NULL;
7253 
7254 	if (WARN_ON(!tracefs_initialized()) ||
7255 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
7256 		 WARN_ON(!debugfs_initialized())))
7257 		return ERR_PTR(-ENODEV);
7258 
7259 	/*
7260 	 * As there may still be users that expect the tracing
7261 	 * files to exist in debugfs/tracing, we must automount
7262 	 * the tracefs file system there, so older tools still
7263 	 * work with the newer kerenl.
7264 	 */
7265 	tr->dir = debugfs_create_automount("tracing", NULL,
7266 					   trace_automount, NULL);
7267 	if (!tr->dir) {
7268 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7269 		return ERR_PTR(-ENOMEM);
7270 	}
7271 
7272 	return NULL;
7273 }
7274 
7275 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7276 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7277 
7278 static void __init trace_enum_init(void)
7279 {
7280 	int len;
7281 
7282 	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7283 	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7284 }
7285 
7286 #ifdef CONFIG_MODULES
7287 static void trace_module_add_enums(struct module *mod)
7288 {
7289 	if (!mod->num_trace_enums)
7290 		return;
7291 
7292 	/*
7293 	 * Modules with bad taint do not have events created, do
7294 	 * not bother with enums either.
7295 	 */
7296 	if (trace_module_has_bad_taint(mod))
7297 		return;
7298 
7299 	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7300 }
7301 
7302 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7303 static void trace_module_remove_enums(struct module *mod)
7304 {
7305 	union trace_enum_map_item *map;
7306 	union trace_enum_map_item **last = &trace_enum_maps;
7307 
7308 	if (!mod->num_trace_enums)
7309 		return;
7310 
7311 	mutex_lock(&trace_enum_mutex);
7312 
7313 	map = trace_enum_maps;
7314 
7315 	while (map) {
7316 		if (map->head.mod == mod)
7317 			break;
7318 		map = trace_enum_jmp_to_tail(map);
7319 		last = &map->tail.next;
7320 		map = map->tail.next;
7321 	}
7322 	if (!map)
7323 		goto out;
7324 
7325 	*last = trace_enum_jmp_to_tail(map)->tail.next;
7326 	kfree(map);
7327  out:
7328 	mutex_unlock(&trace_enum_mutex);
7329 }
7330 #else
/* Stub used when CONFIG_TRACE_ENUM_MAP_FILE is off: does nothing. */
static inline void trace_module_remove_enums(struct module *mod)
{
}
7332 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7333 
7334 static int trace_module_notify(struct notifier_block *self,
7335 			       unsigned long val, void *data)
7336 {
7337 	struct module *mod = data;
7338 
7339 	switch (val) {
7340 	case MODULE_STATE_COMING:
7341 		trace_module_add_enums(mod);
7342 		break;
7343 	case MODULE_STATE_GOING:
7344 		trace_module_remove_enums(mod);
7345 		break;
7346 	}
7347 
7348 	return 0;
7349 }
7350 
7351 static struct notifier_block trace_module_nb = {
7352 	.notifier_call = trace_module_notify,
7353 	.priority = 0,
7354 };
7355 #endif /* CONFIG_MODULES */
7356 
7357 static __init int tracer_init_tracefs(void)
7358 {
7359 	struct dentry *d_tracer;
7360 
7361 	trace_access_lock_init();
7362 
7363 	d_tracer = tracing_init_dentry();
7364 	if (IS_ERR(d_tracer))
7365 		return 0;
7366 
7367 	init_tracer_tracefs(&global_trace, d_tracer);
7368 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7369 
7370 	trace_create_file("tracing_thresh", 0644, d_tracer,
7371 			&global_trace, &tracing_thresh_fops);
7372 
7373 	trace_create_file("README", 0444, d_tracer,
7374 			NULL, &tracing_readme_fops);
7375 
7376 	trace_create_file("saved_cmdlines", 0444, d_tracer,
7377 			NULL, &tracing_saved_cmdlines_fops);
7378 
7379 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7380 			  NULL, &tracing_saved_cmdlines_size_fops);
7381 
7382 	trace_enum_init();
7383 
7384 	trace_create_enum_file(d_tracer);
7385 
7386 #ifdef CONFIG_MODULES
7387 	register_module_notifier(&trace_module_nb);
7388 #endif
7389 
7390 #ifdef CONFIG_DYNAMIC_FTRACE
7391 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7392 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7393 #endif
7394 
7395 	create_trace_instances(d_tracer);
7396 
7397 	update_tracer_options(&global_trace);
7398 
7399 	return 0;
7400 }
7401 
7402 static int trace_panic_handler(struct notifier_block *this,
7403 			       unsigned long event, void *unused)
7404 {
7405 	if (ftrace_dump_on_oops)
7406 		ftrace_dump(ftrace_dump_on_oops);
7407 	return NOTIFY_OK;
7408 }
7409 
7410 static struct notifier_block trace_panic_notifier = {
7411 	.notifier_call  = trace_panic_handler,
7412 	.next           = NULL,
7413 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
7414 };
7415 
7416 static int trace_die_handler(struct notifier_block *self,
7417 			     unsigned long val,
7418 			     void *data)
7419 {
7420 	switch (val) {
7421 	case DIE_OOPS:
7422 		if (ftrace_dump_on_oops)
7423 			ftrace_dump(ftrace_dump_on_oops);
7424 		break;
7425 	default:
7426 		break;
7427 	}
7428 	return NOTIFY_OK;
7429 }
7430 
7431 static struct notifier_block trace_die_notifier = {
7432 	.notifier_call = trace_die_handler,
7433 	.priority = 200
7434 };
7435 
7436 /*
7437  * printk is set to max of 1024, we really don't need it that big.
7438  * Nothing should be printing 1000 characters anyway.
7439  */
7440 #define TRACE_MAX_PRINT		1000
7441 
7442 /*
7443  * Define here KERN_TRACE so that we have one place to modify
7444  * it if we decide to change what log level the ftrace dump
7445  * should be at.
7446  */
7447 #define KERN_TRACE		KERN_EMERG
7448 
7449 void
7450 trace_printk_seq(struct trace_seq *s)
7451 {
7452 	/* Probably should print a warning here. */
7453 	if (s->seq.len >= TRACE_MAX_PRINT)
7454 		s->seq.len = TRACE_MAX_PRINT;
7455 
7456 	/*
7457 	 * More paranoid code. Although the buffer size is set to
7458 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7459 	 * an extra layer of protection.
7460 	 */
7461 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7462 		s->seq.len = s->seq.size - 1;
7463 
7464 	/* should be zero ended, but we are paranoid. */
7465 	s->buffer[s->seq.len] = 0;
7466 
7467 	printk(KERN_TRACE "%s", s->buffer);
7468 
7469 	trace_seq_init(s);
7470 }
7471 
7472 void trace_init_global_iter(struct trace_iterator *iter)
7473 {
7474 	iter->tr = &global_trace;
7475 	iter->trace = iter->tr->current_trace;
7476 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
7477 	iter->trace_buffer = &global_trace.trace_buffer;
7478 
7479 	if (iter->trace && iter->trace->open)
7480 		iter->trace->open(iter);
7481 
7482 	/* Annotate start of buffers if we had overruns */
7483 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
7484 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
7485 
7486 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
7487 	if (trace_clocks[iter->tr->clock_id].in_ns)
7488 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7489 }
7490 
7491 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7492 {
7493 	/* use static because iter can be a bit big for the stack */
7494 	static struct trace_iterator iter;
7495 	static atomic_t dump_running;
7496 	struct trace_array *tr = &global_trace;
7497 	unsigned int old_userobj;
7498 	unsigned long flags;
7499 	int cnt = 0, cpu;
7500 
7501 	/* Only allow one dump user at a time. */
7502 	if (atomic_inc_return(&dump_running) != 1) {
7503 		atomic_dec(&dump_running);
7504 		return;
7505 	}
7506 
7507 	/*
7508 	 * Always turn off tracing when we dump.
7509 	 * We don't need to show trace output of what happens
7510 	 * between multiple crashes.
7511 	 *
7512 	 * If the user does a sysrq-z, then they can re-enable
7513 	 * tracing with echo 1 > tracing_on.
7514 	 */
7515 	tracing_off();
7516 
7517 	local_irq_save(flags);
7518 
7519 	/* Simulate the iterator */
7520 	trace_init_global_iter(&iter);
7521 
7522 	for_each_tracing_cpu(cpu) {
7523 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7524 	}
7525 
7526 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7527 
7528 	/* don't look at user memory in panic mode */
7529 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7530 
7531 	switch (oops_dump_mode) {
7532 	case DUMP_ALL:
7533 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7534 		break;
7535 	case DUMP_ORIG:
7536 		iter.cpu_file = raw_smp_processor_id();
7537 		break;
7538 	case DUMP_NONE:
7539 		goto out_enable;
7540 	default:
7541 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7542 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7543 	}
7544 
7545 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
7546 
7547 	/* Did function tracer already get disabled? */
7548 	if (ftrace_is_dead()) {
7549 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7550 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7551 	}
7552 
7553 	/*
7554 	 * We need to stop all tracing on all CPUS to read the
7555 	 * the next buffer. This is a bit expensive, but is
7556 	 * not done often. We fill all what we can read,
7557 	 * and then release the locks again.
7558 	 */
7559 
7560 	while (!trace_empty(&iter)) {
7561 
7562 		if (!cnt)
7563 			printk(KERN_TRACE "---------------------------------\n");
7564 
7565 		cnt++;
7566 
7567 		/* reset all but tr, trace, and overruns */
7568 		memset(&iter.seq, 0,
7569 		       sizeof(struct trace_iterator) -
7570 		       offsetof(struct trace_iterator, seq));
7571 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
7572 		iter.pos = -1;
7573 
7574 		if (trace_find_next_entry_inc(&iter) != NULL) {
7575 			int ret;
7576 
7577 			ret = print_trace_line(&iter);
7578 			if (ret != TRACE_TYPE_NO_CONSUME)
7579 				trace_consume(&iter);
7580 		}
7581 		touch_nmi_watchdog();
7582 
7583 		trace_printk_seq(&iter.seq);
7584 	}
7585 
7586 	if (!cnt)
7587 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
7588 	else
7589 		printk(KERN_TRACE "---------------------------------\n");
7590 
7591  out_enable:
7592 	tr->trace_flags |= old_userobj;
7593 
7594 	for_each_tracing_cpu(cpu) {
7595 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7596 	}
7597  	atomic_dec(&dump_running);
7598 	local_irq_restore(flags);
7599 }
7600 EXPORT_SYMBOL_GPL(ftrace_dump);
7601 
7602 __init static int tracer_alloc_buffers(void)
7603 {
7604 	int ring_buf_size;
7605 	int ret = -ENOMEM;
7606 
7607 	/*
7608 	 * Make sure we don't accidently add more trace options
7609 	 * than we have bits for.
7610 	 */
7611 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7612 
7613 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7614 		goto out;
7615 
7616 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7617 		goto out_free_buffer_mask;
7618 
7619 	/* Only allocate trace_printk buffers if a trace_printk exists */
7620 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7621 		/* Must be called before global_trace.buffer is allocated */
7622 		trace_printk_init_buffers();
7623 
7624 	/* To save memory, keep the ring buffer size to its minimum */
7625 	if (ring_buffer_expanded)
7626 		ring_buf_size = trace_buf_size;
7627 	else
7628 		ring_buf_size = 1;
7629 
7630 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7631 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7632 
7633 	raw_spin_lock_init(&global_trace.start_lock);
7634 
7635 	/* Used for event triggers */
7636 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7637 	if (!temp_buffer)
7638 		goto out_free_cpumask;
7639 
7640 	if (trace_create_savedcmd() < 0)
7641 		goto out_free_temp_buffer;
7642 
7643 	/* TODO: make the number of buffers hot pluggable with CPUS */
7644 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7645 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7646 		WARN_ON(1);
7647 		goto out_free_savedcmd;
7648 	}
7649 
7650 	if (global_trace.buffer_disabled)
7651 		tracing_off();
7652 
7653 	if (trace_boot_clock) {
7654 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
7655 		if (ret < 0)
7656 			pr_warn("Trace clock %s not defined, going back to default\n",
7657 				trace_boot_clock);
7658 	}
7659 
7660 	/*
7661 	 * register_tracer() might reference current_trace, so it
7662 	 * needs to be set before we register anything. This is
7663 	 * just a bootstrap of current_trace anyway.
7664 	 */
7665 	global_trace.current_trace = &nop_trace;
7666 
7667 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7668 
7669 	ftrace_init_global_array_ops(&global_trace);
7670 
7671 	init_trace_flags_index(&global_trace);
7672 
7673 	register_tracer(&nop_trace);
7674 
7675 	/* All seems OK, enable tracing */
7676 	tracing_disabled = 0;
7677 
7678 	atomic_notifier_chain_register(&panic_notifier_list,
7679 				       &trace_panic_notifier);
7680 
7681 	register_die_notifier(&trace_die_notifier);
7682 
7683 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7684 
7685 	INIT_LIST_HEAD(&global_trace.systems);
7686 	INIT_LIST_HEAD(&global_trace.events);
7687 	list_add(&global_trace.list, &ftrace_trace_arrays);
7688 
7689 	apply_trace_boot_options();
7690 
7691 	register_snapshot_cmd();
7692 
7693 	return 0;
7694 
7695 out_free_savedcmd:
7696 	free_saved_cmdlines_buffer(savedcmd);
7697 out_free_temp_buffer:
7698 	ring_buffer_free(temp_buffer);
7699 out_free_cpumask:
7700 	free_cpumask_var(global_trace.tracing_cpumask);
7701 out_free_buffer_mask:
7702 	free_cpumask_var(tracing_buffer_mask);
7703 out:
7704 	return ret;
7705 }
7706 
7707 void __init trace_init(void)
7708 {
7709 	if (tracepoint_printk) {
7710 		tracepoint_print_iter =
7711 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7712 		if (WARN_ON(!tracepoint_print_iter))
7713 			tracepoint_printk = 0;
7714 	}
7715 	tracer_alloc_buffers();
7716 	trace_event_init();
7717 }
7718 
7719 __init static int clear_boot_tracer(void)
7720 {
7721 	/*
7722 	 * The default tracer at boot buffer is an init section.
7723 	 * This function is called in lateinit. If we did not
7724 	 * find the boot tracer, then clear it out, to prevent
7725 	 * later registration from accessing the buffer that is
7726 	 * about to be freed.
7727 	 */
7728 	if (!default_bootup_tracer)
7729 		return 0;
7730 
7731 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7732 	       default_bootup_tracer);
7733 	default_bootup_tracer = NULL;
7734 
7735 	return 0;
7736 }
7737 
/* Hook tracer_init_tracefs() into the fs initcall level. */
fs_initcall(tracer_init_tracefs);
/* Hook clear_boot_tracer() into the late initcall level. */
late_initcall(clear_boot_tracer);
7740