xref: /linux/kernel/trace/trace.c (revision a4cc96d1f0170b779c32c6b2cc58764f5d2cdef0)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44 
45 #include "trace.h"
46 #include "trace_output.h"
47 
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53 
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will peek into the ring buffer to count the
57  * entries inserted during the selftest, but concurrent
58  * insertions into the ring buffer (such as trace_printk()) could
59  * occur at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62 
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67 
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71 
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74 	{ }
75 };
76 
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80 	return 0;
81 }
82 
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89 
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97 
98 cpumask_var_t __read_mostly	tracing_buffer_mask;
99 
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115 
116 enum ftrace_dump_mode ftrace_dump_on_oops;
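
/*
 * For example (illustrative only; both knobs are described above):
 *
 *	# dump the buffers of all CPUs when an oops happens
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * or, on the kernel command line, dump only the CPU that oopsed:
 *
 *	ftrace_dump_on_oops=orig_cpu
 */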
117 
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120 
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124 	struct module			*mod;
125 	unsigned long			length;
126 };
127 
128 union trace_enum_map_item;
129 
130 struct trace_enum_map_tail {
131 	/*
132 	 * "end" is first and points to NULL as it must be different
133 	 * from "mod" or "enum_string"
134 	 */
135 	union trace_enum_map_item	*next;
136 	const char			*end;	/* points to NULL */
137 };
138 
139 static DEFINE_MUTEX(trace_enum_mutex);
140 
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149 	struct trace_enum_map		map;
150 	struct trace_enum_map_head	head;
151 	struct trace_enum_map_tail	tail;
152 };
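
/*
 * Illustrative layout (a sketch): for N saved maps, one such array is
 *
 *	[0]		head	(head.mod, head.length = N)
 *	[1] ... [N]	map	(the saved enum maps themselves)
 *	[N + 1]		tail	(tail.next -> the next saved array,
 *				 tail.end -> NULL)
 */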
153 
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156 
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158 
159 #define MAX_TRACER_SIZE		100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162 
163 static bool allocate_snapshot;
164 
165 static int __init set_cmdline_ftrace(char *str)
166 {
167 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168 	default_bootup_tracer = bootup_tracer_buf;
169 	/* We are using ftrace early, expand it */
170 	ring_buffer_expanded = true;
171 	return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174 
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177 	if (*str++ != '=' || !*str) {
178 		ftrace_dump_on_oops = DUMP_ALL;
179 		return 1;
180 	}
181 
182 	if (!strcmp("orig_cpu", str)) {
183 		ftrace_dump_on_oops = DUMP_ORIG;
184 		return 1;
185 	}
186 
187 	return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190 
191 static int __init stop_trace_on_warning(char *str)
192 {
193 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194 		__disable_trace_on_warning = 1;
195 	return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198 
199 static int __init boot_alloc_snapshot(char *str)
200 {
201 	allocate_snapshot = true;
202 	/* We also need the main ring buffer expanded */
203 	ring_buffer_expanded = true;
204 	return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207 
208 
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210 
211 static int __init set_trace_boot_options(char *str)
212 {
213 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214 	return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217 
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220 
221 static int __init set_trace_boot_clock(char *str)
222 {
223 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224 	trace_boot_clock = trace_boot_clock_buf;
225 	return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228 
229 static int __init set_tracepoint_printk(char *str)
230 {
231 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232 		tracepoint_printk = 1;
233 	return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236 
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239 	nsec += 500;
240 	do_div(nsec, 1000);
241 	return nsec;
242 }
243 
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS						\
246 	(FUNCTION_DEFAULT_FLAGS |					\
247 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
248 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
249 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
250 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251 
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
254 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255 
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258 	TRACE_ITER_EVENT_FORK
259 
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a link list of pages that will store trace entries. The
264  * page descriptor of the pages in the memory is used to hold
265  * the link list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273 	.trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275 
276 LIST_HEAD(ftrace_trace_arrays);
277 
278 int trace_array_get(struct trace_array *this_tr)
279 {
280 	struct trace_array *tr;
281 	int ret = -ENODEV;
282 
283 	mutex_lock(&trace_types_lock);
284 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285 		if (tr == this_tr) {
286 			tr->ref++;
287 			ret = 0;
288 			break;
289 		}
290 	}
291 	mutex_unlock(&trace_types_lock);
292 
293 	return ret;
294 }
295 
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298 	WARN_ON(!this_tr->ref);
299 	this_tr->ref--;
300 }
301 
302 void trace_array_put(struct trace_array *this_tr)
303 {
304 	mutex_lock(&trace_types_lock);
305 	__trace_array_put(this_tr);
306 	mutex_unlock(&trace_types_lock);
307 }
308 
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310 			      struct ring_buffer *buffer,
311 			      struct ring_buffer_event *event)
312 {
313 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314 	    !filter_match_preds(call->filter, rec)) {
315 		__trace_event_discard_commit(buffer, event);
316 		return 1;
317 	}
318 
319 	return 0;
320 }
321 
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324 	vfree(pid_list->pids);
325 	kfree(pid_list);
326 }
327 
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is found in @filtered_pids, false otherwise.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338 	/*
339 	 * If pid_max changed after filtered_pids was created, we
340 	 * by default ignore all pids greater than the previous pid_max.
341 	 */
342 	if (search_pid >= filtered_pids->pid_max)
343 		return false;
344 
345 	return test_bit(search_pid, filtered_pids->pids);
346 }
347 
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360 	/*
361 	 * Return false, because if filtered_pids does not exist,
362 	 * all pids are good to trace.
363 	 */
364 	if (!filtered_pids)
365 		return false;
366 
367 	return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369 
370 /**
371  * trace_filter_add_remove_task - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * If adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383 				  struct task_struct *self,
384 				  struct task_struct *task)
385 {
386 	if (!pid_list)
387 		return;
388 
389 	/* For forks, we only add if the forking task is listed */
390 	if (self) {
391 		if (!trace_find_filtered_pid(pid_list, self->pid))
392 			return;
393 	}
394 
395 	/* Sorry, but we don't support pid_max changing after setting */
396 	if (task->pid >= pid_list->pid_max)
397 		return;
398 
399 	/* "self" is set for forks, and NULL for exits */
400 	if (self)
401 		set_bit(task->pid, pid_list->pids);
402 	else
403 		clear_bit(task->pid, pid_list->pids);
404 }
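
/*
 * A minimal usage sketch (probe_fork/probe_exit and pid_list are
 * hypothetical names, not definitions from this file): callers typically
 * invoke this from fork/exit tracepoint probes so the filter follows
 * children and forgets dead tasks:
 *
 *	static void probe_fork(void *data, struct task_struct *self,
 *			       struct task_struct *task)
 *	{
 *		trace_filter_add_remove_task(pid_list, self, task);
 *	}
 *
 *	static void probe_exit(void *data, struct task_struct *task)
 *	{
 *		trace_filter_add_remove_task(pid_list, NULL, task);
 *	}
 */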
405 
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (+1 of the actual pid to let zero be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420 	unsigned long pid = (unsigned long)v;
421 
422 	(*pos)++;
423 
424 	/* pid already is +1 of the actual previous bit */
425 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426 
427 	/* Return pid + 1 to allow zero to be represented */
428 	if (pid < pid_list->pid_max)
429 		return (void *)(pid + 1);
430 
431 	return NULL;
432 }
433 
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by the seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447 	unsigned long pid;
448 	loff_t l = 0;
449 
450 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451 	if (pid >= pid_list->pid_max)
452 		return NULL;
453 
454 	/* Return pid + 1 so that zero can be the exit value */
455 	for (pid++; pid && l < *pos;
456 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457 		;
458 	return (void *)pid;
459 }
460 
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471 	unsigned long pid = (unsigned long)v - 1;
472 
473 	seq_printf(m, "%lu\n", pid);
474 	return 0;
475 }
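
/*
 * Illustrative wiring (a sketch only; p_start, p_next, p_stop and
 * pid_list are hypothetical names): the three helpers above map directly
 * onto a struct seq_operations:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * where p_stop would drop any locks or references taken in p_start.
 */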
476 
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE		127
479 
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481 		    struct trace_pid_list **new_pid_list,
482 		    const char __user *ubuf, size_t cnt)
483 {
484 	struct trace_pid_list *pid_list;
485 	struct trace_parser parser;
486 	unsigned long val;
487 	int nr_pids = 0;
488 	ssize_t read = 0;
489 	ssize_t ret = 0;
490 	loff_t pos;
491 	pid_t pid;
492 
493 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494 		return -ENOMEM;
495 
496 	/*
497 	 * Always create a new array. The write is an all or nothing
498 	 * operation: a new array is always built when the user adds
499 	 * new pids, and if the operation fails, the current list is
500 	 * not modified.
501 	 */
502 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503 	if (!pid_list)
504 		return -ENOMEM;
505 
506 	pid_list->pid_max = READ_ONCE(pid_max);
507 
508 	/* Only truncating will shrink pid_max */
509 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 		pid_list->pid_max = filtered_pids->pid_max;
511 
512 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 	if (!pid_list->pids) {
514 		kfree(pid_list);
515 		return -ENOMEM;
516 	}
517 
518 	if (filtered_pids) {
519 		/* copy the current bits to the new max */
520 		for_each_set_bit(pid, filtered_pids->pids,
521 				 filtered_pids->pid_max) {
522 			set_bit(pid, pid_list->pids);
523 			nr_pids++;
524 		}
525 	}
526 
527 	while (cnt > 0) {
528 
529 		pos = 0;
530 
531 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
532 		if (ret < 0 || !trace_parser_loaded(&parser))
533 			break;
534 
535 		read += ret;
536 		ubuf += ret;
537 		cnt -= ret;
538 
539 		parser.buffer[parser.idx] = 0;
540 
541 		ret = -EINVAL;
542 		if (kstrtoul(parser.buffer, 0, &val))
543 			break;
544 		if (val >= pid_list->pid_max)
545 			break;
546 
547 		pid = (pid_t)val;
548 
549 		set_bit(pid, pid_list->pids);
550 		nr_pids++;
551 
552 		trace_parser_clear(&parser);
553 		ret = 0;
554 	}
555 	trace_parser_put(&parser);
556 
557 	if (ret < 0) {
558 		trace_free_pid_list(pid_list);
559 		return ret;
560 	}
561 
562 	if (!nr_pids) {
563 		/* Cleared the list of pids */
564 		trace_free_pid_list(pid_list);
565 		read = ret;
566 		pid_list = NULL;
567 	}
568 
569 	*new_pid_list = pid_list;
570 
571 	return read;
572 }
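
/*
 * For example (illustrative; the consumer is assumed to be a tracefs pid
 * filter file such as set_event_pid, which is built on this helper):
 *
 *	# filter on exactly these two pids
 *	echo "123 456" > set_event_pid
 *
 * The write is all or nothing: pids are whitespace separated, and a
 * parse failure leaves the current list untouched.
 */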
573 
574 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
575 {
576 	u64 ts;
577 
578 	/* Early boot up does not have a buffer yet */
579 	if (!buf->buffer)
580 		return trace_clock_local();
581 
582 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
583 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
584 
585 	return ts;
586 }
587 
588 cycle_t ftrace_now(int cpu)
589 {
590 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
591 }
592 
593 /**
594  * tracing_is_enabled - Show if global_trace has been disabled
595  *
596  * Shows if the global trace has been enabled or not. It uses the
597  * mirror flag "buffer_disabled", which is meant for fast paths such as
598  * the irqsoff tracer, but it may be inaccurate due to races. If you
599  * need to know the accurate state, use tracing_is_on(), which is a
600  * little slower but accurate.
601  */
602 int tracing_is_enabled(void)
603 {
604 	/*
605 	 * For quick access (irqsoff uses this in fast path), just
606 	 * return the mirror variable of the state of the ring buffer.
607 	 * It's a little racy, but we don't really care.
608 	 */
609 	smp_rmb();
610 	return !global_trace.buffer_disabled;
611 }
612 
613 /*
614  * trace_buf_size is the size in bytes that is allocated
615  * for a buffer. Note, the number of bytes is always rounded
616  * to page size.
617  *
618  * This number is purposely set to a low value of 16384 so that,
619  * if the dump on oops happens, we do not have to wait for a huge
620  * amount of output. It is configurable at both boot time and
621  * run time anyway.
622  */
623 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
624 
625 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
626 
627 /* trace_types holds a link list of available tracers. */
628 static struct tracer		*trace_types __read_mostly;
629 
630 /*
631  * trace_types_lock is used to protect the trace_types list.
632  */
633 DEFINE_MUTEX(trace_types_lock);
634 
635 /*
636  * serialize the access of the ring buffer
637  *
638  * The ring buffer serializes readers, but that is only low level
639  * protection. The validity of the events (which are returned by
640  * ring_buffer_peek() etc.) is not protected by the ring buffer.
641  *
642  * The content of events may become garbage if we allow other processes
643  * to consume these events concurrently:
644  *   A) the page of the consumed events may become a normal page
645  *      (not a reader page) in the ring buffer, and this page will be
646  *      rewritten by the event producer.
647  *   B) The page of the consumed events may become a page for splice_read,
648  *      and this page will be returned to the system.
649  *
650  * These primitives allow multiple processes to access different cpu
651  * ring buffers concurrently.
652  *
653  * These primitives don't distinguish read-only and read-consume access.
654  * Multiple read-only accesses are also serialized.
655  */
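
/*
 * Typical pairing (a sketch): a consuming reader of one cpu buffer does
 *
 *	trace_access_lock(cpu);
 *	... consume or splice pages from that cpu's buffer ...
 *	trace_access_unlock(cpu);
 *
 * while an iterator that touches every cpu passes RING_BUFFER_ALL_CPUS,
 * which takes the access lock exclusively.
 */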
656 
657 #ifdef CONFIG_SMP
658 static DECLARE_RWSEM(all_cpu_access_lock);
659 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
660 
661 static inline void trace_access_lock(int cpu)
662 {
663 	if (cpu == RING_BUFFER_ALL_CPUS) {
664 		/* gain it for accessing the whole ring buffer. */
665 		down_write(&all_cpu_access_lock);
666 	} else {
667 		/* gain it for accessing a cpu ring buffer. */
668 
669 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
670 		down_read(&all_cpu_access_lock);
671 
672 		/* Secondly block other access to this @cpu ring buffer. */
673 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
674 	}
675 }
676 
677 static inline void trace_access_unlock(int cpu)
678 {
679 	if (cpu == RING_BUFFER_ALL_CPUS) {
680 		up_write(&all_cpu_access_lock);
681 	} else {
682 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
683 		up_read(&all_cpu_access_lock);
684 	}
685 }
686 
687 static inline void trace_access_lock_init(void)
688 {
689 	int cpu;
690 
691 	for_each_possible_cpu(cpu)
692 		mutex_init(&per_cpu(cpu_access_lock, cpu));
693 }
694 
695 #else
696 
697 static DEFINE_MUTEX(access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_lock(&access_lock);
703 }
704 
705 static inline void trace_access_unlock(int cpu)
706 {
707 	(void)cpu;
708 	mutex_unlock(&access_lock);
709 }
710 
711 static inline void trace_access_lock_init(void)
712 {
713 }
714 
715 #endif
716 
717 #ifdef CONFIG_STACKTRACE
718 static void __ftrace_trace_stack(struct ring_buffer *buffer,
719 				 unsigned long flags,
720 				 int skip, int pc, struct pt_regs *regs);
721 static inline void ftrace_trace_stack(struct trace_array *tr,
722 				      struct ring_buffer *buffer,
723 				      unsigned long flags,
724 				      int skip, int pc, struct pt_regs *regs);
725 
726 #else
727 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
728 					unsigned long flags,
729 					int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 static inline void ftrace_trace_stack(struct trace_array *tr,
733 				      struct ring_buffer *buffer,
734 				      unsigned long flags,
735 				      int skip, int pc, struct pt_regs *regs)
736 {
737 }
738 
739 #endif
740 
741 static void tracer_tracing_on(struct trace_array *tr)
742 {
743 	if (tr->trace_buffer.buffer)
744 		ring_buffer_record_on(tr->trace_buffer.buffer);
745 	/*
746 	 * This flag is looked at when buffers haven't been allocated
747 	 * yet, or by some tracers (like irqsoff), that just want to
748 	 * know if the ring buffer has been disabled, but it can handle
749 	 * races where it gets disabled but we still do a record.
750 	 * As the check is in the fast path of the tracers, it is more
751 	 * important to be fast than accurate.
752 	 */
753 	tr->buffer_disabled = 0;
754 	/* Make the flag seen by readers */
755 	smp_wmb();
756 }
757 
758 /**
759  * tracing_on - enable tracing buffers
760  *
761  * This function enables tracing buffers that may have been
762  * disabled with tracing_off.
763  */
764 void tracing_on(void)
765 {
766 	tracer_tracing_on(&global_trace);
767 }
768 EXPORT_SYMBOL_GPL(tracing_on);
769 
770 /**
771  * __trace_puts - write a constant string into the trace buffer.
772  * @ip:	   The address of the caller
773  * @str:   The constant string to write
774  * @size:  The size of the string.
775  */
776 int __trace_puts(unsigned long ip, const char *str, int size)
777 {
778 	struct ring_buffer_event *event;
779 	struct ring_buffer *buffer;
780 	struct print_entry *entry;
781 	unsigned long irq_flags;
782 	int alloc;
783 	int pc;
784 
785 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
786 		return 0;
787 
788 	pc = preempt_count();
789 
790 	if (unlikely(tracing_selftest_running || tracing_disabled))
791 		return 0;
792 
793 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
794 
795 	local_save_flags(irq_flags);
796 	buffer = global_trace.trace_buffer.buffer;
797 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
798 					  irq_flags, pc);
799 	if (!event)
800 		return 0;
801 
802 	entry = ring_buffer_event_data(event);
803 	entry->ip = ip;
804 
805 	memcpy(&entry->buf, str, size);
806 
807 	/* Add a newline if necessary */
808 	if (entry->buf[size - 1] != '\n') {
809 		entry->buf[size] = '\n';
810 		entry->buf[size + 1] = '\0';
811 	} else
812 		entry->buf[size] = '\0';
813 
814 	__buffer_unlock_commit(buffer, event);
815 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
816 
817 	return size;
818 }
819 EXPORT_SYMBOL_GPL(__trace_puts);
820 
821 /**
822  * __trace_bputs - write the pointer to a constant string into trace buffer
823  * @ip:	   The address of the caller
824  * @str:   The constant string to write to the buffer
825  */
826 int __trace_bputs(unsigned long ip, const char *str)
827 {
828 	struct ring_buffer_event *event;
829 	struct ring_buffer *buffer;
830 	struct bputs_entry *entry;
831 	unsigned long irq_flags;
832 	int size = sizeof(struct bputs_entry);
833 	int pc;
834 
835 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
836 		return 0;
837 
838 	pc = preempt_count();
839 
840 	if (unlikely(tracing_selftest_running || tracing_disabled))
841 		return 0;
842 
843 	local_save_flags(irq_flags);
844 	buffer = global_trace.trace_buffer.buffer;
845 	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
846 					  irq_flags, pc);
847 	if (!event)
848 		return 0;
849 
850 	entry = ring_buffer_event_data(event);
851 	entry->ip			= ip;
852 	entry->str			= str;
853 
854 	__buffer_unlock_commit(buffer, event);
855 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
856 
857 	return 1;
858 }
859 EXPORT_SYMBOL_GPL(__trace_bputs);
860 
861 #ifdef CONFIG_TRACER_SNAPSHOT
862 /**
863  * tracing_snapshot - take a snapshot of the current buffer.
864  *
865  * This causes a swap between the snapshot buffer and the current live
866  * tracing buffer. You can use this to take snapshots of the live
867  * trace when some condition is triggered, but continue to trace.
868  *
869  * Note, make sure to allocate the snapshot with either
870  * tracing_snapshot_alloc(), or manually with:
871  * echo 1 > /sys/kernel/debug/tracing/snapshot
872  *
873  * If the snapshot buffer is not allocated, it will stop tracing.
874  * Basically making a permanent snapshot.
875  */
876 void tracing_snapshot(void)
877 {
878 	struct trace_array *tr = &global_trace;
879 	struct tracer *tracer = tr->current_trace;
880 	unsigned long flags;
881 
882 	if (in_nmi()) {
883 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
884 		internal_trace_puts("*** snapshot is being ignored        ***\n");
885 		return;
886 	}
887 
888 	if (!tr->allocated_snapshot) {
889 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
890 		internal_trace_puts("*** stopping trace here!   ***\n");
891 		tracing_off();
892 		return;
893 	}
894 
895 	/* Note, snapshot can not be used when the tracer uses it */
896 	if (tracer->use_max_tr) {
897 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
898 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
899 		return;
900 	}
901 
902 	local_irq_save(flags);
903 	update_max_tr(tr, current, smp_processor_id());
904 	local_irq_restore(flags);
905 }
906 EXPORT_SYMBOL_GPL(tracing_snapshot);
907 
908 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
909 					struct trace_buffer *size_buf, int cpu_id);
910 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
911 
912 static int alloc_snapshot(struct trace_array *tr)
913 {
914 	int ret;
915 
916 	if (!tr->allocated_snapshot) {
917 
918 		/* allocate spare buffer */
919 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
920 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
921 		if (ret < 0)
922 			return ret;
923 
924 		tr->allocated_snapshot = true;
925 	}
926 
927 	return 0;
928 }
929 
930 static void free_snapshot(struct trace_array *tr)
931 {
932 	/*
933 	 * We don't free the ring buffer; instead, we resize it because
934 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
935 	 * we want to preserve it.
936 	 */
937 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
938 	set_buffer_entries(&tr->max_buffer, 1);
939 	tracing_reset_online_cpus(&tr->max_buffer);
940 	tr->allocated_snapshot = false;
941 }
942 
943 /**
944  * tracing_alloc_snapshot - allocate snapshot buffer.
945  *
946  * This only allocates the snapshot buffer if it isn't already
947  * allocated - it doesn't also take a snapshot.
948  *
949  * This is meant to be used in cases where the snapshot buffer needs
950  * to be set up for events that can't sleep but need to be able to
951  * trigger a snapshot.
952  */
953 int tracing_alloc_snapshot(void)
954 {
955 	struct trace_array *tr = &global_trace;
956 	int ret;
957 
958 	ret = alloc_snapshot(tr);
959 	WARN_ON(ret < 0);
960 
961 	return ret;
962 }
963 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
964 
965 /**
966  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
967  *
968  * This is similar to tracing_snapshot(), but it will allocate the
969  * snapshot buffer if it isn't already allocated. Use this only
970  * where it is safe to sleep, as the allocation may sleep.
971  *
972  * This causes a swap between the snapshot buffer and the current live
973  * tracing buffer. You can use this to take snapshots of the live
974  * trace when some condition is triggered, but continue to trace.
975  */
976 void tracing_snapshot_alloc(void)
977 {
978 	int ret;
979 
980 	ret = tracing_alloc_snapshot();
981 	if (ret < 0)
982 		return;
983 
984 	tracing_snapshot();
985 }
986 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
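
/*
 * A minimal usage sketch (assumes CONFIG_TRACER_SNAPSHOT=y and that the
 * snapshot condition is specific to the caller): allocate the spare
 * buffer from a context that may sleep, then trigger snapshots from the
 * fast path:
 *
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *	...
 *	if (unlikely(condition_of_interest))
 *		tracing_snapshot();
 */
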
987 #else
988 void tracing_snapshot(void)
989 {
990 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
991 }
992 EXPORT_SYMBOL_GPL(tracing_snapshot);
993 int tracing_alloc_snapshot(void)
994 {
995 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
996 	return -ENODEV;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999 void tracing_snapshot_alloc(void)
1000 {
1001 	/* Give warning */
1002 	tracing_snapshot();
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1005 #endif /* CONFIG_TRACER_SNAPSHOT */
1006 
1007 static void tracer_tracing_off(struct trace_array *tr)
1008 {
1009 	if (tr->trace_buffer.buffer)
1010 		ring_buffer_record_off(tr->trace_buffer.buffer);
1011 	/*
1012 	 * This flag is looked at when buffers haven't been allocated
1013 	 * yet, or by some tracers (like irqsoff), that just want to
1014 	 * know if the ring buffer has been disabled, but it can handle
1015 	 * races where it gets disabled but we still do a record.
1016 	 * As the check is in the fast path of the tracers, it is more
1017 	 * important to be fast than accurate.
1018 	 */
1019 	tr->buffer_disabled = 1;
1020 	/* Make the flag seen by readers */
1021 	smp_wmb();
1022 }
1023 
1024 /**
1025  * tracing_off - turn off tracing buffers
1026  *
1027  * This function stops the tracing buffers from recording data.
1028  * It does not disable any overhead the tracers themselves may
1029  * be causing. This function simply causes all recording to
1030  * the ring buffers to fail.
1031  */
1032 void tracing_off(void)
1033 {
1034 	tracer_tracing_off(&global_trace);
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_off);
1037 
1038 void disable_trace_on_warning(void)
1039 {
1040 	if (__disable_trace_on_warning)
1041 		tracing_off();
1042 }
1043 
1044 /**
1045  * tracer_tracing_is_on - show real state of ring buffer enabled
1046  * @tr: the trace array to check
1047  *
1048  * Shows the real state of the ring buffer: whether it is enabled or not.
1049  */
1050 static int tracer_tracing_is_on(struct trace_array *tr)
1051 {
1052 	if (tr->trace_buffer.buffer)
1053 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1054 	return !tr->buffer_disabled;
1055 }
1056 
1057 /**
1058  * tracing_is_on - show state of ring buffers enabled
1059  */
1060 int tracing_is_on(void)
1061 {
1062 	return tracer_tracing_is_on(&global_trace);
1063 }
1064 EXPORT_SYMBOL_GPL(tracing_is_on);
1065 
1066 static int __init set_buf_size(char *str)
1067 {
1068 	unsigned long buf_size;
1069 
1070 	if (!str)
1071 		return 0;
1072 	buf_size = memparse(str, &str);
1073 	/* nr_entries can not be zero */
1074 	if (buf_size == 0)
1075 		return 0;
1076 	trace_buf_size = buf_size;
1077 	return 1;
1078 }
1079 __setup("trace_buf_size=", set_buf_size);
1080 
1081 static int __init set_tracing_thresh(char *str)
1082 {
1083 	unsigned long threshold;
1084 	int ret;
1085 
1086 	if (!str)
1087 		return 0;
1088 	ret = kstrtoul(str, 0, &threshold);
1089 	if (ret < 0)
1090 		return 0;
1091 	tracing_thresh = threshold * 1000;
1092 	return 1;
1093 }
1094 __setup("tracing_thresh=", set_tracing_thresh);
1095 
1096 unsigned long nsecs_to_usecs(unsigned long nsecs)
1097 {
1098 	return nsecs / 1000;
1099 }
1100 
1101 /*
1102  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1103  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1104  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1105  * of strings in the order that the enums were defined.
1106  */
1107 #undef C
1108 #define C(a, b) b
1109 
1110 /* These must match the bit positions in trace_iterator_flags */
1111 static const char *trace_options[] = {
1112 	TRACE_FLAGS
1113 	NULL
1114 };
1115 
1116 static struct {
1117 	u64 (*func)(void);
1118 	const char *name;
1119 	int in_ns;		/* is this clock in nanoseconds? */
1120 } trace_clocks[] = {
1121 	{ trace_clock_local,		"local",	1 },
1122 	{ trace_clock_global,		"global",	1 },
1123 	{ trace_clock_counter,		"counter",	0 },
1124 	{ trace_clock_jiffies,		"uptime",	0 },
1125 	{ trace_clock,			"perf",		1 },
1126 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1127 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1128 	ARCH_TRACE_CLOCKS
1129 };
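
/*
 * For example (illustrative): a clock from the table above can be
 * selected at boot with the "trace_clock=" parameter handled by
 * set_trace_boot_clock() above, e.g. trace_clock=global, or at run time
 * through the tracefs "trace_clock" file:
 *
 *	echo mono > /sys/kernel/debug/tracing/trace_clock
 */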
1130 
1131 /*
1132  * trace_parser_get_init - gets the buffer for trace parser
1133  */
1134 int trace_parser_get_init(struct trace_parser *parser, int size)
1135 {
1136 	memset(parser, 0, sizeof(*parser));
1137 
1138 	parser->buffer = kmalloc(size, GFP_KERNEL);
1139 	if (!parser->buffer)
1140 		return 1;
1141 
1142 	parser->size = size;
1143 	return 0;
1144 }
1145 
1146 /*
1147  * trace_parser_put - frees the buffer for trace parser
1148  */
1149 void trace_parser_put(struct trace_parser *parser)
1150 {
1151 	kfree(parser->buffer);
1152 }
1153 
1154 /*
1155  * trace_get_user - reads the user input string separated by space
1156  * (matched by isspace(ch))
1157  *
1158  * For each string found the 'struct trace_parser' is updated,
1159  * and the function returns.
1160  *
1161  * Returns number of bytes read.
1162  *
1163  * See kernel/trace/trace.h for 'struct trace_parser' details.
1164  */
1165 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1166 	size_t cnt, loff_t *ppos)
1167 {
1168 	char ch;
1169 	size_t read = 0;
1170 	ssize_t ret;
1171 
1172 	if (!*ppos)
1173 		trace_parser_clear(parser);
1174 
1175 	ret = get_user(ch, ubuf++);
1176 	if (ret)
1177 		goto out;
1178 
1179 	read++;
1180 	cnt--;
1181 
1182 	/*
1183 	 * The parser is not finished with the last write,
1184 	 * continue reading the user input without skipping spaces.
1185 	 */
1186 	if (!parser->cont) {
1187 		/* skip white space */
1188 		while (cnt && isspace(ch)) {
1189 			ret = get_user(ch, ubuf++);
1190 			if (ret)
1191 				goto out;
1192 			read++;
1193 			cnt--;
1194 		}
1195 
1196 		/* only spaces were written */
1197 		if (isspace(ch)) {
1198 			*ppos += read;
1199 			ret = read;
1200 			goto out;
1201 		}
1202 
1203 		parser->idx = 0;
1204 	}
1205 
1206 	/* read the non-space input */
1207 	while (cnt && !isspace(ch)) {
1208 		if (parser->idx < parser->size - 1)
1209 			parser->buffer[parser->idx++] = ch;
1210 		else {
1211 			ret = -EINVAL;
1212 			goto out;
1213 		}
1214 		ret = get_user(ch, ubuf++);
1215 		if (ret)
1216 			goto out;
1217 		read++;
1218 		cnt--;
1219 	}
1220 
1221 	/* We either got finished input or we have to wait for another call. */
1222 	if (isspace(ch)) {
1223 		parser->buffer[parser->idx] = 0;
1224 		parser->cont = false;
1225 	} else if (parser->idx < parser->size - 1) {
1226 		parser->cont = true;
1227 		parser->buffer[parser->idx++] = ch;
1228 	} else {
1229 		ret = -EINVAL;
1230 		goto out;
1231 	}
1232 
1233 	*ppos += read;
1234 	ret = read;
1235 
1236 out:
1237 	return ret;
1238 }
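
/*
 * A typical caller loop (a sketch; trace_pid_write() above is a complete
 * in-tree example):
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		parser.buffer[parser.idx] = 0;
 *		... act on one whitespace separated token ...
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */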
1239 
1240 /* TODO add a seq_buf_to_buffer() */
1241 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1242 {
1243 	int len;
1244 
1245 	if (trace_seq_used(s) <= s->seq.readpos)
1246 		return -EBUSY;
1247 
1248 	len = trace_seq_used(s) - s->seq.readpos;
1249 	if (cnt > len)
1250 		cnt = len;
1251 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1252 
1253 	s->seq.readpos += cnt;
1254 	return cnt;
1255 }
1256 
1257 unsigned long __read_mostly	tracing_thresh;
1258 
1259 #ifdef CONFIG_TRACER_MAX_TRACE
1260 /*
1261  * Copy the new maximum trace into the separate maximum-trace
1262  * structure. (this way the maximum trace is permanently saved,
1263  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1264  */
1265 static void
1266 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1267 {
1268 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1269 	struct trace_buffer *max_buf = &tr->max_buffer;
1270 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1271 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1272 
1273 	max_buf->cpu = cpu;
1274 	max_buf->time_start = data->preempt_timestamp;
1275 
1276 	max_data->saved_latency = tr->max_latency;
1277 	max_data->critical_start = data->critical_start;
1278 	max_data->critical_end = data->critical_end;
1279 
1280 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1281 	max_data->pid = tsk->pid;
1282 	/*
1283 	 * If tsk == current, then use current_uid(), as that does not use
1284 	 * RCU. The irq tracer can be called out of RCU scope.
1285 	 */
1286 	if (tsk == current)
1287 		max_data->uid = current_uid();
1288 	else
1289 		max_data->uid = task_uid(tsk);
1290 
1291 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1292 	max_data->policy = tsk->policy;
1293 	max_data->rt_priority = tsk->rt_priority;
1294 
1295 	/* record this tasks comm */
1296 	tracing_record_cmdline(tsk);
1297 }
1298 
1299 /**
1300  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1301  * @tr: tracer
1302  * @tsk: the task with the latency
1303  * @cpu: The cpu that initiated the trace.
1304  *
1305  * Flip the buffers between the @tr and the max_tr and record information
1306  * about which task was the cause of this latency.
1307  */
1308 void
1309 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311 	struct ring_buffer *buf;
1312 
1313 	if (tr->stop_count)
1314 		return;
1315 
1316 	WARN_ON_ONCE(!irqs_disabled());
1317 
1318 	if (!tr->allocated_snapshot) {
1319 		/* Only the nop tracer should hit this when disabling */
1320 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1321 		return;
1322 	}
1323 
1324 	arch_spin_lock(&tr->max_lock);
1325 
1326 	buf = tr->trace_buffer.buffer;
1327 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1328 	tr->max_buffer.buffer = buf;
1329 
1330 	__update_max_tr(tr, tsk, cpu);
1331 	arch_spin_unlock(&tr->max_lock);
1332 }
1333 
1334 /**
1335  * update_max_tr_single - only copy one trace over, and reset the rest
1336  * @tr: tracer
1337  * @tsk: task with the latency
1338  * @cpu: the cpu of the buffer to copy.
1339  *
1340  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1341  */
1342 void
1343 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1344 {
1345 	int ret;
1346 
1347 	if (tr->stop_count)
1348 		return;
1349 
1350 	WARN_ON_ONCE(!irqs_disabled());
1351 	if (!tr->allocated_snapshot) {
1352 		/* Only the nop tracer should hit this when disabling */
1353 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1354 		return;
1355 	}
1356 
1357 	arch_spin_lock(&tr->max_lock);
1358 
1359 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1360 
1361 	if (ret == -EBUSY) {
1362 		/*
1363 		 * We failed to swap the buffer due to a commit taking
1364 		 * place on this CPU. We fail to record, but we reset
1365 		 * the max trace buffer (no one writes directly to it)
1366 		 * and flag that it failed.
1367 		 */
1368 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1369 			"Failed to swap buffers due to commit in progress\n");
1370 	}
1371 
1372 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1373 
1374 	__update_max_tr(tr, tsk, cpu);
1375 	arch_spin_unlock(&tr->max_lock);
1376 }
1377 #endif /* CONFIG_TRACER_MAX_TRACE */
1378 
1379 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1380 {
1381 	/* Iterators are static, they should be filled or empty */
1382 	if (trace_buffer_iter(iter, iter->cpu_file))
1383 		return 0;
1384 
1385 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1386 				full);
1387 }
1388 
1389 #ifdef CONFIG_FTRACE_STARTUP_TEST
1390 static int run_tracer_selftest(struct tracer *type)
1391 {
1392 	struct trace_array *tr = &global_trace;
1393 	struct tracer *saved_tracer = tr->current_trace;
1394 	int ret;
1395 
1396 	if (!type->selftest || tracing_selftest_disabled)
1397 		return 0;
1398 
1399 	/*
1400 	 * Run a selftest on this tracer.
1401 	 * Here we reset the trace buffer, and set the current
1402 	 * tracer to be this tracer. The tracer can then run some
1403 	 * internal tracing to verify that everything is in order.
1404 	 * If we fail, we do not register this tracer.
1405 	 */
1406 	tracing_reset_online_cpus(&tr->trace_buffer);
1407 
1408 	tr->current_trace = type;
1409 
1410 #ifdef CONFIG_TRACER_MAX_TRACE
1411 	if (type->use_max_tr) {
1412 		/* If we expanded the buffers, make sure the max is expanded too */
1413 		if (ring_buffer_expanded)
1414 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1415 					   RING_BUFFER_ALL_CPUS);
1416 		tr->allocated_snapshot = true;
1417 	}
1418 #endif
1419 
1420 	/* the test is responsible for initializing and enabling */
1421 	pr_info("Testing tracer %s: ", type->name);
1422 	ret = type->selftest(type, tr);
1423 	/* the test is responsible for resetting too */
1424 	tr->current_trace = saved_tracer;
1425 	if (ret) {
1426 		printk(KERN_CONT "FAILED!\n");
1427 		/* Add the warning after printing 'FAILED' */
1428 		WARN_ON(1);
1429 		return -1;
1430 	}
1431 	/* Only reset on passing, to avoid touching corrupted buffers */
1432 	tracing_reset_online_cpus(&tr->trace_buffer);
1433 
1434 #ifdef CONFIG_TRACER_MAX_TRACE
1435 	if (type->use_max_tr) {
1436 		tr->allocated_snapshot = false;
1437 
1438 		/* Shrink the max buffer again */
1439 		if (ring_buffer_expanded)
1440 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1441 					   RING_BUFFER_ALL_CPUS);
1442 	}
1443 #endif
1444 
1445 	printk(KERN_CONT "PASSED\n");
1446 	return 0;
1447 }
1448 #else
1449 static inline int run_tracer_selftest(struct tracer *type)
1450 {
1451 	return 0;
1452 }
1453 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1454 
1455 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1456 
1457 static void __init apply_trace_boot_options(void);
1458 
1459 /**
1460  * register_tracer - register a tracer with the ftrace system.
1461  * @type: the plugin for the tracer
1462  *
1463  * Register a new plugin tracer.
1464  */
1465 int __init register_tracer(struct tracer *type)
1466 {
1467 	struct tracer *t;
1468 	int ret = 0;
1469 
1470 	if (!type->name) {
1471 		pr_info("Tracer must have a name\n");
1472 		return -1;
1473 	}
1474 
1475 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1476 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1477 		return -1;
1478 	}
1479 
1480 	mutex_lock(&trace_types_lock);
1481 
1482 	tracing_selftest_running = true;
1483 
1484 	for (t = trace_types; t; t = t->next) {
1485 		if (strcmp(type->name, t->name) == 0) {
1486 			/* already found */
1487 			pr_info("Tracer %s already registered\n",
1488 				type->name);
1489 			ret = -1;
1490 			goto out;
1491 		}
1492 	}
1493 
1494 	if (!type->set_flag)
1495 		type->set_flag = &dummy_set_flag;
1496 	if (!type->flags) {
1497 		/* allocate a dummy tracer_flags */
1498 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1499 		if (!type->flags) {
1500 			ret = -ENOMEM;
1501 			goto out;
1502 		}
1503 		type->flags->val = 0;
1504 		type->flags->opts = dummy_tracer_opt;
1505 	} else
1506 		if (!type->flags->opts)
1507 			type->flags->opts = dummy_tracer_opt;
1508 
1509 	/* store the tracer for __set_tracer_option */
1510 	type->flags->trace = type;
1511 
1512 	ret = run_tracer_selftest(type);
1513 	if (ret < 0)
1514 		goto out;
1515 
1516 	type->next = trace_types;
1517 	trace_types = type;
1518 	add_tracer_options(&global_trace, type);
1519 
1520  out:
1521 	tracing_selftest_running = false;
1522 	mutex_unlock(&trace_types_lock);
1523 
1524 	if (ret || !default_bootup_tracer)
1525 		goto out_unlock;
1526 
1527 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1528 		goto out_unlock;
1529 
1530 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1531 	/* Do we want this tracer to start on bootup? */
1532 	tracing_set_tracer(&global_trace, type->name);
1533 	default_bootup_tracer = NULL;
1534 
1535 	apply_trace_boot_options();
1536 
1537 	/* disable other selftests, since this will break them. */
1538 	tracing_selftest_disabled = true;
1539 #ifdef CONFIG_FTRACE_STARTUP_TEST
1540 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1541 	       type->name);
1542 #endif
1543 
1544  out_unlock:
1545 	return ret;
1546 }
1547 
1548 void tracing_reset(struct trace_buffer *buf, int cpu)
1549 {
1550 	struct ring_buffer *buffer = buf->buffer;
1551 
1552 	if (!buffer)
1553 		return;
1554 
1555 	ring_buffer_record_disable(buffer);
1556 
1557 	/* Make sure all commits have finished */
1558 	synchronize_sched();
1559 	ring_buffer_reset_cpu(buffer, cpu);
1560 
1561 	ring_buffer_record_enable(buffer);
1562 }
1563 
1564 void tracing_reset_online_cpus(struct trace_buffer *buf)
1565 {
1566 	struct ring_buffer *buffer = buf->buffer;
1567 	int cpu;
1568 
1569 	if (!buffer)
1570 		return;
1571 
1572 	ring_buffer_record_disable(buffer);
1573 
1574 	/* Make sure all commits have finished */
1575 	synchronize_sched();
1576 
1577 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1578 
1579 	for_each_online_cpu(cpu)
1580 		ring_buffer_reset_cpu(buffer, cpu);
1581 
1582 	ring_buffer_record_enable(buffer);
1583 }
1584 
1585 /* Must have trace_types_lock held */
1586 void tracing_reset_all_online_cpus(void)
1587 {
1588 	struct trace_array *tr;
1589 
1590 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1591 		tracing_reset_online_cpus(&tr->trace_buffer);
1592 #ifdef CONFIG_TRACER_MAX_TRACE
1593 		tracing_reset_online_cpus(&tr->max_buffer);
1594 #endif
1595 	}
1596 }
1597 
1598 #define SAVED_CMDLINES_DEFAULT 128
1599 #define NO_CMDLINE_MAP UINT_MAX
1600 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1601 struct saved_cmdlines_buffer {
1602 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1603 	unsigned *map_cmdline_to_pid;
1604 	unsigned cmdline_num;
1605 	int cmdline_idx;
1606 	char *saved_cmdlines;
1607 };
1608 static struct saved_cmdlines_buffer *savedcmd;
1609 
1610 /* temporarily disable recording */
1611 static atomic_t trace_record_cmdline_disabled __read_mostly;
1612 
1613 static inline char *get_saved_cmdlines(int idx)
1614 {
1615 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1616 }
1617 
1618 static inline void set_cmdline(int idx, const char *cmdline)
1619 {
1620 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1621 }
1622 
1623 static int allocate_cmdlines_buffer(unsigned int val,
1624 				    struct saved_cmdlines_buffer *s)
1625 {
1626 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1627 					GFP_KERNEL);
1628 	if (!s->map_cmdline_to_pid)
1629 		return -ENOMEM;
1630 
1631 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1632 	if (!s->saved_cmdlines) {
1633 		kfree(s->map_cmdline_to_pid);
1634 		return -ENOMEM;
1635 	}
1636 
1637 	s->cmdline_idx = 0;
1638 	s->cmdline_num = val;
1639 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1640 	       sizeof(s->map_pid_to_cmdline));
1641 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1642 	       val * sizeof(*s->map_cmdline_to_pid));
1643 
1644 	return 0;
1645 }
1646 
1647 static int trace_create_savedcmd(void)
1648 {
1649 	int ret;
1650 
1651 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1652 	if (!savedcmd)
1653 		return -ENOMEM;
1654 
1655 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1656 	if (ret < 0) {
1657 		kfree(savedcmd);
1658 		savedcmd = NULL;
1659 		return -ENOMEM;
1660 	}
1661 
1662 	return 0;
1663 }
1664 
1665 int is_tracing_stopped(void)
1666 {
1667 	return global_trace.stop_count;
1668 }
1669 
1670 /**
1671  * tracing_start - quick start of the tracer
1672  *
1673  * If tracing is enabled but was stopped by tracing_stop,
1674  * this will start the tracer back up.
1675  */
1676 void tracing_start(void)
1677 {
1678 	struct ring_buffer *buffer;
1679 	unsigned long flags;
1680 
1681 	if (tracing_disabled)
1682 		return;
1683 
1684 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1685 	if (--global_trace.stop_count) {
1686 		if (global_trace.stop_count < 0) {
1687 			/* Someone screwed up their debugging */
1688 			WARN_ON_ONCE(1);
1689 			global_trace.stop_count = 0;
1690 		}
1691 		goto out;
1692 	}
1693 
1694 	/* Prevent the buffers from switching */
1695 	arch_spin_lock(&global_trace.max_lock);
1696 
1697 	buffer = global_trace.trace_buffer.buffer;
1698 	if (buffer)
1699 		ring_buffer_record_enable(buffer);
1700 
1701 #ifdef CONFIG_TRACER_MAX_TRACE
1702 	buffer = global_trace.max_buffer.buffer;
1703 	if (buffer)
1704 		ring_buffer_record_enable(buffer);
1705 #endif
1706 
1707 	arch_spin_unlock(&global_trace.max_lock);
1708 
1709  out:
1710 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1711 }
1712 
1713 static void tracing_start_tr(struct trace_array *tr)
1714 {
1715 	struct ring_buffer *buffer;
1716 	unsigned long flags;
1717 
1718 	if (tracing_disabled)
1719 		return;
1720 
1721 	/* If global, we need to also start the max tracer */
1722 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1723 		return tracing_start();
1724 
1725 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1726 
1727 	if (--tr->stop_count) {
1728 		if (tr->stop_count < 0) {
1729 			/* Someone screwed up their debugging */
1730 			WARN_ON_ONCE(1);
1731 			tr->stop_count = 0;
1732 		}
1733 		goto out;
1734 	}
1735 
1736 	buffer = tr->trace_buffer.buffer;
1737 	if (buffer)
1738 		ring_buffer_record_enable(buffer);
1739 
1740  out:
1741 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1742 }
1743 
1744 /**
1745  * tracing_stop - quick stop of the tracer
1746  *
1747  * Light weight way to stop tracing. Use in conjunction with
1748  * tracing_start.
1749  */
1750 void tracing_stop(void)
1751 {
1752 	struct ring_buffer *buffer;
1753 	unsigned long flags;
1754 
1755 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1756 	if (global_trace.stop_count++)
1757 		goto out;
1758 
1759 	/* Prevent the buffers from switching */
1760 	arch_spin_lock(&global_trace.max_lock);
1761 
1762 	buffer = global_trace.trace_buffer.buffer;
1763 	if (buffer)
1764 		ring_buffer_record_disable(buffer);
1765 
1766 #ifdef CONFIG_TRACER_MAX_TRACE
1767 	buffer = global_trace.max_buffer.buffer;
1768 	if (buffer)
1769 		ring_buffer_record_disable(buffer);
1770 #endif
1771 
1772 	arch_spin_unlock(&global_trace.max_lock);
1773 
1774  out:
1775 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1776 }
1777 
1778 static void tracing_stop_tr(struct trace_array *tr)
1779 {
1780 	struct ring_buffer *buffer;
1781 	unsigned long flags;
1782 
1783 	/* If global, we need to also stop the max tracer */
1784 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1785 		return tracing_stop();
1786 
1787 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1788 	if (tr->stop_count++)
1789 		goto out;
1790 
1791 	buffer = tr->trace_buffer.buffer;
1792 	if (buffer)
1793 		ring_buffer_record_disable(buffer);
1794 
1795  out:
1796 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1797 }
1798 
1799 void trace_stop_cmdline_recording(void);
1800 
1801 static int trace_save_cmdline(struct task_struct *tsk)
1802 {
1803 	unsigned pid, idx;
1804 
1805 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1806 		return 0;
1807 
1808 	/*
1809 	 * It's not the end of the world if we don't get
1810 	 * the lock, but we also don't want to spin
1811 	 * nor do we want to disable interrupts,
1812 	 * so if we miss here, then better luck next time.
1813 	 */
1814 	if (!arch_spin_trylock(&trace_cmdline_lock))
1815 		return 0;
1816 
1817 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1818 	if (idx == NO_CMDLINE_MAP) {
1819 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1820 
1821 		/*
1822 		 * Check whether the cmdline buffer at idx has a pid
1823 		 * mapped. We are going to overwrite that entry so we
1824 		 * need to clear the map_pid_to_cmdline. Otherwise we
1825 		 * would read the new comm for the old pid.
1826 		 */
1827 		pid = savedcmd->map_cmdline_to_pid[idx];
1828 		if (pid != NO_CMDLINE_MAP)
1829 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1830 
1831 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1832 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1833 
1834 		savedcmd->cmdline_idx = idx;
1835 	}
1836 
1837 	set_cmdline(idx, tsk->comm);
1838 
1839 	arch_spin_unlock(&trace_cmdline_lock);
1840 
1841 	return 1;
1842 }
1843 
1844 static void __trace_find_cmdline(int pid, char comm[])
1845 {
1846 	unsigned map;
1847 
1848 	if (!pid) {
1849 		strcpy(comm, "<idle>");
1850 		return;
1851 	}
1852 
1853 	if (WARN_ON_ONCE(pid < 0)) {
1854 		strcpy(comm, "<XXX>");
1855 		return;
1856 	}
1857 
1858 	if (pid > PID_MAX_DEFAULT) {
1859 		strcpy(comm, "<...>");
1860 		return;
1861 	}
1862 
1863 	map = savedcmd->map_pid_to_cmdline[pid];
1864 	if (map != NO_CMDLINE_MAP)
1865 		strcpy(comm, get_saved_cmdlines(map));
1866 	else
1867 		strcpy(comm, "<...>");
1868 }
1869 
1870 void trace_find_cmdline(int pid, char comm[])
1871 {
1872 	preempt_disable();
1873 	arch_spin_lock(&trace_cmdline_lock);
1874 
1875 	__trace_find_cmdline(pid, comm);
1876 
1877 	arch_spin_unlock(&trace_cmdline_lock);
1878 	preempt_enable();
1879 }
1880 
1881 void tracing_record_cmdline(struct task_struct *tsk)
1882 {
1883 	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1884 		return;
1885 
1886 	if (!__this_cpu_read(trace_cmdline_save))
1887 		return;
1888 
1889 	if (trace_save_cmdline(tsk))
1890 		__this_cpu_write(trace_cmdline_save, false);
1891 }
1892 
1893 void
1894 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1895 			     int pc)
1896 {
1897 	struct task_struct *tsk = current;
1898 
1899 	entry->preempt_count		= pc & 0xff;
1900 	entry->pid			= (tsk) ? tsk->pid : 0;
1901 	entry->flags =
1902 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1903 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1904 #else
1905 		TRACE_FLAG_IRQS_NOSUPPORT |
1906 #endif
1907 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1908 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1909 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1910 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1911 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1912 }
1913 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1914 
1915 static __always_inline void
1916 trace_event_setup(struct ring_buffer_event *event,
1917 		  int type, unsigned long flags, int pc)
1918 {
1919 	struct trace_entry *ent = ring_buffer_event_data(event);
1920 
1921 	tracing_generic_entry_update(ent, flags, pc);
1922 	ent->type = type;
1923 }
1924 
1925 struct ring_buffer_event *
1926 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1927 			  int type,
1928 			  unsigned long len,
1929 			  unsigned long flags, int pc)
1930 {
1931 	struct ring_buffer_event *event;
1932 
1933 	event = ring_buffer_lock_reserve(buffer, len);
1934 	if (event != NULL)
1935 		trace_event_setup(event, type, flags, pc);
1936 
1937 	return event;
1938 }
1939 
1940 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1941 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1942 static int trace_buffered_event_ref;
1943 
1944 /**
1945  * trace_buffered_event_enable - enable buffering events
1946  *
1947  * When events are being filtered, it is quicker to write the event
1948  * data into a temporary buffer if there is a likely chance that it
1949  * will not be committed. Discarding a reserved event from the ring
1950  * buffer is not as fast as committing it, and is much slower than
1951  * copying from the temporary buffer and committing in one shot.
1952  *
1953  * When an event is to be filtered, allocate per cpu buffers to
1954  * write the event data into, and if the event is filtered and discarded
1955  * it is simply dropped; otherwise, the entire data is committed
1956  * in one shot.
1957  */
1958 void trace_buffered_event_enable(void)
1959 {
1960 	struct ring_buffer_event *event;
1961 	struct page *page;
1962 	int cpu;
1963 
1964 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1965 
1966 	if (trace_buffered_event_ref++)
1967 		return;
1968 
1969 	for_each_tracing_cpu(cpu) {
1970 		page = alloc_pages_node(cpu_to_node(cpu),
1971 					GFP_KERNEL | __GFP_NORETRY, 0);
1972 		if (!page)
1973 			goto failed;
1974 
1975 		event = page_address(page);
1976 		memset(event, 0, sizeof(*event));
1977 
1978 		per_cpu(trace_buffered_event, cpu) = event;
1979 
1980 		preempt_disable();
1981 		if (cpu == smp_processor_id() &&
1982 		    this_cpu_read(trace_buffered_event) !=
1983 		    per_cpu(trace_buffered_event, cpu))
1984 			WARN_ON_ONCE(1);
1985 		preempt_enable();
1986 	}
1987 
1988 	return;
1989  failed:
1990 	trace_buffered_event_disable();
1991 }
1992 
1993 static void enable_trace_buffered_event(void *data)
1994 {
1995 	/* Probably not needed, but do it anyway */
1996 	smp_rmb();
1997 	this_cpu_dec(trace_buffered_event_cnt);
1998 }
1999 
2000 static void disable_trace_buffered_event(void *data)
2001 {
2002 	this_cpu_inc(trace_buffered_event_cnt);
2003 }
2004 
2005 /**
2006  * trace_buffered_event_disable - disable buffering events
2007  *
2008  * When a filter is removed, it is faster to not use the buffered
2009  * events, and to commit directly into the ring buffer. Free up
2010  * the temp buffers when there are no more users. This requires
2011  * special synchronization with current events.
2012  */
2013 void trace_buffered_event_disable(void)
2014 {
2015 	int cpu;
2016 
2017 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2018 
2019 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2020 		return;
2021 
2022 	if (--trace_buffered_event_ref)
2023 		return;
2024 
2025 	preempt_disable();
2026 	/* For each CPU, set the buffer as used. */
2027 	smp_call_function_many(tracing_buffer_mask,
2028 			       disable_trace_buffered_event, NULL, 1);
2029 	preempt_enable();
2030 
2031 	/* Wait for all current users to finish */
2032 	synchronize_sched();
2033 
2034 	for_each_tracing_cpu(cpu) {
2035 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2036 		per_cpu(trace_buffered_event, cpu) = NULL;
2037 	}
2038 	/*
2039 	 * Make sure trace_buffered_event is NULL before clearing
2040 	 * trace_buffered_event_cnt.
2041 	 */
2042 	smp_wmb();
2043 
2044 	preempt_disable();
2045 	/* Re-enable use of the buffered events on each cpu */
2046 	smp_call_function_many(tracing_buffer_mask,
2047 			       enable_trace_buffered_event, NULL, 1);
2048 	preempt_enable();
2049 }
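
/*
 * Editor's illustrative sketch (not part of the original file): the
 * enable/disable pair above is refcounted and expects event_mutex to be
 * held, e.g. around attaching and removing an event filter.
 * example_filter_lifetime() is hypothetical.
 */
static inline void example_filter_lifetime(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();		/* filter attached */
	mutex_unlock(&event_mutex);

	/* ... filtered events are staged in the per-cpu page here ... */

	mutex_lock(&event_mutex);
	trace_buffered_event_disable();		/* filter removed */
	mutex_unlock(&event_mutex);
}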
2050 
2051 void
2052 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2053 {
2054 	__this_cpu_write(trace_cmdline_save, true);
2055 
2056 	/* If this is the temp buffer, we need to commit fully */
2057 	if (this_cpu_read(trace_buffered_event) == event) {
2058 		/* Length is in event->array[0] */
2059 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
2060 		/* Release the temp buffer */
2061 		this_cpu_dec(trace_buffered_event_cnt);
2062 	} else
2063 		ring_buffer_unlock_commit(buffer, event);
2064 }
2065 
2066 static struct ring_buffer *temp_buffer;
2067 
2068 struct ring_buffer_event *
2069 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2070 			  struct trace_event_file *trace_file,
2071 			  int type, unsigned long len,
2072 			  unsigned long flags, int pc)
2073 {
2074 	struct ring_buffer_event *entry;
2075 	int val;
2076 
2077 	*current_rb = trace_file->tr->trace_buffer.buffer;
2078 
2079 	if ((trace_file->flags &
2080 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2081 	    (entry = this_cpu_read(trace_buffered_event))) {
2082 		/* Try to use the per cpu buffer first */
2083 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2084 		if (val == 1) {
2085 			trace_event_setup(entry, type, flags, pc);
2086 			entry->array[0] = len;
2087 			return entry;
2088 		}
2089 		this_cpu_dec(trace_buffered_event_cnt);
2090 	}
2091 
2092 	entry = trace_buffer_lock_reserve(*current_rb,
2093 					 type, len, flags, pc);
2094 	/*
2095 	 * If tracing is off, but we have triggers enabled,
2096 	 * we still need to look at the event data. Use the temp_buffer
2097 	 * to store the trace event for the trigger to use. It's recursion
2098 	 * safe and will not be recorded anywhere.
2099 	 */
2100 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2101 		*current_rb = temp_buffer;
2102 		entry = trace_buffer_lock_reserve(*current_rb,
2103 						  type, len, flags, pc);
2104 	}
2105 	return entry;
2106 }
2107 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2108 
2109 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2110 				     struct ring_buffer *buffer,
2111 				     struct ring_buffer_event *event,
2112 				     unsigned long flags, int pc,
2113 				     struct pt_regs *regs)
2114 {
2115 	__buffer_unlock_commit(buffer, event);
2116 
2117 	/*
2118 	 * If regs is not set, then skip the following callers:
2119 	 *   trace_buffer_unlock_commit_regs
2120 	 *   event_trigger_unlock_commit
2121 	 *   trace_event_buffer_commit
2122 	 *   trace_event_raw_event_sched_switch
2123 	 * Note, we can still get here via blktrace, wakeup tracer
2124 	 * and mmiotrace, but that's ok if they lose a function or
2125 	 * two. They are not that meaningful.
2126 	 */
2127 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2128 	ftrace_trace_userstack(buffer, flags, pc);
2129 }
2130 
2131 void
2132 trace_function(struct trace_array *tr,
2133 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2134 	       int pc)
2135 {
2136 	struct trace_event_call *call = &event_function;
2137 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2138 	struct ring_buffer_event *event;
2139 	struct ftrace_entry *entry;
2140 
2141 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2142 					  flags, pc);
2143 	if (!event)
2144 		return;
2145 	entry	= ring_buffer_event_data(event);
2146 	entry->ip			= ip;
2147 	entry->parent_ip		= parent_ip;
2148 
2149 	if (!call_filter_check_discard(call, entry, buffer, event))
2150 		__buffer_unlock_commit(buffer, event);
2151 }
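
/*
 * Editor's illustrative sketch (not part of the original file): the
 * arguments trace_function() expects from a caller.  In practice the
 * ip/parent_ip pair comes from the ftrace callback arguments;
 * example_func_hit() is hypothetical.
 */
static inline void example_func_hit(struct trace_array *tr,
				    unsigned long ip, unsigned long parent_ip)
{
	unsigned long flags;
	int pc = preempt_count();

	local_save_flags(flags);
	trace_function(tr, ip, parent_ip, flags, pc);
}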
2152 
2153 #ifdef CONFIG_STACKTRACE
2154 
2155 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2156 struct ftrace_stack {
2157 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2158 };
2159 
2160 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2161 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2162 
2163 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2164 				 unsigned long flags,
2165 				 int skip, int pc, struct pt_regs *regs)
2166 {
2167 	struct trace_event_call *call = &event_kernel_stack;
2168 	struct ring_buffer_event *event;
2169 	struct stack_entry *entry;
2170 	struct stack_trace trace;
2171 	int use_stack;
2172 	int size = FTRACE_STACK_ENTRIES;
2173 
2174 	trace.nr_entries	= 0;
2175 	trace.skip		= skip;
2176 
2177 	/*
2178 	 * Add two, for this function and the call to save_stack_trace().
2179 	 * If regs is set, then these functions will not be in the way.
2180 	 */
2181 	if (!regs)
2182 		trace.skip += 2;
2183 
2184 	/*
2185 	 * Since events can happen in NMIs there's no safe way to
2186 	 * use the per cpu ftrace_stacks. We reserve it, and if an interrupt
2187 	 * or NMI comes in, it will just have to fall back to the
2188 	 * FTRACE_STACK_ENTRIES sized buffer in the event itself.
2189 	 */
2190 	preempt_disable_notrace();
2191 
2192 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2193 	/*
2194 	 * We don't need any atomic variables, just a barrier.
2195 	 * If an interrupt comes in, we don't care, because it would
2196 	 * have exited and put the counter back to what we want.
2197 	 * We just need a barrier to keep gcc from moving things
2198 	 * around.
2199 	 */
2200 	barrier();
2201 	if (use_stack == 1) {
2202 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2203 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2204 
2205 		if (regs)
2206 			save_stack_trace_regs(regs, &trace);
2207 		else
2208 			save_stack_trace(&trace);
2209 
2210 		if (trace.nr_entries > size)
2211 			size = trace.nr_entries;
2212 	} else
2213 		/* From now on, use_stack is a boolean */
2214 		use_stack = 0;
2215 
2216 	size *= sizeof(unsigned long);
2217 
2218 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2219 					  sizeof(*entry) + size, flags, pc);
2220 	if (!event)
2221 		goto out;
2222 	entry = ring_buffer_event_data(event);
2223 
2224 	memset(&entry->caller, 0, size);
2225 
2226 	if (use_stack)
2227 		memcpy(&entry->caller, trace.entries,
2228 		       trace.nr_entries * sizeof(unsigned long));
2229 	else {
2230 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2231 		trace.entries		= entry->caller;
2232 		if (regs)
2233 			save_stack_trace_regs(regs, &trace);
2234 		else
2235 			save_stack_trace(&trace);
2236 	}
2237 
2238 	entry->size = trace.nr_entries;
2239 
2240 	if (!call_filter_check_discard(call, entry, buffer, event))
2241 		__buffer_unlock_commit(buffer, event);
2242 
2243  out:
2244 	/* Again, don't let gcc optimize things here */
2245 	barrier();
2246 	__this_cpu_dec(ftrace_stack_reserve);
2247 	preempt_enable_notrace();
2248 
2249 }
2250 
2251 static inline void ftrace_trace_stack(struct trace_array *tr,
2252 				      struct ring_buffer *buffer,
2253 				      unsigned long flags,
2254 				      int skip, int pc, struct pt_regs *regs)
2255 {
2256 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2257 		return;
2258 
2259 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2260 }
2261 
2262 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2263 		   int pc)
2264 {
2265 	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2266 }
2267 
2268 /**
2269  * trace_dump_stack - record a stack back trace in the trace buffer
2270  * @skip: Number of functions to skip (helper handlers)
2271  */
2272 void trace_dump_stack(int skip)
2273 {
2274 	unsigned long flags;
2275 
2276 	if (tracing_disabled || tracing_selftest_running)
2277 		return;
2278 
2279 	local_save_flags(flags);
2280 
2281 	/*
2282 	 * Skip 3 more; that seems to get us to the caller of
2283 	 * this function.
2284 	 */
2285 	skip += 3;
2286 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2287 			     flags, skip, preempt_count(), NULL);
2288 }
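
/*
 * Editor's illustrative sketch (not part of the original file): a code
 * path under investigation can drop a kernel stack trace into the ring
 * buffer like this.  A skip of 0 records from the immediate caller;
 * example_mark_path() is hypothetical.
 */
static inline void example_mark_path(void)
{
	trace_dump_stack(0);
}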
2289 
2290 static DEFINE_PER_CPU(int, user_stack_count);
2291 
2292 void
2293 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2294 {
2295 	struct trace_event_call *call = &event_user_stack;
2296 	struct ring_buffer_event *event;
2297 	struct userstack_entry *entry;
2298 	struct stack_trace trace;
2299 
2300 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2301 		return;
2302 
2303 	/*
2304 	 * NMIs can not handle page faults, even with fixups.
2305 	 * Saving the user stack can (and often does) fault.
2306 	 */
2307 	if (unlikely(in_nmi()))
2308 		return;
2309 
2310 	/*
2311 	 * prevent recursion, since the user stack tracing may
2312 	 * trigger other kernel events.
2313 	 */
2314 	preempt_disable();
2315 	if (__this_cpu_read(user_stack_count))
2316 		goto out;
2317 
2318 	__this_cpu_inc(user_stack_count);
2319 
2320 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2321 					  sizeof(*entry), flags, pc);
2322 	if (!event)
2323 		goto out_drop_count;
2324 	entry	= ring_buffer_event_data(event);
2325 
2326 	entry->tgid		= current->tgid;
2327 	memset(&entry->caller, 0, sizeof(entry->caller));
2328 
2329 	trace.nr_entries	= 0;
2330 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2331 	trace.skip		= 0;
2332 	trace.entries		= entry->caller;
2333 
2334 	save_stack_trace_user(&trace);
2335 	if (!call_filter_check_discard(call, entry, buffer, event))
2336 		__buffer_unlock_commit(buffer, event);
2337 
2338  out_drop_count:
2339 	__this_cpu_dec(user_stack_count);
2340  out:
2341 	preempt_enable();
2342 }
2343 
2344 #ifdef UNUSED
2345 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2346 {
2347 	ftrace_trace_userstack(tr, flags, preempt_count());
2348 }
2349 #endif /* UNUSED */
2350 
2351 #endif /* CONFIG_STACKTRACE */
2352 
2353 /* created for use with alloc_percpu */
2354 struct trace_buffer_struct {
2355 	int nesting;
2356 	char buffer[4][TRACE_BUF_SIZE];
2357 };
2358 
2359 static struct trace_buffer_struct *trace_percpu_buffer;
2360 
2361 /*
2362  * This allows for lockless recording.  If we're nested too deeply, then
2363  * this returns NULL.
2364  */
2365 static char *get_trace_buf(void)
2366 {
2367 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2368 
2369 	if (!buffer || buffer->nesting >= 4)
2370 		return NULL;
2371 
2372 	return &buffer->buffer[buffer->nesting++][0];
2373 }
2374 
2375 static void put_trace_buf(void)
2376 {
2377 	this_cpu_dec(trace_percpu_buffer->nesting);
2378 }
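
/*
 * Editor's illustrative sketch (not part of the original file): the
 * get/put pairing for the per-cpu printk buffers above.  Preemption
 * must stay disabled between the two calls so the nesting counter and
 * the buffer are used on the same cpu (trace_vbprintk() below does
 * exactly this).  example_use_trace_buf() is hypothetical.
 */
static inline int example_use_trace_buf(const char *msg)
{
	char *buf;
	int len = 0;

	preempt_disable_notrace();
	buf = get_trace_buf();
	if (buf) {
		len = snprintf(buf, TRACE_BUF_SIZE, "%s", msg);
		put_trace_buf();
	}
	preempt_enable_notrace();

	return len;
}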
2379 
2380 static int alloc_percpu_trace_buffer(void)
2381 {
2382 	struct trace_buffer_struct *buffers;
2383 
2384 	buffers = alloc_percpu(struct trace_buffer_struct);
2385 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2386 		return -ENOMEM;
2387 
2388 	trace_percpu_buffer = buffers;
2389 	return 0;
2390 }
2391 
2392 static int buffers_allocated;
2393 
2394 void trace_printk_init_buffers(void)
2395 {
2396 	if (buffers_allocated)
2397 		return;
2398 
2399 	if (alloc_percpu_trace_buffer())
2400 		return;
2401 
2402 	/* trace_printk() is for debug use only. Don't use it in production. */
2403 
2404 	pr_warn("\n");
2405 	pr_warn("**********************************************************\n");
2406 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2407 	pr_warn("**                                                      **\n");
2408 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2409 	pr_warn("**                                                      **\n");
2410 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2411 	pr_warn("** unsafe for production use.                           **\n");
2412 	pr_warn("**                                                      **\n");
2413 	pr_warn("** If you see this message and you are not debugging    **\n");
2414 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2415 	pr_warn("**                                                      **\n");
2416 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2417 	pr_warn("**********************************************************\n");
2418 
2419 	/* Expand the buffers to set size */
2420 	tracing_update_buffers();
2421 
2422 	buffers_allocated = 1;
2423 
2424 	/*
2425 	 * trace_printk_init_buffers() can be called by modules.
2426 	 * If that happens, then we need to start cmdline recording
2427 	 * directly here. If the global_trace.buffer is already
2428 	 * allocated here, then this was called by module code.
2429 	 */
2430 	if (global_trace.trace_buffer.buffer)
2431 		tracing_start_cmdline_record();
2432 }
2433 
2434 void trace_printk_start_comm(void)
2435 {
2436 	/* Start tracing comms if trace printk is set */
2437 	if (!buffers_allocated)
2438 		return;
2439 	tracing_start_cmdline_record();
2440 }
2441 
2442 static void trace_printk_start_stop_comm(int enabled)
2443 {
2444 	if (!buffers_allocated)
2445 		return;
2446 
2447 	if (enabled)
2448 		tracing_start_cmdline_record();
2449 	else
2450 		tracing_stop_cmdline_record();
2451 }
2452 
2453 /**
2454  * trace_vbprintk - write a binary message to the tracing buffer
2455  * @ip: caller address; @fmt: format string; @args: va_list of arguments
2456  */
2457 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2458 {
2459 	struct trace_event_call *call = &event_bprint;
2460 	struct ring_buffer_event *event;
2461 	struct ring_buffer *buffer;
2462 	struct trace_array *tr = &global_trace;
2463 	struct bprint_entry *entry;
2464 	unsigned long flags;
2465 	char *tbuffer;
2466 	int len = 0, size, pc;
2467 
2468 	if (unlikely(tracing_selftest_running || tracing_disabled))
2469 		return 0;
2470 
2471 	/* Don't pollute graph traces with trace_vprintk internals */
2472 	pause_graph_tracing();
2473 
2474 	pc = preempt_count();
2475 	preempt_disable_notrace();
2476 
2477 	tbuffer = get_trace_buf();
2478 	if (!tbuffer) {
2479 		len = 0;
2480 		goto out_nobuffer;
2481 	}
2482 
2483 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2484 
2485 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2486 		goto out;
2487 
2488 	local_save_flags(flags);
2489 	size = sizeof(*entry) + sizeof(u32) * len;
2490 	buffer = tr->trace_buffer.buffer;
2491 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2492 					  flags, pc);
2493 	if (!event)
2494 		goto out;
2495 	entry = ring_buffer_event_data(event);
2496 	entry->ip			= ip;
2497 	entry->fmt			= fmt;
2498 
2499 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2500 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2501 		__buffer_unlock_commit(buffer, event);
2502 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2503 	}
2504 
2505 out:
2506 	put_trace_buf();
2507 
2508 out_nobuffer:
2509 	preempt_enable_notrace();
2510 	unpause_graph_tracing();
2511 
2512 	return len;
2513 }
2514 EXPORT_SYMBOL_GPL(trace_vbprintk);
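
/*
 * Editor's illustrative sketch (not part of the original file):
 * trace_vbprintk() is the backend that the trace_printk() macro uses
 * for constant format strings, so a debug marker like the one below
 * ends up here.  example_debug_marker() is hypothetical.
 */
static inline void example_debug_marker(int cpu, u64 ts)
{
	trace_printk("woke up cpu %d at %llu\n", cpu, ts);
}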
2515 
2516 static int
2517 __trace_array_vprintk(struct ring_buffer *buffer,
2518 		      unsigned long ip, const char *fmt, va_list args)
2519 {
2520 	struct trace_event_call *call = &event_print;
2521 	struct ring_buffer_event *event;
2522 	int len = 0, size, pc;
2523 	struct print_entry *entry;
2524 	unsigned long flags;
2525 	char *tbuffer;
2526 
2527 	if (tracing_disabled || tracing_selftest_running)
2528 		return 0;
2529 
2530 	/* Don't pollute graph traces with trace_vprintk internals */
2531 	pause_graph_tracing();
2532 
2533 	pc = preempt_count();
2534 	preempt_disable_notrace();
2535 
2537 	tbuffer = get_trace_buf();
2538 	if (!tbuffer) {
2539 		len = 0;
2540 		goto out_nobuffer;
2541 	}
2542 
2543 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2544 
2545 	local_save_flags(flags);
2546 	size = sizeof(*entry) + len + 1;
2547 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2548 					  flags, pc);
2549 	if (!event)
2550 		goto out;
2551 	entry = ring_buffer_event_data(event);
2552 	entry->ip = ip;
2553 
2554 	memcpy(&entry->buf, tbuffer, len + 1);
2555 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2556 		__buffer_unlock_commit(buffer, event);
2557 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2558 	}
2559 
2560 out:
2561 	put_trace_buf();
2562 
2563 out_nobuffer:
2564 	preempt_enable_notrace();
2565 	unpause_graph_tracing();
2566 
2567 	return len;
2568 }
2569 
2570 int trace_array_vprintk(struct trace_array *tr,
2571 			unsigned long ip, const char *fmt, va_list args)
2572 {
2573 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2574 }
2575 
2576 int trace_array_printk(struct trace_array *tr,
2577 		       unsigned long ip, const char *fmt, ...)
2578 {
2579 	int ret;
2580 	va_list ap;
2581 
2582 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2583 		return 0;
2584 
2585 	va_start(ap, fmt);
2586 	ret = trace_array_vprintk(tr, ip, fmt, ap);
2587 	va_end(ap);
2588 	return ret;
2589 }
2590 
2591 int trace_array_printk_buf(struct ring_buffer *buffer,
2592 			   unsigned long ip, const char *fmt, ...)
2593 {
2594 	int ret;
2595 	va_list ap;
2596 
2597 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2598 		return 0;
2599 
2600 	va_start(ap, fmt);
2601 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2602 	va_end(ap);
2603 	return ret;
2604 }
2605 
2606 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2607 {
2608 	return trace_array_vprintk(&global_trace, ip, fmt, args);
2609 }
2610 EXPORT_SYMBOL_GPL(trace_vprintk);
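
/*
 * Editor's illustrative sketch (not part of the original file): writing
 * into a specific trace instance rather than the global buffer.
 * _THIS_IP_ records the call site as the ip; example_instance_msg() is
 * hypothetical.
 */
static inline void example_instance_msg(struct trace_array *tr, int err)
{
	trace_array_printk(tr, _THIS_IP_, "device reset failed: %d\n", err);
}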
2611 
2612 static void trace_iterator_increment(struct trace_iterator *iter)
2613 {
2614 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2615 
2616 	iter->idx++;
2617 	if (buf_iter)
2618 		ring_buffer_read(buf_iter, NULL);
2619 }
2620 
2621 static struct trace_entry *
2622 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2623 		unsigned long *lost_events)
2624 {
2625 	struct ring_buffer_event *event;
2626 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2627 
2628 	if (buf_iter)
2629 		event = ring_buffer_iter_peek(buf_iter, ts);
2630 	else
2631 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2632 					 lost_events);
2633 
2634 	if (event) {
2635 		iter->ent_size = ring_buffer_event_length(event);
2636 		return ring_buffer_event_data(event);
2637 	}
2638 	iter->ent_size = 0;
2639 	return NULL;
2640 }
2641 
2642 static struct trace_entry *
2643 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2644 		  unsigned long *missing_events, u64 *ent_ts)
2645 {
2646 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2647 	struct trace_entry *ent, *next = NULL;
2648 	unsigned long lost_events = 0, next_lost = 0;
2649 	int cpu_file = iter->cpu_file;
2650 	u64 next_ts = 0, ts;
2651 	int next_cpu = -1;
2652 	int next_size = 0;
2653 	int cpu;
2654 
2655 	/*
2656 	 * If we are in a per_cpu trace file, don't bother iterating over
2657 	 * all CPUs; just peek at that one directly.
2658 	 */
2659 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2660 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2661 			return NULL;
2662 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2663 		if (ent_cpu)
2664 			*ent_cpu = cpu_file;
2665 
2666 		return ent;
2667 	}
2668 
2669 	for_each_tracing_cpu(cpu) {
2670 
2671 		if (ring_buffer_empty_cpu(buffer, cpu))
2672 			continue;
2673 
2674 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2675 
2676 		/*
2677 		 * Pick the entry with the smallest timestamp:
2678 		 */
2679 		if (ent && (!next || ts < next_ts)) {
2680 			next = ent;
2681 			next_cpu = cpu;
2682 			next_ts = ts;
2683 			next_lost = lost_events;
2684 			next_size = iter->ent_size;
2685 		}
2686 	}
2687 
2688 	iter->ent_size = next_size;
2689 
2690 	if (ent_cpu)
2691 		*ent_cpu = next_cpu;
2692 
2693 	if (ent_ts)
2694 		*ent_ts = next_ts;
2695 
2696 	if (missing_events)
2697 		*missing_events = next_lost;
2698 
2699 	return next;
2700 }
2701 
2702 /* Find the next real entry, without updating the iterator itself */
2703 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2704 					  int *ent_cpu, u64 *ent_ts)
2705 {
2706 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2707 }
2708 
2709 /* Find the next real entry, and increment the iterator to the next entry */
2710 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2711 {
2712 	iter->ent = __find_next_entry(iter, &iter->cpu,
2713 				      &iter->lost_events, &iter->ts);
2714 
2715 	if (iter->ent)
2716 		trace_iterator_increment(iter);
2717 
2718 	return iter->ent ? iter : NULL;
2719 }
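
/*
 * Editor's illustrative sketch (not part of the original file): the
 * non-consuming walk used by the seq_file code below -- advance the
 * iterator entry by entry, always getting the oldest timestamp across
 * the selected cpus.  example_count_entries() is hypothetical.
 */
static inline int example_count_entries(struct trace_iterator *iter)
{
	int count = 0;

	while (trace_find_next_entry_inc(iter))
		count++;

	return count;
}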
2720 
2721 static void trace_consume(struct trace_iterator *iter)
2722 {
2723 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2724 			    &iter->lost_events);
2725 }
2726 
2727 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2728 {
2729 	struct trace_iterator *iter = m->private;
2730 	int i = (int)*pos;
2731 	void *ent;
2732 
2733 	WARN_ON_ONCE(iter->leftover);
2734 
2735 	(*pos)++;
2736 
2737 	/* can't go backwards */
2738 	if (iter->idx > i)
2739 		return NULL;
2740 
2741 	if (iter->idx < 0)
2742 		ent = trace_find_next_entry_inc(iter);
2743 	else
2744 		ent = iter;
2745 
2746 	while (ent && iter->idx < i)
2747 		ent = trace_find_next_entry_inc(iter);
2748 
2749 	iter->pos = *pos;
2750 
2751 	return ent;
2752 }
2753 
2754 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2755 {
2756 	struct ring_buffer_event *event;
2757 	struct ring_buffer_iter *buf_iter;
2758 	unsigned long entries = 0;
2759 	u64 ts;
2760 
2761 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2762 
2763 	buf_iter = trace_buffer_iter(iter, cpu);
2764 	if (!buf_iter)
2765 		return;
2766 
2767 	ring_buffer_iter_reset(buf_iter);
2768 
2769 	/*
2770 	 * With the max latency tracers, a reset may never have taken
2771 	 * place on a cpu. This is evident from the timestamps being
2772 	 * before the start time of the buffer.
2773 	 */
2774 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2775 		if (ts >= iter->trace_buffer->time_start)
2776 			break;
2777 		entries++;
2778 		ring_buffer_read(buf_iter, NULL);
2779 	}
2780 
2781 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2782 }
2783 
2784 /*
2785  * The current tracer is copied to avoid taking a global lock
2786  * all around.
2787  */
2788 static void *s_start(struct seq_file *m, loff_t *pos)
2789 {
2790 	struct trace_iterator *iter = m->private;
2791 	struct trace_array *tr = iter->tr;
2792 	int cpu_file = iter->cpu_file;
2793 	void *p = NULL;
2794 	loff_t l = 0;
2795 	int cpu;
2796 
2797 	/*
2798 	 * copy the tracer to avoid using a global lock all around.
2799 	 * iter->trace is a copy of current_trace, the pointer to the
2800 	 * name may be used instead of a strcmp(), as iter->trace->name
2801 	 * will point to the same string as current_trace->name.
2802 	 */
2803 	mutex_lock(&trace_types_lock);
2804 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2805 		*iter->trace = *tr->current_trace;
2806 	mutex_unlock(&trace_types_lock);
2807 
2808 #ifdef CONFIG_TRACER_MAX_TRACE
2809 	if (iter->snapshot && iter->trace->use_max_tr)
2810 		return ERR_PTR(-EBUSY);
2811 #endif
2812 
2813 	if (!iter->snapshot)
2814 		atomic_inc(&trace_record_cmdline_disabled);
2815 
2816 	if (*pos != iter->pos) {
2817 		iter->ent = NULL;
2818 		iter->cpu = 0;
2819 		iter->idx = -1;
2820 
2821 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2822 			for_each_tracing_cpu(cpu)
2823 				tracing_iter_reset(iter, cpu);
2824 		} else
2825 			tracing_iter_reset(iter, cpu_file);
2826 
2827 		iter->leftover = 0;
2828 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2829 			;
2830 
2831 	} else {
2832 		/*
2833 		 * If we overflowed the seq_file before, then we want
2834 		 * to just reuse the trace_seq buffer again.
2835 		 */
2836 		if (iter->leftover)
2837 			p = iter;
2838 		else {
2839 			l = *pos - 1;
2840 			p = s_next(m, p, &l);
2841 		}
2842 	}
2843 
2844 	trace_event_read_lock();
2845 	trace_access_lock(cpu_file);
2846 	return p;
2847 }
2848 
2849 static void s_stop(struct seq_file *m, void *p)
2850 {
2851 	struct trace_iterator *iter = m->private;
2852 
2853 #ifdef CONFIG_TRACER_MAX_TRACE
2854 	if (iter->snapshot && iter->trace->use_max_tr)
2855 		return;
2856 #endif
2857 
2858 	if (!iter->snapshot)
2859 		atomic_dec(&trace_record_cmdline_disabled);
2860 
2861 	trace_access_unlock(iter->cpu_file);
2862 	trace_event_read_unlock();
2863 }
2864 
2865 static void
2866 get_total_entries(struct trace_buffer *buf,
2867 		  unsigned long *total, unsigned long *entries)
2868 {
2869 	unsigned long count;
2870 	int cpu;
2871 
2872 	*total = 0;
2873 	*entries = 0;
2874 
2875 	for_each_tracing_cpu(cpu) {
2876 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2877 		/*
2878 		 * If this buffer has skipped entries, then we hold all
2879 		 * entries for the trace and we need to ignore the
2880 		 * ones before the time stamp.
2881 		 */
2882 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2883 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2884 			/* total is the same as the entries */
2885 			*total += count;
2886 		} else
2887 			*total += count +
2888 				ring_buffer_overrun_cpu(buf->buffer, cpu);
2889 		*entries += count;
2890 	}
2891 }
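
/*
 * Editor's illustrative sketch (not part of the original file): what
 * the two counters above mean -- "entries" is what is still readable in
 * the buffer, while "total" also counts events lost to ring-buffer
 * overruns.  example_report_entries() is hypothetical.
 */
static inline void example_report_entries(struct trace_buffer *buf)
{
	unsigned long total, entries;

	get_total_entries(buf, &total, &entries);
	pr_info("%lu of %lu recorded events still in the buffer\n",
		entries, total);
}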
2892 
2893 static void print_lat_help_header(struct seq_file *m)
2894 {
2895 	seq_puts(m, "#                  _------=> CPU#            \n"
2896 		    "#                 / _-----=> irqs-off        \n"
2897 		    "#                | / _----=> need-resched    \n"
2898 		    "#                || / _---=> hardirq/softirq \n"
2899 		    "#                ||| / _--=> preempt-depth   \n"
2900 		    "#                |||| /     delay            \n"
2901 		    "#  cmd     pid   ||||| time  |   caller      \n"
2902 		    "#     \\   /      |||||  \\    |   /         \n");
2903 }
2904 
2905 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2906 {
2907 	unsigned long total;
2908 	unsigned long entries;
2909 
2910 	get_total_entries(buf, &total, &entries);
2911 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2912 		   entries, total, num_online_cpus());
2913 	seq_puts(m, "#\n");
2914 }
2915 
2916 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2917 {
2918 	print_event_info(buf, m);
2919 	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2920 		    "#              | |       |          |         |\n");
2921 }
2922 
2923 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2924 {
2925 	print_event_info(buf, m);
2926 	seq_puts(m, "#                              _-----=> irqs-off\n"
2927 		    "#                             / _----=> need-resched\n"
2928 		    "#                            | / _---=> hardirq/softirq\n"
2929 		    "#                            || / _--=> preempt-depth\n"
2930 		    "#                            ||| /     delay\n"
2931 		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2932 		    "#              | |       |   ||||       |         |\n");
2933 }
2934 
2935 void
2936 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2937 {
2938 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2939 	struct trace_buffer *buf = iter->trace_buffer;
2940 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2941 	struct tracer *type = iter->trace;
2942 	unsigned long entries;
2943 	unsigned long total;
2944 	const char *name = "preemption";
2945 
2946 	name = type->name;
2947 
2948 	get_total_entries(buf, &total, &entries);
2949 
2950 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2951 		   name, UTS_RELEASE);
2952 	seq_puts(m, "# -----------------------------------"
2953 		 "---------------------------------\n");
2954 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2955 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2956 		   nsecs_to_usecs(data->saved_latency),
2957 		   entries,
2958 		   total,
2959 		   buf->cpu,
2960 #if defined(CONFIG_PREEMPT_NONE)
2961 		   "server",
2962 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2963 		   "desktop",
2964 #elif defined(CONFIG_PREEMPT)
2965 		   "preempt",
2966 #else
2967 		   "unknown",
2968 #endif
2969 		   /* These are reserved for later use */
2970 		   0, 0, 0, 0);
2971 #ifdef CONFIG_SMP
2972 	seq_printf(m, " #P:%d)\n", num_online_cpus());
2973 #else
2974 	seq_puts(m, ")\n");
2975 #endif
2976 	seq_puts(m, "#    -----------------\n");
2977 	seq_printf(m, "#    | task: %.16s-%d "
2978 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2979 		   data->comm, data->pid,
2980 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2981 		   data->policy, data->rt_priority);
2982 	seq_puts(m, "#    -----------------\n");
2983 
2984 	if (data->critical_start) {
2985 		seq_puts(m, "#  => started at: ");
2986 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2987 		trace_print_seq(m, &iter->seq);
2988 		seq_puts(m, "\n#  => ended at:   ");
2989 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2990 		trace_print_seq(m, &iter->seq);
2991 		seq_puts(m, "\n#\n");
2992 	}
2993 
2994 	seq_puts(m, "#\n");
2995 }
2996 
2997 static void test_cpu_buff_start(struct trace_iterator *iter)
2998 {
2999 	struct trace_seq *s = &iter->seq;
3000 	struct trace_array *tr = iter->tr;
3001 
3002 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3003 		return;
3004 
3005 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3006 		return;
3007 
3008 	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3009 		return;
3010 
3011 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3012 		return;
3013 
3014 	if (iter->started)
3015 		cpumask_set_cpu(iter->cpu, iter->started);
3016 
3017 	/* Don't print started cpu buffer for the first entry of the trace */
3018 	if (iter->idx > 1)
3019 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3020 				iter->cpu);
3021 }
3022 
3023 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3024 {
3025 	struct trace_array *tr = iter->tr;
3026 	struct trace_seq *s = &iter->seq;
3027 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3028 	struct trace_entry *entry;
3029 	struct trace_event *event;
3030 
3031 	entry = iter->ent;
3032 
3033 	test_cpu_buff_start(iter);
3034 
3035 	event = ftrace_find_event(entry->type);
3036 
3037 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3038 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3039 			trace_print_lat_context(iter);
3040 		else
3041 			trace_print_context(iter);
3042 	}
3043 
3044 	if (trace_seq_has_overflowed(s))
3045 		return TRACE_TYPE_PARTIAL_LINE;
3046 
3047 	if (event)
3048 		return event->funcs->trace(iter, sym_flags, event);
3049 
3050 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3051 
3052 	return trace_handle_return(s);
3053 }
3054 
3055 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3056 {
3057 	struct trace_array *tr = iter->tr;
3058 	struct trace_seq *s = &iter->seq;
3059 	struct trace_entry *entry;
3060 	struct trace_event *event;
3061 
3062 	entry = iter->ent;
3063 
3064 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3065 		trace_seq_printf(s, "%d %d %llu ",
3066 				 entry->pid, iter->cpu, iter->ts);
3067 
3068 	if (trace_seq_has_overflowed(s))
3069 		return TRACE_TYPE_PARTIAL_LINE;
3070 
3071 	event = ftrace_find_event(entry->type);
3072 	if (event)
3073 		return event->funcs->raw(iter, 0, event);
3074 
3075 	trace_seq_printf(s, "%d ?\n", entry->type);
3076 
3077 	return trace_handle_return(s);
3078 }
3079 
3080 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3081 {
3082 	struct trace_array *tr = iter->tr;
3083 	struct trace_seq *s = &iter->seq;
3084 	unsigned char newline = '\n';
3085 	struct trace_entry *entry;
3086 	struct trace_event *event;
3087 
3088 	entry = iter->ent;
3089 
3090 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3091 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3092 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3093 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3094 		if (trace_seq_has_overflowed(s))
3095 			return TRACE_TYPE_PARTIAL_LINE;
3096 	}
3097 
3098 	event = ftrace_find_event(entry->type);
3099 	if (event) {
3100 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3101 		if (ret != TRACE_TYPE_HANDLED)
3102 			return ret;
3103 	}
3104 
3105 	SEQ_PUT_FIELD(s, newline);
3106 
3107 	return trace_handle_return(s);
3108 }
3109 
3110 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3111 {
3112 	struct trace_array *tr = iter->tr;
3113 	struct trace_seq *s = &iter->seq;
3114 	struct trace_entry *entry;
3115 	struct trace_event *event;
3116 
3117 	entry = iter->ent;
3118 
3119 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3120 		SEQ_PUT_FIELD(s, entry->pid);
3121 		SEQ_PUT_FIELD(s, iter->cpu);
3122 		SEQ_PUT_FIELD(s, iter->ts);
3123 		if (trace_seq_has_overflowed(s))
3124 			return TRACE_TYPE_PARTIAL_LINE;
3125 	}
3126 
3127 	event = ftrace_find_event(entry->type);
3128 	return event ? event->funcs->binary(iter, 0, event) :
3129 		TRACE_TYPE_HANDLED;
3130 }
3131 
3132 int trace_empty(struct trace_iterator *iter)
3133 {
3134 	struct ring_buffer_iter *buf_iter;
3135 	int cpu;
3136 
3137 	/* If we are looking at one CPU buffer, only check that one */
3138 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3139 		cpu = iter->cpu_file;
3140 		buf_iter = trace_buffer_iter(iter, cpu);
3141 		if (buf_iter) {
3142 			if (!ring_buffer_iter_empty(buf_iter))
3143 				return 0;
3144 		} else {
3145 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3146 				return 0;
3147 		}
3148 		return 1;
3149 	}
3150 
3151 	for_each_tracing_cpu(cpu) {
3152 		buf_iter = trace_buffer_iter(iter, cpu);
3153 		if (buf_iter) {
3154 			if (!ring_buffer_iter_empty(buf_iter))
3155 				return 0;
3156 		} else {
3157 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3158 				return 0;
3159 		}
3160 	}
3161 
3162 	return 1;
3163 }
3164 
3165 /*  Called with trace_event_read_lock() held. */
3166 enum print_line_t print_trace_line(struct trace_iterator *iter)
3167 {
3168 	struct trace_array *tr = iter->tr;
3169 	unsigned long trace_flags = tr->trace_flags;
3170 	enum print_line_t ret;
3171 
3172 	if (iter->lost_events) {
3173 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3174 				 iter->cpu, iter->lost_events);
3175 		if (trace_seq_has_overflowed(&iter->seq))
3176 			return TRACE_TYPE_PARTIAL_LINE;
3177 	}
3178 
3179 	if (iter->trace && iter->trace->print_line) {
3180 		ret = iter->trace->print_line(iter);
3181 		if (ret != TRACE_TYPE_UNHANDLED)
3182 			return ret;
3183 	}
3184 
3185 	if (iter->ent->type == TRACE_BPUTS &&
3186 			trace_flags & TRACE_ITER_PRINTK &&
3187 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3188 		return trace_print_bputs_msg_only(iter);
3189 
3190 	if (iter->ent->type == TRACE_BPRINT &&
3191 			trace_flags & TRACE_ITER_PRINTK &&
3192 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3193 		return trace_print_bprintk_msg_only(iter);
3194 
3195 	if (iter->ent->type == TRACE_PRINT &&
3196 			trace_flags & TRACE_ITER_PRINTK &&
3197 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3198 		return trace_print_printk_msg_only(iter);
3199 
3200 	if (trace_flags & TRACE_ITER_BIN)
3201 		return print_bin_fmt(iter);
3202 
3203 	if (trace_flags & TRACE_ITER_HEX)
3204 		return print_hex_fmt(iter);
3205 
3206 	if (trace_flags & TRACE_ITER_RAW)
3207 		return print_raw_fmt(iter);
3208 
3209 	return print_trace_fmt(iter);
3210 }
3211 
3212 void trace_latency_header(struct seq_file *m)
3213 {
3214 	struct trace_iterator *iter = m->private;
3215 	struct trace_array *tr = iter->tr;
3216 
3217 	/* print nothing if the buffers are empty */
3218 	if (trace_empty(iter))
3219 		return;
3220 
3221 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3222 		print_trace_header(m, iter);
3223 
3224 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3225 		print_lat_help_header(m);
3226 }
3227 
3228 void trace_default_header(struct seq_file *m)
3229 {
3230 	struct trace_iterator *iter = m->private;
3231 	struct trace_array *tr = iter->tr;
3232 	unsigned long trace_flags = tr->trace_flags;
3233 
3234 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3235 		return;
3236 
3237 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3238 		/* print nothing if the buffers are empty */
3239 		if (trace_empty(iter))
3240 			return;
3241 		print_trace_header(m, iter);
3242 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3243 			print_lat_help_header(m);
3244 	} else {
3245 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3246 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3247 				print_func_help_header_irq(iter->trace_buffer, m);
3248 			else
3249 				print_func_help_header(iter->trace_buffer, m);
3250 		}
3251 	}
3252 }
3253 
3254 static void test_ftrace_alive(struct seq_file *m)
3255 {
3256 	if (!ftrace_is_dead())
3257 		return;
3258 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3259 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3260 }
3261 
3262 #ifdef CONFIG_TRACER_MAX_TRACE
3263 static void show_snapshot_main_help(struct seq_file *m)
3264 {
3265 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3266 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3267 		    "#                      Takes a snapshot of the main buffer.\n"
3268 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3269 		    "#                      (Doesn't have to be '2' works with any number that\n"
3270 		    "#                       is not a '0' or '1')\n");
3271 }
3272 
3273 static void show_snapshot_percpu_help(struct seq_file *m)
3274 {
3275 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3276 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3277 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3278 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3279 #else
3280 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3281 		    "#                     Must use main snapshot file to allocate.\n");
3282 #endif
3283 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3284 		    "#                      (Doesn't have to be '2' works with any number that\n"
3285 		    "#                       is not a '0' or '1')\n");
3286 }
3287 
3288 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3289 {
3290 	if (iter->tr->allocated_snapshot)
3291 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3292 	else
3293 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3294 
3295 	seq_puts(m, "# Snapshot commands:\n");
3296 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3297 		show_snapshot_main_help(m);
3298 	else
3299 		show_snapshot_percpu_help(m);
3300 }
3301 #else
3302 /* Should never be called */
3303 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3304 #endif
3305 
3306 static int s_show(struct seq_file *m, void *v)
3307 {
3308 	struct trace_iterator *iter = v;
3309 	int ret;
3310 
3311 	if (iter->ent == NULL) {
3312 		if (iter->tr) {
3313 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3314 			seq_puts(m, "#\n");
3315 			test_ftrace_alive(m);
3316 		}
3317 		if (iter->snapshot && trace_empty(iter))
3318 			print_snapshot_help(m, iter);
3319 		else if (iter->trace && iter->trace->print_header)
3320 			iter->trace->print_header(m);
3321 		else
3322 			trace_default_header(m);
3323 
3324 	} else if (iter->leftover) {
3325 		/*
3326 		 * If we filled the seq_file buffer earlier, we
3327 		 * want to just show it now.
3328 		 */
3329 		ret = trace_print_seq(m, &iter->seq);
3330 
3331 		/* ret should this time be zero, but you never know */
3332 		iter->leftover = ret;
3333 
3334 	} else {
3335 		print_trace_line(iter);
3336 		ret = trace_print_seq(m, &iter->seq);
3337 		/*
3338 		 * If we overflow the seq_file buffer, then it will
3339 		 * ask us for this data again at start up.
3340 		 * Use that instead.
3341 		 *  ret is 0 if seq_file write succeeded.
3342 		 *        -1 otherwise.
3343 		 */
3344 		iter->leftover = ret;
3345 	}
3346 
3347 	return 0;
3348 }
3349 
3350 /*
3351  * Should be used after trace_array_get(), trace_types_lock
3352  * ensures that i_cdev was already initialized.
3353  */
3354 static inline int tracing_get_cpu(struct inode *inode)
3355 {
3356 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3357 		return (long)inode->i_cdev - 1;
3358 	return RING_BUFFER_ALL_CPUS;
3359 }
3360 
3361 static const struct seq_operations tracer_seq_ops = {
3362 	.start		= s_start,
3363 	.next		= s_next,
3364 	.stop		= s_stop,
3365 	.show		= s_show,
3366 };
3367 
3368 static struct trace_iterator *
3369 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3370 {
3371 	struct trace_array *tr = inode->i_private;
3372 	struct trace_iterator *iter;
3373 	int cpu;
3374 
3375 	if (tracing_disabled)
3376 		return ERR_PTR(-ENODEV);
3377 
3378 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3379 	if (!iter)
3380 		return ERR_PTR(-ENOMEM);
3381 
3382 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3383 				    GFP_KERNEL);
3384 	if (!iter->buffer_iter)
3385 		goto release;
3386 
3387 	/*
3388 	 * We make a copy of the current tracer to avoid concurrent
3389 	 * changes on it while we are reading.
3390 	 */
3391 	mutex_lock(&trace_types_lock);
3392 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3393 	if (!iter->trace)
3394 		goto fail;
3395 
3396 	*iter->trace = *tr->current_trace;
3397 
3398 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3399 		goto fail;
3400 
3401 	iter->tr = tr;
3402 
3403 #ifdef CONFIG_TRACER_MAX_TRACE
3404 	/* Currently only the top directory has a snapshot */
3405 	if (tr->current_trace->print_max || snapshot)
3406 		iter->trace_buffer = &tr->max_buffer;
3407 	else
3408 #endif
3409 		iter->trace_buffer = &tr->trace_buffer;
3410 	iter->snapshot = snapshot;
3411 	iter->pos = -1;
3412 	iter->cpu_file = tracing_get_cpu(inode);
3413 	mutex_init(&iter->mutex);
3414 
3415 	/* Notify the tracer early; before we stop tracing. */
3416 	if (iter->trace && iter->trace->open)
3417 		iter->trace->open(iter);
3418 
3419 	/* Annotate start of buffers if we had overruns */
3420 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3421 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3422 
3423 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3424 	if (trace_clocks[tr->clock_id].in_ns)
3425 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3426 
3427 	/* stop the trace while dumping if we are not opening "snapshot" */
3428 	if (!iter->snapshot)
3429 		tracing_stop_tr(tr);
3430 
3431 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3432 		for_each_tracing_cpu(cpu) {
3433 			iter->buffer_iter[cpu] =
3434 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3435 		}
3436 		ring_buffer_read_prepare_sync();
3437 		for_each_tracing_cpu(cpu) {
3438 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3439 			tracing_iter_reset(iter, cpu);
3440 		}
3441 	} else {
3442 		cpu = iter->cpu_file;
3443 		iter->buffer_iter[cpu] =
3444 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3445 		ring_buffer_read_prepare_sync();
3446 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3447 		tracing_iter_reset(iter, cpu);
3448 	}
3449 
3450 	mutex_unlock(&trace_types_lock);
3451 
3452 	return iter;
3453 
3454  fail:
3455 	mutex_unlock(&trace_types_lock);
3456 	kfree(iter->trace);
3457 	kfree(iter->buffer_iter);
3458 release:
3459 	seq_release_private(inode, file);
3460 	return ERR_PTR(-ENOMEM);
3461 }
3462 
3463 int tracing_open_generic(struct inode *inode, struct file *filp)
3464 {
3465 	if (tracing_disabled)
3466 		return -ENODEV;
3467 
3468 	filp->private_data = inode->i_private;
3469 	return 0;
3470 }
3471 
3472 bool tracing_is_disabled(void)
3473 {
3474 	return (tracing_disabled) ? true : false;
3475 }
3476 
3477 /*
3478  * Open and update trace_array ref count.
3479  * Must have the current trace_array passed to it.
3480  */
3481 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3482 {
3483 	struct trace_array *tr = inode->i_private;
3484 
3485 	if (tracing_disabled)
3486 		return -ENODEV;
3487 
3488 	if (trace_array_get(tr) < 0)
3489 		return -ENODEV;
3490 
3491 	filp->private_data = inode->i_private;
3492 
3493 	return 0;
3494 }
3495 
3496 static int tracing_release(struct inode *inode, struct file *file)
3497 {
3498 	struct trace_array *tr = inode->i_private;
3499 	struct seq_file *m = file->private_data;
3500 	struct trace_iterator *iter;
3501 	int cpu;
3502 
3503 	if (!(file->f_mode & FMODE_READ)) {
3504 		trace_array_put(tr);
3505 		return 0;
3506 	}
3507 
3508 	/* Writes do not use seq_file */
3509 	iter = m->private;
3510 	mutex_lock(&trace_types_lock);
3511 
3512 	for_each_tracing_cpu(cpu) {
3513 		if (iter->buffer_iter[cpu])
3514 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3515 	}
3516 
3517 	if (iter->trace && iter->trace->close)
3518 		iter->trace->close(iter);
3519 
3520 	if (!iter->snapshot)
3521 		/* reenable tracing if it was previously enabled */
3522 		tracing_start_tr(tr);
3523 
3524 	__trace_array_put(tr);
3525 
3526 	mutex_unlock(&trace_types_lock);
3527 
3528 	mutex_destroy(&iter->mutex);
3529 	free_cpumask_var(iter->started);
3530 	kfree(iter->trace);
3531 	kfree(iter->buffer_iter);
3532 	seq_release_private(inode, file);
3533 
3534 	return 0;
3535 }
3536 
3537 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3538 {
3539 	struct trace_array *tr = inode->i_private;
3540 
3541 	trace_array_put(tr);
3542 	return 0;
3543 }
3544 
3545 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3546 {
3547 	struct trace_array *tr = inode->i_private;
3548 
3549 	trace_array_put(tr);
3550 
3551 	return single_release(inode, file);
3552 }
3553 
3554 static int tracing_open(struct inode *inode, struct file *file)
3555 {
3556 	struct trace_array *tr = inode->i_private;
3557 	struct trace_iterator *iter;
3558 	int ret = 0;
3559 
3560 	if (trace_array_get(tr) < 0)
3561 		return -ENODEV;
3562 
3563 	/* If this file was open for write, then erase contents */
3564 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3565 		int cpu = tracing_get_cpu(inode);
3566 
3567 		if (cpu == RING_BUFFER_ALL_CPUS)
3568 			tracing_reset_online_cpus(&tr->trace_buffer);
3569 		else
3570 			tracing_reset(&tr->trace_buffer, cpu);
3571 	}
3572 
3573 	if (file->f_mode & FMODE_READ) {
3574 		iter = __tracing_open(inode, file, false);
3575 		if (IS_ERR(iter))
3576 			ret = PTR_ERR(iter);
3577 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3578 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3579 	}
3580 
3581 	if (ret < 0)
3582 		trace_array_put(tr);
3583 
3584 	return ret;
3585 }
3586 
3587 /*
3588  * Some tracers are not suitable for instance buffers.
3589  * A tracer is always available for the global array (toplevel)
3590  * or if it explicitly states that it is.
3591  */
3592 static bool
3593 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3594 {
3595 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3596 }
3597 
3598 /* Find the next tracer that this trace array may use */
3599 static struct tracer *
3600 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3601 {
3602 	while (t && !trace_ok_for_array(t, tr))
3603 		t = t->next;
3604 
3605 	return t;
3606 }
3607 
3608 static void *
3609 t_next(struct seq_file *m, void *v, loff_t *pos)
3610 {
3611 	struct trace_array *tr = m->private;
3612 	struct tracer *t = v;
3613 
3614 	(*pos)++;
3615 
3616 	if (t)
3617 		t = get_tracer_for_array(tr, t->next);
3618 
3619 	return t;
3620 }
3621 
3622 static void *t_start(struct seq_file *m, loff_t *pos)
3623 {
3624 	struct trace_array *tr = m->private;
3625 	struct tracer *t;
3626 	loff_t l = 0;
3627 
3628 	mutex_lock(&trace_types_lock);
3629 
3630 	t = get_tracer_for_array(tr, trace_types);
3631 	for (; t && l < *pos; t = t_next(m, t, &l))
3632 			;
3633 
3634 	return t;
3635 }
3636 
3637 static void t_stop(struct seq_file *m, void *p)
3638 {
3639 	mutex_unlock(&trace_types_lock);
3640 }
3641 
3642 static int t_show(struct seq_file *m, void *v)
3643 {
3644 	struct tracer *t = v;
3645 
3646 	if (!t)
3647 		return 0;
3648 
3649 	seq_puts(m, t->name);
3650 	if (t->next)
3651 		seq_putc(m, ' ');
3652 	else
3653 		seq_putc(m, '\n');
3654 
3655 	return 0;
3656 }
3657 
3658 static const struct seq_operations show_traces_seq_ops = {
3659 	.start		= t_start,
3660 	.next		= t_next,
3661 	.stop		= t_stop,
3662 	.show		= t_show,
3663 };
3664 
3665 static int show_traces_open(struct inode *inode, struct file *file)
3666 {
3667 	struct trace_array *tr = inode->i_private;
3668 	struct seq_file *m;
3669 	int ret;
3670 
3671 	if (tracing_disabled)
3672 		return -ENODEV;
3673 
3674 	ret = seq_open(file, &show_traces_seq_ops);
3675 	if (ret)
3676 		return ret;
3677 
3678 	m = file->private_data;
3679 	m->private = tr;
3680 
3681 	return 0;
3682 }
3683 
3684 static ssize_t
3685 tracing_write_stub(struct file *filp, const char __user *ubuf,
3686 		   size_t count, loff_t *ppos)
3687 {
3688 	return count;
3689 }
3690 
3691 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3692 {
3693 	int ret;
3694 
3695 	if (file->f_mode & FMODE_READ)
3696 		ret = seq_lseek(file, offset, whence);
3697 	else
3698 		file->f_pos = ret = 0;
3699 
3700 	return ret;
3701 }
3702 
3703 static const struct file_operations tracing_fops = {
3704 	.open		= tracing_open,
3705 	.read		= seq_read,
3706 	.write		= tracing_write_stub,
3707 	.llseek		= tracing_lseek,
3708 	.release	= tracing_release,
3709 };
3710 
3711 static const struct file_operations show_traces_fops = {
3712 	.open		= show_traces_open,
3713 	.read		= seq_read,
3714 	.release	= seq_release,
3715 	.llseek		= seq_lseek,
3716 };
3717 
3718 /*
3719  * The tracer itself will not take this lock, but still we want
3720  * to provide a consistent cpumask to user-space:
3721  */
3722 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3723 
3724 /*
3725  * Temporary storage for the character representation of the
3726  * CPU bitmask (and one more byte for the newline):
3727  */
3728 static char mask_str[NR_CPUS + 1];
3729 
3730 static ssize_t
3731 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3732 		     size_t count, loff_t *ppos)
3733 {
3734 	struct trace_array *tr = file_inode(filp)->i_private;
3735 	int len;
3736 
3737 	mutex_lock(&tracing_cpumask_update_lock);
3738 
3739 	len = snprintf(mask_str, count, "%*pb\n",
3740 		       cpumask_pr_args(tr->tracing_cpumask));
3741 	if (len >= count) {
3742 		count = -EINVAL;
3743 		goto out_err;
3744 	}
3745 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3746 
3747 out_err:
3748 	mutex_unlock(&tracing_cpumask_update_lock);
3749 
3750 	return count;
3751 }
3752 
3753 static ssize_t
3754 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3755 		      size_t count, loff_t *ppos)
3756 {
3757 	struct trace_array *tr = file_inode(filp)->i_private;
3758 	cpumask_var_t tracing_cpumask_new;
3759 	int err, cpu;
3760 
3761 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3762 		return -ENOMEM;
3763 
3764 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3765 	if (err)
3766 		goto err_unlock;
3767 
3768 	mutex_lock(&tracing_cpumask_update_lock);
3769 
3770 	local_irq_disable();
3771 	arch_spin_lock(&tr->max_lock);
3772 	for_each_tracing_cpu(cpu) {
3773 		/*
3774 		 * Increase/decrease the disabled counter if we are
3775 		 * about to flip a bit in the cpumask:
3776 		 */
3777 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3778 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3779 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3780 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3781 		}
3782 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3783 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3784 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3785 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3786 		}
3787 	}
3788 	arch_spin_unlock(&tr->max_lock);
3789 	local_irq_enable();
3790 
3791 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3792 
3793 	mutex_unlock(&tracing_cpumask_update_lock);
3794 	free_cpumask_var(tracing_cpumask_new);
3795 
3796 	return count;
3797 
3798 err_unlock:
3799 	free_cpumask_var(tracing_cpumask_new);
3800 
3801 	return err;
3802 }
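
/*
 * Editor's illustrative userspace sketch (not part of the original
 * file), kept in a comment since it does not build in kernel context.
 * It assumes tracefs is mounted at /sys/kernel/tracing; the write
 * handler above parses a hex mask, so "3" limits tracing to CPUs 0-1.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int example_set_tracing_cpus(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/tracing_cpumask", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, "3\n", 2) != 2) {
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 */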
3803 
3804 static const struct file_operations tracing_cpumask_fops = {
3805 	.open		= tracing_open_generic_tr,
3806 	.read		= tracing_cpumask_read,
3807 	.write		= tracing_cpumask_write,
3808 	.release	= tracing_release_generic_tr,
3809 	.llseek		= generic_file_llseek,
3810 };
3811 
3812 static int tracing_trace_options_show(struct seq_file *m, void *v)
3813 {
3814 	struct tracer_opt *trace_opts;
3815 	struct trace_array *tr = m->private;
3816 	u32 tracer_flags;
3817 	int i;
3818 
3819 	mutex_lock(&trace_types_lock);
3820 	tracer_flags = tr->current_trace->flags->val;
3821 	trace_opts = tr->current_trace->flags->opts;
3822 
3823 	for (i = 0; trace_options[i]; i++) {
3824 		if (tr->trace_flags & (1 << i))
3825 			seq_printf(m, "%s\n", trace_options[i]);
3826 		else
3827 			seq_printf(m, "no%s\n", trace_options[i]);
3828 	}
3829 
3830 	for (i = 0; trace_opts[i].name; i++) {
3831 		if (tracer_flags & trace_opts[i].bit)
3832 			seq_printf(m, "%s\n", trace_opts[i].name);
3833 		else
3834 			seq_printf(m, "no%s\n", trace_opts[i].name);
3835 	}
3836 	mutex_unlock(&trace_types_lock);
3837 
3838 	return 0;
3839 }
3840 
3841 static int __set_tracer_option(struct trace_array *tr,
3842 			       struct tracer_flags *tracer_flags,
3843 			       struct tracer_opt *opts, int neg)
3844 {
3845 	struct tracer *trace = tracer_flags->trace;
3846 	int ret;
3847 
3848 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3849 	if (ret)
3850 		return ret;
3851 
3852 	if (neg)
3853 		tracer_flags->val &= ~opts->bit;
3854 	else
3855 		tracer_flags->val |= opts->bit;
3856 	return 0;
3857 }
3858 
3859 /* Try to assign a tracer specific option */
3860 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3861 {
3862 	struct tracer *trace = tr->current_trace;
3863 	struct tracer_flags *tracer_flags = trace->flags;
3864 	struct tracer_opt *opts = NULL;
3865 	int i;
3866 
3867 	for (i = 0; tracer_flags->opts[i].name; i++) {
3868 		opts = &tracer_flags->opts[i];
3869 
3870 		if (strcmp(cmp, opts->name) == 0)
3871 			return __set_tracer_option(tr, trace->flags, opts, neg);
3872 	}
3873 
3874 	return -EINVAL;
3875 }
3876 
3877 /* Some tracers require overwrite to stay enabled */
3878 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3879 {
3880 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3881 		return -1;
3882 
3883 	return 0;
3884 }
3885 
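/*
 * Set or clear a single trace_flags bit for @tr.  The current tracer is
 * given a chance to reject the change via ->flag_changed, and a few
 * flags (RECORD_CMD, EVENT_FORK, OVERWRITE, PRINTK) carry extra side
 * effects that are applied below.
 */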
3886 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3887 {
3888 	/* do nothing if flag is already set */
3889 	if (!!(tr->trace_flags & mask) == !!enabled)
3890 		return 0;
3891 
3892 	/* Give the tracer a chance to approve the change */
3893 	if (tr->current_trace->flag_changed)
3894 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3895 			return -EINVAL;
3896 
3897 	if (enabled)
3898 		tr->trace_flags |= mask;
3899 	else
3900 		tr->trace_flags &= ~mask;
3901 
3902 	if (mask == TRACE_ITER_RECORD_CMD)
3903 		trace_event_enable_cmd_record(enabled);
3904 
3905 	if (mask == TRACE_ITER_EVENT_FORK)
3906 		trace_event_follow_fork(tr, enabled);
3907 
3908 	if (mask == TRACE_ITER_OVERWRITE) {
3909 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3910 #ifdef CONFIG_TRACER_MAX_TRACE
3911 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3912 #endif
3913 	}
3914 
3915 	if (mask == TRACE_ITER_PRINTK) {
3916 		trace_printk_start_stop_comm(enabled);
3917 		trace_printk_control(enabled);
3918 	}
3919 
3920 	return 0;
3921 }
3922 
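/*
 * Apply a single option string: e.g. "print-parent" sets a flag and
 * "noprint-parent" clears it.  The generic trace_options are tried
 * first, then the tracer-specific options of the current tracer.
 */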
3923 static int trace_set_options(struct trace_array *tr, char *option)
3924 {
3925 	char *cmp;
3926 	int neg = 0;
3927 	int ret = -ENODEV;
3928 	int i;
3929 	size_t orig_len = strlen(option);
3930 
3931 	cmp = strstrip(option);
3932 
3933 	if (strncmp(cmp, "no", 2) == 0) {
3934 		neg = 1;
3935 		cmp += 2;
3936 	}
3937 
3938 	mutex_lock(&trace_types_lock);
3939 
3940 	for (i = 0; trace_options[i]; i++) {
3941 		if (strcmp(cmp, trace_options[i]) == 0) {
3942 			ret = set_tracer_flag(tr, 1 << i, !neg);
3943 			break;
3944 		}
3945 	}
3946 
3947 	/* If no option could be set, test the specific tracer options */
3948 	if (!trace_options[i])
3949 		ret = set_tracer_option(tr, cmp, neg);
3950 
3951 	mutex_unlock(&trace_types_lock);
3952 
3953 	/*
3954 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
3955 	 * turn it back into a space.
3956 	 */
3957 	if (orig_len > strlen(option))
3958 		option[strlen(option)] = ' ';
3959 
3960 	return ret;
3961 }
3962 
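/*
 * trace_boot_options_buf holds the comma-separated list handed in on
 * the kernel command line via the trace_options= parameter; each entry
 * is applied with trace_set_options() once the tracer is up.
 */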
3963 static void __init apply_trace_boot_options(void)
3964 {
3965 	char *buf = trace_boot_options_buf;
3966 	char *option;
3967 
3968 	while (true) {
3969 		option = strsep(&buf, ",");
3970 
3971 		if (!option)
3972 			break;
3973 
3974 		if (*option)
3975 			trace_set_options(&global_trace, option);
3976 
3977 		/* Put back the comma to allow this to be called again */
3978 		if (buf)
3979 			*(buf - 1) = ',';
3980 	}
3981 }
3982 
3983 static ssize_t
3984 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3985 			size_t cnt, loff_t *ppos)
3986 {
3987 	struct seq_file *m = filp->private_data;
3988 	struct trace_array *tr = m->private;
3989 	char buf[64];
3990 	int ret;
3991 
3992 	if (cnt >= sizeof(buf))
3993 		return -EINVAL;
3994 
3995 	if (copy_from_user(buf, ubuf, cnt))
3996 		return -EFAULT;
3997 
3998 	buf[cnt] = 0;
3999 
4000 	ret = trace_set_options(tr, buf);
4001 	if (ret < 0)
4002 		return ret;
4003 
4004 	*ppos += cnt;
4005 
4006 	return cnt;
4007 }
4008 
4009 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4010 {
4011 	struct trace_array *tr = inode->i_private;
4012 	int ret;
4013 
4014 	if (tracing_disabled)
4015 		return -ENODEV;
4016 
4017 	if (trace_array_get(tr) < 0)
4018 		return -ENODEV;
4019 
4020 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4021 	if (ret < 0)
4022 		trace_array_put(tr);
4023 
4024 	return ret;
4025 }
4026 
4027 static const struct file_operations tracing_iter_fops = {
4028 	.open		= tracing_trace_options_open,
4029 	.read		= seq_read,
4030 	.llseek		= seq_lseek,
4031 	.release	= tracing_single_release_tr,
4032 	.write		= tracing_trace_options_write,
4033 };
4034 
4035 static const char readme_msg[] =
4036 	"tracing mini-HOWTO:\n\n"
4037 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4038 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4039 	" Important files:\n"
4040 	"  trace\t\t\t- The static contents of the buffer\n"
4041 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4042 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4043 	"  current_tracer\t- function and latency tracers\n"
4044 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4045 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4046 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4047 	"  trace_clock\t\t- change the clock used to order events\n"
4048 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4049 	"      global:   Synced across CPUs but slows tracing down.\n"
4050 	"     counter:   Not a clock, but just an increment\n"
4051 	"      uptime:   Jiffy counter from time of boot\n"
4052 	"        perf:   Same clock that perf events use\n"
4053 #ifdef CONFIG_X86_64
4054 	"     x86-tsc:   TSC cycle counter\n"
4055 #endif
4056 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4057 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4058 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4059 	"\t\t\t  Remove sub-buffer with rmdir\n"
4060 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4061 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4062 	"\t\t\t  option name\n"
4063 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4064 #ifdef CONFIG_DYNAMIC_FTRACE
4065 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4066 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4067 	"\t\t\t  functions\n"
4068 	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4069 	"\t     modules: Can select a group via module\n"
4070 	"\t      Format: :mod:<module-name>\n"
4071 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4072 	"\t    triggers: a command to perform when function is hit\n"
4073 	"\t      Format: <function>:<trigger>[:count]\n"
4074 	"\t     trigger: traceon, traceoff\n"
4075 	"\t\t      enable_event:<system>:<event>\n"
4076 	"\t\t      disable_event:<system>:<event>\n"
4077 #ifdef CONFIG_STACKTRACE
4078 	"\t\t      stacktrace\n"
4079 #endif
4080 #ifdef CONFIG_TRACER_SNAPSHOT
4081 	"\t\t      snapshot\n"
4082 #endif
4083 	"\t\t      dump\n"
4084 	"\t\t      cpudump\n"
4085 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4086 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4087 	"\t     The first one will disable tracing every time do_fault is hit\n"
4088 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4089 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4090 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4091 	"\t       the counter will not decrement. It only decrements when the\n"
4092 	"\t       trigger did work\n"
4093 	"\t     To remove trigger without count:\n"
4094 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4095 	"\t     To remove trigger with a count:\n"
4096 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4097 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4098 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4099 	"\t    modules: Can select a group via module command :mod:\n"
4100 	"\t    Does not accept triggers\n"
4101 #endif /* CONFIG_DYNAMIC_FTRACE */
4102 #ifdef CONFIG_FUNCTION_TRACER
4103 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4104 	"\t\t    (function)\n"
4105 #endif
4106 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4107 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4108 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4109 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4110 #endif
4111 #ifdef CONFIG_TRACER_SNAPSHOT
4112 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4113 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4114 	"\t\t\t  information\n"
4115 #endif
4116 #ifdef CONFIG_STACK_TRACER
4117 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4118 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4119 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4120 	"\t\t\t  new trace)\n"
4121 #ifdef CONFIG_DYNAMIC_FTRACE
4122 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4123 	"\t\t\t  traces\n"
4124 #endif
4125 #endif /* CONFIG_STACK_TRACER */
4126 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4127 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4128 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4129 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4130 	"\t\t\t  events\n"
4131 	"      filter\t\t- If set, only events passing filter are traced\n"
4132 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4133 	"\t\t\t  <event>:\n"
4134 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4135 	"      filter\t\t- If set, only events passing filter are traced\n"
4136 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4137 	"\t    Format: <trigger>[:count][if <filter>]\n"
4138 	"\t   trigger: traceon, traceoff\n"
4139 	"\t            enable_event:<system>:<event>\n"
4140 	"\t            disable_event:<system>:<event>\n"
4141 #ifdef CONFIG_HIST_TRIGGERS
4142 	"\t            enable_hist:<system>:<event>\n"
4143 	"\t            disable_hist:<system>:<event>\n"
4144 #endif
4145 #ifdef CONFIG_STACKTRACE
4146 	"\t\t    stacktrace\n"
4147 #endif
4148 #ifdef CONFIG_TRACER_SNAPSHOT
4149 	"\t\t    snapshot\n"
4150 #endif
4151 #ifdef CONFIG_HIST_TRIGGERS
4152 	"\t\t    hist (see below)\n"
4153 #endif
4154 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4155 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4156 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4157 	"\t                  events/block/block_unplug/trigger\n"
4158 	"\t   The first disables tracing every time block_unplug is hit.\n"
4159 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4160 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4161 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4162 	"\t   Like function triggers, the counter is only decremented if it\n"
4163 	"\t    enabled or disabled tracing.\n"
4164 	"\t   To remove a trigger without a count:\n"
4165 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4166 	"\t   To remove a trigger with a count:\n"
4167 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4168 	"\t   Filters can be ignored when removing a trigger.\n"
4169 #ifdef CONFIG_HIST_TRIGGERS
4170 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4171 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4172 	"\t            [:values=<field1[,field2,...]>]\n"
4173 	"\t            [:sort=<field1[,field2,...]>]\n"
4174 	"\t            [:size=#entries]\n"
4175 	"\t            [:pause][:continue][:clear]\n"
4176 	"\t            [:name=histname1]\n"
4177 	"\t            [if <filter>]\n\n"
4178 	"\t    When a matching event is hit, an entry is added to a hash\n"
4179 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4180 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4181 	"\t    correspond to fields in the event's format description.  Keys\n"
4182 	"\t    can be any field, or the special string 'stacktrace'.\n"
4183 	"\t    Compound keys consisting of up to two fields can be specified\n"
4184 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4185 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4186 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4187 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4188 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4189 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4190 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4191 	"\t    its histogram data will be shared with other triggers of the\n"
4192 	"\t    same name, and trigger hits will update this common data.\n\n"
4193 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4194 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4195 	"\t    triggers attached to an event, there will be a table for each\n"
4196 	"\t    trigger in the output.  The table displayed for a named\n"
4197 	"\t    trigger will be the same as any other instance having the\n"
4198 	"\t    same name.  The default format used to display a given field\n"
4199 	"\t    can be modified by appending any of the following modifiers\n"
4200 	"\t    to the field name, as applicable:\n\n"
4201 	"\t            .hex        display a number as a hex value\n"
4202 	"\t            .sym        display an address as a symbol\n"
4203 	"\t            .sym-offset display an address as a symbol and offset\n"
4204 	"\t            .execname   display a common_pid as a program name\n"
4205 	"\t            .syscall    display a syscall id as a syscall name\n"
4206 	"\t            .log2       display log2 value rather than raw number\n\n"
4207 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4208 	"\t    trigger or to start a hist trigger but not log any events\n"
4209 	"\t    until told to do so.  'continue' can be used to start or\n"
4210 	"\t    restart a paused hist trigger.\n\n"
4211 	"\t    The 'clear' parameter will clear the contents of a running\n"
4212 	"\t    hist trigger and leave its current paused/active state\n"
4213 	"\t    unchanged.\n\n"
4214 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4215 	"\t    have one event conditionally start and stop another event's\n"
4216 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4217 	"\t    the enable_event and disable_event triggers.\n"
4218 #endif
4219 ;
4220 
4221 static ssize_t
4222 tracing_readme_read(struct file *filp, char __user *ubuf,
4223 		       size_t cnt, loff_t *ppos)
4224 {
4225 	return simple_read_from_buffer(ubuf, cnt, ppos,
4226 					readme_msg, strlen(readme_msg));
4227 }
4228 
4229 static const struct file_operations tracing_readme_fops = {
4230 	.open		= tracing_open_generic,
4231 	.read		= tracing_readme_read,
4232 	.llseek		= generic_file_llseek,
4233 };
4234 
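/*
 * seq_file iterator for the saved_cmdlines file: walk the
 * map_cmdline_to_pid[] array and show one "pid comm" pair per line,
 * skipping slots that never recorded a command.
 */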
4235 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4236 {
4237 	unsigned int *ptr = v;
4238 
4239 	if (*pos || m->count)
4240 		ptr++;
4241 
4242 	(*pos)++;
4243 
4244 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4245 	     ptr++) {
4246 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4247 			continue;
4248 
4249 		return ptr;
4250 	}
4251 
4252 	return NULL;
4253 }
4254 
4255 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4256 {
4257 	void *v;
4258 	loff_t l = 0;
4259 
4260 	preempt_disable();
4261 	arch_spin_lock(&trace_cmdline_lock);
4262 
4263 	v = &savedcmd->map_cmdline_to_pid[0];
4264 	while (l <= *pos) {
4265 		v = saved_cmdlines_next(m, v, &l);
4266 		if (!v)
4267 			return NULL;
4268 	}
4269 
4270 	return v;
4271 }
4272 
4273 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4274 {
4275 	arch_spin_unlock(&trace_cmdline_lock);
4276 	preempt_enable();
4277 }
4278 
4279 static int saved_cmdlines_show(struct seq_file *m, void *v)
4280 {
4281 	char buf[TASK_COMM_LEN];
4282 	unsigned int *pid = v;
4283 
4284 	__trace_find_cmdline(*pid, buf);
4285 	seq_printf(m, "%d %s\n", *pid, buf);
4286 	return 0;
4287 }
4288 
4289 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4290 	.start		= saved_cmdlines_start,
4291 	.next		= saved_cmdlines_next,
4292 	.stop		= saved_cmdlines_stop,
4293 	.show		= saved_cmdlines_show,
4294 };
4295 
4296 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4297 {
4298 	if (tracing_disabled)
4299 		return -ENODEV;
4300 
4301 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4302 }
4303 
4304 static const struct file_operations tracing_saved_cmdlines_fops = {
4305 	.open		= tracing_saved_cmdlines_open,
4306 	.read		= seq_read,
4307 	.llseek		= seq_lseek,
4308 	.release	= seq_release,
4309 };
4310 
4311 static ssize_t
4312 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4313 				 size_t cnt, loff_t *ppos)
4314 {
4315 	char buf[64];
4316 	int r;
4317 
4318 	arch_spin_lock(&trace_cmdline_lock);
4319 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4320 	arch_spin_unlock(&trace_cmdline_lock);
4321 
4322 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4323 }
4324 
4325 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4326 {
4327 	kfree(s->saved_cmdlines);
4328 	kfree(s->map_cmdline_to_pid);
4329 	kfree(s);
4330 }
4331 
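/*
 * Resize by allocating a brand new saved_cmdlines_buffer, swapping it
 * in under trace_cmdline_lock and only then freeing the old one, so
 * readers never see a half-initialized buffer.
 */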
4332 static int tracing_resize_saved_cmdlines(unsigned int val)
4333 {
4334 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4335 
4336 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4337 	if (!s)
4338 		return -ENOMEM;
4339 
4340 	if (allocate_cmdlines_buffer(val, s) < 0) {
4341 		kfree(s);
4342 		return -ENOMEM;
4343 	}
4344 
4345 	arch_spin_lock(&trace_cmdline_lock);
4346 	savedcmd_temp = savedcmd;
4347 	savedcmd = s;
4348 	arch_spin_unlock(&trace_cmdline_lock);
4349 	free_saved_cmdlines_buffer(savedcmd_temp);
4350 
4351 	return 0;
4352 }
4353 
4354 static ssize_t
4355 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4356 				  size_t cnt, loff_t *ppos)
4357 {
4358 	unsigned long val;
4359 	int ret;
4360 
4361 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4362 	if (ret)
4363 		return ret;
4364 
4365 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4366 	if (!val || val > PID_MAX_DEFAULT)
4367 		return -EINVAL;
4368 
4369 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4370 	if (ret < 0)
4371 		return ret;
4372 
4373 	*ppos += cnt;
4374 
4375 	return cnt;
4376 }
4377 
4378 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4379 	.open		= tracing_open_generic,
4380 	.read		= tracing_saved_cmdlines_size_read,
4381 	.write		= tracing_saved_cmdlines_size_write,
4382 };
4383 
4384 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4385 static union trace_enum_map_item *
4386 update_enum_map(union trace_enum_map_item *ptr)
4387 {
4388 	if (!ptr->map.enum_string) {
4389 		if (ptr->tail.next) {
4390 			ptr = ptr->tail.next;
4391 			/* Set ptr to the next real item (skip head) */
4392 			ptr++;
4393 		} else
4394 			return NULL;
4395 	}
4396 	return ptr;
4397 }
4398 
4399 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4400 {
4401 	union trace_enum_map_item *ptr = v;
4402 
4403 	/*
4404 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4405 	 * This really should never happen.
4406 	 */
4407 	ptr = update_enum_map(ptr);
4408 	if (WARN_ON_ONCE(!ptr))
4409 		return NULL;
4410 
4411 	ptr++;
4412 
4413 	(*pos)++;
4414 
4415 	ptr = update_enum_map(ptr);
4416 
4417 	return ptr;
4418 }
4419 
4420 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4421 {
4422 	union trace_enum_map_item *v;
4423 	loff_t l = 0;
4424 
4425 	mutex_lock(&trace_enum_mutex);
4426 
4427 	v = trace_enum_maps;
4428 	if (v)
4429 		v++;
4430 
4431 	while (v && l < *pos) {
4432 		v = enum_map_next(m, v, &l);
4433 	}
4434 
4435 	return v;
4436 }
4437 
4438 static void enum_map_stop(struct seq_file *m, void *v)
4439 {
4440 	mutex_unlock(&trace_enum_mutex);
4441 }
4442 
4443 static int enum_map_show(struct seq_file *m, void *v)
4444 {
4445 	union trace_enum_map_item *ptr = v;
4446 
4447 	seq_printf(m, "%s %ld (%s)\n",
4448 		   ptr->map.enum_string, ptr->map.enum_value,
4449 		   ptr->map.system);
4450 
4451 	return 0;
4452 }
4453 
4454 static const struct seq_operations tracing_enum_map_seq_ops = {
4455 	.start		= enum_map_start,
4456 	.next		= enum_map_next,
4457 	.stop		= enum_map_stop,
4458 	.show		= enum_map_show,
4459 };
4460 
4461 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4462 {
4463 	if (tracing_disabled)
4464 		return -ENODEV;
4465 
4466 	return seq_open(filp, &tracing_enum_map_seq_ops);
4467 }
4468 
4469 static const struct file_operations tracing_enum_map_fops = {
4470 	.open		= tracing_enum_map_open,
4471 	.read		= seq_read,
4472 	.llseek		= seq_lseek,
4473 	.release	= seq_release,
4474 };
4475 
4476 static inline union trace_enum_map_item *
4477 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4478 {
4479 	/* Return tail of array given the head */
4480 	return ptr + ptr->head.length + 1;
4481 }
4482 
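/*
 * Each module's enum maps are stored as one contiguous block:
 *
 *   [ head: mod, length ][ map 0 ] ... [ map length-1 ][ tail: next ]
 *
 * Blocks are chained through the tail's next pointer, which is what
 * trace_enum_jmp_to_tail() above steps over.
 */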
4483 static void
4484 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4485 			   int len)
4486 {
4487 	struct trace_enum_map **stop;
4488 	struct trace_enum_map **map;
4489 	union trace_enum_map_item *map_array;
4490 	union trace_enum_map_item *ptr;
4491 
4492 	stop = start + len;
4493 
4494 	/*
4495 	 * The trace_enum_maps contains the map plus a head and tail item,
4496 	 * where the head holds the module and length of array, and the
4497 	 * tail holds a pointer to the next list.
4498 	 */
4499 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4500 	if (!map_array) {
4501 		pr_warn("Unable to allocate trace enum mapping\n");
4502 		return;
4503 	}
4504 
4505 	mutex_lock(&trace_enum_mutex);
4506 
4507 	if (!trace_enum_maps)
4508 		trace_enum_maps = map_array;
4509 	else {
4510 		ptr = trace_enum_maps;
4511 		for (;;) {
4512 			ptr = trace_enum_jmp_to_tail(ptr);
4513 			if (!ptr->tail.next)
4514 				break;
4515 			ptr = ptr->tail.next;
4516 
4517 		}
4518 		ptr->tail.next = map_array;
4519 	}
4520 	map_array->head.mod = mod;
4521 	map_array->head.length = len;
4522 	map_array++;
4523 
4524 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4525 		map_array->map = **map;
4526 		map_array++;
4527 	}
4528 	memset(map_array, 0, sizeof(*map_array));
4529 
4530 	mutex_unlock(&trace_enum_mutex);
4531 }
4532 
4533 static void trace_create_enum_file(struct dentry *d_tracer)
4534 {
4535 	trace_create_file("enum_map", 0444, d_tracer,
4536 			  NULL, &tracing_enum_map_fops);
4537 }
4538 
4539 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4540 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4541 static inline void trace_insert_enum_map_file(struct module *mod,
4542 			      struct trace_enum_map **start, int len) { }
4543 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4544 
4545 static void trace_insert_enum_map(struct module *mod,
4546 				  struct trace_enum_map **start, int len)
4547 {
4548 	struct trace_enum_map **map;
4549 
4550 	if (len <= 0)
4551 		return;
4552 
4553 	map = start;
4554 
4555 	trace_event_enum_update(map, len);
4556 
4557 	trace_insert_enum_map_file(mod, start, len);
4558 }
4559 
4560 static ssize_t
4561 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4562 		       size_t cnt, loff_t *ppos)
4563 {
4564 	struct trace_array *tr = filp->private_data;
4565 	char buf[MAX_TRACER_SIZE+2];
4566 	int r;
4567 
4568 	mutex_lock(&trace_types_lock);
4569 	r = sprintf(buf, "%s\n", tr->current_trace->name);
4570 	mutex_unlock(&trace_types_lock);
4571 
4572 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4573 }
4574 
4575 int tracer_init(struct tracer *t, struct trace_array *tr)
4576 {
4577 	tracing_reset_online_cpus(&tr->trace_buffer);
4578 	return t->init(tr);
4579 }
4580 
4581 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4582 {
4583 	int cpu;
4584 
4585 	for_each_tracing_cpu(cpu)
4586 		per_cpu_ptr(buf->data, cpu)->entries = val;
4587 }
4588 
4589 #ifdef CONFIG_TRACER_MAX_TRACE
4590 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4591 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4592 					struct trace_buffer *size_buf, int cpu_id)
4593 {
4594 	int cpu, ret = 0;
4595 
4596 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
4597 		for_each_tracing_cpu(cpu) {
4598 			ret = ring_buffer_resize(trace_buf->buffer,
4599 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4600 			if (ret < 0)
4601 				break;
4602 			per_cpu_ptr(trace_buf->data, cpu)->entries =
4603 				per_cpu_ptr(size_buf->data, cpu)->entries;
4604 		}
4605 	} else {
4606 		ret = ring_buffer_resize(trace_buf->buffer,
4607 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4608 		if (ret == 0)
4609 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4610 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4611 	}
4612 
4613 	return ret;
4614 }
4615 #endif /* CONFIG_TRACER_MAX_TRACE */
4616 
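/*
 * Resize the main trace buffer (and, for tracers that use the
 * max/snapshot buffer, the max buffer as well) to @size bytes for @cpu,
 * or for every CPU when @cpu is RING_BUFFER_ALL_CPUS.  The two buffers
 * are kept the same size so the latency tracers can swap them.
 */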
4617 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4618 					unsigned long size, int cpu)
4619 {
4620 	int ret;
4621 
4622 	/*
4623 	 * If kernel or user changes the size of the ring buffer
4624 	 * we use the size that was given, and we can forget about
4625 	 * expanding it later.
4626 	 */
4627 	ring_buffer_expanded = true;
4628 
4629 	/* May be called before buffers are initialized */
4630 	if (!tr->trace_buffer.buffer)
4631 		return 0;
4632 
4633 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4634 	if (ret < 0)
4635 		return ret;
4636 
4637 #ifdef CONFIG_TRACER_MAX_TRACE
4638 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4639 	    !tr->current_trace->use_max_tr)
4640 		goto out;
4641 
4642 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4643 	if (ret < 0) {
4644 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4645 						     &tr->trace_buffer, cpu);
4646 		if (r < 0) {
4647 			/*
4648 			 * AARGH! We are left with different
4649 			 * size max buffer!!!!
4650 			 * The max buffer is our "snapshot" buffer.
4651 			 * When a tracer needs a snapshot (one of the
4652 			 * latency tracers), it swaps the max buffer
4653 			 * with the saved snapshot. We succeeded in
4654 			 * updating the size of the main buffer, but failed to
4655 			 * update the size of the max buffer. But when we tried
4656 			 * to reset the main buffer to the original size, we
4657 			 * failed there too. This is very unlikely to
4658 			 * happen, but if it does, warn and kill all
4659 			 * tracing.
4660 			 */
4661 			WARN_ON(1);
4662 			tracing_disabled = 1;
4663 		}
4664 		return ret;
4665 	}
4666 
4667 	if (cpu == RING_BUFFER_ALL_CPUS)
4668 		set_buffer_entries(&tr->max_buffer, size);
4669 	else
4670 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4671 
4672  out:
4673 #endif /* CONFIG_TRACER_MAX_TRACE */
4674 
4675 	if (cpu == RING_BUFFER_ALL_CPUS)
4676 		set_buffer_entries(&tr->trace_buffer, size);
4677 	else
4678 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4679 
4680 	return ret;
4681 }
4682 
4683 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4684 					  unsigned long size, int cpu_id)
4685 {
4686 	int ret = size;
4687 
4688 	mutex_lock(&trace_types_lock);
4689 
4690 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4691 		/* make sure this cpu is enabled in the mask */
4692 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4693 			ret = -EINVAL;
4694 			goto out;
4695 		}
4696 	}
4697 
4698 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4699 	if (ret < 0)
4700 		ret = -ENOMEM;
4701 
4702 out:
4703 	mutex_unlock(&trace_types_lock);
4704 
4705 	return ret;
4706 }
4707 
4708 
4709 /**
4710  * tracing_update_buffers - used by tracing facility to expand ring buffers
4711  *
4712  * To save memory when tracing is never used on a system that has it
4713  * configured in, the ring buffers start out at a minimum size. Once a
4714  * user starts to use the tracing facility, they need to grow to their
4715  * default size.
4716  *
4717  * This function is to be called when a tracer is about to be used.
4718  */
4719 int tracing_update_buffers(void)
4720 {
4721 	int ret = 0;
4722 
4723 	mutex_lock(&trace_types_lock);
4724 	if (!ring_buffer_expanded)
4725 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4726 						RING_BUFFER_ALL_CPUS);
4727 	mutex_unlock(&trace_types_lock);
4728 
4729 	return ret;
4730 }
4731 
4732 struct trace_option_dentry;
4733 
4734 static void
4735 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4736 
4737 /*
4738  * Used to clear out the tracer before deletion of an instance.
4739  * Must have trace_types_lock held.
4740  */
4741 static void tracing_set_nop(struct trace_array *tr)
4742 {
4743 	if (tr->current_trace == &nop_trace)
4744 		return;
4745 
4746 	tr->current_trace->enabled--;
4747 
4748 	if (tr->current_trace->reset)
4749 		tr->current_trace->reset(tr);
4750 
4751 	tr->current_trace = &nop_trace;
4752 }
4753 
4754 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4755 {
4756 	/* Only enable if the directory has been created already. */
4757 	if (!tr->dir)
4758 		return;
4759 
4760 	create_trace_option_files(tr, t);
4761 }
4762 
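/*
 * Switch the current tracer of @tr to the tracer named @buf: expand the
 * ring buffer if it is still at its boot-time minimum, tear down the
 * old tracer, allocate or free the snapshot buffer as the new tracer
 * requires, and finally run the new tracer's init callback.
 */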
4763 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4764 {
4765 	struct tracer *t;
4766 #ifdef CONFIG_TRACER_MAX_TRACE
4767 	bool had_max_tr;
4768 #endif
4769 	int ret = 0;
4770 
4771 	mutex_lock(&trace_types_lock);
4772 
4773 	if (!ring_buffer_expanded) {
4774 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4775 						RING_BUFFER_ALL_CPUS);
4776 		if (ret < 0)
4777 			goto out;
4778 		ret = 0;
4779 	}
4780 
4781 	for (t = trace_types; t; t = t->next) {
4782 		if (strcmp(t->name, buf) == 0)
4783 			break;
4784 	}
4785 	if (!t) {
4786 		ret = -EINVAL;
4787 		goto out;
4788 	}
4789 	if (t == tr->current_trace)
4790 		goto out;
4791 
4792 	/* Some tracers are only allowed for the top level buffer */
4793 	if (!trace_ok_for_array(t, tr)) {
4794 		ret = -EINVAL;
4795 		goto out;
4796 	}
4797 
4798 	/* If trace pipe files are being read, we can't change the tracer */
4799 	if (tr->current_trace->ref) {
4800 		ret = -EBUSY;
4801 		goto out;
4802 	}
4803 
4804 	trace_branch_disable();
4805 
4806 	tr->current_trace->enabled--;
4807 
4808 	if (tr->current_trace->reset)
4809 		tr->current_trace->reset(tr);
4810 
4811 	/* Current trace needs to be nop_trace before synchronize_sched */
4812 	tr->current_trace = &nop_trace;
4813 
4814 #ifdef CONFIG_TRACER_MAX_TRACE
4815 	had_max_tr = tr->allocated_snapshot;
4816 
4817 	if (had_max_tr && !t->use_max_tr) {
4818 		/*
4819 		 * We need to make sure that the update_max_tr sees that
4820 		 * current_trace changed to nop_trace to keep it from
4821 		 * swapping the buffers after we resize it.
4822 		 * update_max_tr() is called with interrupts disabled,
4823 		 * so a synchronize_sched() is sufficient.
4824 		 */
4825 		synchronize_sched();
4826 		free_snapshot(tr);
4827 	}
4828 #endif
4829 
4830 #ifdef CONFIG_TRACER_MAX_TRACE
4831 	if (t->use_max_tr && !had_max_tr) {
4832 		ret = alloc_snapshot(tr);
4833 		if (ret < 0)
4834 			goto out;
4835 	}
4836 #endif
4837 
4838 	if (t->init) {
4839 		ret = tracer_init(t, tr);
4840 		if (ret)
4841 			goto out;
4842 	}
4843 
4844 	tr->current_trace = t;
4845 	tr->current_trace->enabled++;
4846 	trace_branch_enable(tr);
4847  out:
4848 	mutex_unlock(&trace_types_lock);
4849 
4850 	return ret;
4851 }
4852 
4853 static ssize_t
4854 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4855 			size_t cnt, loff_t *ppos)
4856 {
4857 	struct trace_array *tr = filp->private_data;
4858 	char buf[MAX_TRACER_SIZE+1];
4859 	int i;
4860 	size_t ret;
4861 	int err;
4862 
4863 	ret = cnt;
4864 
4865 	if (cnt > MAX_TRACER_SIZE)
4866 		cnt = MAX_TRACER_SIZE;
4867 
4868 	if (copy_from_user(buf, ubuf, cnt))
4869 		return -EFAULT;
4870 
4871 	buf[cnt] = 0;
4872 
4873 	/* strip trailing whitespace. */
4874 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4875 		buf[i] = 0;
4876 
4877 	err = tracing_set_tracer(tr, buf);
4878 	if (err)
4879 		return err;
4880 
4881 	*ppos += ret;
4882 
4883 	return ret;
4884 }
4885 
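/*
 * Helpers for files such as tracing_thresh and tracing_max_latency that
 * store nanoseconds internally but are read and written in microseconds
 * (writes are multiplied by 1000 below).
 */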
4886 static ssize_t
4887 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4888 		   size_t cnt, loff_t *ppos)
4889 {
4890 	char buf[64];
4891 	int r;
4892 
4893 	r = snprintf(buf, sizeof(buf), "%ld\n",
4894 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4895 	if (r > sizeof(buf))
4896 		r = sizeof(buf);
4897 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4898 }
4899 
4900 static ssize_t
4901 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4902 		    size_t cnt, loff_t *ppos)
4903 {
4904 	unsigned long val;
4905 	int ret;
4906 
4907 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4908 	if (ret)
4909 		return ret;
4910 
4911 	*ptr = val * 1000;
4912 
4913 	return cnt;
4914 }
4915 
4916 static ssize_t
4917 tracing_thresh_read(struct file *filp, char __user *ubuf,
4918 		    size_t cnt, loff_t *ppos)
4919 {
4920 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4921 }
4922 
4923 static ssize_t
4924 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4925 		     size_t cnt, loff_t *ppos)
4926 {
4927 	struct trace_array *tr = filp->private_data;
4928 	int ret;
4929 
4930 	mutex_lock(&trace_types_lock);
4931 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4932 	if (ret < 0)
4933 		goto out;
4934 
4935 	if (tr->current_trace->update_thresh) {
4936 		ret = tr->current_trace->update_thresh(tr);
4937 		if (ret < 0)
4938 			goto out;
4939 	}
4940 
4941 	ret = cnt;
4942 out:
4943 	mutex_unlock(&trace_types_lock);
4944 
4945 	return ret;
4946 }
4947 
4948 #ifdef CONFIG_TRACER_MAX_TRACE
4949 
4950 static ssize_t
4951 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4952 		     size_t cnt, loff_t *ppos)
4953 {
4954 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4955 }
4956 
4957 static ssize_t
4958 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4959 		      size_t cnt, loff_t *ppos)
4960 {
4961 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4962 }
4963 
4964 #endif
4965 
4966 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4967 {
4968 	struct trace_array *tr = inode->i_private;
4969 	struct trace_iterator *iter;
4970 	int ret = 0;
4971 
4972 	if (tracing_disabled)
4973 		return -ENODEV;
4974 
4975 	if (trace_array_get(tr) < 0)
4976 		return -ENODEV;
4977 
4978 	mutex_lock(&trace_types_lock);
4979 
4980 	/* create a buffer to store the information to pass to userspace */
4981 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4982 	if (!iter) {
4983 		ret = -ENOMEM;
4984 		__trace_array_put(tr);
4985 		goto out;
4986 	}
4987 
4988 	trace_seq_init(&iter->seq);
4989 	iter->trace = tr->current_trace;
4990 
4991 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4992 		ret = -ENOMEM;
4993 		goto fail;
4994 	}
4995 
4996 	/* trace pipe does not show start of buffer */
4997 	cpumask_setall(iter->started);
4998 
4999 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5000 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5001 
5002 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5003 	if (trace_clocks[tr->clock_id].in_ns)
5004 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5005 
5006 	iter->tr = tr;
5007 	iter->trace_buffer = &tr->trace_buffer;
5008 	iter->cpu_file = tracing_get_cpu(inode);
5009 	mutex_init(&iter->mutex);
5010 	filp->private_data = iter;
5011 
5012 	if (iter->trace->pipe_open)
5013 		iter->trace->pipe_open(iter);
5014 
5015 	nonseekable_open(inode, filp);
5016 
5017 	tr->current_trace->ref++;
5018 out:
5019 	mutex_unlock(&trace_types_lock);
5020 	return ret;
5021 
5022 fail:
5023 	/* iter->trace points at the live tracer and must not be freed */
5024 	kfree(iter);
5025 	__trace_array_put(tr);
5026 	mutex_unlock(&trace_types_lock);
5027 	return ret;
5028 }
5029 
5030 static int tracing_release_pipe(struct inode *inode, struct file *file)
5031 {
5032 	struct trace_iterator *iter = file->private_data;
5033 	struct trace_array *tr = inode->i_private;
5034 
5035 	mutex_lock(&trace_types_lock);
5036 
5037 	tr->current_trace->ref--;
5038 
5039 	if (iter->trace->pipe_close)
5040 		iter->trace->pipe_close(iter);
5041 
5042 	mutex_unlock(&trace_types_lock);
5043 
5044 	free_cpumask_var(iter->started);
5045 	mutex_destroy(&iter->mutex);
5046 	kfree(iter);
5047 
5048 	trace_array_put(tr);
5049 
5050 	return 0;
5051 }
5052 
5053 static unsigned int
5054 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5055 {
5056 	struct trace_array *tr = iter->tr;
5057 
5058 	/* Iterators are static; they should be either filled or empty */
5059 	if (trace_buffer_iter(iter, iter->cpu_file))
5060 		return POLLIN | POLLRDNORM;
5061 
5062 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5063 		/*
5064 		 * Always select as readable when in blocking mode
5065 		 */
5066 		return POLLIN | POLLRDNORM;
5067 	else
5068 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5069 					     filp, poll_table);
5070 }
5071 
5072 static unsigned int
5073 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5074 {
5075 	struct trace_iterator *iter = filp->private_data;
5076 
5077 	return trace_poll(iter, filp, poll_table);
5078 }
5079 
5080 /* Must be called with iter->mutex held. */
5081 static int tracing_wait_pipe(struct file *filp)
5082 {
5083 	struct trace_iterator *iter = filp->private_data;
5084 	int ret;
5085 
5086 	while (trace_empty(iter)) {
5087 
5088 		if ((filp->f_flags & O_NONBLOCK)) {
5089 			return -EAGAIN;
5090 		}
5091 
5092 		/*
5093 		 * We block until we have read something and tracing is disabled.
5094 		 * We still block if tracing is disabled but we have never
5095 		 * read anything. This allows a user to cat this file, and
5096 		 * then enable tracing. But after we have read something,
5097 		 * we give an EOF when tracing is again disabled.
5098 		 *
5099 		 * iter->pos will be 0 if we haven't read anything.
5100 		 */
5101 		if (!tracing_is_on() && iter->pos)
5102 			break;
5103 
5104 		mutex_unlock(&iter->mutex);
5105 
5106 		ret = wait_on_pipe(iter, false);
5107 
5108 		mutex_lock(&iter->mutex);
5109 
5110 		if (ret)
5111 			return ret;
5112 	}
5113 
5114 	return 1;
5115 }
5116 
5117 /*
5118  * Consumer reader.
5119  */
5120 static ssize_t
5121 tracing_read_pipe(struct file *filp, char __user *ubuf,
5122 		  size_t cnt, loff_t *ppos)
5123 {
5124 	struct trace_iterator *iter = filp->private_data;
5125 	ssize_t sret;
5126 
5127 	/*
5128 	 * Avoid more than one consumer on a single file descriptor.
5129 	 * This is just a matter of trace coherency; the ring buffer itself
5130 	 * is protected.
5131 	 */
5132 	mutex_lock(&iter->mutex);
5133 
5134 	/* return any leftover data */
5135 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5136 	if (sret != -EBUSY)
5137 		goto out;
5138 
5139 	trace_seq_init(&iter->seq);
5140 
5141 	if (iter->trace->read) {
5142 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5143 		if (sret)
5144 			goto out;
5145 	}
5146 
5147 waitagain:
5148 	sret = tracing_wait_pipe(filp);
5149 	if (sret <= 0)
5150 		goto out;
5151 
5152 	/* stop when tracing is finished */
5153 	if (trace_empty(iter)) {
5154 		sret = 0;
5155 		goto out;
5156 	}
5157 
5158 	if (cnt >= PAGE_SIZE)
5159 		cnt = PAGE_SIZE - 1;
5160 
5161 	/* reset all but tr, trace, and overruns */
5162 	memset(&iter->seq, 0,
5163 	       sizeof(struct trace_iterator) -
5164 	       offsetof(struct trace_iterator, seq));
5165 	cpumask_clear(iter->started);
5166 	iter->pos = -1;
5167 
5168 	trace_event_read_lock();
5169 	trace_access_lock(iter->cpu_file);
5170 	while (trace_find_next_entry_inc(iter) != NULL) {
5171 		enum print_line_t ret;
5172 		int save_len = iter->seq.seq.len;
5173 
5174 		ret = print_trace_line(iter);
5175 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5176 			/* don't print partial lines */
5177 			iter->seq.seq.len = save_len;
5178 			break;
5179 		}
5180 		if (ret != TRACE_TYPE_NO_CONSUME)
5181 			trace_consume(iter);
5182 
5183 		if (trace_seq_used(&iter->seq) >= cnt)
5184 			break;
5185 
5186 		/*
5187 		 * Setting the full flag means we reached the trace_seq buffer
5188 		 * size and we should have left via the partial output condition above.
5189 		 * One of the trace_seq_* functions is not used properly.
5190 		 */
5191 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5192 			  iter->ent->type);
5193 	}
5194 	trace_access_unlock(iter->cpu_file);
5195 	trace_event_read_unlock();
5196 
5197 	/* Now copy what we have to the user */
5198 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5199 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5200 		trace_seq_init(&iter->seq);
5201 
5202 	/*
5203 	 * If there was nothing to send to user, in spite of consuming trace
5204 	 * entries, go back to wait for more entries.
5205 	 */
5206 	if (sret == -EBUSY)
5207 		goto waitagain;
5208 
5209 out:
5210 	mutex_unlock(&iter->mutex);
5211 
5212 	return sret;
5213 }
5214 
5215 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5216 				     unsigned int idx)
5217 {
5218 	__free_page(spd->pages[idx]);
5219 }
5220 
5221 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5222 	.can_merge		= 0,
5223 	.confirm		= generic_pipe_buf_confirm,
5224 	.release		= generic_pipe_buf_release,
5225 	.steal			= generic_pipe_buf_steal,
5226 	.get			= generic_pipe_buf_get,
5227 };
5228 
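/*
 * Fill iter->seq with as many complete trace lines as fit into @rem
 * bytes, consuming the entries as we go.  Returns the amount of @rem
 * left over, which the splice loop below uses to decide whether to
 * start filling another page.
 */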
5229 static size_t
5230 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5231 {
5232 	size_t count;
5233 	int save_len;
5234 	int ret;
5235 
5236 	/* Seq buffer is page-sized, exactly what we need. */
5237 	for (;;) {
5238 		save_len = iter->seq.seq.len;
5239 		ret = print_trace_line(iter);
5240 
5241 		if (trace_seq_has_overflowed(&iter->seq)) {
5242 			iter->seq.seq.len = save_len;
5243 			break;
5244 		}
5245 
5246 		/*
5247 		 * This should not be hit, because it should only
5248 		 * be set if the iter->seq overflowed. But check it
5249 		 * anyway to be safe.
5250 		 */
5251 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5252 			iter->seq.seq.len = save_len;
5253 			break;
5254 		}
5255 
5256 		count = trace_seq_used(&iter->seq) - save_len;
5257 		if (rem < count) {
5258 			rem = 0;
5259 			iter->seq.seq.len = save_len;
5260 			break;
5261 		}
5262 
5263 		if (ret != TRACE_TYPE_NO_CONSUME)
5264 			trace_consume(iter);
5265 		rem -= count;
5266 		if (!trace_find_next_entry_inc(iter))	{
5267 			rem = 0;
5268 			iter->ent = NULL;
5269 			break;
5270 		}
5271 	}
5272 
5273 	return rem;
5274 }
5275 
5276 static ssize_t tracing_splice_read_pipe(struct file *filp,
5277 					loff_t *ppos,
5278 					struct pipe_inode_info *pipe,
5279 					size_t len,
5280 					unsigned int flags)
5281 {
5282 	struct page *pages_def[PIPE_DEF_BUFFERS];
5283 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5284 	struct trace_iterator *iter = filp->private_data;
5285 	struct splice_pipe_desc spd = {
5286 		.pages		= pages_def,
5287 		.partial	= partial_def,
5288 		.nr_pages	= 0, /* This gets updated below. */
5289 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5290 		.flags		= flags,
5291 		.ops		= &tracing_pipe_buf_ops,
5292 		.spd_release	= tracing_spd_release_pipe,
5293 	};
5294 	ssize_t ret;
5295 	size_t rem;
5296 	unsigned int i;
5297 
5298 	if (splice_grow_spd(pipe, &spd))
5299 		return -ENOMEM;
5300 
5301 	mutex_lock(&iter->mutex);
5302 
5303 	if (iter->trace->splice_read) {
5304 		ret = iter->trace->splice_read(iter, filp,
5305 					       ppos, pipe, len, flags);
5306 		if (ret)
5307 			goto out_err;
5308 	}
5309 
5310 	ret = tracing_wait_pipe(filp);
5311 	if (ret <= 0)
5312 		goto out_err;
5313 
5314 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5315 		ret = -EFAULT;
5316 		goto out_err;
5317 	}
5318 
5319 	trace_event_read_lock();
5320 	trace_access_lock(iter->cpu_file);
5321 
5322 	/* Fill as many pages as possible. */
5323 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5324 		spd.pages[i] = alloc_page(GFP_KERNEL);
5325 		if (!spd.pages[i])
5326 			break;
5327 
5328 		rem = tracing_fill_pipe_page(rem, iter);
5329 
5330 		/* Copy the data into the page, so we can start over. */
5331 		ret = trace_seq_to_buffer(&iter->seq,
5332 					  page_address(spd.pages[i]),
5333 					  trace_seq_used(&iter->seq));
5334 		if (ret < 0) {
5335 			__free_page(spd.pages[i]);
5336 			break;
5337 		}
5338 		spd.partial[i].offset = 0;
5339 		spd.partial[i].len = trace_seq_used(&iter->seq);
5340 
5341 		trace_seq_init(&iter->seq);
5342 	}
5343 
5344 	trace_access_unlock(iter->cpu_file);
5345 	trace_event_read_unlock();
5346 	mutex_unlock(&iter->mutex);
5347 
5348 	spd.nr_pages = i;
5349 
5350 	if (i)
5351 		ret = splice_to_pipe(pipe, &spd);
5352 	else
5353 		ret = 0;
5354 out:
5355 	splice_shrink_spd(&spd);
5356 	return ret;
5357 
5358 out_err:
5359 	mutex_unlock(&iter->mutex);
5360 	goto out;
5361 }
5362 
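/*
 * buffer_size_kb: reading shows the per-cpu buffer size in KB (or "X"
 * when the CPUs differ); writing resizes the buffer in KB, e.g.
 *
 *   # echo 4096 > buffer_size_kb
 *
 * sets each per-cpu buffer (or a single CPU's buffer for the copies of
 * this file under per_cpu/) to 4 MB.
 */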
5363 static ssize_t
5364 tracing_entries_read(struct file *filp, char __user *ubuf,
5365 		     size_t cnt, loff_t *ppos)
5366 {
5367 	struct inode *inode = file_inode(filp);
5368 	struct trace_array *tr = inode->i_private;
5369 	int cpu = tracing_get_cpu(inode);
5370 	char buf[64];
5371 	int r = 0;
5372 	ssize_t ret;
5373 
5374 	mutex_lock(&trace_types_lock);
5375 
5376 	if (cpu == RING_BUFFER_ALL_CPUS) {
5377 		int cpu, buf_size_same;
5378 		unsigned long size;
5379 
5380 		size = 0;
5381 		buf_size_same = 1;
5382 		/* check if all cpu sizes are same */
5383 		/* check if all cpu sizes are the same */
5384 			/* fill in the size from first enabled cpu */
5385 			if (size == 0)
5386 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5387 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5388 				buf_size_same = 0;
5389 				break;
5390 			}
5391 		}
5392 
5393 		if (buf_size_same) {
5394 			if (!ring_buffer_expanded)
5395 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5396 					    size >> 10,
5397 					    trace_buf_size >> 10);
5398 			else
5399 				r = sprintf(buf, "%lu\n", size >> 10);
5400 		} else
5401 			r = sprintf(buf, "X\n");
5402 	} else
5403 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5404 
5405 	mutex_unlock(&trace_types_lock);
5406 
5407 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5408 	return ret;
5409 }
5410 
5411 static ssize_t
5412 tracing_entries_write(struct file *filp, const char __user *ubuf,
5413 		      size_t cnt, loff_t *ppos)
5414 {
5415 	struct inode *inode = file_inode(filp);
5416 	struct trace_array *tr = inode->i_private;
5417 	unsigned long val;
5418 	int ret;
5419 
5420 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5421 	if (ret)
5422 		return ret;
5423 
5424 	/* must have at least 1 entry */
5425 	if (!val)
5426 		return -EINVAL;
5427 
5428 	/* value is in KB */
5429 	val <<= 10;
5430 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5431 	if (ret < 0)
5432 		return ret;
5433 
5434 	*ppos += cnt;
5435 
5436 	return cnt;
5437 }
5438 
5439 static ssize_t
5440 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5441 				size_t cnt, loff_t *ppos)
5442 {
5443 	struct trace_array *tr = filp->private_data;
5444 	char buf[64];
5445 	int r, cpu;
5446 	unsigned long size = 0, expanded_size = 0;
5447 
5448 	mutex_lock(&trace_types_lock);
5449 	for_each_tracing_cpu(cpu) {
5450 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5451 		if (!ring_buffer_expanded)
5452 			expanded_size += trace_buf_size >> 10;
5453 	}
5454 	if (ring_buffer_expanded)
5455 		r = sprintf(buf, "%lu\n", size);
5456 	else
5457 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5458 	mutex_unlock(&trace_types_lock);
5459 
5460 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5461 }
5462 
5463 static ssize_t
5464 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5465 			  size_t cnt, loff_t *ppos)
5466 {
5467 	/*
5468 	 * There is no need to read what the user has written; this function
5469 	 * exists just to make sure that there is no error when "echo" is used.
5470 	 */
5471 
5472 	*ppos += cnt;
5473 
5474 	return cnt;
5475 }
5476 
5477 static int
5478 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5479 {
5480 	struct trace_array *tr = inode->i_private;
5481 
5482 	/* disable tracing ? */
5483 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5484 		tracer_tracing_off(tr);
5485 	/* resize the ring buffer to 0 */
5486 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5487 
5488 	trace_array_put(tr);
5489 
5490 	return 0;
5491 }
5492 
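/*
 * trace_marker: anything written here is injected into the ring buffer
 * as a print entry, which is handy for correlating user-space activity
 * with kernel events, e.g.
 *
 *   # echo "hello world" > trace_marker
 */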
5493 static ssize_t
5494 tracing_mark_write(struct file *filp, const char __user *ubuf,
5495 					size_t cnt, loff_t *fpos)
5496 {
5497 	unsigned long addr = (unsigned long)ubuf;
5498 	struct trace_array *tr = filp->private_data;
5499 	struct ring_buffer_event *event;
5500 	struct ring_buffer *buffer;
5501 	struct print_entry *entry;
5502 	unsigned long irq_flags;
5503 	struct page *pages[2];
5504 	void *map_page[2];
5505 	int nr_pages = 1;
5506 	ssize_t written;
5507 	int offset;
5508 	int size;
5509 	int len;
5510 	int ret;
5511 	int i;
5512 
5513 	if (tracing_disabled)
5514 		return -EINVAL;
5515 
5516 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5517 		return -EINVAL;
5518 
5519 	if (cnt > TRACE_BUF_SIZE)
5520 		cnt = TRACE_BUF_SIZE;
5521 
5522 	/*
5523 	 * Userspace is injecting traces into the kernel trace buffer.
5524 	 * We want to be as non intrusive as possible.
5525 	 * To do so, we do not want to allocate any special buffers
5526 	 * or take any locks, but instead write the userspace data
5527 	 * straight into the ring buffer.
5528 	 *
5529 	 * First we need to pin the userspace buffer into memory. Most
5530 	 * likely it already is, because the caller just referenced it,
5531 	 * but there's no guarantee. By using get_user_pages_fast()
5532 	 * and kmap_atomic/kunmap_atomic() we can get access to the
5533 	 * pages directly. We then write the data directly into the
5534 	 * ring buffer.
5535 	 */
5536 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5537 
5538 	/* check if we cross pages */
5539 	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5540 		nr_pages = 2;
5541 
5542 	offset = addr & (PAGE_SIZE - 1);
5543 	addr &= PAGE_MASK;
5544 
5545 	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5546 	if (ret < nr_pages) {
5547 		while (--ret >= 0)
5548 			put_page(pages[ret]);
5549 		written = -EFAULT;
5550 		goto out;
5551 	}
5552 
5553 	for (i = 0; i < nr_pages; i++)
5554 		map_page[i] = kmap_atomic(pages[i]);
5555 
5556 	local_save_flags(irq_flags);
5557 	size = sizeof(*entry) + cnt + 2; /* possible \n added */
5558 	buffer = tr->trace_buffer.buffer;
5559 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5560 					  irq_flags, preempt_count());
5561 	if (!event) {
5562 		/* Ring buffer disabled, return as if not open for write */
5563 		written = -EBADF;
5564 		goto out_unlock;
5565 	}
5566 
5567 	entry = ring_buffer_event_data(event);
5568 	entry->ip = _THIS_IP_;
5569 
5570 	if (nr_pages == 2) {
5571 		len = PAGE_SIZE - offset;
5572 		memcpy(&entry->buf, map_page[0] + offset, len);
5573 		memcpy(&entry->buf[len], map_page[1], cnt - len);
5574 	} else
5575 		memcpy(&entry->buf, map_page[0] + offset, cnt);
5576 
5577 	if (entry->buf[cnt - 1] != '\n') {
5578 		entry->buf[cnt] = '\n';
5579 		entry->buf[cnt + 1] = '\0';
5580 	} else
5581 		entry->buf[cnt] = '\0';
5582 
5583 	__buffer_unlock_commit(buffer, event);
5584 
5585 	written = cnt;
5586 
5587 	*fpos += written;
5588 
5589  out_unlock:
5590 	for (i = nr_pages - 1; i >= 0; i--) {
5591 		kunmap_atomic(map_page[i]);
5592 		put_page(pages[i]);
5593 	}
5594  out:
5595 	return written;
5596 }
5597 
5598 static int tracing_clock_show(struct seq_file *m, void *v)
5599 {
5600 	struct trace_array *tr = m->private;
5601 	int i;
5602 
5603 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5604 		seq_printf(m,
5605 			"%s%s%s%s", i ? " " : "",
5606 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5607 			i == tr->clock_id ? "]" : "");
5608 	seq_putc(m, '\n');
5609 
5610 	return 0;
5611 }
5612 
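/*
 * Select one of the clocks listed in trace_clocks[] by name, e.g.
 *
 *   # echo global > trace_clock
 *
 * The max/snapshot buffer is kept in sync where it exists, and the
 * buffers are reset because timestamps from different clocks cannot be
 * compared.
 */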
5613 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5614 {
5615 	int i;
5616 
5617 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5618 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
5619 			break;
5620 	}
5621 	if (i == ARRAY_SIZE(trace_clocks))
5622 		return -EINVAL;
5623 
5624 	mutex_lock(&trace_types_lock);
5625 
5626 	tr->clock_id = i;
5627 
5628 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5629 
5630 	/*
5631 	 * New clock may not be consistent with the previous clock.
5632 	 * Reset the buffer so that it doesn't have incomparable timestamps.
5633 	 */
5634 	tracing_reset_online_cpus(&tr->trace_buffer);
5635 
5636 #ifdef CONFIG_TRACER_MAX_TRACE
5637 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5638 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5639 	tracing_reset_online_cpus(&tr->max_buffer);
5640 #endif
5641 
5642 	mutex_unlock(&trace_types_lock);
5643 
5644 	return 0;
5645 }
5646 
5647 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5648 				   size_t cnt, loff_t *fpos)
5649 {
5650 	struct seq_file *m = filp->private_data;
5651 	struct trace_array *tr = m->private;
5652 	char buf[64];
5653 	const char *clockstr;
5654 	int ret;
5655 
5656 	if (cnt >= sizeof(buf))
5657 		return -EINVAL;
5658 
5659 	if (copy_from_user(buf, ubuf, cnt))
5660 		return -EFAULT;
5661 
5662 	buf[cnt] = 0;
5663 
5664 	clockstr = strstrip(buf);
5665 
5666 	ret = tracing_set_clock(tr, clockstr);
5667 	if (ret)
5668 		return ret;
5669 
5670 	*fpos += cnt;
5671 
5672 	return cnt;
5673 }
5674 
5675 static int tracing_clock_open(struct inode *inode, struct file *file)
5676 {
5677 	struct trace_array *tr = inode->i_private;
5678 	int ret;
5679 
5680 	if (tracing_disabled)
5681 		return -ENODEV;
5682 
5683 	if (trace_array_get(tr))
5684 		return -ENODEV;
5685 
5686 	ret = single_open(file, tracing_clock_show, inode->i_private);
5687 	if (ret < 0)
5688 		trace_array_put(tr);
5689 
5690 	return ret;
5691 }
5692 
5693 struct ftrace_buffer_info {
5694 	struct trace_iterator	iter;
5695 	void			*spare;
5696 	unsigned int		read;
5697 };
5698 
5699 #ifdef CONFIG_TRACER_SNAPSHOT
5700 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5701 {
5702 	struct trace_array *tr = inode->i_private;
5703 	struct trace_iterator *iter;
5704 	struct seq_file *m;
5705 	int ret = 0;
5706 
5707 	if (trace_array_get(tr) < 0)
5708 		return -ENODEV;
5709 
5710 	if (file->f_mode & FMODE_READ) {
5711 		iter = __tracing_open(inode, file, true);
5712 		if (IS_ERR(iter))
5713 			ret = PTR_ERR(iter);
5714 	} else {
5715 		/* Writes still need the seq_file to hold the private data */
5716 		ret = -ENOMEM;
5717 		m = kzalloc(sizeof(*m), GFP_KERNEL);
5718 		if (!m)
5719 			goto out;
5720 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5721 		if (!iter) {
5722 			kfree(m);
5723 			goto out;
5724 		}
5725 		ret = 0;
5726 
5727 		iter->tr = tr;
5728 		iter->trace_buffer = &tr->max_buffer;
5729 		iter->cpu_file = tracing_get_cpu(inode);
5730 		m->private = iter;
5731 		file->private_data = m;
5732 	}
5733 out:
5734 	if (ret < 0)
5735 		trace_array_put(tr);
5736 
5737 	return ret;
5738 }
5739 
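/*
 * Handles writes to the "snapshot" file.  As the switch below shows:
 * writing 0 frees the snapshot buffer, writing 1 allocates it (if
 * needed) and swaps it with the live buffer, and any other value
 * clears the snapshot contents when one is allocated.
 */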
5740 static ssize_t
5741 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5742 		       loff_t *ppos)
5743 {
5744 	struct seq_file *m = filp->private_data;
5745 	struct trace_iterator *iter = m->private;
5746 	struct trace_array *tr = iter->tr;
5747 	unsigned long val;
5748 	int ret;
5749 
5750 	ret = tracing_update_buffers();
5751 	if (ret < 0)
5752 		return ret;
5753 
5754 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5755 	if (ret)
5756 		return ret;
5757 
5758 	mutex_lock(&trace_types_lock);
5759 
5760 	if (tr->current_trace->use_max_tr) {
5761 		ret = -EBUSY;
5762 		goto out;
5763 	}
5764 
5765 	switch (val) {
5766 	case 0:
5767 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5768 			ret = -EINVAL;
5769 			break;
5770 		}
5771 		if (tr->allocated_snapshot)
5772 			free_snapshot(tr);
5773 		break;
5774 	case 1:
5775 /* Only allow per-cpu swap if the ring buffer supports it */
5776 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5777 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5778 			ret = -EINVAL;
5779 			break;
5780 		}
5781 #endif
5782 		if (!tr->allocated_snapshot) {
5783 			ret = alloc_snapshot(tr);
5784 			if (ret < 0)
5785 				break;
5786 		}
5787 		local_irq_disable();
5788 		/* Now, we're going to swap */
5789 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5790 			update_max_tr(tr, current, smp_processor_id());
5791 		else
5792 			update_max_tr_single(tr, current, iter->cpu_file);
5793 		local_irq_enable();
5794 		break;
5795 	default:
5796 		if (tr->allocated_snapshot) {
5797 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5798 				tracing_reset_online_cpus(&tr->max_buffer);
5799 			else
5800 				tracing_reset(&tr->max_buffer, iter->cpu_file);
5801 		}
5802 		break;
5803 	}
5804 
5805 	if (ret >= 0) {
5806 		*ppos += cnt;
5807 		ret = cnt;
5808 	}
5809 out:
5810 	mutex_unlock(&trace_types_lock);
5811 	return ret;
5812 }
5813 
5814 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5815 {
5816 	struct seq_file *m = file->private_data;
5817 	int ret;
5818 
5819 	ret = tracing_release(inode, file);
5820 
5821 	if (file->f_mode & FMODE_READ)
5822 		return ret;
5823 
5824 	/* If write only, the seq_file is just a stub */
5825 	if (m)
5826 		kfree(m->private);
5827 	kfree(m);
5828 
5829 	return 0;
5830 }
5831 
5832 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5833 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5834 				    size_t count, loff_t *ppos);
5835 static int tracing_buffers_release(struct inode *inode, struct file *file);
5836 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5837 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5838 
5839 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5840 {
5841 	struct ftrace_buffer_info *info;
5842 	int ret;
5843 
5844 	ret = tracing_buffers_open(inode, filp);
5845 	if (ret < 0)
5846 		return ret;
5847 
5848 	info = filp->private_data;
5849 
5850 	if (info->iter.trace->use_max_tr) {
5851 		tracing_buffers_release(inode, filp);
5852 		return -EBUSY;
5853 	}
5854 
5855 	info->iter.snapshot = true;
5856 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
5857 
5858 	return ret;
5859 }
5860 
5861 #endif /* CONFIG_TRACER_SNAPSHOT */
5862 
5863 
5864 static const struct file_operations tracing_thresh_fops = {
5865 	.open		= tracing_open_generic,
5866 	.read		= tracing_thresh_read,
5867 	.write		= tracing_thresh_write,
5868 	.llseek		= generic_file_llseek,
5869 };
5870 
5871 #ifdef CONFIG_TRACER_MAX_TRACE
5872 static const struct file_operations tracing_max_lat_fops = {
5873 	.open		= tracing_open_generic,
5874 	.read		= tracing_max_lat_read,
5875 	.write		= tracing_max_lat_write,
5876 	.llseek		= generic_file_llseek,
5877 };
5878 #endif
5879 
5880 static const struct file_operations set_tracer_fops = {
5881 	.open		= tracing_open_generic,
5882 	.read		= tracing_set_trace_read,
5883 	.write		= tracing_set_trace_write,
5884 	.llseek		= generic_file_llseek,
5885 };
5886 
5887 static const struct file_operations tracing_pipe_fops = {
5888 	.open		= tracing_open_pipe,
5889 	.poll		= tracing_poll_pipe,
5890 	.read		= tracing_read_pipe,
5891 	.splice_read	= tracing_splice_read_pipe,
5892 	.release	= tracing_release_pipe,
5893 	.llseek		= no_llseek,
5894 };
5895 
5896 static const struct file_operations tracing_entries_fops = {
5897 	.open		= tracing_open_generic_tr,
5898 	.read		= tracing_entries_read,
5899 	.write		= tracing_entries_write,
5900 	.llseek		= generic_file_llseek,
5901 	.release	= tracing_release_generic_tr,
5902 };
5903 
5904 static const struct file_operations tracing_total_entries_fops = {
5905 	.open		= tracing_open_generic_tr,
5906 	.read		= tracing_total_entries_read,
5907 	.llseek		= generic_file_llseek,
5908 	.release	= tracing_release_generic_tr,
5909 };
5910 
5911 static const struct file_operations tracing_free_buffer_fops = {
5912 	.open		= tracing_open_generic_tr,
5913 	.write		= tracing_free_buffer_write,
5914 	.release	= tracing_free_buffer_release,
5915 };
5916 
5917 static const struct file_operations tracing_mark_fops = {
5918 	.open		= tracing_open_generic_tr,
5919 	.write		= tracing_mark_write,
5920 	.llseek		= generic_file_llseek,
5921 	.release	= tracing_release_generic_tr,
5922 };
5923 
5924 static const struct file_operations trace_clock_fops = {
5925 	.open		= tracing_clock_open,
5926 	.read		= seq_read,
5927 	.llseek		= seq_lseek,
5928 	.release	= tracing_single_release_tr,
5929 	.write		= tracing_clock_write,
5930 };
5931 
5932 #ifdef CONFIG_TRACER_SNAPSHOT
5933 static const struct file_operations snapshot_fops = {
5934 	.open		= tracing_snapshot_open,
5935 	.read		= seq_read,
5936 	.write		= tracing_snapshot_write,
5937 	.llseek		= tracing_lseek,
5938 	.release	= tracing_snapshot_release,
5939 };
5940 
5941 static const struct file_operations snapshot_raw_fops = {
5942 	.open		= snapshot_raw_open,
5943 	.read		= tracing_buffers_read,
5944 	.release	= tracing_buffers_release,
5945 	.splice_read	= tracing_buffers_splice_read,
5946 	.llseek		= no_llseek,
5947 };
5948 
5949 #endif /* CONFIG_TRACER_SNAPSHOT */
5950 
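/*
 * The tracing_buffers_* operations back the per-cpu "trace_pipe_raw"
 * files, which expose raw ring buffer pages (binary format) rather
 * than formatted text.
 */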
5951 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5952 {
5953 	struct trace_array *tr = inode->i_private;
5954 	struct ftrace_buffer_info *info;
5955 	int ret;
5956 
5957 	if (tracing_disabled)
5958 		return -ENODEV;
5959 
5960 	if (trace_array_get(tr) < 0)
5961 		return -ENODEV;
5962 
5963 	info = kzalloc(sizeof(*info), GFP_KERNEL);
5964 	if (!info) {
5965 		trace_array_put(tr);
5966 		return -ENOMEM;
5967 	}
5968 
5969 	mutex_lock(&trace_types_lock);
5970 
5971 	info->iter.tr		= tr;
5972 	info->iter.cpu_file	= tracing_get_cpu(inode);
5973 	info->iter.trace	= tr->current_trace;
5974 	info->iter.trace_buffer = &tr->trace_buffer;
5975 	info->spare		= NULL;
5976 	/* Force reading ring buffer for first read */
5977 	info->read		= (unsigned int)-1;
5978 
5979 	filp->private_data = info;
5980 
5981 	tr->current_trace->ref++;
5982 
5983 	mutex_unlock(&trace_types_lock);
5984 
5985 	ret = nonseekable_open(inode, filp);
5986 	if (ret < 0)
5987 		trace_array_put(tr);
5988 
5989 	return ret;
5990 }
5991 
5992 static unsigned int
5993 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5994 {
5995 	struct ftrace_buffer_info *info = filp->private_data;
5996 	struct trace_iterator *iter = &info->iter;
5997 
5998 	return trace_poll(iter, filp, poll_table);
5999 }
6000 
6001 static ssize_t
6002 tracing_buffers_read(struct file *filp, char __user *ubuf,
6003 		     size_t count, loff_t *ppos)
6004 {
6005 	struct ftrace_buffer_info *info = filp->private_data;
6006 	struct trace_iterator *iter = &info->iter;
6007 	ssize_t ret;
6008 	ssize_t size;
6009 
6010 	if (!count)
6011 		return 0;
6012 
6013 #ifdef CONFIG_TRACER_MAX_TRACE
6014 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6015 		return -EBUSY;
6016 #endif
6017 
6018 	if (!info->spare)
6019 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6020 							  iter->cpu_file);
6021 	if (!info->spare)
6022 		return -ENOMEM;
6023 
6024 	/* Do we have previous read data to read? */
6025 	if (info->read < PAGE_SIZE)
6026 		goto read;
6027 
6028  again:
6029 	trace_access_lock(iter->cpu_file);
6030 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6031 				    &info->spare,
6032 				    count,
6033 				    iter->cpu_file, 0);
6034 	trace_access_unlock(iter->cpu_file);
6035 
6036 	if (ret < 0) {
6037 		if (trace_empty(iter)) {
6038 			if ((filp->f_flags & O_NONBLOCK))
6039 				return -EAGAIN;
6040 
6041 			ret = wait_on_pipe(iter, false);
6042 			if (ret)
6043 				return ret;
6044 
6045 			goto again;
6046 		}
6047 		return 0;
6048 	}
6049 
6050 	info->read = 0;
6051  read:
6052 	size = PAGE_SIZE - info->read;
6053 	if (size > count)
6054 		size = count;
6055 
6056 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6057 	if (ret == size)
6058 		return -EFAULT;
6059 
6060 	size -= ret;
6061 
6062 	*ppos += size;
6063 	info->read += size;
6064 
6065 	return size;
6066 }
6067 
6068 static int tracing_buffers_release(struct inode *inode, struct file *file)
6069 {
6070 	struct ftrace_buffer_info *info = file->private_data;
6071 	struct trace_iterator *iter = &info->iter;
6072 
6073 	mutex_lock(&trace_types_lock);
6074 
6075 	iter->tr->current_trace->ref--;
6076 
6077 	__trace_array_put(iter->tr);
6078 
6079 	if (info->spare)
6080 		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6081 	kfree(info);
6082 
6083 	mutex_unlock(&trace_types_lock);
6084 
6085 	return 0;
6086 }
6087 
6088 struct buffer_ref {
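/*
 * A buffer_ref pins one ring buffer page while it is in flight through
 * a pipe; the ref count covers the splice descriptor entry plus every
 * pipe buffer that still points at the page, and the page is handed
 * back to the ring buffer when the count drops to zero.
 */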
6089 	struct ring_buffer	*buffer;
6090 	void			*page;
6091 	int			ref;
6092 };
6093 
6094 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6095 				    struct pipe_buffer *buf)
6096 {
6097 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6098 
6099 	if (--ref->ref)
6100 		return;
6101 
6102 	ring_buffer_free_read_page(ref->buffer, ref->page);
6103 	kfree(ref);
6104 	buf->private = 0;
6105 }
6106 
6107 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6108 				struct pipe_buffer *buf)
6109 {
6110 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6111 
6112 	ref->ref++;
6113 }
6114 
6115 /* Pipe buffer operations for a ring buffer page. */
6116 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6117 	.can_merge		= 0,
6118 	.confirm		= generic_pipe_buf_confirm,
6119 	.release		= buffer_pipe_buf_release,
6120 	.steal			= generic_pipe_buf_steal,
6121 	.get			= buffer_pipe_buf_get,
6122 };
6123 
6124 /*
6125  * Callback from splice_to_pipe(); releases pages left in the spd
6126  * if we errored out while filling the pipe.
6127  */
6128 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6129 {
6130 	struct buffer_ref *ref =
6131 		(struct buffer_ref *)spd->partial[i].private;
6132 
6133 	if (--ref->ref)
6134 		return;
6135 
6136 	ring_buffer_free_read_page(ref->buffer, ref->page);
6137 	kfree(ref);
6138 	spd->partial[i].private = 0;
6139 }
6140 
6141 static ssize_t
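/*
 * splice() support for the per-cpu "trace_pipe_raw" files: whole ring
 * buffer pages are handed to the pipe without copying, each one
 * wrapped in a buffer_ref so the page goes back to the ring buffer
 * once the last pipe reader is done with it.
 */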
6142 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6143 			    struct pipe_inode_info *pipe, size_t len,
6144 			    unsigned int flags)
6145 {
6146 	struct ftrace_buffer_info *info = file->private_data;
6147 	struct trace_iterator *iter = &info->iter;
6148 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6149 	struct page *pages_def[PIPE_DEF_BUFFERS];
6150 	struct splice_pipe_desc spd = {
6151 		.pages		= pages_def,
6152 		.partial	= partial_def,
6153 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6154 		.flags		= flags,
6155 		.ops		= &buffer_pipe_buf_ops,
6156 		.spd_release	= buffer_spd_release,
6157 	};
6158 	struct buffer_ref *ref;
6159 	int entries, size, i;
6160 	ssize_t ret = 0;
6161 
6162 #ifdef CONFIG_TRACER_MAX_TRACE
6163 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6164 		return -EBUSY;
6165 #endif
6166 
6167 	if (*ppos & (PAGE_SIZE - 1))
6168 		return -EINVAL;
6169 
6170 	if (len & (PAGE_SIZE - 1)) {
6171 		if (len < PAGE_SIZE)
6172 			return -EINVAL;
6173 		len &= PAGE_MASK;
6174 	}
6175 
6176 	if (splice_grow_spd(pipe, &spd))
6177 		return -ENOMEM;
6178 
6179  again:
6180 	trace_access_lock(iter->cpu_file);
6181 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6182 
6183 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6184 		struct page *page;
6185 		int r;
6186 
6187 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6188 		if (!ref) {
6189 			ret = -ENOMEM;
6190 			break;
6191 		}
6192 
6193 		ref->ref = 1;
6194 		ref->buffer = iter->trace_buffer->buffer;
6195 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6196 		if (!ref->page) {
6197 			ret = -ENOMEM;
6198 			kfree(ref);
6199 			break;
6200 		}
6201 
6202 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6203 					  len, iter->cpu_file, 1);
6204 		if (r < 0) {
6205 			ring_buffer_free_read_page(ref->buffer, ref->page);
6206 			kfree(ref);
6207 			break;
6208 		}
6209 
6210 		/*
6211 		 * Zero out any leftover data; this page is handed
6212 		 * to user land.
6213 		 */
6214 		size = ring_buffer_page_len(ref->page);
6215 		if (size < PAGE_SIZE)
6216 			memset(ref->page + size, 0, PAGE_SIZE - size);
6217 
6218 		page = virt_to_page(ref->page);
6219 
6220 		spd.pages[i] = page;
6221 		spd.partial[i].len = PAGE_SIZE;
6222 		spd.partial[i].offset = 0;
6223 		spd.partial[i].private = (unsigned long)ref;
6224 		spd.nr_pages++;
6225 		*ppos += PAGE_SIZE;
6226 
6227 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6228 	}
6229 
6230 	trace_access_unlock(iter->cpu_file);
6231 	spd.nr_pages = i;
6232 
6233 	/* did we read anything? */
6234 	if (!spd.nr_pages) {
6235 		if (ret)
6236 			goto out;
6237 
6238 		ret = -EAGAIN;
6239 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6240 			goto out;
6241 
6242 		ret = wait_on_pipe(iter, true);
6243 		if (ret)
6244 			goto out;
6245 
6246 		goto again;
6247 	}
6248 
6249 	ret = splice_to_pipe(pipe, &spd);
6250 out:
6251 	splice_shrink_spd(&spd);
6252 
6253 	return ret;
6254 }
6255 
6256 static const struct file_operations tracing_buffers_fops = {
6257 	.open		= tracing_buffers_open,
6258 	.read		= tracing_buffers_read,
6259 	.poll		= tracing_buffers_poll,
6260 	.release	= tracing_buffers_release,
6261 	.splice_read	= tracing_buffers_splice_read,
6262 	.llseek		= no_llseek,
6263 };
6264 
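/*
 * Backs the per-cpu "stats" files: formats entry, overrun, byte and
 * timestamp counters into a trace_seq and copies the result out to
 * user space.
 */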
6265 static ssize_t
6266 tracing_stats_read(struct file *filp, char __user *ubuf,
6267 		   size_t count, loff_t *ppos)
6268 {
6269 	struct inode *inode = file_inode(filp);
6270 	struct trace_array *tr = inode->i_private;
6271 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6272 	int cpu = tracing_get_cpu(inode);
6273 	struct trace_seq *s;
6274 	unsigned long cnt;
6275 	unsigned long long t;
6276 	unsigned long usec_rem;
6277 
6278 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6279 	if (!s)
6280 		return -ENOMEM;
6281 
6282 	trace_seq_init(s);
6283 
6284 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6285 	trace_seq_printf(s, "entries: %ld\n", cnt);
6286 
6287 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6288 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6289 
6290 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6291 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6292 
6293 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6294 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6295 
6296 	if (trace_clocks[tr->clock_id].in_ns) {
6297 		/* local or global for trace_clock */
6298 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6299 		usec_rem = do_div(t, USEC_PER_SEC);
6300 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6301 								t, usec_rem);
6302 
6303 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6304 		usec_rem = do_div(t, USEC_PER_SEC);
6305 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6306 	} else {
6307 		/* counter or tsc mode for trace_clock */
6308 		trace_seq_printf(s, "oldest event ts: %llu\n",
6309 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6310 
6311 		trace_seq_printf(s, "now ts: %llu\n",
6312 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6313 	}
6314 
6315 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6316 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6317 
6318 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6319 	trace_seq_printf(s, "read events: %ld\n", cnt);
6320 
6321 	count = simple_read_from_buffer(ubuf, count, ppos,
6322 					s->buffer, trace_seq_used(s));
6323 
6324 	kfree(s);
6325 
6326 	return count;
6327 }
6328 
6329 static const struct file_operations tracing_stats_fops = {
6330 	.open		= tracing_open_generic_tr,
6331 	.read		= tracing_stats_read,
6332 	.llseek		= generic_file_llseek,
6333 	.release	= tracing_release_generic_tr,
6334 };
6335 
6336 #ifdef CONFIG_DYNAMIC_FTRACE
6337 
6338 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6339 {
6340 	return 0;
6341 }
6342 
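/*
 * Backs "dyn_ftrace_total_info": prints the counter handed in through
 * the file's private data (ftrace_update_tot_cnt, see
 * tracer_init_tracefs() below) plus any arch specific details from
 * ftrace_arch_read_dyn_info().
 */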
6343 static ssize_t
6344 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6345 		  size_t cnt, loff_t *ppos)
6346 {
6347 	static char ftrace_dyn_info_buffer[1024];
6348 	static DEFINE_MUTEX(dyn_info_mutex);
6349 	unsigned long *p = filp->private_data;
6350 	char *buf = ftrace_dyn_info_buffer;
6351 	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6352 	int r;
6353 
6354 	mutex_lock(&dyn_info_mutex);
6355 	r = sprintf(buf, "%ld ", *p);
6356 
6357 	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6358 	buf[r++] = '\n';
6359 
6360 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6361 
6362 	mutex_unlock(&dyn_info_mutex);
6363 
6364 	return r;
6365 }
6366 
6367 static const struct file_operations tracing_dyn_info_fops = {
6368 	.open		= tracing_open_generic,
6369 	.read		= tracing_read_dyn_info,
6370 	.llseek		= generic_file_llseek,
6371 };
6372 #endif /* CONFIG_DYNAMIC_FTRACE */
6373 
6374 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6375 static void
6376 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6377 {
6378 	tracing_snapshot();
6379 }
6380 
6381 static void
6382 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6383 {
6384 	unsigned long *count = (unsigned long *)data;
6385 
6386 	if (!*count)
6387 		return;
6388 
6389 	if (*count != -1)
6390 		(*count)--;
6391 
6392 	tracing_snapshot();
6393 }
6394 
6395 static int
6396 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6397 		      struct ftrace_probe_ops *ops, void *data)
6398 {
6399 	long count = (long)data;
6400 
6401 	seq_printf(m, "%ps:", (void *)ip);
6402 
6403 	seq_puts(m, "snapshot");
6404 
6405 	if (count == -1)
6406 		seq_puts(m, ":unlimited\n");
6407 	else
6408 		seq_printf(m, ":count=%ld\n", count);
6409 
6410 	return 0;
6411 }
6412 
6413 static struct ftrace_probe_ops snapshot_probe_ops = {
6414 	.func			= ftrace_snapshot,
6415 	.print			= ftrace_snapshot_print,
6416 };
6417 
6418 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6419 	.func			= ftrace_count_snapshot,
6420 	.print			= ftrace_snapshot_print,
6421 };
6422 
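/*
 * Parses the "snapshot" command of set_ftrace_filter, e.g.
 * "echo 'schedule:snapshot:5' >> set_ftrace_filter" (the function name
 * and count are only examples) arms a probe that snapshots the buffers
 * on the first 5 hits; a leading '!' in the glob removes the probe.
 */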
6423 static int
6424 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6425 			       char *glob, char *cmd, char *param, int enable)
6426 {
6427 	struct ftrace_probe_ops *ops;
6428 	void *count = (void *)-1;
6429 	char *number;
6430 	int ret;
6431 
6432 	/* hash funcs only work with set_ftrace_filter */
6433 	if (!enable)
6434 		return -EINVAL;
6435 
6436 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6437 
6438 	if (glob[0] == '!') {
6439 		unregister_ftrace_function_probe_func(glob+1, ops);
6440 		return 0;
6441 	}
6442 
6443 	if (!param)
6444 		goto out_reg;
6445 
6446 	number = strsep(&param, ":");
6447 
6448 	if (!strlen(number))
6449 		goto out_reg;
6450 
6451 	/*
6452 	 * We use the callback data field (which is a pointer)
6453 	 * as our counter.
6454 	 */
6455 	ret = kstrtoul(number, 0, (unsigned long *)&count);
6456 	if (ret)
6457 		return ret;
6458 
6459  out_reg:
6460 	ret = register_ftrace_function_probe(glob, ops, count);
6461 
6462 	if (ret >= 0)
6463 		alloc_snapshot(&global_trace);
6464 
6465 	return ret < 0 ? ret : 0;
6466 }
6467 
6468 static struct ftrace_func_command ftrace_snapshot_cmd = {
6469 	.name			= "snapshot",
6470 	.func			= ftrace_trace_snapshot_callback,
6471 };
6472 
6473 static __init int register_snapshot_cmd(void)
6474 {
6475 	return register_ftrace_command(&ftrace_snapshot_cmd);
6476 }
6477 #else
6478 static inline __init int register_snapshot_cmd(void) { return 0; }
6479 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6480 
6481 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6482 {
6483 	if (WARN_ON(!tr->dir))
6484 		return ERR_PTR(-ENODEV);
6485 
6486 	/* Top directory uses NULL as the parent */
6487 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6488 		return NULL;
6489 
6490 	/* All sub buffers have a descriptor */
6491 	return tr->dir;
6492 }
6493 
6494 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6495 {
6496 	struct dentry *d_tracer;
6497 
6498 	if (tr->percpu_dir)
6499 		return tr->percpu_dir;
6500 
6501 	d_tracer = tracing_get_dentry(tr);
6502 	if (IS_ERR(d_tracer))
6503 		return NULL;
6504 
6505 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6506 
6507 	WARN_ONCE(!tr->percpu_dir,
6508 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6509 
6510 	return tr->percpu_dir;
6511 }
6512 
6513 static struct dentry *
6514 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6515 		      void *data, long cpu, const struct file_operations *fops)
6516 {
6517 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6518 
6519 	if (ret) /* See tracing_get_cpu() */
6520 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
6521 	return ret;
6522 }
6523 
6524 static void
6525 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6526 {
6527 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6528 	struct dentry *d_cpu;
6529 	char cpu_dir[30]; /* 30 characters should be more than enough */
6530 
6531 	if (!d_percpu)
6532 		return;
6533 
6534 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
6535 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6536 	if (!d_cpu) {
6537 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6538 		return;
6539 	}
6540 
6541 	/* per cpu trace_pipe */
6542 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6543 				tr, cpu, &tracing_pipe_fops);
6544 
6545 	/* per cpu trace */
6546 	trace_create_cpu_file("trace", 0644, d_cpu,
6547 				tr, cpu, &tracing_fops);
6548 
6549 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6550 				tr, cpu, &tracing_buffers_fops);
6551 
6552 	trace_create_cpu_file("stats", 0444, d_cpu,
6553 				tr, cpu, &tracing_stats_fops);
6554 
6555 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6556 				tr, cpu, &tracing_entries_fops);
6557 
6558 #ifdef CONFIG_TRACER_SNAPSHOT
6559 	trace_create_cpu_file("snapshot", 0644, d_cpu,
6560 				tr, cpu, &snapshot_fops);
6561 
6562 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6563 				tr, cpu, &snapshot_raw_fops);
6564 #endif
6565 }
6566 
6567 #ifdef CONFIG_FTRACE_SELFTEST
6568 /* Let selftest have access to static functions in this file */
6569 #include "trace_selftest.c"
6570 #endif
6571 
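/*
 * trace_options_read()/trace_options_write() handle the per-tracer
 * option files created under options/ from a tracer's tracer_flags,
 * while the trace_options_core_* handlers further down cover the
 * global trace_options bits shared by every tracer.
 */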
6572 static ssize_t
6573 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6574 			loff_t *ppos)
6575 {
6576 	struct trace_option_dentry *topt = filp->private_data;
6577 	char *buf;
6578 
6579 	if (topt->flags->val & topt->opt->bit)
6580 		buf = "1\n";
6581 	else
6582 		buf = "0\n";
6583 
6584 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6585 }
6586 
6587 static ssize_t
6588 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6589 			 loff_t *ppos)
6590 {
6591 	struct trace_option_dentry *topt = filp->private_data;
6592 	unsigned long val;
6593 	int ret;
6594 
6595 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6596 	if (ret)
6597 		return ret;
6598 
6599 	if (val != 0 && val != 1)
6600 		return -EINVAL;
6601 
6602 	if (!!(topt->flags->val & topt->opt->bit) != val) {
6603 		mutex_lock(&trace_types_lock);
6604 		ret = __set_tracer_option(topt->tr, topt->flags,
6605 					  topt->opt, !val);
6606 		mutex_unlock(&trace_types_lock);
6607 		if (ret)
6608 			return ret;
6609 	}
6610 
6611 	*ppos += cnt;
6612 
6613 	return cnt;
6614 }
6615 
6616 
6617 static const struct file_operations trace_options_fops = {
6618 	.open = tracing_open_generic,
6619 	.read = trace_options_read,
6620 	.write = trace_options_write,
6621 	.llseek	= generic_file_llseek,
6622 };
6623 
6624 /*
6625  * In order to pass in both the trace_array descriptor as well as the index
6626  * to the flag that the trace option file represents, the trace_array
6627  * has a character array of trace_flags_index[], which holds the index
6628  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6629  * The address of this character array is passed to the flag option file
6630  * read/write callbacks.
6631  *
6632  * In order to extract both the index and the trace_array descriptor,
6633  * get_tr_index() uses the following algorithm.
6634  *
6635  *   idx = *ptr;
6636  *
6637  * This works because the pointer passed in is the address of the
6638  * index byte itself (remember, index[1] == 1).
6639  *
6640  * To get back to the trace_array descriptor, subtract that index
6641  * from the pointer, which lands on the start of the array:
6642  *
6643  *   ptr - idx == &index[0]
6644  *
6645  * Then a simple container_of() from that pointer gets us to the
6646  * trace_array descriptor.
6647  */
6648 static void get_tr_index(void *data, struct trace_array **ptr,
6649 			 unsigned int *pindex)
6650 {
6651 	*pindex = *(unsigned char *)data;
6652 
6653 	*ptr = container_of(data - *pindex, struct trace_array,
6654 			    trace_flags_index);
6655 }
6656 
6657 static ssize_t
6658 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6659 			loff_t *ppos)
6660 {
6661 	void *tr_index = filp->private_data;
6662 	struct trace_array *tr;
6663 	unsigned int index;
6664 	char *buf;
6665 
6666 	get_tr_index(tr_index, &tr, &index);
6667 
6668 	if (tr->trace_flags & (1 << index))
6669 		buf = "1\n";
6670 	else
6671 		buf = "0\n";
6672 
6673 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6674 }
6675 
6676 static ssize_t
6677 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6678 			 loff_t *ppos)
6679 {
6680 	void *tr_index = filp->private_data;
6681 	struct trace_array *tr;
6682 	unsigned int index;
6683 	unsigned long val;
6684 	int ret;
6685 
6686 	get_tr_index(tr_index, &tr, &index);
6687 
6688 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6689 	if (ret)
6690 		return ret;
6691 
6692 	if (val != 0 && val != 1)
6693 		return -EINVAL;
6694 
6695 	mutex_lock(&trace_types_lock);
6696 	ret = set_tracer_flag(tr, 1 << index, val);
6697 	mutex_unlock(&trace_types_lock);
6698 
6699 	if (ret < 0)
6700 		return ret;
6701 
6702 	*ppos += cnt;
6703 
6704 	return cnt;
6705 }
6706 
6707 static const struct file_operations trace_options_core_fops = {
6708 	.open = tracing_open_generic,
6709 	.read = trace_options_core_read,
6710 	.write = trace_options_core_write,
6711 	.llseek = generic_file_llseek,
6712 };
6713 
6714 struct dentry *trace_create_file(const char *name,
6715 				 umode_t mode,
6716 				 struct dentry *parent,
6717 				 void *data,
6718 				 const struct file_operations *fops)
6719 {
6720 	struct dentry *ret;
6721 
6722 	ret = tracefs_create_file(name, mode, parent, data, fops);
6723 	if (!ret)
6724 		pr_warn("Could not create tracefs '%s' entry\n", name);
6725 
6726 	return ret;
6727 }
6728 
6729 
6730 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6731 {
6732 	struct dentry *d_tracer;
6733 
6734 	if (tr->options)
6735 		return tr->options;
6736 
6737 	d_tracer = tracing_get_dentry(tr);
6738 	if (IS_ERR(d_tracer))
6739 		return NULL;
6740 
6741 	tr->options = tracefs_create_dir("options", d_tracer);
6742 	if (!tr->options) {
6743 		pr_warn("Could not create tracefs directory 'options'\n");
6744 		return NULL;
6745 	}
6746 
6747 	return tr->options;
6748 }
6749 
6750 static void
6751 create_trace_option_file(struct trace_array *tr,
6752 			 struct trace_option_dentry *topt,
6753 			 struct tracer_flags *flags,
6754 			 struct tracer_opt *opt)
6755 {
6756 	struct dentry *t_options;
6757 
6758 	t_options = trace_options_init_dentry(tr);
6759 	if (!t_options)
6760 		return;
6761 
6762 	topt->flags = flags;
6763 	topt->opt = opt;
6764 	topt->tr = tr;
6765 
6766 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6767 				    &trace_options_fops);
6768 
6769 }
6770 
6771 static void
6772 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6773 {
6774 	struct trace_option_dentry *topts;
6775 	struct trace_options *tr_topts;
6776 	struct tracer_flags *flags;
6777 	struct tracer_opt *opts;
6778 	int cnt;
6779 	int i;
6780 
6781 	if (!tracer)
6782 		return;
6783 
6784 	flags = tracer->flags;
6785 
6786 	if (!flags || !flags->opts)
6787 		return;
6788 
6789 	/*
6790 	 * If this is an instance, only create flags for tracers
6791 	 * the instance may have.
6792 	 */
6793 	if (!trace_ok_for_array(tracer, tr))
6794 		return;
6795 
6796 	for (i = 0; i < tr->nr_topts; i++) {
6797 		/* Make sure there's no duplicate flags. */
6798 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6799 			return;
6800 	}
6801 
6802 	opts = flags->opts;
6803 
6804 	for (cnt = 0; opts[cnt].name; cnt++)
6805 		;
6806 
6807 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6808 	if (!topts)
6809 		return;
6810 
6811 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6812 			    GFP_KERNEL);
6813 	if (!tr_topts) {
6814 		kfree(topts);
6815 		return;
6816 	}
6817 
6818 	tr->topts = tr_topts;
6819 	tr->topts[tr->nr_topts].tracer = tracer;
6820 	tr->topts[tr->nr_topts].topts = topts;
6821 	tr->nr_topts++;
6822 
6823 	for (cnt = 0; opts[cnt].name; cnt++) {
6824 		create_trace_option_file(tr, &topts[cnt], flags,
6825 					 &opts[cnt]);
6826 		WARN_ONCE(topts[cnt].entry == NULL,
6827 			  "Failed to create trace option: %s",
6828 			  opts[cnt].name);
6829 	}
6830 }
6831 
6832 static struct dentry *
6833 create_trace_option_core_file(struct trace_array *tr,
6834 			      const char *option, long index)
6835 {
6836 	struct dentry *t_options;
6837 
6838 	t_options = trace_options_init_dentry(tr);
6839 	if (!t_options)
6840 		return NULL;
6841 
6842 	return trace_create_file(option, 0644, t_options,
6843 				 (void *)&tr->trace_flags_index[index],
6844 				 &trace_options_core_fops);
6845 }
6846 
6847 static void create_trace_options_dir(struct trace_array *tr)
6848 {
6849 	struct dentry *t_options;
6850 	bool top_level = tr == &global_trace;
6851 	int i;
6852 
6853 	t_options = trace_options_init_dentry(tr);
6854 	if (!t_options)
6855 		return;
6856 
6857 	for (i = 0; trace_options[i]; i++) {
6858 		if (top_level ||
6859 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6860 			create_trace_option_core_file(tr, trace_options[i], i);
6861 	}
6862 }
6863 
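/*
 * rb_simple_read()/rb_simple_write() back the "tracing_on" file:
 * reading reports 0 or 1, and writing (e.g. "echo 0 > tracing_on",
 * shown only as an example) starts or stops recording into the ring
 * buffer without freeing anything.
 */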
6864 static ssize_t
6865 rb_simple_read(struct file *filp, char __user *ubuf,
6866 	       size_t cnt, loff_t *ppos)
6867 {
6868 	struct trace_array *tr = filp->private_data;
6869 	char buf[64];
6870 	int r;
6871 
6872 	r = tracer_tracing_is_on(tr);
6873 	r = sprintf(buf, "%d\n", r);
6874 
6875 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6876 }
6877 
6878 static ssize_t
6879 rb_simple_write(struct file *filp, const char __user *ubuf,
6880 		size_t cnt, loff_t *ppos)
6881 {
6882 	struct trace_array *tr = filp->private_data;
6883 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
6884 	unsigned long val;
6885 	int ret;
6886 
6887 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6888 	if (ret)
6889 		return ret;
6890 
6891 	if (buffer) {
6892 		mutex_lock(&trace_types_lock);
6893 		if (val) {
6894 			tracer_tracing_on(tr);
6895 			if (tr->current_trace->start)
6896 				tr->current_trace->start(tr);
6897 		} else {
6898 			tracer_tracing_off(tr);
6899 			if (tr->current_trace->stop)
6900 				tr->current_trace->stop(tr);
6901 		}
6902 		mutex_unlock(&trace_types_lock);
6903 	}
6904 
6905 	(*ppos)++;
6906 
6907 	return cnt;
6908 }
6909 
6910 static const struct file_operations rb_simple_fops = {
6911 	.open		= tracing_open_generic_tr,
6912 	.read		= rb_simple_read,
6913 	.write		= rb_simple_write,
6914 	.release	= tracing_release_generic_tr,
6915 	.llseek		= default_llseek,
6916 };
6917 
6918 struct dentry *trace_instance_dir;
6919 
6920 static void
6921 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6922 
6923 static int
6924 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6925 {
6926 	enum ring_buffer_flags rb_flags;
6927 
6928 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6929 
6930 	buf->tr = tr;
6931 
6932 	buf->buffer = ring_buffer_alloc(size, rb_flags);
6933 	if (!buf->buffer)
6934 		return -ENOMEM;
6935 
6936 	buf->data = alloc_percpu(struct trace_array_cpu);
6937 	if (!buf->data) {
6938 		ring_buffer_free(buf->buffer);
6939 		return -ENOMEM;
6940 	}
6941 
6942 	/* Allocate the first page for all buffers */
6943 	set_buffer_entries(&tr->trace_buffer,
6944 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
6945 
6946 	return 0;
6947 }
6948 
6949 static int allocate_trace_buffers(struct trace_array *tr, int size)
6950 {
6951 	int ret;
6952 
6953 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6954 	if (ret)
6955 		return ret;
6956 
6957 #ifdef CONFIG_TRACER_MAX_TRACE
6958 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
6959 				    allocate_snapshot ? size : 1);
6960 	if (WARN_ON(ret)) {
6961 		ring_buffer_free(tr->trace_buffer.buffer);
6962 		free_percpu(tr->trace_buffer.data);
6963 		return -ENOMEM;
6964 	}
6965 	tr->allocated_snapshot = allocate_snapshot;
6966 
6967 	/*
6968 	 * Only the top level trace array gets its snapshot allocated
6969 	 * from the kernel command line.
6970 	 */
6971 	allocate_snapshot = false;
6972 #endif
6973 	return 0;
6974 }
6975 
6976 static void free_trace_buffer(struct trace_buffer *buf)
6977 {
6978 	if (buf->buffer) {
6979 		ring_buffer_free(buf->buffer);
6980 		buf->buffer = NULL;
6981 		free_percpu(buf->data);
6982 		buf->data = NULL;
6983 	}
6984 }
6985 
6986 static void free_trace_buffers(struct trace_array *tr)
6987 {
6988 	if (!tr)
6989 		return;
6990 
6991 	free_trace_buffer(&tr->trace_buffer);
6992 
6993 #ifdef CONFIG_TRACER_MAX_TRACE
6994 	free_trace_buffer(&tr->max_buffer);
6995 #endif
6996 }
6997 
6998 static void init_trace_flags_index(struct trace_array *tr)
6999 {
7000 	int i;
7001 
7002 	/* Used by the trace options files */
7003 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7004 		tr->trace_flags_index[i] = i;
7005 }
7006 
7007 static void __update_tracer_options(struct trace_array *tr)
7008 {
7009 	struct tracer *t;
7010 
7011 	for (t = trace_types; t; t = t->next)
7012 		add_tracer_options(tr, t);
7013 }
7014 
7015 static void update_tracer_options(struct trace_array *tr)
7016 {
7017 	mutex_lock(&trace_types_lock);
7018 	__update_tracer_options(tr);
7019 	mutex_unlock(&trace_types_lock);
7020 }
7021 
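/*
 * Called when user space creates a directory under the tracefs
 * "instances" directory, e.g. "mkdir /sys/kernel/tracing/instances/foo"
 * (the path and name are illustrative).  Each instance gets its own
 * trace buffers, option files and event directories.
 */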
7022 static int instance_mkdir(const char *name)
7023 {
7024 	struct trace_array *tr;
7025 	int ret;
7026 
7027 	mutex_lock(&trace_types_lock);
7028 
7029 	ret = -EEXIST;
7030 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7031 		if (tr->name && strcmp(tr->name, name) == 0)
7032 			goto out_unlock;
7033 	}
7034 
7035 	ret = -ENOMEM;
7036 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7037 	if (!tr)
7038 		goto out_unlock;
7039 
7040 	tr->name = kstrdup(name, GFP_KERNEL);
7041 	if (!tr->name)
7042 		goto out_free_tr;
7043 
7044 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7045 		goto out_free_tr;
7046 
7047 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7048 
7049 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7050 
7051 	raw_spin_lock_init(&tr->start_lock);
7052 
7053 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7054 
7055 	tr->current_trace = &nop_trace;
7056 
7057 	INIT_LIST_HEAD(&tr->systems);
7058 	INIT_LIST_HEAD(&tr->events);
7059 
7060 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7061 		goto out_free_tr;
7062 
7063 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7064 	if (!tr->dir)
7065 		goto out_free_tr;
7066 
7067 	ret = event_trace_add_tracer(tr->dir, tr);
7068 	if (ret) {
7069 		tracefs_remove_recursive(tr->dir);
7070 		goto out_free_tr;
7071 	}
7072 
7073 	init_tracer_tracefs(tr, tr->dir);
7074 	init_trace_flags_index(tr);
7075 	__update_tracer_options(tr);
7076 
7077 	list_add(&tr->list, &ftrace_trace_arrays);
7078 
7079 	mutex_unlock(&trace_types_lock);
7080 
7081 	return 0;
7082 
7083  out_free_tr:
7084 	free_trace_buffers(tr);
7085 	free_cpumask_var(tr->tracing_cpumask);
7086 	kfree(tr->name);
7087 	kfree(tr);
7088 
7089  out_unlock:
7090 	mutex_unlock(&trace_types_lock);
7091 
7092 	return ret;
7093 
7094 }
7095 
7096 static int instance_rmdir(const char *name)
7097 {
7098 	struct trace_array *tr;
7099 	int found = 0;
7100 	int ret;
7101 	int i;
7102 
7103 	mutex_lock(&trace_types_lock);
7104 
7105 	ret = -ENODEV;
7106 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7107 		if (tr->name && strcmp(tr->name, name) == 0) {
7108 			found = 1;
7109 			break;
7110 		}
7111 	}
7112 	if (!found)
7113 		goto out_unlock;
7114 
7115 	ret = -EBUSY;
7116 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7117 		goto out_unlock;
7118 
7119 	list_del(&tr->list);
7120 
7121 	/* Disable all the flags that were enabled coming in */
7122 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7123 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7124 			set_tracer_flag(tr, 1 << i, 0);
7125 	}
7126 
7127 	tracing_set_nop(tr);
7128 	event_trace_del_tracer(tr);
7129 	ftrace_destroy_function_files(tr);
7130 	tracefs_remove_recursive(tr->dir);
7131 	free_trace_buffers(tr);
7132 
7133 	for (i = 0; i < tr->nr_topts; i++) {
7134 		kfree(tr->topts[i].topts);
7135 	}
7136 	kfree(tr->topts);
7137 
7138 	kfree(tr->name);
7139 	kfree(tr);
7140 
7141 	ret = 0;
7142 
7143  out_unlock:
7144 	mutex_unlock(&trace_types_lock);
7145 
7146 	return ret;
7147 }
7148 
7149 static __init void create_trace_instances(struct dentry *d_tracer)
7150 {
7151 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7152 							 instance_mkdir,
7153 							 instance_rmdir);
7154 	if (WARN_ON(!trace_instance_dir))
7155 		return;
7156 }
7157 
7158 static void
7159 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7160 {
7161 	int cpu;
7162 
7163 	trace_create_file("available_tracers", 0444, d_tracer,
7164 			tr, &show_traces_fops);
7165 
7166 	trace_create_file("current_tracer", 0644, d_tracer,
7167 			tr, &set_tracer_fops);
7168 
7169 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7170 			  tr, &tracing_cpumask_fops);
7171 
7172 	trace_create_file("trace_options", 0644, d_tracer,
7173 			  tr, &tracing_iter_fops);
7174 
7175 	trace_create_file("trace", 0644, d_tracer,
7176 			  tr, &tracing_fops);
7177 
7178 	trace_create_file("trace_pipe", 0444, d_tracer,
7179 			  tr, &tracing_pipe_fops);
7180 
7181 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7182 			  tr, &tracing_entries_fops);
7183 
7184 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7185 			  tr, &tracing_total_entries_fops);
7186 
7187 	trace_create_file("free_buffer", 0200, d_tracer,
7188 			  tr, &tracing_free_buffer_fops);
7189 
7190 	trace_create_file("trace_marker", 0220, d_tracer,
7191 			  tr, &tracing_mark_fops);
7192 
7193 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7194 			  &trace_clock_fops);
7195 
7196 	trace_create_file("tracing_on", 0644, d_tracer,
7197 			  tr, &rb_simple_fops);
7198 
7199 	create_trace_options_dir(tr);
7200 
7201 #ifdef CONFIG_TRACER_MAX_TRACE
7202 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7203 			&tr->max_latency, &tracing_max_lat_fops);
7204 #endif
7205 
7206 	if (ftrace_create_function_files(tr, d_tracer))
7207 		WARN(1, "Could not allocate function filter files");
7208 
7209 #ifdef CONFIG_TRACER_SNAPSHOT
7210 	trace_create_file("snapshot", 0644, d_tracer,
7211 			  tr, &snapshot_fops);
7212 #endif
7213 
7214 	for_each_tracing_cpu(cpu)
7215 		tracing_init_tracefs_percpu(tr, cpu);
7216 
7217 	ftrace_init_tracefs(tr, d_tracer);
7218 }
7219 
7220 static struct vfsmount *trace_automount(void *ignore)
7221 {
7222 	struct vfsmount *mnt;
7223 	struct file_system_type *type;
7224 
7225 	/*
7226 	 * To maintain backward compatibility for tools that mount
7227 	 * debugfs to get to the tracing facility, tracefs is automatically
7228 	 * mounted to the debugfs/tracing directory.
7229 	 */
7230 	type = get_fs_type("tracefs");
7231 	if (!type)
7232 		return NULL;
7233 	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7234 	put_filesystem(type);
7235 	if (IS_ERR(mnt))
7236 		return NULL;
7237 	mntget(mnt);
7238 
7239 	return mnt;
7240 }
7241 
7242 /**
7243  * tracing_init_dentry - initialize top level trace array
7244  *
7245  * This is called when creating files or directories in the tracing
7246  * directory. It is called via fs_initcall() by any of the boot up code
7247  * and expects to return the dentry of the top level tracing directory.
7248  */
7249 struct dentry *tracing_init_dentry(void)
7250 {
7251 	struct trace_array *tr = &global_trace;
7252 
7253 	/* The top level trace array uses NULL as parent */
7254 	if (tr->dir)
7255 		return NULL;
7256 
7257 	if (WARN_ON(!tracefs_initialized()) ||
7258 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
7259 		 WARN_ON(!debugfs_initialized())))
7260 		return ERR_PTR(-ENODEV);
7261 
7262 	/*
7263 	 * As there may still be users that expect the tracing
7264 	 * files to exist in debugfs/tracing, we must automount
7265 	 * the tracefs file system there, so older tools still
7266 	 * work with the newer kernel.
7267 	 */
7268 	tr->dir = debugfs_create_automount("tracing", NULL,
7269 					   trace_automount, NULL);
7270 	if (!tr->dir) {
7271 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7272 		return ERR_PTR(-ENOMEM);
7273 	}
7274 
7275 	return NULL;
7276 }
7277 
7278 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7279 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7280 
7281 static void __init trace_enum_init(void)
7282 {
7283 	int len;
7284 
7285 	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7286 	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7287 }
7288 
7289 #ifdef CONFIG_MODULES
7290 static void trace_module_add_enums(struct module *mod)
7291 {
7292 	if (!mod->num_trace_enums)
7293 		return;
7294 
7295 	/*
7296 	 * Modules with bad taint do not have events created; do
7297 	 * not bother with their enums either.
7298 	 */
7299 	if (trace_module_has_bad_taint(mod))
7300 		return;
7301 
7302 	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7303 }
7304 
7305 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7306 static void trace_module_remove_enums(struct module *mod)
7307 {
7308 	union trace_enum_map_item *map;
7309 	union trace_enum_map_item **last = &trace_enum_maps;
7310 
7311 	if (!mod->num_trace_enums)
7312 		return;
7313 
7314 	mutex_lock(&trace_enum_mutex);
7315 
7316 	map = trace_enum_maps;
7317 
7318 	while (map) {
7319 		if (map->head.mod == mod)
7320 			break;
7321 		map = trace_enum_jmp_to_tail(map);
7322 		last = &map->tail.next;
7323 		map = map->tail.next;
7324 	}
7325 	if (!map)
7326 		goto out;
7327 
7328 	*last = trace_enum_jmp_to_tail(map)->tail.next;
7329 	kfree(map);
7330  out:
7331 	mutex_unlock(&trace_enum_mutex);
7332 }
7333 #else
7334 static inline void trace_module_remove_enums(struct module *mod) { }
7335 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7336 
7337 static int trace_module_notify(struct notifier_block *self,
7338 			       unsigned long val, void *data)
7339 {
7340 	struct module *mod = data;
7341 
7342 	switch (val) {
7343 	case MODULE_STATE_COMING:
7344 		trace_module_add_enums(mod);
7345 		break;
7346 	case MODULE_STATE_GOING:
7347 		trace_module_remove_enums(mod);
7348 		break;
7349 	}
7350 
7351 	return 0;
7352 }
7353 
7354 static struct notifier_block trace_module_nb = {
7355 	.notifier_call = trace_module_notify,
7356 	.priority = 0,
7357 };
7358 #endif /* CONFIG_MODULES */
7359 
7360 static __init int tracer_init_tracefs(void)
7361 {
7362 	struct dentry *d_tracer;
7363 
7364 	trace_access_lock_init();
7365 
7366 	d_tracer = tracing_init_dentry();
7367 	if (IS_ERR(d_tracer))
7368 		return 0;
7369 
7370 	init_tracer_tracefs(&global_trace, d_tracer);
7371 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7372 
7373 	trace_create_file("tracing_thresh", 0644, d_tracer,
7374 			&global_trace, &tracing_thresh_fops);
7375 
7376 	trace_create_file("README", 0444, d_tracer,
7377 			NULL, &tracing_readme_fops);
7378 
7379 	trace_create_file("saved_cmdlines", 0444, d_tracer,
7380 			NULL, &tracing_saved_cmdlines_fops);
7381 
7382 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7383 			  NULL, &tracing_saved_cmdlines_size_fops);
7384 
7385 	trace_enum_init();
7386 
7387 	trace_create_enum_file(d_tracer);
7388 
7389 #ifdef CONFIG_MODULES
7390 	register_module_notifier(&trace_module_nb);
7391 #endif
7392 
7393 #ifdef CONFIG_DYNAMIC_FTRACE
7394 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7395 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7396 #endif
7397 
7398 	create_trace_instances(d_tracer);
7399 
7400 	update_tracer_options(&global_trace);
7401 
7402 	return 0;
7403 }
7404 
7405 static int trace_panic_handler(struct notifier_block *this,
7406 			       unsigned long event, void *unused)
7407 {
7408 	if (ftrace_dump_on_oops)
7409 		ftrace_dump(ftrace_dump_on_oops);
7410 	return NOTIFY_OK;
7411 }
7412 
7413 static struct notifier_block trace_panic_notifier = {
7414 	.notifier_call  = trace_panic_handler,
7415 	.next           = NULL,
7416 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
7417 };
7418 
7419 static int trace_die_handler(struct notifier_block *self,
7420 			     unsigned long val,
7421 			     void *data)
7422 {
7423 	switch (val) {
7424 	case DIE_OOPS:
7425 		if (ftrace_dump_on_oops)
7426 			ftrace_dump(ftrace_dump_on_oops);
7427 		break;
7428 	default:
7429 		break;
7430 	}
7431 	return NOTIFY_OK;
7432 }
7433 
7434 static struct notifier_block trace_die_notifier = {
7435 	.notifier_call = trace_die_handler,
7436 	.priority = 200
7437 };
7438 
7439 /*
7440  * printk is set to a max of 1024, but we really don't need it that big.
7441  * Nothing should be printing 1000 characters anyway.
7442  */
7443 #define TRACE_MAX_PRINT		1000
7444 
7445 /*
7446  * Define here KERN_TRACE so that we have one place to modify
7447  * it if we decide to change what log level the ftrace dump
7448  * should be at.
7449  */
7450 #define KERN_TRACE		KERN_EMERG
7451 
7452 void
7453 trace_printk_seq(struct trace_seq *s)
7454 {
7455 	/* Probably should print a warning here. */
7456 	if (s->seq.len >= TRACE_MAX_PRINT)
7457 		s->seq.len = TRACE_MAX_PRINT;
7458 
7459 	/*
7460 	 * More paranoid code. Although the buffer size is set to
7461 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7462 	 * an extra layer of protection.
7463 	 */
7464 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7465 		s->seq.len = s->seq.size - 1;
7466 
7467 	/* Should be zero terminated, but we are paranoid. */
7468 	s->buffer[s->seq.len] = 0;
7469 
7470 	printk(KERN_TRACE "%s", s->buffer);
7471 
7472 	trace_seq_init(s);
7473 }
7474 
7475 void trace_init_global_iter(struct trace_iterator *iter)
7476 {
7477 	iter->tr = &global_trace;
7478 	iter->trace = iter->tr->current_trace;
7479 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
7480 	iter->trace_buffer = &global_trace.trace_buffer;
7481 
7482 	if (iter->trace && iter->trace->open)
7483 		iter->trace->open(iter);
7484 
7485 	/* Annotate start of buffers if we had overruns */
7486 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
7487 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
7488 
7489 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
7490 	if (trace_clocks[iter->tr->clock_id].in_ns)
7491 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7492 }
7493 
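/*
 * Dump the ftrace ring buffer(s) to the console.  This is reached from
 * the panic/die notifiers above when ftrace_dump_on_oops is set, and
 * from the sysrq-z handler.
 */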
7494 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7495 {
7496 	/* use static because iter can be a bit big for the stack */
7497 	static struct trace_iterator iter;
7498 	static atomic_t dump_running;
7499 	struct trace_array *tr = &global_trace;
7500 	unsigned int old_userobj;
7501 	unsigned long flags;
7502 	int cnt = 0, cpu;
7503 
7504 	/* Only allow one dump user at a time. */
7505 	if (atomic_inc_return(&dump_running) != 1) {
7506 		atomic_dec(&dump_running);
7507 		return;
7508 	}
7509 
7510 	/*
7511 	 * Always turn off tracing when we dump.
7512 	 * We don't need to show trace output of what happens
7513 	 * between multiple crashes.
7514 	 *
7515 	 * If the user does a sysrq-z, then they can re-enable
7516 	 * tracing with echo 1 > tracing_on.
7517 	 */
7518 	tracing_off();
7519 
7520 	local_irq_save(flags);
7521 
7522 	/* Simulate the iterator */
7523 	trace_init_global_iter(&iter);
7524 
7525 	for_each_tracing_cpu(cpu) {
7526 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7527 	}
7528 
7529 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7530 
7531 	/* don't look at user memory in panic mode */
7532 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7533 
7534 	switch (oops_dump_mode) {
7535 	case DUMP_ALL:
7536 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7537 		break;
7538 	case DUMP_ORIG:
7539 		iter.cpu_file = raw_smp_processor_id();
7540 		break;
7541 	case DUMP_NONE:
7542 		goto out_enable;
7543 	default:
7544 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7545 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7546 	}
7547 
7548 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
7549 
7550 	/* Did function tracer already get disabled? */
7551 	if (ftrace_is_dead()) {
7552 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7553 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7554 	}
7555 
7556 	/*
7557 	 * We need to stop all tracing on all CPUs to read
7558 	 * the next buffer. This is a bit expensive, but is
7559 	 * not done often. We read everything we can,
7560 	 * and then release the locks again.
7561 	 */
7562 
7563 	while (!trace_empty(&iter)) {
7564 
7565 		if (!cnt)
7566 			printk(KERN_TRACE "---------------------------------\n");
7567 
7568 		cnt++;
7569 
7570 		/* reset all but tr, trace, and overruns */
7571 		memset(&iter.seq, 0,
7572 		       sizeof(struct trace_iterator) -
7573 		       offsetof(struct trace_iterator, seq));
7574 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
7575 		iter.pos = -1;
7576 
7577 		if (trace_find_next_entry_inc(&iter) != NULL) {
7578 			int ret;
7579 
7580 			ret = print_trace_line(&iter);
7581 			if (ret != TRACE_TYPE_NO_CONSUME)
7582 				trace_consume(&iter);
7583 		}
7584 		touch_nmi_watchdog();
7585 
7586 		trace_printk_seq(&iter.seq);
7587 	}
7588 
7589 	if (!cnt)
7590 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
7591 	else
7592 		printk(KERN_TRACE "---------------------------------\n");
7593 
7594  out_enable:
7595 	tr->trace_flags |= old_userobj;
7596 
7597 	for_each_tracing_cpu(cpu) {
7598 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7599 	}
7600 	atomic_dec(&dump_running);
7601 	local_irq_restore(flags);
7602 }
7603 EXPORT_SYMBOL_GPL(ftrace_dump);
7604 
7605 __init static int tracer_alloc_buffers(void)
7606 {
7607 	int ring_buf_size;
7608 	int ret = -ENOMEM;
7609 
7610 	/*
7611 	 * Make sure we don't accidentally add more trace options
7612 	 * than we have bits for.
7613 	 */
7614 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7615 
7616 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7617 		goto out;
7618 
7619 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7620 		goto out_free_buffer_mask;
7621 
7622 	/* Only allocate trace_printk buffers if a trace_printk exists */
7623 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7624 		/* Must be called before global_trace.buffer is allocated */
7625 		trace_printk_init_buffers();
7626 
7627 	/* To save memory, keep the ring buffer size to its minimum */
7628 	if (ring_buffer_expanded)
7629 		ring_buf_size = trace_buf_size;
7630 	else
7631 		ring_buf_size = 1;
7632 
7633 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7634 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7635 
7636 	raw_spin_lock_init(&global_trace.start_lock);
7637 
7638 	/* Used for event triggers */
7639 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7640 	if (!temp_buffer)
7641 		goto out_free_cpumask;
7642 
7643 	if (trace_create_savedcmd() < 0)
7644 		goto out_free_temp_buffer;
7645 
7646 	/* TODO: make the number of buffers hot pluggable with CPUs */
7647 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7648 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7649 		WARN_ON(1);
7650 		goto out_free_savedcmd;
7651 	}
7652 
7653 	if (global_trace.buffer_disabled)
7654 		tracing_off();
7655 
7656 	if (trace_boot_clock) {
7657 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
7658 		if (ret < 0)
7659 			pr_warn("Trace clock %s not defined, going back to default\n",
7660 				trace_boot_clock);
7661 	}
7662 
7663 	/*
7664 	 * register_tracer() might reference current_trace, so it
7665 	 * needs to be set before we register anything. This is
7666 	 * just a bootstrap of current_trace anyway.
7667 	 */
7668 	global_trace.current_trace = &nop_trace;
7669 
7670 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7671 
7672 	ftrace_init_global_array_ops(&global_trace);
7673 
7674 	init_trace_flags_index(&global_trace);
7675 
7676 	register_tracer(&nop_trace);
7677 
7678 	/* All seems OK, enable tracing */
7679 	tracing_disabled = 0;
7680 
7681 	atomic_notifier_chain_register(&panic_notifier_list,
7682 				       &trace_panic_notifier);
7683 
7684 	register_die_notifier(&trace_die_notifier);
7685 
7686 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7687 
7688 	INIT_LIST_HEAD(&global_trace.systems);
7689 	INIT_LIST_HEAD(&global_trace.events);
7690 	list_add(&global_trace.list, &ftrace_trace_arrays);
7691 
7692 	apply_trace_boot_options();
7693 
7694 	register_snapshot_cmd();
7695 
7696 	return 0;
7697 
7698 out_free_savedcmd:
7699 	free_saved_cmdlines_buffer(savedcmd);
7700 out_free_temp_buffer:
7701 	ring_buffer_free(temp_buffer);
7702 out_free_cpumask:
7703 	free_cpumask_var(global_trace.tracing_cpumask);
7704 out_free_buffer_mask:
7705 	free_cpumask_var(tracing_buffer_mask);
7706 out:
7707 	return ret;
7708 }
7709 
7710 void __init trace_init(void)
7711 {
7712 	if (tracepoint_printk) {
7713 		tracepoint_print_iter =
7714 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7715 		if (WARN_ON(!tracepoint_print_iter))
7716 			tracepoint_printk = 0;
7717 	}
7718 	tracer_alloc_buffers();
7719 	trace_event_init();
7720 }
7721 
7722 __init static int clear_boot_tracer(void)
7723 {
7724 	/*
7725 	 * The default bootup tracer name lives in an init section and
7726 	 * will be freed after boot. This function runs at late_initcall
7727 	 * time; if the boot tracer was never registered, clear the
7728 	 * pointer so a later registration does not access memory that
7729 	 * is about to be freed.
7730 	 */
7731 	if (!default_bootup_tracer)
7732 		return 0;
7733 
7734 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7735 	       default_bootup_tracer);
7736 	default_bootup_tracer = NULL;
7737 
7738 	return 0;
7739 }
7740 
7741 fs_initcall(tracer_init_tracefs);
7742 late_initcall(clear_boot_tracer);
7743