xref: /linux/kernel/trace/trace.c (revision 148f9bb87745ed45f7a11b2cbd3bc0f017d5d257)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42 
43 #include "trace.h"
44 #include "trace_output.h"
45 
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51 
52 /*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the
 * entries inserted during the selftest, but concurrent
 * insertions into the ring buffer (such as trace_printk) could occur
 * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60 
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65 
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68 	{ }
69 };
70 
71 static struct tracer_flags dummy_tracer_flags = {
72 	.val = 0,
73 	.opts = dummy_tracer_opt
74 };
75 
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78 	return 0;
79 }
80 
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurred.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87 
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 but will turn to zero if the initialization
91  * of the tracer is successful. But that is the only place that sets
92  * this back to zero.
93  */
94 static int tracing_disabled = 1;
95 
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97 
98 cpumask_var_t __read_mostly	tracing_buffer_mask;
99 
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is default off, but you can enable it with either specifying
110  * "ftrace_dump_on_oops" in the kernel command line, or setting
111  * /proc/sys/kernel/ftrace_dump_on_oops
112  * Set 1 if you want to dump buffers of all CPUs
113  * Set 2 if you want to dump the buffer of the CPU that triggered oops
114  */
115 
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117 
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120 
121 static int tracing_set_tracer(const char *buf);
122 
123 #define MAX_TRACER_SIZE		100
124 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
125 static char *default_bootup_tracer;
126 
127 static bool allocate_snapshot;
128 
129 static int __init set_cmdline_ftrace(char *str)
130 {
131 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
132 	default_bootup_tracer = bootup_tracer_buf;
133 	/* We are using ftrace early, expand it */
134 	ring_buffer_expanded = true;
135 	return 1;
136 }
137 __setup("ftrace=", set_cmdline_ftrace);
138 
139 static int __init set_ftrace_dump_on_oops(char *str)
140 {
141 	if (*str++ != '=' || !*str) {
142 		ftrace_dump_on_oops = DUMP_ALL;
143 		return 1;
144 	}
145 
146 	if (!strcmp("orig_cpu", str)) {
147 		ftrace_dump_on_oops = DUMP_ORIG;
148                 return 1;
149         }
150 
151         return 0;
152 }
153 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
154 
155 static int __init stop_trace_on_warning(char *str)
156 {
157 	__disable_trace_on_warning = 1;
158 	return 1;
159 }
160 __setup("traceoff_on_warning=", stop_trace_on_warning);
161 
162 static int __init boot_alloc_snapshot(char *str)
163 {
164 	allocate_snapshot = true;
165 	/* We also need the main ring buffer expanded */
166 	ring_buffer_expanded = true;
167 	return 1;
168 }
169 __setup("alloc_snapshot", boot_alloc_snapshot);
170 
171 
172 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
173 static char *trace_boot_options __initdata;
174 
175 static int __init set_trace_boot_options(char *str)
176 {
177 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
178 	trace_boot_options = trace_boot_options_buf;
179 	return 0;
180 }
181 __setup("trace_options=", set_trace_boot_options);
182 
183 
184 unsigned long long ns2usecs(cycle_t nsec)
185 {
186 	nsec += 500;
187 	do_div(nsec, 1000);
188 	return nsec;
189 }
190 
191 /*
192  * The global_trace is the descriptor that holds the tracing
193  * buffers for the live tracing. For each CPU, it contains
194  * a link list of pages that will store trace entries. The
195  * page descriptor of the pages in the memory is used to hold
196  * the link list by linking the lru item in the page descriptor
197  * to each of the pages in the buffer per CPU.
198  *
199  * For each active CPU there is a data field that holds the
200  * pages for the buffer for that CPU. Each CPU has the same number
201  * of pages allocated for its buffer.
202  */
203 static struct trace_array	global_trace;
204 
205 LIST_HEAD(ftrace_trace_arrays);
206 
207 int trace_array_get(struct trace_array *this_tr)
208 {
209 	struct trace_array *tr;
210 	int ret = -ENODEV;
211 
212 	mutex_lock(&trace_types_lock);
213 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
214 		if (tr == this_tr) {
215 			tr->ref++;
216 			ret = 0;
217 			break;
218 		}
219 	}
220 	mutex_unlock(&trace_types_lock);
221 
222 	return ret;
223 }
224 
225 static void __trace_array_put(struct trace_array *this_tr)
226 {
227 	WARN_ON(!this_tr->ref);
228 	this_tr->ref--;
229 }
230 
231 void trace_array_put(struct trace_array *this_tr)
232 {
233 	mutex_lock(&trace_types_lock);
234 	__trace_array_put(this_tr);
235 	mutex_unlock(&trace_types_lock);
236 }
237 
/*
 * Exported wrapper around filter_check_discard(): same check, with the
 * arguments supplied in (buffer, call, rec, event) order.
 */
int filter_current_check_discard(struct ring_buffer *buffer,
				 struct ftrace_event_call *call, void *rec,
				 struct ring_buffer_event *event)
{
	int discarded;

	discarded = filter_check_discard(call, rec, buffer, event);

	return discarded;
}
EXPORT_SYMBOL_GPL(filter_current_check_discard);
245 
246 cycle_t ftrace_now(int cpu)
247 {
248 	u64 ts;
249 
250 	/* Early boot up does not have a buffer yet */
251 	if (!global_trace.trace_buffer.buffer)
252 		return trace_clock_local();
253 
254 	ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu);
255 	ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts);
256 
257 	return ts;
258 }
259 
260 /**
261  * tracing_is_enabled - Show if global_trace has been disabled
262  *
263  * Shows if the global trace has been enabled or not. It uses the
264  * mirror flag "buffer_disabled" to be used in fast paths such as for
265  * the irqsoff tracer. But it may be inaccurate due to races. If you
266  * need to know the accurate state, use tracing_is_on() which is a little
267  * slower, but accurate.
268  */
269 int tracing_is_enabled(void)
270 {
271 	/*
272 	 * For quick access (irqsoff uses this in fast path), just
273 	 * return the mirror variable of the state of the ring buffer.
274 	 * It's a little racy, but we don't really care.
275 	 */
276 	smp_rmb();
277 	return !global_trace.buffer_disabled;
278 }
279 
280 /*
281  * trace_buf_size is the size in bytes that is allocated
282  * for a buffer. Note, the number of bytes is always rounded
283  * to page size.
284  *
285  * This number is purposely set to a low number of 16384.
286  * If the dump on oops happens, it will be much appreciated
287  * to not have to wait for all that output. Anyway this can be
288  * boot time and run time configurable.
289  */
290 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
291 
292 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
293 
294 /* trace_types holds a link list of available tracers. */
295 static struct tracer		*trace_types __read_mostly;
296 
297 /*
298  * trace_types_lock is used to protect the trace_types list.
299  */
300 DEFINE_MUTEX(trace_types_lock);
301 
302 /*
303  * serialize the access of the ring buffer
304  *
 * The ring buffer serializes readers, but that is only low level
 * protection. The validity of the events (as returned by
 * ring_buffer_peek() etc.) is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the events producer.
 *   B) The page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
316  *
317  * These primitives allow multi process access to different cpu ring buffer
318  * concurrently.
319  *
320  * These primitives don't distinguish read-only and read-consume access.
321  * Multi read-only access are also serialized.
322  */
323 
324 #ifdef CONFIG_SMP
325 static DECLARE_RWSEM(all_cpu_access_lock);
326 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
327 
328 static inline void trace_access_lock(int cpu)
329 {
330 	if (cpu == RING_BUFFER_ALL_CPUS) {
331 		/* gain it for accessing the whole ring buffer. */
332 		down_write(&all_cpu_access_lock);
333 	} else {
334 		/* gain it for accessing a cpu ring buffer. */
335 
336 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
337 		down_read(&all_cpu_access_lock);
338 
339 		/* Secondly block other access to this @cpu ring buffer. */
340 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
341 	}
342 }
343 
344 static inline void trace_access_unlock(int cpu)
345 {
346 	if (cpu == RING_BUFFER_ALL_CPUS) {
347 		up_write(&all_cpu_access_lock);
348 	} else {
349 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
350 		up_read(&all_cpu_access_lock);
351 	}
352 }
353 
354 static inline void trace_access_lock_init(void)
355 {
356 	int cpu;
357 
358 	for_each_possible_cpu(cpu)
359 		mutex_init(&per_cpu(cpu_access_lock, cpu));
360 }
361 
362 #else
363 
364 static DEFINE_MUTEX(access_lock);
365 
366 static inline void trace_access_lock(int cpu)
367 {
368 	(void)cpu;
369 	mutex_lock(&access_lock);
370 }
371 
372 static inline void trace_access_unlock(int cpu)
373 {
374 	(void)cpu;
375 	mutex_unlock(&access_lock);
376 }
377 
378 static inline void trace_access_lock_init(void)
379 {
380 }
381 
382 #endif
383 
384 /* trace_flags holds trace_options default values */
385 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
386 	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
387 	TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
388 	TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
389 
390 static void tracer_tracing_on(struct trace_array *tr)
391 {
392 	if (tr->trace_buffer.buffer)
393 		ring_buffer_record_on(tr->trace_buffer.buffer);
394 	/*
395 	 * This flag is looked at when buffers haven't been allocated
396 	 * yet, or by some tracers (like irqsoff), that just want to
397 	 * know if the ring buffer has been disabled, but it can handle
398 	 * races of where it gets disabled but we still do a record.
399 	 * As the check is in the fast path of the tracers, it is more
400 	 * important to be fast than accurate.
401 	 */
402 	tr->buffer_disabled = 0;
403 	/* Make the flag seen by readers */
404 	smp_wmb();
405 }
406 
407 /**
408  * tracing_on - enable tracing buffers
409  *
410  * This function enables tracing buffers that may have been
411  * disabled with tracing_off.
412  */
413 void tracing_on(void)
414 {
415 	tracer_tracing_on(&global_trace);
416 }
417 EXPORT_SYMBOL_GPL(tracing_on);
418 
419 /**
420  * __trace_puts - write a constant string into the trace buffer.
421  * @ip:	   The address of the caller
422  * @str:   The constant string to write
423  * @size:  The size of the string.
424  */
425 int __trace_puts(unsigned long ip, const char *str, int size)
426 {
427 	struct ring_buffer_event *event;
428 	struct ring_buffer *buffer;
429 	struct print_entry *entry;
430 	unsigned long irq_flags;
431 	int alloc;
432 
433 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
434 
435 	local_save_flags(irq_flags);
436 	buffer = global_trace.trace_buffer.buffer;
437 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
438 					  irq_flags, preempt_count());
439 	if (!event)
440 		return 0;
441 
442 	entry = ring_buffer_event_data(event);
443 	entry->ip = ip;
444 
445 	memcpy(&entry->buf, str, size);
446 
447 	/* Add a newline if necessary */
448 	if (entry->buf[size - 1] != '\n') {
449 		entry->buf[size] = '\n';
450 		entry->buf[size + 1] = '\0';
451 	} else
452 		entry->buf[size] = '\0';
453 
454 	__buffer_unlock_commit(buffer, event);
455 
456 	return size;
457 }
458 EXPORT_SYMBOL_GPL(__trace_puts);
459 
460 /**
461  * __trace_bputs - write the pointer to a constant string into trace buffer
462  * @ip:	   The address of the caller
463  * @str:   The constant string to write to the buffer to
464  */
465 int __trace_bputs(unsigned long ip, const char *str)
466 {
467 	struct ring_buffer_event *event;
468 	struct ring_buffer *buffer;
469 	struct bputs_entry *entry;
470 	unsigned long irq_flags;
471 	int size = sizeof(struct bputs_entry);
472 
473 	local_save_flags(irq_flags);
474 	buffer = global_trace.trace_buffer.buffer;
475 	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
476 					  irq_flags, preempt_count());
477 	if (!event)
478 		return 0;
479 
480 	entry = ring_buffer_event_data(event);
481 	entry->ip			= ip;
482 	entry->str			= str;
483 
484 	__buffer_unlock_commit(buffer, event);
485 
486 	return 1;
487 }
488 EXPORT_SYMBOL_GPL(__trace_bputs);
489 
490 #ifdef CONFIG_TRACER_SNAPSHOT
491 /**
492  * trace_snapshot - take a snapshot of the current buffer.
493  *
494  * This causes a swap between the snapshot buffer and the current live
495  * tracing buffer. You can use this to take snapshots of the live
496  * trace when some condition is triggered, but continue to trace.
497  *
498  * Note, make sure to allocate the snapshot with either
499  * a tracing_snapshot_alloc(), or by doing it manually
500  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
501  *
502  * If the snapshot buffer is not allocated, it will stop tracing.
503  * Basically making a permanent snapshot.
504  */
505 void tracing_snapshot(void)
506 {
507 	struct trace_array *tr = &global_trace;
508 	struct tracer *tracer = tr->current_trace;
509 	unsigned long flags;
510 
511 	if (in_nmi()) {
512 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
513 		internal_trace_puts("*** snapshot is being ignored        ***\n");
514 		return;
515 	}
516 
517 	if (!tr->allocated_snapshot) {
518 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
519 		internal_trace_puts("*** stopping trace here!   ***\n");
520 		tracing_off();
521 		return;
522 	}
523 
524 	/* Note, snapshot can not be used when the tracer uses it */
525 	if (tracer->use_max_tr) {
526 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
527 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
528 		return;
529 	}
530 
531 	local_irq_save(flags);
532 	update_max_tr(tr, current, smp_processor_id());
533 	local_irq_restore(flags);
534 }
535 EXPORT_SYMBOL_GPL(tracing_snapshot);
536 
537 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
538 					struct trace_buffer *size_buf, int cpu_id);
539 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
540 
541 static int alloc_snapshot(struct trace_array *tr)
542 {
543 	int ret;
544 
545 	if (!tr->allocated_snapshot) {
546 
547 		/* allocate spare buffer */
548 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
549 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
550 		if (ret < 0)
551 			return ret;
552 
553 		tr->allocated_snapshot = true;
554 	}
555 
556 	return 0;
557 }
558 
559 void free_snapshot(struct trace_array *tr)
560 {
561 	/*
562 	 * We don't free the ring buffer. instead, resize it because
563 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
564 	 * we want preserve it.
565 	 */
566 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
567 	set_buffer_entries(&tr->max_buffer, 1);
568 	tracing_reset_online_cpus(&tr->max_buffer);
569 	tr->allocated_snapshot = false;
570 }
571 
572 /**
573  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
574  *
575  * This is similar to trace_snapshot(), but it will allocate the
576  * snapshot buffer if it isn't already allocated. Use this only
577  * where it is safe to sleep, as the allocation may sleep.
578  *
579  * This causes a swap between the snapshot buffer and the current live
580  * tracing buffer. You can use this to take snapshots of the live
581  * trace when some condition is triggered, but continue to trace.
582  */
583 void tracing_snapshot_alloc(void)
584 {
585 	struct trace_array *tr = &global_trace;
586 	int ret;
587 
588 	ret = alloc_snapshot(tr);
589 	if (WARN_ON(ret < 0))
590 		return;
591 
592 	tracing_snapshot();
593 }
594 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
595 #else
/* !CONFIG_TRACER_SNAPSHOT: no snapshot support, only warn (once). */
void tracing_snapshot(void)
{
	WARN_ONCE(1,
		  "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/* !CONFIG_TRACER_SNAPSHOT: nothing can be allocated here. */
void tracing_snapshot_alloc(void)
{
	/* Calling the stub is enough to give the warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
607 #endif /* CONFIG_TRACER_SNAPSHOT */
608 
609 static void tracer_tracing_off(struct trace_array *tr)
610 {
611 	if (tr->trace_buffer.buffer)
612 		ring_buffer_record_off(tr->trace_buffer.buffer);
613 	/*
614 	 * This flag is looked at when buffers haven't been allocated
615 	 * yet, or by some tracers (like irqsoff), that just want to
616 	 * know if the ring buffer has been disabled, but it can handle
617 	 * races of where it gets disabled but we still do a record.
618 	 * As the check is in the fast path of the tracers, it is more
619 	 * important to be fast than accurate.
620 	 */
621 	tr->buffer_disabled = 1;
622 	/* Make the flag seen by readers */
623 	smp_wmb();
624 }
625 
626 /**
627  * tracing_off - turn off tracing buffers
628  *
629  * This function stops the tracing buffers from recording data.
630  * It does not disable any overhead the tracers themselves may
631  * be causing. This function simply causes all recording to
632  * the ring buffers to fail.
633  */
634 void tracing_off(void)
635 {
636 	tracer_tracing_off(&global_trace);
637 }
638 EXPORT_SYMBOL_GPL(tracing_off);
639 
640 void disable_trace_on_warning(void)
641 {
642 	if (__disable_trace_on_warning)
643 		tracing_off();
644 }
645 
646 /**
647  * tracer_tracing_is_on - show real state of ring buffer enabled
648  * @tr : the trace array to know if ring buffer is enabled
649  *
650  * Shows real state of the ring buffer if it is enabled or not.
651  */
652 static int tracer_tracing_is_on(struct trace_array *tr)
653 {
654 	if (tr->trace_buffer.buffer)
655 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
656 	return !tr->buffer_disabled;
657 }
658 
659 /**
660  * tracing_is_on - show state of ring buffers enabled
661  */
662 int tracing_is_on(void)
663 {
664 	return tracer_tracing_is_on(&global_trace);
665 }
666 EXPORT_SYMBOL_GPL(tracing_is_on);
667 
668 static int __init set_buf_size(char *str)
669 {
670 	unsigned long buf_size;
671 
672 	if (!str)
673 		return 0;
674 	buf_size = memparse(str, &str);
675 	/* nr_entries can not be zero */
676 	if (buf_size == 0)
677 		return 0;
678 	trace_buf_size = buf_size;
679 	return 1;
680 }
681 __setup("trace_buf_size=", set_buf_size);
682 
683 static int __init set_tracing_thresh(char *str)
684 {
685 	unsigned long threshold;
686 	int ret;
687 
688 	if (!str)
689 		return 0;
690 	ret = kstrtoul(str, 0, &threshold);
691 	if (ret < 0)
692 		return 0;
693 	tracing_thresh = threshold * 1000;
694 	return 1;
695 }
696 __setup("tracing_thresh=", set_tracing_thresh);
697 
/* Convert nanoseconds to whole microseconds, truncating the remainder. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
702 
/*
 * Names of the trace options, one per flag bit.
 * These must match the bit positions in trace_iterator_flags.
 * The list is NULL terminated.
 */
static const char *trace_options[] = {
	"print-parent",
	"sym-offset",
	"sym-addr",
	"verbose",
	"raw",
	"hex",
	"bin",
	"block",
	"stacktrace",
	"trace_printk",
	"ftrace_preempt",
	"branch",
	"annotate",
	"userstacktrace",
	"sym-userobj",
	"printk-msg-only",
	"context-info",
	"latency-format",
	"sleep-time",
	"graph-time",
	"record-cmd",
	"overwrite",
	"disable_on_free",
	"irq-info",
	"markers",
	"function-trace",
	NULL
};
733 
734 static struct {
735 	u64 (*func)(void);
736 	const char *name;
737 	int in_ns;		/* is this clock in nanoseconds? */
738 } trace_clocks[] = {
739 	{ trace_clock_local,	"local",	1 },
740 	{ trace_clock_global,	"global",	1 },
741 	{ trace_clock_counter,	"counter",	0 },
742 	{ trace_clock_jiffies,	"uptime",	1 },
743 	{ trace_clock,		"perf",		1 },
744 	ARCH_TRACE_CLOCKS
745 };
746 
747 /*
748  * trace_parser_get_init - gets the buffer for trace parser
749  */
750 int trace_parser_get_init(struct trace_parser *parser, int size)
751 {
752 	memset(parser, 0, sizeof(*parser));
753 
754 	parser->buffer = kmalloc(size, GFP_KERNEL);
755 	if (!parser->buffer)
756 		return 1;
757 
758 	parser->size = size;
759 	return 0;
760 }
761 
762 /*
763  * trace_parser_put - frees the buffer for trace parser
764  */
765 void trace_parser_put(struct trace_parser *parser)
766 {
767 	kfree(parser->buffer);
768 }
769 
770 /*
771  * trace_get_user - reads the user input string separated by  space
772  * (matched by isspace(ch))
773  *
774  * For each string found the 'struct trace_parser' is updated,
775  * and the function returns.
776  *
777  * Returns number of bytes read.
778  *
779  * See kernel/trace/trace.h for 'struct trace_parser' details.
780  */
781 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
782 	size_t cnt, loff_t *ppos)
783 {
784 	char ch;
785 	size_t read = 0;
786 	ssize_t ret;
787 
788 	if (!*ppos)
789 		trace_parser_clear(parser);
790 
791 	ret = get_user(ch, ubuf++);
792 	if (ret)
793 		goto out;
794 
795 	read++;
796 	cnt--;
797 
798 	/*
799 	 * The parser is not finished with the last write,
800 	 * continue reading the user input without skipping spaces.
801 	 */
802 	if (!parser->cont) {
803 		/* skip white space */
804 		while (cnt && isspace(ch)) {
805 			ret = get_user(ch, ubuf++);
806 			if (ret)
807 				goto out;
808 			read++;
809 			cnt--;
810 		}
811 
812 		/* only spaces were written */
813 		if (isspace(ch)) {
814 			*ppos += read;
815 			ret = read;
816 			goto out;
817 		}
818 
819 		parser->idx = 0;
820 	}
821 
822 	/* read the non-space input */
823 	while (cnt && !isspace(ch)) {
824 		if (parser->idx < parser->size - 1)
825 			parser->buffer[parser->idx++] = ch;
826 		else {
827 			ret = -EINVAL;
828 			goto out;
829 		}
830 		ret = get_user(ch, ubuf++);
831 		if (ret)
832 			goto out;
833 		read++;
834 		cnt--;
835 	}
836 
837 	/* We either got finished input or we have to wait for another call. */
838 	if (isspace(ch)) {
839 		parser->buffer[parser->idx] = 0;
840 		parser->cont = false;
841 	} else {
842 		parser->cont = true;
843 		parser->buffer[parser->idx++] = ch;
844 	}
845 
846 	*ppos += read;
847 	ret = read;
848 
849 out:
850 	return ret;
851 }
852 
853 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
854 {
855 	int len;
856 	int ret;
857 
858 	if (!cnt)
859 		return 0;
860 
861 	if (s->len <= s->readpos)
862 		return -EBUSY;
863 
864 	len = s->len - s->readpos;
865 	if (cnt > len)
866 		cnt = len;
867 	ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
868 	if (ret == cnt)
869 		return -EFAULT;
870 
871 	cnt -= ret;
872 
873 	s->readpos += cnt;
874 	return cnt;
875 }
876 
877 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
878 {
879 	int len;
880 
881 	if (s->len <= s->readpos)
882 		return -EBUSY;
883 
884 	len = s->len - s->readpos;
885 	if (cnt > len)
886 		cnt = len;
887 	memcpy(buf, s->buffer + s->readpos, cnt);
888 
889 	s->readpos += cnt;
890 	return cnt;
891 }
892 
893 /*
894  * ftrace_max_lock is used to protect the swapping of buffers
895  * when taking a max snapshot. The buffers themselves are
896  * protected by per_cpu spinlocks. But the action of the swap
897  * needs its own lock.
898  *
899  * This is defined as a arch_spinlock_t in order to help
900  * with performance when lockdep debugging is enabled.
901  *
902  * It is also used in other places outside the update_max_tr
903  * so it needs to be defined outside of the
904  * CONFIG_TRACER_MAX_TRACE.
905  */
906 static arch_spinlock_t ftrace_max_lock =
907 	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
908 
909 unsigned long __read_mostly	tracing_thresh;
910 
911 #ifdef CONFIG_TRACER_MAX_TRACE
912 unsigned long __read_mostly	tracing_max_latency;
913 
914 /*
915  * Copy the new maximum trace into the separate maximum-trace
916  * structure. (this way the maximum trace is permanently saved,
917  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
918  */
919 static void
920 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
921 {
922 	struct trace_buffer *trace_buf = &tr->trace_buffer;
923 	struct trace_buffer *max_buf = &tr->max_buffer;
924 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
925 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
926 
927 	max_buf->cpu = cpu;
928 	max_buf->time_start = data->preempt_timestamp;
929 
930 	max_data->saved_latency = tracing_max_latency;
931 	max_data->critical_start = data->critical_start;
932 	max_data->critical_end = data->critical_end;
933 
934 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
935 	max_data->pid = tsk->pid;
936 	/*
937 	 * If tsk == current, then use current_uid(), as that does not use
938 	 * RCU. The irq tracer can be called out of RCU scope.
939 	 */
940 	if (tsk == current)
941 		max_data->uid = current_uid();
942 	else
943 		max_data->uid = task_uid(tsk);
944 
945 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
946 	max_data->policy = tsk->policy;
947 	max_data->rt_priority = tsk->rt_priority;
948 
949 	/* record this tasks comm */
950 	tracing_record_cmdline(tsk);
951 }
952 
953 /**
954  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
955  * @tr: tracer
956  * @tsk: the task with the latency
957  * @cpu: The cpu that initiated the trace.
958  *
959  * Flip the buffers between the @tr and the max_tr and record information
960  * about which task was the cause of this latency.
961  */
962 void
963 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
964 {
965 	struct ring_buffer *buf;
966 
967 	if (tr->stop_count)
968 		return;
969 
970 	WARN_ON_ONCE(!irqs_disabled());
971 
972 	if (!tr->allocated_snapshot) {
973 		/* Only the nop tracer should hit this when disabling */
974 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
975 		return;
976 	}
977 
978 	arch_spin_lock(&ftrace_max_lock);
979 
980 	buf = tr->trace_buffer.buffer;
981 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
982 	tr->max_buffer.buffer = buf;
983 
984 	__update_max_tr(tr, tsk, cpu);
985 	arch_spin_unlock(&ftrace_max_lock);
986 }
987 
988 /**
989  * update_max_tr_single - only copy one trace over, and reset the rest
990  * @tr - tracer
991  * @tsk - task with the latency
992  * @cpu - the cpu of the buffer to copy.
993  *
994  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
995  */
996 void
997 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
998 {
999 	int ret;
1000 
1001 	if (tr->stop_count)
1002 		return;
1003 
1004 	WARN_ON_ONCE(!irqs_disabled());
1005 	if (!tr->allocated_snapshot) {
1006 		/* Only the nop tracer should hit this when disabling */
1007 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1008 		return;
1009 	}
1010 
1011 	arch_spin_lock(&ftrace_max_lock);
1012 
1013 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1014 
1015 	if (ret == -EBUSY) {
1016 		/*
1017 		 * We failed to swap the buffer due to a commit taking
1018 		 * place on this CPU. We fail to record, but we reset
1019 		 * the max trace buffer (no one writes directly to it)
1020 		 * and flag that it failed.
1021 		 */
1022 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1023 			"Failed to swap buffers due to commit in progress\n");
1024 	}
1025 
1026 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1027 
1028 	__update_max_tr(tr, tsk, cpu);
1029 	arch_spin_unlock(&ftrace_max_lock);
1030 }
1031 #endif /* CONFIG_TRACER_MAX_TRACE */
1032 
1033 static void default_wait_pipe(struct trace_iterator *iter)
1034 {
1035 	/* Iterators are static, they should be filled or empty */
1036 	if (trace_buffer_iter(iter, iter->cpu_file))
1037 		return;
1038 
1039 	ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1040 }
1041 
1042 #ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * Run the selftest of tracer @type against the global trace array.
 *
 * Returns 0 if the tracer has no selftest, if selftests are disabled,
 * or if the selftest passed; returns -1 if the selftest failed.
 */
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->trace_buffer);

	tr->current_trace = type;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		/* Mark the snapshot buffer as usable for the duration of the test */
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		/*
		 * NOTE(review): on this path allocated_snapshot stays set and
		 * the max buffer is not shrunk again - confirm this is intended.
		 */
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->trace_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
1101 #else
/* Selftests are compiled out: report success so the tracer is registered. */
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
1106 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1107 
1108 /**
1109  * register_tracer - register a tracer with the ftrace system.
1110  * @type - the plugin for the tracer
1111  *
1112  * Register a new plugin tracer.
1113  */
1114 int register_tracer(struct tracer *type)
1115 {
1116 	struct tracer *t;
1117 	int ret = 0;
1118 
1119 	if (!type->name) {
1120 		pr_info("Tracer must have a name\n");
1121 		return -1;
1122 	}
1123 
1124 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1125 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1126 		return -1;
1127 	}
1128 
1129 	mutex_lock(&trace_types_lock);
1130 
1131 	tracing_selftest_running = true;
1132 
1133 	for (t = trace_types; t; t = t->next) {
1134 		if (strcmp(type->name, t->name) == 0) {
1135 			/* already found */
1136 			pr_info("Tracer %s already registered\n",
1137 				type->name);
1138 			ret = -1;
1139 			goto out;
1140 		}
1141 	}
1142 
1143 	if (!type->set_flag)
1144 		type->set_flag = &dummy_set_flag;
1145 	if (!type->flags)
1146 		type->flags = &dummy_tracer_flags;
1147 	else
1148 		if (!type->flags->opts)
1149 			type->flags->opts = dummy_tracer_opt;
1150 	if (!type->wait_pipe)
1151 		type->wait_pipe = default_wait_pipe;
1152 
1153 	ret = run_tracer_selftest(type);
1154 	if (ret < 0)
1155 		goto out;
1156 
1157 	type->next = trace_types;
1158 	trace_types = type;
1159 
1160  out:
1161 	tracing_selftest_running = false;
1162 	mutex_unlock(&trace_types_lock);
1163 
1164 	if (ret || !default_bootup_tracer)
1165 		goto out_unlock;
1166 
1167 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1168 		goto out_unlock;
1169 
1170 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1171 	/* Do we want this tracer to start on bootup? */
1172 	tracing_set_tracer(type->name);
1173 	default_bootup_tracer = NULL;
1174 	/* disable other selftests, since this will break it. */
1175 	tracing_selftest_disabled = true;
1176 #ifdef CONFIG_FTRACE_STARTUP_TEST
1177 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1178 	       type->name);
1179 #endif
1180 
1181  out_unlock:
1182 	return ret;
1183 }
1184 
1185 void tracing_reset(struct trace_buffer *buf, int cpu)
1186 {
1187 	struct ring_buffer *buffer = buf->buffer;
1188 
1189 	if (!buffer)
1190 		return;
1191 
1192 	ring_buffer_record_disable(buffer);
1193 
1194 	/* Make sure all commits have finished */
1195 	synchronize_sched();
1196 	ring_buffer_reset_cpu(buffer, cpu);
1197 
1198 	ring_buffer_record_enable(buffer);
1199 }
1200 
1201 void tracing_reset_online_cpus(struct trace_buffer *buf)
1202 {
1203 	struct ring_buffer *buffer = buf->buffer;
1204 	int cpu;
1205 
1206 	if (!buffer)
1207 		return;
1208 
1209 	ring_buffer_record_disable(buffer);
1210 
1211 	/* Make sure all commits have finished */
1212 	synchronize_sched();
1213 
1214 	buf->time_start = ftrace_now(buf->cpu);
1215 
1216 	for_each_online_cpu(cpu)
1217 		ring_buffer_reset_cpu(buffer, cpu);
1218 
1219 	ring_buffer_record_enable(buffer);
1220 }
1221 
1222 void tracing_reset_current(int cpu)
1223 {
1224 	tracing_reset(&global_trace.trace_buffer, cpu);
1225 }
1226 
1227 void tracing_reset_all_online_cpus(void)
1228 {
1229 	struct trace_array *tr;
1230 
1231 	mutex_lock(&trace_types_lock);
1232 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1233 		tracing_reset_online_cpus(&tr->trace_buffer);
1234 #ifdef CONFIG_TRACER_MAX_TRACE
1235 		tracing_reset_online_cpus(&tr->max_buffer);
1236 #endif
1237 	}
1238 	mutex_unlock(&trace_types_lock);
1239 }
1240 
/* Number of task comm strings kept in saved_cmdlines[] */
#define SAVED_CMDLINES 128
/* Marker stored in either map when an entry has no mapping */
#define NO_CMDLINE_MAP UINT_MAX
/* pid -> index into saved_cmdlines[], or NO_CMDLINE_MAP */
static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
/* saved_cmdlines[] index -> pid currently stored there, or NO_CMDLINE_MAP */
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
/* The saved comm strings themselves */
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
/* Index of the most recently assigned saved_cmdlines[] slot */
static int cmdline_idx;
/* Protects the two maps, saved_cmdlines[] and cmdline_idx */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;

/* temporary disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;
1251 
1252 static void trace_init_cmdlines(void)
1253 {
1254 	memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1255 	memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1256 	cmdline_idx = 0;
1257 }
1258 
1259 int is_tracing_stopped(void)
1260 {
1261 	return global_trace.stop_count;
1262 }
1263 
/**
 * ftrace_off_permanent - disable all ftrace code permanently
 *
 * This should only be called when a serious anomaly has
 * been detected.  This will turn off the function tracing,
 * ring buffers, and other tracing utilities. It takes no
 * locks and can be called from any context.
 */
void ftrace_off_permanent(void)
{
	/* Flag checked by the tracing entry points in this file */
	tracing_disabled = 1;
	ftrace_stop();
	tracing_off_permanent();
}
1278 
1279 /**
1280  * tracing_start - quick start of the tracer
1281  *
1282  * If tracing is enabled but was stopped by tracing_stop,
1283  * this will start the tracer back up.
1284  */
1285 void tracing_start(void)
1286 {
1287 	struct ring_buffer *buffer;
1288 	unsigned long flags;
1289 
1290 	if (tracing_disabled)
1291 		return;
1292 
1293 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1294 	if (--global_trace.stop_count) {
1295 		if (global_trace.stop_count < 0) {
1296 			/* Someone screwed up their debugging */
1297 			WARN_ON_ONCE(1);
1298 			global_trace.stop_count = 0;
1299 		}
1300 		goto out;
1301 	}
1302 
1303 	/* Prevent the buffers from switching */
1304 	arch_spin_lock(&ftrace_max_lock);
1305 
1306 	buffer = global_trace.trace_buffer.buffer;
1307 	if (buffer)
1308 		ring_buffer_record_enable(buffer);
1309 
1310 #ifdef CONFIG_TRACER_MAX_TRACE
1311 	buffer = global_trace.max_buffer.buffer;
1312 	if (buffer)
1313 		ring_buffer_record_enable(buffer);
1314 #endif
1315 
1316 	arch_spin_unlock(&ftrace_max_lock);
1317 
1318 	ftrace_start();
1319  out:
1320 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1321 }
1322 
1323 static void tracing_start_tr(struct trace_array *tr)
1324 {
1325 	struct ring_buffer *buffer;
1326 	unsigned long flags;
1327 
1328 	if (tracing_disabled)
1329 		return;
1330 
1331 	/* If global, we need to also start the max tracer */
1332 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1333 		return tracing_start();
1334 
1335 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1336 
1337 	if (--tr->stop_count) {
1338 		if (tr->stop_count < 0) {
1339 			/* Someone screwed up their debugging */
1340 			WARN_ON_ONCE(1);
1341 			tr->stop_count = 0;
1342 		}
1343 		goto out;
1344 	}
1345 
1346 	buffer = tr->trace_buffer.buffer;
1347 	if (buffer)
1348 		ring_buffer_record_enable(buffer);
1349 
1350  out:
1351 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1352 }
1353 
1354 /**
1355  * tracing_stop - quick stop of the tracer
1356  *
1357  * Light weight way to stop tracing. Use in conjunction with
1358  * tracing_start.
1359  */
1360 void tracing_stop(void)
1361 {
1362 	struct ring_buffer *buffer;
1363 	unsigned long flags;
1364 
1365 	ftrace_stop();
1366 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1367 	if (global_trace.stop_count++)
1368 		goto out;
1369 
1370 	/* Prevent the buffers from switching */
1371 	arch_spin_lock(&ftrace_max_lock);
1372 
1373 	buffer = global_trace.trace_buffer.buffer;
1374 	if (buffer)
1375 		ring_buffer_record_disable(buffer);
1376 
1377 #ifdef CONFIG_TRACER_MAX_TRACE
1378 	buffer = global_trace.max_buffer.buffer;
1379 	if (buffer)
1380 		ring_buffer_record_disable(buffer);
1381 #endif
1382 
1383 	arch_spin_unlock(&ftrace_max_lock);
1384 
1385  out:
1386 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1387 }
1388 
1389 static void tracing_stop_tr(struct trace_array *tr)
1390 {
1391 	struct ring_buffer *buffer;
1392 	unsigned long flags;
1393 
1394 	/* If global, we need to also stop the max tracer */
1395 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1396 		return tracing_stop();
1397 
1398 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1399 	if (tr->stop_count++)
1400 		goto out;
1401 
1402 	buffer = tr->trace_buffer.buffer;
1403 	if (buffer)
1404 		ring_buffer_record_disable(buffer);
1405 
1406  out:
1407 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1408 }
1409 
1410 void trace_stop_cmdline_recording(void);
1411 
1412 static void trace_save_cmdline(struct task_struct *tsk)
1413 {
1414 	unsigned pid, idx;
1415 
1416 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1417 		return;
1418 
1419 	/*
1420 	 * It's not the end of the world if we don't get
1421 	 * the lock, but we also don't want to spin
1422 	 * nor do we want to disable interrupts,
1423 	 * so if we miss here, then better luck next time.
1424 	 */
1425 	if (!arch_spin_trylock(&trace_cmdline_lock))
1426 		return;
1427 
1428 	idx = map_pid_to_cmdline[tsk->pid];
1429 	if (idx == NO_CMDLINE_MAP) {
1430 		idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1431 
1432 		/*
1433 		 * Check whether the cmdline buffer at idx has a pid
1434 		 * mapped. We are going to overwrite that entry so we
1435 		 * need to clear the map_pid_to_cmdline. Otherwise we
1436 		 * would read the new comm for the old pid.
1437 		 */
1438 		pid = map_cmdline_to_pid[idx];
1439 		if (pid != NO_CMDLINE_MAP)
1440 			map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1441 
1442 		map_cmdline_to_pid[idx] = tsk->pid;
1443 		map_pid_to_cmdline[tsk->pid] = idx;
1444 
1445 		cmdline_idx = idx;
1446 	}
1447 
1448 	memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1449 
1450 	arch_spin_unlock(&trace_cmdline_lock);
1451 }
1452 
1453 void trace_find_cmdline(int pid, char comm[])
1454 {
1455 	unsigned map;
1456 
1457 	if (!pid) {
1458 		strcpy(comm, "<idle>");
1459 		return;
1460 	}
1461 
1462 	if (WARN_ON_ONCE(pid < 0)) {
1463 		strcpy(comm, "<XXX>");
1464 		return;
1465 	}
1466 
1467 	if (pid > PID_MAX_DEFAULT) {
1468 		strcpy(comm, "<...>");
1469 		return;
1470 	}
1471 
1472 	preempt_disable();
1473 	arch_spin_lock(&trace_cmdline_lock);
1474 	map = map_pid_to_cmdline[pid];
1475 	if (map != NO_CMDLINE_MAP)
1476 		strcpy(comm, saved_cmdlines[map]);
1477 	else
1478 		strcpy(comm, "<...>");
1479 
1480 	arch_spin_unlock(&trace_cmdline_lock);
1481 	preempt_enable();
1482 }
1483 
1484 void tracing_record_cmdline(struct task_struct *tsk)
1485 {
1486 	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1487 		return;
1488 
1489 	if (!__this_cpu_read(trace_cmdline_save))
1490 		return;
1491 
1492 	__this_cpu_write(trace_cmdline_save, false);
1493 
1494 	trace_save_cmdline(tsk);
1495 }
1496 
1497 void
1498 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1499 			     int pc)
1500 {
1501 	struct task_struct *tsk = current;
1502 
1503 	entry->preempt_count		= pc & 0xff;
1504 	entry->pid			= (tsk) ? tsk->pid : 0;
1505 	entry->flags =
1506 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1507 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1508 #else
1509 		TRACE_FLAG_IRQS_NOSUPPORT |
1510 #endif
1511 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1512 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1513 		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
1514 }
1515 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1516 
1517 struct ring_buffer_event *
1518 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1519 			  int type,
1520 			  unsigned long len,
1521 			  unsigned long flags, int pc)
1522 {
1523 	struct ring_buffer_event *event;
1524 
1525 	event = ring_buffer_lock_reserve(buffer, len);
1526 	if (event != NULL) {
1527 		struct trace_entry *ent = ring_buffer_event_data(event);
1528 
1529 		tracing_generic_entry_update(ent, flags, pc);
1530 		ent->type = type;
1531 	}
1532 
1533 	return event;
1534 }
1535 
1536 void
1537 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1538 {
1539 	__this_cpu_write(trace_cmdline_save, true);
1540 	ring_buffer_unlock_commit(buffer, event);
1541 }
1542 
/*
 * Commit @event and then hand off to the kernel and user stack trace
 * recorders for @buffer (each of which checks its own trace flag).
 */
static inline void
__trace_buffer_unlock_commit(struct ring_buffer *buffer,
			     struct ring_buffer_event *event,
			     unsigned long flags, int pc)
{
	__buffer_unlock_commit(buffer, event);

	/* kernel stack first (skipping 6 frames), then the user stack */
	ftrace_trace_stack(buffer, flags, 6, pc);
	ftrace_trace_userstack(buffer, flags, pc);
}
1553 
/* Exported wrapper around __trace_buffer_unlock_commit(). */
void trace_buffer_unlock_commit(struct ring_buffer *buffer,
				struct ring_buffer_event *event,
				unsigned long flags, int pc)
{
	__trace_buffer_unlock_commit(buffer, event,
				     flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1561 
1562 struct ring_buffer_event *
1563 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1564 			  struct ftrace_event_file *ftrace_file,
1565 			  int type, unsigned long len,
1566 			  unsigned long flags, int pc)
1567 {
1568 	*current_rb = ftrace_file->tr->trace_buffer.buffer;
1569 	return trace_buffer_lock_reserve(*current_rb,
1570 					 type, len, flags, pc);
1571 }
1572 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1573 
1574 struct ring_buffer_event *
1575 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1576 				  int type, unsigned long len,
1577 				  unsigned long flags, int pc)
1578 {
1579 	*current_rb = global_trace.trace_buffer.buffer;
1580 	return trace_buffer_lock_reserve(*current_rb,
1581 					 type, len, flags, pc);
1582 }
1583 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1584 
/* Exported wrapper around __trace_buffer_unlock_commit(). */
void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
					struct ring_buffer_event *event,
					unsigned long flags, int pc)
{
	__trace_buffer_unlock_commit(buffer, event,
				     flags, pc);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1592 
/*
 * Commit @event, then hand off to the stack trace recorders: the
 * kernel stack is taken from @regs with no frames skipped, followed
 * by the user stack.
 */
void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned long flags, int pc,
				     struct pt_regs *regs)
{
	const int skip = 0;

	__buffer_unlock_commit(buffer, event);

	ftrace_trace_stack_regs(buffer, flags, skip, pc, regs);
	ftrace_trace_userstack(buffer, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1604 
/* Exported wrapper around ring_buffer_discard_commit(). */
void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
					 struct ring_buffer_event *event)
{
	ring_buffer_discard_commit(buffer,
				   event);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1611 
1612 void
1613 trace_function(struct trace_array *tr,
1614 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
1615 	       int pc)
1616 {
1617 	struct ftrace_event_call *call = &event_function;
1618 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
1619 	struct ring_buffer_event *event;
1620 	struct ftrace_entry *entry;
1621 
1622 	/* If we are reading the ring buffer, don't trace */
1623 	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1624 		return;
1625 
1626 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1627 					  flags, pc);
1628 	if (!event)
1629 		return;
1630 	entry	= ring_buffer_event_data(event);
1631 	entry->ip			= ip;
1632 	entry->parent_ip		= parent_ip;
1633 
1634 	if (!filter_check_discard(call, entry, buffer, event))
1635 		__buffer_unlock_commit(buffer, event);
1636 }
1637 
1638 #ifdef CONFIG_STACKTRACE
1639 
/* Number of return addresses that fit in one page */
#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
/* One page worth of stack trace entries */
struct ftrace_stack {
	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
};

/* Per-cpu scratch area used by __ftrace_trace_stack() */
static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
/* Per-cpu nesting counter guarding use of the scratch area above */
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1647 
/*
 * Record a TRACE_STACK event holding a kernel stack trace in @buffer.
 *
 * @skip is passed to the stack walker as the number of frames to skip.
 * If @regs is non-NULL the trace is taken from those registers,
 * otherwise from the current context.
 *
 * The outermost caller on a cpu collects the trace into the per-cpu
 * ftrace_stack first, so the event can be sized to the real depth
 * (never less than FTRACE_STACK_ENTRIES). Nested callers save at most
 * FTRACE_STACK_ENTRIES entries directly into the event.
 */
static void __ftrace_trace_stack(struct ring_buffer *buffer,
				 unsigned long flags,
				 int skip, int pc, struct pt_regs *regs)
{
	struct ftrace_event_call *call = &event_kernel_stack;
	struct ring_buffer_event *event;
	struct stack_entry *entry;
	struct stack_trace trace;
	int use_stack;
	int size = FTRACE_STACK_ENTRIES;

	trace.nr_entries	= 0;
	trace.skip		= skip;

	/*
	 * Since events can happen in NMIs there's no safe way to
	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
	 * or NMI comes in, it will just have to use the default
	 * FTRACE_STACK_ENTRIES.
	 */
	preempt_disable_notrace();

	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
	/*
	 * We don't need any atomic variables, just a barrier.
	 * If an interrupt comes in, we don't care, because it would
	 * have exited and put the counter back to what we want.
	 * We just need a barrier to keep gcc from moving things
	 * around.
	 */
	barrier();
	if (use_stack == 1) {
		/* First user on this cpu: collect into the per-cpu page */
		trace.entries		= &__get_cpu_var(ftrace_stack).calls[0];
		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;

		if (regs)
			save_stack_trace_regs(regs, &trace);
		else
			save_stack_trace(&trace);

		/* Grow the event if the trace is deeper than the default */
		if (trace.nr_entries > size)
			size = trace.nr_entries;
	} else
		/* From now on, use_stack is a boolean */
		use_stack = 0;

	/* Convert the entry count into a byte count */
	size *= sizeof(unsigned long);

	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
					  sizeof(*entry) + size, flags, pc);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	memset(&entry->caller, 0, size);

	if (use_stack)
		/* Copy the trace already collected in the per-cpu page */
		memcpy(&entry->caller, trace.entries,
		       trace.nr_entries * sizeof(unsigned long));
	else {
		/* Nested user: walk the stack straight into the event */
		trace.max_entries	= FTRACE_STACK_ENTRIES;
		trace.entries		= entry->caller;
		if (regs)
			save_stack_trace_regs(regs, &trace);
		else
			save_stack_trace(&trace);
	}

	entry->size = trace.nr_entries;

	if (!filter_check_discard(call, entry, buffer, event))
		__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	__this_cpu_dec(ftrace_stack_reserve);
	preempt_enable_notrace();

}
1728 
1729 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1730 			     int skip, int pc, struct pt_regs *regs)
1731 {
1732 	if (!(trace_flags & TRACE_ITER_STACKTRACE))
1733 		return;
1734 
1735 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
1736 }
1737 
1738 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1739 			int skip, int pc)
1740 {
1741 	if (!(trace_flags & TRACE_ITER_STACKTRACE))
1742 		return;
1743 
1744 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1745 }
1746 
1747 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1748 		   int pc)
1749 {
1750 	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1751 }
1752 
1753 /**
1754  * trace_dump_stack - record a stack back trace in the trace buffer
1755  * @skip: Number of functions to skip (helper handlers)
1756  */
1757 void trace_dump_stack(int skip)
1758 {
1759 	unsigned long flags;
1760 
1761 	if (tracing_disabled || tracing_selftest_running)
1762 		return;
1763 
1764 	local_save_flags(flags);
1765 
1766 	/*
1767 	 * Skip 3 more, seems to get us at the caller of
1768 	 * this function.
1769 	 */
1770 	skip += 3;
1771 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
1772 			     flags, skip, preempt_count(), NULL);
1773 }
1774 
1775 static DEFINE_PER_CPU(int, user_stack_count);
1776 
1777 void
1778 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1779 {
1780 	struct ftrace_event_call *call = &event_user_stack;
1781 	struct ring_buffer_event *event;
1782 	struct userstack_entry *entry;
1783 	struct stack_trace trace;
1784 
1785 	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1786 		return;
1787 
1788 	/*
1789 	 * NMIs can not handle page faults, even with fix ups.
1790 	 * The save user stack can (and often does) fault.
1791 	 */
1792 	if (unlikely(in_nmi()))
1793 		return;
1794 
1795 	/*
1796 	 * prevent recursion, since the user stack tracing may
1797 	 * trigger other kernel events.
1798 	 */
1799 	preempt_disable();
1800 	if (__this_cpu_read(user_stack_count))
1801 		goto out;
1802 
1803 	__this_cpu_inc(user_stack_count);
1804 
1805 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1806 					  sizeof(*entry), flags, pc);
1807 	if (!event)
1808 		goto out_drop_count;
1809 	entry	= ring_buffer_event_data(event);
1810 
1811 	entry->tgid		= current->tgid;
1812 	memset(&entry->caller, 0, sizeof(entry->caller));
1813 
1814 	trace.nr_entries	= 0;
1815 	trace.max_entries	= FTRACE_STACK_ENTRIES;
1816 	trace.skip		= 0;
1817 	trace.entries		= entry->caller;
1818 
1819 	save_stack_trace_user(&trace);
1820 	if (!filter_check_discard(call, entry, buffer, event))
1821 		__buffer_unlock_commit(buffer, event);
1822 
1823  out_drop_count:
1824 	__this_cpu_dec(user_stack_count);
1825  out:
1826 	preempt_enable();
1827 }
1828 
1829 #ifdef UNUSED
1830 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1831 {
1832 	ftrace_trace_userstack(tr, flags, preempt_count());
1833 }
1834 #endif /* UNUSED */
1835 
1836 #endif /* CONFIG_STACKTRACE */
1837 
/* created for use with alloc_percpu */
struct trace_buffer_struct {
	char buffer[TRACE_BUF_SIZE];
};

/*
 * Per-cpu scratch buffers, one set per execution context. They stay
 * NULL until alloc_percpu_trace_buffer() succeeds.
 */
static struct trace_buffer_struct *trace_percpu_buffer;		/* normal context */
static struct trace_buffer_struct *trace_percpu_sirq_buffer;	/* softirq context */
static struct trace_buffer_struct *trace_percpu_irq_buffer;	/* hard irq context */
static struct trace_buffer_struct *trace_percpu_nmi_buffer;	/* NMI context */
1847 
1848 /*
1849  * The buffer used is dependent on the context. There is a per cpu
1850  * buffer for normal context, softirq contex, hard irq context and
1851  * for NMI context. Thise allows for lockless recording.
1852  *
1853  * Note, if the buffers failed to be allocated, then this returns NULL
1854  */
1855 static char *get_trace_buf(void)
1856 {
1857 	struct trace_buffer_struct *percpu_buffer;
1858 
1859 	/*
1860 	 * If we have allocated per cpu buffers, then we do not
1861 	 * need to do any locking.
1862 	 */
1863 	if (in_nmi())
1864 		percpu_buffer = trace_percpu_nmi_buffer;
1865 	else if (in_irq())
1866 		percpu_buffer = trace_percpu_irq_buffer;
1867 	else if (in_softirq())
1868 		percpu_buffer = trace_percpu_sirq_buffer;
1869 	else
1870 		percpu_buffer = trace_percpu_buffer;
1871 
1872 	if (!percpu_buffer)
1873 		return NULL;
1874 
1875 	return this_cpu_ptr(&percpu_buffer->buffer[0]);
1876 }
1877 
1878 static int alloc_percpu_trace_buffer(void)
1879 {
1880 	struct trace_buffer_struct *buffers;
1881 	struct trace_buffer_struct *sirq_buffers;
1882 	struct trace_buffer_struct *irq_buffers;
1883 	struct trace_buffer_struct *nmi_buffers;
1884 
1885 	buffers = alloc_percpu(struct trace_buffer_struct);
1886 	if (!buffers)
1887 		goto err_warn;
1888 
1889 	sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1890 	if (!sirq_buffers)
1891 		goto err_sirq;
1892 
1893 	irq_buffers = alloc_percpu(struct trace_buffer_struct);
1894 	if (!irq_buffers)
1895 		goto err_irq;
1896 
1897 	nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1898 	if (!nmi_buffers)
1899 		goto err_nmi;
1900 
1901 	trace_percpu_buffer = buffers;
1902 	trace_percpu_sirq_buffer = sirq_buffers;
1903 	trace_percpu_irq_buffer = irq_buffers;
1904 	trace_percpu_nmi_buffer = nmi_buffers;
1905 
1906 	return 0;
1907 
1908  err_nmi:
1909 	free_percpu(irq_buffers);
1910  err_irq:
1911 	free_percpu(sirq_buffers);
1912  err_sirq:
1913 	free_percpu(buffers);
1914  err_warn:
1915 	WARN(1, "Could not allocate percpu trace_printk buffer");
1916 	return -ENOMEM;
1917 }
1918 
1919 static int buffers_allocated;
1920 
1921 void trace_printk_init_buffers(void)
1922 {
1923 	if (buffers_allocated)
1924 		return;
1925 
1926 	if (alloc_percpu_trace_buffer())
1927 		return;
1928 
1929 	pr_info("ftrace: Allocated trace_printk buffers\n");
1930 
1931 	/* Expand the buffers to set size */
1932 	tracing_update_buffers();
1933 
1934 	buffers_allocated = 1;
1935 
1936 	/*
1937 	 * trace_printk_init_buffers() can be called by modules.
1938 	 * If that happens, then we need to start cmdline recording
1939 	 * directly here. If the global_trace.buffer is already
1940 	 * allocated here, then this was called by module code.
1941 	 */
1942 	if (global_trace.trace_buffer.buffer)
1943 		tracing_start_cmdline_record();
1944 }
1945 
1946 void trace_printk_start_comm(void)
1947 {
1948 	/* Start tracing comms if trace printk is set */
1949 	if (!buffers_allocated)
1950 		return;
1951 	tracing_start_cmdline_record();
1952 }
1953 
1954 static void trace_printk_start_stop_comm(int enabled)
1955 {
1956 	if (!buffers_allocated)
1957 		return;
1958 
1959 	if (enabled)
1960 		tracing_start_cmdline_record();
1961 	else
1962 		tracing_stop_cmdline_record();
1963 }
1964 
1965 /**
1966  * trace_vbprintk - write binary msg to tracing buffer
1967  *
1968  */
1969 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1970 {
1971 	struct ftrace_event_call *call = &event_bprint;
1972 	struct ring_buffer_event *event;
1973 	struct ring_buffer *buffer;
1974 	struct trace_array *tr = &global_trace;
1975 	struct bprint_entry *entry;
1976 	unsigned long flags;
1977 	char *tbuffer;
1978 	int len = 0, size, pc;
1979 
1980 	if (unlikely(tracing_selftest_running || tracing_disabled))
1981 		return 0;
1982 
1983 	/* Don't pollute graph traces with trace_vprintk internals */
1984 	pause_graph_tracing();
1985 
1986 	pc = preempt_count();
1987 	preempt_disable_notrace();
1988 
1989 	tbuffer = get_trace_buf();
1990 	if (!tbuffer) {
1991 		len = 0;
1992 		goto out;
1993 	}
1994 
1995 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
1996 
1997 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
1998 		goto out;
1999 
2000 	local_save_flags(flags);
2001 	size = sizeof(*entry) + sizeof(u32) * len;
2002 	buffer = tr->trace_buffer.buffer;
2003 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2004 					  flags, pc);
2005 	if (!event)
2006 		goto out;
2007 	entry = ring_buffer_event_data(event);
2008 	entry->ip			= ip;
2009 	entry->fmt			= fmt;
2010 
2011 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2012 	if (!filter_check_discard(call, entry, buffer, event)) {
2013 		__buffer_unlock_commit(buffer, event);
2014 		ftrace_trace_stack(buffer, flags, 6, pc);
2015 	}
2016 
2017 out:
2018 	preempt_enable_notrace();
2019 	unpause_graph_tracing();
2020 
2021 	return len;
2022 }
2023 EXPORT_SYMBOL_GPL(trace_vbprintk);
2024 
2025 static int
2026 __trace_array_vprintk(struct ring_buffer *buffer,
2027 		      unsigned long ip, const char *fmt, va_list args)
2028 {
2029 	struct ftrace_event_call *call = &event_print;
2030 	struct ring_buffer_event *event;
2031 	int len = 0, size, pc;
2032 	struct print_entry *entry;
2033 	unsigned long flags;
2034 	char *tbuffer;
2035 
2036 	if (tracing_disabled || tracing_selftest_running)
2037 		return 0;
2038 
2039 	/* Don't pollute graph traces with trace_vprintk internals */
2040 	pause_graph_tracing();
2041 
2042 	pc = preempt_count();
2043 	preempt_disable_notrace();
2044 
2045 
2046 	tbuffer = get_trace_buf();
2047 	if (!tbuffer) {
2048 		len = 0;
2049 		goto out;
2050 	}
2051 
2052 	len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2053 	if (len > TRACE_BUF_SIZE)
2054 		goto out;
2055 
2056 	local_save_flags(flags);
2057 	size = sizeof(*entry) + len + 1;
2058 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2059 					  flags, pc);
2060 	if (!event)
2061 		goto out;
2062 	entry = ring_buffer_event_data(event);
2063 	entry->ip = ip;
2064 
2065 	memcpy(&entry->buf, tbuffer, len);
2066 	entry->buf[len] = '\0';
2067 	if (!filter_check_discard(call, entry, buffer, event)) {
2068 		__buffer_unlock_commit(buffer, event);
2069 		ftrace_trace_stack(buffer, flags, 6, pc);
2070 	}
2071  out:
2072 	preempt_enable_notrace();
2073 	unpause_graph_tracing();
2074 
2075 	return len;
2076 }
2077 
2078 int trace_array_vprintk(struct trace_array *tr,
2079 			unsigned long ip, const char *fmt, va_list args)
2080 {
2081 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2082 }
2083 
2084 int trace_array_printk(struct trace_array *tr,
2085 		       unsigned long ip, const char *fmt, ...)
2086 {
2087 	int ret;
2088 	va_list ap;
2089 
2090 	if (!(trace_flags & TRACE_ITER_PRINTK))
2091 		return 0;
2092 
2093 	va_start(ap, fmt);
2094 	ret = trace_array_vprintk(tr, ip, fmt, ap);
2095 	va_end(ap);
2096 	return ret;
2097 }
2098 
2099 int trace_array_printk_buf(struct ring_buffer *buffer,
2100 			   unsigned long ip, const char *fmt, ...)
2101 {
2102 	int ret;
2103 	va_list ap;
2104 
2105 	if (!(trace_flags & TRACE_ITER_PRINTK))
2106 		return 0;
2107 
2108 	va_start(ap, fmt);
2109 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2110 	va_end(ap);
2111 	return ret;
2112 }
2113 
2114 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2115 {
2116 	return trace_array_vprintk(&global_trace, ip, fmt, args);
2117 }
2118 EXPORT_SYMBOL_GPL(trace_vprintk);
2119 
2120 static void trace_iterator_increment(struct trace_iterator *iter)
2121 {
2122 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2123 
2124 	iter->idx++;
2125 	if (buf_iter)
2126 		ring_buffer_read(buf_iter, NULL);
2127 }
2128 
2129 static struct trace_entry *
2130 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2131 		unsigned long *lost_events)
2132 {
2133 	struct ring_buffer_event *event;
2134 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2135 
2136 	if (buf_iter)
2137 		event = ring_buffer_iter_peek(buf_iter, ts);
2138 	else
2139 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2140 					 lost_events);
2141 
2142 	if (event) {
2143 		iter->ent_size = ring_buffer_event_length(event);
2144 		return ring_buffer_event_data(event);
2145 	}
2146 	iter->ent_size = 0;
2147 	return NULL;
2148 }
2149 
2150 static struct trace_entry *
2151 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2152 		  unsigned long *missing_events, u64 *ent_ts)
2153 {
2154 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2155 	struct trace_entry *ent, *next = NULL;
2156 	unsigned long lost_events = 0, next_lost = 0;
2157 	int cpu_file = iter->cpu_file;
2158 	u64 next_ts = 0, ts;
2159 	int next_cpu = -1;
2160 	int next_size = 0;
2161 	int cpu;
2162 
2163 	/*
2164 	 * If we are in a per_cpu trace file, don't bother by iterating over
2165 	 * all cpu and peek directly.
2166 	 */
2167 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2168 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2169 			return NULL;
2170 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2171 		if (ent_cpu)
2172 			*ent_cpu = cpu_file;
2173 
2174 		return ent;
2175 	}
2176 
2177 	for_each_tracing_cpu(cpu) {
2178 
2179 		if (ring_buffer_empty_cpu(buffer, cpu))
2180 			continue;
2181 
2182 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2183 
2184 		/*
2185 		 * Pick the entry with the smallest timestamp:
2186 		 */
2187 		if (ent && (!next || ts < next_ts)) {
2188 			next = ent;
2189 			next_cpu = cpu;
2190 			next_ts = ts;
2191 			next_lost = lost_events;
2192 			next_size = iter->ent_size;
2193 		}
2194 	}
2195 
2196 	iter->ent_size = next_size;
2197 
2198 	if (ent_cpu)
2199 		*ent_cpu = next_cpu;
2200 
2201 	if (ent_ts)
2202 		*ent_ts = next_ts;
2203 
2204 	if (missing_events)
2205 		*missing_events = next_lost;
2206 
2207 	return next;
2208 }
2209 
2210 /* Find the next real entry, without updating the iterator itself */
2211 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2212 					  int *ent_cpu, u64 *ent_ts)
2213 {
2214 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2215 }
2216 
2217 /* Find the next real entry, and increment the iterator to the next entry */
2218 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2219 {
2220 	iter->ent = __find_next_entry(iter, &iter->cpu,
2221 				      &iter->lost_events, &iter->ts);
2222 
2223 	if (iter->ent)
2224 		trace_iterator_increment(iter);
2225 
2226 	return iter->ent ? iter : NULL;
2227 }
2228 
2229 static void trace_consume(struct trace_iterator *iter)
2230 {
2231 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2232 			    &iter->lost_events);
2233 }
2234 
2235 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2236 {
2237 	struct trace_iterator *iter = m->private;
2238 	int i = (int)*pos;
2239 	void *ent;
2240 
2241 	WARN_ON_ONCE(iter->leftover);
2242 
2243 	(*pos)++;
2244 
2245 	/* can't go backwards */
2246 	if (iter->idx > i)
2247 		return NULL;
2248 
2249 	if (iter->idx < 0)
2250 		ent = trace_find_next_entry_inc(iter);
2251 	else
2252 		ent = iter;
2253 
2254 	while (ent && iter->idx < i)
2255 		ent = trace_find_next_entry_inc(iter);
2256 
2257 	iter->pos = *pos;
2258 
2259 	return ent;
2260 }
2261 
2262 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2263 {
2264 	struct ring_buffer_event *event;
2265 	struct ring_buffer_iter *buf_iter;
2266 	unsigned long entries = 0;
2267 	u64 ts;
2268 
2269 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2270 
2271 	buf_iter = trace_buffer_iter(iter, cpu);
2272 	if (!buf_iter)
2273 		return;
2274 
2275 	ring_buffer_iter_reset(buf_iter);
2276 
2277 	/*
2278 	 * We could have the case with the max latency tracers
2279 	 * that a reset never took place on a cpu. This is evident
2280 	 * by the timestamp being before the start of the buffer.
2281 	 */
2282 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2283 		if (ts >= iter->trace_buffer->time_start)
2284 			break;
2285 		entries++;
2286 		ring_buffer_read(buf_iter, NULL);
2287 	}
2288 
2289 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2290 }
2291 
2292 /*
2293  * The current tracer is copied to avoid a global locking
2294  * all around.
2295  */
2296 static void *s_start(struct seq_file *m, loff_t *pos)
2297 {
2298 	struct trace_iterator *iter = m->private;
2299 	struct trace_array *tr = iter->tr;
2300 	int cpu_file = iter->cpu_file;
2301 	void *p = NULL;
2302 	loff_t l = 0;
2303 	int cpu;
2304 
2305 	/*
2306 	 * copy the tracer to avoid using a global lock all around.
2307 	 * iter->trace is a copy of current_trace, the pointer to the
2308 	 * name may be used instead of a strcmp(), as iter->trace->name
2309 	 * will point to the same string as current_trace->name.
2310 	 */
2311 	mutex_lock(&trace_types_lock);
2312 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2313 		*iter->trace = *tr->current_trace;
2314 	mutex_unlock(&trace_types_lock);
2315 
2316 #ifdef CONFIG_TRACER_MAX_TRACE
2317 	if (iter->snapshot && iter->trace->use_max_tr)
2318 		return ERR_PTR(-EBUSY);
2319 #endif
2320 
2321 	if (!iter->snapshot)
2322 		atomic_inc(&trace_record_cmdline_disabled);
2323 
2324 	if (*pos != iter->pos) {
2325 		iter->ent = NULL;
2326 		iter->cpu = 0;
2327 		iter->idx = -1;
2328 
2329 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2330 			for_each_tracing_cpu(cpu)
2331 				tracing_iter_reset(iter, cpu);
2332 		} else
2333 			tracing_iter_reset(iter, cpu_file);
2334 
2335 		iter->leftover = 0;
2336 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2337 			;
2338 
2339 	} else {
2340 		/*
2341 		 * If we overflowed the seq_file before, then we want
2342 		 * to just reuse the trace_seq buffer again.
2343 		 */
2344 		if (iter->leftover)
2345 			p = iter;
2346 		else {
2347 			l = *pos - 1;
2348 			p = s_next(m, p, &l);
2349 		}
2350 	}
2351 
2352 	trace_event_read_lock();
2353 	trace_access_lock(cpu_file);
2354 	return p;
2355 }
2356 
2357 static void s_stop(struct seq_file *m, void *p)
2358 {
2359 	struct trace_iterator *iter = m->private;
2360 
2361 #ifdef CONFIG_TRACER_MAX_TRACE
2362 	if (iter->snapshot && iter->trace->use_max_tr)
2363 		return;
2364 #endif
2365 
2366 	if (!iter->snapshot)
2367 		atomic_dec(&trace_record_cmdline_disabled);
2368 
2369 	trace_access_unlock(iter->cpu_file);
2370 	trace_event_read_unlock();
2371 }
2372 
2373 static void
2374 get_total_entries(struct trace_buffer *buf,
2375 		  unsigned long *total, unsigned long *entries)
2376 {
2377 	unsigned long count;
2378 	int cpu;
2379 
2380 	*total = 0;
2381 	*entries = 0;
2382 
2383 	for_each_tracing_cpu(cpu) {
2384 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2385 		/*
2386 		 * If this buffer has skipped entries, then we hold all
2387 		 * entries for the trace and we need to ignore the
2388 		 * ones before the time stamp.
2389 		 */
2390 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2391 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2392 			/* total is the same as the entries */
2393 			*total += count;
2394 		} else
2395 			*total += count +
2396 				ring_buffer_overrun_cpu(buf->buffer, cpu);
2397 		*entries += count;
2398 	}
2399 }
2400 
/* Emit the column legend used by the latency trace format. */
static void print_lat_help_header(struct seq_file *m)
{
	static const char * const legend[] = {
		"#                  _------=> CPU#            \n",
		"#                 / _-----=> irqs-off        \n",
		"#                | / _----=> need-resched    \n",
		"#                || / _---=> hardirq/softirq \n",
		"#                ||| / _--=> preempt-depth   \n",
		"#                |||| /     delay             \n",
		"#  cmd     pid   ||||| time  |   caller      \n",
		"#     \\   /      |||||  \\    |   /           \n",
	};
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(legend); i++)
		seq_puts(m, legend[i]);
}
2412 
/*
 * Print the "entries-in-buffer/entries-written" summary line for @buf,
 * followed by an empty comment line.
 */
static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
{
	unsigned long written;
	unsigned long in_buffer;

	get_total_entries(buf, &written, &in_buffer);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   in_buffer, written, num_online_cpus());
	seq_puts(m, "#\n");
}
2423 
/* Entry summary plus the column legend of the default trace format. */
static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
{
	static const char * const legend[] = {
		"#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n",
		"#              | |       |          |         |\n",
	};
	unsigned int i;

	print_event_info(buf, m);

	for (i = 0; i < ARRAY_SIZE(legend); i++)
		seq_puts(m, legend[i]);
}
2430 
/*
 * Entry summary plus the column legend of the default trace format
 * when the irq-info columns are shown.
 */
static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
{
	static const char * const legend[] = {
		"#                              _-----=> irqs-off\n",
		"#                             / _----=> need-resched\n",
		"#                            | / _---=> hardirq/softirq\n",
		"#                            || / _--=> preempt-depth\n",
		"#                            ||| /     delay\n",
		"#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n",
		"#              | |       |   ||||       |         |\n",
	};
	unsigned int i;

	print_event_info(buf, m);

	for (i = 0; i < ARRAY_SIZE(legend); i++)
		seq_puts(m, legend[i]);
}
2442 
2443 void
2444 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2445 {
2446 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2447 	struct trace_buffer *buf = iter->trace_buffer;
2448 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2449 	struct tracer *type = iter->trace;
2450 	unsigned long entries;
2451 	unsigned long total;
2452 	const char *name = "preemption";
2453 
2454 	name = type->name;
2455 
2456 	get_total_entries(buf, &total, &entries);
2457 
2458 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2459 		   name, UTS_RELEASE);
2460 	seq_puts(m, "# -----------------------------------"
2461 		 "---------------------------------\n");
2462 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2463 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2464 		   nsecs_to_usecs(data->saved_latency),
2465 		   entries,
2466 		   total,
2467 		   buf->cpu,
2468 #if defined(CONFIG_PREEMPT_NONE)
2469 		   "server",
2470 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2471 		   "desktop",
2472 #elif defined(CONFIG_PREEMPT)
2473 		   "preempt",
2474 #else
2475 		   "unknown",
2476 #endif
2477 		   /* These are reserved for later use */
2478 		   0, 0, 0, 0);
2479 #ifdef CONFIG_SMP
2480 	seq_printf(m, " #P:%d)\n", num_online_cpus());
2481 #else
2482 	seq_puts(m, ")\n");
2483 #endif
2484 	seq_puts(m, "#    -----------------\n");
2485 	seq_printf(m, "#    | task: %.16s-%d "
2486 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2487 		   data->comm, data->pid,
2488 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2489 		   data->policy, data->rt_priority);
2490 	seq_puts(m, "#    -----------------\n");
2491 
2492 	if (data->critical_start) {
2493 		seq_puts(m, "#  => started at: ");
2494 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2495 		trace_print_seq(m, &iter->seq);
2496 		seq_puts(m, "\n#  => ended at:   ");
2497 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2498 		trace_print_seq(m, &iter->seq);
2499 		seq_puts(m, "\n#\n");
2500 	}
2501 
2502 	seq_puts(m, "#\n");
2503 }
2504 
2505 static void test_cpu_buff_start(struct trace_iterator *iter)
2506 {
2507 	struct trace_seq *s = &iter->seq;
2508 
2509 	if (!(trace_flags & TRACE_ITER_ANNOTATE))
2510 		return;
2511 
2512 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2513 		return;
2514 
2515 	if (cpumask_test_cpu(iter->cpu, iter->started))
2516 		return;
2517 
2518 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2519 		return;
2520 
2521 	cpumask_set_cpu(iter->cpu, iter->started);
2522 
2523 	/* Don't print started cpu buffer for the first entry of the trace */
2524 	if (iter->idx > 1)
2525 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2526 				iter->cpu);
2527 }
2528 
2529 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2530 {
2531 	struct trace_seq *s = &iter->seq;
2532 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2533 	struct trace_entry *entry;
2534 	struct trace_event *event;
2535 
2536 	entry = iter->ent;
2537 
2538 	test_cpu_buff_start(iter);
2539 
2540 	event = ftrace_find_event(entry->type);
2541 
2542 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2543 		if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2544 			if (!trace_print_lat_context(iter))
2545 				goto partial;
2546 		} else {
2547 			if (!trace_print_context(iter))
2548 				goto partial;
2549 		}
2550 	}
2551 
2552 	if (event)
2553 		return event->funcs->trace(iter, sym_flags, event);
2554 
2555 	if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2556 		goto partial;
2557 
2558 	return TRACE_TYPE_HANDLED;
2559 partial:
2560 	return TRACE_TYPE_PARTIAL_LINE;
2561 }
2562 
2563 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2564 {
2565 	struct trace_seq *s = &iter->seq;
2566 	struct trace_entry *entry;
2567 	struct trace_event *event;
2568 
2569 	entry = iter->ent;
2570 
2571 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2572 		if (!trace_seq_printf(s, "%d %d %llu ",
2573 				      entry->pid, iter->cpu, iter->ts))
2574 			goto partial;
2575 	}
2576 
2577 	event = ftrace_find_event(entry->type);
2578 	if (event)
2579 		return event->funcs->raw(iter, 0, event);
2580 
2581 	if (!trace_seq_printf(s, "%d ?\n", entry->type))
2582 		goto partial;
2583 
2584 	return TRACE_TYPE_HANDLED;
2585 partial:
2586 	return TRACE_TYPE_PARTIAL_LINE;
2587 }
2588 
2589 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2590 {
2591 	struct trace_seq *s = &iter->seq;
2592 	unsigned char newline = '\n';
2593 	struct trace_entry *entry;
2594 	struct trace_event *event;
2595 
2596 	entry = iter->ent;
2597 
2598 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2599 		SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2600 		SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2601 		SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2602 	}
2603 
2604 	event = ftrace_find_event(entry->type);
2605 	if (event) {
2606 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
2607 		if (ret != TRACE_TYPE_HANDLED)
2608 			return ret;
2609 	}
2610 
2611 	SEQ_PUT_FIELD_RET(s, newline);
2612 
2613 	return TRACE_TYPE_HANDLED;
2614 }
2615 
2616 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2617 {
2618 	struct trace_seq *s = &iter->seq;
2619 	struct trace_entry *entry;
2620 	struct trace_event *event;
2621 
2622 	entry = iter->ent;
2623 
2624 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2625 		SEQ_PUT_FIELD_RET(s, entry->pid);
2626 		SEQ_PUT_FIELD_RET(s, iter->cpu);
2627 		SEQ_PUT_FIELD_RET(s, iter->ts);
2628 	}
2629 
2630 	event = ftrace_find_event(entry->type);
2631 	return event ? event->funcs->binary(iter, 0, event) :
2632 		TRACE_TYPE_HANDLED;
2633 }
2634 
2635 int trace_empty(struct trace_iterator *iter)
2636 {
2637 	struct ring_buffer_iter *buf_iter;
2638 	int cpu;
2639 
2640 	/* If we are looking at one CPU buffer, only check that one */
2641 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2642 		cpu = iter->cpu_file;
2643 		buf_iter = trace_buffer_iter(iter, cpu);
2644 		if (buf_iter) {
2645 			if (!ring_buffer_iter_empty(buf_iter))
2646 				return 0;
2647 		} else {
2648 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2649 				return 0;
2650 		}
2651 		return 1;
2652 	}
2653 
2654 	for_each_tracing_cpu(cpu) {
2655 		buf_iter = trace_buffer_iter(iter, cpu);
2656 		if (buf_iter) {
2657 			if (!ring_buffer_iter_empty(buf_iter))
2658 				return 0;
2659 		} else {
2660 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2661 				return 0;
2662 		}
2663 	}
2664 
2665 	return 1;
2666 }
2667 
2668 /*  Called with trace_event_read_lock() held. */
2669 enum print_line_t print_trace_line(struct trace_iterator *iter)
2670 {
2671 	enum print_line_t ret;
2672 
2673 	if (iter->lost_events &&
2674 	    !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2675 				 iter->cpu, iter->lost_events))
2676 		return TRACE_TYPE_PARTIAL_LINE;
2677 
2678 	if (iter->trace && iter->trace->print_line) {
2679 		ret = iter->trace->print_line(iter);
2680 		if (ret != TRACE_TYPE_UNHANDLED)
2681 			return ret;
2682 	}
2683 
2684 	if (iter->ent->type == TRACE_BPUTS &&
2685 			trace_flags & TRACE_ITER_PRINTK &&
2686 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2687 		return trace_print_bputs_msg_only(iter);
2688 
2689 	if (iter->ent->type == TRACE_BPRINT &&
2690 			trace_flags & TRACE_ITER_PRINTK &&
2691 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2692 		return trace_print_bprintk_msg_only(iter);
2693 
2694 	if (iter->ent->type == TRACE_PRINT &&
2695 			trace_flags & TRACE_ITER_PRINTK &&
2696 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2697 		return trace_print_printk_msg_only(iter);
2698 
2699 	if (trace_flags & TRACE_ITER_BIN)
2700 		return print_bin_fmt(iter);
2701 
2702 	if (trace_flags & TRACE_ITER_HEX)
2703 		return print_hex_fmt(iter);
2704 
2705 	if (trace_flags & TRACE_ITER_RAW)
2706 		return print_raw_fmt(iter);
2707 
2708 	return print_trace_fmt(iter);
2709 }
2710 
2711 void trace_latency_header(struct seq_file *m)
2712 {
2713 	struct trace_iterator *iter = m->private;
2714 
2715 	/* print nothing if the buffers are empty */
2716 	if (trace_empty(iter))
2717 		return;
2718 
2719 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2720 		print_trace_header(m, iter);
2721 
2722 	if (!(trace_flags & TRACE_ITER_VERBOSE))
2723 		print_lat_help_header(m);
2724 }
2725 
2726 void trace_default_header(struct seq_file *m)
2727 {
2728 	struct trace_iterator *iter = m->private;
2729 
2730 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2731 		return;
2732 
2733 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2734 		/* print nothing if the buffers are empty */
2735 		if (trace_empty(iter))
2736 			return;
2737 		print_trace_header(m, iter);
2738 		if (!(trace_flags & TRACE_ITER_VERBOSE))
2739 			print_lat_help_header(m);
2740 	} else {
2741 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2742 			if (trace_flags & TRACE_ITER_IRQ_INFO)
2743 				print_func_help_header_irq(iter->trace_buffer, m);
2744 			else
2745 				print_func_help_header(iter->trace_buffer, m);
2746 		}
2747 	}
2748 }
2749 
/* Warn in the trace header when ftrace_is_dead() reports a dead ftrace. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead()) {
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		seq_puts(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
	}
}
2757 
2758 #ifdef CONFIG_TRACER_MAX_TRACE
/* Usage text for the top level snapshot file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char * const help[] = {
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n",
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n",
		"#                      Takes a snapshot of the main buffer.\n",
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n",
		"#                      (Doesn't have to be '2' works with any number that\n",
		"#                       is not a '0' or '1')\n",
	};
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(help); i++)
		seq_puts(m, help[i]);
}
2768 
/*
 * Usage text for a per_cpu snapshot file.  The "echo 1" description
 * depends on whether the kernel was built with
 * CONFIG_RING_BUFFER_ALLOW_SWAP.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
	seq_puts(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
#else
	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
	seq_puts(m, "#                     Must use main snapshot file to allocate.\n");
#endif
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
	seq_puts(m, "#                      (Doesn't have to be '2' works with any number that\n");
	seq_puts(m, "#                       is not a '0' or '1')\n");
}
2783 
2784 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2785 {
2786 	if (iter->tr->allocated_snapshot)
2787 		seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2788 	else
2789 		seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2790 
2791 	seq_printf(m, "# Snapshot commands:\n");
2792 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2793 		show_snapshot_main_help(m);
2794 	else
2795 		show_snapshot_percpu_help(m);
2796 }
2797 #else
/*
 * Empty stand-in used when CONFIG_TRACER_MAX_TRACE is not set.
 * Should never be called.
 */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
2800 #endif
2801 
2802 static int s_show(struct seq_file *m, void *v)
2803 {
2804 	struct trace_iterator *iter = v;
2805 	int ret;
2806 
2807 	if (iter->ent == NULL) {
2808 		if (iter->tr) {
2809 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
2810 			seq_puts(m, "#\n");
2811 			test_ftrace_alive(m);
2812 		}
2813 		if (iter->snapshot && trace_empty(iter))
2814 			print_snapshot_help(m, iter);
2815 		else if (iter->trace && iter->trace->print_header)
2816 			iter->trace->print_header(m);
2817 		else
2818 			trace_default_header(m);
2819 
2820 	} else if (iter->leftover) {
2821 		/*
2822 		 * If we filled the seq_file buffer earlier, we
2823 		 * want to just show it now.
2824 		 */
2825 		ret = trace_print_seq(m, &iter->seq);
2826 
2827 		/* ret should this time be zero, but you never know */
2828 		iter->leftover = ret;
2829 
2830 	} else {
2831 		print_trace_line(iter);
2832 		ret = trace_print_seq(m, &iter->seq);
2833 		/*
2834 		 * If we overflow the seq_file buffer, then it will
2835 		 * ask us for this data again at start up.
2836 		 * Use that instead.
2837 		 *  ret is 0 if seq_file write succeeded.
2838 		 *        -1 otherwise.
2839 		 */
2840 		iter->leftover = ret;
2841 	}
2842 
2843 	return 0;
2844 }
2845 
2846 static const struct seq_operations tracer_seq_ops = {
2847 	.start		= s_start,
2848 	.next		= s_next,
2849 	.stop		= s_stop,
2850 	.show		= s_show,
2851 };
2852 
2853 static struct trace_iterator *
2854 __tracing_open(struct trace_array *tr, struct trace_cpu *tc,
2855 	       struct inode *inode, struct file *file, bool snapshot)
2856 {
2857 	struct trace_iterator *iter;
2858 	int cpu;
2859 
2860 	if (tracing_disabled)
2861 		return ERR_PTR(-ENODEV);
2862 
2863 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2864 	if (!iter)
2865 		return ERR_PTR(-ENOMEM);
2866 
2867 	iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2868 				    GFP_KERNEL);
2869 	if (!iter->buffer_iter)
2870 		goto release;
2871 
2872 	/*
2873 	 * We make a copy of the current tracer to avoid concurrent
2874 	 * changes on it while we are reading.
2875 	 */
2876 	mutex_lock(&trace_types_lock);
2877 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2878 	if (!iter->trace)
2879 		goto fail;
2880 
2881 	*iter->trace = *tr->current_trace;
2882 
2883 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2884 		goto fail;
2885 
2886 	iter->tr = tr;
2887 
2888 #ifdef CONFIG_TRACER_MAX_TRACE
2889 	/* Currently only the top directory has a snapshot */
2890 	if (tr->current_trace->print_max || snapshot)
2891 		iter->trace_buffer = &tr->max_buffer;
2892 	else
2893 #endif
2894 		iter->trace_buffer = &tr->trace_buffer;
2895 	iter->snapshot = snapshot;
2896 	iter->pos = -1;
2897 	mutex_init(&iter->mutex);
2898 	iter->cpu_file = tc->cpu;
2899 
2900 	/* Notify the tracer early; before we stop tracing. */
2901 	if (iter->trace && iter->trace->open)
2902 		iter->trace->open(iter);
2903 
2904 	/* Annotate start of buffers if we had overruns */
2905 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
2906 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
2907 
2908 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
2909 	if (trace_clocks[tr->clock_id].in_ns)
2910 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2911 
2912 	/* stop the trace while dumping if we are not opening "snapshot" */
2913 	if (!iter->snapshot)
2914 		tracing_stop_tr(tr);
2915 
2916 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2917 		for_each_tracing_cpu(cpu) {
2918 			iter->buffer_iter[cpu] =
2919 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2920 		}
2921 		ring_buffer_read_prepare_sync();
2922 		for_each_tracing_cpu(cpu) {
2923 			ring_buffer_read_start(iter->buffer_iter[cpu]);
2924 			tracing_iter_reset(iter, cpu);
2925 		}
2926 	} else {
2927 		cpu = iter->cpu_file;
2928 		iter->buffer_iter[cpu] =
2929 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2930 		ring_buffer_read_prepare_sync();
2931 		ring_buffer_read_start(iter->buffer_iter[cpu]);
2932 		tracing_iter_reset(iter, cpu);
2933 	}
2934 
2935 	mutex_unlock(&trace_types_lock);
2936 
2937 	return iter;
2938 
2939  fail:
2940 	mutex_unlock(&trace_types_lock);
2941 	kfree(iter->trace);
2942 	kfree(iter->buffer_iter);
2943 release:
2944 	seq_release_private(inode, file);
2945 	return ERR_PTR(-ENOMEM);
2946 }
2947 
2948 int tracing_open_generic(struct inode *inode, struct file *filp)
2949 {
2950 	if (tracing_disabled)
2951 		return -ENODEV;
2952 
2953 	filp->private_data = inode->i_private;
2954 	return 0;
2955 }
2956 
2957 /*
2958  * Open and update trace_array ref count.
2959  * Must have the current trace_array passed to it.
2960  */
2961 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
2962 {
2963 	struct trace_array *tr = inode->i_private;
2964 
2965 	if (tracing_disabled)
2966 		return -ENODEV;
2967 
2968 	if (trace_array_get(tr) < 0)
2969 		return -ENODEV;
2970 
2971 	filp->private_data = inode->i_private;
2972 
2973 	return 0;
2974 
2975 }
2976 
2977 static int tracing_open_generic_tc(struct inode *inode, struct file *filp)
2978 {
2979 	struct trace_cpu *tc = inode->i_private;
2980 	struct trace_array *tr = tc->tr;
2981 
2982 	if (tracing_disabled)
2983 		return -ENODEV;
2984 
2985 	if (trace_array_get(tr) < 0)
2986 		return -ENODEV;
2987 
2988 	filp->private_data = inode->i_private;
2989 
2990 	return 0;
2991 
2992 }
2993 
2994 static int tracing_release(struct inode *inode, struct file *file)
2995 {
2996 	struct seq_file *m = file->private_data;
2997 	struct trace_iterator *iter;
2998 	struct trace_array *tr;
2999 	int cpu;
3000 
3001 	/* Writes do not use seq_file, need to grab tr from inode */
3002 	if (!(file->f_mode & FMODE_READ)) {
3003 		struct trace_cpu *tc = inode->i_private;
3004 
3005 		trace_array_put(tc->tr);
3006 		return 0;
3007 	}
3008 
3009 	iter = m->private;
3010 	tr = iter->tr;
3011 	trace_array_put(tr);
3012 
3013 	mutex_lock(&trace_types_lock);
3014 
3015 	for_each_tracing_cpu(cpu) {
3016 		if (iter->buffer_iter[cpu])
3017 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3018 	}
3019 
3020 	if (iter->trace && iter->trace->close)
3021 		iter->trace->close(iter);
3022 
3023 	if (!iter->snapshot)
3024 		/* reenable tracing if it was previously enabled */
3025 		tracing_start_tr(tr);
3026 	mutex_unlock(&trace_types_lock);
3027 
3028 	mutex_destroy(&iter->mutex);
3029 	free_cpumask_var(iter->started);
3030 	kfree(iter->trace);
3031 	kfree(iter->buffer_iter);
3032 	seq_release_private(inode, file);
3033 
3034 	return 0;
3035 }
3036 
3037 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3038 {
3039 	struct trace_array *tr = inode->i_private;
3040 
3041 	trace_array_put(tr);
3042 	return 0;
3043 }
3044 
3045 static int tracing_release_generic_tc(struct inode *inode, struct file *file)
3046 {
3047 	struct trace_cpu *tc = inode->i_private;
3048 	struct trace_array *tr = tc->tr;
3049 
3050 	trace_array_put(tr);
3051 	return 0;
3052 }
3053 
3054 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3055 {
3056 	struct trace_array *tr = inode->i_private;
3057 
3058 	trace_array_put(tr);
3059 
3060 	return single_release(inode, file);
3061 }
3062 
3063 static int tracing_open(struct inode *inode, struct file *file)
3064 {
3065 	struct trace_cpu *tc = inode->i_private;
3066 	struct trace_array *tr = tc->tr;
3067 	struct trace_iterator *iter;
3068 	int ret = 0;
3069 
3070 	if (trace_array_get(tr) < 0)
3071 		return -ENODEV;
3072 
3073 	/* If this file was open for write, then erase contents */
3074 	if ((file->f_mode & FMODE_WRITE) &&
3075 	    (file->f_flags & O_TRUNC)) {
3076 		if (tc->cpu == RING_BUFFER_ALL_CPUS)
3077 			tracing_reset_online_cpus(&tr->trace_buffer);
3078 		else
3079 			tracing_reset(&tr->trace_buffer, tc->cpu);
3080 	}
3081 
3082 	if (file->f_mode & FMODE_READ) {
3083 		iter = __tracing_open(tr, tc, inode, file, false);
3084 		if (IS_ERR(iter))
3085 			ret = PTR_ERR(iter);
3086 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3087 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3088 	}
3089 
3090 	if (ret < 0)
3091 		trace_array_put(tr);
3092 
3093 	return ret;
3094 }
3095 
3096 static void *
3097 t_next(struct seq_file *m, void *v, loff_t *pos)
3098 {
3099 	struct tracer *t = v;
3100 
3101 	(*pos)++;
3102 
3103 	if (t)
3104 		t = t->next;
3105 
3106 	return t;
3107 }
3108 
3109 static void *t_start(struct seq_file *m, loff_t *pos)
3110 {
3111 	struct tracer *t;
3112 	loff_t l = 0;
3113 
3114 	mutex_lock(&trace_types_lock);
3115 	for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3116 		;
3117 
3118 	return t;
3119 }
3120 
3121 static void t_stop(struct seq_file *m, void *p)
3122 {
3123 	mutex_unlock(&trace_types_lock);
3124 }
3125 
3126 static int t_show(struct seq_file *m, void *v)
3127 {
3128 	struct tracer *t = v;
3129 
3130 	if (!t)
3131 		return 0;
3132 
3133 	seq_printf(m, "%s", t->name);
3134 	if (t->next)
3135 		seq_putc(m, ' ');
3136 	else
3137 		seq_putc(m, '\n');
3138 
3139 	return 0;
3140 }
3141 
3142 static const struct seq_operations show_traces_seq_ops = {
3143 	.start		= t_start,
3144 	.next		= t_next,
3145 	.stop		= t_stop,
3146 	.show		= t_show,
3147 };
3148 
3149 static int show_traces_open(struct inode *inode, struct file *file)
3150 {
3151 	if (tracing_disabled)
3152 		return -ENODEV;
3153 
3154 	return seq_open(file, &show_traces_seq_ops);
3155 }
3156 
3157 static ssize_t
3158 tracing_write_stub(struct file *filp, const char __user *ubuf,
3159 		   size_t count, loff_t *ppos)
3160 {
3161 	return count;
3162 }
3163 
3164 static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
3165 {
3166 	if (file->f_mode & FMODE_READ)
3167 		return seq_lseek(file, offset, origin);
3168 	else
3169 		return 0;
3170 }
3171 
3172 static const struct file_operations tracing_fops = {
3173 	.open		= tracing_open,
3174 	.read		= seq_read,
3175 	.write		= tracing_write_stub,
3176 	.llseek		= tracing_seek,
3177 	.release	= tracing_release,
3178 };
3179 
3180 static const struct file_operations show_traces_fops = {
3181 	.open		= show_traces_open,
3182 	.read		= seq_read,
3183 	.release	= seq_release,
3184 	.llseek		= seq_lseek,
3185 };
3186 
/*
 * Only trace on a CPU if the bitmask is set:
 */
static cpumask_var_t tracing_cpumask;

/*
 * The tracer itself will not take this lock, but still we want
 * to provide a consistent cpumask to user-space.  It is held by
 * tracing_cpumask_read() and tracing_cpumask_write() around their
 * accesses to tracing_cpumask and mask_str.
 */
static DEFINE_MUTEX(tracing_cpumask_update_lock);

/*
 * Temporary storage for the character representation of the
 * CPU bitmask (and one more byte for the newline).  Shared static
 * storage: tracing_cpumask_read() fills it while holding
 * tracing_cpumask_update_lock.
 */
static char mask_str[NR_CPUS + 1];
3203 
3204 static ssize_t
3205 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3206 		     size_t count, loff_t *ppos)
3207 {
3208 	int len;
3209 
3210 	mutex_lock(&tracing_cpumask_update_lock);
3211 
3212 	len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
3213 	if (count - len < 2) {
3214 		count = -EINVAL;
3215 		goto out_err;
3216 	}
3217 	len += sprintf(mask_str + len, "\n");
3218 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3219 
3220 out_err:
3221 	mutex_unlock(&tracing_cpumask_update_lock);
3222 
3223 	return count;
3224 }
3225 
3226 static ssize_t
3227 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3228 		      size_t count, loff_t *ppos)
3229 {
3230 	struct trace_array *tr = filp->private_data;
3231 	cpumask_var_t tracing_cpumask_new;
3232 	int err, cpu;
3233 
3234 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3235 		return -ENOMEM;
3236 
3237 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3238 	if (err)
3239 		goto err_unlock;
3240 
3241 	mutex_lock(&tracing_cpumask_update_lock);
3242 
3243 	local_irq_disable();
3244 	arch_spin_lock(&ftrace_max_lock);
3245 	for_each_tracing_cpu(cpu) {
3246 		/*
3247 		 * Increase/decrease the disabled counter if we are
3248 		 * about to flip a bit in the cpumask:
3249 		 */
3250 		if (cpumask_test_cpu(cpu, tracing_cpumask) &&
3251 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3252 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3253 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3254 		}
3255 		if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
3256 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3257 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3258 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3259 		}
3260 	}
3261 	arch_spin_unlock(&ftrace_max_lock);
3262 	local_irq_enable();
3263 
3264 	cpumask_copy(tracing_cpumask, tracing_cpumask_new);
3265 
3266 	mutex_unlock(&tracing_cpumask_update_lock);
3267 	free_cpumask_var(tracing_cpumask_new);
3268 
3269 	return count;
3270 
3271 err_unlock:
3272 	free_cpumask_var(tracing_cpumask_new);
3273 
3274 	return err;
3275 }
3276 
3277 static const struct file_operations tracing_cpumask_fops = {
3278 	.open		= tracing_open_generic,
3279 	.read		= tracing_cpumask_read,
3280 	.write		= tracing_cpumask_write,
3281 	.llseek		= generic_file_llseek,
3282 };
3283 
3284 static int tracing_trace_options_show(struct seq_file *m, void *v)
3285 {
3286 	struct tracer_opt *trace_opts;
3287 	struct trace_array *tr = m->private;
3288 	u32 tracer_flags;
3289 	int i;
3290 
3291 	mutex_lock(&trace_types_lock);
3292 	tracer_flags = tr->current_trace->flags->val;
3293 	trace_opts = tr->current_trace->flags->opts;
3294 
3295 	for (i = 0; trace_options[i]; i++) {
3296 		if (trace_flags & (1 << i))
3297 			seq_printf(m, "%s\n", trace_options[i]);
3298 		else
3299 			seq_printf(m, "no%s\n", trace_options[i]);
3300 	}
3301 
3302 	for (i = 0; trace_opts[i].name; i++) {
3303 		if (tracer_flags & trace_opts[i].bit)
3304 			seq_printf(m, "%s\n", trace_opts[i].name);
3305 		else
3306 			seq_printf(m, "no%s\n", trace_opts[i].name);
3307 	}
3308 	mutex_unlock(&trace_types_lock);
3309 
3310 	return 0;
3311 }
3312 
3313 static int __set_tracer_option(struct tracer *trace,
3314 			       struct tracer_flags *tracer_flags,
3315 			       struct tracer_opt *opts, int neg)
3316 {
3317 	int ret;
3318 
3319 	ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3320 	if (ret)
3321 		return ret;
3322 
3323 	if (neg)
3324 		tracer_flags->val &= ~opts->bit;
3325 	else
3326 		tracer_flags->val |= opts->bit;
3327 	return 0;
3328 }
3329 
3330 /* Try to assign a tracer specific option */
3331 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3332 {
3333 	struct tracer_flags *tracer_flags = trace->flags;
3334 	struct tracer_opt *opts = NULL;
3335 	int i;
3336 
3337 	for (i = 0; tracer_flags->opts[i].name; i++) {
3338 		opts = &tracer_flags->opts[i];
3339 
3340 		if (strcmp(cmp, opts->name) == 0)
3341 			return __set_tracer_option(trace, trace->flags,
3342 						   opts, neg);
3343 	}
3344 
3345 	return -EINVAL;
3346 }
3347 
3348 /* Some tracers require overwrite to stay enabled */
3349 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3350 {
3351 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3352 		return -1;
3353 
3354 	return 0;
3355 }
3356 
3357 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3358 {
3359 	/* do nothing if flag is already set */
3360 	if (!!(trace_flags & mask) == !!enabled)
3361 		return 0;
3362 
3363 	/* Give the tracer a chance to approve the change */
3364 	if (tr->current_trace->flag_changed)
3365 		if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3366 			return -EINVAL;
3367 
3368 	if (enabled)
3369 		trace_flags |= mask;
3370 	else
3371 		trace_flags &= ~mask;
3372 
3373 	if (mask == TRACE_ITER_RECORD_CMD)
3374 		trace_event_enable_cmd_record(enabled);
3375 
3376 	if (mask == TRACE_ITER_OVERWRITE) {
3377 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3378 #ifdef CONFIG_TRACER_MAX_TRACE
3379 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3380 #endif
3381 	}
3382 
3383 	if (mask == TRACE_ITER_PRINTK)
3384 		trace_printk_start_stop_comm(enabled);
3385 
3386 	return 0;
3387 }
3388 
3389 static int trace_set_options(struct trace_array *tr, char *option)
3390 {
3391 	char *cmp;
3392 	int neg = 0;
3393 	int ret = -ENODEV;
3394 	int i;
3395 
3396 	cmp = strstrip(option);
3397 
3398 	if (strncmp(cmp, "no", 2) == 0) {
3399 		neg = 1;
3400 		cmp += 2;
3401 	}
3402 
3403 	mutex_lock(&trace_types_lock);
3404 
3405 	for (i = 0; trace_options[i]; i++) {
3406 		if (strcmp(cmp, trace_options[i]) == 0) {
3407 			ret = set_tracer_flag(tr, 1 << i, !neg);
3408 			break;
3409 		}
3410 	}
3411 
3412 	/* If no option could be set, test the specific tracer options */
3413 	if (!trace_options[i])
3414 		ret = set_tracer_option(tr->current_trace, cmp, neg);
3415 
3416 	mutex_unlock(&trace_types_lock);
3417 
3418 	return ret;
3419 }
3420 
3421 static ssize_t
3422 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3423 			size_t cnt, loff_t *ppos)
3424 {
3425 	struct seq_file *m = filp->private_data;
3426 	struct trace_array *tr = m->private;
3427 	char buf[64];
3428 	int ret;
3429 
3430 	if (cnt >= sizeof(buf))
3431 		return -EINVAL;
3432 
3433 	if (copy_from_user(&buf, ubuf, cnt))
3434 		return -EFAULT;
3435 
3436 	buf[cnt] = 0;
3437 
3438 	ret = trace_set_options(tr, buf);
3439 	if (ret < 0)
3440 		return ret;
3441 
3442 	*ppos += cnt;
3443 
3444 	return cnt;
3445 }
3446 
3447 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3448 {
3449 	struct trace_array *tr = inode->i_private;
3450 
3451 	if (tracing_disabled)
3452 		return -ENODEV;
3453 
3454 	if (trace_array_get(tr) < 0)
3455 		return -ENODEV;
3456 
3457 	return single_open(file, tracing_trace_options_show, inode->i_private);
3458 }
3459 
3460 static const struct file_operations tracing_iter_fops = {
3461 	.open		= tracing_trace_options_open,
3462 	.read		= seq_read,
3463 	.llseek		= seq_lseek,
3464 	.release	= tracing_single_release_tr,
3465 	.write		= tracing_trace_options_write,
3466 };
3467 
/*
 * Text returned when the README file is read.  The sections that depend
 * on optional features are only compiled in when the matching CONFIG_
 * symbol is set.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t- change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	/* The option parser strips a leading "no", so it is a prefix. */
	"\t\t\t  Disable an option by adding a prefix 'no' to the option name\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these functions\n"
	"            accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"            modules: Can select a group via module\n"
	"             Format: :mod:<module-name>\n"
	"             example: echo :mod:ext3 > set_ftrace_filter\n"
	"            triggers: a command to perform when function is hit\n"
	"              Format: <function>:<trigger>[:count]\n"
	"             trigger: traceon, traceoff\n"
	"                      enable_event:<system>:<event>\n"
	"                      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"                      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"                      snapshot\n"
#endif
	"             example: echo do_fault:traceoff > set_ftrace_filter\n"
	"                      echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"             The first one will disable tracing every time do_fault is hit\n"
	"             The second will disable tracing at most 3 times when do_trap is hit\n"
	"               The first time do trap is hit and it disables tracing, the counter\n"
	"               will decrement to 2. If tracing is already disabled, the counter\n"
	"               will not decrement. It only decrements when the trigger did work\n"
	"             To remove trigger without count:\n"
	"               echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"             To remove trigger with a count:\n"
	"               echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"            accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"            modules: Can select a group via module command :mod:\n"
	"            Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n"
	"\t\t\t  Read the contents for more information\n"
#endif
#ifdef CONFIG_STACKTRACE
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n"
#endif
#endif /* CONFIG_STACKTRACE */
;
3549 
3550 static ssize_t
3551 tracing_readme_read(struct file *filp, char __user *ubuf,
3552 		       size_t cnt, loff_t *ppos)
3553 {
3554 	return simple_read_from_buffer(ubuf, cnt, ppos,
3555 					readme_msg, strlen(readme_msg));
3556 }
3557 
3558 static const struct file_operations tracing_readme_fops = {
3559 	.open		= tracing_open_generic,
3560 	.read		= tracing_readme_read,
3561 	.llseek		= generic_file_llseek,
3562 };
3563 
3564 static ssize_t
3565 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3566 				size_t cnt, loff_t *ppos)
3567 {
3568 	char *buf_comm;
3569 	char *file_buf;
3570 	char *buf;
3571 	int len = 0;
3572 	int pid;
3573 	int i;
3574 
3575 	file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3576 	if (!file_buf)
3577 		return -ENOMEM;
3578 
3579 	buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3580 	if (!buf_comm) {
3581 		kfree(file_buf);
3582 		return -ENOMEM;
3583 	}
3584 
3585 	buf = file_buf;
3586 
3587 	for (i = 0; i < SAVED_CMDLINES; i++) {
3588 		int r;
3589 
3590 		pid = map_cmdline_to_pid[i];
3591 		if (pid == -1 || pid == NO_CMDLINE_MAP)
3592 			continue;
3593 
3594 		trace_find_cmdline(pid, buf_comm);
3595 		r = sprintf(buf, "%d %s\n", pid, buf_comm);
3596 		buf += r;
3597 		len += r;
3598 	}
3599 
3600 	len = simple_read_from_buffer(ubuf, cnt, ppos,
3601 				      file_buf, len);
3602 
3603 	kfree(file_buf);
3604 	kfree(buf_comm);
3605 
3606 	return len;
3607 }
3608 
3609 static const struct file_operations tracing_saved_cmdlines_fops = {
3610     .open       = tracing_open_generic,
3611     .read       = tracing_saved_cmdlines_read,
3612     .llseek	= generic_file_llseek,
3613 };
3614 
3615 static ssize_t
3616 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3617 		       size_t cnt, loff_t *ppos)
3618 {
3619 	struct trace_array *tr = filp->private_data;
3620 	char buf[MAX_TRACER_SIZE+2];
3621 	int r;
3622 
3623 	mutex_lock(&trace_types_lock);
3624 	r = sprintf(buf, "%s\n", tr->current_trace->name);
3625 	mutex_unlock(&trace_types_lock);
3626 
3627 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3628 }
3629 
3630 int tracer_init(struct tracer *t, struct trace_array *tr)
3631 {
3632 	tracing_reset_online_cpus(&tr->trace_buffer);
3633 	return t->init(tr);
3634 }
3635 
3636 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3637 {
3638 	int cpu;
3639 
3640 	for_each_tracing_cpu(cpu)
3641 		per_cpu_ptr(buf->data, cpu)->entries = val;
3642 }
3643 
#ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Resize the ring buffer of @trace_buf so that each CPU gets the size
 * recorded in the per-cpu "entries" of @size_buf, and copy those entries
 * over for every CPU that was resized.
 *
 * @cpu_id selects a single CPU, or all tracing CPUs when it is
 * RING_BUFFER_ALL_CPUS.  In the all-CPU case the walk stops at the first
 * CPU whose resize fails; CPUs handled before it keep their new size.
 *
 * Returns the value of the last ring_buffer_resize() call; a negative
 * value means failure.  Both paths treat any non-negative value as
 * success, which is also how the callers in this file test the result.
 */
static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
					struct trace_buffer *size_buf, int cpu_id)
{
	int cpu, ret = 0;

	if (cpu_id == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
			if (ret < 0)
				break;
			per_cpu_ptr(trace_buf->data, cpu)->entries =
				per_cpu_ptr(size_buf->data, cpu)->entries;
		}
	} else {
		ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
		/*
		 * Only a negative return is an error.  Testing for exactly 0
		 * would skip the bookkeeping whenever the resize reports
		 * success with a positive value.
		 */
		if (ret >= 0)
			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
				per_cpu_ptr(size_buf->data, cpu_id)->entries;
	}

	return ret;
}
#endif /* CONFIG_TRACER_MAX_TRACE */
3671 
/*
 * Resize the ring buffer of @tr to @size for one CPU, or for every
 * tracing CPU when @cpu is RING_BUFFER_ALL_CPUS, and record the new size
 * in the per-cpu "entries" bookkeeping.
 *
 * With CONFIG_TRACER_MAX_TRACE the max (snapshot) buffer is resized as
 * well, but only for the global trace array and only while the current
 * tracer uses it.  If that second resize fails, the main buffer is
 * resized back to the sizes still recorded in its "entries" (they are
 * only updated at the very end of this function).
 *
 * The callers visible in this file hold trace_types_lock.
 *
 * Returns the value of the last ring_buffer_resize() call, or 0 when the
 * buffer has not been allocated yet; a negative value means failure.
 */
3672 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3673 					unsigned long size, int cpu)
3674 {
3675 	int ret;
3676 
3677 	/*
3678 	 * If kernel or user changes the size of the ring buffer
3679 	 * we use the size that was given, and we can forget about
3680 	 * expanding it later.
3681 	 */
3682 	ring_buffer_expanded = true;
3683 
3684 	/* May be called before buffers are initialized */
3685 	if (!tr->trace_buffer.buffer)
3686 		return 0;
3687 
3688 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3689 	if (ret < 0)
3690 		return ret;
3691 
3692 #ifdef CONFIG_TRACER_MAX_TRACE
	/* The max buffer only follows along when it is actually in use. */
3693 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3694 	    !tr->current_trace->use_max_tr)
3695 		goto out;
3696 
3697 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3698 	if (ret < 0) {
		/*
		 * Roll the main buffer back: passing trace_buffer as both
		 * arguments resizes it to its own recorded (old) entries.
		 */
3699 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3700 						     &tr->trace_buffer, cpu);
3701 		if (r < 0) {
3702 			/*
3703 			 * AARGH! We are left with different
3704 			 * size max buffer!!!!
3705 			 * The max buffer is our "snapshot" buffer.
3706 			 * When a tracer needs a snapshot (one of the
3707 			 * latency tracers), it swaps the max buffer
3708 			 * with the saved snap shot. We succeeded to
3709 			 * update the size of the main buffer, but failed to
3710 			 * update the size of the max buffer. But when we tried
3711 			 * to reset the main buffer to the original size, we
3712 			 * failed there too. This is very unlikely to
3713 			 * happen, but if it does, warn and kill all
3714 			 * tracing.
3715 			 */
3716 			WARN_ON(1);
3717 			tracing_disabled = 1;
3718 		}
		/* Report the max buffer resize failure, not the rollback result. */
3719 		return ret;
3720 	}
3721 
3722 	if (cpu == RING_BUFFER_ALL_CPUS)
3723 		set_buffer_entries(&tr->max_buffer, size);
3724 	else
3725 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3726 
3727  out:
3728 #endif /* CONFIG_TRACER_MAX_TRACE */
3729 
	/* Every resize succeeded: commit the new size to the bookkeeping. */
3730 	if (cpu == RING_BUFFER_ALL_CPUS)
3731 		set_buffer_entries(&tr->trace_buffer, size);
3732 	else
3733 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3734 
3735 	return ret;
3736 }
3737 
3738 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3739 					  unsigned long size, int cpu_id)
3740 {
3741 	int ret = size;
3742 
3743 	mutex_lock(&trace_types_lock);
3744 
3745 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
3746 		/* make sure, this cpu is enabled in the mask */
3747 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3748 			ret = -EINVAL;
3749 			goto out;
3750 		}
3751 	}
3752 
3753 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3754 	if (ret < 0)
3755 		ret = -ENOMEM;
3756 
3757 out:
3758 	mutex_unlock(&trace_types_lock);
3759 
3760 	return ret;
3761 }
3762 
3763 
3764 /**
3765  * tracing_update_buffers - used by tracing facility to expand ring buffers
3766  *
3767  * To save on memory when the tracing is never used on a system with it
3768  * configured in. The ring buffers are set to a minimum size. But once
3769  * a user starts to use the tracing facility, then they need to grow
3770  * to their default size.
3771  *
3772  * This function is to be called when a tracer is about to be used.
3773  */
3774 int tracing_update_buffers(void)
3775 {
3776 	int ret = 0;
3777 
3778 	mutex_lock(&trace_types_lock);
3779 	if (!ring_buffer_expanded)
3780 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3781 						RING_BUFFER_ALL_CPUS);
3782 	mutex_unlock(&trace_types_lock);
3783 
3784 	return ret;
3785 }
3786 
/*
 * Forward declarations for the per-tracer option files, which
 * tracing_set_tracer() tears down and re-creates on a tracer switch.
 */
struct trace_option_dentry;

static struct trace_option_dentry *
create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
static void destroy_trace_option_files(struct trace_option_dentry *topts);
3794 
/*
 * Make the tracer named @buf the current tracer of the global trace
 * array.
 *
 * The ring buffer is expanded to trace_buf_size first if that has not
 * happened yet.  The name is looked up in the trace_types list; selecting
 * the tracer that is already current is a successful no-op.  Otherwise
 * the old tracer is disabled and reset, nop_trace is installed while the
 * snapshot buffer and the tracer option files are adjusted for the new
 * tracer, and finally the new tracer is initialised and installed.
 *
 * Returns 0 on success, -EINVAL for an unknown tracer name, or the error
 * from the buffer resize, alloc_snapshot() or the tracer's init.  When
 * alloc_snapshot() or the init fails, current_trace is left at nop_trace.
 */
3795 static int tracing_set_tracer(const char *buf)
3796 {
	/* Option files of the current tracer; static so they survive calls. */
3797 	static struct trace_option_dentry *topts;
3798 	struct trace_array *tr = &global_trace;
3799 	struct tracer *t;
3800 #ifdef CONFIG_TRACER_MAX_TRACE
3801 	bool had_max_tr;
3802 #endif
3803 	int ret = 0;
3804 
3805 	mutex_lock(&trace_types_lock);
3806 
3807 	if (!ring_buffer_expanded) {
3808 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3809 						RING_BUFFER_ALL_CPUS);
3810 		if (ret < 0)
3811 			goto out;
		/* Discard any non-negative resize result; 0 means success here. */
3812 		ret = 0;
3813 	}
3814 
	/* Find the registered tracer with the requested name. */
3815 	for (t = trace_types; t; t = t->next) {
3816 		if (strcmp(t->name, buf) == 0)
3817 			break;
3818 	}
3819 	if (!t) {
3820 		ret = -EINVAL;
3821 		goto out;
3822 	}
	/* Already the current tracer: nothing to do, ret is still 0. */
3823 	if (t == tr->current_trace)
3824 		goto out;
3825 
3826 	trace_branch_disable();
3827 
3828 	tr->current_trace->enabled = false;
3829 
3830 	if (tr->current_trace->reset)
3831 		tr->current_trace->reset(tr);
3832 
3833 	/* Current trace needs to be nop_trace before synchronize_sched */
3834 	tr->current_trace = &nop_trace;
3835 
3836 #ifdef CONFIG_TRACER_MAX_TRACE
3837 	had_max_tr = tr->allocated_snapshot;
3838 
3839 	if (had_max_tr && !t->use_max_tr) {
3840 		/*
3841 		 * We need to make sure that the update_max_tr sees that
3842 		 * current_trace changed to nop_trace to keep it from
3843 		 * swapping the buffers after we resize it.
3844 		 * The update_max_tr is called from interrupts disabled
3845 		 * so a synchronized_sched() is sufficient.
3846 		 */
3847 		synchronize_sched();
3848 		free_snapshot(tr);
3849 	}
3850 #endif
	/* Replace the old tracer's option files with those of the new one. */
3851 	destroy_trace_option_files(topts);
3852 
3853 	topts = create_trace_option_files(tr, t);
3854 
3855 #ifdef CONFIG_TRACER_MAX_TRACE
	/* The new tracer needs a snapshot buffer that is not there yet. */
3856 	if (t->use_max_tr && !had_max_tr) {
3857 		ret = alloc_snapshot(tr);
3858 		if (ret < 0)
3859 			goto out;
3860 	}
3861 #endif
3862 
3863 	if (t->init) {
3864 		ret = tracer_init(t, tr);
3865 		if (ret)
3866 			goto out;
3867 	}
3868 
3869 	tr->current_trace = t;
3870 	tr->current_trace->enabled = true;
3871 	trace_branch_enable(tr);
3872  out:
3873 	mutex_unlock(&trace_types_lock);
3874 
3875 	return ret;
3876 }
3877 
3878 static ssize_t
3879 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3880 			size_t cnt, loff_t *ppos)
3881 {
3882 	char buf[MAX_TRACER_SIZE+1];
3883 	int i;
3884 	size_t ret;
3885 	int err;
3886 
3887 	ret = cnt;
3888 
3889 	if (cnt > MAX_TRACER_SIZE)
3890 		cnt = MAX_TRACER_SIZE;
3891 
3892 	if (copy_from_user(&buf, ubuf, cnt))
3893 		return -EFAULT;
3894 
3895 	buf[cnt] = 0;
3896 
3897 	/* strip ending whitespace. */
3898 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3899 		buf[i] = 0;
3900 
3901 	err = tracing_set_tracer(buf);
3902 	if (err)
3903 		return err;
3904 
3905 	*ppos += ret;
3906 
3907 	return ret;
3908 }
3909 
3910 static ssize_t
3911 tracing_max_lat_read(struct file *filp, char __user *ubuf,
3912 		     size_t cnt, loff_t *ppos)
3913 {
3914 	unsigned long *ptr = filp->private_data;
3915 	char buf[64];
3916 	int r;
3917 
3918 	r = snprintf(buf, sizeof(buf), "%ld\n",
3919 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
3920 	if (r > sizeof(buf))
3921 		r = sizeof(buf);
3922 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3923 }
3924 
3925 static ssize_t
3926 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3927 		      size_t cnt, loff_t *ppos)
3928 {
3929 	unsigned long *ptr = filp->private_data;
3930 	unsigned long val;
3931 	int ret;
3932 
3933 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3934 	if (ret)
3935 		return ret;
3936 
3937 	*ptr = val * 1000;
3938 
3939 	return cnt;
3940 }
3941 
3942 static int tracing_open_pipe(struct inode *inode, struct file *filp)
3943 {
3944 	struct trace_cpu *tc = inode->i_private;
3945 	struct trace_array *tr = tc->tr;
3946 	struct trace_iterator *iter;
3947 	int ret = 0;
3948 
3949 	if (tracing_disabled)
3950 		return -ENODEV;
3951 
3952 	if (trace_array_get(tr) < 0)
3953 		return -ENODEV;
3954 
3955 	mutex_lock(&trace_types_lock);
3956 
3957 	/* create a buffer to store the information to pass to userspace */
3958 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
3959 	if (!iter) {
3960 		ret = -ENOMEM;
3961 		goto out;
3962 	}
3963 
3964 	/*
3965 	 * We make a copy of the current tracer to avoid concurrent
3966 	 * changes on it while we are reading.
3967 	 */
3968 	iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
3969 	if (!iter->trace) {
3970 		ret = -ENOMEM;
3971 		goto fail;
3972 	}
3973 	*iter->trace = *tr->current_trace;
3974 
3975 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
3976 		ret = -ENOMEM;
3977 		goto fail;
3978 	}
3979 
3980 	/* trace pipe does not show start of buffer */
3981 	cpumask_setall(iter->started);
3982 
3983 	if (trace_flags & TRACE_ITER_LATENCY_FMT)
3984 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
3985 
3986 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3987 	if (trace_clocks[tr->clock_id].in_ns)
3988 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3989 
3990 	iter->cpu_file = tc->cpu;
3991 	iter->tr = tc->tr;
3992 	iter->trace_buffer = &tc->tr->trace_buffer;
3993 	mutex_init(&iter->mutex);
3994 	filp->private_data = iter;
3995 
3996 	if (iter->trace->pipe_open)
3997 		iter->trace->pipe_open(iter);
3998 
3999 	nonseekable_open(inode, filp);
4000 out:
4001 	mutex_unlock(&trace_types_lock);
4002 	return ret;
4003 
4004 fail:
4005 	kfree(iter->trace);
4006 	kfree(iter);
4007 	__trace_array_put(tr);
4008 	mutex_unlock(&trace_types_lock);
4009 	return ret;
4010 }
4011 
4012 static int tracing_release_pipe(struct inode *inode, struct file *file)
4013 {
4014 	struct trace_iterator *iter = file->private_data;
4015 	struct trace_cpu *tc = inode->i_private;
4016 	struct trace_array *tr = tc->tr;
4017 
4018 	mutex_lock(&trace_types_lock);
4019 
4020 	if (iter->trace->pipe_close)
4021 		iter->trace->pipe_close(iter);
4022 
4023 	mutex_unlock(&trace_types_lock);
4024 
4025 	free_cpumask_var(iter->started);
4026 	mutex_destroy(&iter->mutex);
4027 	kfree(iter->trace);
4028 	kfree(iter);
4029 
4030 	trace_array_put(tr);
4031 
4032 	return 0;
4033 }
4034 
4035 static unsigned int
4036 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4037 {
4038 	/* Iterators are static, they should be filled or empty */
4039 	if (trace_buffer_iter(iter, iter->cpu_file))
4040 		return POLLIN | POLLRDNORM;
4041 
4042 	if (trace_flags & TRACE_ITER_BLOCK)
4043 		/*
4044 		 * Always select as readable when in blocking mode
4045 		 */
4046 		return POLLIN | POLLRDNORM;
4047 	else
4048 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4049 					     filp, poll_table);
4050 }
4051 
4052 static unsigned int
4053 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4054 {
4055 	struct trace_iterator *iter = filp->private_data;
4056 
4057 	return trace_poll(iter, filp, poll_table);
4058 }
4059 
4060 /*
4061  * This is a make-shift waitqueue.
4062  * A tracer might use this callback on some rare cases:
4063  *
4064  *  1) the current tracer might hold the runqueue lock when it wakes up
4065  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4066  *  2) the function tracers, trace all functions, we don't want
4067  *     the overhead of calling wake_up and friends
4068  *     (and tracing them too)
4069  *
4070  *     Anyway, this is really very primitive wakeup.
4071  */
4072 void poll_wait_pipe(struct trace_iterator *iter)
4073 {
4074 	set_current_state(TASK_INTERRUPTIBLE);
4075 	/* sleep for 100 msecs, and try again. */
4076 	schedule_timeout(HZ / 10);
4077 }
4078 
4079 /* Must be called with trace_types_lock mutex held. */
4080 static int tracing_wait_pipe(struct file *filp)
4081 {
4082 	struct trace_iterator *iter = filp->private_data;
4083 
4084 	while (trace_empty(iter)) {
4085 
4086 		if ((filp->f_flags & O_NONBLOCK)) {
4087 			return -EAGAIN;
4088 		}
4089 
4090 		mutex_unlock(&iter->mutex);
4091 
4092 		iter->trace->wait_pipe(iter);
4093 
4094 		mutex_lock(&iter->mutex);
4095 
4096 		if (signal_pending(current))
4097 			return -EINTR;
4098 
4099 		/*
4100 		 * We block until we read something and tracing is disabled.
4101 		 * We still block if tracing is disabled, but we have never
4102 		 * read anything. This allows a user to cat this file, and
4103 		 * then enable tracing. But after we have read something,
4104 		 * we give an EOF when tracing is again disabled.
4105 		 *
4106 		 * iter->pos will be 0 if we haven't read anything.
4107 		 */
4108 		if (!tracing_is_on() && iter->pos)
4109 			break;
4110 	}
4111 
4112 	return 1;
4113 }
4114 
4115 /*
4116  * Consumer reader.
 *
 * Read handler for trace_pipe.  Output left over in iter->seq from an
 * earlier call is handed out first.  Otherwise the function waits for
 * entries, formats as many complete lines as fit, consumes the entries
 * it printed and copies the text to user space.
 *
 * Returns the number of bytes copied, 0 once the buffer is empty after
 * the wait, or a negative error (from the tracer's own read callback,
 * from tracing_wait_pipe() or from the copy to user space).
4117  */
4118 static ssize_t
4119 tracing_read_pipe(struct file *filp, char __user *ubuf,
4120 		  size_t cnt, loff_t *ppos)
4121 {
4122 	struct trace_iterator *iter = filp->private_data;
4123 	struct trace_array *tr = iter->tr;
4124 	ssize_t sret;
4125 
4126 	/* return any leftover data */
4127 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	/* -EBUSY is treated as "no leftover data"; anything else is final. */
4128 	if (sret != -EBUSY)
4129 		return sret;
4130 
4131 	trace_seq_init(&iter->seq);
4132 
4133 	/* copy the tracer to avoid using a global lock all around */
4134 	mutex_lock(&trace_types_lock);
4135 	if (unlikely(iter->trace->name != tr->current_trace->name))
4136 		*iter->trace = *tr->current_trace;
4137 	mutex_unlock(&trace_types_lock);
4138 
4139 	/*
4140 	 * Avoid more than one consumer on a single file descriptor
4141 	 * This is just a matter of traces coherency, the ring buffer itself
4142 	 * is protected.
4143 	 */
4144 	mutex_lock(&iter->mutex);
	/* A tracer may supply its own read; a non-zero result ends the call. */
4145 	if (iter->trace->read) {
4146 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4147 		if (sret)
4148 			goto out;
4149 	}
4150 
4151 waitagain:
4152 	sret = tracing_wait_pipe(filp);
4153 	if (sret <= 0)
4154 		goto out;
4155 
4156 	/* stop when tracing is finished */
4157 	if (trace_empty(iter)) {
4158 		sret = 0;
4159 		goto out;
4160 	}
4161 
	/* Cap one pass of formatted output just below one page. */
4162 	if (cnt >= PAGE_SIZE)
4163 		cnt = PAGE_SIZE - 1;
4164 
4165 	/* reset all but tr, trace, and overruns */
	/* i.e. zero every member from 'seq' to the end of the structure */
4166 	memset(&iter->seq, 0,
4167 	       sizeof(struct trace_iterator) -
4168 	       offsetof(struct trace_iterator, seq));
4169 	iter->pos = -1;
4170 
4171 	trace_event_read_lock();
4172 	trace_access_lock(iter->cpu_file);
4173 	while (trace_find_next_entry_inc(iter) != NULL) {
4174 		enum print_line_t ret;
		/* remember the length so a partial line can be rolled back */
4175 		int len = iter->seq.len;
4176 
4177 		ret = print_trace_line(iter);
4178 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4179 			/* don't print partial lines */
4180 			iter->seq.len = len;
4181 			break;
4182 		}
4183 		if (ret != TRACE_TYPE_NO_CONSUME)
4184 			trace_consume(iter);
4185 
4186 		if (iter->seq.len >= cnt)
4187 			break;
4188 
4189 		/*
4190 		 * Setting the full flag means we reached the trace_seq buffer
4191 		 * size and we should leave by partial output condition above.
4192 		 * One of the trace_seq_* functions is not used properly.
4193 		 */
4194 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4195 			  iter->ent->type);
4196 	}
4197 	trace_access_unlock(iter->cpu_file);
4198 	trace_event_read_unlock();
4199 
4200 	/* Now copy what we have to the user */
4201 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	/* Everything was handed out: start the next call with an empty seq. */
4202 	if (iter->seq.readpos >= iter->seq.len)
4203 		trace_seq_init(&iter->seq);
4204 
4205 	/*
4206 	 * If there was nothing to send to user, in spite of consuming trace
4207 	 * entries, go back to wait for more entries.
4208 	 */
4209 	if (sret == -EBUSY)
4210 		goto waitagain;
4211 
4212 out:
4213 	mutex_unlock(&iter->mutex);
4214 
4215 	return sret;
4216 }
4217 
4218 static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
4219 				     struct pipe_buffer *buf)
4220 {
4221 	__free_page(buf->page);
4222 }
4223 
4224 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4225 				     unsigned int idx)
4226 {
4227 	__free_page(spd->pages[idx]);
4228 }
4229 
4230 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4231 	.can_merge		= 0,
4232 	.map			= generic_pipe_buf_map,
4233 	.unmap			= generic_pipe_buf_unmap,
4234 	.confirm		= generic_pipe_buf_confirm,
4235 	.release		= tracing_pipe_buf_release,
4236 	.steal			= generic_pipe_buf_steal,
4237 	.get			= generic_pipe_buf_get,
4238 };
4239 
/*
 * Format trace entries into iter->seq for one splice page.
 *
 * Starting with the entry the iterator currently points at, lines are
 * printed until one does not fit into the remaining byte budget @rem,
 * the print reports a partial line, or no further entry is found.
 * Output of a line that is not accepted is dropped again by rolling
 * iter->seq.len back.
 *
 * Returns the budget that is left; 0 when a line exceeded the budget or
 * there are no more entries (iter->ent is set to NULL in that case).
 */
4240 static size_t
4241 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4242 {
4243 	size_t count;
4244 	int ret;
4245 
4246 	/* Seq buffer is page-sized, exactly what we need. */
4247 	for (;;) {
		/* count = number of bytes this line added to the seq buffer */
4248 		count = iter->seq.len;
4249 		ret = print_trace_line(iter);
4250 		count = iter->seq.len - count;
		/* Line is larger than what the caller still wants: drop it. */
4251 		if (rem < count) {
4252 			rem = 0;
4253 			iter->seq.len -= count;
4254 			break;
4255 		}
		/* Incomplete line: drop it, the budget is left untouched. */
4256 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4257 			iter->seq.len -= count;
4258 			break;
4259 		}
4260 
4261 		if (ret != TRACE_TYPE_NO_CONSUME)
4262 			trace_consume(iter);
4263 		rem -= count;
4264 		if (!trace_find_next_entry_inc(iter))	{
4265 			rem = 0;
4266 			iter->ent = NULL;
4267 			break;
4268 		}
4269 	}
4270 
4271 	return rem;
4272 }
4273 
4274 static ssize_t tracing_splice_read_pipe(struct file *filp,
4275 					loff_t *ppos,
4276 					struct pipe_inode_info *pipe,
4277 					size_t len,
4278 					unsigned int flags)
4279 {
4280 	struct page *pages_def[PIPE_DEF_BUFFERS];
4281 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
4282 	struct trace_iterator *iter = filp->private_data;
4283 	struct splice_pipe_desc spd = {
4284 		.pages		= pages_def,
4285 		.partial	= partial_def,
4286 		.nr_pages	= 0, /* This gets updated below. */
4287 		.nr_pages_max	= PIPE_DEF_BUFFERS,
4288 		.flags		= flags,
4289 		.ops		= &tracing_pipe_buf_ops,
4290 		.spd_release	= tracing_spd_release_pipe,
4291 	};
4292 	struct trace_array *tr = iter->tr;
4293 	ssize_t ret;
4294 	size_t rem;
4295 	unsigned int i;
4296 
4297 	if (splice_grow_spd(pipe, &spd))
4298 		return -ENOMEM;
4299 
4300 	/* copy the tracer to avoid using a global lock all around */
4301 	mutex_lock(&trace_types_lock);
4302 	if (unlikely(iter->trace->name != tr->current_trace->name))
4303 		*iter->trace = *tr->current_trace;
4304 	mutex_unlock(&trace_types_lock);
4305 
4306 	mutex_lock(&iter->mutex);
4307 
4308 	if (iter->trace->splice_read) {
4309 		ret = iter->trace->splice_read(iter, filp,
4310 					       ppos, pipe, len, flags);
4311 		if (ret)
4312 			goto out_err;
4313 	}
4314 
4315 	ret = tracing_wait_pipe(filp);
4316 	if (ret <= 0)
4317 		goto out_err;
4318 
4319 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4320 		ret = -EFAULT;
4321 		goto out_err;
4322 	}
4323 
4324 	trace_event_read_lock();
4325 	trace_access_lock(iter->cpu_file);
4326 
4327 	/* Fill as many pages as possible. */
4328 	for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4329 		spd.pages[i] = alloc_page(GFP_KERNEL);
4330 		if (!spd.pages[i])
4331 			break;
4332 
4333 		rem = tracing_fill_pipe_page(rem, iter);
4334 
4335 		/* Copy the data into the page, so we can start over. */
4336 		ret = trace_seq_to_buffer(&iter->seq,
4337 					  page_address(spd.pages[i]),
4338 					  iter->seq.len);
4339 		if (ret < 0) {
4340 			__free_page(spd.pages[i]);
4341 			break;
4342 		}
4343 		spd.partial[i].offset = 0;
4344 		spd.partial[i].len = iter->seq.len;
4345 
4346 		trace_seq_init(&iter->seq);
4347 	}
4348 
4349 	trace_access_unlock(iter->cpu_file);
4350 	trace_event_read_unlock();
4351 	mutex_unlock(&iter->mutex);
4352 
4353 	spd.nr_pages = i;
4354 
4355 	ret = splice_to_pipe(pipe, &spd);
4356 out:
4357 	splice_shrink_spd(&spd);
4358 	return ret;
4359 
4360 out_err:
4361 	mutex_unlock(&iter->mutex);
4362 	goto out;
4363 }
4364 
4365 static ssize_t
4366 tracing_entries_read(struct file *filp, char __user *ubuf,
4367 		     size_t cnt, loff_t *ppos)
4368 {
4369 	struct trace_cpu *tc = filp->private_data;
4370 	struct trace_array *tr = tc->tr;
4371 	char buf[64];
4372 	int r = 0;
4373 	ssize_t ret;
4374 
4375 	mutex_lock(&trace_types_lock);
4376 
4377 	if (tc->cpu == RING_BUFFER_ALL_CPUS) {
4378 		int cpu, buf_size_same;
4379 		unsigned long size;
4380 
4381 		size = 0;
4382 		buf_size_same = 1;
4383 		/* check if all cpu sizes are same */
4384 		for_each_tracing_cpu(cpu) {
4385 			/* fill in the size from first enabled cpu */
4386 			if (size == 0)
4387 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4388 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4389 				buf_size_same = 0;
4390 				break;
4391 			}
4392 		}
4393 
4394 		if (buf_size_same) {
4395 			if (!ring_buffer_expanded)
4396 				r = sprintf(buf, "%lu (expanded: %lu)\n",
4397 					    size >> 10,
4398 					    trace_buf_size >> 10);
4399 			else
4400 				r = sprintf(buf, "%lu\n", size >> 10);
4401 		} else
4402 			r = sprintf(buf, "X\n");
4403 	} else
4404 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10);
4405 
4406 	mutex_unlock(&trace_types_lock);
4407 
4408 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4409 	return ret;
4410 }
4411 
4412 static ssize_t
4413 tracing_entries_write(struct file *filp, const char __user *ubuf,
4414 		      size_t cnt, loff_t *ppos)
4415 {
4416 	struct trace_cpu *tc = filp->private_data;
4417 	unsigned long val;
4418 	int ret;
4419 
4420 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4421 	if (ret)
4422 		return ret;
4423 
4424 	/* must have at least 1 entry */
4425 	if (!val)
4426 		return -EINVAL;
4427 
4428 	/* value is in KB */
4429 	val <<= 10;
4430 
4431 	ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu);
4432 	if (ret < 0)
4433 		return ret;
4434 
4435 	*ppos += cnt;
4436 
4437 	return cnt;
4438 }
4439 
4440 static ssize_t
4441 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4442 				size_t cnt, loff_t *ppos)
4443 {
4444 	struct trace_array *tr = filp->private_data;
4445 	char buf[64];
4446 	int r, cpu;
4447 	unsigned long size = 0, expanded_size = 0;
4448 
4449 	mutex_lock(&trace_types_lock);
4450 	for_each_tracing_cpu(cpu) {
4451 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4452 		if (!ring_buffer_expanded)
4453 			expanded_size += trace_buf_size >> 10;
4454 	}
4455 	if (ring_buffer_expanded)
4456 		r = sprintf(buf, "%lu\n", size);
4457 	else
4458 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4459 	mutex_unlock(&trace_types_lock);
4460 
4461 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4462 }
4463 
4464 static ssize_t
4465 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4466 			  size_t cnt, loff_t *ppos)
4467 {
4468 	/*
4469 	 * There is no need to read what the user has written, this function
4470 	 * is just to make sure that there is no error when "echo" is used
4471 	 */
4472 
4473 	*ppos += cnt;
4474 
4475 	return cnt;
4476 }
4477 
4478 static int
4479 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4480 {
4481 	struct trace_array *tr = inode->i_private;
4482 
4483 	/* disable tracing ? */
4484 	if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4485 		tracing_off();
4486 	/* resize the ring buffer to 0 */
4487 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4488 
4489 	trace_array_put(tr);
4490 
4491 	return 0;
4492 }
4493 
4494 static ssize_t
4495 tracing_mark_write(struct file *filp, const char __user *ubuf,
4496 					size_t cnt, loff_t *fpos)
4497 {
4498 	unsigned long addr = (unsigned long)ubuf;
4499 	struct trace_array *tr = filp->private_data;
4500 	struct ring_buffer_event *event;
4501 	struct ring_buffer *buffer;
4502 	struct print_entry *entry;
4503 	unsigned long irq_flags;
4504 	struct page *pages[2];
4505 	void *map_page[2];
4506 	int nr_pages = 1;
4507 	ssize_t written;
4508 	int offset;
4509 	int size;
4510 	int len;
4511 	int ret;
4512 	int i;
4513 
4514 	if (tracing_disabled)
4515 		return -EINVAL;
4516 
4517 	if (!(trace_flags & TRACE_ITER_MARKERS))
4518 		return -EINVAL;
4519 
4520 	if (cnt > TRACE_BUF_SIZE)
4521 		cnt = TRACE_BUF_SIZE;
4522 
4523 	/*
4524 	 * Userspace is injecting traces into the kernel trace buffer.
4525 	 * We want to be as non intrusive as possible.
4526 	 * To do so, we do not want to allocate any special buffers
4527 	 * or take any locks, but instead write the userspace data
4528 	 * straight into the ring buffer.
4529 	 *
4530 	 * First we need to pin the userspace buffer into memory,
4531 	 * which, most likely it is, because it just referenced it.
4532 	 * But there's no guarantee that it is. By using get_user_pages_fast()
4533 	 * and kmap_atomic/kunmap_atomic() we can get access to the
4534 	 * pages directly. We then write the data directly into the
4535 	 * ring buffer.
4536 	 */
4537 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4538 
4539 	/* check if we cross pages */
4540 	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4541 		nr_pages = 2;
4542 
4543 	offset = addr & (PAGE_SIZE - 1);
4544 	addr &= PAGE_MASK;
4545 
4546 	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4547 	if (ret < nr_pages) {
4548 		while (--ret >= 0)
4549 			put_page(pages[ret]);
4550 		written = -EFAULT;
4551 		goto out;
4552 	}
4553 
4554 	for (i = 0; i < nr_pages; i++)
4555 		map_page[i] = kmap_atomic(pages[i]);
4556 
4557 	local_save_flags(irq_flags);
4558 	size = sizeof(*entry) + cnt + 2; /* possible \n added */
4559 	buffer = tr->trace_buffer.buffer;
4560 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4561 					  irq_flags, preempt_count());
4562 	if (!event) {
4563 		/* Ring buffer disabled, return as if not open for write */
4564 		written = -EBADF;
4565 		goto out_unlock;
4566 	}
4567 
4568 	entry = ring_buffer_event_data(event);
4569 	entry->ip = _THIS_IP_;
4570 
4571 	if (nr_pages == 2) {
4572 		len = PAGE_SIZE - offset;
4573 		memcpy(&entry->buf, map_page[0] + offset, len);
4574 		memcpy(&entry->buf[len], map_page[1], cnt - len);
4575 	} else
4576 		memcpy(&entry->buf, map_page[0] + offset, cnt);
4577 
4578 	if (entry->buf[cnt - 1] != '\n') {
4579 		entry->buf[cnt] = '\n';
4580 		entry->buf[cnt + 1] = '\0';
4581 	} else
4582 		entry->buf[cnt] = '\0';
4583 
4584 	__buffer_unlock_commit(buffer, event);
4585 
4586 	written = cnt;
4587 
4588 	*fpos += written;
4589 
4590  out_unlock:
4591 	for (i = 0; i < nr_pages; i++){
4592 		kunmap_atomic(map_page[i]);
4593 		put_page(pages[i]);
4594 	}
4595  out:
4596 	return written;
4597 }
4598 
4599 static int tracing_clock_show(struct seq_file *m, void *v)
4600 {
4601 	struct trace_array *tr = m->private;
4602 	int i;
4603 
4604 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4605 		seq_printf(m,
4606 			"%s%s%s%s", i ? " " : "",
4607 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4608 			i == tr->clock_id ? "]" : "");
4609 	seq_putc(m, '\n');
4610 
4611 	return 0;
4612 }
4613 
4614 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4615 				   size_t cnt, loff_t *fpos)
4616 {
4617 	struct seq_file *m = filp->private_data;
4618 	struct trace_array *tr = m->private;
4619 	char buf[64];
4620 	const char *clockstr;
4621 	int i;
4622 
4623 	if (cnt >= sizeof(buf))
4624 		return -EINVAL;
4625 
4626 	if (copy_from_user(&buf, ubuf, cnt))
4627 		return -EFAULT;
4628 
4629 	buf[cnt] = 0;
4630 
4631 	clockstr = strstrip(buf);
4632 
4633 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4634 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
4635 			break;
4636 	}
4637 	if (i == ARRAY_SIZE(trace_clocks))
4638 		return -EINVAL;
4639 
4640 	mutex_lock(&trace_types_lock);
4641 
4642 	tr->clock_id = i;
4643 
4644 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4645 
4646 	/*
4647 	 * New clock may not be consistent with the previous clock.
4648 	 * Reset the buffer so that it doesn't have incomparable timestamps.
4649 	 */
4650 	tracing_reset_online_cpus(&global_trace.trace_buffer);
4651 
4652 #ifdef CONFIG_TRACER_MAX_TRACE
4653 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4654 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4655 	tracing_reset_online_cpus(&global_trace.max_buffer);
4656 #endif
4657 
4658 	mutex_unlock(&trace_types_lock);
4659 
4660 	*fpos += cnt;
4661 
4662 	return cnt;
4663 }
4664 
4665 static int tracing_clock_open(struct inode *inode, struct file *file)
4666 {
4667 	struct trace_array *tr = inode->i_private;
4668 	int ret;
4669 
4670 	if (tracing_disabled)
4671 		return -ENODEV;
4672 
4673 	if (trace_array_get(tr))
4674 		return -ENODEV;
4675 
4676 	ret = single_open(file, tracing_clock_show, inode->i_private);
4677 	if (ret < 0)
4678 		trace_array_put(tr);
4679 
4680 	return ret;
4681 }
4682 
/*
 * Per-open state of a raw ring buffer file; allocated in
 * tracing_buffers_open() and stored in filp->private_data.
 */
struct ftrace_buffer_info {
	/* iterator holding the trace array, cpu, tracer and buffer in use */
	struct trace_iterator	iter;
	/* page from ring_buffer_alloc_read_page(); NULL until first read */
	void			*spare;
	/*
	 * offset into @spare already copied to user space; a value of
	 * PAGE_SIZE or more forces a new page to be read from the ring buffer
	 */
	unsigned int		read;
};
4688 
4689 #ifdef CONFIG_TRACER_SNAPSHOT
4690 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4691 {
4692 	struct trace_cpu *tc = inode->i_private;
4693 	struct trace_array *tr = tc->tr;
4694 	struct trace_iterator *iter;
4695 	struct seq_file *m;
4696 	int ret = 0;
4697 
4698 	if (trace_array_get(tr) < 0)
4699 		return -ENODEV;
4700 
4701 	if (file->f_mode & FMODE_READ) {
4702 		iter = __tracing_open(tr, tc, inode, file, true);
4703 		if (IS_ERR(iter))
4704 			ret = PTR_ERR(iter);
4705 	} else {
4706 		/* Writes still need the seq_file to hold the private data */
4707 		m = kzalloc(sizeof(*m), GFP_KERNEL);
4708 		if (!m)
4709 			return -ENOMEM;
4710 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4711 		if (!iter) {
4712 			kfree(m);
4713 			return -ENOMEM;
4714 		}
4715 		iter->tr = tr;
4716 		iter->trace_buffer = &tc->tr->max_buffer;
4717 		iter->cpu_file = tc->cpu;
4718 		m->private = iter;
4719 		file->private_data = m;
4720 	}
4721 
4722 	if (ret < 0)
4723 		trace_array_put(tr);
4724 
4725 	return ret;
4726 }
4727 
/*
 * Write handler for the snapshot file.  The written decimal value
 * selects the action:
 *   0     - free the snapshot buffer (only for the all-CPUs file)
 *   1     - allocate the snapshot buffer if needed and swap it with the
 *           live buffer
 *   other - clear the contents of an allocated snapshot buffer
 *
 * Returns @cnt on success, -EBUSY when the current tracer itself uses
 * the max buffer, -EINVAL for an unsupported per-cpu request, or the
 * error of the buffer update, the parser or the allocation.
 */
static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long val;
	int ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* ret is 0 from here on unless one of the cases below sets an error */
	mutex_lock(&trace_types_lock);

	if (tr->current_trace->use_max_tr) {
		ret = -EBUSY;
		goto out;
	}

	switch (val) {
	case 0:
		/* freeing is only allowed through the all-CPUs file */
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
		if (tr->allocated_snapshot)
			free_snapshot(tr);
		break;
	case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
#endif
		if (!tr->allocated_snapshot) {
			ret = alloc_snapshot(tr);
			if (ret < 0)
				break;
		}
		/* the swap itself runs with local interrupts disabled */
		local_irq_disable();
		/* Now, we're going to swap */
		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
			update_max_tr(tr, current, smp_processor_id());
		else
			update_max_tr_single(tr, current, iter->cpu_file);
		local_irq_enable();
		break;
	default:
		/* any other value only clears an existing snapshot */
		if (tr->allocated_snapshot) {
			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
				tracing_reset_online_cpus(&tr->max_buffer);
			else
				tracing_reset(&tr->max_buffer, iter->cpu_file);
		}
		break;
	}

	/* report the whole write as consumed when no case failed */
	if (ret >= 0) {
		*ppos += cnt;
		ret = cnt;
	}
out:
	mutex_unlock(&trace_types_lock);
	return ret;
}
4801 
4802 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4803 {
4804 	struct seq_file *m = file->private_data;
4805 	int ret;
4806 
4807 	ret = tracing_release(inode, file);
4808 
4809 	if (file->f_mode & FMODE_READ)
4810 		return ret;
4811 
4812 	/* If write only, the seq_file is just a stub */
4813 	if (m)
4814 		kfree(m->private);
4815 	kfree(m);
4816 
4817 	return 0;
4818 }
4819 
4820 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4821 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4822 				    size_t count, loff_t *ppos);
4823 static int tracing_buffers_release(struct inode *inode, struct file *file);
4824 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4825 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4826 
4827 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4828 {
4829 	struct ftrace_buffer_info *info;
4830 	int ret;
4831 
4832 	ret = tracing_buffers_open(inode, filp);
4833 	if (ret < 0)
4834 		return ret;
4835 
4836 	info = filp->private_data;
4837 
4838 	if (info->iter.trace->use_max_tr) {
4839 		tracing_buffers_release(inode, filp);
4840 		return -EBUSY;
4841 	}
4842 
4843 	info->iter.snapshot = true;
4844 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
4845 
4846 	return ret;
4847 }
4848 
4849 #endif /* CONFIG_TRACER_SNAPSHOT */
4850 
4851 
/*
 * File operations built from tracing_open_generic() plus the
 * tracing_max_lat_read()/tracing_max_lat_write() handlers; seekable.
 */
static const struct file_operations tracing_max_lat_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_max_lat_read,
	.write		= tracing_max_lat_write,
	.llseek		= generic_file_llseek,
};
4858 
/*
 * File operations built from tracing_open_generic() plus the
 * tracing_set_trace_read()/tracing_set_trace_write() handlers; seekable.
 */
static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
};
4865 
/*
 * File operations of the "trace_pipe" files: read-only, pollable,
 * spliceable and not seekable (no_llseek).
 */
static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
	.llseek		= no_llseek,
};
4874 
/*
 * File operations of the "buffer_size_kb" files: reading reports the
 * buffer size, writing resizes the ring buffer.
 */
static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tc,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tc,
};
4882 
/*
 * Read-only file operations reporting the summed size of all per-CPU
 * buffers via tracing_total_entries_read().
 */
static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
4889 
/*
 * Write-only file operations: writes are accepted and ignored, the real
 * work (shrinking the ring buffer) happens in the release handler.
 */
static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};
4895 
/*
 * Write-only file operations through which user space injects strings
 * into the trace buffer (see tracing_mark_write()).
 */
static const struct file_operations tracing_mark_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_mark_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
4902 
/*
 * File operations of the trace clock file: a seq_file listing the
 * available clocks on read, and selecting one on write.
 */
static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};
4910 
4911 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * File operations of the "snapshot" files: seq_file based reads, and
 * writes that free, take or clear a snapshot.
 */
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
	.llseek		= tracing_seek,
	.release	= tracing_snapshot_release,
};
4919 
/*
 * File operations for raw access to the snapshot buffer: the same
 * read/splice/release handlers as the raw ring buffer files, with an
 * open handler that redirects the iterator to the max_buffer.
 */
static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.read		= tracing_buffers_read,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};
4927 
4928 #endif /* CONFIG_TRACER_SNAPSHOT */
4929 
4930 static int tracing_buffers_open(struct inode *inode, struct file *filp)
4931 {
4932 	struct trace_cpu *tc = inode->i_private;
4933 	struct trace_array *tr = tc->tr;
4934 	struct ftrace_buffer_info *info;
4935 	int ret;
4936 
4937 	if (tracing_disabled)
4938 		return -ENODEV;
4939 
4940 	if (trace_array_get(tr) < 0)
4941 		return -ENODEV;
4942 
4943 	info = kzalloc(sizeof(*info), GFP_KERNEL);
4944 	if (!info) {
4945 		trace_array_put(tr);
4946 		return -ENOMEM;
4947 	}
4948 
4949 	mutex_lock(&trace_types_lock);
4950 
4951 	tr->ref++;
4952 
4953 	info->iter.tr		= tr;
4954 	info->iter.cpu_file	= tc->cpu;
4955 	info->iter.trace	= tr->current_trace;
4956 	info->iter.trace_buffer = &tr->trace_buffer;
4957 	info->spare		= NULL;
4958 	/* Force reading ring buffer for first read */
4959 	info->read		= (unsigned int)-1;
4960 
4961 	filp->private_data = info;
4962 
4963 	mutex_unlock(&trace_types_lock);
4964 
4965 	ret = nonseekable_open(inode, filp);
4966 	if (ret < 0)
4967 		trace_array_put(tr);
4968 
4969 	return ret;
4970 }
4971 
4972 static unsigned int
4973 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
4974 {
4975 	struct ftrace_buffer_info *info = filp->private_data;
4976 	struct trace_iterator *iter = &info->iter;
4977 
4978 	return trace_poll(iter, filp, poll_table);
4979 }
4980 
/*
 * Read handler for the raw ring buffer files.
 *
 * A page is pulled out of the ring buffer into info->spare and handed to
 * user space in pieces; info->read tracks how much of that page has been
 * copied so far.  When the buffer is empty the call either fails with
 * -EAGAIN (O_NONBLOCK) or waits via the tracer's wait_pipe() callback.
 *
 * Returns the number of bytes copied, 0 when a page could not be read
 * although the buffer is not empty, or a negative errno (-EBUSY,
 * -ENOMEM, -EAGAIN, -EINTR, -EFAULT).
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	ssize_t ret;
	ssize_t size;

	if (!count)
		return 0;

	mutex_lock(&trace_types_lock);

#ifdef CONFIG_TRACER_MAX_TRACE
	/* the snapshot buffer is off limits while the tracer itself uses it */
	if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
		size = -EBUSY;
		goto out_unlock;
	}
#endif

	/* the spare page is allocated on first use and kept until release */
	if (!info->spare)
		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
							  iter->cpu_file);
	size = -ENOMEM;
	if (!info->spare)
		goto out_unlock;

	/* Do we have previous read data to read? */
	if (info->read < PAGE_SIZE)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
				    &info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		if (trace_empty(iter)) {
			if ((filp->f_flags & O_NONBLOCK)) {
				size = -EAGAIN;
				goto out_unlock;
			}
			/* trace_types_lock is dropped while waiting for data */
			mutex_unlock(&trace_types_lock);
			iter->trace->wait_pipe(iter);
			mutex_lock(&trace_types_lock);
			if (signal_pending(current)) {
				size = -EINTR;
				goto out_unlock;
			}
			goto again;
		}
		size = 0;
		goto out_unlock;
	}

	/* a fresh page was read: start copying from its beginning */
	info->read = 0;
 read:
	size = PAGE_SIZE - info->read;
	if (size > count)
		size = count;

	/* copy_to_user() returns the number of bytes it could NOT copy */
	ret = copy_to_user(ubuf, info->spare + info->read, size);
	if (ret == size) {
		size = -EFAULT;
		goto out_unlock;
	}
	/* a partial copy is reported as a short read */
	size -= ret;

	*ppos += size;
	info->read += size;

 out_unlock:
	mutex_unlock(&trace_types_lock);

	return size;
}
5061 
5062 static int tracing_buffers_release(struct inode *inode, struct file *file)
5063 {
5064 	struct ftrace_buffer_info *info = file->private_data;
5065 	struct trace_iterator *iter = &info->iter;
5066 
5067 	mutex_lock(&trace_types_lock);
5068 
5069 	__trace_array_put(iter->tr);
5070 
5071 	if (info->spare)
5072 		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5073 	kfree(info);
5074 
5075 	mutex_unlock(&trace_types_lock);
5076 
5077 	return 0;
5078 }
5079 
/*
 * Reference-counted handle on a ring buffer read page that has been
 * handed to a pipe by tracing_buffers_splice_read().
 */
struct buffer_ref {
	/* ring buffer the page was allocated from and is returned to */
	struct ring_buffer	*buffer;
	/* page obtained from ring_buffer_alloc_read_page() */
	void			*page;
	/* starts at 1; page and handle are freed when it drops to 0 */
	int			ref;
};
5085 
5086 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5087 				    struct pipe_buffer *buf)
5088 {
5089 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5090 
5091 	if (--ref->ref)
5092 		return;
5093 
5094 	ring_buffer_free_read_page(ref->buffer, ref->page);
5095 	kfree(ref);
5096 	buf->private = 0;
5097 }
5098 
5099 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5100 				struct pipe_buffer *buf)
5101 {
5102 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5103 
5104 	ref->ref++;
5105 }
5106 
/*
 * Pipe buffer operations for a buffer.
 *
 * Only ->release and ->get are specific to ring buffer pages (they
 * maintain the buffer_ref reference count); everything else uses the
 * generic pipe buffer helpers.
 */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.can_merge		= 0,
	.map			= generic_pipe_buf_map,
	.unmap			= generic_pipe_buf_unmap,
	.confirm		= generic_pipe_buf_confirm,
	.release		= buffer_pipe_buf_release,
	.steal			= generic_pipe_buf_steal,
	.get			= buffer_pipe_buf_get,
};
5117 
5118 /*
5119  * Callback from splice_to_pipe(), if we need to release some pages
5120  * at the end of the spd in case we error'ed out in filling the pipe.
5121  */
5122 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5123 {
5124 	struct buffer_ref *ref =
5125 		(struct buffer_ref *)spd->partial[i].private;
5126 
5127 	if (--ref->ref)
5128 		return;
5129 
5130 	ring_buffer_free_read_page(ref->buffer, ref->page);
5131 	kfree(ref);
5132 	spd->partial[i].private = 0;
5133 }
5134 
5135 static ssize_t
5136 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5137 			    struct pipe_inode_info *pipe, size_t len,
5138 			    unsigned int flags)
5139 {
5140 	struct ftrace_buffer_info *info = file->private_data;
5141 	struct trace_iterator *iter = &info->iter;
5142 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5143 	struct page *pages_def[PIPE_DEF_BUFFERS];
5144 	struct splice_pipe_desc spd = {
5145 		.pages		= pages_def,
5146 		.partial	= partial_def,
5147 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5148 		.flags		= flags,
5149 		.ops		= &buffer_pipe_buf_ops,
5150 		.spd_release	= buffer_spd_release,
5151 	};
5152 	struct buffer_ref *ref;
5153 	int entries, size, i;
5154 	ssize_t ret;
5155 
5156 	mutex_lock(&trace_types_lock);
5157 
5158 #ifdef CONFIG_TRACER_MAX_TRACE
5159 	if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5160 		ret = -EBUSY;
5161 		goto out;
5162 	}
5163 #endif
5164 
5165 	if (splice_grow_spd(pipe, &spd)) {
5166 		ret = -ENOMEM;
5167 		goto out;
5168 	}
5169 
5170 	if (*ppos & (PAGE_SIZE - 1)) {
5171 		ret = -EINVAL;
5172 		goto out;
5173 	}
5174 
5175 	if (len & (PAGE_SIZE - 1)) {
5176 		if (len < PAGE_SIZE) {
5177 			ret = -EINVAL;
5178 			goto out;
5179 		}
5180 		len &= PAGE_MASK;
5181 	}
5182 
5183  again:
5184 	trace_access_lock(iter->cpu_file);
5185 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5186 
5187 	for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5188 		struct page *page;
5189 		int r;
5190 
5191 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5192 		if (!ref)
5193 			break;
5194 
5195 		ref->ref = 1;
5196 		ref->buffer = iter->trace_buffer->buffer;
5197 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5198 		if (!ref->page) {
5199 			kfree(ref);
5200 			break;
5201 		}
5202 
5203 		r = ring_buffer_read_page(ref->buffer, &ref->page,
5204 					  len, iter->cpu_file, 1);
5205 		if (r < 0) {
5206 			ring_buffer_free_read_page(ref->buffer, ref->page);
5207 			kfree(ref);
5208 			break;
5209 		}
5210 
5211 		/*
5212 		 * zero out any left over data, this is going to
5213 		 * user land.
5214 		 */
5215 		size = ring_buffer_page_len(ref->page);
5216 		if (size < PAGE_SIZE)
5217 			memset(ref->page + size, 0, PAGE_SIZE - size);
5218 
5219 		page = virt_to_page(ref->page);
5220 
5221 		spd.pages[i] = page;
5222 		spd.partial[i].len = PAGE_SIZE;
5223 		spd.partial[i].offset = 0;
5224 		spd.partial[i].private = (unsigned long)ref;
5225 		spd.nr_pages++;
5226 		*ppos += PAGE_SIZE;
5227 
5228 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5229 	}
5230 
5231 	trace_access_unlock(iter->cpu_file);
5232 	spd.nr_pages = i;
5233 
5234 	/* did we read anything? */
5235 	if (!spd.nr_pages) {
5236 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5237 			ret = -EAGAIN;
5238 			goto out;
5239 		}
5240 		mutex_unlock(&trace_types_lock);
5241 		iter->trace->wait_pipe(iter);
5242 		mutex_lock(&trace_types_lock);
5243 		if (signal_pending(current)) {
5244 			ret = -EINTR;
5245 			goto out;
5246 		}
5247 		goto again;
5248 	}
5249 
5250 	ret = splice_to_pipe(pipe, &spd);
5251 	splice_shrink_spd(&spd);
5252 out:
5253 	mutex_unlock(&trace_types_lock);
5254 
5255 	return ret;
5256 }
5257 
/*
 * File operations of the "trace_pipe_raw" files: page-wise binary
 * access to the ring buffer through read and splice; not seekable.
 */
static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};
5266 
5267 static ssize_t
5268 tracing_stats_read(struct file *filp, char __user *ubuf,
5269 		   size_t count, loff_t *ppos)
5270 {
5271 	struct trace_cpu *tc = filp->private_data;
5272 	struct trace_array *tr = tc->tr;
5273 	struct trace_buffer *trace_buf = &tr->trace_buffer;
5274 	struct trace_seq *s;
5275 	unsigned long cnt;
5276 	unsigned long long t;
5277 	unsigned long usec_rem;
5278 	int cpu = tc->cpu;
5279 
5280 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5281 	if (!s)
5282 		return -ENOMEM;
5283 
5284 	trace_seq_init(s);
5285 
5286 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5287 	trace_seq_printf(s, "entries: %ld\n", cnt);
5288 
5289 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5290 	trace_seq_printf(s, "overrun: %ld\n", cnt);
5291 
5292 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5293 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5294 
5295 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5296 	trace_seq_printf(s, "bytes: %ld\n", cnt);
5297 
5298 	if (trace_clocks[tr->clock_id].in_ns) {
5299 		/* local or global for trace_clock */
5300 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5301 		usec_rem = do_div(t, USEC_PER_SEC);
5302 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5303 								t, usec_rem);
5304 
5305 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5306 		usec_rem = do_div(t, USEC_PER_SEC);
5307 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5308 	} else {
5309 		/* counter or tsc mode for trace_clock */
5310 		trace_seq_printf(s, "oldest event ts: %llu\n",
5311 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5312 
5313 		trace_seq_printf(s, "now ts: %llu\n",
5314 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
5315 	}
5316 
5317 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5318 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
5319 
5320 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5321 	trace_seq_printf(s, "read events: %ld\n", cnt);
5322 
5323 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5324 
5325 	kfree(s);
5326 
5327 	return count;
5328 }
5329 
/*
 * File operations of the per-cpu "stats" files: read-only ring buffer
 * statistics produced by tracing_stats_read().
 */
static const struct file_operations tracing_stats_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_stats_read,
	.llseek		= generic_file_llseek,
};
5335 
5336 #ifdef CONFIG_DYNAMIC_FTRACE
5337 
/*
 * Weak default: writes nothing into @buf and returns 0.
 * tracing_read_dyn_info() adds the return value to its output length,
 * so an override presumably returns the number of bytes it wrote (at
 * most @size) - confirm against the architecture implementations.
 */
int __weak ftrace_arch_read_dyn_info(char *buf, int size)
{
	return 0;
}
5342 
5343 static ssize_t
5344 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5345 		  size_t cnt, loff_t *ppos)
5346 {
5347 	static char ftrace_dyn_info_buffer[1024];
5348 	static DEFINE_MUTEX(dyn_info_mutex);
5349 	unsigned long *p = filp->private_data;
5350 	char *buf = ftrace_dyn_info_buffer;
5351 	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5352 	int r;
5353 
5354 	mutex_lock(&dyn_info_mutex);
5355 	r = sprintf(buf, "%ld ", *p);
5356 
5357 	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5358 	buf[r++] = '\n';
5359 
5360 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5361 
5362 	mutex_unlock(&dyn_info_mutex);
5363 
5364 	return r;
5365 }
5366 
/*
 * Read-only file operations backed by tracing_read_dyn_info(); only
 * built with CONFIG_DYNAMIC_FTRACE.
 */
static const struct file_operations tracing_dyn_info_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_read_dyn_info,
	.llseek		= generic_file_llseek,
};
5372 #endif /* CONFIG_DYNAMIC_FTRACE */
5373 
5374 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/*
 * Function probe callback without a count limit: takes a snapshot every
 * time it is called.  @ip, @parent_ip and @data are not used.
 */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
{
	tracing_snapshot();
}
5380 
/*
 * Function probe callback with a count limit.
 *
 * The probe's data slot itself (not something it points to) is used as
 * the remaining-snapshots counter.  Nothing happens once it has reached
 * zero; otherwise it is decremented (unless it is the "unlimited"
 * value -1) and a snapshot is taken.
 */
static void
ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
{
	/* cast to the type of the variable, not to a differently signed one */
	unsigned long *count = (unsigned long *)data;

	if (!*count)
		return;

	if (*count != -1UL)
		(*count)--;

	tracing_snapshot();
}
5394 
/*
 * Probe ->print callback: emits "<function>:snapshot" followed by
 * either ":unlimited" (counter value -1) or ":count=<n>".
 * Always returns 0.
 */
static int
ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
		      struct ftrace_probe_ops *ops, void *data)
{
	const long remaining = (long)data;

	seq_printf(m, "%ps:", (void *)ip);

	seq_printf(m, "snapshot");

	if (remaining != -1) {
		seq_printf(m, ":count=%ld\n", remaining);
		return 0;
	}

	seq_printf(m, ":unlimited\n");
	return 0;
}
5412 
/* Probe operations for the "snapshot" command given without a count. */
static struct ftrace_probe_ops snapshot_probe_ops = {
	.func			= ftrace_snapshot,
	.print			= ftrace_snapshot_print,
};
5417 
/*
 * Probe operations for the "snapshot" command given with a parameter;
 * the callback honours the counter kept in the probe data.
 */
static struct ftrace_probe_ops snapshot_count_probe_ops = {
	.func			= ftrace_count_snapshot,
	.print			= ftrace_snapshot_print,
};
5422 
5423 static int
5424 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5425 			       char *glob, char *cmd, char *param, int enable)
5426 {
5427 	struct ftrace_probe_ops *ops;
5428 	void *count = (void *)-1;
5429 	char *number;
5430 	int ret;
5431 
5432 	/* hash funcs only work with set_ftrace_filter */
5433 	if (!enable)
5434 		return -EINVAL;
5435 
5436 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
5437 
5438 	if (glob[0] == '!') {
5439 		unregister_ftrace_function_probe_func(glob+1, ops);
5440 		return 0;
5441 	}
5442 
5443 	if (!param)
5444 		goto out_reg;
5445 
5446 	number = strsep(&param, ":");
5447 
5448 	if (!strlen(number))
5449 		goto out_reg;
5450 
5451 	/*
5452 	 * We use the callback data field (which is a pointer)
5453 	 * as our counter.
5454 	 */
5455 	ret = kstrtoul(number, 0, (unsigned long *)&count);
5456 	if (ret)
5457 		return ret;
5458 
5459  out_reg:
5460 	ret = register_ftrace_function_probe(glob, ops, count);
5461 
5462 	if (ret >= 0)
5463 		alloc_snapshot(&global_trace);
5464 
5465 	return ret < 0 ? ret : 0;
5466 }
5467 
/* Binds the command name "snapshot" to its handler. */
static struct ftrace_func_command ftrace_snapshot_cmd = {
	.name			= "snapshot",
	.func			= ftrace_trace_snapshot_callback,
};
5472 
/*
 * Register the "snapshot" function command; returns the result of
 * register_ftrace_command().
 */
static int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
5477 #else
/* Stub when snapshot or dynamic ftrace support is not built in: nothing to register. */
static inline int register_snapshot_cmd(void) { return 0; }
5479 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5480 
5481 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5482 {
5483 	if (tr->dir)
5484 		return tr->dir;
5485 
5486 	if (!debugfs_initialized())
5487 		return NULL;
5488 
5489 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5490 		tr->dir = debugfs_create_dir("tracing", NULL);
5491 
5492 	if (!tr->dir)
5493 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
5494 
5495 	return tr->dir;
5496 }
5497 
5498 struct dentry *tracing_init_dentry(void)
5499 {
5500 	return tracing_init_dentry_tr(&global_trace);
5501 }
5502 
5503 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5504 {
5505 	struct dentry *d_tracer;
5506 
5507 	if (tr->percpu_dir)
5508 		return tr->percpu_dir;
5509 
5510 	d_tracer = tracing_init_dentry_tr(tr);
5511 	if (!d_tracer)
5512 		return NULL;
5513 
5514 	tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5515 
5516 	WARN_ONCE(!tr->percpu_dir,
5517 		  "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5518 
5519 	return tr->percpu_dir;
5520 }
5521 
5522 static void
5523 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5524 {
5525 	struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu);
5526 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5527 	struct dentry *d_cpu;
5528 	char cpu_dir[30]; /* 30 characters should be more than enough */
5529 
5530 	if (!d_percpu)
5531 		return;
5532 
5533 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
5534 	d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5535 	if (!d_cpu) {
5536 		pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5537 		return;
5538 	}
5539 
5540 	/* per cpu trace_pipe */
5541 	trace_create_file("trace_pipe", 0444, d_cpu,
5542 			(void *)&data->trace_cpu, &tracing_pipe_fops);
5543 
5544 	/* per cpu trace */
5545 	trace_create_file("trace", 0644, d_cpu,
5546 			(void *)&data->trace_cpu, &tracing_fops);
5547 
5548 	trace_create_file("trace_pipe_raw", 0444, d_cpu,
5549 			(void *)&data->trace_cpu, &tracing_buffers_fops);
5550 
5551 	trace_create_file("stats", 0444, d_cpu,
5552 			(void *)&data->trace_cpu, &tracing_stats_fops);
5553 
5554 	trace_create_file("buffer_size_kb", 0444, d_cpu,
5555 			(void *)&data->trace_cpu, &tracing_entries_fops);
5556 
5557 #ifdef CONFIG_TRACER_SNAPSHOT
5558 	trace_create_file("snapshot", 0644, d_cpu,
5559 			  (void *)&data->trace_cpu, &snapshot_fops);
5560 
5561 	trace_create_file("snapshot_raw", 0444, d_cpu,
5562 			(void *)&data->trace_cpu, &snapshot_raw_fops);
5563 #endif
5564 }
5565 
5566 #ifdef CONFIG_FTRACE_SELFTEST
5567 /* Let selftest have access to static functions in this file */
5568 #include "trace_selftest.c"
5569 #endif
5570 
/* Per-file state of one tracer specific option file under "options". */
struct trace_option_dentry {
	/* option whose bit this file reads and toggles */
	struct tracer_opt		*opt;
	/* flag word of the tracer that holds opt->bit */
	struct tracer_flags		*flags;
	/* trace array the option file belongs to */
	struct trace_array		*tr;
	/* the debugfs file itself (may be NULL if creation failed) */
	struct dentry			*entry;
};
5577 
5578 static ssize_t
5579 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5580 			loff_t *ppos)
5581 {
5582 	struct trace_option_dentry *topt = filp->private_data;
5583 	char *buf;
5584 
5585 	if (topt->flags->val & topt->opt->bit)
5586 		buf = "1\n";
5587 	else
5588 		buf = "0\n";
5589 
5590 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5591 }
5592 
5593 static ssize_t
5594 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5595 			 loff_t *ppos)
5596 {
5597 	struct trace_option_dentry *topt = filp->private_data;
5598 	unsigned long val;
5599 	int ret;
5600 
5601 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5602 	if (ret)
5603 		return ret;
5604 
5605 	if (val != 0 && val != 1)
5606 		return -EINVAL;
5607 
5608 	if (!!(topt->flags->val & topt->opt->bit) != val) {
5609 		mutex_lock(&trace_types_lock);
5610 		ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5611 					  topt->opt, !val);
5612 		mutex_unlock(&trace_types_lock);
5613 		if (ret)
5614 			return ret;
5615 	}
5616 
5617 	*ppos += cnt;
5618 
5619 	return cnt;
5620 }
5621 
5622 
5623 static const struct file_operations trace_options_fops = {
5624 	.open = tracing_open_generic,
5625 	.read = trace_options_read,
5626 	.write = trace_options_write,
5627 	.llseek	= generic_file_llseek,
5628 };
5629 
5630 static ssize_t
5631 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5632 			loff_t *ppos)
5633 {
5634 	long index = (long)filp->private_data;
5635 	char *buf;
5636 
5637 	if (trace_flags & (1 << index))
5638 		buf = "1\n";
5639 	else
5640 		buf = "0\n";
5641 
5642 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5643 }
5644 
5645 static ssize_t
5646 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5647 			 loff_t *ppos)
5648 {
5649 	struct trace_array *tr = &global_trace;
5650 	long index = (long)filp->private_data;
5651 	unsigned long val;
5652 	int ret;
5653 
5654 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5655 	if (ret)
5656 		return ret;
5657 
5658 	if (val != 0 && val != 1)
5659 		return -EINVAL;
5660 
5661 	mutex_lock(&trace_types_lock);
5662 	ret = set_tracer_flag(tr, 1 << index, val);
5663 	mutex_unlock(&trace_types_lock);
5664 
5665 	if (ret < 0)
5666 		return ret;
5667 
5668 	*ppos += cnt;
5669 
5670 	return cnt;
5671 }
5672 
5673 static const struct file_operations trace_options_core_fops = {
5674 	.open = tracing_open_generic,
5675 	.read = trace_options_core_read,
5676 	.write = trace_options_core_write,
5677 	.llseek = generic_file_llseek,
5678 };
5679 
5680 struct dentry *trace_create_file(const char *name,
5681 				 umode_t mode,
5682 				 struct dentry *parent,
5683 				 void *data,
5684 				 const struct file_operations *fops)
5685 {
5686 	struct dentry *ret;
5687 
5688 	ret = debugfs_create_file(name, mode, parent, data, fops);
5689 	if (!ret)
5690 		pr_warning("Could not create debugfs '%s' entry\n", name);
5691 
5692 	return ret;
5693 }
5694 
5695 
5696 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5697 {
5698 	struct dentry *d_tracer;
5699 
5700 	if (tr->options)
5701 		return tr->options;
5702 
5703 	d_tracer = tracing_init_dentry_tr(tr);
5704 	if (!d_tracer)
5705 		return NULL;
5706 
5707 	tr->options = debugfs_create_dir("options", d_tracer);
5708 	if (!tr->options) {
5709 		pr_warning("Could not create debugfs directory 'options'\n");
5710 		return NULL;
5711 	}
5712 
5713 	return tr->options;
5714 }
5715 
5716 static void
5717 create_trace_option_file(struct trace_array *tr,
5718 			 struct trace_option_dentry *topt,
5719 			 struct tracer_flags *flags,
5720 			 struct tracer_opt *opt)
5721 {
5722 	struct dentry *t_options;
5723 
5724 	t_options = trace_options_init_dentry(tr);
5725 	if (!t_options)
5726 		return;
5727 
5728 	topt->flags = flags;
5729 	topt->opt = opt;
5730 	topt->tr = tr;
5731 
5732 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5733 				    &trace_options_fops);
5734 
5735 }
5736 
5737 static struct trace_option_dentry *
5738 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5739 {
5740 	struct trace_option_dentry *topts;
5741 	struct tracer_flags *flags;
5742 	struct tracer_opt *opts;
5743 	int cnt;
5744 
5745 	if (!tracer)
5746 		return NULL;
5747 
5748 	flags = tracer->flags;
5749 
5750 	if (!flags || !flags->opts)
5751 		return NULL;
5752 
5753 	opts = flags->opts;
5754 
5755 	for (cnt = 0; opts[cnt].name; cnt++)
5756 		;
5757 
5758 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5759 	if (!topts)
5760 		return NULL;
5761 
5762 	for (cnt = 0; opts[cnt].name; cnt++)
5763 		create_trace_option_file(tr, &topts[cnt], flags,
5764 					 &opts[cnt]);
5765 
5766 	return topts;
5767 }
5768 
5769 static void
5770 destroy_trace_option_files(struct trace_option_dentry *topts)
5771 {
5772 	int cnt;
5773 
5774 	if (!topts)
5775 		return;
5776 
5777 	for (cnt = 0; topts[cnt].opt; cnt++) {
5778 		if (topts[cnt].entry)
5779 			debugfs_remove(topts[cnt].entry);
5780 	}
5781 
5782 	kfree(topts);
5783 }
5784 
5785 static struct dentry *
5786 create_trace_option_core_file(struct trace_array *tr,
5787 			      const char *option, long index)
5788 {
5789 	struct dentry *t_options;
5790 
5791 	t_options = trace_options_init_dentry(tr);
5792 	if (!t_options)
5793 		return NULL;
5794 
5795 	return trace_create_file(option, 0644, t_options, (void *)index,
5796 				    &trace_options_core_fops);
5797 }
5798 
5799 static __init void create_trace_options_dir(struct trace_array *tr)
5800 {
5801 	struct dentry *t_options;
5802 	int i;
5803 
5804 	t_options = trace_options_init_dentry(tr);
5805 	if (!t_options)
5806 		return;
5807 
5808 	for (i = 0; trace_options[i]; i++)
5809 		create_trace_option_core_file(tr, trace_options[i], i);
5810 }
5811 
5812 static ssize_t
5813 rb_simple_read(struct file *filp, char __user *ubuf,
5814 	       size_t cnt, loff_t *ppos)
5815 {
5816 	struct trace_array *tr = filp->private_data;
5817 	char buf[64];
5818 	int r;
5819 
5820 	r = tracer_tracing_is_on(tr);
5821 	r = sprintf(buf, "%d\n", r);
5822 
5823 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5824 }
5825 
5826 static ssize_t
5827 rb_simple_write(struct file *filp, const char __user *ubuf,
5828 		size_t cnt, loff_t *ppos)
5829 {
5830 	struct trace_array *tr = filp->private_data;
5831 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
5832 	unsigned long val;
5833 	int ret;
5834 
5835 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5836 	if (ret)
5837 		return ret;
5838 
5839 	if (buffer) {
5840 		mutex_lock(&trace_types_lock);
5841 		if (val) {
5842 			tracer_tracing_on(tr);
5843 			if (tr->current_trace->start)
5844 				tr->current_trace->start(tr);
5845 		} else {
5846 			tracer_tracing_off(tr);
5847 			if (tr->current_trace->stop)
5848 				tr->current_trace->stop(tr);
5849 		}
5850 		mutex_unlock(&trace_types_lock);
5851 	}
5852 
5853 	(*ppos)++;
5854 
5855 	return cnt;
5856 }
5857 
5858 static const struct file_operations rb_simple_fops = {
5859 	.open		= tracing_open_generic_tr,
5860 	.read		= rb_simple_read,
5861 	.write		= rb_simple_write,
5862 	.release	= tracing_release_generic_tr,
5863 	.llseek		= default_llseek,
5864 };
5865 
/* The "instances" debugfs directory; parent of every instance directory. */
struct dentry *trace_instance_dir;

/* Defined further down; needed earlier by new_instance_create(). */
static void init_tracer_debugfs(struct trace_array *tr,
				struct dentry *d_tracer);
5870 
5871 static void init_trace_buffers(struct trace_array *tr, struct trace_buffer *buf)
5872 {
5873 	int cpu;
5874 
5875 	for_each_tracing_cpu(cpu) {
5876 		memset(per_cpu_ptr(buf->data, cpu), 0, sizeof(struct trace_array_cpu));
5877 		per_cpu_ptr(buf->data, cpu)->trace_cpu.cpu = cpu;
5878 		per_cpu_ptr(buf->data, cpu)->trace_cpu.tr = tr;
5879 	}
5880 }
5881 
5882 static int
5883 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5884 {
5885 	enum ring_buffer_flags rb_flags;
5886 
5887 	rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5888 
5889 	buf->buffer = ring_buffer_alloc(size, rb_flags);
5890 	if (!buf->buffer)
5891 		return -ENOMEM;
5892 
5893 	buf->data = alloc_percpu(struct trace_array_cpu);
5894 	if (!buf->data) {
5895 		ring_buffer_free(buf->buffer);
5896 		return -ENOMEM;
5897 	}
5898 
5899 	init_trace_buffers(tr, buf);
5900 
5901 	/* Allocate the first page for all buffers */
5902 	set_buffer_entries(&tr->trace_buffer,
5903 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
5904 
5905 	return 0;
5906 }
5907 
5908 static int allocate_trace_buffers(struct trace_array *tr, int size)
5909 {
5910 	int ret;
5911 
5912 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
5913 	if (ret)
5914 		return ret;
5915 
5916 #ifdef CONFIG_TRACER_MAX_TRACE
5917 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
5918 				    allocate_snapshot ? size : 1);
5919 	if (WARN_ON(ret)) {
5920 		ring_buffer_free(tr->trace_buffer.buffer);
5921 		free_percpu(tr->trace_buffer.data);
5922 		return -ENOMEM;
5923 	}
5924 	tr->allocated_snapshot = allocate_snapshot;
5925 
5926 	/*
5927 	 * Only the top level trace array gets its snapshot allocated
5928 	 * from the kernel command line.
5929 	 */
5930 	allocate_snapshot = false;
5931 #endif
5932 	return 0;
5933 }
5934 
5935 static int new_instance_create(const char *name)
5936 {
5937 	struct trace_array *tr;
5938 	int ret;
5939 
5940 	mutex_lock(&trace_types_lock);
5941 
5942 	ret = -EEXIST;
5943 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
5944 		if (tr->name && strcmp(tr->name, name) == 0)
5945 			goto out_unlock;
5946 	}
5947 
5948 	ret = -ENOMEM;
5949 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
5950 	if (!tr)
5951 		goto out_unlock;
5952 
5953 	tr->name = kstrdup(name, GFP_KERNEL);
5954 	if (!tr->name)
5955 		goto out_free_tr;
5956 
5957 	raw_spin_lock_init(&tr->start_lock);
5958 
5959 	tr->current_trace = &nop_trace;
5960 
5961 	INIT_LIST_HEAD(&tr->systems);
5962 	INIT_LIST_HEAD(&tr->events);
5963 
5964 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
5965 		goto out_free_tr;
5966 
5967 	/* Holder for file callbacks */
5968 	tr->trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
5969 	tr->trace_cpu.tr = tr;
5970 
5971 	tr->dir = debugfs_create_dir(name, trace_instance_dir);
5972 	if (!tr->dir)
5973 		goto out_free_tr;
5974 
5975 	ret = event_trace_add_tracer(tr->dir, tr);
5976 	if (ret)
5977 		goto out_free_tr;
5978 
5979 	init_tracer_debugfs(tr, tr->dir);
5980 
5981 	list_add(&tr->list, &ftrace_trace_arrays);
5982 
5983 	mutex_unlock(&trace_types_lock);
5984 
5985 	return 0;
5986 
5987  out_free_tr:
5988 	if (tr->trace_buffer.buffer)
5989 		ring_buffer_free(tr->trace_buffer.buffer);
5990 	kfree(tr->name);
5991 	kfree(tr);
5992 
5993  out_unlock:
5994 	mutex_unlock(&trace_types_lock);
5995 
5996 	return ret;
5997 
5998 }
5999 
6000 static int instance_delete(const char *name)
6001 {
6002 	struct trace_array *tr;
6003 	int found = 0;
6004 	int ret;
6005 
6006 	mutex_lock(&trace_types_lock);
6007 
6008 	ret = -ENODEV;
6009 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6010 		if (tr->name && strcmp(tr->name, name) == 0) {
6011 			found = 1;
6012 			break;
6013 		}
6014 	}
6015 	if (!found)
6016 		goto out_unlock;
6017 
6018 	ret = -EBUSY;
6019 	if (tr->ref)
6020 		goto out_unlock;
6021 
6022 	list_del(&tr->list);
6023 
6024 	event_trace_del_tracer(tr);
6025 	debugfs_remove_recursive(tr->dir);
6026 	free_percpu(tr->trace_buffer.data);
6027 	ring_buffer_free(tr->trace_buffer.buffer);
6028 
6029 	kfree(tr->name);
6030 	kfree(tr);
6031 
6032 	ret = 0;
6033 
6034  out_unlock:
6035 	mutex_unlock(&trace_types_lock);
6036 
6037 	return ret;
6038 }
6039 
6040 static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode)
6041 {
6042 	struct dentry *parent;
6043 	int ret;
6044 
6045 	/* Paranoid: Make sure the parent is the "instances" directory */
6046 	parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6047 	if (WARN_ON_ONCE(parent != trace_instance_dir))
6048 		return -ENOENT;
6049 
6050 	/*
6051 	 * The inode mutex is locked, but debugfs_create_dir() will also
6052 	 * take the mutex. As the instances directory can not be destroyed
6053 	 * or changed in any other way, it is safe to unlock it, and
6054 	 * let the dentry try. If two users try to make the same dir at
6055 	 * the same time, then the new_instance_create() will determine the
6056 	 * winner.
6057 	 */
6058 	mutex_unlock(&inode->i_mutex);
6059 
6060 	ret = new_instance_create(dentry->d_iname);
6061 
6062 	mutex_lock(&inode->i_mutex);
6063 
6064 	return ret;
6065 }
6066 
6067 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6068 {
6069 	struct dentry *parent;
6070 	int ret;
6071 
6072 	/* Paranoid: Make sure the parent is the "instances" directory */
6073 	parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6074 	if (WARN_ON_ONCE(parent != trace_instance_dir))
6075 		return -ENOENT;
6076 
6077 	/* The caller did a dget() on dentry */
6078 	mutex_unlock(&dentry->d_inode->i_mutex);
6079 
6080 	/*
6081 	 * The inode mutex is locked, but debugfs_create_dir() will also
6082 	 * take the mutex. As the instances directory can not be destroyed
6083 	 * or changed in any other way, it is safe to unlock it, and
6084 	 * let the dentry try. If two users try to make the same dir at
6085 	 * the same time, then the instance_delete() will determine the
6086 	 * winner.
6087 	 */
6088 	mutex_unlock(&inode->i_mutex);
6089 
6090 	ret = instance_delete(dentry->d_iname);
6091 
6092 	mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6093 	mutex_lock(&dentry->d_inode->i_mutex);
6094 
6095 	return ret;
6096 }
6097 
6098 static const struct inode_operations instance_dir_inode_operations = {
6099 	.lookup		= simple_lookup,
6100 	.mkdir		= instance_mkdir,
6101 	.rmdir		= instance_rmdir,
6102 };
6103 
6104 static __init void create_trace_instances(struct dentry *d_tracer)
6105 {
6106 	trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6107 	if (WARN_ON(!trace_instance_dir))
6108 		return;
6109 
6110 	/* Hijack the dir inode operations, to allow mkdir */
6111 	trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6112 }
6113 
6114 static void
6115 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6116 {
6117 	int cpu;
6118 
6119 	trace_create_file("trace_options", 0644, d_tracer,
6120 			  tr, &tracing_iter_fops);
6121 
6122 	trace_create_file("trace", 0644, d_tracer,
6123 			(void *)&tr->trace_cpu, &tracing_fops);
6124 
6125 	trace_create_file("trace_pipe", 0444, d_tracer,
6126 			(void *)&tr->trace_cpu, &tracing_pipe_fops);
6127 
6128 	trace_create_file("buffer_size_kb", 0644, d_tracer,
6129 			(void *)&tr->trace_cpu, &tracing_entries_fops);
6130 
6131 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6132 			  tr, &tracing_total_entries_fops);
6133 
6134 	trace_create_file("free_buffer", 0200, d_tracer,
6135 			  tr, &tracing_free_buffer_fops);
6136 
6137 	trace_create_file("trace_marker", 0220, d_tracer,
6138 			  tr, &tracing_mark_fops);
6139 
6140 	trace_create_file("trace_clock", 0644, d_tracer, tr,
6141 			  &trace_clock_fops);
6142 
6143 	trace_create_file("tracing_on", 0644, d_tracer,
6144 			    tr, &rb_simple_fops);
6145 
6146 #ifdef CONFIG_TRACER_SNAPSHOT
6147 	trace_create_file("snapshot", 0644, d_tracer,
6148 			  (void *)&tr->trace_cpu, &snapshot_fops);
6149 #endif
6150 
6151 	for_each_tracing_cpu(cpu)
6152 		tracing_init_debugfs_percpu(tr, cpu);
6153 
6154 }
6155 
6156 static __init int tracer_init_debugfs(void)
6157 {
6158 	struct dentry *d_tracer;
6159 
6160 	trace_access_lock_init();
6161 
6162 	d_tracer = tracing_init_dentry();
6163 	if (!d_tracer)
6164 		return 0;
6165 
6166 	init_tracer_debugfs(&global_trace, d_tracer);
6167 
6168 	trace_create_file("tracing_cpumask", 0644, d_tracer,
6169 			&global_trace, &tracing_cpumask_fops);
6170 
6171 	trace_create_file("available_tracers", 0444, d_tracer,
6172 			&global_trace, &show_traces_fops);
6173 
6174 	trace_create_file("current_tracer", 0644, d_tracer,
6175 			&global_trace, &set_tracer_fops);
6176 
6177 #ifdef CONFIG_TRACER_MAX_TRACE
6178 	trace_create_file("tracing_max_latency", 0644, d_tracer,
6179 			&tracing_max_latency, &tracing_max_lat_fops);
6180 #endif
6181 
6182 	trace_create_file("tracing_thresh", 0644, d_tracer,
6183 			&tracing_thresh, &tracing_max_lat_fops);
6184 
6185 	trace_create_file("README", 0444, d_tracer,
6186 			NULL, &tracing_readme_fops);
6187 
6188 	trace_create_file("saved_cmdlines", 0444, d_tracer,
6189 			NULL, &tracing_saved_cmdlines_fops);
6190 
6191 #ifdef CONFIG_DYNAMIC_FTRACE
6192 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6193 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6194 #endif
6195 
6196 	create_trace_instances(d_tracer);
6197 
6198 	create_trace_options_dir(&global_trace);
6199 
6200 	return 0;
6201 }
6202 
6203 static int trace_panic_handler(struct notifier_block *this,
6204 			       unsigned long event, void *unused)
6205 {
6206 	if (ftrace_dump_on_oops)
6207 		ftrace_dump(ftrace_dump_on_oops);
6208 	return NOTIFY_OK;
6209 }
6210 
6211 static struct notifier_block trace_panic_notifier = {
6212 	.notifier_call  = trace_panic_handler,
6213 	.next           = NULL,
6214 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
6215 };
6216 
6217 static int trace_die_handler(struct notifier_block *self,
6218 			     unsigned long val,
6219 			     void *data)
6220 {
6221 	switch (val) {
6222 	case DIE_OOPS:
6223 		if (ftrace_dump_on_oops)
6224 			ftrace_dump(ftrace_dump_on_oops);
6225 		break;
6226 	default:
6227 		break;
6228 	}
6229 	return NOTIFY_OK;
6230 }
6231 
6232 static struct notifier_block trace_die_notifier = {
6233 	.notifier_call = trace_die_handler,
6234 	.priority = 200
6235 };
6236 
/*
 * printk tops out at 1024 characters; we do not need that much.
 * Nothing should print 1000 characters in one go anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * The log level used for the ftrace dump, defined in a single place so
 * that it is easy to change should we ever want a different level.
 */
#define KERN_TRACE		KERN_EMERG
6249 
6250 void
6251 trace_printk_seq(struct trace_seq *s)
6252 {
6253 	/* Probably should print a warning here. */
6254 	if (s->len >= TRACE_MAX_PRINT)
6255 		s->len = TRACE_MAX_PRINT;
6256 
6257 	/* should be zero ended, but we are paranoid. */
6258 	s->buffer[s->len] = 0;
6259 
6260 	printk(KERN_TRACE "%s", s->buffer);
6261 
6262 	trace_seq_init(s);
6263 }
6264 
6265 void trace_init_global_iter(struct trace_iterator *iter)
6266 {
6267 	iter->tr = &global_trace;
6268 	iter->trace = iter->tr->current_trace;
6269 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
6270 	iter->trace_buffer = &global_trace.trace_buffer;
6271 }
6272 
6273 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6274 {
6275 	/* use static because iter can be a bit big for the stack */
6276 	static struct trace_iterator iter;
6277 	static atomic_t dump_running;
6278 	unsigned int old_userobj;
6279 	unsigned long flags;
6280 	int cnt = 0, cpu;
6281 
6282 	/* Only allow one dump user at a time. */
6283 	if (atomic_inc_return(&dump_running) != 1) {
6284 		atomic_dec(&dump_running);
6285 		return;
6286 	}
6287 
6288 	/*
6289 	 * Always turn off tracing when we dump.
6290 	 * We don't need to show trace output of what happens
6291 	 * between multiple crashes.
6292 	 *
6293 	 * If the user does a sysrq-z, then they can re-enable
6294 	 * tracing with echo 1 > tracing_on.
6295 	 */
6296 	tracing_off();
6297 
6298 	local_irq_save(flags);
6299 
6300 	/* Simulate the iterator */
6301 	trace_init_global_iter(&iter);
6302 
6303 	for_each_tracing_cpu(cpu) {
6304 		atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6305 	}
6306 
6307 	old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6308 
6309 	/* don't look at user memory in panic mode */
6310 	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6311 
6312 	switch (oops_dump_mode) {
6313 	case DUMP_ALL:
6314 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
6315 		break;
6316 	case DUMP_ORIG:
6317 		iter.cpu_file = raw_smp_processor_id();
6318 		break;
6319 	case DUMP_NONE:
6320 		goto out_enable;
6321 	default:
6322 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6323 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
6324 	}
6325 
6326 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
6327 
6328 	/* Did function tracer already get disabled? */
6329 	if (ftrace_is_dead()) {
6330 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6331 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6332 	}
6333 
6334 	/*
6335 	 * We need to stop all tracing on all CPUS to read the
6336 	 * the next buffer. This is a bit expensive, but is
6337 	 * not done often. We fill all what we can read,
6338 	 * and then release the locks again.
6339 	 */
6340 
6341 	while (!trace_empty(&iter)) {
6342 
6343 		if (!cnt)
6344 			printk(KERN_TRACE "---------------------------------\n");
6345 
6346 		cnt++;
6347 
6348 		/* reset all but tr, trace, and overruns */
6349 		memset(&iter.seq, 0,
6350 		       sizeof(struct trace_iterator) -
6351 		       offsetof(struct trace_iterator, seq));
6352 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
6353 		iter.pos = -1;
6354 
6355 		if (trace_find_next_entry_inc(&iter) != NULL) {
6356 			int ret;
6357 
6358 			ret = print_trace_line(&iter);
6359 			if (ret != TRACE_TYPE_NO_CONSUME)
6360 				trace_consume(&iter);
6361 		}
6362 		touch_nmi_watchdog();
6363 
6364 		trace_printk_seq(&iter.seq);
6365 	}
6366 
6367 	if (!cnt)
6368 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
6369 	else
6370 		printk(KERN_TRACE "---------------------------------\n");
6371 
6372  out_enable:
6373 	trace_flags |= old_userobj;
6374 
6375 	for_each_tracing_cpu(cpu) {
6376 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6377 	}
6378  	atomic_dec(&dump_running);
6379 	local_irq_restore(flags);
6380 }
6381 EXPORT_SYMBOL_GPL(ftrace_dump);
6382 
6383 __init static int tracer_alloc_buffers(void)
6384 {
6385 	int ring_buf_size;
6386 	int ret = -ENOMEM;
6387 
6388 
6389 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6390 		goto out;
6391 
6392 	if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
6393 		goto out_free_buffer_mask;
6394 
6395 	/* Only allocate trace_printk buffers if a trace_printk exists */
6396 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6397 		/* Must be called before global_trace.buffer is allocated */
6398 		trace_printk_init_buffers();
6399 
6400 	/* To save memory, keep the ring buffer size to its minimum */
6401 	if (ring_buffer_expanded)
6402 		ring_buf_size = trace_buf_size;
6403 	else
6404 		ring_buf_size = 1;
6405 
6406 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6407 	cpumask_copy(tracing_cpumask, cpu_all_mask);
6408 
6409 	raw_spin_lock_init(&global_trace.start_lock);
6410 
6411 	/* TODO: make the number of buffers hot pluggable with CPUS */
6412 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6413 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6414 		WARN_ON(1);
6415 		goto out_free_cpumask;
6416 	}
6417 
6418 	if (global_trace.buffer_disabled)
6419 		tracing_off();
6420 
6421 	trace_init_cmdlines();
6422 
6423 	/*
6424 	 * register_tracer() might reference current_trace, so it
6425 	 * needs to be set before we register anything. This is
6426 	 * just a bootstrap of current_trace anyway.
6427 	 */
6428 	global_trace.current_trace = &nop_trace;
6429 
6430 	register_tracer(&nop_trace);
6431 
6432 	/* All seems OK, enable tracing */
6433 	tracing_disabled = 0;
6434 
6435 	atomic_notifier_chain_register(&panic_notifier_list,
6436 				       &trace_panic_notifier);
6437 
6438 	register_die_notifier(&trace_die_notifier);
6439 
6440 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6441 
6442 	/* Holder for file callbacks */
6443 	global_trace.trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
6444 	global_trace.trace_cpu.tr = &global_trace;
6445 
6446 	INIT_LIST_HEAD(&global_trace.systems);
6447 	INIT_LIST_HEAD(&global_trace.events);
6448 	list_add(&global_trace.list, &ftrace_trace_arrays);
6449 
6450 	while (trace_boot_options) {
6451 		char *option;
6452 
6453 		option = strsep(&trace_boot_options, ",");
6454 		trace_set_options(&global_trace, option);
6455 	}
6456 
6457 	register_snapshot_cmd();
6458 
6459 	return 0;
6460 
6461 out_free_cpumask:
6462 	free_percpu(global_trace.trace_buffer.data);
6463 #ifdef CONFIG_TRACER_MAX_TRACE
6464 	free_percpu(global_trace.max_buffer.data);
6465 #endif
6466 	free_cpumask_var(tracing_cpumask);
6467 out_free_buffer_mask:
6468 	free_cpumask_var(tracing_buffer_mask);
6469 out:
6470 	return ret;
6471 }
6472 
6473 __init static int clear_boot_tracer(void)
6474 {
6475 	/*
6476 	 * The default tracer at boot buffer is an init section.
6477 	 * This function is called in lateinit. If we did not
6478 	 * find the boot tracer, then clear it out, to prevent
6479 	 * later registration from accessing the buffer that is
6480 	 * about to be freed.
6481 	 */
6482 	if (!default_bootup_tracer)
6483 		return 0;
6484 
6485 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6486 	       default_bootup_tracer);
6487 	default_bootup_tracer = NULL;
6488 
6489 	return 0;
6490 }
6491 
/* Buffers come up at the early level, debugfs files at fs, cleanup at late. */
early_initcall(tracer_alloc_buffers);
fs_initcall(tracer_init_debugfs);
late_initcall(clear_boot_tracer);
6495