xref: /linux/kernel/trace/trace.c (revision e9f0878c4b2004ac19581274c1ae4c61ae3ca70e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring buffer to count the
60  * entries inserted during the selftest, although concurrent
61  * insertions into the ring buffer, such as trace_printk(), could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurs.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 	struct module			*mod;
129 	unsigned long			length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135 	/*
136 	 * "end" is first and points to NULL as it must be different
137 	 * than "mod" or "eval_string"
138 	 */
139 	union trace_eval_map_item	*next;
140 	const char			*end;	/* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
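
/*
 * For example, a saved array holding three eval maps for one module is
 * laid out as
 *
 *	[0] head  (head.mod, head.length == 3)
 *	[1] map   (first trace_eval_map)
 *	[2] map
 *	[3] map
 *	[4] tail  (tail.next -> next saved array or NULL, tail.end == NULL)
 *
 * so a walker visits ptr[1]..ptr[head.length] and then follows tail.next
 * to the next chunk.
 */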
157 
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 
163 #define MAX_TRACER_SIZE		100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166 
167 static bool allocate_snapshot;
168 
169 static int __init set_cmdline_ftrace(char *str)
170 {
171 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172 	default_bootup_tracer = bootup_tracer_buf;
173 	/* We are using ftrace early, expand it */
174 	ring_buffer_expanded = true;
175 	return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178 
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181 	if (*str++ != '=' || !*str) {
182 		ftrace_dump_on_oops = DUMP_ALL;
183 		return 1;
184 	}
185 
186 	if (!strcmp("orig_cpu", str)) {
187 		ftrace_dump_on_oops = DUMP_ORIG;
188 		return 1;
189 	}
190 
191 	return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194 
195 static int __init stop_trace_on_warning(char *str)
196 {
197 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198 		__disable_trace_on_warning = 1;
199 	return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202 
203 static int __init boot_alloc_snapshot(char *str)
204 {
205 	allocate_snapshot = true;
206 	/* We also need the main ring buffer expanded */
207 	ring_buffer_expanded = true;
208 	return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211 
212 
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214 
215 static int __init set_trace_boot_options(char *str)
216 {
217 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218 	return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221 
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224 
225 static int __init set_trace_boot_clock(char *str)
226 {
227 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228 	trace_boot_clock = trace_boot_clock_buf;
229 	return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232 
233 static int __init set_tracepoint_printk(char *str)
234 {
235 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236 		tracepoint_printk = 1;
237 	return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240 
241 unsigned long long ns2usecs(u64 nsec)
242 {
243 	nsec += 500;
244 	do_div(nsec, 1000);
245 	return nsec;
246 }
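
/*
 * The "+ 500" rounds to the nearest microsecond instead of truncating,
 * e.g. ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */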
247 
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS						\
250 	(FUNCTION_DEFAULT_FLAGS |					\
251 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
252 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
253 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
254 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255 
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
258 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259 
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263 
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269 	.trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271 
272 LIST_HEAD(ftrace_trace_arrays);
273 
274 int trace_array_get(struct trace_array *this_tr)
275 {
276 	struct trace_array *tr;
277 	int ret = -ENODEV;
278 
279 	mutex_lock(&trace_types_lock);
280 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281 		if (tr == this_tr) {
282 			tr->ref++;
283 			ret = 0;
284 			break;
285 		}
286 	}
287 	mutex_unlock(&trace_types_lock);
288 
289 	return ret;
290 }
291 
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294 	WARN_ON(!this_tr->ref);
295 	this_tr->ref--;
296 }
297 
298 void trace_array_put(struct trace_array *this_tr)
299 {
300 	mutex_lock(&trace_types_lock);
301 	__trace_array_put(this_tr);
302 	mutex_unlock(&trace_types_lock);
303 }
304 
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306 			      struct ring_buffer *buffer,
307 			      struct ring_buffer_event *event)
308 {
309 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310 	    !filter_match_preds(call->filter, rec)) {
311 		__trace_event_discard_commit(buffer, event);
312 		return 1;
313 	}
314 
315 	return 0;
316 }
317 
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320 	vfree(pid_list->pids);
321 	kfree(pid_list);
322 }
323 
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334 	/*
335 	 * If pid_max changed after filtered_pids was created, we
336 	 * by default ignore all pids greater than the previous pid_max.
337 	 */
338 	if (search_pid >= filtered_pids->pid_max)
339 		return false;
340 
341 	return test_bit(search_pid, filtered_pids->pids);
342 }
343 
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356 	/*
357 	 * Return false, because if filtered_pids does not exist,
358 	 * all pids are good to trace.
359 	 */
360 	if (!filtered_pids)
361 		return false;
362 
363 	return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365 
366 /**
367  * trace_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * When adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork, and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379 				  struct task_struct *self,
380 				  struct task_struct *task)
381 {
382 	if (!pid_list)
383 		return;
384 
385 	/* For forks, we only add if the forking task is listed */
386 	if (self) {
387 		if (!trace_find_filtered_pid(pid_list, self->pid))
388 			return;
389 	}
390 
391 	/* Sorry, but we don't support pid_max changing after setting */
392 	if (task->pid >= pid_list->pid_max)
393 		return;
394 
395 	/* "self" is set for forks, and NULL for exits */
396 	if (self)
397 		set_bit(task->pid, pid_list->pids);
398 	else
399 		clear_bit(task->pid, pid_list->pids);
400 }
401 
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416 	unsigned long pid = (unsigned long)v;
417 
418 	(*pos)++;
419 
420 	/* pid is already +1 of the actual previous bit */
421 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422 
423 	/* Return pid + 1 to allow zero to be represented */
424 	if (pid < pid_list->pid_max)
425 		return (void *)(pid + 1);
426 
427 	return NULL;
428 }
429 
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443 	unsigned long pid;
444 	loff_t l = 0;
445 
446 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447 	if (pid >= pid_list->pid_max)
448 		return NULL;
449 
450 	/* Return pid + 1 so that zero can be the exit value */
451 	for (pid++; pid && l < *pos;
452 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453 		;
454 	return (void *)pid;
455 }
456 
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467 	unsigned long pid = (unsigned long)v - 1;
468 
469 	seq_printf(m, "%lu\n", pid);
470 	return 0;
471 }
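
/*
 * Example (sketch with hypothetical names): a tracefs pid file can plug
 * the three helpers above straight into its seq_file operations and only
 * needs to supply a stop callback and the pid list to use:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(a_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(a_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations example_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * where "a_pid_list" and "p_stop" stand in for whatever the file provides.
 */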
472 
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE		127
475 
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477 		    struct trace_pid_list **new_pid_list,
478 		    const char __user *ubuf, size_t cnt)
479 {
480 	struct trace_pid_list *pid_list;
481 	struct trace_parser parser;
482 	unsigned long val;
483 	int nr_pids = 0;
484 	ssize_t read = 0;
485 	ssize_t ret = 0;
486 	loff_t pos;
487 	pid_t pid;
488 
489 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490 		return -ENOMEM;
491 
492 	/*
493 	 * Always recreate a new array. The write is an all or nothing
494 	 * operation. Always create a new array when adding new pids by
495 	 * the user. If the operation fails, then the current list is
496 	 * not modified.
497 	 */
498 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499 	if (!pid_list)
500 		return -ENOMEM;
501 
502 	pid_list->pid_max = READ_ONCE(pid_max);
503 
504 	/* Only truncating will shrink pid_max */
505 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506 		pid_list->pid_max = filtered_pids->pid_max;
507 
508 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509 	if (!pid_list->pids) {
510 		kfree(pid_list);
511 		return -ENOMEM;
512 	}
513 
514 	if (filtered_pids) {
515 		/* copy the current bits to the new max */
516 		for_each_set_bit(pid, filtered_pids->pids,
517 				 filtered_pids->pid_max) {
518 			set_bit(pid, pid_list->pids);
519 			nr_pids++;
520 		}
521 	}
522 
523 	while (cnt > 0) {
524 
525 		pos = 0;
526 
527 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
528 		if (ret < 0 || !trace_parser_loaded(&parser))
529 			break;
530 
531 		read += ret;
532 		ubuf += ret;
533 		cnt -= ret;
534 
535 		ret = -EINVAL;
536 		if (kstrtoul(parser.buffer, 0, &val))
537 			break;
538 		if (val >= pid_list->pid_max)
539 			break;
540 
541 		pid = (pid_t)val;
542 
543 		set_bit(pid, pid_list->pids);
544 		nr_pids++;
545 
546 		trace_parser_clear(&parser);
547 		ret = 0;
548 	}
549 	trace_parser_put(&parser);
550 
551 	if (ret < 0) {
552 		trace_free_pid_list(pid_list);
553 		return ret;
554 	}
555 
556 	if (!nr_pids) {
557 		/* Cleared the list of pids */
558 		trace_free_pid_list(pid_list);
559 		read = ret;
560 		pid_list = NULL;
561 	}
562 
563 	*new_pid_list = pid_list;
564 
565 	return read;
566 }
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 	u64 ts;
571 
572 	/* Early boot up does not have a buffer yet */
573 	if (!buf->buffer)
574 		return trace_clock_local();
575 
576 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579 	return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled", which is meant for fast paths such as
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598 	/*
599 	 * For quick access (irqsoff uses this in fast path), just
600 	 * return the mirror variable of the state of the ring buffer.
601 	 * It's a little racy, but we don't really care.
602 	 */
603 	smp_rmb();
604 	return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
613  * If a dump on oops happens, it is much appreciated not to have
614  * to wait for all that output. Anyway, this is configurable at
615  * both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer		*trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
630  * Serialize access to the ring buffer.
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of events (returned by ring_buffer_peek(), etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different per-CPU
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only from read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657 	if (cpu == RING_BUFFER_ALL_CPUS) {
658 		/* gain it for accessing the whole ring buffer. */
659 		down_write(&all_cpu_access_lock);
660 	} else {
661 		/* gain it for accessing a cpu ring buffer. */
662 
663 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 		down_read(&all_cpu_access_lock);
665 
666 		/* Secondly block other access to this @cpu ring buffer. */
667 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 	}
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673 	if (cpu == RING_BUFFER_ALL_CPUS) {
674 		up_write(&all_cpu_access_lock);
675 	} else {
676 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 		up_read(&all_cpu_access_lock);
678 	}
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683 	int cpu;
684 
685 	for_each_possible_cpu(cpu)
686 		mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695 	(void)cpu;
696 	mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
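
/*
 * A reader of a single CPU buffer brackets its accesses with these
 * primitives, roughly:
 *
 *	trace_access_lock(cpu);
 *	... consume or splice events from that CPU's buffer ...
 *	trace_access_unlock(cpu);
 *
 * while a reader that touches every buffer passes RING_BUFFER_ALL_CPUS
 * and thereby takes the access lock exclusively.
 */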
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 				 unsigned long flags,
714 				 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 				      struct ring_buffer *buffer,
717 				      unsigned long flags,
718 				      int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 					unsigned long flags,
723 					int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 				      struct ring_buffer *buffer,
728 				      unsigned long flags,
729 				      int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 		  int type, unsigned long flags, int pc)
738 {
739 	struct trace_entry *ent = ring_buffer_event_data(event);
740 
741 	tracing_generic_entry_update(ent, flags, pc);
742 	ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 			  int type,
748 			  unsigned long len,
749 			  unsigned long flags, int pc)
750 {
751 	struct ring_buffer_event *event;
752 
753 	event = ring_buffer_lock_reserve(buffer, len);
754 	if (event != NULL)
755 		trace_event_setup(event, type, flags, pc);
756 
757 	return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 	if (tr->trace_buffer.buffer)
763 		ring_buffer_record_on(tr->trace_buffer.buffer);
764 	/*
765 	 * This flag is looked at when buffers haven't been allocated
766 	 * yet, or by some tracers (like irqsoff), that just want to
767 	 * know if the ring buffer has been disabled, but it can handle
768 	 * races where it gets disabled but we still do a record.
769 	 * As the check is in the fast path of the tracers, it is more
770 	 * important to be fast than accurate.
771 	 */
772 	tr->buffer_disabled = 0;
773 	/* Make the flag seen by readers */
774 	smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785 	tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 	__this_cpu_write(trace_taskinfo_save, true);
794 
795 	/* If this is the temp buffer, we need to commit fully */
796 	if (this_cpu_read(trace_buffered_event) == event) {
797 		/* Length is in event->array[0] */
798 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 		/* Release the temp buffer */
800 		this_cpu_dec(trace_buffered_event_cnt);
801 	} else
802 		ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:	   The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 	struct ring_buffer_event *event;
814 	struct ring_buffer *buffer;
815 	struct print_entry *entry;
816 	unsigned long irq_flags;
817 	int alloc;
818 	int pc;
819 
820 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 		return 0;
822 
823 	pc = preempt_count();
824 
825 	if (unlikely(tracing_selftest_running || tracing_disabled))
826 		return 0;
827 
828 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830 	local_save_flags(irq_flags);
831 	buffer = global_trace.trace_buffer.buffer;
832 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 					    irq_flags, pc);
834 	if (!event)
835 		return 0;
836 
837 	entry = ring_buffer_event_data(event);
838 	entry->ip = ip;
839 
840 	memcpy(&entry->buf, str, size);
841 
842 	/* Add a newline if necessary */
843 	if (entry->buf[size - 1] != '\n') {
844 		entry->buf[size] = '\n';
845 		entry->buf[size + 1] = '\0';
846 	} else
847 		entry->buf[size] = '\0';
848 
849 	__buffer_unlock_commit(buffer, event);
850 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852 	return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
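
/*
 * Callers normally do not use __trace_puts() directly but go through the
 * trace_puts() macro (see include/linux/kernel.h), which picks
 * __trace_bputs() for build-time constant strings and __trace_puts()
 * otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */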
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:	   The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 	struct ring_buffer_event *event;
864 	struct ring_buffer *buffer;
865 	struct bputs_entry *entry;
866 	unsigned long irq_flags;
867 	int size = sizeof(struct bputs_entry);
868 	int pc;
869 
870 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 		return 0;
872 
873 	pc = preempt_count();
874 
875 	if (unlikely(tracing_selftest_running || tracing_disabled))
876 		return 0;
877 
878 	local_save_flags(irq_flags);
879 	buffer = global_trace.trace_buffer.buffer;
880 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 					    irq_flags, pc);
882 	if (!event)
883 		return 0;
884 
885 	entry = ring_buffer_event_data(event);
886 	entry->ip			= ip;
887 	entry->str			= str;
888 
889 	__buffer_unlock_commit(buffer, event);
890 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892 	return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance(struct trace_array *tr)
898 {
899 	struct tracer *tracer = tr->current_trace;
900 	unsigned long flags;
901 
902 	if (in_nmi()) {
903 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 		internal_trace_puts("*** snapshot is being ignored        ***\n");
905 		return;
906 	}
907 
908 	if (!tr->allocated_snapshot) {
909 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 		internal_trace_puts("*** stopping trace here!   ***\n");
911 		tracing_off();
912 		return;
913 	}
914 
915 	/* Note, snapshot can not be used when the tracer uses it */
916 	if (tracer->use_max_tr) {
917 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 		return;
920 	}
921 
922 	local_irq_save(flags);
923 	update_max_tr(tr, current, smp_processor_id());
924 	local_irq_restore(flags);
925 }
926 
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot either with
935  * tracing_snapshot_alloc(), or manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, this will stop tracing,
939  * basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943 	struct trace_array *tr = &global_trace;
944 
945 	tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948 
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950 					struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952 
953 int tracing_alloc_snapshot_instance(struct trace_array *tr)
954 {
955 	int ret;
956 
957 	if (!tr->allocated_snapshot) {
958 
959 		/* allocate spare buffer */
960 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
961 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962 		if (ret < 0)
963 			return ret;
964 
965 		tr->allocated_snapshot = true;
966 	}
967 
968 	return 0;
969 }
970 
971 static void free_snapshot(struct trace_array *tr)
972 {
973 	/*
974 	 * We don't free the ring buffer; instead, we resize it because
975 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
976 	 * we want to preserve it.
977 	 */
978 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979 	set_buffer_entries(&tr->max_buffer, 1);
980 	tracing_reset_online_cpus(&tr->max_buffer);
981 	tr->allocated_snapshot = false;
982 }
983 
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996 	struct trace_array *tr = &global_trace;
997 	int ret;
998 
999 	ret = tracing_alloc_snapshot_instance(tr);
1000 	WARN_ON(ret < 0);
1001 
1002 	return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005 
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019 	int ret;
1020 
1021 	ret = tracing_alloc_snapshot();
1022 	if (ret < 0)
1023 		return;
1024 
1025 	tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
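
/*
 * Sketch of the intended usage pattern (the triggering condition is
 * hypothetical): allocate the spare buffer up front from a context that
 * may sleep, then take snapshots wherever the interesting event fires,
 * even from atomic context:
 *
 *	tracing_alloc_snapshot();	// during setup, may sleep
 *
 *	if (something_went_wrong)	// later, hot path / atomic context
 *		tracing_snapshot();
 */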
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037 	return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042 	/* Give warning */
1043 	tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047 
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050 	if (tr->trace_buffer.buffer)
1051 		ring_buffer_record_off(tr->trace_buffer.buffer);
1052 	/*
1053 	 * This flag is looked at when buffers haven't been allocated
1054 	 * yet, or by some tracers (like irqsoff), that just want to
1055 	 * know if the ring buffer has been disabled, but it can handle
1056 	 * races where it gets disabled but we still do a record.
1057 	 * As the check is in the fast path of the tracers, it is more
1058 	 * important to be fast than accurate.
1059 	 */
1060 	tr->buffer_disabled = 1;
1061 	/* Make the flag seen by readers */
1062 	smp_wmb();
1063 }
1064 
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075 	tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078 
1079 void disable_trace_on_warning(void)
1080 {
1081 	if (__disable_trace_on_warning)
1082 		tracing_off();
1083 }
1084 
1085 /**
1086  * tracer_tracing_is_on - show real state of ring buffer enabled
1087  * @tr : the trace array to know if ring buffer is enabled
1088  *
1089  * Shows real state of the ring buffer if it is enabled or not.
1090  */
1091 bool tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093 	if (tr->trace_buffer.buffer)
1094 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095 	return !tr->buffer_disabled;
1096 }
1097 
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103 	return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106 
1107 static int __init set_buf_size(char *str)
1108 {
1109 	unsigned long buf_size;
1110 
1111 	if (!str)
1112 		return 0;
1113 	buf_size = memparse(str, &str);
1114 	/* nr_entries can not be zero */
1115 	if (buf_size == 0)
1116 		return 0;
1117 	trace_buf_size = buf_size;
1118 	return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121 
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124 	unsigned long threshold;
1125 	int ret;
1126 
1127 	if (!str)
1128 		return 0;
1129 	ret = kstrtoul(str, 0, &threshold);
1130 	if (ret < 0)
1131 		return 0;
1132 	tracing_thresh = threshold * 1000;
1133 	return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136 
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139 	return nsecs / 1000;
1140 }
1141 
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150 
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153 	TRACE_FLAGS
1154 	NULL
1155 };
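
/*
 * For example, a TRACE_FLAGS entry such as C(PRINT_PARENT, "print-parent")
 * expands to TRACE_ITER_PRINT_PARENT in the flag definitions in trace.h
 * and to the string "print-parent" here, at the index matching its bit
 * position.
 */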
1156 
1157 static struct {
1158 	u64 (*func)(void);
1159 	const char *name;
1160 	int in_ns;		/* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162 	{ trace_clock_local,		"local",	1 },
1163 	{ trace_clock_global,		"global",	1 },
1164 	{ trace_clock_counter,		"counter",	0 },
1165 	{ trace_clock_jiffies,		"uptime",	0 },
1166 	{ trace_clock,			"perf",		1 },
1167 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1168 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1169 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1170 	ARCH_TRACE_CLOCKS
1171 };
1172 
1173 bool trace_clock_in_ns(struct trace_array *tr)
1174 {
1175 	if (trace_clocks[tr->clock_id].in_ns)
1176 		return true;
1177 
1178 	return false;
1179 }
1180 
1181 /*
1182  * trace_parser_get_init - gets the buffer for trace parser
1183  */
1184 int trace_parser_get_init(struct trace_parser *parser, int size)
1185 {
1186 	memset(parser, 0, sizeof(*parser));
1187 
1188 	parser->buffer = kmalloc(size, GFP_KERNEL);
1189 	if (!parser->buffer)
1190 		return 1;
1191 
1192 	parser->size = size;
1193 	return 0;
1194 }
1195 
1196 /*
1197  * trace_parser_put - frees the buffer for trace parser
1198  */
1199 void trace_parser_put(struct trace_parser *parser)
1200 {
1201 	kfree(parser->buffer);
1202 	parser->buffer = NULL;
1203 }
1204 
1205 /*
1206  * trace_get_user - reads the user input string separated by space
1207  * (matched by isspace(ch))
1208  *
1209  * For each string found the 'struct trace_parser' is updated,
1210  * and the function returns.
1211  *
1212  * Returns number of bytes read.
1213  *
1214  * See kernel/trace/trace.h for 'struct trace_parser' details.
1215  */
1216 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1217 	size_t cnt, loff_t *ppos)
1218 {
1219 	char ch;
1220 	size_t read = 0;
1221 	ssize_t ret;
1222 
1223 	if (!*ppos)
1224 		trace_parser_clear(parser);
1225 
1226 	ret = get_user(ch, ubuf++);
1227 	if (ret)
1228 		goto out;
1229 
1230 	read++;
1231 	cnt--;
1232 
1233 	/*
1234 	 * The parser is not finished with the last write,
1235 	 * continue reading the user input without skipping spaces.
1236 	 */
1237 	if (!parser->cont) {
1238 		/* skip white space */
1239 		while (cnt && isspace(ch)) {
1240 			ret = get_user(ch, ubuf++);
1241 			if (ret)
1242 				goto out;
1243 			read++;
1244 			cnt--;
1245 		}
1246 
1247 		parser->idx = 0;
1248 
1249 		/* only spaces were written */
1250 		if (isspace(ch) || !ch) {
1251 			*ppos += read;
1252 			ret = read;
1253 			goto out;
1254 		}
1255 	}
1256 
1257 	/* read the non-space input */
1258 	while (cnt && !isspace(ch) && ch) {
1259 		if (parser->idx < parser->size - 1)
1260 			parser->buffer[parser->idx++] = ch;
1261 		else {
1262 			ret = -EINVAL;
1263 			goto out;
1264 		}
1265 		ret = get_user(ch, ubuf++);
1266 		if (ret)
1267 			goto out;
1268 		read++;
1269 		cnt--;
1270 	}
1271 
1272 	/* We either got finished input or we have to wait for another call. */
1273 	if (isspace(ch) || !ch) {
1274 		parser->buffer[parser->idx] = 0;
1275 		parser->cont = false;
1276 	} else if (parser->idx < parser->size - 1) {
1277 		parser->cont = true;
1278 		parser->buffer[parser->idx++] = ch;
1279 		/* Make sure the parsed string always terminates with '\0'. */
1280 		parser->buffer[parser->idx] = 0;
1281 	} else {
1282 		ret = -EINVAL;
1283 		goto out;
1284 	}
1285 
1286 	*ppos += read;
1287 	ret = read;
1288 
1289 out:
1290 	return ret;
1291 }
1292 
1293 /* TODO add a seq_buf_to_buffer() */
1294 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1295 {
1296 	int len;
1297 
1298 	if (trace_seq_used(s) <= s->seq.readpos)
1299 		return -EBUSY;
1300 
1301 	len = trace_seq_used(s) - s->seq.readpos;
1302 	if (cnt > len)
1303 		cnt = len;
1304 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1305 
1306 	s->seq.readpos += cnt;
1307 	return cnt;
1308 }
1309 
1310 unsigned long __read_mostly	tracing_thresh;
1311 
1312 #ifdef CONFIG_TRACER_MAX_TRACE
1313 /*
1314  * Copy the new maximum trace into the separate maximum-trace
1315  * structure. (this way the maximum trace is permanently saved,
1316  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1317  */
1318 static void
1319 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1320 {
1321 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1322 	struct trace_buffer *max_buf = &tr->max_buffer;
1323 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1324 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1325 
1326 	max_buf->cpu = cpu;
1327 	max_buf->time_start = data->preempt_timestamp;
1328 
1329 	max_data->saved_latency = tr->max_latency;
1330 	max_data->critical_start = data->critical_start;
1331 	max_data->critical_end = data->critical_end;
1332 
1333 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1334 	max_data->pid = tsk->pid;
1335 	/*
1336 	 * If tsk == current, then use current_uid(), as that does not use
1337 	 * RCU. The irq tracer can be called out of RCU scope.
1338 	 */
1339 	if (tsk == current)
1340 		max_data->uid = current_uid();
1341 	else
1342 		max_data->uid = task_uid(tsk);
1343 
1344 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1345 	max_data->policy = tsk->policy;
1346 	max_data->rt_priority = tsk->rt_priority;
1347 
1348 	/* record this task's comm */
1349 	tracing_record_cmdline(tsk);
1350 }
1351 
1352 /**
1353  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1354  * @tr: tracer
1355  * @tsk: the task with the latency
1356  * @cpu: The cpu that initiated the trace.
1357  *
1358  * Flip the buffers between the @tr and the max_tr and record information
1359  * about which task was the cause of this latency.
1360  */
1361 void
1362 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1363 {
1364 	if (tr->stop_count)
1365 		return;
1366 
1367 	WARN_ON_ONCE(!irqs_disabled());
1368 
1369 	if (!tr->allocated_snapshot) {
1370 		/* Only the nop tracer should hit this when disabling */
1371 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1372 		return;
1373 	}
1374 
1375 	arch_spin_lock(&tr->max_lock);
1376 
1377 	/* Inherit the recordable setting from trace_buffer */
1378 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1379 		ring_buffer_record_on(tr->max_buffer.buffer);
1380 	else
1381 		ring_buffer_record_off(tr->max_buffer.buffer);
1382 
1383 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1384 
1385 	__update_max_tr(tr, tsk, cpu);
1386 	arch_spin_unlock(&tr->max_lock);
1387 }
1388 
1389 /**
1390  * update_max_tr_single - only copy one trace over, and reset the rest
1391  * @tr: tracer
1392  * @tsk: task with the latency
1393  * @cpu: the cpu of the buffer to copy.
1394  *
1395  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1396  */
1397 void
1398 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1399 {
1400 	int ret;
1401 
1402 	if (tr->stop_count)
1403 		return;
1404 
1405 	WARN_ON_ONCE(!irqs_disabled());
1406 	if (!tr->allocated_snapshot) {
1407 		/* Only the nop tracer should hit this when disabling */
1408 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1409 		return;
1410 	}
1411 
1412 	arch_spin_lock(&tr->max_lock);
1413 
1414 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1415 
1416 	if (ret == -EBUSY) {
1417 		/*
1418 		 * We failed to swap the buffer due to a commit taking
1419 		 * place on this CPU. We fail to record, but we reset
1420 		 * the max trace buffer (no one writes directly to it)
1421 		 * and flag that it failed.
1422 		 */
1423 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1424 			"Failed to swap buffers due to commit in progress\n");
1425 	}
1426 
1427 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1428 
1429 	__update_max_tr(tr, tsk, cpu);
1430 	arch_spin_unlock(&tr->max_lock);
1431 }
1432 #endif /* CONFIG_TRACER_MAX_TRACE */
1433 
1434 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1435 {
1436 	/* Iterators are static, they should be filled or empty */
1437 	if (trace_buffer_iter(iter, iter->cpu_file))
1438 		return 0;
1439 
1440 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1441 				full);
1442 }
1443 
1444 #ifdef CONFIG_FTRACE_STARTUP_TEST
1445 static bool selftests_can_run;
1446 
1447 struct trace_selftests {
1448 	struct list_head		list;
1449 	struct tracer			*type;
1450 };
1451 
1452 static LIST_HEAD(postponed_selftests);
1453 
1454 static int save_selftest(struct tracer *type)
1455 {
1456 	struct trace_selftests *selftest;
1457 
1458 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1459 	if (!selftest)
1460 		return -ENOMEM;
1461 
1462 	selftest->type = type;
1463 	list_add(&selftest->list, &postponed_selftests);
1464 	return 0;
1465 }
1466 
1467 static int run_tracer_selftest(struct tracer *type)
1468 {
1469 	struct trace_array *tr = &global_trace;
1470 	struct tracer *saved_tracer = tr->current_trace;
1471 	int ret;
1472 
1473 	if (!type->selftest || tracing_selftest_disabled)
1474 		return 0;
1475 
1476 	/*
1477 	 * If a tracer registers early in boot up (before scheduling is
1478 	 * initialized and such), then do not run its selftests yet.
1479 	 * Instead, run it a little later in the boot process.
1480 	 */
1481 	if (!selftests_can_run)
1482 		return save_selftest(type);
1483 
1484 	/*
1485 	 * Run a selftest on this tracer.
1486 	 * Here we reset the trace buffer, and set the current
1487 	 * tracer to be this tracer. The tracer can then run some
1488 	 * internal tracing to verify that everything is in order.
1489 	 * If we fail, we do not register this tracer.
1490 	 */
1491 	tracing_reset_online_cpus(&tr->trace_buffer);
1492 
1493 	tr->current_trace = type;
1494 
1495 #ifdef CONFIG_TRACER_MAX_TRACE
1496 	if (type->use_max_tr) {
1497 		/* If we expanded the buffers, make sure the max is expanded too */
1498 		if (ring_buffer_expanded)
1499 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1500 					   RING_BUFFER_ALL_CPUS);
1501 		tr->allocated_snapshot = true;
1502 	}
1503 #endif
1504 
1505 	/* the test is responsible for initializing and enabling */
1506 	pr_info("Testing tracer %s: ", type->name);
1507 	ret = type->selftest(type, tr);
1508 	/* the test is responsible for resetting too */
1509 	tr->current_trace = saved_tracer;
1510 	if (ret) {
1511 		printk(KERN_CONT "FAILED!\n");
1512 		/* Add the warning after printing 'FAILED' */
1513 		WARN_ON(1);
1514 		return -1;
1515 	}
1516 	/* Only reset on passing, to avoid touching corrupted buffers */
1517 	tracing_reset_online_cpus(&tr->trace_buffer);
1518 
1519 #ifdef CONFIG_TRACER_MAX_TRACE
1520 	if (type->use_max_tr) {
1521 		tr->allocated_snapshot = false;
1522 
1523 		/* Shrink the max buffer again */
1524 		if (ring_buffer_expanded)
1525 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1526 					   RING_BUFFER_ALL_CPUS);
1527 	}
1528 #endif
1529 
1530 	printk(KERN_CONT "PASSED\n");
1531 	return 0;
1532 }
1533 
1534 static __init int init_trace_selftests(void)
1535 {
1536 	struct trace_selftests *p, *n;
1537 	struct tracer *t, **last;
1538 	int ret;
1539 
1540 	selftests_can_run = true;
1541 
1542 	mutex_lock(&trace_types_lock);
1543 
1544 	if (list_empty(&postponed_selftests))
1545 		goto out;
1546 
1547 	pr_info("Running postponed tracer tests:\n");
1548 
1549 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1550 		ret = run_tracer_selftest(p->type);
1551 		/* If the test fails, then warn and remove from available_tracers */
1552 		if (ret < 0) {
1553 			WARN(1, "tracer: %s failed selftest, disabling\n",
1554 			     p->type->name);
1555 			last = &trace_types;
1556 			for (t = trace_types; t; t = t->next) {
1557 				if (t == p->type) {
1558 					*last = t->next;
1559 					break;
1560 				}
1561 				last = &t->next;
1562 			}
1563 		}
1564 		list_del(&p->list);
1565 		kfree(p);
1566 	}
1567 
1568  out:
1569 	mutex_unlock(&trace_types_lock);
1570 
1571 	return 0;
1572 }
1573 core_initcall(init_trace_selftests);
1574 #else
1575 static inline int run_tracer_selftest(struct tracer *type)
1576 {
1577 	return 0;
1578 }
1579 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1580 
1581 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1582 
1583 static void __init apply_trace_boot_options(void);
1584 
1585 /**
1586  * register_tracer - register a tracer with the ftrace system.
1587  * @type: the plugin for the tracer
1588  *
1589  * Register a new plugin tracer.
1590  */
1591 int __init register_tracer(struct tracer *type)
1592 {
1593 	struct tracer *t;
1594 	int ret = 0;
1595 
1596 	if (!type->name) {
1597 		pr_info("Tracer must have a name\n");
1598 		return -1;
1599 	}
1600 
1601 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1602 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1603 		return -1;
1604 	}
1605 
1606 	mutex_lock(&trace_types_lock);
1607 
1608 	tracing_selftest_running = true;
1609 
1610 	for (t = trace_types; t; t = t->next) {
1611 		if (strcmp(type->name, t->name) == 0) {
1612 			/* already found */
1613 			pr_info("Tracer %s already registered\n",
1614 				type->name);
1615 			ret = -1;
1616 			goto out;
1617 		}
1618 	}
1619 
1620 	if (!type->set_flag)
1621 		type->set_flag = &dummy_set_flag;
1622 	if (!type->flags) {
1623 		/* allocate a dummy tracer_flags */
1624 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1625 		if (!type->flags) {
1626 			ret = -ENOMEM;
1627 			goto out;
1628 		}
1629 		type->flags->val = 0;
1630 		type->flags->opts = dummy_tracer_opt;
1631 	} else
1632 		if (!type->flags->opts)
1633 			type->flags->opts = dummy_tracer_opt;
1634 
1635 	/* store the tracer for __set_tracer_option */
1636 	type->flags->trace = type;
1637 
1638 	ret = run_tracer_selftest(type);
1639 	if (ret < 0)
1640 		goto out;
1641 
1642 	type->next = trace_types;
1643 	trace_types = type;
1644 	add_tracer_options(&global_trace, type);
1645 
1646  out:
1647 	tracing_selftest_running = false;
1648 	mutex_unlock(&trace_types_lock);
1649 
1650 	if (ret || !default_bootup_tracer)
1651 		goto out_unlock;
1652 
1653 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1654 		goto out_unlock;
1655 
1656 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1657 	/* Do we want this tracer to start on bootup? */
1658 	tracing_set_tracer(&global_trace, type->name);
1659 	default_bootup_tracer = NULL;
1660 
1661 	apply_trace_boot_options();
1662 
1663 	/* disable other selftests, since this will break it. */
1664 	tracing_selftest_disabled = true;
1665 #ifdef CONFIG_FTRACE_STARTUP_TEST
1666 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1667 	       type->name);
1668 #endif
1669 
1670  out_unlock:
1671 	return ret;
1672 }
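
/*
 * Minimal registration sketch (all names illustrative): a tracer plugin
 * fills in a struct tracer and registers it from an initcall:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_trace(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_trace);
 */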
1673 
1674 void tracing_reset(struct trace_buffer *buf, int cpu)
1675 {
1676 	struct ring_buffer *buffer = buf->buffer;
1677 
1678 	if (!buffer)
1679 		return;
1680 
1681 	ring_buffer_record_disable(buffer);
1682 
1683 	/* Make sure all commits have finished */
1684 	synchronize_sched();
1685 	ring_buffer_reset_cpu(buffer, cpu);
1686 
1687 	ring_buffer_record_enable(buffer);
1688 }
1689 
1690 void tracing_reset_online_cpus(struct trace_buffer *buf)
1691 {
1692 	struct ring_buffer *buffer = buf->buffer;
1693 	int cpu;
1694 
1695 	if (!buffer)
1696 		return;
1697 
1698 	ring_buffer_record_disable(buffer);
1699 
1700 	/* Make sure all commits have finished */
1701 	synchronize_sched();
1702 
1703 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1704 
1705 	for_each_online_cpu(cpu)
1706 		ring_buffer_reset_cpu(buffer, cpu);
1707 
1708 	ring_buffer_record_enable(buffer);
1709 }
1710 
1711 /* Must have trace_types_lock held */
1712 void tracing_reset_all_online_cpus(void)
1713 {
1714 	struct trace_array *tr;
1715 
1716 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1717 		if (!tr->clear_trace)
1718 			continue;
1719 		tr->clear_trace = false;
1720 		tracing_reset_online_cpus(&tr->trace_buffer);
1721 #ifdef CONFIG_TRACER_MAX_TRACE
1722 		tracing_reset_online_cpus(&tr->max_buffer);
1723 #endif
1724 	}
1725 }
1726 
1727 static int *tgid_map;
1728 
1729 #define SAVED_CMDLINES_DEFAULT 128
1730 #define NO_CMDLINE_MAP UINT_MAX
1731 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1732 struct saved_cmdlines_buffer {
1733 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1734 	unsigned *map_cmdline_to_pid;
1735 	unsigned cmdline_num;
1736 	int cmdline_idx;
1737 	char *saved_cmdlines;
1738 };
1739 static struct saved_cmdlines_buffer *savedcmd;
1740 
1741 /* temporarily disable recording */
1742 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1743 
1744 static inline char *get_saved_cmdlines(int idx)
1745 {
1746 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1747 }
1748 
1749 static inline void set_cmdline(int idx, const char *cmdline)
1750 {
1751 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1752 }
1753 
1754 static int allocate_cmdlines_buffer(unsigned int val,
1755 				    struct saved_cmdlines_buffer *s)
1756 {
1757 	s->map_cmdline_to_pid = kmalloc_array(val,
1758 					      sizeof(*s->map_cmdline_to_pid),
1759 					      GFP_KERNEL);
1760 	if (!s->map_cmdline_to_pid)
1761 		return -ENOMEM;
1762 
1763 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1764 	if (!s->saved_cmdlines) {
1765 		kfree(s->map_cmdline_to_pid);
1766 		return -ENOMEM;
1767 	}
1768 
1769 	s->cmdline_idx = 0;
1770 	s->cmdline_num = val;
1771 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1772 	       sizeof(s->map_pid_to_cmdline));
1773 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1774 	       val * sizeof(*s->map_cmdline_to_pid));
1775 
1776 	return 0;
1777 }
1778 
1779 static int trace_create_savedcmd(void)
1780 {
1781 	int ret;
1782 
1783 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1784 	if (!savedcmd)
1785 		return -ENOMEM;
1786 
1787 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1788 	if (ret < 0) {
1789 		kfree(savedcmd);
1790 		savedcmd = NULL;
1791 		return -ENOMEM;
1792 	}
1793 
1794 	return 0;
1795 }
1796 
1797 int is_tracing_stopped(void)
1798 {
1799 	return global_trace.stop_count;
1800 }
1801 
1802 /**
1803  * tracing_start - quick start of the tracer
1804  *
1805  * If tracing is enabled but was stopped by tracing_stop,
1806  * this will start the tracer back up.
1807  */
1808 void tracing_start(void)
1809 {
1810 	struct ring_buffer *buffer;
1811 	unsigned long flags;
1812 
1813 	if (tracing_disabled)
1814 		return;
1815 
1816 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1817 	if (--global_trace.stop_count) {
1818 		if (global_trace.stop_count < 0) {
1819 			/* Someone screwed up their debugging */
1820 			WARN_ON_ONCE(1);
1821 			global_trace.stop_count = 0;
1822 		}
1823 		goto out;
1824 	}
1825 
1826 	/* Prevent the buffers from switching */
1827 	arch_spin_lock(&global_trace.max_lock);
1828 
1829 	buffer = global_trace.trace_buffer.buffer;
1830 	if (buffer)
1831 		ring_buffer_record_enable(buffer);
1832 
1833 #ifdef CONFIG_TRACER_MAX_TRACE
1834 	buffer = global_trace.max_buffer.buffer;
1835 	if (buffer)
1836 		ring_buffer_record_enable(buffer);
1837 #endif
1838 
1839 	arch_spin_unlock(&global_trace.max_lock);
1840 
1841  out:
1842 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1843 }
1844 
1845 static void tracing_start_tr(struct trace_array *tr)
1846 {
1847 	struct ring_buffer *buffer;
1848 	unsigned long flags;
1849 
1850 	if (tracing_disabled)
1851 		return;
1852 
1853 	/* If global, we need to also start the max tracer */
1854 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1855 		return tracing_start();
1856 
1857 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1858 
1859 	if (--tr->stop_count) {
1860 		if (tr->stop_count < 0) {
1861 			/* Someone screwed up their debugging */
1862 			WARN_ON_ONCE(1);
1863 			tr->stop_count = 0;
1864 		}
1865 		goto out;
1866 	}
1867 
1868 	buffer = tr->trace_buffer.buffer;
1869 	if (buffer)
1870 		ring_buffer_record_enable(buffer);
1871 
1872  out:
1873 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1874 }
1875 
1876 /**
1877  * tracing_stop - quick stop of the tracer
1878  *
1879  * Light weight way to stop tracing. Use in conjunction with
1880  * tracing_start.
1881  */
1882 void tracing_stop(void)
1883 {
1884 	struct ring_buffer *buffer;
1885 	unsigned long flags;
1886 
1887 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1888 	if (global_trace.stop_count++)
1889 		goto out;
1890 
1891 	/* Prevent the buffers from switching */
1892 	arch_spin_lock(&global_trace.max_lock);
1893 
1894 	buffer = global_trace.trace_buffer.buffer;
1895 	if (buffer)
1896 		ring_buffer_record_disable(buffer);
1897 
1898 #ifdef CONFIG_TRACER_MAX_TRACE
1899 	buffer = global_trace.max_buffer.buffer;
1900 	if (buffer)
1901 		ring_buffer_record_disable(buffer);
1902 #endif
1903 
1904 	arch_spin_unlock(&global_trace.max_lock);
1905 
1906  out:
1907 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1908 }
1909 
1910 static void tracing_stop_tr(struct trace_array *tr)
1911 {
1912 	struct ring_buffer *buffer;
1913 	unsigned long flags;
1914 
1915 	/* If global, we need to also stop the max tracer */
1916 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1917 		return tracing_stop();
1918 
1919 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1920 	if (tr->stop_count++)
1921 		goto out;
1922 
1923 	buffer = tr->trace_buffer.buffer;
1924 	if (buffer)
1925 		ring_buffer_record_disable(buffer);
1926 
1927  out:
1928 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1929 }
1930 
1931 static int trace_save_cmdline(struct task_struct *tsk)
1932 {
1933 	unsigned pid, idx;
1934 
1935 	/* treat recording of idle task as a success */
1936 	if (!tsk->pid)
1937 		return 1;
1938 
1939 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1940 		return 0;
1941 
1942 	/*
1943 	 * It's not the end of the world if we don't get
1944 	 * the lock, but we also don't want to spin
1945 	 * nor do we want to disable interrupts,
1946 	 * so if we miss here, then better luck next time.
1947 	 */
1948 	if (!arch_spin_trylock(&trace_cmdline_lock))
1949 		return 0;
1950 
1951 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1952 	if (idx == NO_CMDLINE_MAP) {
1953 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1954 
1955 		/*
1956 		 * Check whether the cmdline buffer at idx has a pid
1957 		 * mapped. We are going to overwrite that entry, so we
1958 		 * need to clear the map_pid_to_cmdline entry. Otherwise we
1959 		 * would read the new comm for the old pid.
1960 		 */
1961 		pid = savedcmd->map_cmdline_to_pid[idx];
1962 		if (pid != NO_CMDLINE_MAP)
1963 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1964 
1965 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1966 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1967 
1968 		savedcmd->cmdline_idx = idx;
1969 	}
1970 
1971 	set_cmdline(idx, tsk->comm);
1972 
1973 	arch_spin_unlock(&trace_cmdline_lock);
1974 
1975 	return 1;
1976 }
1977 
1978 static void __trace_find_cmdline(int pid, char comm[])
1979 {
1980 	unsigned map;
1981 
1982 	if (!pid) {
1983 		strcpy(comm, "<idle>");
1984 		return;
1985 	}
1986 
1987 	if (WARN_ON_ONCE(pid < 0)) {
1988 		strcpy(comm, "<XXX>");
1989 		return;
1990 	}
1991 
1992 	if (pid > PID_MAX_DEFAULT) {
1993 		strcpy(comm, "<...>");
1994 		return;
1995 	}
1996 
1997 	map = savedcmd->map_pid_to_cmdline[pid];
1998 	if (map != NO_CMDLINE_MAP)
1999 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2000 	else
2001 		strcpy(comm, "<...>");
2002 }
2003 
2004 void trace_find_cmdline(int pid, char comm[])
2005 {
2006 	preempt_disable();
2007 	arch_spin_lock(&trace_cmdline_lock);
2008 
2009 	__trace_find_cmdline(pid, comm);
2010 
2011 	arch_spin_unlock(&trace_cmdline_lock);
2012 	preempt_enable();
2013 }
2014 
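/*
 * A small sketch of how trace_find_cmdline() is called from an output
 * path: resolve the pid stored in a trace entry back to the comm that
 * was saved when the event was recorded.  The entry and trace_seq
 * variables are assumed to exist in the caller.
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 *
 * The helper takes trace_cmdline_lock and disables preemption itself,
 * so callers only need to supply a buffer of at least TASK_COMM_LEN
 * bytes.
 */
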
2015 int trace_find_tgid(int pid)
2016 {
2017 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2018 		return 0;
2019 
2020 	return tgid_map[pid];
2021 }
2022 
2023 static int trace_save_tgid(struct task_struct *tsk)
2024 {
2025 	/* treat recording of idle task as a success */
2026 	if (!tsk->pid)
2027 		return 1;
2028 
2029 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2030 		return 0;
2031 
2032 	tgid_map[tsk->pid] = tsk->tgid;
2033 	return 1;
2034 }
2035 
2036 static bool tracing_record_taskinfo_skip(int flags)
2037 {
2038 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2039 		return true;
2040 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2041 		return true;
2042 	if (!__this_cpu_read(trace_taskinfo_save))
2043 		return true;
2044 	return false;
2045 }
2046 
2047 /**
2048  * tracing_record_taskinfo - record the task info of a task
2049  *
2050  * @task:  task to record
2051  * @flags: TRACE_RECORD_CMDLINE for recording comm
2052  *         TRACE_RECORD_TGID for recording tgid
2053  */
2054 void tracing_record_taskinfo(struct task_struct *task, int flags)
2055 {
2056 	bool done;
2057 
2058 	if (tracing_record_taskinfo_skip(flags))
2059 		return;
2060 
2061 	/*
2062 	 * Record as much task information as possible. If some fail, continue
2063 	 * to try to record the others.
2064 	 */
2065 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2066 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2067 
2068 	/* If recording any information failed, try again soon. */
2069 	if (!done)
2070 		return;
2071 
2072 	__this_cpu_write(trace_taskinfo_save, false);
2073 }
2074 
2075 /**
2076  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2077  *
2078  * @prev:  previous task during sched_switch
2079  * @next:  next task during sched_switch
2080  * @flags: TRACE_RECORD_CMDLINE for recording comm
2081  *         TRACE_RECORD_TGID for recording tgid
2082  */
2083 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2084 					  struct task_struct *next, int flags)
2085 {
2086 	bool done;
2087 
2088 	if (tracing_record_taskinfo_skip(flags))
2089 		return;
2090 
2091 	/*
2092 	 * Record as much task information as possible. If some fail, continue
2093 	 * to try to record the others.
2094 	 */
2095 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2096 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2097 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2098 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2099 
2100 	/* If recording any information failed, try again soon. */
2101 	if (!done)
2102 		return;
2103 
2104 	__this_cpu_write(trace_taskinfo_save, false);
2105 }
2106 
2107 /* Helpers to record a specific task information */
2108 void tracing_record_cmdline(struct task_struct *task)
2109 {
2110 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2111 }
2112 
2113 void tracing_record_tgid(struct task_struct *task)
2114 {
2115 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2116 }
2117 
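/*
 * For illustration, a caller that wants both pieces of task information
 * for the current task can combine the flags in a single call:
 *
 *	tracing_record_taskinfo(current,
 *				TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
 *
 * This is equivalent to calling tracing_record_cmdline() and
 * tracing_record_tgid() back to back, but checks the per-cpu
 * trace_taskinfo_save flag only once.
 */
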
2118 /*
2119  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2120  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2121  * simplifies those functions and keeps them in sync.
2122  */
2123 enum print_line_t trace_handle_return(struct trace_seq *s)
2124 {
2125 	return trace_seq_has_overflowed(s) ?
2126 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2127 }
2128 EXPORT_SYMBOL_GPL(trace_handle_return);
2129 
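/*
 * A sketch of the pattern trace_handle_return() simplifies: an event's
 * output callback writes into the trace_seq and lets the helper turn
 * the overflow check into the return value.  The event type and field
 * names below are hypothetical.
 *
 *	static enum print_line_t
 *	trace_foo_print(struct trace_iterator *iter, int flags,
 *			struct trace_event *event)
 *	{
 *		struct foo_entry *field = (struct foo_entry *)iter->ent;
 *
 *		trace_seq_printf(&iter->seq, "foo: %lu\n", field->value);
 *		return trace_handle_return(&iter->seq);
 *	}
 */
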
2130 void
2131 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2132 			     int pc)
2133 {
2134 	struct task_struct *tsk = current;
2135 
2136 	entry->preempt_count		= pc & 0xff;
2137 	entry->pid			= (tsk) ? tsk->pid : 0;
2138 	entry->flags =
2139 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2140 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2141 #else
2142 		TRACE_FLAG_IRQS_NOSUPPORT |
2143 #endif
2144 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2145 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2146 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2147 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2148 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2149 }
2150 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2151 
2152 struct ring_buffer_event *
2153 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2154 			  int type,
2155 			  unsigned long len,
2156 			  unsigned long flags, int pc)
2157 {
2158 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2159 }
2160 
2161 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2162 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2163 static int trace_buffered_event_ref;
2164 
2165 /**
2166  * trace_buffered_event_enable - enable buffering events
2167  *
2168  * When events are being filtered, it is quicker to write the event
2169  * data into a temporary buffer when there is a good chance that the
2170  * event will not be committed. Discarding an event from the ring
2171  * buffer is not as fast as committing one, and is much slower than
2172  * copying the data and committing the copy.
2173  *
2174  * When an event is to be filtered, allocate per-cpu buffers to
2175  * write the event data into. If the event is filtered and discarded,
2176  * it is simply dropped; otherwise the entire data is committed
2177  * in one shot.
2178  */
2179 void trace_buffered_event_enable(void)
2180 {
2181 	struct ring_buffer_event *event;
2182 	struct page *page;
2183 	int cpu;
2184 
2185 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2186 
2187 	if (trace_buffered_event_ref++)
2188 		return;
2189 
2190 	for_each_tracing_cpu(cpu) {
2191 		page = alloc_pages_node(cpu_to_node(cpu),
2192 					GFP_KERNEL | __GFP_NORETRY, 0);
2193 		if (!page)
2194 			goto failed;
2195 
2196 		event = page_address(page);
2197 		memset(event, 0, sizeof(*event));
2198 
2199 		per_cpu(trace_buffered_event, cpu) = event;
2200 
2201 		preempt_disable();
2202 		if (cpu == smp_processor_id() &&
2203 		    this_cpu_read(trace_buffered_event) !=
2204 		    per_cpu(trace_buffered_event, cpu))
2205 			WARN_ON_ONCE(1);
2206 		preempt_enable();
2207 	}
2208 
2209 	return;
2210  failed:
2211 	trace_buffered_event_disable();
2212 }
2213 
2214 static void enable_trace_buffered_event(void *data)
2215 {
2216 	/* Probably not needed, but do it anyway */
2217 	smp_rmb();
2218 	this_cpu_dec(trace_buffered_event_cnt);
2219 }
2220 
2221 static void disable_trace_buffered_event(void *data)
2222 {
2223 	this_cpu_inc(trace_buffered_event_cnt);
2224 }
2225 
2226 /**
2227  * trace_buffered_event_disable - disable buffering events
2228  *
2229  * When a filter is removed, it is faster to not use the buffered
2230  * events, and to commit directly into the ring buffer. Free up
2231  * the temp buffers when there are no more users. This requires
2232  * special synchronization with current events.
2233  */
2234 void trace_buffered_event_disable(void)
2235 {
2236 	int cpu;
2237 
2238 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2239 
2240 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2241 		return;
2242 
2243 	if (--trace_buffered_event_ref)
2244 		return;
2245 
2246 	preempt_disable();
2247 	/* For each CPU, set the buffer as used. */
2248 	smp_call_function_many(tracing_buffer_mask,
2249 			       disable_trace_buffered_event, NULL, 1);
2250 	preempt_enable();
2251 
2252 	/* Wait for all current users to finish */
2253 	synchronize_sched();
2254 
2255 	for_each_tracing_cpu(cpu) {
2256 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2257 		per_cpu(trace_buffered_event, cpu) = NULL;
2258 	}
2259 	/*
2260 	 * Make sure trace_buffered_event is NULL before clearing
2261 	 * trace_buffered_event_cnt.
2262 	 */
2263 	smp_wmb();
2264 
2265 	preempt_disable();
2266 	/* Do the work on each cpu */
2267 	smp_call_function_many(tracing_buffer_mask,
2268 			       enable_trace_buffered_event, NULL, 1);
2269 	preempt_enable();
2270 }
2271 
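/*
 * The enable/disable pair above is reference counted and must be called
 * with event_mutex held.  A sketch of how a filter setup path would
 * bracket its work (the code between the calls is only indicated by
 * comments):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	// ... install the filter on the event file ...
 *	mutex_unlock(&event_mutex);
 *
 *	// later, when the filter is removed:
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */
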
2272 static struct ring_buffer *temp_buffer;
2273 
2274 struct ring_buffer_event *
2275 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2276 			  struct trace_event_file *trace_file,
2277 			  int type, unsigned long len,
2278 			  unsigned long flags, int pc)
2279 {
2280 	struct ring_buffer_event *entry;
2281 	int val;
2282 
2283 	*current_rb = trace_file->tr->trace_buffer.buffer;
2284 
2285 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2286 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2287 	    (entry = this_cpu_read(trace_buffered_event))) {
2288 		/* Try to use the per cpu buffer first */
2289 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2290 		if (val == 1) {
2291 			trace_event_setup(entry, type, flags, pc);
2292 			entry->array[0] = len;
2293 			return entry;
2294 		}
2295 		this_cpu_dec(trace_buffered_event_cnt);
2296 	}
2297 
2298 	entry = __trace_buffer_lock_reserve(*current_rb,
2299 					    type, len, flags, pc);
2300 	/*
2301 	 * If tracing is off, but we have triggers enabled,
2302 	 * we still need to look at the event data. Use the temp_buffer
2303 	 * to store the trace event for the trigger to use. It's recursion
2304 	 * safe and will not be recorded anywhere.
2305 	 */
2306 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2307 		*current_rb = temp_buffer;
2308 		entry = __trace_buffer_lock_reserve(*current_rb,
2309 						    type, len, flags, pc);
2310 	}
2311 	return entry;
2312 }
2313 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2314 
2315 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2316 static DEFINE_MUTEX(tracepoint_printk_mutex);
2317 
2318 static void output_printk(struct trace_event_buffer *fbuffer)
2319 {
2320 	struct trace_event_call *event_call;
2321 	struct trace_event *event;
2322 	unsigned long flags;
2323 	struct trace_iterator *iter = tracepoint_print_iter;
2324 
2325 	/* We should never get here if iter is NULL */
2326 	if (WARN_ON_ONCE(!iter))
2327 		return;
2328 
2329 	event_call = fbuffer->trace_file->event_call;
2330 	if (!event_call || !event_call->event.funcs ||
2331 	    !event_call->event.funcs->trace)
2332 		return;
2333 
2334 	event = &fbuffer->trace_file->event_call->event;
2335 
2336 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2337 	trace_seq_init(&iter->seq);
2338 	iter->ent = fbuffer->entry;
2339 	event_call->event.funcs->trace(iter, 0, event);
2340 	trace_seq_putc(&iter->seq, 0);
2341 	printk("%s", iter->seq.buffer);
2342 
2343 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2344 }
2345 
2346 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2347 			     void __user *buffer, size_t *lenp,
2348 			     loff_t *ppos)
2349 {
2350 	int save_tracepoint_printk;
2351 	int ret;
2352 
2353 	mutex_lock(&tracepoint_printk_mutex);
2354 	save_tracepoint_printk = tracepoint_printk;
2355 
2356 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2357 
2358 	/*
2359 	 * This will force exiting early, as tracepoint_printk
2360 	 * is always zero when tracepoint_print_iter is not allocated.
2361 	 */
2362 	if (!tracepoint_print_iter)
2363 		tracepoint_printk = 0;
2364 
2365 	if (save_tracepoint_printk == tracepoint_printk)
2366 		goto out;
2367 
2368 	if (tracepoint_printk)
2369 		static_key_enable(&tracepoint_printk_key.key);
2370 	else
2371 		static_key_disable(&tracepoint_printk_key.key);
2372 
2373  out:
2374 	mutex_unlock(&tracepoint_printk_mutex);
2375 
2376 	return ret;
2377 }
2378 
2379 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2380 {
2381 	if (static_key_false(&tracepoint_printk_key.key))
2382 		output_printk(fbuffer);
2383 
2384 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2385 				    fbuffer->event, fbuffer->entry,
2386 				    fbuffer->flags, fbuffer->pc);
2387 }
2388 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2389 
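/*
 * Taken together, trace_event_buffer_lock_reserve() and
 * trace_event_buffer_commit() form the reserve/fill/commit cycle behind
 * the generated trace event code.  A rough sketch of that cycle, where
 * trace_file, type and struct foo_entry are assumed to be provided by
 * the caller:
 *
 *	struct trace_event_buffer fbuffer;
 *	struct foo_entry *entry;	// hypothetical event layout
 *
 *	fbuffer.trace_file = trace_file;
 *	local_save_flags(fbuffer.flags);
 *	fbuffer.pc = preempt_count();
 *
 *	fbuffer.event =
 *		trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file,
 *						type, sizeof(*entry),
 *						fbuffer.flags, fbuffer.pc);
 *	if (!fbuffer.event)
 *		return;
 *
 *	entry = ring_buffer_event_data(fbuffer.event);
 *	fbuffer.entry = entry;
 *	entry->value = 42;		// fill in the payload
 *
 *	trace_event_buffer_commit(&fbuffer);
 */
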
2390 /*
2391  * Skip 3:
2392  *
2393  *   trace_buffer_unlock_commit_regs()
2394  *   trace_event_buffer_commit()
2395  *   trace_event_raw_event_xxx()
2396  */
2397 # define STACK_SKIP 3
2398 
2399 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2400 				     struct ring_buffer *buffer,
2401 				     struct ring_buffer_event *event,
2402 				     unsigned long flags, int pc,
2403 				     struct pt_regs *regs)
2404 {
2405 	__buffer_unlock_commit(buffer, event);
2406 
2407 	/*
2408 	 * If regs is not set, then skip the necessary functions.
2409 	 * Note, we can still get here via blktrace, wakeup tracer
2410 	 * and mmiotrace, but that's ok if they lose a function or
2411 	 * two. They are not that meaningful.
2412 	 */
2413 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2414 	ftrace_trace_userstack(buffer, flags, pc);
2415 }
2416 
2417 /*
2418  * Similar to trace_buffer_unlock_commit_regs() but does not dump the stack.
2419  */
2420 void
2421 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2422 				   struct ring_buffer_event *event)
2423 {
2424 	__buffer_unlock_commit(buffer, event);
2425 }
2426 
2427 static void
2428 trace_process_export(struct trace_export *export,
2429 	       struct ring_buffer_event *event)
2430 {
2431 	struct trace_entry *entry;
2432 	unsigned int size = 0;
2433 	unsigned int size;
2434 	entry = ring_buffer_event_data(event);
2435 	size = ring_buffer_event_length(event);
2436 	export->write(export, entry, size);
2437 }
2438 
2439 static DEFINE_MUTEX(ftrace_export_lock);
2440 
2441 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2442 
2443 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2444 
2445 static inline void ftrace_exports_enable(void)
2446 {
2447 	static_branch_enable(&ftrace_exports_enabled);
2448 }
2449 
2450 static inline void ftrace_exports_disable(void)
2451 {
2452 	static_branch_disable(&ftrace_exports_enabled);
2453 }
2454 
2455 void ftrace_exports(struct ring_buffer_event *event)
2456 {
2457 	struct trace_export *export;
2458 
2459 	preempt_disable_notrace();
2460 
2461 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2462 	while (export) {
2463 		trace_process_export(export, event);
2464 		export = rcu_dereference_raw_notrace(export->next);
2465 	}
2466 
2467 	preempt_enable_notrace();
2468 }
2469 
2470 static inline void
2471 add_trace_export(struct trace_export **list, struct trace_export *export)
2472 {
2473 	rcu_assign_pointer(export->next, *list);
2474 	/*
2475 	 * We are adding export to the list, but another
2476 	 * CPU might be walking that list. We need to make sure
2477 	 * the export->next pointer is valid before another CPU sees
2478 	 * the export pointer inserted into the list.
2479 	 */
2480 	rcu_assign_pointer(*list, export);
2481 }
2482 
2483 static inline int
2484 rm_trace_export(struct trace_export **list, struct trace_export *export)
2485 {
2486 	struct trace_export **p;
2487 
2488 	for (p = list; *p != NULL; p = &(*p)->next)
2489 		if (*p == export)
2490 			break;
2491 
2492 	if (*p != export)
2493 		return -1;
2494 
2495 	rcu_assign_pointer(*p, (*p)->next);
2496 
2497 	return 0;
2498 }
2499 
2500 static inline void
2501 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2502 {
2503 	if (*list == NULL)
2504 		ftrace_exports_enable();
2505 
2506 	add_trace_export(list, export);
2507 }
2508 
2509 static inline int
2510 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2511 {
2512 	int ret;
2513 
2514 	ret = rm_trace_export(list, export);
2515 	if (*list == NULL)
2516 		ftrace_exports_disable();
2517 
2518 	return ret;
2519 }
2520 
2521 int register_ftrace_export(struct trace_export *export)
2522 {
2523 	if (WARN_ON_ONCE(!export->write))
2524 		return -1;
2525 
2526 	mutex_lock(&ftrace_export_lock);
2527 
2528 	add_ftrace_export(&ftrace_exports_list, export);
2529 
2530 	mutex_unlock(&ftrace_export_lock);
2531 
2532 	return 0;
2533 }
2534 EXPORT_SYMBOL_GPL(register_ftrace_export);
2535 
2536 int unregister_ftrace_export(struct trace_export *export)
2537 {
2538 	int ret;
2539 
2540 	mutex_lock(&ftrace_export_lock);
2541 
2542 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2543 
2544 	mutex_unlock(&ftrace_export_lock);
2545 
2546 	return ret;
2547 }
2548 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2549 
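/*
 * A minimal sketch of a trace_export user: supply a write() callback
 * that receives each exported event and register it.  The names
 * my_export/my_write are hypothetical.
 *
 *	static void my_write(struct trace_export *export, const void *entry,
 *			     unsigned int size)
 *	{
 *		// forward @size bytes of @entry to an out-of-band sink
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *	};
 *
 *	// typically from module init:
 *	register_ftrace_export(&my_export);
 *
 *	// and from module exit:
 *	unregister_ftrace_export(&my_export);
 */
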
2550 void
2551 trace_function(struct trace_array *tr,
2552 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2553 	       int pc)
2554 {
2555 	struct trace_event_call *call = &event_function;
2556 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2557 	struct ring_buffer_event *event;
2558 	struct ftrace_entry *entry;
2559 
2560 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2561 					    flags, pc);
2562 	if (!event)
2563 		return;
2564 	entry	= ring_buffer_event_data(event);
2565 	entry->ip			= ip;
2566 	entry->parent_ip		= parent_ip;
2567 
2568 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2569 		if (static_branch_unlikely(&ftrace_exports_enabled))
2570 			ftrace_exports(event);
2571 		__buffer_unlock_commit(buffer, event);
2572 	}
2573 }
2574 
2575 #ifdef CONFIG_STACKTRACE
2576 
2577 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2578 struct ftrace_stack {
2579 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2580 };
2581 
2582 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2583 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2584 
2585 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2586 				 unsigned long flags,
2587 				 int skip, int pc, struct pt_regs *regs)
2588 {
2589 	struct trace_event_call *call = &event_kernel_stack;
2590 	struct ring_buffer_event *event;
2591 	struct stack_entry *entry;
2592 	struct stack_trace trace;
2593 	int use_stack;
2594 	int size = FTRACE_STACK_ENTRIES;
2595 
2596 	trace.nr_entries	= 0;
2597 	trace.skip		= skip;
2598 
2599 	/*
2600 	 * Add one, for this function and the call to save_stack_trace().
2601 	 * If regs is set, then these functions will not be in the way.
2602 	 */
2603 #ifndef CONFIG_UNWINDER_ORC
2604 	if (!regs)
2605 		trace.skip++;
2606 #endif
2607 
2608 	/*
2609 	 * Since events can happen in NMIs there's no safe way to
2610 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2611 	 * or NMI comes in, it will just have to use the default
2612 	 * FTRACE_STACK_ENTRIES-sized stack saved directly in the event.
2613 	 */
2614 	preempt_disable_notrace();
2615 
2616 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2617 	/*
2618 	 * We don't need any atomic variables, just a barrier.
2619 	 * If an interrupt comes in, we don't care, because it would
2620 	 * have exited and put the counter back to what we want.
2621 	 * We just need a barrier to keep gcc from moving things
2622 	 * around.
2623 	 */
2624 	barrier();
2625 	if (use_stack == 1) {
2626 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2627 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2628 
2629 		if (regs)
2630 			save_stack_trace_regs(regs, &trace);
2631 		else
2632 			save_stack_trace(&trace);
2633 
2634 		if (trace.nr_entries > size)
2635 			size = trace.nr_entries;
2636 	} else
2637 		/* From now on, use_stack is a boolean */
2638 		use_stack = 0;
2639 
2640 	size *= sizeof(unsigned long);
2641 
2642 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2643 					    sizeof(*entry) + size, flags, pc);
2644 	if (!event)
2645 		goto out;
2646 	entry = ring_buffer_event_data(event);
2647 
2648 	memset(&entry->caller, 0, size);
2649 
2650 	if (use_stack)
2651 		memcpy(&entry->caller, trace.entries,
2652 		       trace.nr_entries * sizeof(unsigned long));
2653 	else {
2654 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2655 		trace.entries		= entry->caller;
2656 		if (regs)
2657 			save_stack_trace_regs(regs, &trace);
2658 		else
2659 			save_stack_trace(&trace);
2660 	}
2661 
2662 	entry->size = trace.nr_entries;
2663 
2664 	if (!call_filter_check_discard(call, entry, buffer, event))
2665 		__buffer_unlock_commit(buffer, event);
2666 
2667  out:
2668 	/* Again, don't let gcc optimize things here */
2669 	barrier();
2670 	__this_cpu_dec(ftrace_stack_reserve);
2671 	preempt_enable_notrace();
2672 
2673 }
2674 
2675 static inline void ftrace_trace_stack(struct trace_array *tr,
2676 				      struct ring_buffer *buffer,
2677 				      unsigned long flags,
2678 				      int skip, int pc, struct pt_regs *regs)
2679 {
2680 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2681 		return;
2682 
2683 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2684 }
2685 
2686 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2687 		   int pc)
2688 {
2689 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2690 
2691 	if (rcu_is_watching()) {
2692 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2693 		return;
2694 	}
2695 
2696 	/*
2697 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2698 	 * but if the above rcu_is_watching() failed, then the NMI
2699 	 * triggered someplace critical, and rcu_irq_enter() should
2700 	 * not be called from NMI.
2701 	 */
2702 	if (unlikely(in_nmi()))
2703 		return;
2704 
2705 	rcu_irq_enter_irqson();
2706 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2707 	rcu_irq_exit_irqson();
2708 }
2709 
2710 /**
2711  * trace_dump_stack - record a stack back trace in the trace buffer
2712  * @skip: Number of functions to skip (helper handlers)
2713  */
2714 void trace_dump_stack(int skip)
2715 {
2716 	unsigned long flags;
2717 
2718 	if (tracing_disabled || tracing_selftest_running)
2719 		return;
2720 
2721 	local_save_flags(flags);
2722 
2723 #ifndef CONFIG_UNWINDER_ORC
2724 	/* Skip 1 to skip this function. */
2725 	skip++;
2726 #endif
2727 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2728 			     flags, skip, preempt_count(), NULL);
2729 }
2730 
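/*
 * trace_dump_stack() is the trace-buffer analogue of dump_stack(): it
 * records the current kernel stack as a stack-trace event instead of
 * printing it to the console.  A sketch of a typical call from a spot
 * being debugged (the condition is hypothetical):
 *
 *	if (suspicious_condition)
 *		trace_dump_stack(0);	// 0: don't skip any extra callers
 */
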
2731 static DEFINE_PER_CPU(int, user_stack_count);
2732 
2733 void
2734 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2735 {
2736 	struct trace_event_call *call = &event_user_stack;
2737 	struct ring_buffer_event *event;
2738 	struct userstack_entry *entry;
2739 	struct stack_trace trace;
2740 
2741 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2742 		return;
2743 
2744 	/*
2745 	 * NMIs cannot handle page faults, even with fixups.
2746 	 * Saving the user stack can (and often does) fault.
2747 	 */
2748 	if (unlikely(in_nmi()))
2749 		return;
2750 
2751 	/*
2752 	 * Prevent recursion, since user stack tracing may
2753 	 * trigger other kernel events.
2754 	 */
2755 	preempt_disable();
2756 	if (__this_cpu_read(user_stack_count))
2757 		goto out;
2758 
2759 	__this_cpu_inc(user_stack_count);
2760 
2761 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2762 					    sizeof(*entry), flags, pc);
2763 	if (!event)
2764 		goto out_drop_count;
2765 	entry	= ring_buffer_event_data(event);
2766 
2767 	entry->tgid		= current->tgid;
2768 	memset(&entry->caller, 0, sizeof(entry->caller));
2769 
2770 	trace.nr_entries	= 0;
2771 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2772 	trace.skip		= 0;
2773 	trace.entries		= entry->caller;
2774 
2775 	save_stack_trace_user(&trace);
2776 	if (!call_filter_check_discard(call, entry, buffer, event))
2777 		__buffer_unlock_commit(buffer, event);
2778 
2779  out_drop_count:
2780 	__this_cpu_dec(user_stack_count);
2781  out:
2782 	preempt_enable();
2783 }
2784 
2785 #ifdef UNUSED
2786 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2787 {
2788 	ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2789 }
2790 #endif /* UNUSED */
2791 
2792 #endif /* CONFIG_STACKTRACE */
2793 
2794 /* created for use with alloc_percpu */
2795 struct trace_buffer_struct {
2796 	int nesting;
2797 	char buffer[4][TRACE_BUF_SIZE];
2798 };
2799 
2800 static struct trace_buffer_struct *trace_percpu_buffer;
2801 
2802 /*
2803  * This allows for lockless recording.  If we're nested too deeply, then
2804  * this returns NULL.
2805  */
2806 static char *get_trace_buf(void)
2807 {
2808 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2809 
2810 	if (!buffer || buffer->nesting >= 4)
2811 		return NULL;
2812 
2813 	buffer->nesting++;
2814 
2815 	/* Interrupts must see nesting incremented before we use the buffer */
2816 	barrier();
2817 	return &buffer->buffer[buffer->nesting][0];
2818 }
2819 
2820 static void put_trace_buf(void)
2821 {
2822 	/* Don't let the decrement of nesting leak before this */
2823 	barrier();
2824 	this_cpu_dec(trace_percpu_buffer->nesting);
2825 }
2826 
2827 static int alloc_percpu_trace_buffer(void)
2828 {
2829 	struct trace_buffer_struct *buffers;
2830 
2831 	buffers = alloc_percpu(struct trace_buffer_struct);
2832 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2833 		return -ENOMEM;
2834 
2835 	trace_percpu_buffer = buffers;
2836 	return 0;
2837 }
2838 
2839 static int buffers_allocated;
2840 
2841 void trace_printk_init_buffers(void)
2842 {
2843 	if (buffers_allocated)
2844 		return;
2845 
2846 	if (alloc_percpu_trace_buffer())
2847 		return;
2848 
2849 	/* trace_printk() is for debug use only. Don't use it in production. */
2850 
2851 	pr_warn("\n");
2852 	pr_warn("**********************************************************\n");
2853 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2854 	pr_warn("**                                                      **\n");
2855 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2856 	pr_warn("**                                                      **\n");
2857 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2858 	pr_warn("** unsafe for production use.                           **\n");
2859 	pr_warn("**                                                      **\n");
2860 	pr_warn("** If you see this message and you are not debugging    **\n");
2861 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2862 	pr_warn("**                                                      **\n");
2863 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2864 	pr_warn("**********************************************************\n");
2865 
2866 	/* Expand the buffers to set size */
2867 	tracing_update_buffers();
2868 
2869 	buffers_allocated = 1;
2870 
2871 	/*
2872 	 * trace_printk_init_buffers() can be called by modules.
2873 	 * If that happens, then we need to start cmdline recording
2874 	 * directly here. If global_trace.trace_buffer.buffer is already
2875 	 * allocated here, then this was called by module code.
2876 	 */
2877 	if (global_trace.trace_buffer.buffer)
2878 		tracing_start_cmdline_record();
2879 }
2880 
2881 void trace_printk_start_comm(void)
2882 {
2883 	/* Start tracing comms if trace printk is set */
2884 	if (!buffers_allocated)
2885 		return;
2886 	tracing_start_cmdline_record();
2887 }
2888 
2889 static void trace_printk_start_stop_comm(int enabled)
2890 {
2891 	if (!buffers_allocated)
2892 		return;
2893 
2894 	if (enabled)
2895 		tracing_start_cmdline_record();
2896 	else
2897 		tracing_stop_cmdline_record();
2898 }
2899 
2900 /**
2901  * trace_vbprintk - write binary msg to tracing buffer
2902  *
2903  */
2904 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2905 {
2906 	struct trace_event_call *call = &event_bprint;
2907 	struct ring_buffer_event *event;
2908 	struct ring_buffer *buffer;
2909 	struct trace_array *tr = &global_trace;
2910 	struct bprint_entry *entry;
2911 	unsigned long flags;
2912 	char *tbuffer;
2913 	int len = 0, size, pc;
2914 
2915 	if (unlikely(tracing_selftest_running || tracing_disabled))
2916 		return 0;
2917 
2918 	/* Don't pollute graph traces with trace_vprintk internals */
2919 	pause_graph_tracing();
2920 
2921 	pc = preempt_count();
2922 	preempt_disable_notrace();
2923 
2924 	tbuffer = get_trace_buf();
2925 	if (!tbuffer) {
2926 		len = 0;
2927 		goto out_nobuffer;
2928 	}
2929 
2930 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2931 
2932 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2933 		goto out;
2934 
2935 	local_save_flags(flags);
2936 	size = sizeof(*entry) + sizeof(u32) * len;
2937 	buffer = tr->trace_buffer.buffer;
2938 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2939 					    flags, pc);
2940 	if (!event)
2941 		goto out;
2942 	entry = ring_buffer_event_data(event);
2943 	entry->ip			= ip;
2944 	entry->fmt			= fmt;
2945 
2946 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2947 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2948 		__buffer_unlock_commit(buffer, event);
2949 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2950 	}
2951 
2952 out:
2953 	put_trace_buf();
2954 
2955 out_nobuffer:
2956 	preempt_enable_notrace();
2957 	unpause_graph_tracing();
2958 
2959 	return len;
2960 }
2961 EXPORT_SYMBOL_GPL(trace_vbprintk);
2962 
2963 __printf(3, 0)
2964 static int
2965 __trace_array_vprintk(struct ring_buffer *buffer,
2966 		      unsigned long ip, const char *fmt, va_list args)
2967 {
2968 	struct trace_event_call *call = &event_print;
2969 	struct ring_buffer_event *event;
2970 	int len = 0, size, pc;
2971 	struct print_entry *entry;
2972 	unsigned long flags;
2973 	char *tbuffer;
2974 
2975 	if (tracing_disabled || tracing_selftest_running)
2976 		return 0;
2977 
2978 	/* Don't pollute graph traces with trace_vprintk internals */
2979 	pause_graph_tracing();
2980 
2981 	pc = preempt_count();
2982 	preempt_disable_notrace();
2983 
2984 
2985 	tbuffer = get_trace_buf();
2986 	if (!tbuffer) {
2987 		len = 0;
2988 		goto out_nobuffer;
2989 	}
2990 
2991 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2992 
2993 	local_save_flags(flags);
2994 	size = sizeof(*entry) + len + 1;
2995 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2996 					    flags, pc);
2997 	if (!event)
2998 		goto out;
2999 	entry = ring_buffer_event_data(event);
3000 	entry->ip = ip;
3001 
3002 	memcpy(&entry->buf, tbuffer, len + 1);
3003 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3004 		__buffer_unlock_commit(buffer, event);
3005 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3006 	}
3007 
3008 out:
3009 	put_trace_buf();
3010 
3011 out_nobuffer:
3012 	preempt_enable_notrace();
3013 	unpause_graph_tracing();
3014 
3015 	return len;
3016 }
3017 
3018 __printf(3, 0)
3019 int trace_array_vprintk(struct trace_array *tr,
3020 			unsigned long ip, const char *fmt, va_list args)
3021 {
3022 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3023 }
3024 
3025 __printf(3, 0)
3026 int trace_array_printk(struct trace_array *tr,
3027 		       unsigned long ip, const char *fmt, ...)
3028 {
3029 	int ret;
3030 	va_list ap;
3031 
3032 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3033 		return 0;
3034 
3035 	va_start(ap, fmt);
3036 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3037 	va_end(ap);
3038 	return ret;
3039 }
3040 
3041 __printf(3, 4)
3042 int trace_array_printk_buf(struct ring_buffer *buffer,
3043 			   unsigned long ip, const char *fmt, ...)
3044 {
3045 	int ret;
3046 	va_list ap;
3047 
3048 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3049 		return 0;
3050 
3051 	va_start(ap, fmt);
3052 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3053 	va_end(ap);
3054 	return ret;
3055 }
3056 
3057 __printf(2, 0)
3058 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3059 {
3060 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3061 }
3062 EXPORT_SYMBOL_GPL(trace_vprintk);
3063 
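/*
 * trace_vprintk() writes a formatted string into the global trace
 * buffer.  A sketch of a varargs wrapper built on top of it, mirroring
 * trace_array_printk() above (the wrapper name is hypothetical):
 *
 *	static __printf(2, 3)
 *	int my_trace_printk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vprintk(ip, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 */
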
3064 static void trace_iterator_increment(struct trace_iterator *iter)
3065 {
3066 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3067 
3068 	iter->idx++;
3069 	if (buf_iter)
3070 		ring_buffer_read(buf_iter, NULL);
3071 }
3072 
3073 static struct trace_entry *
3074 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3075 		unsigned long *lost_events)
3076 {
3077 	struct ring_buffer_event *event;
3078 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3079 
3080 	if (buf_iter)
3081 		event = ring_buffer_iter_peek(buf_iter, ts);
3082 	else
3083 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3084 					 lost_events);
3085 
3086 	if (event) {
3087 		iter->ent_size = ring_buffer_event_length(event);
3088 		return ring_buffer_event_data(event);
3089 	}
3090 	iter->ent_size = 0;
3091 	return NULL;
3092 }
3093 
3094 static struct trace_entry *
3095 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3096 		  unsigned long *missing_events, u64 *ent_ts)
3097 {
3098 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3099 	struct trace_entry *ent, *next = NULL;
3100 	unsigned long lost_events = 0, next_lost = 0;
3101 	int cpu_file = iter->cpu_file;
3102 	u64 next_ts = 0, ts;
3103 	int next_cpu = -1;
3104 	int next_size = 0;
3105 	int cpu;
3106 
3107 	/*
3108 	 * If we are in a per_cpu trace file, don't bother iterating over
3109 	 * all CPUs; just peek at that one directly.
3110 	 */
3111 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3112 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3113 			return NULL;
3114 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3115 		if (ent_cpu)
3116 			*ent_cpu = cpu_file;
3117 
3118 		return ent;
3119 	}
3120 
3121 	for_each_tracing_cpu(cpu) {
3122 
3123 		if (ring_buffer_empty_cpu(buffer, cpu))
3124 			continue;
3125 
3126 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3127 
3128 		/*
3129 		 * Pick the entry with the smallest timestamp:
3130 		 */
3131 		if (ent && (!next || ts < next_ts)) {
3132 			next = ent;
3133 			next_cpu = cpu;
3134 			next_ts = ts;
3135 			next_lost = lost_events;
3136 			next_size = iter->ent_size;
3137 		}
3138 	}
3139 
3140 	iter->ent_size = next_size;
3141 
3142 	if (ent_cpu)
3143 		*ent_cpu = next_cpu;
3144 
3145 	if (ent_ts)
3146 		*ent_ts = next_ts;
3147 
3148 	if (missing_events)
3149 		*missing_events = next_lost;
3150 
3151 	return next;
3152 }
3153 
3154 /* Find the next real entry, without updating the iterator itself */
3155 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3156 					  int *ent_cpu, u64 *ent_ts)
3157 {
3158 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3159 }
3160 
3161 /* Find the next real entry, and increment the iterator to the next entry */
3162 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3163 {
3164 	iter->ent = __find_next_entry(iter, &iter->cpu,
3165 				      &iter->lost_events, &iter->ts);
3166 
3167 	if (iter->ent)
3168 		trace_iterator_increment(iter);
3169 
3170 	return iter->ent ? iter : NULL;
3171 }
3172 
3173 static void trace_consume(struct trace_iterator *iter)
3174 {
3175 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3176 			    &iter->lost_events);
3177 }
3178 
3179 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3180 {
3181 	struct trace_iterator *iter = m->private;
3182 	int i = (int)*pos;
3183 	void *ent;
3184 
3185 	WARN_ON_ONCE(iter->leftover);
3186 
3187 	(*pos)++;
3188 
3189 	/* can't go backwards */
3190 	if (iter->idx > i)
3191 		return NULL;
3192 
3193 	if (iter->idx < 0)
3194 		ent = trace_find_next_entry_inc(iter);
3195 	else
3196 		ent = iter;
3197 
3198 	while (ent && iter->idx < i)
3199 		ent = trace_find_next_entry_inc(iter);
3200 
3201 	iter->pos = *pos;
3202 
3203 	return ent;
3204 }
3205 
3206 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3207 {
3208 	struct ring_buffer_event *event;
3209 	struct ring_buffer_iter *buf_iter;
3210 	unsigned long entries = 0;
3211 	u64 ts;
3212 
3213 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3214 
3215 	buf_iter = trace_buffer_iter(iter, cpu);
3216 	if (!buf_iter)
3217 		return;
3218 
3219 	ring_buffer_iter_reset(buf_iter);
3220 
3221 	/*
3222 	 * With the max latency tracers, it can happen that a reset
3223 	 * never took place on a cpu. This is evident when the
3224 	 * timestamp is before the start of the buffer.
3225 	 */
3226 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3227 		if (ts >= iter->trace_buffer->time_start)
3228 			break;
3229 		entries++;
3230 		ring_buffer_read(buf_iter, NULL);
3231 	}
3232 
3233 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3234 }
3235 
3236 /*
3237  * The current tracer is copied to avoid global locking
3238  * all around.
3239  */
3240 static void *s_start(struct seq_file *m, loff_t *pos)
3241 {
3242 	struct trace_iterator *iter = m->private;
3243 	struct trace_array *tr = iter->tr;
3244 	int cpu_file = iter->cpu_file;
3245 	void *p = NULL;
3246 	loff_t l = 0;
3247 	int cpu;
3248 
3249 	/*
3250 	 * Copy the tracer to avoid using a global lock all around.
3251 	 * iter->trace is a copy of current_trace; the pointer to the
3252 	 * name may be used instead of a strcmp(), as iter->trace->name
3253 	 * will point to the same string as current_trace->name.
3254 	 */
3255 	mutex_lock(&trace_types_lock);
3256 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3257 		*iter->trace = *tr->current_trace;
3258 	mutex_unlock(&trace_types_lock);
3259 
3260 #ifdef CONFIG_TRACER_MAX_TRACE
3261 	if (iter->snapshot && iter->trace->use_max_tr)
3262 		return ERR_PTR(-EBUSY);
3263 #endif
3264 
3265 	if (!iter->snapshot)
3266 		atomic_inc(&trace_record_taskinfo_disabled);
3267 
3268 	if (*pos != iter->pos) {
3269 		iter->ent = NULL;
3270 		iter->cpu = 0;
3271 		iter->idx = -1;
3272 
3273 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3274 			for_each_tracing_cpu(cpu)
3275 				tracing_iter_reset(iter, cpu);
3276 		} else
3277 			tracing_iter_reset(iter, cpu_file);
3278 
3279 		iter->leftover = 0;
3280 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3281 			;
3282 
3283 	} else {
3284 		/*
3285 		 * If we overflowed the seq_file before, then we want
3286 		 * to just reuse the trace_seq buffer again.
3287 		 */
3288 		if (iter->leftover)
3289 			p = iter;
3290 		else {
3291 			l = *pos - 1;
3292 			p = s_next(m, p, &l);
3293 		}
3294 	}
3295 
3296 	trace_event_read_lock();
3297 	trace_access_lock(cpu_file);
3298 	return p;
3299 }
3300 
3301 static void s_stop(struct seq_file *m, void *p)
3302 {
3303 	struct trace_iterator *iter = m->private;
3304 
3305 #ifdef CONFIG_TRACER_MAX_TRACE
3306 	if (iter->snapshot && iter->trace->use_max_tr)
3307 		return;
3308 #endif
3309 
3310 	if (!iter->snapshot)
3311 		atomic_dec(&trace_record_taskinfo_disabled);
3312 
3313 	trace_access_unlock(iter->cpu_file);
3314 	trace_event_read_unlock();
3315 }
3316 
3317 static void
3318 get_total_entries(struct trace_buffer *buf,
3319 		  unsigned long *total, unsigned long *entries)
3320 {
3321 	unsigned long count;
3322 	int cpu;
3323 
3324 	*total = 0;
3325 	*entries = 0;
3326 
3327 	for_each_tracing_cpu(cpu) {
3328 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3329 		/*
3330 		 * If this buffer has skipped entries, then we hold all
3331 		 * entries for the trace and we need to ignore the
3332 		 * ones before the time stamp.
3333 		 */
3334 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3335 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3336 			/* total is the same as the entries */
3337 			*total += count;
3338 		} else
3339 			*total += count +
3340 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3341 		*entries += count;
3342 	}
3343 }
3344 
3345 static void print_lat_help_header(struct seq_file *m)
3346 {
3347 	seq_puts(m, "#                  _------=> CPU#            \n"
3348 		    "#                 / _-----=> irqs-off        \n"
3349 		    "#                | / _----=> need-resched    \n"
3350 		    "#                || / _---=> hardirq/softirq \n"
3351 		    "#                ||| / _--=> preempt-depth   \n"
3352 		    "#                |||| /     delay            \n"
3353 		    "#  cmd     pid   ||||| time  |   caller      \n"
3354 		    "#     \\   /      |||||  \\    |   /         \n");
3355 }
3356 
3357 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3358 {
3359 	unsigned long total;
3360 	unsigned long entries;
3361 
3362 	get_total_entries(buf, &total, &entries);
3363 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3364 		   entries, total, num_online_cpus());
3365 	seq_puts(m, "#\n");
3366 }
3367 
3368 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3369 				   unsigned int flags)
3370 {
3371 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3372 
3373 	print_event_info(buf, m);
3374 
3375 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3376 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3377 }
3378 
3379 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3380 				       unsigned int flags)
3381 {
3382 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3383 	const char tgid_space[] = "          ";
3384 	const char space[] = "  ";
3385 
3386 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3387 		   tgid ? tgid_space : space);
3388 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3389 		   tgid ? tgid_space : space);
3390 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3391 		   tgid ? tgid_space : space);
3392 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3393 		   tgid ? tgid_space : space);
3394 	seq_printf(m, "#                          %s||| /     delay\n",
3395 		   tgid ? tgid_space : space);
3396 	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3397 		   tgid ? "   TGID   " : space);
3398 	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3399 		   tgid ? "     |    " : space);
3400 }
3401 
3402 void
3403 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3404 {
3405 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3406 	struct trace_buffer *buf = iter->trace_buffer;
3407 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3408 	struct tracer *type = iter->trace;
3409 	unsigned long entries;
3410 	unsigned long total;
3411 	const char *name = "preemption";
3412 
3413 	name = type->name;
3414 
3415 	get_total_entries(buf, &total, &entries);
3416 
3417 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3418 		   name, UTS_RELEASE);
3419 	seq_puts(m, "# -----------------------------------"
3420 		 "---------------------------------\n");
3421 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3422 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3423 		   nsecs_to_usecs(data->saved_latency),
3424 		   entries,
3425 		   total,
3426 		   buf->cpu,
3427 #if defined(CONFIG_PREEMPT_NONE)
3428 		   "server",
3429 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3430 		   "desktop",
3431 #elif defined(CONFIG_PREEMPT)
3432 		   "preempt",
3433 #else
3434 		   "unknown",
3435 #endif
3436 		   /* These are reserved for later use */
3437 		   0, 0, 0, 0);
3438 #ifdef CONFIG_SMP
3439 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3440 #else
3441 	seq_puts(m, ")\n");
3442 #endif
3443 	seq_puts(m, "#    -----------------\n");
3444 	seq_printf(m, "#    | task: %.16s-%d "
3445 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3446 		   data->comm, data->pid,
3447 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3448 		   data->policy, data->rt_priority);
3449 	seq_puts(m, "#    -----------------\n");
3450 
3451 	if (data->critical_start) {
3452 		seq_puts(m, "#  => started at: ");
3453 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3454 		trace_print_seq(m, &iter->seq);
3455 		seq_puts(m, "\n#  => ended at:   ");
3456 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3457 		trace_print_seq(m, &iter->seq);
3458 		seq_puts(m, "\n#\n");
3459 	}
3460 
3461 	seq_puts(m, "#\n");
3462 }
3463 
3464 static void test_cpu_buff_start(struct trace_iterator *iter)
3465 {
3466 	struct trace_seq *s = &iter->seq;
3467 	struct trace_array *tr = iter->tr;
3468 
3469 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3470 		return;
3471 
3472 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3473 		return;
3474 
3475 	if (cpumask_available(iter->started) &&
3476 	    cpumask_test_cpu(iter->cpu, iter->started))
3477 		return;
3478 
3479 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3480 		return;
3481 
3482 	if (cpumask_available(iter->started))
3483 		cpumask_set_cpu(iter->cpu, iter->started);
3484 
3485 	/* Don't print started cpu buffer for the first entry of the trace */
3486 	if (iter->idx > 1)
3487 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3488 				iter->cpu);
3489 }
3490 
3491 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3492 {
3493 	struct trace_array *tr = iter->tr;
3494 	struct trace_seq *s = &iter->seq;
3495 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3496 	struct trace_entry *entry;
3497 	struct trace_event *event;
3498 
3499 	entry = iter->ent;
3500 
3501 	test_cpu_buff_start(iter);
3502 
3503 	event = ftrace_find_event(entry->type);
3504 
3505 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3506 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3507 			trace_print_lat_context(iter);
3508 		else
3509 			trace_print_context(iter);
3510 	}
3511 
3512 	if (trace_seq_has_overflowed(s))
3513 		return TRACE_TYPE_PARTIAL_LINE;
3514 
3515 	if (event)
3516 		return event->funcs->trace(iter, sym_flags, event);
3517 
3518 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3519 
3520 	return trace_handle_return(s);
3521 }
3522 
3523 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3524 {
3525 	struct trace_array *tr = iter->tr;
3526 	struct trace_seq *s = &iter->seq;
3527 	struct trace_entry *entry;
3528 	struct trace_event *event;
3529 
3530 	entry = iter->ent;
3531 
3532 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3533 		trace_seq_printf(s, "%d %d %llu ",
3534 				 entry->pid, iter->cpu, iter->ts);
3535 
3536 	if (trace_seq_has_overflowed(s))
3537 		return TRACE_TYPE_PARTIAL_LINE;
3538 
3539 	event = ftrace_find_event(entry->type);
3540 	if (event)
3541 		return event->funcs->raw(iter, 0, event);
3542 
3543 	trace_seq_printf(s, "%d ?\n", entry->type);
3544 
3545 	return trace_handle_return(s);
3546 }
3547 
3548 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3549 {
3550 	struct trace_array *tr = iter->tr;
3551 	struct trace_seq *s = &iter->seq;
3552 	unsigned char newline = '\n';
3553 	struct trace_entry *entry;
3554 	struct trace_event *event;
3555 
3556 	entry = iter->ent;
3557 
3558 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3559 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3560 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3561 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3562 		if (trace_seq_has_overflowed(s))
3563 			return TRACE_TYPE_PARTIAL_LINE;
3564 	}
3565 
3566 	event = ftrace_find_event(entry->type);
3567 	if (event) {
3568 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3569 		if (ret != TRACE_TYPE_HANDLED)
3570 			return ret;
3571 	}
3572 
3573 	SEQ_PUT_FIELD(s, newline);
3574 
3575 	return trace_handle_return(s);
3576 }
3577 
3578 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3579 {
3580 	struct trace_array *tr = iter->tr;
3581 	struct trace_seq *s = &iter->seq;
3582 	struct trace_entry *entry;
3583 	struct trace_event *event;
3584 
3585 	entry = iter->ent;
3586 
3587 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3588 		SEQ_PUT_FIELD(s, entry->pid);
3589 		SEQ_PUT_FIELD(s, iter->cpu);
3590 		SEQ_PUT_FIELD(s, iter->ts);
3591 		if (trace_seq_has_overflowed(s))
3592 			return TRACE_TYPE_PARTIAL_LINE;
3593 	}
3594 
3595 	event = ftrace_find_event(entry->type);
3596 	return event ? event->funcs->binary(iter, 0, event) :
3597 		TRACE_TYPE_HANDLED;
3598 }
3599 
3600 int trace_empty(struct trace_iterator *iter)
3601 {
3602 	struct ring_buffer_iter *buf_iter;
3603 	int cpu;
3604 
3605 	/* If we are looking at one CPU buffer, only check that one */
3606 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3607 		cpu = iter->cpu_file;
3608 		buf_iter = trace_buffer_iter(iter, cpu);
3609 		if (buf_iter) {
3610 			if (!ring_buffer_iter_empty(buf_iter))
3611 				return 0;
3612 		} else {
3613 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3614 				return 0;
3615 		}
3616 		return 1;
3617 	}
3618 
3619 	for_each_tracing_cpu(cpu) {
3620 		buf_iter = trace_buffer_iter(iter, cpu);
3621 		if (buf_iter) {
3622 			if (!ring_buffer_iter_empty(buf_iter))
3623 				return 0;
3624 		} else {
3625 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3626 				return 0;
3627 		}
3628 	}
3629 
3630 	return 1;
3631 }
3632 
3633 /*  Called with trace_event_read_lock() held. */
3634 enum print_line_t print_trace_line(struct trace_iterator *iter)
3635 {
3636 	struct trace_array *tr = iter->tr;
3637 	unsigned long trace_flags = tr->trace_flags;
3638 	enum print_line_t ret;
3639 
3640 	if (iter->lost_events) {
3641 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3642 				 iter->cpu, iter->lost_events);
3643 		if (trace_seq_has_overflowed(&iter->seq))
3644 			return TRACE_TYPE_PARTIAL_LINE;
3645 	}
3646 
3647 	if (iter->trace && iter->trace->print_line) {
3648 		ret = iter->trace->print_line(iter);
3649 		if (ret != TRACE_TYPE_UNHANDLED)
3650 			return ret;
3651 	}
3652 
3653 	if (iter->ent->type == TRACE_BPUTS &&
3654 			trace_flags & TRACE_ITER_PRINTK &&
3655 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3656 		return trace_print_bputs_msg_only(iter);
3657 
3658 	if (iter->ent->type == TRACE_BPRINT &&
3659 			trace_flags & TRACE_ITER_PRINTK &&
3660 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3661 		return trace_print_bprintk_msg_only(iter);
3662 
3663 	if (iter->ent->type == TRACE_PRINT &&
3664 			trace_flags & TRACE_ITER_PRINTK &&
3665 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3666 		return trace_print_printk_msg_only(iter);
3667 
3668 	if (trace_flags & TRACE_ITER_BIN)
3669 		return print_bin_fmt(iter);
3670 
3671 	if (trace_flags & TRACE_ITER_HEX)
3672 		return print_hex_fmt(iter);
3673 
3674 	if (trace_flags & TRACE_ITER_RAW)
3675 		return print_raw_fmt(iter);
3676 
3677 	return print_trace_fmt(iter);
3678 }
3679 
3680 void trace_latency_header(struct seq_file *m)
3681 {
3682 	struct trace_iterator *iter = m->private;
3683 	struct trace_array *tr = iter->tr;
3684 
3685 	/* print nothing if the buffers are empty */
3686 	if (trace_empty(iter))
3687 		return;
3688 
3689 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3690 		print_trace_header(m, iter);
3691 
3692 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3693 		print_lat_help_header(m);
3694 }
3695 
3696 void trace_default_header(struct seq_file *m)
3697 {
3698 	struct trace_iterator *iter = m->private;
3699 	struct trace_array *tr = iter->tr;
3700 	unsigned long trace_flags = tr->trace_flags;
3701 
3702 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3703 		return;
3704 
3705 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3706 		/* print nothing if the buffers are empty */
3707 		if (trace_empty(iter))
3708 			return;
3709 		print_trace_header(m, iter);
3710 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3711 			print_lat_help_header(m);
3712 	} else {
3713 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3714 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3715 				print_func_help_header_irq(iter->trace_buffer,
3716 							   m, trace_flags);
3717 			else
3718 				print_func_help_header(iter->trace_buffer, m,
3719 						       trace_flags);
3720 		}
3721 	}
3722 }
3723 
3724 static void test_ftrace_alive(struct seq_file *m)
3725 {
3726 	if (!ftrace_is_dead())
3727 		return;
3728 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3729 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3730 }
3731 
3732 #ifdef CONFIG_TRACER_MAX_TRACE
3733 static void show_snapshot_main_help(struct seq_file *m)
3734 {
3735 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3736 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3737 		    "#                      Takes a snapshot of the main buffer.\n"
3738 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3739 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3740 		    "#                       is not a '0' or '1')\n");
3741 }
3742 
3743 static void show_snapshot_percpu_help(struct seq_file *m)
3744 {
3745 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3746 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3747 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3748 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3749 #else
3750 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3751 		    "#                     Must use main snapshot file to allocate.\n");
3752 #endif
3753 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3754 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3755 		    "#                       is not a '0' or '1')\n");
3756 }
3757 
3758 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3759 {
3760 	if (iter->tr->allocated_snapshot)
3761 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3762 	else
3763 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3764 
3765 	seq_puts(m, "# Snapshot commands:\n");
3766 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3767 		show_snapshot_main_help(m);
3768 	else
3769 		show_snapshot_percpu_help(m);
3770 }
3771 #else
3772 /* Should never be called */
3773 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3774 #endif
3775 
3776 static int s_show(struct seq_file *m, void *v)
3777 {
3778 	struct trace_iterator *iter = v;
3779 	int ret;
3780 
3781 	if (iter->ent == NULL) {
3782 		if (iter->tr) {
3783 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3784 			seq_puts(m, "#\n");
3785 			test_ftrace_alive(m);
3786 		}
3787 		if (iter->snapshot && trace_empty(iter))
3788 			print_snapshot_help(m, iter);
3789 		else if (iter->trace && iter->trace->print_header)
3790 			iter->trace->print_header(m);
3791 		else
3792 			trace_default_header(m);
3793 
3794 	} else if (iter->leftover) {
3795 		/*
3796 		 * If we filled the seq_file buffer earlier, we
3797 		 * want to just show it now.
3798 		 */
3799 		ret = trace_print_seq(m, &iter->seq);
3800 
3801 		/* ret should this time be zero, but you never know */
3802 		iter->leftover = ret;
3803 
3804 	} else {
3805 		print_trace_line(iter);
3806 		ret = trace_print_seq(m, &iter->seq);
3807 		/*
3808 		 * If we overflow the seq_file buffer, then it will
3809 		 * ask us for this data again at start up.
3810 		 * Use that instead.
3811 		 *  ret is 0 if seq_file write succeeded.
3812 		 *        -1 otherwise.
3813 		 */
3814 		iter->leftover = ret;
3815 	}
3816 
3817 	return 0;
3818 }
3819 
3820 /*
3821  * Should be used after trace_array_get(), trace_types_lock
3822  * ensures that i_cdev was already initialized.
3823  */
3824 static inline int tracing_get_cpu(struct inode *inode)
3825 {
3826 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3827 		return (long)inode->i_cdev - 1;
3828 	return RING_BUFFER_ALL_CPUS;
3829 }
3830 
3831 static const struct seq_operations tracer_seq_ops = {
3832 	.start		= s_start,
3833 	.next		= s_next,
3834 	.stop		= s_stop,
3835 	.show		= s_show,
3836 };
3837 
3838 static struct trace_iterator *
3839 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3840 {
3841 	struct trace_array *tr = inode->i_private;
3842 	struct trace_iterator *iter;
3843 	int cpu;
3844 
3845 	if (tracing_disabled)
3846 		return ERR_PTR(-ENODEV);
3847 
3848 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3849 	if (!iter)
3850 		return ERR_PTR(-ENOMEM);
3851 
3852 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3853 				    GFP_KERNEL);
3854 	if (!iter->buffer_iter)
3855 		goto release;
3856 
3857 	/*
3858 	 * We make a copy of the current tracer to avoid concurrent
3859 	 * changes on it while we are reading.
3860 	 */
3861 	mutex_lock(&trace_types_lock);
3862 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3863 	if (!iter->trace)
3864 		goto fail;
3865 
3866 	*iter->trace = *tr->current_trace;
3867 
3868 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3869 		goto fail;
3870 
3871 	iter->tr = tr;
3872 
3873 #ifdef CONFIG_TRACER_MAX_TRACE
3874 	/* Currently only the top directory has a snapshot */
3875 	if (tr->current_trace->print_max || snapshot)
3876 		iter->trace_buffer = &tr->max_buffer;
3877 	else
3878 #endif
3879 		iter->trace_buffer = &tr->trace_buffer;
3880 	iter->snapshot = snapshot;
3881 	iter->pos = -1;
3882 	iter->cpu_file = tracing_get_cpu(inode);
3883 	mutex_init(&iter->mutex);
3884 
3885 	/* Notify the tracer early; before we stop tracing. */
3886 	if (iter->trace && iter->trace->open)
3887 		iter->trace->open(iter);
3888 
3889 	/* Annotate start of buffers if we had overruns */
3890 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3891 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3892 
3893 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3894 	if (trace_clocks[tr->clock_id].in_ns)
3895 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3896 
3897 	/* stop the trace while dumping if we are not opening "snapshot" */
3898 	if (!iter->snapshot)
3899 		tracing_stop_tr(tr);
3900 
3901 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3902 		for_each_tracing_cpu(cpu) {
3903 			iter->buffer_iter[cpu] =
3904 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3905 		}
3906 		ring_buffer_read_prepare_sync();
3907 		for_each_tracing_cpu(cpu) {
3908 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3909 			tracing_iter_reset(iter, cpu);
3910 		}
3911 	} else {
3912 		cpu = iter->cpu_file;
3913 		iter->buffer_iter[cpu] =
3914 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3915 		ring_buffer_read_prepare_sync();
3916 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3917 		tracing_iter_reset(iter, cpu);
3918 	}
3919 
3920 	mutex_unlock(&trace_types_lock);
3921 
3922 	return iter;
3923 
3924  fail:
3925 	mutex_unlock(&trace_types_lock);
3926 	kfree(iter->trace);
3927 	kfree(iter->buffer_iter);
3928 release:
3929 	seq_release_private(inode, file);
3930 	return ERR_PTR(-ENOMEM);
3931 }
3932 
3933 int tracing_open_generic(struct inode *inode, struct file *filp)
3934 {
3935 	if (tracing_disabled)
3936 		return -ENODEV;
3937 
3938 	filp->private_data = inode->i_private;
3939 	return 0;
3940 }
3941 
3942 bool tracing_is_disabled(void)
3943 {
3944 	return (tracing_disabled) ? true : false;
3945 }
3946 
3947 /*
3948  * Open and update trace_array ref count.
3949  * Must have the current trace_array passed to it.
3950  */
3951 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3952 {
3953 	struct trace_array *tr = inode->i_private;
3954 
3955 	if (tracing_disabled)
3956 		return -ENODEV;
3957 
3958 	if (trace_array_get(tr) < 0)
3959 		return -ENODEV;
3960 
3961 	filp->private_data = inode->i_private;
3962 
3963 	return 0;
3964 }
3965 
3966 static int tracing_release(struct inode *inode, struct file *file)
3967 {
3968 	struct trace_array *tr = inode->i_private;
3969 	struct seq_file *m = file->private_data;
3970 	struct trace_iterator *iter;
3971 	int cpu;
3972 
3973 	if (!(file->f_mode & FMODE_READ)) {
3974 		trace_array_put(tr);
3975 		return 0;
3976 	}
3977 
3978 	/* Writes do not use seq_file */
3979 	iter = m->private;
3980 	mutex_lock(&trace_types_lock);
3981 
3982 	for_each_tracing_cpu(cpu) {
3983 		if (iter->buffer_iter[cpu])
3984 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3985 	}
3986 
3987 	if (iter->trace && iter->trace->close)
3988 		iter->trace->close(iter);
3989 
3990 	if (!iter->snapshot)
3991 		/* reenable tracing if it was previously enabled */
3992 		tracing_start_tr(tr);
3993 
3994 	__trace_array_put(tr);
3995 
3996 	mutex_unlock(&trace_types_lock);
3997 
3998 	mutex_destroy(&iter->mutex);
3999 	free_cpumask_var(iter->started);
4000 	kfree(iter->trace);
4001 	kfree(iter->buffer_iter);
4002 	seq_release_private(inode, file);
4003 
4004 	return 0;
4005 }
4006 
4007 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4008 {
4009 	struct trace_array *tr = inode->i_private;
4010 
4011 	trace_array_put(tr);
4012 	return 0;
4013 }
4014 
4015 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4016 {
4017 	struct trace_array *tr = inode->i_private;
4018 
4019 	trace_array_put(tr);
4020 
4021 	return single_release(inode, file);
4022 }
4023 
4024 static int tracing_open(struct inode *inode, struct file *file)
4025 {
4026 	struct trace_array *tr = inode->i_private;
4027 	struct trace_iterator *iter;
4028 	int ret = 0;
4029 
4030 	if (trace_array_get(tr) < 0)
4031 		return -ENODEV;
4032 
4033 	/* If this file was open for write, then erase contents */
4034 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4035 		int cpu = tracing_get_cpu(inode);
4036 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4037 
4038 #ifdef CONFIG_TRACER_MAX_TRACE
4039 		if (tr->current_trace->print_max)
4040 			trace_buf = &tr->max_buffer;
4041 #endif
4042 
4043 		if (cpu == RING_BUFFER_ALL_CPUS)
4044 			tracing_reset_online_cpus(trace_buf);
4045 		else
4046 			tracing_reset(trace_buf, cpu);
4047 	}
4048 
4049 	if (file->f_mode & FMODE_READ) {
4050 		iter = __tracing_open(inode, file, false);
4051 		if (IS_ERR(iter))
4052 			ret = PTR_ERR(iter);
4053 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4054 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4055 	}
4056 
4057 	if (ret < 0)
4058 		trace_array_put(tr);
4059 
4060 	return ret;
4061 }
4062 
4063 /*
4064  * Some tracers are not suitable for instance buffers.
4065  * A tracer is always available for the global array (toplevel)
4066  * or if it explicitly states that it is.
4067  */
4068 static bool
4069 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4070 {
4071 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4072 }
4073 
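/*
 * For example, an instance created with "mkdir instances/foo" gets its own
 * buffers and control files, but its available_tracers file only lists
 * tracers that set ->allow_instances; the top level trace_array accepts
 * every registered tracer.
 */
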
4074 /* Find the next tracer that this trace array may use */
4075 static struct tracer *
4076 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4077 {
4078 	while (t && !trace_ok_for_array(t, tr))
4079 		t = t->next;
4080 
4081 	return t;
4082 }
4083 
4084 static void *
4085 t_next(struct seq_file *m, void *v, loff_t *pos)
4086 {
4087 	struct trace_array *tr = m->private;
4088 	struct tracer *t = v;
4089 
4090 	(*pos)++;
4091 
4092 	if (t)
4093 		t = get_tracer_for_array(tr, t->next);
4094 
4095 	return t;
4096 }
4097 
4098 static void *t_start(struct seq_file *m, loff_t *pos)
4099 {
4100 	struct trace_array *tr = m->private;
4101 	struct tracer *t;
4102 	loff_t l = 0;
4103 
4104 	mutex_lock(&trace_types_lock);
4105 
4106 	t = get_tracer_for_array(tr, trace_types);
4107 	for (; t && l < *pos; t = t_next(m, t, &l))
4108 			;
4109 
4110 	return t;
4111 }
4112 
4113 static void t_stop(struct seq_file *m, void *p)
4114 {
4115 	mutex_unlock(&trace_types_lock);
4116 }
4117 
4118 static int t_show(struct seq_file *m, void *v)
4119 {
4120 	struct tracer *t = v;
4121 
4122 	if (!t)
4123 		return 0;
4124 
4125 	seq_puts(m, t->name);
4126 	if (t->next)
4127 		seq_putc(m, ' ');
4128 	else
4129 		seq_putc(m, '\n');
4130 
4131 	return 0;
4132 }
4133 
4134 static const struct seq_operations show_traces_seq_ops = {
4135 	.start		= t_start,
4136 	.next		= t_next,
4137 	.stop		= t_stop,
4138 	.show		= t_show,
4139 };
4140 
4141 static int show_traces_open(struct inode *inode, struct file *file)
4142 {
4143 	struct trace_array *tr = inode->i_private;
4144 	struct seq_file *m;
4145 	int ret;
4146 
4147 	if (tracing_disabled)
4148 		return -ENODEV;
4149 
4150 	ret = seq_open(file, &show_traces_seq_ops);
4151 	if (ret)
4152 		return ret;
4153 
4154 	m = file->private_data;
4155 	m->private = tr;
4156 
4157 	return 0;
4158 }
4159 
4160 static ssize_t
4161 tracing_write_stub(struct file *filp, const char __user *ubuf,
4162 		   size_t count, loff_t *ppos)
4163 {
4164 	return count;
4165 }
4166 
4167 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4168 {
4169 	int ret;
4170 
4171 	if (file->f_mode & FMODE_READ)
4172 		ret = seq_lseek(file, offset, whence);
4173 	else
4174 		file->f_pos = ret = 0;
4175 
4176 	return ret;
4177 }
4178 
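/*
 * File operations for the "trace" file.  Reading iterates over the ring
 * buffer without consuming it (tracing is stopped while the file is open,
 * see __tracing_open() above); opening it for write with O_TRUNC clears
 * the buffer.  For example:
 *
 *   # cat trace          - dump the current contents
 *   # echo > trace       - clear the buffer
 */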
4179 static const struct file_operations tracing_fops = {
4180 	.open		= tracing_open,
4181 	.read		= seq_read,
4182 	.write		= tracing_write_stub,
4183 	.llseek		= tracing_lseek,
4184 	.release	= tracing_release,
4185 };
4186 
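/*
 * File operations for "available_tracers".  Reading it prints the names of
 * all tracers usable by this trace_array on a single line, e.g.:
 *
 *   # cat available_tracers
 *   function_graph function nop
 *
 * (the exact list depends on the kernel configuration).
 */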
4187 static const struct file_operations show_traces_fops = {
4188 	.open		= show_traces_open,
4189 	.read		= seq_read,
4190 	.release	= seq_release,
4191 	.llseek		= seq_lseek,
4192 };
4193 
4194 static ssize_t
4195 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4196 		     size_t count, loff_t *ppos)
4197 {
4198 	struct trace_array *tr = file_inode(filp)->i_private;
4199 	char *mask_str;
4200 	int len;
4201 
4202 	len = snprintf(NULL, 0, "%*pb\n",
4203 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4204 	mask_str = kmalloc(len, GFP_KERNEL);
4205 	if (!mask_str)
4206 		return -ENOMEM;
4207 
4208 	len = snprintf(mask_str, len, "%*pb\n",
4209 		       cpumask_pr_args(tr->tracing_cpumask));
4210 	if (len >= count) {
4211 		count = -EINVAL;
4212 		goto out_err;
4213 	}
4214 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4215 
4216 out_err:
4217 	kfree(mask_str);
4218 
4219 	return count;
4220 }
4221 
4222 static ssize_t
4223 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4224 		      size_t count, loff_t *ppos)
4225 {
4226 	struct trace_array *tr = file_inode(filp)->i_private;
4227 	cpumask_var_t tracing_cpumask_new;
4228 	int err, cpu;
4229 
4230 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4231 		return -ENOMEM;
4232 
4233 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4234 	if (err)
4235 		goto err_unlock;
4236 
4237 	local_irq_disable();
4238 	arch_spin_lock(&tr->max_lock);
4239 	for_each_tracing_cpu(cpu) {
4240 		/*
4241 		 * Increase/decrease the disabled counter if we are
4242 		 * about to flip a bit in the cpumask:
4243 		 */
4244 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4245 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4246 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4247 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4248 		}
4249 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4250 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4251 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4252 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4253 		}
4254 	}
4255 	arch_spin_unlock(&tr->max_lock);
4256 	local_irq_enable();
4257 
4258 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4259 	free_cpumask_var(tracing_cpumask_new);
4260 
4261 	return count;
4262 
4263 err_unlock:
4264 	free_cpumask_var(tracing_cpumask_new);
4265 
4266 	return err;
4267 }
4268 
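/*
 * File operations for "tracing_cpumask".  The mask is read and written as
 * a hex cpumask string; clearing a CPU's bit stops events from being
 * recorded on that CPU without clearing its buffer.  For example:
 *
 *   # echo 3 > tracing_cpumask    - trace only CPUs 0 and 1
 *   # cat tracing_cpumask
 *   3
 */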
4269 static const struct file_operations tracing_cpumask_fops = {
4270 	.open		= tracing_open_generic_tr,
4271 	.read		= tracing_cpumask_read,
4272 	.write		= tracing_cpumask_write,
4273 	.release	= tracing_release_generic_tr,
4274 	.llseek		= generic_file_llseek,
4275 };
4276 
4277 static int tracing_trace_options_show(struct seq_file *m, void *v)
4278 {
4279 	struct tracer_opt *trace_opts;
4280 	struct trace_array *tr = m->private;
4281 	u32 tracer_flags;
4282 	int i;
4283 
4284 	mutex_lock(&trace_types_lock);
4285 	tracer_flags = tr->current_trace->flags->val;
4286 	trace_opts = tr->current_trace->flags->opts;
4287 
4288 	for (i = 0; trace_options[i]; i++) {
4289 		if (tr->trace_flags & (1 << i))
4290 			seq_printf(m, "%s\n", trace_options[i]);
4291 		else
4292 			seq_printf(m, "no%s\n", trace_options[i]);
4293 	}
4294 
4295 	for (i = 0; trace_opts[i].name; i++) {
4296 		if (tracer_flags & trace_opts[i].bit)
4297 			seq_printf(m, "%s\n", trace_opts[i].name);
4298 		else
4299 			seq_printf(m, "no%s\n", trace_opts[i].name);
4300 	}
4301 	mutex_unlock(&trace_types_lock);
4302 
4303 	return 0;
4304 }
4305 
4306 static int __set_tracer_option(struct trace_array *tr,
4307 			       struct tracer_flags *tracer_flags,
4308 			       struct tracer_opt *opts, int neg)
4309 {
4310 	struct tracer *trace = tracer_flags->trace;
4311 	int ret;
4312 
4313 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4314 	if (ret)
4315 		return ret;
4316 
4317 	if (neg)
4318 		tracer_flags->val &= ~opts->bit;
4319 	else
4320 		tracer_flags->val |= opts->bit;
4321 	return 0;
4322 }
4323 
4324 /* Try to assign a tracer specific option */
4325 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4326 {
4327 	struct tracer *trace = tr->current_trace;
4328 	struct tracer_flags *tracer_flags = trace->flags;
4329 	struct tracer_opt *opts = NULL;
4330 	int i;
4331 
4332 	for (i = 0; tracer_flags->opts[i].name; i++) {
4333 		opts = &tracer_flags->opts[i];
4334 
4335 		if (strcmp(cmp, opts->name) == 0)
4336 			return __set_tracer_option(tr, trace->flags, opts, neg);
4337 	}
4338 
4339 	return -EINVAL;
4340 }
4341 
4342 /* Some tracers require overwrite to stay enabled */
4343 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4344 {
4345 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4346 		return -1;
4347 
4348 	return 0;
4349 }
4350 
4351 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4352 {
4353 	/* do nothing if flag is already set */
4354 	if (!!(tr->trace_flags & mask) == !!enabled)
4355 		return 0;
4356 
4357 	/* Give the tracer a chance to approve the change */
4358 	if (tr->current_trace->flag_changed)
4359 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4360 			return -EINVAL;
4361 
4362 	if (enabled)
4363 		tr->trace_flags |= mask;
4364 	else
4365 		tr->trace_flags &= ~mask;
4366 
4367 	if (mask == TRACE_ITER_RECORD_CMD)
4368 		trace_event_enable_cmd_record(enabled);
4369 
4370 	if (mask == TRACE_ITER_RECORD_TGID) {
4371 		if (!tgid_map)
4372 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4373 					   sizeof(*tgid_map),
4374 					   GFP_KERNEL);
4375 		if (!tgid_map) {
4376 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4377 			return -ENOMEM;
4378 		}
4379 
4380 		trace_event_enable_tgid_record(enabled);
4381 	}
4382 
4383 	if (mask == TRACE_ITER_EVENT_FORK)
4384 		trace_event_follow_fork(tr, enabled);
4385 
4386 	if (mask == TRACE_ITER_FUNC_FORK)
4387 		ftrace_pid_follow_fork(tr, enabled);
4388 
4389 	if (mask == TRACE_ITER_OVERWRITE) {
4390 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4391 #ifdef CONFIG_TRACER_MAX_TRACE
4392 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4393 #endif
4394 	}
4395 
4396 	if (mask == TRACE_ITER_PRINTK) {
4397 		trace_printk_start_stop_comm(enabled);
4398 		trace_printk_control(enabled);
4399 	}
4400 
4401 	return 0;
4402 }
4403 
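/*
 * Parse one option name written to the "trace_options" file (or given on
 * the kernel command line).  A "no" prefix clears the flag; names that are
 * not generic trace flags are passed on to the current tracer's private
 * options.  For example:
 *
 *   # echo noprint-parent > trace_options
 *   # echo sym-offset > trace_options
 */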
4404 static int trace_set_options(struct trace_array *tr, char *option)
4405 {
4406 	char *cmp;
4407 	int neg = 0;
4408 	int ret;
4409 	size_t orig_len = strlen(option);
4410 
4411 	cmp = strstrip(option);
4412 
4413 	if (strncmp(cmp, "no", 2) == 0) {
4414 		neg = 1;
4415 		cmp += 2;
4416 	}
4417 
4418 	mutex_lock(&trace_types_lock);
4419 
4420 	ret = match_string(trace_options, -1, cmp);
4421 	/* If no option could be set, test the specific tracer options */
4422 	if (ret < 0)
4423 		ret = set_tracer_option(tr, cmp, neg);
4424 	else
4425 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4426 
4427 	mutex_unlock(&trace_types_lock);
4428 
4429 	/*
4430 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4431 	 * turn it back into a space.
4432 	 */
4433 	if (orig_len > strlen(option))
4434 		option[strlen(option)] = ' ';
4435 
4436 	return ret;
4437 }
4438 
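/*
 * Apply the options collected from the "trace_options=" kernel command
 * line parameter.  Booting with, e.g., "trace_options=sym-offset,stacktrace"
 * behaves like writing each option to the trace_options file above.
 */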
4439 static void __init apply_trace_boot_options(void)
4440 {
4441 	char *buf = trace_boot_options_buf;
4442 	char *option;
4443 
4444 	while (true) {
4445 		option = strsep(&buf, ",");
4446 
4447 		if (!option)
4448 			break;
4449 
4450 		if (*option)
4451 			trace_set_options(&global_trace, option);
4452 
4453 		/* Put back the comma to allow this to be called again */
4454 		if (buf)
4455 			*(buf - 1) = ',';
4456 	}
4457 }
4458 
4459 static ssize_t
4460 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4461 			size_t cnt, loff_t *ppos)
4462 {
4463 	struct seq_file *m = filp->private_data;
4464 	struct trace_array *tr = m->private;
4465 	char buf[64];
4466 	int ret;
4467 
4468 	if (cnt >= sizeof(buf))
4469 		return -EINVAL;
4470 
4471 	if (copy_from_user(buf, ubuf, cnt))
4472 		return -EFAULT;
4473 
4474 	buf[cnt] = 0;
4475 
4476 	ret = trace_set_options(tr, buf);
4477 	if (ret < 0)
4478 		return ret;
4479 
4480 	*ppos += cnt;
4481 
4482 	return cnt;
4483 }
4484 
4485 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4486 {
4487 	struct trace_array *tr = inode->i_private;
4488 	int ret;
4489 
4490 	if (tracing_disabled)
4491 		return -ENODEV;
4492 
4493 	if (trace_array_get(tr) < 0)
4494 		return -ENODEV;
4495 
4496 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4497 	if (ret < 0)
4498 		trace_array_put(tr);
4499 
4500 	return ret;
4501 }
4502 
4503 static const struct file_operations tracing_iter_fops = {
4504 	.open		= tracing_trace_options_open,
4505 	.read		= seq_read,
4506 	.llseek		= seq_lseek,
4507 	.release	= tracing_single_release_tr,
4508 	.write		= tracing_trace_options_write,
4509 };
4510 
4511 static const char readme_msg[] =
4512 	"tracing mini-HOWTO:\n\n"
4513 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4514 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4515 	" Important files:\n"
4516 	"  trace\t\t\t- The static contents of the buffer\n"
4517 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4518 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4519 	"  current_tracer\t- function and latency tracers\n"
4520 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4521 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4522 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4523 	"  trace_clock\t\t- change the clock used to order events\n"
4524 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4525 	"      global:   Synced across CPUs but slows tracing down.\n"
4526 	"     counter:   Not a clock, but just an increment\n"
4527 	"      uptime:   Jiffy counter from time of boot\n"
4528 	"        perf:   Same clock that perf events use\n"
4529 #ifdef CONFIG_X86_64
4530 	"     x86-tsc:   TSC cycle counter\n"
4531 #endif
4532 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4533 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4534 	"    absolute:   Absolute (standalone) timestamp\n"
4535 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4536 	"\n  trace_marker_raw\t\t- Writes into this file are written into the kernel buffer as binary data\n"
4537 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4538 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4539 	"\t\t\t  Remove sub-buffer with rmdir\n"
4540 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4541 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4542 	"\t\t\t  option name\n"
4543 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4544 #ifdef CONFIG_DYNAMIC_FTRACE
4545 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4546 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4547 	"\t\t\t  functions\n"
4548 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4549 	"\t     modules: Can select a group via module\n"
4550 	"\t      Format: :mod:<module-name>\n"
4551 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4552 	"\t    triggers: a command to perform when function is hit\n"
4553 	"\t      Format: <function>:<trigger>[:count]\n"
4554 	"\t     trigger: traceon, traceoff\n"
4555 	"\t\t      enable_event:<system>:<event>\n"
4556 	"\t\t      disable_event:<system>:<event>\n"
4557 #ifdef CONFIG_STACKTRACE
4558 	"\t\t      stacktrace\n"
4559 #endif
4560 #ifdef CONFIG_TRACER_SNAPSHOT
4561 	"\t\t      snapshot\n"
4562 #endif
4563 	"\t\t      dump\n"
4564 	"\t\t      cpudump\n"
4565 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4566 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4567 	"\t     The first one will disable tracing every time do_fault is hit\n"
4568 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4569 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4570 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4571 	"\t       the counter will not decrement. It only decrements when the\n"
4572 	"\t       trigger did work\n"
4573 	"\t     To remove trigger without count:\n"
4574 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4575 	"\t     To remove trigger with a count:\n"
4576 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4577 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4578 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4579 	"\t    modules: Can select a group via module command :mod:\n"
4580 	"\t    Does not accept triggers\n"
4581 #endif /* CONFIG_DYNAMIC_FTRACE */
4582 #ifdef CONFIG_FUNCTION_TRACER
4583 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4584 	"\t\t    (function)\n"
4585 #endif
4586 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4587 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4588 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4589 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4590 #endif
4591 #ifdef CONFIG_TRACER_SNAPSHOT
4592 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4593 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4594 	"\t\t\t  information\n"
4595 #endif
4596 #ifdef CONFIG_STACK_TRACER
4597 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4598 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4599 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4600 	"\t\t\t  new trace)\n"
4601 #ifdef CONFIG_DYNAMIC_FTRACE
4602 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4603 	"\t\t\t  traces\n"
4604 #endif
4605 #endif /* CONFIG_STACK_TRACER */
4606 #ifdef CONFIG_KPROBE_EVENTS
4607 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4608 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4609 #endif
4610 #ifdef CONFIG_UPROBE_EVENTS
4611 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4612 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4613 #endif
4614 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4615 	"\t  accepts: event-definitions (one definition per line)\n"
4616 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4617 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4618 	"\t           -:[<group>/]<event>\n"
4619 #ifdef CONFIG_KPROBE_EVENTS
4620 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4621 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4622 #endif
4623 #ifdef CONFIG_UPROBE_EVENTS
4624 	"\t    place: <path>:<offset>\n"
4625 #endif
4626 	"\t     args: <name>=fetcharg[:type]\n"
4627 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4628 	"\t           $stack<index>, $stack, $retval, $comm\n"
4629 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4630 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4631 #endif
4632 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4633 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4634 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4635 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4636 	"\t\t\t  events\n"
4637 	"      filter\t\t- If set, only events passing filter are traced\n"
4638 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4639 	"\t\t\t  <event>:\n"
4640 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4641 	"      filter\t\t- If set, only events passing filter are traced\n"
4642 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4643 	"\t    Format: <trigger>[:count][if <filter>]\n"
4644 	"\t   trigger: traceon, traceoff\n"
4645 	"\t            enable_event:<system>:<event>\n"
4646 	"\t            disable_event:<system>:<event>\n"
4647 #ifdef CONFIG_HIST_TRIGGERS
4648 	"\t            enable_hist:<system>:<event>\n"
4649 	"\t            disable_hist:<system>:<event>\n"
4650 #endif
4651 #ifdef CONFIG_STACKTRACE
4652 	"\t\t    stacktrace\n"
4653 #endif
4654 #ifdef CONFIG_TRACER_SNAPSHOT
4655 	"\t\t    snapshot\n"
4656 #endif
4657 #ifdef CONFIG_HIST_TRIGGERS
4658 	"\t\t    hist (see below)\n"
4659 #endif
4660 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4661 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4662 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4663 	"\t                  events/block/block_unplug/trigger\n"
4664 	"\t   The first disables tracing every time block_unplug is hit.\n"
4665 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4666 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4667 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4668 	"\t   Like function triggers, the counter is only decremented if it\n"
4669 	"\t    enabled or disabled tracing.\n"
4670 	"\t   To remove a trigger without a count:\n"
4671 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4672 	"\t   To remove a trigger with a count:\n"
4673 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4674 	"\t   Filters can be ignored when removing a trigger.\n"
4675 #ifdef CONFIG_HIST_TRIGGERS
4676 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4677 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4678 	"\t            [:values=<field1[,field2,...]>]\n"
4679 	"\t            [:sort=<field1[,field2,...]>]\n"
4680 	"\t            [:size=#entries]\n"
4681 	"\t            [:pause][:continue][:clear]\n"
4682 	"\t            [:name=histname1]\n"
4683 	"\t            [if <filter>]\n\n"
4684 	"\t    When a matching event is hit, an entry is added to a hash\n"
4685 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4686 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4687 	"\t    correspond to fields in the event's format description.  Keys\n"
4688 	"\t    can be any field, or the special string 'stacktrace'.\n"
4689 	"\t    Compound keys consisting of up to two fields can be specified\n"
4690 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4691 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4692 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4693 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4694 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4695 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4696 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4697 	"\t    its histogram data will be shared with other triggers of the\n"
4698 	"\t    same name, and trigger hits will update this common data.\n\n"
4699 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4700 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4701 	"\t    triggers attached to an event, there will be a table for each\n"
4702 	"\t    trigger in the output.  The table displayed for a named\n"
4703 	"\t    trigger will be the same as any other instance having the\n"
4704 	"\t    same name.  The default format used to display a given field\n"
4705 	"\t    can be modified by appending any of the following modifiers\n"
4706 	"\t    to the field name, as applicable:\n\n"
4707 	"\t            .hex        display a number as a hex value\n"
4708 	"\t            .sym        display an address as a symbol\n"
4709 	"\t            .sym-offset display an address as a symbol and offset\n"
4710 	"\t            .execname   display a common_pid as a program name\n"
4711 	"\t            .syscall    display a syscall id as a syscall name\n"
4712 	"\t            .log2       display log2 value rather than raw number\n"
4713 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4714 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4715 	"\t    trigger or to start a hist trigger but not log any events\n"
4716 	"\t    until told to do so.  'continue' can be used to start or\n"
4717 	"\t    restart a paused hist trigger.\n\n"
4718 	"\t    The 'clear' parameter will clear the contents of a running\n"
4719 	"\t    hist trigger and leave its current paused/active state\n"
4720 	"\t    unchanged.\n\n"
4721 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4722 	"\t    have one event conditionally start and stop another event's\n"
4723 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4724 	"\t    the enable_event and disable_event triggers.\n"
4725 #endif
4726 ;
4727 
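/*
 * The mini-HOWTO above is exposed read-only through the "README" file in
 * tracefs; "cat README" prints it verbatim.
 */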
4728 static ssize_t
4729 tracing_readme_read(struct file *filp, char __user *ubuf,
4730 		       size_t cnt, loff_t *ppos)
4731 {
4732 	return simple_read_from_buffer(ubuf, cnt, ppos,
4733 					readme_msg, strlen(readme_msg));
4734 }
4735 
4736 static const struct file_operations tracing_readme_fops = {
4737 	.open		= tracing_open_generic,
4738 	.read		= tracing_readme_read,
4739 	.llseek		= generic_file_llseek,
4740 };
4741 
4742 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4743 {
4744 	int *ptr = v;
4745 
4746 	if (*pos || m->count)
4747 		ptr++;
4748 
4749 	(*pos)++;
4750 
4751 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4752 		if (trace_find_tgid(*ptr))
4753 			return ptr;
4754 	}
4755 
4756 	return NULL;
4757 }
4758 
4759 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4760 {
4761 	void *v;
4762 	loff_t l = 0;
4763 
4764 	if (!tgid_map)
4765 		return NULL;
4766 
4767 	v = &tgid_map[0];
4768 	while (l <= *pos) {
4769 		v = saved_tgids_next(m, v, &l);
4770 		if (!v)
4771 			return NULL;
4772 	}
4773 
4774 	return v;
4775 }
4776 
4777 static void saved_tgids_stop(struct seq_file *m, void *v)
4778 {
4779 }
4780 
4781 static int saved_tgids_show(struct seq_file *m, void *v)
4782 {
4783 	int pid = (int *)v - tgid_map;
4784 
4785 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4786 	return 0;
4787 }
4788 
4789 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4790 	.start		= saved_tgids_start,
4791 	.stop		= saved_tgids_stop,
4792 	.next		= saved_tgids_next,
4793 	.show		= saved_tgids_show,
4794 };
4795 
4796 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4797 {
4798 	if (tracing_disabled)
4799 		return -ENODEV;
4800 
4801 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4802 }
4803 
4804 
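/*
 * File operations for "saved_tgids", which maps recorded PIDs to their
 * thread group ids, one "<pid> <tgid>" pair per line.  The map is only
 * populated while the "record-tgid" trace option is set, for example:
 *
 *   # echo 1 > options/record-tgid
 *   # cat saved_tgids
 */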
4805 static const struct file_operations tracing_saved_tgids_fops = {
4806 	.open		= tracing_saved_tgids_open,
4807 	.read		= seq_read,
4808 	.llseek		= seq_lseek,
4809 	.release	= seq_release,
4810 };
4811 
4812 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4813 {
4814 	unsigned int *ptr = v;
4815 
4816 	if (*pos || m->count)
4817 		ptr++;
4818 
4819 	(*pos)++;
4820 
4821 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4822 	     ptr++) {
4823 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4824 			continue;
4825 
4826 		return ptr;
4827 	}
4828 
4829 	return NULL;
4830 }
4831 
4832 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4833 {
4834 	void *v;
4835 	loff_t l = 0;
4836 
4837 	preempt_disable();
4838 	arch_spin_lock(&trace_cmdline_lock);
4839 
4840 	v = &savedcmd->map_cmdline_to_pid[0];
4841 	while (l <= *pos) {
4842 		v = saved_cmdlines_next(m, v, &l);
4843 		if (!v)
4844 			return NULL;
4845 	}
4846 
4847 	return v;
4848 }
4849 
4850 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4851 {
4852 	arch_spin_unlock(&trace_cmdline_lock);
4853 	preempt_enable();
4854 }
4855 
4856 static int saved_cmdlines_show(struct seq_file *m, void *v)
4857 {
4858 	char buf[TASK_COMM_LEN];
4859 	unsigned int *pid = v;
4860 
4861 	__trace_find_cmdline(*pid, buf);
4862 	seq_printf(m, "%d %s\n", *pid, buf);
4863 	return 0;
4864 }
4865 
4866 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4867 	.start		= saved_cmdlines_start,
4868 	.next		= saved_cmdlines_next,
4869 	.stop		= saved_cmdlines_stop,
4870 	.show		= saved_cmdlines_show,
4871 };
4872 
4873 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4874 {
4875 	if (tracing_disabled)
4876 		return -ENODEV;
4877 
4878 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4879 }
4880 
4881 static const struct file_operations tracing_saved_cmdlines_fops = {
4882 	.open		= tracing_saved_cmdlines_open,
4883 	.read		= seq_read,
4884 	.llseek		= seq_lseek,
4885 	.release	= seq_release,
4886 };
4887 
4888 static ssize_t
4889 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4890 				 size_t cnt, loff_t *ppos)
4891 {
4892 	char buf[64];
4893 	int r;
4894 
4895 	arch_spin_lock(&trace_cmdline_lock);
4896 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4897 	arch_spin_unlock(&trace_cmdline_lock);
4898 
4899 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4900 }
4901 
4902 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4903 {
4904 	kfree(s->saved_cmdlines);
4905 	kfree(s->map_cmdline_to_pid);
4906 	kfree(s);
4907 }
4908 
4909 static int tracing_resize_saved_cmdlines(unsigned int val)
4910 {
4911 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4912 
4913 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4914 	if (!s)
4915 		return -ENOMEM;
4916 
4917 	if (allocate_cmdlines_buffer(val, s) < 0) {
4918 		kfree(s);
4919 		return -ENOMEM;
4920 	}
4921 
4922 	arch_spin_lock(&trace_cmdline_lock);
4923 	savedcmd_temp = savedcmd;
4924 	savedcmd = s;
4925 	arch_spin_unlock(&trace_cmdline_lock);
4926 	free_saved_cmdlines_buffer(savedcmd_temp);
4927 
4928 	return 0;
4929 }
4930 
4931 static ssize_t
4932 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4933 				  size_t cnt, loff_t *ppos)
4934 {
4935 	unsigned long val;
4936 	int ret;
4937 
4938 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4939 	if (ret)
4940 		return ret;
4941 
4942 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4943 	if (!val || val > PID_MAX_DEFAULT)
4944 		return -EINVAL;
4945 
4946 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4947 	if (ret < 0)
4948 		return ret;
4949 
4950 	*ppos += cnt;
4951 
4952 	return cnt;
4953 }
4954 
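/*
 * File operations for "saved_cmdlines_size", which controls how many
 * pid->comm entries the saved_cmdlines map can hold (128 by default).
 * For example:
 *
 *   # echo 1024 > saved_cmdlines_size
 *   # cat saved_cmdlines_size
 *   1024
 */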
4955 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4956 	.open		= tracing_open_generic,
4957 	.read		= tracing_saved_cmdlines_size_read,
4958 	.write		= tracing_saved_cmdlines_size_write,
4959 };
4960 
4961 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4962 static union trace_eval_map_item *
4963 update_eval_map(union trace_eval_map_item *ptr)
4964 {
4965 	if (!ptr->map.eval_string) {
4966 		if (ptr->tail.next) {
4967 			ptr = ptr->tail.next;
4968 			/* Set ptr to the next real item (skip head) */
4969 			ptr++;
4970 		} else
4971 			return NULL;
4972 	}
4973 	return ptr;
4974 }
4975 
4976 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4977 {
4978 	union trace_eval_map_item *ptr = v;
4979 
4980 	/*
4981 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4982 	 * This really should never happen.
4983 	 */
4984 	ptr = update_eval_map(ptr);
4985 	if (WARN_ON_ONCE(!ptr))
4986 		return NULL;
4987 
4988 	ptr++;
4989 
4990 	(*pos)++;
4991 
4992 	ptr = update_eval_map(ptr);
4993 
4994 	return ptr;
4995 }
4996 
4997 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4998 {
4999 	union trace_eval_map_item *v;
5000 	loff_t l = 0;
5001 
5002 	mutex_lock(&trace_eval_mutex);
5003 
5004 	v = trace_eval_maps;
5005 	if (v)
5006 		v++;
5007 
5008 	while (v && l < *pos) {
5009 		v = eval_map_next(m, v, &l);
5010 	}
5011 
5012 	return v;
5013 }
5014 
5015 static void eval_map_stop(struct seq_file *m, void *v)
5016 {
5017 	mutex_unlock(&trace_eval_mutex);
5018 }
5019 
5020 static int eval_map_show(struct seq_file *m, void *v)
5021 {
5022 	union trace_eval_map_item *ptr = v;
5023 
5024 	seq_printf(m, "%s %ld (%s)\n",
5025 		   ptr->map.eval_string, ptr->map.eval_value,
5026 		   ptr->map.system);
5027 
5028 	return 0;
5029 }
5030 
5031 static const struct seq_operations tracing_eval_map_seq_ops = {
5032 	.start		= eval_map_start,
5033 	.next		= eval_map_next,
5034 	.stop		= eval_map_stop,
5035 	.show		= eval_map_show,
5036 };
5037 
5038 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5039 {
5040 	if (tracing_disabled)
5041 		return -ENODEV;
5042 
5043 	return seq_open(filp, &tracing_eval_map_seq_ops);
5044 }
5045 
5046 static const struct file_operations tracing_eval_map_fops = {
5047 	.open		= tracing_eval_map_open,
5048 	.read		= seq_read,
5049 	.llseek		= seq_lseek,
5050 	.release	= seq_release,
5051 };
5052 
5053 static inline union trace_eval_map_item *
5054 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5055 {
5056 	/* Return tail of array given the head */
5057 	return ptr + ptr->head.length + 1;
5058 }
5059 
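/*
 * Layout of one chunk allocated by trace_insert_eval_map_file() below
 * (len + 2 entries of union trace_eval_map_item):
 *
 *   [0]         head:  owning module and number of maps (len)
 *   [1..len]    map:   one trace_eval_map per entry
 *   [len + 1]   tail:  zeroed; tail.next links to the next chunk
 *
 * trace_eval_jmp_to_tail() above uses head.length to hop from a chunk's
 * head straight to its tail when walking the chain.
 */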
5060 static void
5061 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5062 			   int len)
5063 {
5064 	struct trace_eval_map **stop;
5065 	struct trace_eval_map **map;
5066 	union trace_eval_map_item *map_array;
5067 	union trace_eval_map_item *ptr;
5068 
5069 	stop = start + len;
5070 
5071 	/*
5072 	 * The trace_eval_maps contains the map plus a head and tail item,
5073 	 * where the head holds the module and length of array, and the
5074 	 * tail holds a pointer to the next list.
5075 	 */
5076 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5077 	if (!map_array) {
5078 		pr_warn("Unable to allocate trace eval mapping\n");
5079 		return;
5080 	}
5081 
5082 	mutex_lock(&trace_eval_mutex);
5083 
5084 	if (!trace_eval_maps)
5085 		trace_eval_maps = map_array;
5086 	else {
5087 		ptr = trace_eval_maps;
5088 		for (;;) {
5089 			ptr = trace_eval_jmp_to_tail(ptr);
5090 			if (!ptr->tail.next)
5091 				break;
5092 			ptr = ptr->tail.next;
5093 
5094 		}
5095 		ptr->tail.next = map_array;
5096 	}
5097 	map_array->head.mod = mod;
5098 	map_array->head.length = len;
5099 	map_array++;
5100 
5101 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5102 		map_array->map = **map;
5103 		map_array++;
5104 	}
5105 	memset(map_array, 0, sizeof(*map_array));
5106 
5107 	mutex_unlock(&trace_eval_mutex);
5108 }
5109 
5110 static void trace_create_eval_file(struct dentry *d_tracer)
5111 {
5112 	trace_create_file("eval_map", 0444, d_tracer,
5113 			  NULL, &tracing_eval_map_fops);
5114 }
5115 
5116 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5117 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5118 static inline void trace_insert_eval_map_file(struct module *mod,
5119 			      struct trace_eval_map **start, int len) { }
5120 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5121 
5122 static void trace_insert_eval_map(struct module *mod,
5123 				  struct trace_eval_map **start, int len)
5124 {
5125 	struct trace_eval_map **map;
5126 
5127 	if (len <= 0)
5128 		return;
5129 
5130 	map = start;
5131 
5132 	trace_event_eval_update(map, len);
5133 
5134 	trace_insert_eval_map_file(mod, start, len);
5135 }
5136 
5137 static ssize_t
5138 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5139 		       size_t cnt, loff_t *ppos)
5140 {
5141 	struct trace_array *tr = filp->private_data;
5142 	char buf[MAX_TRACER_SIZE+2];
5143 	int r;
5144 
5145 	mutex_lock(&trace_types_lock);
5146 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5147 	mutex_unlock(&trace_types_lock);
5148 
5149 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5150 }
5151 
5152 int tracer_init(struct tracer *t, struct trace_array *tr)
5153 {
5154 	tracing_reset_online_cpus(&tr->trace_buffer);
5155 	return t->init(tr);
5156 }
5157 
5158 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5159 {
5160 	int cpu;
5161 
5162 	for_each_tracing_cpu(cpu)
5163 		per_cpu_ptr(buf->data, cpu)->entries = val;
5164 }
5165 
5166 #ifdef CONFIG_TRACER_MAX_TRACE
5167 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5168 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5169 					struct trace_buffer *size_buf, int cpu_id)
5170 {
5171 	int cpu, ret = 0;
5172 
5173 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5174 		for_each_tracing_cpu(cpu) {
5175 			ret = ring_buffer_resize(trace_buf->buffer,
5176 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5177 			if (ret < 0)
5178 				break;
5179 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5180 				per_cpu_ptr(size_buf->data, cpu)->entries;
5181 		}
5182 	} else {
5183 		ret = ring_buffer_resize(trace_buf->buffer,
5184 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5185 		if (ret == 0)
5186 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5187 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5188 	}
5189 
5190 	return ret;
5191 }
5192 #endif /* CONFIG_TRACER_MAX_TRACE */
5193 
5194 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5195 					unsigned long size, int cpu)
5196 {
5197 	int ret;
5198 
5199 	/*
5200 	 * If kernel or user changes the size of the ring buffer
5201 	 * we use the size that was given, and we can forget about
5202 	 * expanding it later.
5203 	 */
5204 	ring_buffer_expanded = true;
5205 
5206 	/* May be called before buffers are initialized */
5207 	if (!tr->trace_buffer.buffer)
5208 		return 0;
5209 
5210 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5211 	if (ret < 0)
5212 		return ret;
5213 
5214 #ifdef CONFIG_TRACER_MAX_TRACE
5215 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5216 	    !tr->current_trace->use_max_tr)
5217 		goto out;
5218 
5219 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5220 	if (ret < 0) {
5221 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5222 						     &tr->trace_buffer, cpu);
5223 		if (r < 0) {
5224 			/*
5225 			 * AARGH! We are left with different
5226 			 * size max buffer!!!!
5227 			 * The max buffer is our "snapshot" buffer.
5228 			 * When a tracer needs a snapshot (one of the
5229 			 * latency tracers), it swaps the max buffer
5230 			 * with the saved snapshot. We succeeded in updating
5231 			 * the size of the main buffer, but failed to
5232 			 * update the size of the max buffer. But when we tried
5233 			 * to reset the main buffer to the original size, we
5234 			 * failed there too. This is very unlikely to
5235 			 * happen, but if it does, warn and kill all
5236 			 * tracing.
5237 			 */
5238 			WARN_ON(1);
5239 			tracing_disabled = 1;
5240 		}
5241 		return ret;
5242 	}
5243 
5244 	if (cpu == RING_BUFFER_ALL_CPUS)
5245 		set_buffer_entries(&tr->max_buffer, size);
5246 	else
5247 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5248 
5249  out:
5250 #endif /* CONFIG_TRACER_MAX_TRACE */
5251 
5252 	if (cpu == RING_BUFFER_ALL_CPUS)
5253 		set_buffer_entries(&tr->trace_buffer, size);
5254 	else
5255 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5256 
5257 	return ret;
5258 }
5259 
5260 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5261 					  unsigned long size, int cpu_id)
5262 {
5263 	int ret = size;
5264 
5265 	mutex_lock(&trace_types_lock);
5266 
5267 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5268 		/* make sure this cpu is enabled in the mask */
5269 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5270 			ret = -EINVAL;
5271 			goto out;
5272 		}
5273 	}
5274 
5275 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5276 	if (ret < 0)
5277 		ret = -ENOMEM;
5278 
5279 out:
5280 	mutex_unlock(&trace_types_lock);
5281 
5282 	return ret;
5283 }
5284 
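/*
 * The resize above is normally reached from the "buffer_size_kb" files
 * (global and per_cpu/cpuN), whose handlers appear later in this file.
 * For example:
 *
 *   # echo 4096 > buffer_size_kb               - 4MB per CPU
 *   # echo 1024 > per_cpu/cpu2/buffer_size_kb  - resize only CPU 2
 */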
5285 
5286 /**
5287  * tracing_update_buffers - used by tracing facility to expand ring buffers
5288  *
5289  * To save memory when tracing is never used on a system that has it
5290  * configured in, the ring buffers are set to a minimum size. But once
5291  * a user starts to use the tracing facility, they need to grow
5292  * to their default size.
5293  *
5294  * This function is to be called when a tracer is about to be used.
5295  */
5296 int tracing_update_buffers(void)
5297 {
5298 	int ret = 0;
5299 
5300 	mutex_lock(&trace_types_lock);
5301 	if (!ring_buffer_expanded)
5302 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5303 						RING_BUFFER_ALL_CPUS);
5304 	mutex_unlock(&trace_types_lock);
5305 
5306 	return ret;
5307 }
5308 
5309 struct trace_option_dentry;
5310 
5311 static void
5312 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5313 
5314 /*
5315  * Used to clear out the tracer before deletion of an instance.
5316  * Must have trace_types_lock held.
5317  */
5318 static void tracing_set_nop(struct trace_array *tr)
5319 {
5320 	if (tr->current_trace == &nop_trace)
5321 		return;
5322 
5323 	tr->current_trace->enabled--;
5324 
5325 	if (tr->current_trace->reset)
5326 		tr->current_trace->reset(tr);
5327 
5328 	tr->current_trace = &nop_trace;
5329 }
5330 
5331 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5332 {
5333 	/* Only enable if the directory has been created already. */
5334 	if (!tr->dir)
5335 		return;
5336 
5337 	create_trace_option_files(tr, t);
5338 }
5339 
5340 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5341 {
5342 	struct tracer *t;
5343 #ifdef CONFIG_TRACER_MAX_TRACE
5344 	bool had_max_tr;
5345 #endif
5346 	int ret = 0;
5347 
5348 	mutex_lock(&trace_types_lock);
5349 
5350 	if (!ring_buffer_expanded) {
5351 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5352 						RING_BUFFER_ALL_CPUS);
5353 		if (ret < 0)
5354 			goto out;
5355 		ret = 0;
5356 	}
5357 
5358 	for (t = trace_types; t; t = t->next) {
5359 		if (strcmp(t->name, buf) == 0)
5360 			break;
5361 	}
5362 	if (!t) {
5363 		ret = -EINVAL;
5364 		goto out;
5365 	}
5366 	if (t == tr->current_trace)
5367 		goto out;
5368 
5369 	/* Some tracers won't work on kernel command line */
5370 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5371 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5372 			t->name);
5373 		goto out;
5374 	}
5375 
5376 	/* Some tracers are only allowed for the top level buffer */
5377 	if (!trace_ok_for_array(t, tr)) {
5378 		ret = -EINVAL;
5379 		goto out;
5380 	}
5381 
5382 	/* If trace pipe files are being read, we can't change the tracer */
5383 	if (tr->current_trace->ref) {
5384 		ret = -EBUSY;
5385 		goto out;
5386 	}
5387 
5388 	trace_branch_disable();
5389 
5390 	tr->current_trace->enabled--;
5391 
5392 	if (tr->current_trace->reset)
5393 		tr->current_trace->reset(tr);
5394 
5395 	/* Current trace needs to be nop_trace before synchronize_sched */
5396 	tr->current_trace = &nop_trace;
5397 
5398 #ifdef CONFIG_TRACER_MAX_TRACE
5399 	had_max_tr = tr->allocated_snapshot;
5400 
5401 	if (had_max_tr && !t->use_max_tr) {
5402 		/*
5403 		 * We need to make sure that the update_max_tr sees that
5404 		 * current_trace changed to nop_trace to keep it from
5405 		 * swapping the buffers after we resize it.
5406 		 * update_max_tr() is called with interrupts disabled,
5407 		 * so a synchronize_sched() is sufficient.
5408 		 */
5409 		synchronize_sched();
5410 		free_snapshot(tr);
5411 	}
5412 #endif
5413 
5414 #ifdef CONFIG_TRACER_MAX_TRACE
5415 	if (t->use_max_tr && !had_max_tr) {
5416 		ret = tracing_alloc_snapshot_instance(tr);
5417 		if (ret < 0)
5418 			goto out;
5419 	}
5420 #endif
5421 
5422 	if (t->init) {
5423 		ret = tracer_init(t, tr);
5424 		if (ret)
5425 			goto out;
5426 	}
5427 
5428 	tr->current_trace = t;
5429 	tr->current_trace->enabled++;
5430 	trace_branch_enable(tr);
5431  out:
5432 	mutex_unlock(&trace_types_lock);
5433 
5434 	return ret;
5435 }
5436 
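/*
 * Writes to the "current_tracer" file end up in tracing_set_tracer()
 * above.  For example:
 *
 *   # echo function_graph > current_tracer   - switch tracers
 *   # echo nop > current_tracer              - disable the current tracer
 *
 * The write fails with -EBUSY while any trace_pipe reader still holds a
 * reference on the current tracer.
 */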
5437 static ssize_t
5438 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5439 			size_t cnt, loff_t *ppos)
5440 {
5441 	struct trace_array *tr = filp->private_data;
5442 	char buf[MAX_TRACER_SIZE+1];
5443 	int i;
5444 	size_t ret;
5445 	int err;
5446 
5447 	ret = cnt;
5448 
5449 	if (cnt > MAX_TRACER_SIZE)
5450 		cnt = MAX_TRACER_SIZE;
5451 
5452 	if (copy_from_user(buf, ubuf, cnt))
5453 		return -EFAULT;
5454 
5455 	buf[cnt] = 0;
5456 
5457 	/* strip ending whitespace. */
5458 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5459 		buf[i] = 0;
5460 
5461 	err = tracing_set_tracer(tr, buf);
5462 	if (err)
5463 		return err;
5464 
5465 	*ppos += ret;
5466 
5467 	return ret;
5468 }
5469 
5470 static ssize_t
5471 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5472 		   size_t cnt, loff_t *ppos)
5473 {
5474 	char buf[64];
5475 	int r;
5476 
5477 	r = snprintf(buf, sizeof(buf), "%ld\n",
5478 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5479 	if (r > sizeof(buf))
5480 		r = sizeof(buf);
5481 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5482 }
5483 
5484 static ssize_t
5485 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5486 		    size_t cnt, loff_t *ppos)
5487 {
5488 	unsigned long val;
5489 	int ret;
5490 
5491 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5492 	if (ret)
5493 		return ret;
5494 
5495 	*ptr = val * 1000;
5496 
5497 	return cnt;
5498 }
5499 
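/*
 * tracing_nsecs_read()/tracing_nsecs_write() convert between the
 * microseconds shown to user space and the nanoseconds stored internally;
 * they back "tracing_thresh" below and the max latency file.  For example,
 * "echo 100 > tracing_thresh" sets a 100 usec threshold for the latency
 * tracers.
 */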
5500 static ssize_t
5501 tracing_thresh_read(struct file *filp, char __user *ubuf,
5502 		    size_t cnt, loff_t *ppos)
5503 {
5504 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5505 }
5506 
5507 static ssize_t
5508 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5509 		     size_t cnt, loff_t *ppos)
5510 {
5511 	struct trace_array *tr = filp->private_data;
5512 	int ret;
5513 
5514 	mutex_lock(&trace_types_lock);
5515 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5516 	if (ret < 0)
5517 		goto out;
5518 
5519 	if (tr->current_trace->update_thresh) {
5520 		ret = tr->current_trace->update_thresh(tr);
5521 		if (ret < 0)
5522 			goto out;
5523 	}
5524 
5525 	ret = cnt;
5526 out:
5527 	mutex_unlock(&trace_types_lock);
5528 
5529 	return ret;
5530 }
5531 
5532 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5533 
5534 static ssize_t
5535 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5536 		     size_t cnt, loff_t *ppos)
5537 {
5538 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5539 }
5540 
5541 static ssize_t
5542 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5543 		      size_t cnt, loff_t *ppos)
5544 {
5545 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5546 }
5547 
5548 #endif
5549 
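/*
 * The "trace_pipe" file is the consuming counterpart of "trace": each
 * event is removed from the ring buffer as it is read, and readers block
 * until more data arrives (unless the file was opened O_NONBLOCK).  For
 * example, "cat trace_pipe" streams events as they are recorded.
 */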
5550 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5551 {
5552 	struct trace_array *tr = inode->i_private;
5553 	struct trace_iterator *iter;
5554 	int ret = 0;
5555 
5556 	if (tracing_disabled)
5557 		return -ENODEV;
5558 
5559 	if (trace_array_get(tr) < 0)
5560 		return -ENODEV;
5561 
5562 	mutex_lock(&trace_types_lock);
5563 
5564 	/* create a buffer to store the information to pass to userspace */
5565 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5566 	if (!iter) {
5567 		ret = -ENOMEM;
5568 		__trace_array_put(tr);
5569 		goto out;
5570 	}
5571 
5572 	trace_seq_init(&iter->seq);
5573 	iter->trace = tr->current_trace;
5574 
5575 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5576 		ret = -ENOMEM;
5577 		goto fail;
5578 	}
5579 
5580 	/* trace pipe does not show start of buffer */
5581 	cpumask_setall(iter->started);
5582 
5583 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5584 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5585 
5586 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5587 	if (trace_clocks[tr->clock_id].in_ns)
5588 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5589 
5590 	iter->tr = tr;
5591 	iter->trace_buffer = &tr->trace_buffer;
5592 	iter->cpu_file = tracing_get_cpu(inode);
5593 	mutex_init(&iter->mutex);
5594 	filp->private_data = iter;
5595 
5596 	if (iter->trace->pipe_open)
5597 		iter->trace->pipe_open(iter);
5598 
5599 	nonseekable_open(inode, filp);
5600 
5601 	tr->current_trace->ref++;
5602 out:
5603 	mutex_unlock(&trace_types_lock);
5604 	return ret;
5605 
5606 fail:
5608 	kfree(iter);
5609 	__trace_array_put(tr);
5610 	mutex_unlock(&trace_types_lock);
5611 	return ret;
5612 }
5613 
5614 static int tracing_release_pipe(struct inode *inode, struct file *file)
5615 {
5616 	struct trace_iterator *iter = file->private_data;
5617 	struct trace_array *tr = inode->i_private;
5618 
5619 	mutex_lock(&trace_types_lock);
5620 
5621 	tr->current_trace->ref--;
5622 
5623 	if (iter->trace->pipe_close)
5624 		iter->trace->pipe_close(iter);
5625 
5626 	mutex_unlock(&trace_types_lock);
5627 
5628 	free_cpumask_var(iter->started);
5629 	mutex_destroy(&iter->mutex);
5630 	kfree(iter);
5631 
5632 	trace_array_put(tr);
5633 
5634 	return 0;
5635 }
5636 
5637 static __poll_t
5638 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5639 {
5640 	struct trace_array *tr = iter->tr;
5641 
5642 	/* Iterators are static, they should be filled or empty */
5643 	if (trace_buffer_iter(iter, iter->cpu_file))
5644 		return EPOLLIN | EPOLLRDNORM;
5645 
5646 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5647 		/*
5648 		 * Always select as readable when in blocking mode
5649 		 */
5650 		return EPOLLIN | EPOLLRDNORM;
5651 	else
5652 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5653 					     filp, poll_table);
5654 }
5655 
5656 static __poll_t
5657 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5658 {
5659 	struct trace_iterator *iter = filp->private_data;
5660 
5661 	return trace_poll(iter, filp, poll_table);
5662 }
5663 
5664 /* Must be called with iter->mutex held. */
5665 static int tracing_wait_pipe(struct file *filp)
5666 {
5667 	struct trace_iterator *iter = filp->private_data;
5668 	int ret;
5669 
5670 	while (trace_empty(iter)) {
5671 
5672 		if ((filp->f_flags & O_NONBLOCK)) {
5673 			return -EAGAIN;
5674 		}
5675 
5676 		/*
5677 		 * Even if the buffer stays empty we keep blocking, unless
5678 		 * tracing has been disabled and we have already read something:
5679 		 * in that case we return EOF. Blocking while tracing is disabled
5680 		 * but nothing has been read yet allows a user to cat this file
5681 		 * and then enable tracing.
5682 		 *
5683 		 * iter->pos will be 0 if we haven't read anything.
5684 		 */
5685 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5686 			break;
5687 
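		/* Drop iter->mutex while we sleep; wait_on_pipe() blocks until new data is written or the wait is interrupted */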
5688 		mutex_unlock(&iter->mutex);
5689 
5690 		ret = wait_on_pipe(iter, false);
5691 
5692 		mutex_lock(&iter->mutex);
5693 
5694 		if (ret)
5695 			return ret;
5696 	}
5697 
5698 	return 1;
5699 }
5700 
5701 /*
5702  * Consumer reader.
5703  */
5704 static ssize_t
5705 tracing_read_pipe(struct file *filp, char __user *ubuf,
5706 		  size_t cnt, loff_t *ppos)
5707 {
5708 	struct trace_iterator *iter = filp->private_data;
5709 	ssize_t sret;
5710 
5711 	/*
5712 	 * Avoid more than one consumer on a single file descriptor.
5713 	 * This is just a matter of trace coherency; the ring buffer itself
5714 	 * is protected.
5715 	 */
5716 	mutex_lock(&iter->mutex);
5717 
5718 	/* return any leftover data */
5719 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5720 	if (sret != -EBUSY)
5721 		goto out;
5722 
5723 	trace_seq_init(&iter->seq);
5724 
5725 	if (iter->trace->read) {
5726 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5727 		if (sret)
5728 			goto out;
5729 	}
5730 
5731 waitagain:
5732 	sret = tracing_wait_pipe(filp);
5733 	if (sret <= 0)
5734 		goto out;
5735 
5736 	/* stop when tracing is finished */
5737 	if (trace_empty(iter)) {
5738 		sret = 0;
5739 		goto out;
5740 	}
5741 
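	/* The iter->seq buffer is one page; cap a single read to what it can hold */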
5742 	if (cnt >= PAGE_SIZE)
5743 		cnt = PAGE_SIZE - 1;
5744 
5745 	/* reset all but tr, trace, and overruns */
5746 	memset(&iter->seq, 0,
5747 	       sizeof(struct trace_iterator) -
5748 	       offsetof(struct trace_iterator, seq));
5749 	cpumask_clear(iter->started);
5750 	iter->pos = -1;
5751 
5752 	trace_event_read_lock();
5753 	trace_access_lock(iter->cpu_file);
5754 	while (trace_find_next_entry_inc(iter) != NULL) {
5755 		enum print_line_t ret;
5756 		int save_len = iter->seq.seq.len;
5757 
5758 		ret = print_trace_line(iter);
5759 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5760 			/* don't print partial lines */
5761 			iter->seq.seq.len = save_len;
5762 			break;
5763 		}
5764 		if (ret != TRACE_TYPE_NO_CONSUME)
5765 			trace_consume(iter);
5766 
5767 		if (trace_seq_used(&iter->seq) >= cnt)
5768 			break;
5769 
5770 		/*
5771 		 * A set full flag means we hit the end of the trace_seq buffer
5772 		 * and should have left through the partial-line check above;
5773 		 * one of the trace_seq_* functions is not being used properly.
5774 		 */
5775 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5776 			  iter->ent->type);
5777 	}
5778 	trace_access_unlock(iter->cpu_file);
5779 	trace_event_read_unlock();
5780 
5781 	/* Now copy what we have to the user */
5782 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5783 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5784 		trace_seq_init(&iter->seq);
5785 
5786 	/*
5787 	 * If there was nothing to send to user, in spite of consuming trace
5788 	 * entries, go back to wait for more entries.
5789 	 */
5790 	if (sret == -EBUSY)
5791 		goto waitagain;
5792 
5793 out:
5794 	mutex_unlock(&iter->mutex);
5795 
5796 	return sret;
5797 }
5798 
5799 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5800 				     unsigned int idx)
5801 {
5802 	__free_page(spd->pages[idx]);
5803 }
5804 
5805 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5806 	.can_merge		= 0,
5807 	.confirm		= generic_pipe_buf_confirm,
5808 	.release		= generic_pipe_buf_release,
5809 	.steal			= generic_pipe_buf_steal,
5810 	.get			= generic_pipe_buf_get,
5811 };
5812 
5813 static size_t
5814 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5815 {
5816 	size_t count;
5817 	int save_len;
5818 	int ret;
5819 
5820 	/* Seq buffer is page-sized, exactly what we need. */
5821 	for (;;) {
5822 		save_len = iter->seq.seq.len;
5823 		ret = print_trace_line(iter);
5824 
5825 		if (trace_seq_has_overflowed(&iter->seq)) {
5826 			iter->seq.seq.len = save_len;
5827 			break;
5828 		}
5829 
5830 		/*
5831 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
5832 		 * should only be returned if iter->seq overflowed. But
5833 		 * check it anyway to be safe.
5834 		 */
5835 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5836 			iter->seq.seq.len = save_len;
5837 			break;
5838 		}
5839 
5840 		count = trace_seq_used(&iter->seq) - save_len;
5841 		if (rem < count) {
5842 			rem = 0;
5843 			iter->seq.seq.len = save_len;
5844 			break;
5845 		}
5846 
5847 		if (ret != TRACE_TYPE_NO_CONSUME)
5848 			trace_consume(iter);
5849 		rem -= count;
5850 		if (!trace_find_next_entry_inc(iter))	{
5851 			rem = 0;
5852 			iter->ent = NULL;
5853 			break;
5854 		}
5855 	}
5856 
5857 	return rem;
5858 }
5859 
5860 static ssize_t tracing_splice_read_pipe(struct file *filp,
5861 					loff_t *ppos,
5862 					struct pipe_inode_info *pipe,
5863 					size_t len,
5864 					unsigned int flags)
5865 {
5866 	struct page *pages_def[PIPE_DEF_BUFFERS];
5867 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5868 	struct trace_iterator *iter = filp->private_data;
5869 	struct splice_pipe_desc spd = {
5870 		.pages		= pages_def,
5871 		.partial	= partial_def,
5872 		.nr_pages	= 0, /* This gets updated below. */
5873 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5874 		.ops		= &tracing_pipe_buf_ops,
5875 		.spd_release	= tracing_spd_release_pipe,
5876 	};
5877 	ssize_t ret;
5878 	size_t rem;
5879 	unsigned int i;
5880 
5881 	if (splice_grow_spd(pipe, &spd))
5882 		return -ENOMEM;
5883 
5884 	mutex_lock(&iter->mutex);
5885 
5886 	if (iter->trace->splice_read) {
5887 		ret = iter->trace->splice_read(iter, filp,
5888 					       ppos, pipe, len, flags);
5889 		if (ret)
5890 			goto out_err;
5891 	}
5892 
5893 	ret = tracing_wait_pipe(filp);
5894 	if (ret <= 0)
5895 		goto out_err;
5896 
5897 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5898 		ret = -EFAULT;
5899 		goto out_err;
5900 	}
5901 
5902 	trace_event_read_lock();
5903 	trace_access_lock(iter->cpu_file);
5904 
5905 	/* Fill as many pages as possible. */
5906 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5907 		spd.pages[i] = alloc_page(GFP_KERNEL);
5908 		if (!spd.pages[i])
5909 			break;
5910 
5911 		rem = tracing_fill_pipe_page(rem, iter);
5912 
5913 		/* Copy the data into the page, so we can start over. */
5914 		ret = trace_seq_to_buffer(&iter->seq,
5915 					  page_address(spd.pages[i]),
5916 					  trace_seq_used(&iter->seq));
5917 		if (ret < 0) {
5918 			__free_page(spd.pages[i]);
5919 			break;
5920 		}
5921 		spd.partial[i].offset = 0;
5922 		spd.partial[i].len = trace_seq_used(&iter->seq);
5923 
5924 		trace_seq_init(&iter->seq);
5925 	}
5926 
5927 	trace_access_unlock(iter->cpu_file);
5928 	trace_event_read_unlock();
5929 	mutex_unlock(&iter->mutex);
5930 
5931 	spd.nr_pages = i;
5932 
5933 	if (i)
5934 		ret = splice_to_pipe(pipe, &spd);
5935 	else
5936 		ret = 0;
5937 out:
5938 	splice_shrink_spd(&spd);
5939 	return ret;
5940 
5941 out_err:
5942 	mutex_unlock(&iter->mutex);
5943 	goto out;
5944 }
5945 
5946 static ssize_t
5947 tracing_entries_read(struct file *filp, char __user *ubuf,
5948 		     size_t cnt, loff_t *ppos)
5949 {
5950 	struct inode *inode = file_inode(filp);
5951 	struct trace_array *tr = inode->i_private;
5952 	int cpu = tracing_get_cpu(inode);
5953 	char buf[64];
5954 	int r = 0;
5955 	ssize_t ret;
5956 
5957 	mutex_lock(&trace_types_lock);
5958 
5959 	if (cpu == RING_BUFFER_ALL_CPUS) {
5960 		int cpu, buf_size_same;
5961 		unsigned long size;
5962 
5963 		size = 0;
5964 		buf_size_same = 1;
5965 		/* check if all cpu sizes are same */
5966 		for_each_tracing_cpu(cpu) {
5967 			/* fill in the size from first enabled cpu */
5968 			if (size == 0)
5969 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5970 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5971 				buf_size_same = 0;
5972 				break;
5973 			}
5974 		}
5975 
5976 		if (buf_size_same) {
5977 			if (!ring_buffer_expanded)
5978 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5979 					    size >> 10,
5980 					    trace_buf_size >> 10);
5981 			else
5982 				r = sprintf(buf, "%lu\n", size >> 10);
5983 		} else
5984 			r = sprintf(buf, "X\n");
5985 	} else
5986 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5987 
5988 	mutex_unlock(&trace_types_lock);
5989 
5990 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5991 	return ret;
5992 }
5993 
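/*
 * Write handler for the buffer_size_kb files; the value is interpreted in
 * KiB, e.g. "echo 4096 > buffer_size_kb" resizes the selected per-cpu
 * buffer(s) to 4 MiB each.
 */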
5994 static ssize_t
5995 tracing_entries_write(struct file *filp, const char __user *ubuf,
5996 		      size_t cnt, loff_t *ppos)
5997 {
5998 	struct inode *inode = file_inode(filp);
5999 	struct trace_array *tr = inode->i_private;
6000 	unsigned long val;
6001 	int ret;
6002 
6003 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6004 	if (ret)
6005 		return ret;
6006 
6007 	/* must have at least 1 entry */
6008 	if (!val)
6009 		return -EINVAL;
6010 
6011 	/* value is in KB */
6012 	val <<= 10;
6013 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6014 	if (ret < 0)
6015 		return ret;
6016 
6017 	*ppos += cnt;
6018 
6019 	return cnt;
6020 }
6021 
6022 static ssize_t
6023 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6024 				size_t cnt, loff_t *ppos)
6025 {
6026 	struct trace_array *tr = filp->private_data;
6027 	char buf[64];
6028 	int r, cpu;
6029 	unsigned long size = 0, expanded_size = 0;
6030 
6031 	mutex_lock(&trace_types_lock);
6032 	for_each_tracing_cpu(cpu) {
6033 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6034 		if (!ring_buffer_expanded)
6035 			expanded_size += trace_buf_size >> 10;
6036 	}
6037 	if (ring_buffer_expanded)
6038 		r = sprintf(buf, "%lu\n", size);
6039 	else
6040 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6041 	mutex_unlock(&trace_types_lock);
6042 
6043 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6044 }
6045 
6046 static ssize_t
6047 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6048 			  size_t cnt, loff_t *ppos)
6049 {
6050 	/*
6051 	 * There is no need to read what the user has written; this function
6052 	 * only exists so that using "echo" on this file does not report an error.
6053 	 */
6054 
6055 	*ppos += cnt;
6056 
6057 	return cnt;
6058 }
6059 
6060 static int
6061 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6062 {
6063 	struct trace_array *tr = inode->i_private;
6064 
6065 	/* Disable tracing? */
6066 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6067 		tracer_tracing_off(tr);
6068 	/* resize the ring buffer to 0 */
6069 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6070 
6071 	trace_array_put(tr);
6072 
6073 	return 0;
6074 }
6075 
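/*
 * Write handler for the trace_marker file. A minimal userspace sketch
 * (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *	write(fd, "hello", 5);
 *
 * The string is recorded in the ring buffer as a TRACE_PRINT event.
 */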
6076 static ssize_t
6077 tracing_mark_write(struct file *filp, const char __user *ubuf,
6078 					size_t cnt, loff_t *fpos)
6079 {
6080 	struct trace_array *tr = filp->private_data;
6081 	struct ring_buffer_event *event;
6082 	enum event_trigger_type tt = ETT_NONE;
6083 	struct ring_buffer *buffer;
6084 	struct print_entry *entry;
6085 	unsigned long irq_flags;
6086 	const char faulted[] = "<faulted>";
6087 	ssize_t written;
6088 	int size;
6089 	int len;
6090 
6091 /* Used in tracing_mark_raw_write() as well */
6092 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6093 
6094 	if (tracing_disabled)
6095 		return -EINVAL;
6096 
6097 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6098 		return -EINVAL;
6099 
6100 	if (cnt > TRACE_BUF_SIZE)
6101 		cnt = TRACE_BUF_SIZE;
6102 
6103 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6104 
6105 	local_save_flags(irq_flags);
6106 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6107 
6108 	/* If the write is shorter than "<faulted>", make sure we can still store that string */
6109 	if (cnt < FAULTED_SIZE)
6110 		size += FAULTED_SIZE - cnt;
6111 
6112 	buffer = tr->trace_buffer.buffer;
6113 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6114 					    irq_flags, preempt_count());
6115 	if (unlikely(!event))
6116 		/* Ring buffer disabled, return as if not open for write */
6117 		return -EBADF;
6118 
6119 	entry = ring_buffer_event_data(event);
6120 	entry->ip = _THIS_IP_;
6121 
6122 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6123 	if (len) {
6124 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6125 		cnt = FAULTED_SIZE;
6126 		written = -EFAULT;
6127 	} else
6128 		written = cnt;
6129 	len = cnt;
6130 
6131 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6132 		/* do not add \n before testing triggers, but add \0 */
6133 		entry->buf[cnt] = '\0';
6134 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6135 	}
6136 
6137 	if (entry->buf[cnt - 1] != '\n') {
6138 		entry->buf[cnt] = '\n';
6139 		entry->buf[cnt + 1] = '\0';
6140 	} else
6141 		entry->buf[cnt] = '\0';
6142 
6143 	__buffer_unlock_commit(buffer, event);
6144 
6145 	if (tt)
6146 		event_triggers_post_call(tr->trace_marker_file, tt);
6147 
6148 	if (written > 0)
6149 		*fpos += written;
6150 
6151 	return written;
6152 }
6153 
6154 /* Limit it for now to 3K (including tag) */
6155 #define RAW_DATA_MAX_SIZE (1024*3)
6156 
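/*
 * Write handler for the trace_marker_raw file. The payload is binary: an
 * "int" tag id followed by raw bytes. Hypothetical userspace sketch:
 *
 *	struct { int id; char buf[8]; } rec = { 42, "payload" };
 *	write(fd, &rec, sizeof(rec));
 */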
6157 static ssize_t
6158 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6159 					size_t cnt, loff_t *fpos)
6160 {
6161 	struct trace_array *tr = filp->private_data;
6162 	struct ring_buffer_event *event;
6163 	struct ring_buffer *buffer;
6164 	struct raw_data_entry *entry;
6165 	const char faulted[] = "<faulted>";
6166 	unsigned long irq_flags;
6167 	ssize_t written;
6168 	int size;
6169 	int len;
6170 
6171 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6172 
6173 	if (tracing_disabled)
6174 		return -EINVAL;
6175 
6176 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6177 		return -EINVAL;
6178 
6179 	/* The marker must at least have a tag id */
6180 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6181 		return -EINVAL;
6182 
6183 	if (cnt > TRACE_BUF_SIZE)
6184 		cnt = TRACE_BUF_SIZE;
6185 
6186 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6187 
6188 	local_save_flags(irq_flags);
6189 	size = sizeof(*entry) + cnt;
6190 	if (cnt < FAULT_SIZE_ID)
6191 		size += FAULT_SIZE_ID - cnt;
6192 
6193 	buffer = tr->trace_buffer.buffer;
6194 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6195 					    irq_flags, preempt_count());
6196 	if (!event)
6197 		/* Ring buffer disabled, return as if not open for write */
6198 		return -EBADF;
6199 
6200 	entry = ring_buffer_event_data(event);
6201 
6202 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6203 	if (len) {
6204 		entry->id = -1;
6205 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6206 		written = -EFAULT;
6207 	} else
6208 		written = cnt;
6209 
6210 	__buffer_unlock_commit(buffer, event);
6211 
6212 	if (written > 0)
6213 		*fpos += written;
6214 
6215 	return written;
6216 }
6217 
6218 static int tracing_clock_show(struct seq_file *m, void *v)
6219 {
6220 	struct trace_array *tr = m->private;
6221 	int i;
6222 
6223 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6224 		seq_printf(m,
6225 			"%s%s%s%s", i ? " " : "",
6226 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6227 			i == tr->clock_id ? "]" : "");
6228 	seq_putc(m, '\n');
6229 
6230 	return 0;
6231 }
6232 
6233 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6234 {
6235 	int i;
6236 
6237 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6238 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6239 			break;
6240 	}
6241 	if (i == ARRAY_SIZE(trace_clocks))
6242 		return -EINVAL;
6243 
6244 	mutex_lock(&trace_types_lock);
6245 
6246 	tr->clock_id = i;
6247 
6248 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6249 
6250 	/*
6251 	 * New clock may not be consistent with the previous clock.
6252 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6253 	 */
6254 	tracing_reset_online_cpus(&tr->trace_buffer);
6255 
6256 #ifdef CONFIG_TRACER_MAX_TRACE
6257 	if (tr->max_buffer.buffer)
6258 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6259 	tracing_reset_online_cpus(&tr->max_buffer);
6260 #endif
6261 
6262 	mutex_unlock(&trace_types_lock);
6263 
6264 	return 0;
6265 }
6266 
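/*
 * Write handler for the trace_clock file, e.g. "echo mono > trace_clock"
 * selects the monotonic clock. Note that tracing_set_clock() resets the
 * ring buffer, so previously recorded events are lost.
 */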
6267 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6268 				   size_t cnt, loff_t *fpos)
6269 {
6270 	struct seq_file *m = filp->private_data;
6271 	struct trace_array *tr = m->private;
6272 	char buf[64];
6273 	const char *clockstr;
6274 	int ret;
6275 
6276 	if (cnt >= sizeof(buf))
6277 		return -EINVAL;
6278 
6279 	if (copy_from_user(buf, ubuf, cnt))
6280 		return -EFAULT;
6281 
6282 	buf[cnt] = 0;
6283 
6284 	clockstr = strstrip(buf);
6285 
6286 	ret = tracing_set_clock(tr, clockstr);
6287 	if (ret)
6288 		return ret;
6289 
6290 	*fpos += cnt;
6291 
6292 	return cnt;
6293 }
6294 
6295 static int tracing_clock_open(struct inode *inode, struct file *file)
6296 {
6297 	struct trace_array *tr = inode->i_private;
6298 	int ret;
6299 
6300 	if (tracing_disabled)
6301 		return -ENODEV;
6302 
6303 	if (trace_array_get(tr))
6304 		return -ENODEV;
6305 
6306 	ret = single_open(file, tracing_clock_show, inode->i_private);
6307 	if (ret < 0)
6308 		trace_array_put(tr);
6309 
6310 	return ret;
6311 }
6312 
6313 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6314 {
6315 	struct trace_array *tr = m->private;
6316 
6317 	mutex_lock(&trace_types_lock);
6318 
6319 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6320 		seq_puts(m, "delta [absolute]\n");
6321 	else
6322 		seq_puts(m, "[delta] absolute\n");
6323 
6324 	mutex_unlock(&trace_types_lock);
6325 
6326 	return 0;
6327 }
6328 
6329 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6330 {
6331 	struct trace_array *tr = inode->i_private;
6332 	int ret;
6333 
6334 	if (tracing_disabled)
6335 		return -ENODEV;
6336 
6337 	if (trace_array_get(tr))
6338 		return -ENODEV;
6339 
6340 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6341 	if (ret < 0)
6342 		trace_array_put(tr);
6343 
6344 	return ret;
6345 }
6346 
6347 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6348 {
6349 	int ret = 0;
6350 
6351 	mutex_lock(&trace_types_lock);
6352 
6353 	if (abs && tr->time_stamp_abs_ref++)
6354 		goto out;
6355 
6356 	if (!abs) {
6357 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6358 			ret = -EINVAL;
6359 			goto out;
6360 		}
6361 
6362 		if (--tr->time_stamp_abs_ref)
6363 			goto out;
6364 	}
6365 
6366 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6367 
6368 #ifdef CONFIG_TRACER_MAX_TRACE
6369 	if (tr->max_buffer.buffer)
6370 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6371 #endif
6372  out:
6373 	mutex_unlock(&trace_types_lock);
6374 
6375 	return ret;
6376 }
6377 
6378 struct ftrace_buffer_info {
6379 	struct trace_iterator	iter;
6380 	void			*spare;
6381 	unsigned int		spare_cpu;
6382 	unsigned int		read;
6383 };
6384 
6385 #ifdef CONFIG_TRACER_SNAPSHOT
6386 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6387 {
6388 	struct trace_array *tr = inode->i_private;
6389 	struct trace_iterator *iter;
6390 	struct seq_file *m;
6391 	int ret = 0;
6392 
6393 	if (trace_array_get(tr) < 0)
6394 		return -ENODEV;
6395 
6396 	if (file->f_mode & FMODE_READ) {
6397 		iter = __tracing_open(inode, file, true);
6398 		if (IS_ERR(iter))
6399 			ret = PTR_ERR(iter);
6400 	} else {
6401 		/* Writes still need the seq_file to hold the private data */
6402 		ret = -ENOMEM;
6403 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6404 		if (!m)
6405 			goto out;
6406 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6407 		if (!iter) {
6408 			kfree(m);
6409 			goto out;
6410 		}
6411 		ret = 0;
6412 
6413 		iter->tr = tr;
6414 		iter->trace_buffer = &tr->max_buffer;
6415 		iter->cpu_file = tracing_get_cpu(inode);
6416 		m->private = iter;
6417 		file->private_data = m;
6418 	}
6419 out:
6420 	if (ret < 0)
6421 		trace_array_put(tr);
6422 
6423 	return ret;
6424 }
6425 
6426 static ssize_t
6427 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6428 		       loff_t *ppos)
6429 {
6430 	struct seq_file *m = filp->private_data;
6431 	struct trace_iterator *iter = m->private;
6432 	struct trace_array *tr = iter->tr;
6433 	unsigned long val;
6434 	int ret;
6435 
6436 	ret = tracing_update_buffers();
6437 	if (ret < 0)
6438 		return ret;
6439 
6440 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6441 	if (ret)
6442 		return ret;
6443 
6444 	mutex_lock(&trace_types_lock);
6445 
6446 	if (tr->current_trace->use_max_tr) {
6447 		ret = -EBUSY;
6448 		goto out;
6449 	}
6450 
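	/*
	 * Written value semantics: 0 frees the snapshot buffer, 1 allocates
	 * it (if needed) and takes a snapshot by swapping buffers, any other
	 * value clears the snapshot buffer without freeing it.
	 */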
6451 	switch (val) {
6452 	case 0:
6453 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6454 			ret = -EINVAL;
6455 			break;
6456 		}
6457 		if (tr->allocated_snapshot)
6458 			free_snapshot(tr);
6459 		break;
6460 	case 1:
6461 /* Only allow per-cpu swap if the ring buffer supports it */
6462 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6463 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6464 			ret = -EINVAL;
6465 			break;
6466 		}
6467 #endif
6468 		if (!tr->allocated_snapshot) {
6469 			ret = tracing_alloc_snapshot_instance(tr);
6470 			if (ret < 0)
6471 				break;
6472 		}
6473 		local_irq_disable();
6474 		/* Now, we're going to swap */
6475 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6476 			update_max_tr(tr, current, smp_processor_id());
6477 		else
6478 			update_max_tr_single(tr, current, iter->cpu_file);
6479 		local_irq_enable();
6480 		break;
6481 	default:
6482 		if (tr->allocated_snapshot) {
6483 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6484 				tracing_reset_online_cpus(&tr->max_buffer);
6485 			else
6486 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6487 		}
6488 		break;
6489 	}
6490 
6491 	if (ret >= 0) {
6492 		*ppos += cnt;
6493 		ret = cnt;
6494 	}
6495 out:
6496 	mutex_unlock(&trace_types_lock);
6497 	return ret;
6498 }
6499 
6500 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6501 {
6502 	struct seq_file *m = file->private_data;
6503 	int ret;
6504 
6505 	ret = tracing_release(inode, file);
6506 
6507 	if (file->f_mode & FMODE_READ)
6508 		return ret;
6509 
6510 	/* If write only, the seq_file is just a stub */
6511 	if (m)
6512 		kfree(m->private);
6513 	kfree(m);
6514 
6515 	return 0;
6516 }
6517 
6518 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6519 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6520 				    size_t count, loff_t *ppos);
6521 static int tracing_buffers_release(struct inode *inode, struct file *file);
6522 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6523 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6524 
6525 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6526 {
6527 	struct ftrace_buffer_info *info;
6528 	int ret;
6529 
6530 	ret = tracing_buffers_open(inode, filp);
6531 	if (ret < 0)
6532 		return ret;
6533 
6534 	info = filp->private_data;
6535 
6536 	if (info->iter.trace->use_max_tr) {
6537 		tracing_buffers_release(inode, filp);
6538 		return -EBUSY;
6539 	}
6540 
6541 	info->iter.snapshot = true;
6542 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6543 
6544 	return ret;
6545 }
6546 
6547 #endif /* CONFIG_TRACER_SNAPSHOT */
6548 
6549 
6550 static const struct file_operations tracing_thresh_fops = {
6551 	.open		= tracing_open_generic,
6552 	.read		= tracing_thresh_read,
6553 	.write		= tracing_thresh_write,
6554 	.llseek		= generic_file_llseek,
6555 };
6556 
6557 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6558 static const struct file_operations tracing_max_lat_fops = {
6559 	.open		= tracing_open_generic,
6560 	.read		= tracing_max_lat_read,
6561 	.write		= tracing_max_lat_write,
6562 	.llseek		= generic_file_llseek,
6563 };
6564 #endif
6565 
6566 static const struct file_operations set_tracer_fops = {
6567 	.open		= tracing_open_generic,
6568 	.read		= tracing_set_trace_read,
6569 	.write		= tracing_set_trace_write,
6570 	.llseek		= generic_file_llseek,
6571 };
6572 
6573 static const struct file_operations tracing_pipe_fops = {
6574 	.open		= tracing_open_pipe,
6575 	.poll		= tracing_poll_pipe,
6576 	.read		= tracing_read_pipe,
6577 	.splice_read	= tracing_splice_read_pipe,
6578 	.release	= tracing_release_pipe,
6579 	.llseek		= no_llseek,
6580 };
6581 
6582 static const struct file_operations tracing_entries_fops = {
6583 	.open		= tracing_open_generic_tr,
6584 	.read		= tracing_entries_read,
6585 	.write		= tracing_entries_write,
6586 	.llseek		= generic_file_llseek,
6587 	.release	= tracing_release_generic_tr,
6588 };
6589 
6590 static const struct file_operations tracing_total_entries_fops = {
6591 	.open		= tracing_open_generic_tr,
6592 	.read		= tracing_total_entries_read,
6593 	.llseek		= generic_file_llseek,
6594 	.release	= tracing_release_generic_tr,
6595 };
6596 
6597 static const struct file_operations tracing_free_buffer_fops = {
6598 	.open		= tracing_open_generic_tr,
6599 	.write		= tracing_free_buffer_write,
6600 	.release	= tracing_free_buffer_release,
6601 };
6602 
6603 static const struct file_operations tracing_mark_fops = {
6604 	.open		= tracing_open_generic_tr,
6605 	.write		= tracing_mark_write,
6606 	.llseek		= generic_file_llseek,
6607 	.release	= tracing_release_generic_tr,
6608 };
6609 
6610 static const struct file_operations tracing_mark_raw_fops = {
6611 	.open		= tracing_open_generic_tr,
6612 	.write		= tracing_mark_raw_write,
6613 	.llseek		= generic_file_llseek,
6614 	.release	= tracing_release_generic_tr,
6615 };
6616 
6617 static const struct file_operations trace_clock_fops = {
6618 	.open		= tracing_clock_open,
6619 	.read		= seq_read,
6620 	.llseek		= seq_lseek,
6621 	.release	= tracing_single_release_tr,
6622 	.write		= tracing_clock_write,
6623 };
6624 
6625 static const struct file_operations trace_time_stamp_mode_fops = {
6626 	.open		= tracing_time_stamp_mode_open,
6627 	.read		= seq_read,
6628 	.llseek		= seq_lseek,
6629 	.release	= tracing_single_release_tr,
6630 };
6631 
6632 #ifdef CONFIG_TRACER_SNAPSHOT
6633 static const struct file_operations snapshot_fops = {
6634 	.open		= tracing_snapshot_open,
6635 	.read		= seq_read,
6636 	.write		= tracing_snapshot_write,
6637 	.llseek		= tracing_lseek,
6638 	.release	= tracing_snapshot_release,
6639 };
6640 
6641 static const struct file_operations snapshot_raw_fops = {
6642 	.open		= snapshot_raw_open,
6643 	.read		= tracing_buffers_read,
6644 	.release	= tracing_buffers_release,
6645 	.splice_read	= tracing_buffers_splice_read,
6646 	.llseek		= no_llseek,
6647 };
6648 
6649 #endif /* CONFIG_TRACER_SNAPSHOT */
6650 
6651 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6652 {
6653 	struct trace_array *tr = inode->i_private;
6654 	struct ftrace_buffer_info *info;
6655 	int ret;
6656 
6657 	if (tracing_disabled)
6658 		return -ENODEV;
6659 
6660 	if (trace_array_get(tr) < 0)
6661 		return -ENODEV;
6662 
6663 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6664 	if (!info) {
6665 		trace_array_put(tr);
6666 		return -ENOMEM;
6667 	}
6668 
6669 	mutex_lock(&trace_types_lock);
6670 
6671 	info->iter.tr		= tr;
6672 	info->iter.cpu_file	= tracing_get_cpu(inode);
6673 	info->iter.trace	= tr->current_trace;
6674 	info->iter.trace_buffer = &tr->trace_buffer;
6675 	info->spare		= NULL;
6676 	/* Force reading ring buffer for first read */
6677 	info->read		= (unsigned int)-1;
6678 
6679 	filp->private_data = info;
6680 
6681 	tr->current_trace->ref++;
6682 
6683 	mutex_unlock(&trace_types_lock);
6684 
6685 	ret = nonseekable_open(inode, filp);
6686 	if (ret < 0)
6687 		trace_array_put(tr);
6688 
6689 	return ret;
6690 }
6691 
6692 static __poll_t
6693 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6694 {
6695 	struct ftrace_buffer_info *info = filp->private_data;
6696 	struct trace_iterator *iter = &info->iter;
6697 
6698 	return trace_poll(iter, filp, poll_table);
6699 }
6700 
6701 static ssize_t
6702 tracing_buffers_read(struct file *filp, char __user *ubuf,
6703 		     size_t count, loff_t *ppos)
6704 {
6705 	struct ftrace_buffer_info *info = filp->private_data;
6706 	struct trace_iterator *iter = &info->iter;
6707 	ssize_t ret = 0;
6708 	ssize_t size;
6709 
6710 	if (!count)
6711 		return 0;
6712 
6713 #ifdef CONFIG_TRACER_MAX_TRACE
6714 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6715 		return -EBUSY;
6716 #endif
6717 
6718 	if (!info->spare) {
6719 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6720 							  iter->cpu_file);
6721 		if (IS_ERR(info->spare)) {
6722 			ret = PTR_ERR(info->spare);
6723 			info->spare = NULL;
6724 		} else {
6725 			info->spare_cpu = iter->cpu_file;
6726 		}
6727 	}
6728 	if (!info->spare)
6729 		return ret;
6730 
6731 	/* Do we have previous read data to read? */
6732 	if (info->read < PAGE_SIZE)
6733 		goto read;
6734 
6735  again:
6736 	trace_access_lock(iter->cpu_file);
6737 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6738 				    &info->spare,
6739 				    count,
6740 				    iter->cpu_file, 0);
6741 	trace_access_unlock(iter->cpu_file);
6742 
6743 	if (ret < 0) {
6744 		if (trace_empty(iter)) {
6745 			if ((filp->f_flags & O_NONBLOCK))
6746 				return -EAGAIN;
6747 
6748 			ret = wait_on_pipe(iter, false);
6749 			if (ret)
6750 				return ret;
6751 
6752 			goto again;
6753 		}
6754 		return 0;
6755 	}
6756 
6757 	info->read = 0;
6758  read:
6759 	size = PAGE_SIZE - info->read;
6760 	if (size > count)
6761 		size = count;
6762 
6763 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6764 	if (ret == size)
6765 		return -EFAULT;
6766 
6767 	size -= ret;
6768 
6769 	*ppos += size;
6770 	info->read += size;
6771 
6772 	return size;
6773 }
6774 
6775 static int tracing_buffers_release(struct inode *inode, struct file *file)
6776 {
6777 	struct ftrace_buffer_info *info = file->private_data;
6778 	struct trace_iterator *iter = &info->iter;
6779 
6780 	mutex_lock(&trace_types_lock);
6781 
6782 	iter->tr->current_trace->ref--;
6783 
6784 	__trace_array_put(iter->tr);
6785 
6786 	if (info->spare)
6787 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6788 					   info->spare_cpu, info->spare);
6789 	kfree(info);
6790 
6791 	mutex_unlock(&trace_types_lock);
6792 
6793 	return 0;
6794 }
6795 
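/*
 * A buffer_ref pins one ring-buffer page handed out to a pipe; the page is
 * returned to the ring buffer once the last reference is dropped.
 */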
6796 struct buffer_ref {
6797 	struct ring_buffer	*buffer;
6798 	void			*page;
6799 	int			cpu;
6800 	int			ref;
6801 };
6802 
6803 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6804 				    struct pipe_buffer *buf)
6805 {
6806 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6807 
6808 	if (--ref->ref)
6809 		return;
6810 
6811 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6812 	kfree(ref);
6813 	buf->private = 0;
6814 }
6815 
6816 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6817 				struct pipe_buffer *buf)
6818 {
6819 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6820 
6821 	ref->ref++;
6822 }
6823 
6824 /* Pipe buffer operations for a buffer. */
6825 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6826 	.can_merge		= 0,
6827 	.confirm		= generic_pipe_buf_confirm,
6828 	.release		= buffer_pipe_buf_release,
6829 	.steal			= generic_pipe_buf_steal,
6830 	.get			= buffer_pipe_buf_get,
6831 };
6832 
6833 /*
6834  * Callback from splice_to_pipe() to release any pages left in the spd
6835  * in case we errored out while filling the pipe.
6836  */
6837 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6838 {
6839 	struct buffer_ref *ref =
6840 		(struct buffer_ref *)spd->partial[i].private;
6841 
6842 	if (--ref->ref)
6843 		return;
6844 
6845 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6846 	kfree(ref);
6847 	spd->partial[i].private = 0;
6848 }
6849 
6850 static ssize_t
6851 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6852 			    struct pipe_inode_info *pipe, size_t len,
6853 			    unsigned int flags)
6854 {
6855 	struct ftrace_buffer_info *info = file->private_data;
6856 	struct trace_iterator *iter = &info->iter;
6857 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6858 	struct page *pages_def[PIPE_DEF_BUFFERS];
6859 	struct splice_pipe_desc spd = {
6860 		.pages		= pages_def,
6861 		.partial	= partial_def,
6862 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6863 		.ops		= &buffer_pipe_buf_ops,
6864 		.spd_release	= buffer_spd_release,
6865 	};
6866 	struct buffer_ref *ref;
6867 	int entries, i;
6868 	ssize_t ret = 0;
6869 
6870 #ifdef CONFIG_TRACER_MAX_TRACE
6871 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6872 		return -EBUSY;
6873 #endif
6874 
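	/* Whole ring-buffer pages are spliced: the offset must be page aligned and the length is rounded down to full pages */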
6875 	if (*ppos & (PAGE_SIZE - 1))
6876 		return -EINVAL;
6877 
6878 	if (len & (PAGE_SIZE - 1)) {
6879 		if (len < PAGE_SIZE)
6880 			return -EINVAL;
6881 		len &= PAGE_MASK;
6882 	}
6883 
6884 	if (splice_grow_spd(pipe, &spd))
6885 		return -ENOMEM;
6886 
6887  again:
6888 	trace_access_lock(iter->cpu_file);
6889 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6890 
6891 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6892 		struct page *page;
6893 		int r;
6894 
6895 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6896 		if (!ref) {
6897 			ret = -ENOMEM;
6898 			break;
6899 		}
6900 
6901 		ref->ref = 1;
6902 		ref->buffer = iter->trace_buffer->buffer;
6903 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6904 		if (IS_ERR(ref->page)) {
6905 			ret = PTR_ERR(ref->page);
6906 			ref->page = NULL;
6907 			kfree(ref);
6908 			break;
6909 		}
6910 		ref->cpu = iter->cpu_file;
6911 
6912 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6913 					  len, iter->cpu_file, 1);
6914 		if (r < 0) {
6915 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6916 						   ref->page);
6917 			kfree(ref);
6918 			break;
6919 		}
6920 
6921 		page = virt_to_page(ref->page);
6922 
6923 		spd.pages[i] = page;
6924 		spd.partial[i].len = PAGE_SIZE;
6925 		spd.partial[i].offset = 0;
6926 		spd.partial[i].private = (unsigned long)ref;
6927 		spd.nr_pages++;
6928 		*ppos += PAGE_SIZE;
6929 
6930 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6931 	}
6932 
6933 	trace_access_unlock(iter->cpu_file);
6934 	spd.nr_pages = i;
6935 
6936 	/* did we read anything? */
6937 	if (!spd.nr_pages) {
6938 		if (ret)
6939 			goto out;
6940 
6941 		ret = -EAGAIN;
6942 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6943 			goto out;
6944 
6945 		ret = wait_on_pipe(iter, true);
6946 		if (ret)
6947 			goto out;
6948 
6949 		goto again;
6950 	}
6951 
6952 	ret = splice_to_pipe(pipe, &spd);
6953 out:
6954 	splice_shrink_spd(&spd);
6955 
6956 	return ret;
6957 }
6958 
6959 static const struct file_operations tracing_buffers_fops = {
6960 	.open		= tracing_buffers_open,
6961 	.read		= tracing_buffers_read,
6962 	.poll		= tracing_buffers_poll,
6963 	.release	= tracing_buffers_release,
6964 	.splice_read	= tracing_buffers_splice_read,
6965 	.llseek		= no_llseek,
6966 };
6967 
6968 static ssize_t
6969 tracing_stats_read(struct file *filp, char __user *ubuf,
6970 		   size_t count, loff_t *ppos)
6971 {
6972 	struct inode *inode = file_inode(filp);
6973 	struct trace_array *tr = inode->i_private;
6974 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6975 	int cpu = tracing_get_cpu(inode);
6976 	struct trace_seq *s;
6977 	unsigned long cnt;
6978 	unsigned long long t;
6979 	unsigned long usec_rem;
6980 
6981 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6982 	if (!s)
6983 		return -ENOMEM;
6984 
6985 	trace_seq_init(s);
6986 
6987 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6988 	trace_seq_printf(s, "entries: %ld\n", cnt);
6989 
6990 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6991 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6992 
6993 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6994 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6995 
6996 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6997 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6998 
6999 	if (trace_clocks[tr->clock_id].in_ns) {
7000 		/* local or global for trace_clock */
7001 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7002 		usec_rem = do_div(t, USEC_PER_SEC);
7003 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7004 								t, usec_rem);
7005 
7006 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7007 		usec_rem = do_div(t, USEC_PER_SEC);
7008 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7009 	} else {
7010 		/* counter or tsc mode for trace_clock */
7011 		trace_seq_printf(s, "oldest event ts: %llu\n",
7012 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7013 
7014 		trace_seq_printf(s, "now ts: %llu\n",
7015 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7016 	}
7017 
7018 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7019 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7020 
7021 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7022 	trace_seq_printf(s, "read events: %ld\n", cnt);
7023 
7024 	count = simple_read_from_buffer(ubuf, count, ppos,
7025 					s->buffer, trace_seq_used(s));
7026 
7027 	kfree(s);
7028 
7029 	return count;
7030 }
7031 
7032 static const struct file_operations tracing_stats_fops = {
7033 	.open		= tracing_open_generic_tr,
7034 	.read		= tracing_stats_read,
7035 	.llseek		= generic_file_llseek,
7036 	.release	= tracing_release_generic_tr,
7037 };
7038 
7039 #ifdef CONFIG_DYNAMIC_FTRACE
7040 
7041 static ssize_t
7042 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7043 		  size_t cnt, loff_t *ppos)
7044 {
7045 	unsigned long *p = filp->private_data;
7046 	char buf[64]; /* Not too big for a shallow stack */
7047 	int r;
7048 
7049 	r = scnprintf(buf, 63, "%ld", *p);
7050 	buf[r++] = '\n';
7051 
7052 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7053 }
7054 
7055 static const struct file_operations tracing_dyn_info_fops = {
7056 	.open		= tracing_open_generic,
7057 	.read		= tracing_read_dyn_info,
7058 	.llseek		= generic_file_llseek,
7059 };
7060 #endif /* CONFIG_DYNAMIC_FTRACE */
7061 
7062 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7063 static void
7064 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7065 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7066 		void *data)
7067 {
7068 	tracing_snapshot_instance(tr);
7069 }
7070 
7071 static void
7072 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7073 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7074 		      void *data)
7075 {
7076 	struct ftrace_func_mapper *mapper = data;
7077 	long *count = NULL;
7078 
7079 	if (mapper)
7080 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7081 
7082 	if (count) {
7083 
7084 		if (*count <= 0)
7085 			return;
7086 
7087 		(*count)--;
7088 	}
7089 
7090 	tracing_snapshot_instance(tr);
7091 }
7092 
7093 static int
7094 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7095 		      struct ftrace_probe_ops *ops, void *data)
7096 {
7097 	struct ftrace_func_mapper *mapper = data;
7098 	long *count = NULL;
7099 
7100 	seq_printf(m, "%ps:", (void *)ip);
7101 
7102 	seq_puts(m, "snapshot");
7103 
7104 	if (mapper)
7105 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7106 
7107 	if (count)
7108 		seq_printf(m, ":count=%ld\n", *count);
7109 	else
7110 		seq_puts(m, ":unlimited\n");
7111 
7112 	return 0;
7113 }
7114 
7115 static int
7116 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7117 		     unsigned long ip, void *init_data, void **data)
7118 {
7119 	struct ftrace_func_mapper *mapper = *data;
7120 
7121 	if (!mapper) {
7122 		mapper = allocate_ftrace_func_mapper();
7123 		if (!mapper)
7124 			return -ENOMEM;
7125 		*data = mapper;
7126 	}
7127 
7128 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7129 }
7130 
7131 static void
7132 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7133 		     unsigned long ip, void *data)
7134 {
7135 	struct ftrace_func_mapper *mapper = data;
7136 
7137 	if (!ip) {
7138 		if (!mapper)
7139 			return;
7140 		free_ftrace_func_mapper(mapper, NULL);
7141 		return;
7142 	}
7143 
7144 	ftrace_func_mapper_remove_ip(mapper, ip);
7145 }
7146 
7147 static struct ftrace_probe_ops snapshot_probe_ops = {
7148 	.func			= ftrace_snapshot,
7149 	.print			= ftrace_snapshot_print,
7150 };
7151 
7152 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7153 	.func			= ftrace_count_snapshot,
7154 	.print			= ftrace_snapshot_print,
7155 	.init			= ftrace_snapshot_init,
7156 	.free			= ftrace_snapshot_free,
7157 };
7158 
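/*
 * Implements the "snapshot" command of set_ftrace_filter, e.g. (with a
 * hypothetical function name) "echo 'do_page_fault:snapshot:5' >
 * set_ftrace_filter" snapshots on the first five hits; omit the count for
 * unlimited, prefix the glob with '!' to remove the probe.
 */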
7159 static int
7160 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7161 			       char *glob, char *cmd, char *param, int enable)
7162 {
7163 	struct ftrace_probe_ops *ops;
7164 	void *count = (void *)-1;
7165 	char *number;
7166 	int ret;
7167 
7168 	if (!tr)
7169 		return -ENODEV;
7170 
7171 	/* hash funcs only work with set_ftrace_filter */
7172 	if (!enable)
7173 		return -EINVAL;
7174 
7175 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7176 
7177 	if (glob[0] == '!')
7178 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7179 
7180 	if (!param)
7181 		goto out_reg;
7182 
7183 	number = strsep(&param, ":");
7184 
7185 	if (!strlen(number))
7186 		goto out_reg;
7187 
7188 	/*
7189 	 * We use the callback data field (which is a pointer)
7190 	 * as our counter.
7191 	 */
7192 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7193 	if (ret)
7194 		return ret;
7195 
7196  out_reg:
7197 	ret = tracing_alloc_snapshot_instance(tr);
7198 	if (ret < 0)
7199 		goto out;
7200 
7201 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7202 
7203  out:
7204 	return ret < 0 ? ret : 0;
7205 }
7206 
7207 static struct ftrace_func_command ftrace_snapshot_cmd = {
7208 	.name			= "snapshot",
7209 	.func			= ftrace_trace_snapshot_callback,
7210 };
7211 
7212 static __init int register_snapshot_cmd(void)
7213 {
7214 	return register_ftrace_command(&ftrace_snapshot_cmd);
7215 }
7216 #else
7217 static inline __init int register_snapshot_cmd(void) { return 0; }
7218 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7219 
7220 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7221 {
7222 	if (WARN_ON(!tr->dir))
7223 		return ERR_PTR(-ENODEV);
7224 
7225 	/* Top directory uses NULL as the parent */
7226 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7227 		return NULL;
7228 
7229 	/* All sub buffers have a descriptor */
7230 	return tr->dir;
7231 }
7232 
7233 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7234 {
7235 	struct dentry *d_tracer;
7236 
7237 	if (tr->percpu_dir)
7238 		return tr->percpu_dir;
7239 
7240 	d_tracer = tracing_get_dentry(tr);
7241 	if (IS_ERR(d_tracer))
7242 		return NULL;
7243 
7244 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7245 
7246 	WARN_ONCE(!tr->percpu_dir,
7247 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7248 
7249 	return tr->percpu_dir;
7250 }
7251 
7252 static struct dentry *
7253 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7254 		      void *data, long cpu, const struct file_operations *fops)
7255 {
7256 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7257 
7258 	if (ret) /* See tracing_get_cpu() */
7259 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7260 	return ret;
7261 }
7262 
7263 static void
7264 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7265 {
7266 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7267 	struct dentry *d_cpu;
7268 	char cpu_dir[30]; /* 30 characters should be more than enough */
7269 
7270 	if (!d_percpu)
7271 		return;
7272 
7273 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7274 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7275 	if (!d_cpu) {
7276 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7277 		return;
7278 	}
7279 
7280 	/* per cpu trace_pipe */
7281 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7282 				tr, cpu, &tracing_pipe_fops);
7283 
7284 	/* per cpu trace */
7285 	trace_create_cpu_file("trace", 0644, d_cpu,
7286 				tr, cpu, &tracing_fops);
7287 
7288 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7289 				tr, cpu, &tracing_buffers_fops);
7290 
7291 	trace_create_cpu_file("stats", 0444, d_cpu,
7292 				tr, cpu, &tracing_stats_fops);
7293 
7294 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7295 				tr, cpu, &tracing_entries_fops);
7296 
7297 #ifdef CONFIG_TRACER_SNAPSHOT
7298 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7299 				tr, cpu, &snapshot_fops);
7300 
7301 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7302 				tr, cpu, &snapshot_raw_fops);
7303 #endif
7304 }
7305 
7306 #ifdef CONFIG_FTRACE_SELFTEST
7307 /* Let selftest have access to static functions in this file */
7308 #include "trace_selftest.c"
7309 #endif
7310 
7311 static ssize_t
7312 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7313 			loff_t *ppos)
7314 {
7315 	struct trace_option_dentry *topt = filp->private_data;
7316 	char *buf;
7317 
7318 	if (topt->flags->val & topt->opt->bit)
7319 		buf = "1\n";
7320 	else
7321 		buf = "0\n";
7322 
7323 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7324 }
7325 
7326 static ssize_t
7327 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7328 			 loff_t *ppos)
7329 {
7330 	struct trace_option_dentry *topt = filp->private_data;
7331 	unsigned long val;
7332 	int ret;
7333 
7334 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7335 	if (ret)
7336 		return ret;
7337 
7338 	if (val != 0 && val != 1)
7339 		return -EINVAL;
7340 
7341 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7342 		mutex_lock(&trace_types_lock);
7343 		ret = __set_tracer_option(topt->tr, topt->flags,
7344 					  topt->opt, !val);
7345 		mutex_unlock(&trace_types_lock);
7346 		if (ret)
7347 			return ret;
7348 	}
7349 
7350 	*ppos += cnt;
7351 
7352 	return cnt;
7353 }
7354 
7355 
7356 static const struct file_operations trace_options_fops = {
7357 	.open = tracing_open_generic,
7358 	.read = trace_options_read,
7359 	.write = trace_options_write,
7360 	.llseek	= generic_file_llseek,
7361 };
7362 
7363 /*
7364  * In order to pass in both the trace_array descriptor as well as the index
7365  * of the flag that the trace option file represents, the trace_array
7366  * has a character array trace_flags_index[], where each byte holds the
7367  * index of the bit for the flag it represents: index[0] == 0, index[1] == 1, etc.
7368  * The address of the element for a given flag is passed to that flag's
7369  * option file read/write callbacks.
7370  *
7371  * To extract both the index and the trace_array descriptor,
7372  * get_tr_index() uses the following algorithm.
7373  *
7374  *   idx = *ptr;
7375  *
7376  * The byte that ptr points to holds its own position in the array
7377  * (remember index[1] == 1).
7378  *
7379  * Subtracting that index from ptr therefore gives the start of the
7380  * array:
7381  *
7382  *   ptr - idx == &index[0]
7383  *
7384  * A simple container_of() on that pointer then gets us to the
7385  * trace_array descriptor.
7386  */
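/*
 * Example: for the flag at bit 5, data points at trace_flags_index[5],
 * whose value is 5; data - 5 == &trace_flags_index[0], and container_of()
 * on that address yields the enclosing trace_array.
 */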
7387 static void get_tr_index(void *data, struct trace_array **ptr,
7388 			 unsigned int *pindex)
7389 {
7390 	*pindex = *(unsigned char *)data;
7391 
7392 	*ptr = container_of(data - *pindex, struct trace_array,
7393 			    trace_flags_index);
7394 }
7395 
7396 static ssize_t
7397 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7398 			loff_t *ppos)
7399 {
7400 	void *tr_index = filp->private_data;
7401 	struct trace_array *tr;
7402 	unsigned int index;
7403 	char *buf;
7404 
7405 	get_tr_index(tr_index, &tr, &index);
7406 
7407 	if (tr->trace_flags & (1 << index))
7408 		buf = "1\n";
7409 	else
7410 		buf = "0\n";
7411 
7412 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7413 }
7414 
7415 static ssize_t
7416 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7417 			 loff_t *ppos)
7418 {
7419 	void *tr_index = filp->private_data;
7420 	struct trace_array *tr;
7421 	unsigned int index;
7422 	unsigned long val;
7423 	int ret;
7424 
7425 	get_tr_index(tr_index, &tr, &index);
7426 
7427 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7428 	if (ret)
7429 		return ret;
7430 
7431 	if (val != 0 && val != 1)
7432 		return -EINVAL;
7433 
7434 	mutex_lock(&trace_types_lock);
7435 	ret = set_tracer_flag(tr, 1 << index, val);
7436 	mutex_unlock(&trace_types_lock);
7437 
7438 	if (ret < 0)
7439 		return ret;
7440 
7441 	*ppos += cnt;
7442 
7443 	return cnt;
7444 }
7445 
7446 static const struct file_operations trace_options_core_fops = {
7447 	.open = tracing_open_generic,
7448 	.read = trace_options_core_read,
7449 	.write = trace_options_core_write,
7450 	.llseek = generic_file_llseek,
7451 };
7452 
7453 struct dentry *trace_create_file(const char *name,
7454 				 umode_t mode,
7455 				 struct dentry *parent,
7456 				 void *data,
7457 				 const struct file_operations *fops)
7458 {
7459 	struct dentry *ret;
7460 
7461 	ret = tracefs_create_file(name, mode, parent, data, fops);
7462 	if (!ret)
7463 		pr_warn("Could not create tracefs '%s' entry\n", name);
7464 
7465 	return ret;
7466 }
7467 
7468 
7469 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7470 {
7471 	struct dentry *d_tracer;
7472 
7473 	if (tr->options)
7474 		return tr->options;
7475 
7476 	d_tracer = tracing_get_dentry(tr);
7477 	if (IS_ERR(d_tracer))
7478 		return NULL;
7479 
7480 	tr->options = tracefs_create_dir("options", d_tracer);
7481 	if (!tr->options) {
7482 		pr_warn("Could not create tracefs directory 'options'\n");
7483 		return NULL;
7484 	}
7485 
7486 	return tr->options;
7487 }
7488 
7489 static void
7490 create_trace_option_file(struct trace_array *tr,
7491 			 struct trace_option_dentry *topt,
7492 			 struct tracer_flags *flags,
7493 			 struct tracer_opt *opt)
7494 {
7495 	struct dentry *t_options;
7496 
7497 	t_options = trace_options_init_dentry(tr);
7498 	if (!t_options)
7499 		return;
7500 
7501 	topt->flags = flags;
7502 	topt->opt = opt;
7503 	topt->tr = tr;
7504 
7505 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7506 				    &trace_options_fops);
7507 
7508 }
7509 
7510 static void
7511 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7512 {
7513 	struct trace_option_dentry *topts;
7514 	struct trace_options *tr_topts;
7515 	struct tracer_flags *flags;
7516 	struct tracer_opt *opts;
7517 	int cnt;
7518 	int i;
7519 
7520 	if (!tracer)
7521 		return;
7522 
7523 	flags = tracer->flags;
7524 
7525 	if (!flags || !flags->opts)
7526 		return;
7527 
7528 	/*
7529 	 * If this is an instance, only create flags for tracers
7530 	 * the instance may have.
7531 	 */
7532 	if (!trace_ok_for_array(tracer, tr))
7533 		return;
7534 
7535 	for (i = 0; i < tr->nr_topts; i++) {
7536 		/* Make sure there are no duplicate flags. */
7537 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7538 			return;
7539 	}
7540 
7541 	opts = flags->opts;
7542 
7543 	for (cnt = 0; opts[cnt].name; cnt++)
7544 		;
7545 
7546 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7547 	if (!topts)
7548 		return;
7549 
7550 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7551 			    GFP_KERNEL);
7552 	if (!tr_topts) {
7553 		kfree(topts);
7554 		return;
7555 	}
7556 
7557 	tr->topts = tr_topts;
7558 	tr->topts[tr->nr_topts].tracer = tracer;
7559 	tr->topts[tr->nr_topts].topts = topts;
7560 	tr->nr_topts++;
7561 
7562 	for (cnt = 0; opts[cnt].name; cnt++) {
7563 		create_trace_option_file(tr, &topts[cnt], flags,
7564 					 &opts[cnt]);
7565 		WARN_ONCE(topts[cnt].entry == NULL,
7566 			  "Failed to create trace option: %s",
7567 			  opts[cnt].name);
7568 	}
7569 }
7570 
7571 static struct dentry *
7572 create_trace_option_core_file(struct trace_array *tr,
7573 			      const char *option, long index)
7574 {
7575 	struct dentry *t_options;
7576 
7577 	t_options = trace_options_init_dentry(tr);
7578 	if (!t_options)
7579 		return NULL;
7580 
7581 	return trace_create_file(option, 0644, t_options,
7582 				 (void *)&tr->trace_flags_index[index],
7583 				 &trace_options_core_fops);
7584 }
7585 
7586 static void create_trace_options_dir(struct trace_array *tr)
7587 {
7588 	struct dentry *t_options;
7589 	bool top_level = tr == &global_trace;
7590 	int i;
7591 
7592 	t_options = trace_options_init_dentry(tr);
7593 	if (!t_options)
7594 		return;
7595 
7596 	for (i = 0; trace_options[i]; i++) {
7597 		if (top_level ||
7598 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7599 			create_trace_option_core_file(tr, trace_options[i], i);
7600 	}
7601 }
7602 
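/*
 * Read/write handlers for the per-instance tracing_on file: reading
 * reports 1 or 0 depending on whether the ring buffer is recording;
 * writing 0 stops recording and a non-zero value (re)starts it,
 * e.g. "echo 0 > tracing_on".
 */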
7603 static ssize_t
7604 rb_simple_read(struct file *filp, char __user *ubuf,
7605 	       size_t cnt, loff_t *ppos)
7606 {
7607 	struct trace_array *tr = filp->private_data;
7608 	char buf[64];
7609 	int r;
7610 
7611 	r = tracer_tracing_is_on(tr);
7612 	r = sprintf(buf, "%d\n", r);
7613 
7614 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7615 }
7616 
7617 static ssize_t
7618 rb_simple_write(struct file *filp, const char __user *ubuf,
7619 		size_t cnt, loff_t *ppos)
7620 {
7621 	struct trace_array *tr = filp->private_data;
7622 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7623 	unsigned long val;
7624 	int ret;
7625 
7626 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7627 	if (ret)
7628 		return ret;
7629 
7630 	if (buffer) {
7631 		mutex_lock(&trace_types_lock);
7632 		if (!!val == tracer_tracing_is_on(tr)) {
7633 			val = 0; /* do nothing */
7634 		} else if (val) {
7635 			tracer_tracing_on(tr);
7636 			if (tr->current_trace->start)
7637 				tr->current_trace->start(tr);
7638 		} else {
7639 			tracer_tracing_off(tr);
7640 			if (tr->current_trace->stop)
7641 				tr->current_trace->stop(tr);
7642 		}
7643 		mutex_unlock(&trace_types_lock);
7644 	}
7645 
7646 	(*ppos)++;
7647 
7648 	return cnt;
7649 }
7650 
7651 static const struct file_operations rb_simple_fops = {
7652 	.open		= tracing_open_generic_tr,
7653 	.read		= rb_simple_read,
7654 	.write		= rb_simple_write,
7655 	.release	= tracing_release_generic_tr,
7656 	.llseek		= default_llseek,
7657 };
7658 
7659 struct dentry *trace_instance_dir;
7660 
7661 static void
7662 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7663 
7664 static int
7665 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7666 {
7667 	enum ring_buffer_flags rb_flags;
7668 
7669 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7670 
7671 	buf->tr = tr;
7672 
7673 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7674 	if (!buf->buffer)
7675 		return -ENOMEM;
7676 
7677 	buf->data = alloc_percpu(struct trace_array_cpu);
7678 	if (!buf->data) {
7679 		ring_buffer_free(buf->buffer);
7680 		buf->buffer = NULL;
7681 		return -ENOMEM;
7682 	}
7683 
7684 	/* Allocate the first page for all buffers */
7685 	set_buffer_entries(&tr->trace_buffer,
7686 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7687 
7688 	return 0;
7689 }
7690 
7691 static int allocate_trace_buffers(struct trace_array *tr, int size)
7692 {
7693 	int ret;
7694 
7695 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7696 	if (ret)
7697 		return ret;
7698 
7699 #ifdef CONFIG_TRACER_MAX_TRACE
7700 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7701 				    allocate_snapshot ? size : 1);
7702 	if (WARN_ON(ret)) {
7703 		ring_buffer_free(tr->trace_buffer.buffer);
7704 		tr->trace_buffer.buffer = NULL;
7705 		free_percpu(tr->trace_buffer.data);
7706 		tr->trace_buffer.data = NULL;
7707 		return -ENOMEM;
7708 	}
7709 	tr->allocated_snapshot = allocate_snapshot;
7710 
7711 	/*
7712 	 * Only the top level trace array gets its snapshot allocated
7713 	 * from the kernel command line.
7714 	 */
7715 	allocate_snapshot = false;
7716 #endif
7717 	return 0;
7718 }
7719 
7720 static void free_trace_buffer(struct trace_buffer *buf)
7721 {
7722 	if (buf->buffer) {
7723 		ring_buffer_free(buf->buffer);
7724 		buf->buffer = NULL;
7725 		free_percpu(buf->data);
7726 		buf->data = NULL;
7727 	}
7728 }
7729 
7730 static void free_trace_buffers(struct trace_array *tr)
7731 {
7732 	if (!tr)
7733 		return;
7734 
7735 	free_trace_buffer(&tr->trace_buffer);
7736 
7737 #ifdef CONFIG_TRACER_MAX_TRACE
7738 	free_trace_buffer(&tr->max_buffer);
7739 #endif
7740 }
7741 
7742 static void init_trace_flags_index(struct trace_array *tr)
7743 {
7744 	int i;
7745 
7746 	/* Used by the trace options files */
7747 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7748 		tr->trace_flags_index[i] = i;
7749 }
7750 
7751 static void __update_tracer_options(struct trace_array *tr)
7752 {
7753 	struct tracer *t;
7754 
7755 	for (t = trace_types; t; t = t->next)
7756 		add_tracer_options(tr, t);
7757 }
7758 
7759 static void update_tracer_options(struct trace_array *tr)
7760 {
7761 	mutex_lock(&trace_types_lock);
7762 	__update_tracer_options(tr);
7763 	mutex_unlock(&trace_types_lock);
7764 }
7765 
7766 static int instance_mkdir(const char *name)
7767 {
7768 	struct trace_array *tr;
7769 	int ret;
7770 
7771 	mutex_lock(&event_mutex);
7772 	mutex_lock(&trace_types_lock);
7773 
7774 	ret = -EEXIST;
7775 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7776 		if (tr->name && strcmp(tr->name, name) == 0)
7777 			goto out_unlock;
7778 	}
7779 
7780 	ret = -ENOMEM;
7781 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7782 	if (!tr)
7783 		goto out_unlock;
7784 
7785 	tr->name = kstrdup(name, GFP_KERNEL);
7786 	if (!tr->name)
7787 		goto out_free_tr;
7788 
7789 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7790 		goto out_free_tr;
7791 
7792 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7793 
7794 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7795 
7796 	raw_spin_lock_init(&tr->start_lock);
7797 
7798 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7799 
7800 	tr->current_trace = &nop_trace;
7801 
7802 	INIT_LIST_HEAD(&tr->systems);
7803 	INIT_LIST_HEAD(&tr->events);
7804 	INIT_LIST_HEAD(&tr->hist_vars);
7805 
7806 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7807 		goto out_free_tr;
7808 
7809 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7810 	if (!tr->dir)
7811 		goto out_free_tr;
7812 
7813 	ret = event_trace_add_tracer(tr->dir, tr);
7814 	if (ret) {
7815 		tracefs_remove_recursive(tr->dir);
7816 		goto out_free_tr;
7817 	}
7818 
7819 	ftrace_init_trace_array(tr);
7820 
7821 	init_tracer_tracefs(tr, tr->dir);
7822 	init_trace_flags_index(tr);
7823 	__update_tracer_options(tr);
7824 
7825 	list_add(&tr->list, &ftrace_trace_arrays);
7826 
7827 	mutex_unlock(&trace_types_lock);
7828 	mutex_unlock(&event_mutex);
7829 
7830 	return 0;
7831 
7832  out_free_tr:
7833 	free_trace_buffers(tr);
7834 	free_cpumask_var(tr->tracing_cpumask);
7835 	kfree(tr->name);
7836 	kfree(tr);
7837 
7838  out_unlock:
7839 	mutex_unlock(&trace_types_lock);
7840 	mutex_unlock(&event_mutex);
7841 
7842 	return ret;
7843 
7844 }
7845 
7846 static int instance_rmdir(const char *name)
7847 {
7848 	struct trace_array *tr;
7849 	int found = 0;
7850 	int ret;
7851 	int i;
7852 
7853 	mutex_lock(&event_mutex);
7854 	mutex_lock(&trace_types_lock);
7855 
7856 	ret = -ENODEV;
7857 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7858 		if (tr->name && strcmp(tr->name, name) == 0) {
7859 			found = 1;
7860 			break;
7861 		}
7862 	}
7863 	if (!found)
7864 		goto out_unlock;
7865 
7866 	ret = -EBUSY;
7867 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7868 		goto out_unlock;
7869 
7870 	list_del(&tr->list);
7871 
7872 	/* Disable all the flags that were enabled coming in */
7873 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7874 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7875 			set_tracer_flag(tr, 1 << i, 0);
7876 	}
7877 
7878 	tracing_set_nop(tr);
7879 	clear_ftrace_function_probes(tr);
7880 	event_trace_del_tracer(tr);
7881 	ftrace_clear_pids(tr);
7882 	ftrace_destroy_function_files(tr);
7883 	tracefs_remove_recursive(tr->dir);
7884 	free_trace_buffers(tr);
7885 
7886 	for (i = 0; i < tr->nr_topts; i++) {
7887 		kfree(tr->topts[i].topts);
7888 	}
7889 	kfree(tr->topts);
7890 
7891 	free_cpumask_var(tr->tracing_cpumask);
7892 	kfree(tr->name);
7893 	kfree(tr);
7894 
7895 	ret = 0;
7896 
7897  out_unlock:
7898 	mutex_unlock(&trace_types_lock);
7899 	mutex_unlock(&event_mutex);
7900 
7901 	return ret;
7902 }
7903 
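/*
 * instance_mkdir() and instance_rmdir() above are wired up as the mkdir
 * and rmdir callbacks of the "instances" directory created below, so a
 * new trace array can be created and destroyed from user space, e.g.
 * (assuming the usual tracefs mount point):
 *
 *   mkdir /sys/kernel/tracing/instances/foo   # create a new trace array
 *   rmdir /sys/kernel/tracing/instances/foo   # tear it down again
 */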
7904 static __init void create_trace_instances(struct dentry *d_tracer)
7905 {
7906 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7907 							 instance_mkdir,
7908 							 instance_rmdir);
7909 	if (WARN_ON(!trace_instance_dir))
7910 		return;
7911 }
7912 
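/*
 * init_tracer_tracefs() populates a tracing directory with the standard
 * per-instance control files. It is called both for the top level
 * directory (typically /sys/kernel/tracing) and for each instance
 * directory (e.g. /sys/kernel/tracing/instances/foo), so files such as
 * "trace", "trace_pipe", "tracing_on" and "buffer_size_kb" exist in both.
 */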
7913 static void
7914 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7915 {
7916 	struct trace_event_file *file;
7917 	int cpu;
7918 
7919 	trace_create_file("available_tracers", 0444, d_tracer,
7920 			tr, &show_traces_fops);
7921 
7922 	trace_create_file("current_tracer", 0644, d_tracer,
7923 			tr, &set_tracer_fops);
7924 
7925 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7926 			  tr, &tracing_cpumask_fops);
7927 
7928 	trace_create_file("trace_options", 0644, d_tracer,
7929 			  tr, &tracing_iter_fops);
7930 
7931 	trace_create_file("trace", 0644, d_tracer,
7932 			  tr, &tracing_fops);
7933 
7934 	trace_create_file("trace_pipe", 0444, d_tracer,
7935 			  tr, &tracing_pipe_fops);
7936 
7937 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7938 			  tr, &tracing_entries_fops);
7939 
7940 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7941 			  tr, &tracing_total_entries_fops);
7942 
7943 	trace_create_file("free_buffer", 0200, d_tracer,
7944 			  tr, &tracing_free_buffer_fops);
7945 
7946 	trace_create_file("trace_marker", 0220, d_tracer,
7947 			  tr, &tracing_mark_fops);
7948 
7949 	file = __find_event_file(tr, "ftrace", "print");
7950 	if (file && file->dir)
7951 		trace_create_file("trigger", 0644, file->dir, file,
7952 				  &event_trigger_fops);
7953 	tr->trace_marker_file = file;
7954 
7955 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7956 			  tr, &tracing_mark_raw_fops);
7957 
7958 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7959 			  &trace_clock_fops);
7960 
7961 	trace_create_file("tracing_on", 0644, d_tracer,
7962 			  tr, &rb_simple_fops);
7963 
7964 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7965 			  &trace_time_stamp_mode_fops);
7966 
7967 	create_trace_options_dir(tr);
7968 
7969 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7970 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7971 			&tr->max_latency, &tracing_max_lat_fops);
7972 #endif
7973 
7974 	if (ftrace_create_function_files(tr, d_tracer))
7975 		WARN(1, "Could not allocate function filter files");
7976 
7977 #ifdef CONFIG_TRACER_SNAPSHOT
7978 	trace_create_file("snapshot", 0644, d_tracer,
7979 			  tr, &snapshot_fops);
7980 #endif
7981 
7982 	for_each_tracing_cpu(cpu)
7983 		tracing_init_tracefs_percpu(tr, cpu);
7984 
7985 	ftrace_init_tracefs(tr, d_tracer);
7986 }
7987 
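/*
 * trace_automount() below provides the automount callback used by
 * tracing_init_dentry(): the first lookup of "tracing" inside debugfs
 * mounts tracefs on top of it, so on a typical system both of these
 * paths reach the same files:
 *
 *   /sys/kernel/tracing            (tracefs mounted directly)
 *   /sys/kernel/debug/tracing      (automounted for older tools)
 */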
7988 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7989 {
7990 	struct vfsmount *mnt;
7991 	struct file_system_type *type;
7992 
7993 	/*
7994 	 * To maintain backward compatibility for tools that mount
7995 	 * debugfs to get to the tracing facility, tracefs is automatically
7996 	 * mounted to the debugfs/tracing directory.
7997 	 */
7998 	type = get_fs_type("tracefs");
7999 	if (!type)
8000 		return NULL;
8001 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8002 	put_filesystem(type);
8003 	if (IS_ERR(mnt))
8004 		return NULL;
8005 	mntget(mnt);
8006 
8007 	return mnt;
8008 }
8009 
8010 /**
8011  * tracing_init_dentry - initialize top level trace array
8012  *
8013  * This is called when creating files or directories in the tracing
8014  * directory. It is called via fs_initcall() by any of the boot up code
8015  * and expects to return the dentry of the top level tracing directory.
8016  */
8017 struct dentry *tracing_init_dentry(void)
8018 {
8019 	struct trace_array *tr = &global_trace;
8020 
8021 	/* The top level trace array uses NULL as parent */
8022 	if (tr->dir)
8023 		return NULL;
8024 
8025 	if (WARN_ON(!tracefs_initialized()) ||
8026 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8027 		 WARN_ON(!debugfs_initialized())))
8028 		return ERR_PTR(-ENODEV);
8029 
8030 	/*
8031 	 * As there may still be users that expect the tracing
8032 	 * files to exist in debugfs/tracing, we must automount
8033 	 * the tracefs file system there, so older tools still
8034 	 * work with the newer kernel.
8035 	 */
8036 	tr->dir = debugfs_create_automount("tracing", NULL,
8037 					   trace_automount, NULL);
8038 	if (!tr->dir) {
8039 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8040 		return ERR_PTR(-ENOMEM);
8041 	}
8042 
8043 	return NULL;
8044 }
8045 
8046 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8047 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8048 
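/*
 * trace_eval_init() registers the eval (enum/sizeof) maps collected at
 * build time between __start_ftrace_eval_maps and __stop_ftrace_eval_maps.
 * Entries typically come from trace event headers, roughly along the
 * lines of (MY_ENUM_VALUE is only an illustrative name):
 *
 *   TRACE_DEFINE_ENUM(MY_ENUM_VALUE);
 *
 * which lets the symbolic name used in an event's print format be
 * resolved to its numeric value when the format file is read from
 * user space.
 */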
8049 static void __init trace_eval_init(void)
8050 {
8051 	int len;
8052 
8053 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8054 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8055 }
8056 
8057 #ifdef CONFIG_MODULES
8058 static void trace_module_add_evals(struct module *mod)
8059 {
8060 	if (!mod->num_trace_evals)
8061 		return;
8062 
8063 	/*
8064 	 * Modules with bad taint do not have events created, do
8065 	 * not bother with enums either.
8066 	 */
8067 	if (trace_module_has_bad_taint(mod))
8068 		return;
8069 
8070 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8071 }
8072 
8073 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8074 static void trace_module_remove_evals(struct module *mod)
8075 {
8076 	union trace_eval_map_item *map;
8077 	union trace_eval_map_item **last = &trace_eval_maps;
8078 
8079 	if (!mod->num_trace_evals)
8080 		return;
8081 
8082 	mutex_lock(&trace_eval_mutex);
8083 
8084 	map = trace_eval_maps;
8085 
8086 	while (map) {
8087 		if (map->head.mod == mod)
8088 			break;
8089 		map = trace_eval_jmp_to_tail(map);
8090 		last = &map->tail.next;
8091 		map = map->tail.next;
8092 	}
8093 	if (!map)
8094 		goto out;
8095 
8096 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8097 	kfree(map);
8098  out:
8099 	mutex_unlock(&trace_eval_mutex);
8100 }
8101 #else
8102 static inline void trace_module_remove_evals(struct module *mod) { }
8103 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8104 
8105 static int trace_module_notify(struct notifier_block *self,
8106 			       unsigned long val, void *data)
8107 {
8108 	struct module *mod = data;
8109 
8110 	switch (val) {
8111 	case MODULE_STATE_COMING:
8112 		trace_module_add_evals(mod);
8113 		break;
8114 	case MODULE_STATE_GOING:
8115 		trace_module_remove_evals(mod);
8116 		break;
8117 	}
8118 
8119 	return 0;
8120 }
8121 
8122 static struct notifier_block trace_module_nb = {
8123 	.notifier_call = trace_module_notify,
8124 	.priority = 0,
8125 };
8126 #endif /* CONFIG_MODULES */
8127 
8128 static __init int tracer_init_tracefs(void)
8129 {
8130 	struct dentry *d_tracer;
8131 
8132 	trace_access_lock_init();
8133 
8134 	d_tracer = tracing_init_dentry();
8135 	if (IS_ERR(d_tracer))
8136 		return 0;
8137 
8138 	event_trace_init();
8139 
8140 	init_tracer_tracefs(&global_trace, d_tracer);
8141 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8142 
8143 	trace_create_file("tracing_thresh", 0644, d_tracer,
8144 			&global_trace, &tracing_thresh_fops);
8145 
8146 	trace_create_file("README", 0444, d_tracer,
8147 			NULL, &tracing_readme_fops);
8148 
8149 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8150 			NULL, &tracing_saved_cmdlines_fops);
8151 
8152 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8153 			  NULL, &tracing_saved_cmdlines_size_fops);
8154 
8155 	trace_create_file("saved_tgids", 0444, d_tracer,
8156 			NULL, &tracing_saved_tgids_fops);
8157 
8158 	trace_eval_init();
8159 
8160 	trace_create_eval_file(d_tracer);
8161 
8162 #ifdef CONFIG_MODULES
8163 	register_module_notifier(&trace_module_nb);
8164 #endif
8165 
8166 #ifdef CONFIG_DYNAMIC_FTRACE
8167 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8168 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8169 #endif
8170 
8171 	create_trace_instances(d_tracer);
8172 
8173 	update_tracer_options(&global_trace);
8174 
8175 	return 0;
8176 }
8177 
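/*
 * The panic and die notifiers below dump the ftrace ring buffer to the
 * console when ftrace_dump_on_oops is set, which can be done via the
 * "ftrace_dump_on_oops" kernel command line parameter or the
 * corresponding sysctl, e.g.:
 *
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */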
8178 static int trace_panic_handler(struct notifier_block *this,
8179 			       unsigned long event, void *unused)
8180 {
8181 	if (ftrace_dump_on_oops)
8182 		ftrace_dump(ftrace_dump_on_oops);
8183 	return NOTIFY_OK;
8184 }
8185 
8186 static struct notifier_block trace_panic_notifier = {
8187 	.notifier_call  = trace_panic_handler,
8188 	.next           = NULL,
8189 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8190 };
8191 
8192 static int trace_die_handler(struct notifier_block *self,
8193 			     unsigned long val,
8194 			     void *data)
8195 {
8196 	switch (val) {
8197 	case DIE_OOPS:
8198 		if (ftrace_dump_on_oops)
8199 			ftrace_dump(ftrace_dump_on_oops);
8200 		break;
8201 	default:
8202 		break;
8203 	}
8204 	return NOTIFY_OK;
8205 }
8206 
8207 static struct notifier_block trace_die_notifier = {
8208 	.notifier_call = trace_die_handler,
8209 	.priority = 200
8210 };
8211 
8212 /*
8213  * printk is limited to a maximum of 1024 characters; we really don't need it that big.
8214  * Nothing should be printing 1000 characters anyway.
8215  */
8216 #define TRACE_MAX_PRINT		1000
8217 
8218 /*
8219  * Define here KERN_TRACE so that we have one place to modify
8220  * it if we decide to change what log level the ftrace dump
8221  * should be at.
8222  */
8223 #define KERN_TRACE		KERN_EMERG
8224 
8225 void
8226 trace_printk_seq(struct trace_seq *s)
8227 {
8228 	/* Probably should print a warning here. */
8229 	if (s->seq.len >= TRACE_MAX_PRINT)
8230 		s->seq.len = TRACE_MAX_PRINT;
8231 
8232 	/*
8233 	 * More paranoid code. Although the buffer size is set to
8234 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8235 	 * an extra layer of protection.
8236 	 */
8237 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8238 		s->seq.len = s->seq.size - 1;
8239 
8240 	/* should be nul-terminated, but we are paranoid. */
8241 	s->buffer[s->seq.len] = 0;
8242 
8243 	printk(KERN_TRACE "%s", s->buffer);
8244 
8245 	trace_seq_init(s);
8246 }
8247 
8248 void trace_init_global_iter(struct trace_iterator *iter)
8249 {
8250 	iter->tr = &global_trace;
8251 	iter->trace = iter->tr->current_trace;
8252 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8253 	iter->trace_buffer = &global_trace.trace_buffer;
8254 
8255 	if (iter->trace && iter->trace->open)
8256 		iter->trace->open(iter);
8257 
8258 	/* Annotate start of buffers if we had overruns */
8259 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8260 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8261 
8262 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8263 	if (trace_clocks[iter->tr->clock_id].in_ns)
8264 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8265 }
8266 
8267 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8268 {
8269 	/* use static because iter can be a bit big for the stack */
8270 	static struct trace_iterator iter;
8271 	static atomic_t dump_running;
8272 	struct trace_array *tr = &global_trace;
8273 	unsigned int old_userobj;
8274 	unsigned long flags;
8275 	int cnt = 0, cpu;
8276 
8277 	/* Only allow one dump user at a time. */
8278 	if (atomic_inc_return(&dump_running) != 1) {
8279 		atomic_dec(&dump_running);
8280 		return;
8281 	}
8282 
8283 	/*
8284 	 * Always turn off tracing when we dump.
8285 	 * We don't need to show trace output of what happens
8286 	 * between multiple crashes.
8287 	 *
8288 	 * If the user does a sysrq-z, then they can re-enable
8289 	 * tracing with echo 1 > tracing_on.
8290 	 */
8291 	tracing_off();
8292 
8293 	local_irq_save(flags);
8294 	printk_nmi_direct_enter();
8295 
8296 	/* Simulate the iterator */
8297 	trace_init_global_iter(&iter);
8298 
8299 	for_each_tracing_cpu(cpu) {
8300 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8301 	}
8302 
8303 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8304 
8305 	/* don't look at user memory in panic mode */
8306 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8307 
8308 	switch (oops_dump_mode) {
8309 	case DUMP_ALL:
8310 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8311 		break;
8312 	case DUMP_ORIG:
8313 		iter.cpu_file = raw_smp_processor_id();
8314 		break;
8315 	case DUMP_NONE:
8316 		goto out_enable;
8317 	default:
8318 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8319 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8320 	}
8321 
8322 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8323 
8324 	/* Did function tracer already get disabled? */
8325 	if (ftrace_is_dead()) {
8326 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8327 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8328 	}
8329 
8330 	/*
8331 	 * We need to stop all tracing on all CPUs to read
8332 	 * the next buffer. This is a bit expensive, but it is
8333 	 * not done often. We print all that we can read,
8334 	 * and then release the locks again.
8335 	 */
8336 
8337 	while (!trace_empty(&iter)) {
8338 
8339 		if (!cnt)
8340 			printk(KERN_TRACE "---------------------------------\n");
8341 
8342 		cnt++;
8343 
8344 		/* reset all but tr, trace, and overruns */
8345 		memset(&iter.seq, 0,
8346 		       sizeof(struct trace_iterator) -
8347 		       offsetof(struct trace_iterator, seq));
8348 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8349 		iter.pos = -1;
8350 
8351 		if (trace_find_next_entry_inc(&iter) != NULL) {
8352 			int ret;
8353 
8354 			ret = print_trace_line(&iter);
8355 			if (ret != TRACE_TYPE_NO_CONSUME)
8356 				trace_consume(&iter);
8357 		}
8358 		touch_nmi_watchdog();
8359 
8360 		trace_printk_seq(&iter.seq);
8361 	}
8362 
8363 	if (!cnt)
8364 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8365 	else
8366 		printk(KERN_TRACE "---------------------------------\n");
8367 
8368  out_enable:
8369 	tr->trace_flags |= old_userobj;
8370 
8371 	for_each_tracing_cpu(cpu) {
8372 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8373 	}
8374 	atomic_dec(&dump_running);
8375 	printk_nmi_direct_exit();
8376 	local_irq_restore(flags);
8377 }
8378 EXPORT_SYMBOL_GPL(ftrace_dump);
8379 
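/*
 * trace_run_command() and trace_parse_run_command() below implement the
 * generic "split a written command line and hand each line to a callback"
 * helper used by the dynamic event interfaces (kprobe_events,
 * uprobe_events, ...). For instance, a write such as:
 *
 *   echo 'p:myprobe do_sys_open' > /sys/kernel/tracing/kprobe_events
 *
 * ends up split into an argv[] array and passed to the interface's
 * create function.
 */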
8380 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8381 {
8382 	char **argv;
8383 	int argc, ret;
8384 
8385 	argc = 0;
8386 	ret = 0;
8387 	argv = argv_split(GFP_KERNEL, buf, &argc);
8388 	if (!argv)
8389 		return -ENOMEM;
8390 
8391 	if (argc)
8392 		ret = createfn(argc, argv);
8393 
8394 	argv_free(argv);
8395 
8396 	return ret;
8397 }
8398 
8399 #define WRITE_BUFSIZE  4096
8400 
8401 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8402 				size_t count, loff_t *ppos,
8403 				int (*createfn)(int, char **))
8404 {
8405 	char *kbuf, *buf, *tmp;
8406 	int ret = 0;
8407 	size_t done = 0;
8408 	size_t size;
8409 
8410 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8411 	if (!kbuf)
8412 		return -ENOMEM;
8413 
8414 	while (done < count) {
8415 		size = count - done;
8416 
8417 		if (size >= WRITE_BUFSIZE)
8418 			size = WRITE_BUFSIZE - 1;
8419 
8420 		if (copy_from_user(kbuf, buffer + done, size)) {
8421 			ret = -EFAULT;
8422 			goto out;
8423 		}
8424 		kbuf[size] = '\0';
8425 		buf = kbuf;
8426 		do {
8427 			tmp = strchr(buf, '\n');
8428 			if (tmp) {
8429 				*tmp = '\0';
8430 				size = tmp - buf + 1;
8431 			} else {
8432 				size = strlen(buf);
8433 				if (done + size < count) {
8434 					if (buf != kbuf)
8435 						break;
8436 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8437 					pr_warn("Line length is too long: Should be less than %d\n",
8438 						WRITE_BUFSIZE - 2);
8439 					ret = -EINVAL;
8440 					goto out;
8441 				}
8442 			}
8443 			done += size;
8444 
8445 			/* Remove comments */
8446 			tmp = strchr(buf, '#');
8447 
8448 			if (tmp)
8449 				*tmp = '\0';
8450 
8451 			ret = trace_run_command(buf, createfn);
8452 			if (ret)
8453 				goto out;
8454 			buf += size;
8455 
8456 		} while (done < count);
8457 	}
8458 	ret = done;
8459 
8460 out:
8461 	kfree(kbuf);
8462 
8463 	return ret;
8464 }
8465 
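/*
 * tracer_alloc_buffers() performs the early setup of the global trace
 * array. Until ring_buffer_expanded is set, the ring buffer is kept at a
 * token size; it is grown to trace_buf_size when a tracer or event is
 * first enabled, or explicitly from user space, e.g. (1408 is just an
 * example value, in KB per CPU):
 *
 *   echo 1408 > /sys/kernel/tracing/buffer_size_kb
 */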
8466 __init static int tracer_alloc_buffers(void)
8467 {
8468 	int ring_buf_size;
8469 	int ret = -ENOMEM;
8470 
8471 	/*
8472 	 * Make sure we don't accidentally add more trace options
8473 	 * than we have bits for.
8474 	 */
8475 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8476 
8477 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8478 		goto out;
8479 
8480 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8481 		goto out_free_buffer_mask;
8482 
8483 	/* Only allocate trace_printk buffers if a trace_printk exists */
8484 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8485 		/* Must be called before global_trace.buffer is allocated */
8486 		trace_printk_init_buffers();
8487 
8488 	/* To save memory, keep the ring buffer size at its minimum */
8489 	if (ring_buffer_expanded)
8490 		ring_buf_size = trace_buf_size;
8491 	else
8492 		ring_buf_size = 1;
8493 
8494 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8495 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8496 
8497 	raw_spin_lock_init(&global_trace.start_lock);
8498 
8499 	/*
8500 	 * The prepare callback allocates some memory for the ring buffer. We
8501 	 * don't free the buffer if the CPU goes down. If we were to free
8502 	 * the buffer, then the user would lose any trace that was in the
8503 	 * buffer. The memory will be removed once the "instance" is removed.
8504 	 */
8505 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8506 				      "trace/RB:prepare", trace_rb_cpu_prepare,
8507 				      NULL);
8508 	if (ret < 0)
8509 		goto out_free_cpumask;
8510 	/* Used for event triggers */
8511 	ret = -ENOMEM;
8512 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8513 	if (!temp_buffer)
8514 		goto out_rm_hp_state;
8515 
8516 	if (trace_create_savedcmd() < 0)
8517 		goto out_free_temp_buffer;
8518 
8519 	/* TODO: make the number of buffers hot pluggable with CPUs */
8520 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8521 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8522 		WARN_ON(1);
8523 		goto out_free_savedcmd;
8524 	}
8525 
8526 	if (global_trace.buffer_disabled)
8527 		tracing_off();
8528 
8529 	if (trace_boot_clock) {
8530 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8531 		if (ret < 0)
8532 			pr_warn("Trace clock %s not defined, going back to default\n",
8533 				trace_boot_clock);
8534 	}
8535 
8536 	/*
8537 	 * register_tracer() might reference current_trace, so it
8538 	 * needs to be set before we register anything. This is
8539 	 * just a bootstrap of current_trace anyway.
8540 	 */
8541 	global_trace.current_trace = &nop_trace;
8542 
8543 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8544 
8545 	ftrace_init_global_array_ops(&global_trace);
8546 
8547 	init_trace_flags_index(&global_trace);
8548 
8549 	register_tracer(&nop_trace);
8550 
8551 	/* Function tracing may start here (via kernel command line) */
8552 	init_function_trace();
8553 
8554 	/* All seems OK, enable tracing */
8555 	tracing_disabled = 0;
8556 
8557 	atomic_notifier_chain_register(&panic_notifier_list,
8558 				       &trace_panic_notifier);
8559 
8560 	register_die_notifier(&trace_die_notifier);
8561 
8562 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8563 
8564 	INIT_LIST_HEAD(&global_trace.systems);
8565 	INIT_LIST_HEAD(&global_trace.events);
8566 	INIT_LIST_HEAD(&global_trace.hist_vars);
8567 	list_add(&global_trace.list, &ftrace_trace_arrays);
8568 
8569 	apply_trace_boot_options();
8570 
8571 	register_snapshot_cmd();
8572 
8573 	return 0;
8574 
8575 out_free_savedcmd:
8576 	free_saved_cmdlines_buffer(savedcmd);
8577 out_free_temp_buffer:
8578 	ring_buffer_free(temp_buffer);
8579 out_rm_hp_state:
8580 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8581 out_free_cpumask:
8582 	free_cpumask_var(global_trace.tracing_cpumask);
8583 out_free_buffer_mask:
8584 	free_cpumask_var(tracing_buffer_mask);
8585 out:
8586 	return ret;
8587 }
8588 
8589 void __init early_trace_init(void)
8590 {
8591 	if (tracepoint_printk) {
8592 		tracepoint_print_iter =
8593 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8594 		if (WARN_ON(!tracepoint_print_iter))
8595 			tracepoint_printk = 0;
8596 		else
8597 			static_key_enable(&tracepoint_printk_key.key);
8598 	}
8599 	tracer_alloc_buffers();
8600 }
8601 
8602 void __init trace_init(void)
8603 {
8604 	trace_event_init();
8605 }
8606 
8607 __init static int clear_boot_tracer(void)
8608 {
8609 	/*
8610 	 * The buffer holding the default bootup tracer name lives in an
8611 	 * init section. This function runs as a late initcall. If the boot
8612 	 * tracer was never registered, clear the pointer so that a later
8613 	 * tracer registration does not access the buffer that is about
8614 	 * to be freed.
8615 	 */
8616 	if (!default_bootup_tracer)
8617 		return 0;
8618 
8619 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8620 	       default_bootup_tracer);
8621 	default_bootup_tracer = NULL;
8622 
8623 	return 0;
8624 }
8625 
8626 fs_initcall(tracer_init_tracefs);
8627 late_initcall_sync(clear_boot_tracer);
8628 
8629 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8630 __init static int tracing_set_default_clock(void)
8631 {
8632 	/* sched_clock_stable() is determined in late_initcall */
8633 	if (!trace_boot_clock && !sched_clock_stable()) {
8634 		printk(KERN_WARNING
8635 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8636 		       "If you want to keep using the local clock, then add:\n"
8637 		       "  \"trace_clock=local\"\n"
8638 		       "on the kernel command line\n");
8639 		tracing_set_clock(&global_trace, "global");
8640 	}
8641 
8642 	return 0;
8643 }
8644 late_initcall_sync(tracing_set_default_clock);
8645 #endif
8646