xref: /linux/kernel/trace/trace.c (revision 895931232d9358e0016f580f26b336c29c9528cc)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45 
46 #include "trace.h"
47 #include "trace_output.h"
48 
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54 
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring buffer, such as trace_printk(), could
60  * occur at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63 
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68 
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73 
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76 	{ }
77 };
78 
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82 	return 0;
83 }
84 
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91 
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will turn to zero if the initialization
95  * of the tracer is successful. But that is the only place that sets
96  * this back to zero.
97  */
98 static int tracing_disabled = 1;
99 
100 cpumask_var_t __read_mostly	tracing_buffer_mask;
101 
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117 
118 enum ftrace_dump_mode ftrace_dump_on_oops;
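/*
 * Illustrative sketch (not a definition at this point in the file):
 * the flag is consumed by the panic/die notifiers registered further
 * down in this file, roughly as
 *
 *	if (ftrace_dump_on_oops)
 *		ftrace_dump(ftrace_dump_on_oops);
 *
 * where ftrace_dump() takes an enum ftrace_dump_mode (DUMP_ALL or
 * DUMP_ORIG) and spills the ring buffer contents to the console.
 */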
119 
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122 
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126 	struct module			*mod;
127 	unsigned long			length;
128 };
129 
130 union trace_eval_map_item;
131 
132 struct trace_eval_map_tail {
133 	/*
134 	 * "end" is first and points to NULL as it must be different
135 	 * than "mod" or "eval_string"
136 	 */
137 	union trace_eval_map_item	*next;
138 	const char			*end;	/* points to NULL */
139 };
140 
141 static DEFINE_MUTEX(trace_eval_mutex);
142 
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151 	struct trace_eval_map		map;
152 	struct trace_eval_map_head	head;
153 	struct trace_eval_map_tail	tail;
154 };
155 
156 static union trace_eval_map_item *trace_eval_maps;
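/*
 * Illustrative layout sketch (the loop below is a hypothetical helper,
 * shown only to make the layout concrete; it is not code from this
 * file). With "ptr" pointing at the first element of one saved array:
 *
 *	ptr[0].head	.length = N, .mod = owning module (NULL if built in)
 *	ptr[1..N].map	the N saved struct trace_eval_map entries
 *	ptr[N+1].tail	.next = first element of the next saved array,
 *			or NULL if this is the last one
 *
 * A walker holding trace_eval_mutex could therefore visit every saved
 * map like this:
 *
 *	union trace_eval_map_item *ptr = trace_eval_maps;
 *
 *	while (ptr) {
 *		unsigned long i, len = ptr->head.length;
 *
 *		for (i = 1; i <= len; i++)
 *			pr_info("%s: %s = %ld\n", ptr[i].map.system,
 *				ptr[i].map.eval_string,
 *				(long)ptr[i].map.eval_value);
 *		ptr = ptr[len + 1].tail.next;
 *	}
 */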
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158 
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160 
161 #define MAX_TRACER_SIZE		100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164 
165 static bool allocate_snapshot;
166 
167 static int __init set_cmdline_ftrace(char *str)
168 {
169 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170 	default_bootup_tracer = bootup_tracer_buf;
171 	/* We are using ftrace early, expand it */
172 	ring_buffer_expanded = true;
173 	return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176 
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179 	if (*str++ != '=' || !*str) {
180 		ftrace_dump_on_oops = DUMP_ALL;
181 		return 1;
182 	}
183 
184 	if (!strcmp("orig_cpu", str)) {
185 		ftrace_dump_on_oops = DUMP_ORIG;
186 		return 1;
187 	}
188 
189 	return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192 
193 static int __init stop_trace_on_warning(char *str)
194 {
195 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196 		__disable_trace_on_warning = 1;
197 	return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200 
201 static int __init boot_alloc_snapshot(char *str)
202 {
203 	allocate_snapshot = true;
204 	/* We also need the main ring buffer expanded */
205 	ring_buffer_expanded = true;
206 	return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209 
210 
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212 
213 static int __init set_trace_boot_options(char *str)
214 {
215 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216 	return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219 
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222 
223 static int __init set_trace_boot_clock(char *str)
224 {
225 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226 	trace_boot_clock = trace_boot_clock_buf;
227 	return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230 
231 static int __init set_tracepoint_printk(char *str)
232 {
233 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234 		tracepoint_printk = 1;
235 	return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238 
239 unsigned long long ns2usecs(u64 nsec)
240 {
241 	nsec += 500;
242 	do_div(nsec, 1000);
243 	return nsec;
244 }
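/*
 * For example, ns2usecs(1500) returns 2 while ns2usecs(1499) returns 1:
 * the +500 rounds to the nearest microsecond before the divide.
 */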
245 
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS						\
248 	(FUNCTION_DEFAULT_FLAGS |					\
249 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
250 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
251 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
252 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253 
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
256 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257 
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261 
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267 	.trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269 
270 LIST_HEAD(ftrace_trace_arrays);
271 
272 int trace_array_get(struct trace_array *this_tr)
273 {
274 	struct trace_array *tr;
275 	int ret = -ENODEV;
276 
277 	mutex_lock(&trace_types_lock);
278 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279 		if (tr == this_tr) {
280 			tr->ref++;
281 			ret = 0;
282 			break;
283 		}
284 	}
285 	mutex_unlock(&trace_types_lock);
286 
287 	return ret;
288 }
289 
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292 	WARN_ON(!this_tr->ref);
293 	this_tr->ref--;
294 }
295 
296 void trace_array_put(struct trace_array *this_tr)
297 {
298 	mutex_lock(&trace_types_lock);
299 	__trace_array_put(this_tr);
300 	mutex_unlock(&trace_types_lock);
301 }
302 
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304 			      struct ring_buffer *buffer,
305 			      struct ring_buffer_event *event)
306 {
307 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308 	    !filter_match_preds(call->filter, rec)) {
309 		__trace_event_discard_commit(buffer, event);
310 		return 1;
311 	}
312 
313 	return 0;
314 }
315 
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318 	vfree(pid_list->pids);
319 	kfree(pid_list);
320 }
321 
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332 	/*
333 	 * If pid_max changed after filtered_pids was created, we
334 	 * by default ignore all pids greater than the previous pid_max.
335 	 */
336 	if (search_pid >= filtered_pids->pid_max)
337 		return false;
338 
339 	return test_bit(search_pid, filtered_pids->pids);
340 }
341 
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354 	/*
355 	 * Return false, because if filtered_pids does not exist,
356 	 * all pids are good to trace.
357 	 */
358 	if (!filtered_pids)
359 		return false;
360 
361 	return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363 
364 /**
365  * trace_filter_add_remove_task - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * When adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork, and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377 				  struct task_struct *self,
378 				  struct task_struct *task)
379 {
380 	if (!pid_list)
381 		return;
382 
383 	/* For forks, we only add if the forking task is listed */
384 	if (self) {
385 		if (!trace_find_filtered_pid(pid_list, self->pid))
386 			return;
387 	}
388 
389 	/* Sorry, but we don't support pid_max changing after setting */
390 	if (task->pid >= pid_list->pid_max)
391 		return;
392 
393 	/* "self" is set for forks, and NULL for exits */
394 	if (self)
395 		set_bit(task->pid, pid_list->pids);
396 	else
397 		clear_bit(task->pid, pid_list->pids);
398 }
399 
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414 	unsigned long pid = (unsigned long)v;
415 
416 	(*pos)++;
417 
418 	/* The pid is already +1 of the actual previous bit */
419 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420 
421 	/* Return pid + 1 to allow zero to be represented */
422 	if (pid < pid_list->pid_max)
423 		return (void *)(pid + 1);
424 
425 	return NULL;
426 }
427 
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441 	unsigned long pid;
442 	loff_t l = 0;
443 
444 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445 	if (pid >= pid_list->pid_max)
446 		return NULL;
447 
448 	/* Return pid + 1 so that zero can be the exit value */
449 	for (pid++; pid && l < *pos;
450 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451 		;
452 	return (void *)pid;
453 }
454 
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465 	unsigned long pid = (unsigned long)v - 1;
466 
467 	seq_printf(m, "%lu\n", pid);
468 	return 0;
469 }
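/*
 * Illustrative sketch (hypothetical wrappers, not code from this file):
 * how the three helpers above are typically wired into seq_file
 * operations for a tracefs pid-filter file. The real users (such as the
 * set_event_pid file in trace_events.c) follow roughly this pattern but
 * additionally protect the pid list with RCU, which is omitted here.
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(m->private, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations p_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */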
470 
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE		127
473 
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475 		    struct trace_pid_list **new_pid_list,
476 		    const char __user *ubuf, size_t cnt)
477 {
478 	struct trace_pid_list *pid_list;
479 	struct trace_parser parser;
480 	unsigned long val;
481 	int nr_pids = 0;
482 	ssize_t read = 0;
483 	ssize_t ret = 0;
484 	loff_t pos;
485 	pid_t pid;
486 
487 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488 		return -ENOMEM;
489 
490 	/*
491 	 * Always create a new array. The write is an all-or-nothing
492 	 * operation: when the user adds new pids, a new array is built
493 	 * to replace the old one. If the operation fails, the current
494 	 * list is not modified.
495 	 */
496 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497 	if (!pid_list)
498 		return -ENOMEM;
499 
500 	pid_list->pid_max = READ_ONCE(pid_max);
501 
502 	/* Only truncating will shrink pid_max */
503 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504 		pid_list->pid_max = filtered_pids->pid_max;
505 
506 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507 	if (!pid_list->pids) {
508 		kfree(pid_list);
509 		return -ENOMEM;
510 	}
511 
512 	if (filtered_pids) {
513 		/* copy the current bits to the new max */
514 		for_each_set_bit(pid, filtered_pids->pids,
515 				 filtered_pids->pid_max) {
516 			set_bit(pid, pid_list->pids);
517 			nr_pids++;
518 		}
519 	}
520 
521 	while (cnt > 0) {
522 
523 		pos = 0;
524 
525 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
526 		if (ret < 0 || !trace_parser_loaded(&parser))
527 			break;
528 
529 		read += ret;
530 		ubuf += ret;
531 		cnt -= ret;
532 
533 		parser.buffer[parser.idx] = 0;
534 
535 		ret = -EINVAL;
536 		if (kstrtoul(parser.buffer, 0, &val))
537 			break;
538 		if (val >= pid_list->pid_max)
539 			break;
540 
541 		pid = (pid_t)val;
542 
543 		set_bit(pid, pid_list->pids);
544 		nr_pids++;
545 
546 		trace_parser_clear(&parser);
547 		ret = 0;
548 	}
549 	trace_parser_put(&parser);
550 
551 	if (ret < 0) {
552 		trace_free_pid_list(pid_list);
553 		return ret;
554 	}
555 
556 	if (!nr_pids) {
557 		/* Cleared the list of pids */
558 		trace_free_pid_list(pid_list);
559 		read = ret;
560 		pid_list = NULL;
561 	}
562 
563 	*new_pid_list = pid_list;
564 
565 	return read;
566 }
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 	u64 ts;
571 
572 	/* Early boot up does not have a buffer yet */
573 	if (!buf->buffer)
574 		return trace_clock_local();
575 
576 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579 	return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598 	/*
599 	 * For quick access (irqsoff uses this in fast path), just
600 	 * return the mirror variable of the state of the ring buffer.
601 	 * It's a little racy, but we don't really care.
602 	 */
603 	smp_rmb();
604 	return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low value of 16384.
613  * If a dump on oops happens, it is much nicer not to have to wait
614  * for all of that output. In any case, this is configurable at both
615  * boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer		*trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the events producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different cpu ring
645  * buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657 	if (cpu == RING_BUFFER_ALL_CPUS) {
658 		/* gain it for accessing the whole ring buffer. */
659 		down_write(&all_cpu_access_lock);
660 	} else {
661 		/* gain it for accessing a cpu ring buffer. */
662 
663 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 		down_read(&all_cpu_access_lock);
665 
666 		/* Secondly block other access to this @cpu ring buffer. */
667 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 	}
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673 	if (cpu == RING_BUFFER_ALL_CPUS) {
674 		up_write(&all_cpu_access_lock);
675 	} else {
676 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 		up_read(&all_cpu_access_lock);
678 	}
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683 	int cpu;
684 
685 	for_each_possible_cpu(cpu)
686 		mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695 	(void)cpu;
696 	mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
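/*
 * Usage sketch (illustrative; declarations and error handling omitted,
 * and process() is a stand-in for whatever the caller does with each
 * event): a consuming reader brackets its ring buffer accesses with
 * these primitives, for instance
 *
 *	trace_access_lock(cpu);
 *	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost)))
 *		process(event);
 *	trace_access_unlock(cpu);
 *
 * A reader that passes RING_BUFFER_ALL_CPUS takes all_cpu_access_lock
 * exclusively and therefore excludes every per-cpu reader.
 */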
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 				 unsigned long flags,
714 				 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 				      struct ring_buffer *buffer,
717 				      unsigned long flags,
718 				      int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 					unsigned long flags,
723 					int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 				      struct ring_buffer *buffer,
728 				      unsigned long flags,
729 				      int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 		  int type, unsigned long flags, int pc)
738 {
739 	struct trace_entry *ent = ring_buffer_event_data(event);
740 
741 	tracing_generic_entry_update(ent, flags, pc);
742 	ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 			  int type,
748 			  unsigned long len,
749 			  unsigned long flags, int pc)
750 {
751 	struct ring_buffer_event *event;
752 
753 	event = ring_buffer_lock_reserve(buffer, len);
754 	if (event != NULL)
755 		trace_event_setup(event, type, flags, pc);
756 
757 	return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 	if (tr->trace_buffer.buffer)
763 		ring_buffer_record_on(tr->trace_buffer.buffer);
764 	/*
765 	 * This flag is looked at when buffers haven't been allocated
766 	 * yet, or by some tracers (like irqsoff) that just want to
767 	 * know if the ring buffer has been disabled, but they can handle
768 	 * the race where it gets disabled while we still do a record.
769 	 * As the check is in the fast path of the tracers, it is more
770 	 * important to be fast than accurate.
771 	 */
772 	tr->buffer_disabled = 0;
773 	/* Make the flag seen by readers */
774 	smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785 	tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 	__this_cpu_write(trace_taskinfo_save, true);
794 
795 	/* If this is the temp buffer, we need to commit fully */
796 	if (this_cpu_read(trace_buffered_event) == event) {
797 		/* Length is in event->array[0] */
798 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 		/* Release the temp buffer */
800 		this_cpu_dec(trace_buffered_event_cnt);
801 	} else
802 		ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:	   The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 	struct ring_buffer_event *event;
814 	struct ring_buffer *buffer;
815 	struct print_entry *entry;
816 	unsigned long irq_flags;
817 	int alloc;
818 	int pc;
819 
820 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 		return 0;
822 
823 	pc = preempt_count();
824 
825 	if (unlikely(tracing_selftest_running || tracing_disabled))
826 		return 0;
827 
828 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830 	local_save_flags(irq_flags);
831 	buffer = global_trace.trace_buffer.buffer;
832 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 					    irq_flags, pc);
834 	if (!event)
835 		return 0;
836 
837 	entry = ring_buffer_event_data(event);
838 	entry->ip = ip;
839 
840 	memcpy(&entry->buf, str, size);
841 
842 	/* Add a newline if necessary */
843 	if (entry->buf[size - 1] != '\n') {
844 		entry->buf[size] = '\n';
845 		entry->buf[size + 1] = '\0';
846 	} else
847 		entry->buf[size] = '\0';
848 
849 	__buffer_unlock_commit(buffer, event);
850 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852 	return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:	   The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 	struct ring_buffer_event *event;
864 	struct ring_buffer *buffer;
865 	struct bputs_entry *entry;
866 	unsigned long irq_flags;
867 	int size = sizeof(struct bputs_entry);
868 	int pc;
869 
870 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 		return 0;
872 
873 	pc = preempt_count();
874 
875 	if (unlikely(tracing_selftest_running || tracing_disabled))
876 		return 0;
877 
878 	local_save_flags(irq_flags);
879 	buffer = global_trace.trace_buffer.buffer;
880 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 					    irq_flags, pc);
882 	if (!event)
883 		return 0;
884 
885 	entry = ring_buffer_event_data(event);
886 	entry->ip			= ip;
887 	entry->str			= str;
888 
889 	__buffer_unlock_commit(buffer, event);
890 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892 	return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
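/*
 * Usage note (hedged): callers normally do not invoke __trace_puts()
 * or __trace_bputs() directly, but go through the trace_puts() macro
 * from linux/kernel.h, which picks __trace_bputs() for string literals
 * (only the pointer is recorded) and __trace_puts() otherwise, e.g.
 *
 *	trace_puts("reached the slow path\n");
 */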
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899 	struct tracer *tracer = tr->current_trace;
900 	unsigned long flags;
901 
902 	if (in_nmi()) {
903 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 		internal_trace_puts("*** snapshot is being ignored        ***\n");
905 		return;
906 	}
907 
908 	if (!tr->allocated_snapshot) {
909 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 		internal_trace_puts("*** stopping trace here!   ***\n");
911 		tracing_off();
912 		return;
913 	}
914 
915 	/* Note, snapshot can not be used when the tracer uses it */
916 	if (tracer->use_max_tr) {
917 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 		return;
920 	}
921 
922 	local_irq_save(flags);
923 	update_max_tr(tr, current, smp_processor_id());
924 	local_irq_restore(flags);
925 }
926 
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot either with
935  * tracing_snapshot_alloc(), or manually with:
936  *   echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943 	struct trace_array *tr = &global_trace;
944 
945 	tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948 
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950 					struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952 
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955 	int ret;
956 
957 	if (!tr->allocated_snapshot) {
958 
959 		/* allocate spare buffer */
960 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
961 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962 		if (ret < 0)
963 			return ret;
964 
965 		tr->allocated_snapshot = true;
966 	}
967 
968 	return 0;
969 }
970 
971 static void free_snapshot(struct trace_array *tr)
972 {
973 	/*
974 	 * We don't free the ring buffer. Instead, we resize it, because
975 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
976 	 * we want to preserve it.
977 	 */
978 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979 	set_buffer_entries(&tr->max_buffer, 1);
980 	tracing_reset_online_cpus(&tr->max_buffer);
981 	tr->allocated_snapshot = false;
982 }
983 
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996 	struct trace_array *tr = &global_trace;
997 	int ret;
998 
999 	ret = alloc_snapshot(tr);
1000 	WARN_ON(ret < 0);
1001 
1002 	return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005 
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019 	int ret;
1020 
1021 	ret = tracing_alloc_snapshot();
1022 	if (ret < 0)
1023 		return;
1024 
1025 	tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
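/*
 * Illustrative sketch (hypothetical caller, not code from this file):
 * an in-kernel user allocates the snapshot once from a context that may
 * sleep and then triggers it wherever the interesting condition hits.
 *
 * At init time (may sleep):
 *
 *	ret = tracing_alloc_snapshot();
 *	if (ret < 0)
 *		return ret;
 *
 * Later, from any context except NMI, where "looks_wedged" is a
 * hypothetical condition:
 *
 *	if (looks_wedged)
 *		tracing_snapshot();
 */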
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037 	return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042 	/* Give warning */
1043 	tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047 
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050 	if (tr->trace_buffer.buffer)
1051 		ring_buffer_record_off(tr->trace_buffer.buffer);
1052 	/*
1053 	 * This flag is looked at when buffers haven't been allocated
1054 	 * yet, or by some tracers (like irqsoff) that just want to
1055 	 * know if the ring buffer has been disabled, but they can handle
1056 	 * the race where it gets disabled while we still do a record.
1057 	 * As the check is in the fast path of the tracers, it is more
1058 	 * important to be fast than accurate.
1059 	 */
1060 	tr->buffer_disabled = 1;
1061 	/* Make the flag seen by readers */
1062 	smp_wmb();
1063 }
1064 
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075 	tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078 
1079 void disable_trace_on_warning(void)
1080 {
1081 	if (__disable_trace_on_warning)
1082 		tracing_off();
1083 }
1084 
1085 /**
1086  * tracer_tracing_is_on - show real state of ring buffer enabled
1087  * @tr: the trace array to check
1088  *
1089  * Shows the real state of the ring buffer: whether it is enabled or not.
1090  */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093 	if (tr->trace_buffer.buffer)
1094 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095 	return !tr->buffer_disabled;
1096 }
1097 
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103 	return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106 
1107 static int __init set_buf_size(char *str)
1108 {
1109 	unsigned long buf_size;
1110 
1111 	if (!str)
1112 		return 0;
1113 	buf_size = memparse(str, &str);
1114 	/* nr_entries can not be zero */
1115 	if (buf_size == 0)
1116 		return 0;
1117 	trace_buf_size = buf_size;
1118 	return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121 
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124 	unsigned long threshold;
1125 	int ret;
1126 
1127 	if (!str)
1128 		return 0;
1129 	ret = kstrtoul(str, 0, &threshold);
1130 	if (ret < 0)
1131 		return 0;
1132 	tracing_thresh = threshold * 1000;
1133 	return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136 
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139 	return nsecs / 1000;
1140 }
1141 
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150 
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153 	TRACE_FLAGS
1154 	NULL
1155 };
1156 
1157 static struct {
1158 	u64 (*func)(void);
1159 	const char *name;
1160 	int in_ns;		/* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162 	{ trace_clock_local,		"local",	1 },
1163 	{ trace_clock_global,		"global",	1 },
1164 	{ trace_clock_counter,		"counter",	0 },
1165 	{ trace_clock_jiffies,		"uptime",	0 },
1166 	{ trace_clock,			"perf",		1 },
1167 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1168 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1169 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1170 	ARCH_TRACE_CLOCKS
1171 };
1172 
1173 /*
1174  * trace_parser_get_init - gets the buffer for trace parser
1175  */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178 	memset(parser, 0, sizeof(*parser));
1179 
1180 	parser->buffer = kmalloc(size, GFP_KERNEL);
1181 	if (!parser->buffer)
1182 		return 1;
1183 
1184 	parser->size = size;
1185 	return 0;
1186 }
1187 
1188 /*
1189  * trace_parser_put - frees the buffer for trace parser
1190  */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193 	kfree(parser->buffer);
1194 	parser->buffer = NULL;
1195 }
1196 
1197 /*
1198  * trace_get_user - reads the user input string separated by space
1199  * (matched by isspace(ch))
1200  *
1201  * For each string found the 'struct trace_parser' is updated,
1202  * and the function returns.
1203  *
1204  * Returns number of bytes read.
1205  *
1206  * See kernel/trace/trace.h for 'struct trace_parser' details.
1207  */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209 	size_t cnt, loff_t *ppos)
1210 {
1211 	char ch;
1212 	size_t read = 0;
1213 	ssize_t ret;
1214 
1215 	if (!*ppos)
1216 		trace_parser_clear(parser);
1217 
1218 	ret = get_user(ch, ubuf++);
1219 	if (ret)
1220 		goto out;
1221 
1222 	read++;
1223 	cnt--;
1224 
1225 	/*
1226 	 * The parser is not finished with the last write,
1227 	 * continue reading the user input without skipping spaces.
1228 	 */
1229 	if (!parser->cont) {
1230 		/* skip white space */
1231 		while (cnt && isspace(ch)) {
1232 			ret = get_user(ch, ubuf++);
1233 			if (ret)
1234 				goto out;
1235 			read++;
1236 			cnt--;
1237 		}
1238 
1239 		/* only spaces were written */
1240 		if (isspace(ch)) {
1241 			*ppos += read;
1242 			ret = read;
1243 			goto out;
1244 		}
1245 
1246 		parser->idx = 0;
1247 	}
1248 
1249 	/* read the non-space input */
1250 	while (cnt && !isspace(ch)) {
1251 		if (parser->idx < parser->size - 1)
1252 			parser->buffer[parser->idx++] = ch;
1253 		else {
1254 			ret = -EINVAL;
1255 			goto out;
1256 		}
1257 		ret = get_user(ch, ubuf++);
1258 		if (ret)
1259 			goto out;
1260 		read++;
1261 		cnt--;
1262 	}
1263 
1264 	/* We either got finished input or we have to wait for another call. */
1265 	if (isspace(ch)) {
1266 		parser->buffer[parser->idx] = 0;
1267 		parser->cont = false;
1268 	} else if (parser->idx < parser->size - 1) {
1269 		parser->cont = true;
1270 		parser->buffer[parser->idx++] = ch;
1271 	} else {
1272 		ret = -EINVAL;
1273 		goto out;
1274 	}
1275 
1276 	*ppos += read;
1277 	ret = read;
1278 
1279 out:
1280 	return ret;
1281 }
1282 
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286 	int len;
1287 
1288 	if (trace_seq_used(s) <= s->seq.readpos)
1289 		return -EBUSY;
1290 
1291 	len = trace_seq_used(s) - s->seq.readpos;
1292 	if (cnt > len)
1293 		cnt = len;
1294 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295 
1296 	s->seq.readpos += cnt;
1297 	return cnt;
1298 }
1299 
1300 unsigned long __read_mostly	tracing_thresh;
1301 
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1312 	struct trace_buffer *max_buf = &tr->max_buffer;
1313 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315 
1316 	max_buf->cpu = cpu;
1317 	max_buf->time_start = data->preempt_timestamp;
1318 
1319 	max_data->saved_latency = tr->max_latency;
1320 	max_data->critical_start = data->critical_start;
1321 	max_data->critical_end = data->critical_end;
1322 
1323 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324 	max_data->pid = tsk->pid;
1325 	/*
1326 	 * If tsk == current, then use current_uid(), as that does not use
1327 	 * RCU. The irq tracer can be called out of RCU scope.
1328 	 */
1329 	if (tsk == current)
1330 		max_data->uid = current_uid();
1331 	else
1332 		max_data->uid = task_uid(tsk);
1333 
1334 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335 	max_data->policy = tsk->policy;
1336 	max_data->rt_priority = tsk->rt_priority;
1337 
1338 	/* record this task's comm */
1339 	tracing_record_cmdline(tsk);
1340 }
1341 
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354 	struct ring_buffer *buf;
1355 
1356 	if (tr->stop_count)
1357 		return;
1358 
1359 	WARN_ON_ONCE(!irqs_disabled());
1360 
1361 	if (!tr->allocated_snapshot) {
1362 		/* Only the nop tracer should hit this when disabling */
1363 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364 		return;
1365 	}
1366 
1367 	arch_spin_lock(&tr->max_lock);
1368 
1369 	buf = tr->trace_buffer.buffer;
1370 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371 	tr->max_buffer.buffer = buf;
1372 
1373 	__update_max_tr(tr, tsk, cpu);
1374 	arch_spin_unlock(&tr->max_lock);
1375 }
1376 
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr: tracer
1380  * @tsk: task with the latency
1381  * @cpu: the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388 	int ret;
1389 
1390 	if (tr->stop_count)
1391 		return;
1392 
1393 	WARN_ON_ONCE(!irqs_disabled());
1394 	if (!tr->allocated_snapshot) {
1395 		/* Only the nop tracer should hit this when disabling */
1396 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397 		return;
1398 	}
1399 
1400 	arch_spin_lock(&tr->max_lock);
1401 
1402 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403 
1404 	if (ret == -EBUSY) {
1405 		/*
1406 		 * We failed to swap the buffer due to a commit taking
1407 		 * place on this CPU. We fail to record, but we reset
1408 		 * the max trace buffer (no one writes directly to it)
1409 		 * and flag that it failed.
1410 		 */
1411 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412 			"Failed to swap buffers due to commit in progress\n");
1413 	}
1414 
1415 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416 
1417 	__update_max_tr(tr, tsk, cpu);
1418 	arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
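/*
 * Usage note (hedged): the latency tracers are the callers of the two
 * functions above. When irqsoff, for example, sees a new maximum it
 * does roughly
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr_single(tr, current, cpu);
 *	}
 *
 * with interrupts disabled, so that max_buffer always holds the trace
 * of the worst latency seen so far.
 */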
1421 
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424 	/* Iterators are static, they should be filled or empty */
1425 	if (trace_buffer_iter(iter, iter->cpu_file))
1426 		return 0;
1427 
1428 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429 				full);
1430 }
1431 
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434 
1435 struct trace_selftests {
1436 	struct list_head		list;
1437 	struct tracer			*type;
1438 };
1439 
1440 static LIST_HEAD(postponed_selftests);
1441 
1442 static int save_selftest(struct tracer *type)
1443 {
1444 	struct trace_selftests *selftest;
1445 
1446 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447 	if (!selftest)
1448 		return -ENOMEM;
1449 
1450 	selftest->type = type;
1451 	list_add(&selftest->list, &postponed_selftests);
1452 	return 0;
1453 }
1454 
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457 	struct trace_array *tr = &global_trace;
1458 	struct tracer *saved_tracer = tr->current_trace;
1459 	int ret;
1460 
1461 	if (!type->selftest || tracing_selftest_disabled)
1462 		return 0;
1463 
1464 	/*
1465 	 * If a tracer registers early in boot up (before scheduling is
1466 	 * initialized and such), then do not run its selftests yet.
1467 	 * Instead, run it a little later in the boot process.
1468 	 */
1469 	if (!selftests_can_run)
1470 		return save_selftest(type);
1471 
1472 	/*
1473 	 * Run a selftest on this tracer.
1474 	 * Here we reset the trace buffer, and set the current
1475 	 * tracer to be this tracer. The tracer can then run some
1476 	 * internal tracing to verify that everything is in order.
1477 	 * If we fail, we do not register this tracer.
1478 	 */
1479 	tracing_reset_online_cpus(&tr->trace_buffer);
1480 
1481 	tr->current_trace = type;
1482 
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484 	if (type->use_max_tr) {
1485 		/* If we expanded the buffers, make sure the max is expanded too */
1486 		if (ring_buffer_expanded)
1487 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488 					   RING_BUFFER_ALL_CPUS);
1489 		tr->allocated_snapshot = true;
1490 	}
1491 #endif
1492 
1493 	/* the test is responsible for initializing and enabling */
1494 	pr_info("Testing tracer %s: ", type->name);
1495 	ret = type->selftest(type, tr);
1496 	/* the test is responsible for resetting too */
1497 	tr->current_trace = saved_tracer;
1498 	if (ret) {
1499 		printk(KERN_CONT "FAILED!\n");
1500 		/* Add the warning after printing 'FAILED' */
1501 		WARN_ON(1);
1502 		return -1;
1503 	}
1504 	/* Only reset on passing, to avoid touching corrupted buffers */
1505 	tracing_reset_online_cpus(&tr->trace_buffer);
1506 
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508 	if (type->use_max_tr) {
1509 		tr->allocated_snapshot = false;
1510 
1511 		/* Shrink the max buffer again */
1512 		if (ring_buffer_expanded)
1513 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1514 					   RING_BUFFER_ALL_CPUS);
1515 	}
1516 #endif
1517 
1518 	printk(KERN_CONT "PASSED\n");
1519 	return 0;
1520 }
1521 
1522 static __init int init_trace_selftests(void)
1523 {
1524 	struct trace_selftests *p, *n;
1525 	struct tracer *t, **last;
1526 	int ret;
1527 
1528 	selftests_can_run = true;
1529 
1530 	mutex_lock(&trace_types_lock);
1531 
1532 	if (list_empty(&postponed_selftests))
1533 		goto out;
1534 
1535 	pr_info("Running postponed tracer tests:\n");
1536 
1537 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538 		ret = run_tracer_selftest(p->type);
1539 		/* If the test fails, then warn and remove from available_tracers */
1540 		if (ret < 0) {
1541 			WARN(1, "tracer: %s failed selftest, disabling\n",
1542 			     p->type->name);
1543 			last = &trace_types;
1544 			for (t = trace_types; t; t = t->next) {
1545 				if (t == p->type) {
1546 					*last = t->next;
1547 					break;
1548 				}
1549 				last = &t->next;
1550 			}
1551 		}
1552 		list_del(&p->list);
1553 		kfree(p);
1554 	}
1555 
1556  out:
1557 	mutex_unlock(&trace_types_lock);
1558 
1559 	return 0;
1560 }
1561 core_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565 	return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568 
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570 
1571 static void __init apply_trace_boot_options(void);
1572 
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type: the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581 	struct tracer *t;
1582 	int ret = 0;
1583 
1584 	if (!type->name) {
1585 		pr_info("Tracer must have a name\n");
1586 		return -1;
1587 	}
1588 
1589 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591 		return -1;
1592 	}
1593 
1594 	mutex_lock(&trace_types_lock);
1595 
1596 	tracing_selftest_running = true;
1597 
1598 	for (t = trace_types; t; t = t->next) {
1599 		if (strcmp(type->name, t->name) == 0) {
1600 			/* already found */
1601 			pr_info("Tracer %s already registered\n",
1602 				type->name);
1603 			ret = -1;
1604 			goto out;
1605 		}
1606 	}
1607 
1608 	if (!type->set_flag)
1609 		type->set_flag = &dummy_set_flag;
1610 	if (!type->flags) {
1611 		/* allocate a dummy tracer_flags */
1612 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613 		if (!type->flags) {
1614 			ret = -ENOMEM;
1615 			goto out;
1616 		}
1617 		type->flags->val = 0;
1618 		type->flags->opts = dummy_tracer_opt;
1619 	} else
1620 		if (!type->flags->opts)
1621 			type->flags->opts = dummy_tracer_opt;
1622 
1623 	/* store the tracer for __set_tracer_option */
1624 	type->flags->trace = type;
1625 
1626 	ret = run_tracer_selftest(type);
1627 	if (ret < 0)
1628 		goto out;
1629 
1630 	type->next = trace_types;
1631 	trace_types = type;
1632 	add_tracer_options(&global_trace, type);
1633 
1634  out:
1635 	tracing_selftest_running = false;
1636 	mutex_unlock(&trace_types_lock);
1637 
1638 	if (ret || !default_bootup_tracer)
1639 		goto out_unlock;
1640 
1641 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642 		goto out_unlock;
1643 
1644 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645 	/* Do we want this tracer to start on bootup? */
1646 	tracing_set_tracer(&global_trace, type->name);
1647 	default_bootup_tracer = NULL;
1648 
1649 	apply_trace_boot_options();
1650 
1651 	/* disable other selftests, since this tracer would break them. */
1652 	tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655 	       type->name);
1656 #endif
1657 
1658  out_unlock:
1659 	return ret;
1660 }
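/*
 * Illustrative sketch (hypothetical tracer, not code from this file):
 * the minimum a built-in plugin needs in order to register. Since
 * register_tracer() is __init, this must run at boot; see trace_nop.c
 * for the simplest real example.
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_setup(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_setup);
 */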
1661 
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664 	struct ring_buffer *buffer = buf->buffer;
1665 
1666 	if (!buffer)
1667 		return;
1668 
1669 	ring_buffer_record_disable(buffer);
1670 
1671 	/* Make sure all commits have finished */
1672 	synchronize_sched();
1673 	ring_buffer_reset_cpu(buffer, cpu);
1674 
1675 	ring_buffer_record_enable(buffer);
1676 }
1677 
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680 	struct ring_buffer *buffer = buf->buffer;
1681 	int cpu;
1682 
1683 	if (!buffer)
1684 		return;
1685 
1686 	ring_buffer_record_disable(buffer);
1687 
1688 	/* Make sure all commits have finished */
1689 	synchronize_sched();
1690 
1691 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692 
1693 	for_each_online_cpu(cpu)
1694 		ring_buffer_reset_cpu(buffer, cpu);
1695 
1696 	ring_buffer_record_enable(buffer);
1697 }
1698 
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702 	struct trace_array *tr;
1703 
1704 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705 		tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707 		tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709 	}
1710 }
1711 
1712 static int *tgid_map;
1713 
1714 #define SAVED_CMDLINES_DEFAULT 128
1715 #define NO_CMDLINE_MAP UINT_MAX
1716 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1717 struct saved_cmdlines_buffer {
1718 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1719 	unsigned *map_cmdline_to_pid;
1720 	unsigned cmdline_num;
1721 	int cmdline_idx;
1722 	char *saved_cmdlines;
1723 };
1724 static struct saved_cmdlines_buffer *savedcmd;
1725 
1726 /* temporarily disable recording */
1727 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1728 
1729 static inline char *get_saved_cmdlines(int idx)
1730 {
1731 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1732 }
1733 
1734 static inline void set_cmdline(int idx, const char *cmdline)
1735 {
1736 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1737 }
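/*
 * Illustrative sketch of how the structure above is used: comms live in
 * a fixed ring of cmdline_num slots, and the two index arrays form a
 * two-way mapping between pids and slots:
 *
 *	idx  = savedcmd->map_pid_to_cmdline[pid];	(pid  -> slot)
 *	pid  = savedcmd->map_cmdline_to_pid[idx];	(slot -> pid)
 *	comm = get_saved_cmdlines(idx);			(slot -> comm)
 *
 * NO_CMDLINE_MAP in either index array means "no entry".
 */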
1738 
1739 static int allocate_cmdlines_buffer(unsigned int val,
1740 				    struct saved_cmdlines_buffer *s)
1741 {
1742 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1743 					GFP_KERNEL);
1744 	if (!s->map_cmdline_to_pid)
1745 		return -ENOMEM;
1746 
1747 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1748 	if (!s->saved_cmdlines) {
1749 		kfree(s->map_cmdline_to_pid);
1750 		return -ENOMEM;
1751 	}
1752 
1753 	s->cmdline_idx = 0;
1754 	s->cmdline_num = val;
1755 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1756 	       sizeof(s->map_pid_to_cmdline));
1757 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1758 	       val * sizeof(*s->map_cmdline_to_pid));
1759 
1760 	return 0;
1761 }
1762 
1763 static int trace_create_savedcmd(void)
1764 {
1765 	int ret;
1766 
1767 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1768 	if (!savedcmd)
1769 		return -ENOMEM;
1770 
1771 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1772 	if (ret < 0) {
1773 		kfree(savedcmd);
1774 		savedcmd = NULL;
1775 		return -ENOMEM;
1776 	}
1777 
1778 	return 0;
1779 }
1780 
1781 int is_tracing_stopped(void)
1782 {
1783 	return global_trace.stop_count;
1784 }
1785 
1786 /**
1787  * tracing_start - quick start of the tracer
1788  *
1789  * If tracing is enabled but was stopped by tracing_stop,
1790  * this will start the tracer back up.
1791  */
1792 void tracing_start(void)
1793 {
1794 	struct ring_buffer *buffer;
1795 	unsigned long flags;
1796 
1797 	if (tracing_disabled)
1798 		return;
1799 
1800 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1801 	if (--global_trace.stop_count) {
1802 		if (global_trace.stop_count < 0) {
1803 			/* Someone screwed up their debugging */
1804 			WARN_ON_ONCE(1);
1805 			global_trace.stop_count = 0;
1806 		}
1807 		goto out;
1808 	}
1809 
1810 	/* Prevent the buffers from switching */
1811 	arch_spin_lock(&global_trace.max_lock);
1812 
1813 	buffer = global_trace.trace_buffer.buffer;
1814 	if (buffer)
1815 		ring_buffer_record_enable(buffer);
1816 
1817 #ifdef CONFIG_TRACER_MAX_TRACE
1818 	buffer = global_trace.max_buffer.buffer;
1819 	if (buffer)
1820 		ring_buffer_record_enable(buffer);
1821 #endif
1822 
1823 	arch_spin_unlock(&global_trace.max_lock);
1824 
1825  out:
1826 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1827 }
1828 
1829 static void tracing_start_tr(struct trace_array *tr)
1830 {
1831 	struct ring_buffer *buffer;
1832 	unsigned long flags;
1833 
1834 	if (tracing_disabled)
1835 		return;
1836 
1837 	/* If global, we need to also start the max tracer */
1838 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1839 		return tracing_start();
1840 
1841 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1842 
1843 	if (--tr->stop_count) {
1844 		if (tr->stop_count < 0) {
1845 			/* Someone screwed up their debugging */
1846 			WARN_ON_ONCE(1);
1847 			tr->stop_count = 0;
1848 		}
1849 		goto out;
1850 	}
1851 
1852 	buffer = tr->trace_buffer.buffer;
1853 	if (buffer)
1854 		ring_buffer_record_enable(buffer);
1855 
1856  out:
1857 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1858 }
1859 
1860 /**
1861  * tracing_stop - quick stop of the tracer
1862  *
1863  * Light weight way to stop tracing. Use in conjunction with
1864  * tracing_start.
1865  */
1866 void tracing_stop(void)
1867 {
1868 	struct ring_buffer *buffer;
1869 	unsigned long flags;
1870 
1871 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1872 	if (global_trace.stop_count++)
1873 		goto out;
1874 
1875 	/* Prevent the buffers from switching */
1876 	arch_spin_lock(&global_trace.max_lock);
1877 
1878 	buffer = global_trace.trace_buffer.buffer;
1879 	if (buffer)
1880 		ring_buffer_record_disable(buffer);
1881 
1882 #ifdef CONFIG_TRACER_MAX_TRACE
1883 	buffer = global_trace.max_buffer.buffer;
1884 	if (buffer)
1885 		ring_buffer_record_disable(buffer);
1886 #endif
1887 
1888 	arch_spin_unlock(&global_trace.max_lock);
1889 
1890  out:
1891 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1892 }
1893 
1894 static void tracing_stop_tr(struct trace_array *tr)
1895 {
1896 	struct ring_buffer *buffer;
1897 	unsigned long flags;
1898 
1899 	/* If global, we need to also stop the max tracer */
1900 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1901 		return tracing_stop();
1902 
1903 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1904 	if (tr->stop_count++)
1905 		goto out;
1906 
1907 	buffer = tr->trace_buffer.buffer;
1908 	if (buffer)
1909 		ring_buffer_record_disable(buffer);
1910 
1911  out:
1912 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1913 }
1914 
1915 static int trace_save_cmdline(struct task_struct *tsk)
1916 {
1917 	unsigned pid, idx;
1918 
1919 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1920 		return 0;
1921 
1922 	/*
1923 	 * It's not the end of the world if we don't get
1924 	 * the lock, but we also don't want to spin
1925 	 * nor do we want to disable interrupts,
1926 	 * so if we miss here, then better luck next time.
1927 	 */
1928 	if (!arch_spin_trylock(&trace_cmdline_lock))
1929 		return 0;
1930 
1931 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1932 	if (idx == NO_CMDLINE_MAP) {
1933 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1934 
1935 		/*
1936 		 * Check whether the cmdline buffer at idx has a pid
1937 		 * mapped. We are going to overwrite that entry so we
1938 		 * need to clear the map_pid_to_cmdline. Otherwise we
1939 		 * would read the new comm for the old pid.
1940 		 */
1941 		pid = savedcmd->map_cmdline_to_pid[idx];
1942 		if (pid != NO_CMDLINE_MAP)
1943 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1944 
1945 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1946 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1947 
1948 		savedcmd->cmdline_idx = idx;
1949 	}
1950 
1951 	set_cmdline(idx, tsk->comm);
1952 
1953 	arch_spin_unlock(&trace_cmdline_lock);
1954 
1955 	return 1;
1956 }
1957 
1958 static void __trace_find_cmdline(int pid, char comm[])
1959 {
1960 	unsigned map;
1961 
1962 	if (!pid) {
1963 		strcpy(comm, "<idle>");
1964 		return;
1965 	}
1966 
1967 	if (WARN_ON_ONCE(pid < 0)) {
1968 		strcpy(comm, "<XXX>");
1969 		return;
1970 	}
1971 
1972 	if (pid > PID_MAX_DEFAULT) {
1973 		strcpy(comm, "<...>");
1974 		return;
1975 	}
1976 
1977 	map = savedcmd->map_pid_to_cmdline[pid];
1978 	if (map != NO_CMDLINE_MAP)
1979 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1980 	else
1981 		strcpy(comm, "<...>");
1982 }
1983 
1984 void trace_find_cmdline(int pid, char comm[])
1985 {
1986 	preempt_disable();
1987 	arch_spin_lock(&trace_cmdline_lock);
1988 
1989 	__trace_find_cmdline(pid, comm);
1990 
1991 	arch_spin_unlock(&trace_cmdline_lock);
1992 	preempt_enable();
1993 }
1994 
1995 int trace_find_tgid(int pid)
1996 {
1997 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
1998 		return 0;
1999 
2000 	return tgid_map[pid];
2001 }
2002 
2003 static int trace_save_tgid(struct task_struct *tsk)
2004 {
2005 	if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
2006 		return 0;
2007 
2008 	tgid_map[tsk->pid] = tsk->tgid;
2009 	return 1;
2010 }
2011 
2012 static bool tracing_record_taskinfo_skip(int flags)
2013 {
2014 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2015 		return true;
2016 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2017 		return true;
2018 	if (!__this_cpu_read(trace_taskinfo_save))
2019 		return true;
2020 	return false;
2021 }
2022 
2023 /**
2024  * tracing_record_taskinfo - record the task info of a task
2025  *
2026  * @task:  the task to record
2027  * @flags: TRACE_RECORD_CMDLINE for recording comm
2028  *         TRACE_RECORD_TGID for recording tgid
2029  */
2030 void tracing_record_taskinfo(struct task_struct *task, int flags)
2031 {
2032 	if (tracing_record_taskinfo_skip(flags))
2033 		return;
2034 	if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
2035 		return;
2036 	if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
2037 		return;
2038 
2039 	__this_cpu_write(trace_taskinfo_save, false);
2040 }
2041 
2042 /**
2043  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2044  *
2045  * @prev:  previous task during sched_switch
2046  * @next:  next task during sched_switch
2047  * @flags: TRACE_RECORD_CMDLINE for recording comm
2048  *         TRACE_RECORD_TGID for recording tgid
2049  */
2050 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2051 					  struct task_struct *next, int flags)
2052 {
2053 	if (tracing_record_taskinfo_skip(flags))
2054 		return;
2055 
2056 	if ((flags & TRACE_RECORD_CMDLINE) &&
2057 	    (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
2058 		return;
2059 
2060 	if ((flags & TRACE_RECORD_TGID) &&
2061 	    (!trace_save_tgid(prev) || !trace_save_tgid(next)))
2062 		return;
2063 
2064 	__this_cpu_write(trace_taskinfo_save, false);
2065 }
2066 
2067 /* Helpers to record a specific task information */
2068 void tracing_record_cmdline(struct task_struct *task)
2069 {
2070 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2071 }
2072 
2073 void tracing_record_tgid(struct task_struct *task)
2074 {
2075 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2076 }
2077 
2078 /*
2079  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2080  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2081  * simplifies those functions and keeps them in sync.
2082  */
2083 enum print_line_t trace_handle_return(struct trace_seq *s)
2084 {
2085 	return trace_seq_has_overflowed(s) ?
2086 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2087 }
2088 EXPORT_SYMBOL_GPL(trace_handle_return);
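
/*
 * A minimal usage sketch (not taken from this file): an event's print
 * handler writes into the iterator's trace_seq and lets this helper map
 * any overflow to the proper return value. foo_event_trace is a
 * hypothetical handler name used only for illustration.
 *
 *	static enum print_line_t foo_event_trace(struct trace_iterator *iter,
 *						 int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo: %d\n", 42);
 *		return trace_handle_return(&iter->seq);
 *	}
 */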
2089 
2090 void
2091 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2092 			     int pc)
2093 {
2094 	struct task_struct *tsk = current;
2095 
2096 	entry->preempt_count		= pc & 0xff;
2097 	entry->pid			= (tsk) ? tsk->pid : 0;
2098 	entry->flags =
2099 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2100 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2101 #else
2102 		TRACE_FLAG_IRQS_NOSUPPORT |
2103 #endif
2104 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2105 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2106 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2107 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2108 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2109 }
2110 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2111 
2112 struct ring_buffer_event *
2113 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2114 			  int type,
2115 			  unsigned long len,
2116 			  unsigned long flags, int pc)
2117 {
2118 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2119 }
2120 
2121 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2122 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2123 static int trace_buffered_event_ref;
2124 
2125 /**
2126  * trace_buffered_event_enable - enable buffering events
2127  *
2128  * When events are being filtered, it is quicker to write the event
2129  * data into a temporary buffer if there is a good chance that the
2130  * event will not be committed. Discarding an event from the ring
2131  * buffer is not as fast as committing one, and is much slower than
2132  * copying the data and committing it in one shot.
2133  *
2134  * When an event may be filtered, allocate per-CPU buffers to write the
2135  * event data into. If the event is filtered it is simply dropped,
2136  * otherwise the entire event is committed to the ring buffer in one
2137  * shot.
2138  */
2139 void trace_buffered_event_enable(void)
2140 {
2141 	struct ring_buffer_event *event;
2142 	struct page *page;
2143 	int cpu;
2144 
2145 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2146 
2147 	if (trace_buffered_event_ref++)
2148 		return;
2149 
2150 	for_each_tracing_cpu(cpu) {
2151 		page = alloc_pages_node(cpu_to_node(cpu),
2152 					GFP_KERNEL | __GFP_NORETRY, 0);
2153 		if (!page)
2154 			goto failed;
2155 
2156 		event = page_address(page);
2157 		memset(event, 0, sizeof(*event));
2158 
2159 		per_cpu(trace_buffered_event, cpu) = event;
2160 
2161 		preempt_disable();
2162 		if (cpu == smp_processor_id() &&
2163 		    this_cpu_read(trace_buffered_event) !=
2164 		    per_cpu(trace_buffered_event, cpu))
2165 			WARN_ON_ONCE(1);
2166 		preempt_enable();
2167 	}
2168 
2169 	return;
2170  failed:
2171 	trace_buffered_event_disable();
2172 }
2173 
2174 static void enable_trace_buffered_event(void *data)
2175 {
2176 	/* Probably not needed, but do it anyway */
2177 	smp_rmb();
2178 	this_cpu_dec(trace_buffered_event_cnt);
2179 }
2180 
2181 static void disable_trace_buffered_event(void *data)
2182 {
2183 	this_cpu_inc(trace_buffered_event_cnt);
2184 }
2185 
2186 /**
2187  * trace_buffered_event_disable - disable buffering events
2188  *
2189  * When a filter is removed, it is faster to not use the buffered
2190  * events, and to commit directly into the ring buffer. Free up
2191  * the temp buffers when there are no more users. This requires
2192  * special synchronization with current events.
2193  */
2194 void trace_buffered_event_disable(void)
2195 {
2196 	int cpu;
2197 
2198 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2199 
2200 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2201 		return;
2202 
2203 	if (--trace_buffered_event_ref)
2204 		return;
2205 
2206 	preempt_disable();
2207 	/* For each CPU, set the buffer as used. */
2208 	smp_call_function_many(tracing_buffer_mask,
2209 			       disable_trace_buffered_event, NULL, 1);
2210 	preempt_enable();
2211 
2212 	/* Wait for all current users to finish */
2213 	synchronize_sched();
2214 
2215 	for_each_tracing_cpu(cpu) {
2216 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2217 		per_cpu(trace_buffered_event, cpu) = NULL;
2218 	}
2219 	/*
2220 	 * Make sure trace_buffered_event is NULL before clearing
2221 	 * trace_buffered_event_cnt.
2222 	 */
2223 	smp_wmb();
2224 
2225 	preempt_disable();
2226 	/* Do the work on each cpu */
2227 	smp_call_function_many(tracing_buffer_mask,
2228 			       enable_trace_buffered_event, NULL, 1);
2229 	preempt_enable();
2230 }
2231 
2232 static struct ring_buffer *temp_buffer;
2233 
2234 struct ring_buffer_event *
2235 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2236 			  struct trace_event_file *trace_file,
2237 			  int type, unsigned long len,
2238 			  unsigned long flags, int pc)
2239 {
2240 	struct ring_buffer_event *entry;
2241 	int val;
2242 
2243 	*current_rb = trace_file->tr->trace_buffer.buffer;
2244 
2245 	if ((trace_file->flags &
2246 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2247 	    (entry = this_cpu_read(trace_buffered_event))) {
2248 		/* Try to use the per cpu buffer first */
2249 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2250 		if (val == 1) {
2251 			trace_event_setup(entry, type, flags, pc);
2252 			entry->array[0] = len;
2253 			return entry;
2254 		}
2255 		this_cpu_dec(trace_buffered_event_cnt);
2256 	}
2257 
2258 	entry = __trace_buffer_lock_reserve(*current_rb,
2259 					    type, len, flags, pc);
2260 	/*
2261 	 * If tracing is off, but we have triggers enabled,
2262 	 * we still need to look at the event data. Use the temp_buffer
2263 	 * to store the trace event for the trigger to use. It's recursion
2264 	 * safe and will not be recorded anywhere.
2265 	 */
2266 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2267 		*current_rb = temp_buffer;
2268 		entry = __trace_buffer_lock_reserve(*current_rb,
2269 						    type, len, flags, pc);
2270 	}
2271 	return entry;
2272 }
2273 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2274 
2275 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2276 static DEFINE_MUTEX(tracepoint_printk_mutex);
2277 
2278 static void output_printk(struct trace_event_buffer *fbuffer)
2279 {
2280 	struct trace_event_call *event_call;
2281 	struct trace_event *event;
2282 	unsigned long flags;
2283 	struct trace_iterator *iter = tracepoint_print_iter;
2284 
2285 	/* We should never get here if iter is NULL */
2286 	if (WARN_ON_ONCE(!iter))
2287 		return;
2288 
2289 	event_call = fbuffer->trace_file->event_call;
2290 	if (!event_call || !event_call->event.funcs ||
2291 	    !event_call->event.funcs->trace)
2292 		return;
2293 
2294 	event = &fbuffer->trace_file->event_call->event;
2295 
2296 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2297 	trace_seq_init(&iter->seq);
2298 	iter->ent = fbuffer->entry;
2299 	event_call->event.funcs->trace(iter, 0, event);
2300 	trace_seq_putc(&iter->seq, 0);
2301 	printk("%s", iter->seq.buffer);
2302 
2303 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2304 }
2305 
2306 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2307 			     void __user *buffer, size_t *lenp,
2308 			     loff_t *ppos)
2309 {
2310 	int save_tracepoint_printk;
2311 	int ret;
2312 
2313 	mutex_lock(&tracepoint_printk_mutex);
2314 	save_tracepoint_printk = tracepoint_printk;
2315 
2316 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2317 
2318 	/*
2319 	 * This will force an early exit, as tracepoint_printk
2320 	 * is always zero when tracepoint_print_iter is not allocated.
2321 	 */
2322 	if (!tracepoint_print_iter)
2323 		tracepoint_printk = 0;
2324 
2325 	if (save_tracepoint_printk == tracepoint_printk)
2326 		goto out;
2327 
2328 	if (tracepoint_printk)
2329 		static_key_enable(&tracepoint_printk_key.key);
2330 	else
2331 		static_key_disable(&tracepoint_printk_key.key);
2332 
2333  out:
2334 	mutex_unlock(&tracepoint_printk_mutex);
2335 
2336 	return ret;
2337 }
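
/*
 * Note: this handler backs the kernel.tracepoint_printk sysctl
 * (/proc/sys/kernel/tracepoint_printk). tracepoint_print_iter is only
 * allocated when tracepoints are sent to printk at boot (the "tp_printk"
 * command line option), which is why the value is forced back to zero
 * above whenever that iterator is missing.
 */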
2338 
2339 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2340 {
2341 	if (static_key_false(&tracepoint_printk_key.key))
2342 		output_printk(fbuffer);
2343 
2344 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2345 				    fbuffer->event, fbuffer->entry,
2346 				    fbuffer->flags, fbuffer->pc);
2347 }
2348 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2349 
2350 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2351 				     struct ring_buffer *buffer,
2352 				     struct ring_buffer_event *event,
2353 				     unsigned long flags, int pc,
2354 				     struct pt_regs *regs)
2355 {
2356 	__buffer_unlock_commit(buffer, event);
2357 
2358 	/*
2359 	 * If regs is not set, then skip the following callers:
2360 	 *   trace_buffer_unlock_commit_regs
2361 	 *   event_trigger_unlock_commit
2362 	 *   trace_event_buffer_commit
2363 	 *   trace_event_raw_event_sched_switch
2364 	 * Note, we can still get here via blktrace, wakeup tracer
2365 	 * and mmiotrace, but that's ok if they lose a function or
2366 	 * two. They are not that meaningful.
2367 	 */
2368 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2369 	ftrace_trace_userstack(buffer, flags, pc);
2370 }
2371 
2372 /*
2373  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2374  */
2375 void
2376 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2377 				   struct ring_buffer_event *event)
2378 {
2379 	__buffer_unlock_commit(buffer, event);
2380 }
2381 
2382 static void
2383 trace_process_export(struct trace_export *export,
2384 	       struct ring_buffer_event *event)
2385 {
2386 	struct trace_entry *entry;
2387 	unsigned int size = 0;
2388 
2389 	entry = ring_buffer_event_data(event);
2390 	size = ring_buffer_event_length(event);
2391 	export->write(entry, size);
2392 }
2393 
2394 static DEFINE_MUTEX(ftrace_export_lock);
2395 
2396 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2397 
2398 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2399 
2400 static inline void ftrace_exports_enable(void)
2401 {
2402 	static_branch_enable(&ftrace_exports_enabled);
2403 }
2404 
2405 static inline void ftrace_exports_disable(void)
2406 {
2407 	static_branch_disable(&ftrace_exports_enabled);
2408 }
2409 
2410 void ftrace_exports(struct ring_buffer_event *event)
2411 {
2412 	struct trace_export *export;
2413 
2414 	preempt_disable_notrace();
2415 
2416 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2417 	while (export) {
2418 		trace_process_export(export, event);
2419 		export = rcu_dereference_raw_notrace(export->next);
2420 	}
2421 
2422 	preempt_enable_notrace();
2423 }
2424 
2425 static inline void
2426 add_trace_export(struct trace_export **list, struct trace_export *export)
2427 {
2428 	rcu_assign_pointer(export->next, *list);
2429 	/*
2430 	 * We are adding export to the list, but another
2431 	 * CPU might be walking that list. We need to make sure
2432 	 * the export->next pointer is valid before another CPU sees
2433 	 * the export pointer included in the list.
2434 	 */
2435 	rcu_assign_pointer(*list, export);
2436 }
2437 
2438 static inline int
2439 rm_trace_export(struct trace_export **list, struct trace_export *export)
2440 {
2441 	struct trace_export **p;
2442 
2443 	for (p = list; *p != NULL; p = &(*p)->next)
2444 		if (*p == export)
2445 			break;
2446 
2447 	if (*p != export)
2448 		return -1;
2449 
2450 	rcu_assign_pointer(*p, (*p)->next);
2451 
2452 	return 0;
2453 }
2454 
2455 static inline void
2456 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2457 {
2458 	if (*list == NULL)
2459 		ftrace_exports_enable();
2460 
2461 	add_trace_export(list, export);
2462 }
2463 
2464 static inline int
2465 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2466 {
2467 	int ret;
2468 
2469 	ret = rm_trace_export(list, export);
2470 	if (*list == NULL)
2471 		ftrace_exports_disable();
2472 
2473 	return ret;
2474 }
2475 
2476 int register_ftrace_export(struct trace_export *export)
2477 {
2478 	if (WARN_ON_ONCE(!export->write))
2479 		return -1;
2480 
2481 	mutex_lock(&ftrace_export_lock);
2482 
2483 	add_ftrace_export(&ftrace_exports_list, export);
2484 
2485 	mutex_unlock(&ftrace_export_lock);
2486 
2487 	return 0;
2488 }
2489 EXPORT_SYMBOL_GPL(register_ftrace_export);
2490 
2491 int unregister_ftrace_export(struct trace_export *export)
2492 {
2493 	int ret;
2494 
2495 	mutex_lock(&ftrace_export_lock);
2496 
2497 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2498 
2499 	mutex_unlock(&ftrace_export_lock);
2500 
2501 	return ret;
2502 }
2503 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
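
/*
 * A minimal sketch of a trace export user, assuming the two-argument
 * write() callback form that trace_process_export() above invokes as
 * export->write(entry, size). my_export and my_export_write are
 * hypothetical names used only for illustration.
 *
 *	static void my_export_write(const void *buf, unsigned int len)
 *	{
 *		... push the raw trace entry to some out-of-band channel ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */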
2504 
2505 void
2506 trace_function(struct trace_array *tr,
2507 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2508 	       int pc)
2509 {
2510 	struct trace_event_call *call = &event_function;
2511 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2512 	struct ring_buffer_event *event;
2513 	struct ftrace_entry *entry;
2514 
2515 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2516 					    flags, pc);
2517 	if (!event)
2518 		return;
2519 	entry	= ring_buffer_event_data(event);
2520 	entry->ip			= ip;
2521 	entry->parent_ip		= parent_ip;
2522 
2523 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2524 		if (static_branch_unlikely(&ftrace_exports_enabled))
2525 			ftrace_exports(event);
2526 		__buffer_unlock_commit(buffer, event);
2527 	}
2528 }
2529 
2530 #ifdef CONFIG_STACKTRACE
2531 
2532 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2533 struct ftrace_stack {
2534 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2535 };
2536 
2537 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2538 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2539 
2540 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2541 				 unsigned long flags,
2542 				 int skip, int pc, struct pt_regs *regs)
2543 {
2544 	struct trace_event_call *call = &event_kernel_stack;
2545 	struct ring_buffer_event *event;
2546 	struct stack_entry *entry;
2547 	struct stack_trace trace;
2548 	int use_stack;
2549 	int size = FTRACE_STACK_ENTRIES;
2550 
2551 	trace.nr_entries	= 0;
2552 	trace.skip		= skip;
2553 
2554 	/*
2555 	 * Add two, for this function and the call to save_stack_trace().
2556 	 * If regs is set, then these functions will not be in the way.
2557 	 */
2558 	if (!regs)
2559 		trace.skip += 2;
2560 
2561 	/*
2562 	 * Since events can happen in NMIs, there's no safe way to
2563 	 * use the per-CPU ftrace_stack. We reserve it, and if an interrupt
2564 	 * or NMI comes in, it will just have to use the default
2565 	 * FTRACE_STACK_ENTRIES size.
2566 	 */
2567 	preempt_disable_notrace();
2568 
2569 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2570 	/*
2571 	 * We don't need any atomic variables, just a barrier.
2572 	 * If an interrupt comes in, we don't care, because it would
2573 	 * have exited and put the counter back to what we want.
2574 	 * We just need a barrier to keep gcc from moving things
2575 	 * around.
2576 	 */
2577 	barrier();
2578 	if (use_stack == 1) {
2579 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2580 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2581 
2582 		if (regs)
2583 			save_stack_trace_regs(regs, &trace);
2584 		else
2585 			save_stack_trace(&trace);
2586 
2587 		if (trace.nr_entries > size)
2588 			size = trace.nr_entries;
2589 	} else
2590 		/* From now on, use_stack is a boolean */
2591 		use_stack = 0;
2592 
2593 	size *= sizeof(unsigned long);
2594 
2595 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2596 					    sizeof(*entry) + size, flags, pc);
2597 	if (!event)
2598 		goto out;
2599 	entry = ring_buffer_event_data(event);
2600 
2601 	memset(&entry->caller, 0, size);
2602 
2603 	if (use_stack)
2604 		memcpy(&entry->caller, trace.entries,
2605 		       trace.nr_entries * sizeof(unsigned long));
2606 	else {
2607 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2608 		trace.entries		= entry->caller;
2609 		if (regs)
2610 			save_stack_trace_regs(regs, &trace);
2611 		else
2612 			save_stack_trace(&trace);
2613 	}
2614 
2615 	entry->size = trace.nr_entries;
2616 
2617 	if (!call_filter_check_discard(call, entry, buffer, event))
2618 		__buffer_unlock_commit(buffer, event);
2619 
2620  out:
2621 	/* Again, don't let gcc optimize things here */
2622 	barrier();
2623 	__this_cpu_dec(ftrace_stack_reserve);
2624 	preempt_enable_notrace();
2625 
2626 }
2627 
2628 static inline void ftrace_trace_stack(struct trace_array *tr,
2629 				      struct ring_buffer *buffer,
2630 				      unsigned long flags,
2631 				      int skip, int pc, struct pt_regs *regs)
2632 {
2633 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2634 		return;
2635 
2636 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2637 }
2638 
2639 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2640 		   int pc)
2641 {
2642 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2643 
2644 	if (rcu_is_watching()) {
2645 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2646 		return;
2647 	}
2648 
2649 	/*
2650 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2651 	 * but if the above rcu_is_watching() failed, then the NMI
2652 	 * triggered someplace critical, and rcu_irq_enter() should
2653 	 * not be called from NMI.
2654 	 */
2655 	if (unlikely(in_nmi()))
2656 		return;
2657 
2658 	/*
2659 	 * It is possible that a function is being traced in a
2660 	 * location that RCU is not watching. A call to
2661 	 * rcu_irq_enter() will make sure that it is, but there are
2662 	 * a few internal RCU functions that could be traced
2663 	 * where that won't work either. In those cases, we just
2664 	 * do nothing.
2665 	 */
2666 	if (unlikely(rcu_irq_enter_disabled()))
2667 		return;
2668 
2669 	rcu_irq_enter_irqson();
2670 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2671 	rcu_irq_exit_irqson();
2672 }
2673 
2674 /**
2675  * trace_dump_stack - record a stack back trace in the trace buffer
2676  * @skip: Number of functions to skip (helper handlers)
2677  */
2678 void trace_dump_stack(int skip)
2679 {
2680 	unsigned long flags;
2681 
2682 	if (tracing_disabled || tracing_selftest_running)
2683 		return;
2684 
2685 	local_save_flags(flags);
2686 
2687 	/*
2688 	 * Skip 3 more; that seems to get us to the caller of
2689 	 * this function.
2690 	 */
2691 	skip += 3;
2692 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2693 			     flags, skip, preempt_count(), NULL);
2694 }
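
/*
 * Usage note (sketch): calling trace_dump_stack(0) from a suspect spot in
 * kernel code records the current call chain into the global trace buffer,
 * which can then be read back through the tracefs "trace" file. A non-zero
 * @skip trims that many additional callers from the top of the trace.
 */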
2695 
2696 static DEFINE_PER_CPU(int, user_stack_count);
2697 
2698 void
2699 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2700 {
2701 	struct trace_event_call *call = &event_user_stack;
2702 	struct ring_buffer_event *event;
2703 	struct userstack_entry *entry;
2704 	struct stack_trace trace;
2705 
2706 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2707 		return;
2708 
2709 	/*
2710 	 * NMIs can not handle page faults, even with fixups.
2711 	 * Saving the user stack can (and often does) fault.
2712 	 */
2713 	if (unlikely(in_nmi()))
2714 		return;
2715 
2716 	/*
2717 	 * Prevent recursion, since the user stack tracing may
2718 	 * trigger other kernel events.
2719 	 */
2720 	preempt_disable();
2721 	if (__this_cpu_read(user_stack_count))
2722 		goto out;
2723 
2724 	__this_cpu_inc(user_stack_count);
2725 
2726 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2727 					    sizeof(*entry), flags, pc);
2728 	if (!event)
2729 		goto out_drop_count;
2730 	entry	= ring_buffer_event_data(event);
2731 
2732 	entry->tgid		= current->tgid;
2733 	memset(&entry->caller, 0, sizeof(entry->caller));
2734 
2735 	trace.nr_entries	= 0;
2736 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2737 	trace.skip		= 0;
2738 	trace.entries		= entry->caller;
2739 
2740 	save_stack_trace_user(&trace);
2741 	if (!call_filter_check_discard(call, entry, buffer, event))
2742 		__buffer_unlock_commit(buffer, event);
2743 
2744  out_drop_count:
2745 	__this_cpu_dec(user_stack_count);
2746  out:
2747 	preempt_enable();
2748 }
2749 
2750 #ifdef UNUSED
2751 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2752 {
2753 	ftrace_trace_userstack(tr, flags, preempt_count());
2754 }
2755 #endif /* UNUSED */
2756 
2757 #endif /* CONFIG_STACKTRACE */
2758 
2759 /* created for use with alloc_percpu */
2760 struct trace_buffer_struct {
2761 	int nesting;
2762 	char buffer[4][TRACE_BUF_SIZE];
2763 };
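
/*
 * Four buffers are kept per CPU so that a nested trace_printk() can get
 * its own buffer in each context that may interrupt the previous one
 * (roughly: normal process context, softirq, irq and NMI). get_trace_buf()
 * below hands out one level per nested call and put_trace_buf() releases it.
 */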
2764 
2765 static struct trace_buffer_struct *trace_percpu_buffer;
2766 
2767 /*
2768  * This allows for lockless recording.  If we're nested too deeply, then
2769  * this returns NULL.
2770  */
2771 static char *get_trace_buf(void)
2772 {
2773 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2774 
2775 	if (!buffer || buffer->nesting >= 4)
2776 		return NULL;
2777 
2778 	return &buffer->buffer[buffer->nesting++][0];
2779 }
2780 
2781 static void put_trace_buf(void)
2782 {
2783 	this_cpu_dec(trace_percpu_buffer->nesting);
2784 }
2785 
2786 static int alloc_percpu_trace_buffer(void)
2787 {
2788 	struct trace_buffer_struct *buffers;
2789 
2790 	buffers = alloc_percpu(struct trace_buffer_struct);
2791 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2792 		return -ENOMEM;
2793 
2794 	trace_percpu_buffer = buffers;
2795 	return 0;
2796 }
2797 
2798 static int buffers_allocated;
2799 
2800 void trace_printk_init_buffers(void)
2801 {
2802 	if (buffers_allocated)
2803 		return;
2804 
2805 	if (alloc_percpu_trace_buffer())
2806 		return;
2807 
2808 	/* trace_printk() is for debug use only. Don't use it in production. */
2809 
2810 	pr_warn("\n");
2811 	pr_warn("**********************************************************\n");
2812 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2813 	pr_warn("**                                                      **\n");
2814 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2815 	pr_warn("**                                                      **\n");
2816 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2817 	pr_warn("** unsafe for production use.                           **\n");
2818 	pr_warn("**                                                      **\n");
2819 	pr_warn("** If you see this message and you are not debugging    **\n");
2820 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2821 	pr_warn("**                                                      **\n");
2822 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2823 	pr_warn("**********************************************************\n");
2824 
2825 	/* Expand the buffers to set size */
2826 	tracing_update_buffers();
2827 
2828 	buffers_allocated = 1;
2829 
2830 	/*
2831 	 * trace_printk_init_buffers() can be called by modules.
2832 	 * If that happens, then we need to start cmdline recording
2833 	 * directly here. If the global_trace.buffer is already
2834 	 * allocated here, then this was called by module code.
2835 	 */
2836 	if (global_trace.trace_buffer.buffer)
2837 		tracing_start_cmdline_record();
2838 }
2839 
2840 void trace_printk_start_comm(void)
2841 {
2842 	/* Start tracing comms if trace printk is set */
2843 	if (!buffers_allocated)
2844 		return;
2845 	tracing_start_cmdline_record();
2846 }
2847 
2848 static void trace_printk_start_stop_comm(int enabled)
2849 {
2850 	if (!buffers_allocated)
2851 		return;
2852 
2853 	if (enabled)
2854 		tracing_start_cmdline_record();
2855 	else
2856 		tracing_stop_cmdline_record();
2857 }
2858 
2859 /**
2860  * trace_vbprintk - write a binary message to the tracing buffer
2861  *
2862  */
2863 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2864 {
2865 	struct trace_event_call *call = &event_bprint;
2866 	struct ring_buffer_event *event;
2867 	struct ring_buffer *buffer;
2868 	struct trace_array *tr = &global_trace;
2869 	struct bprint_entry *entry;
2870 	unsigned long flags;
2871 	char *tbuffer;
2872 	int len = 0, size, pc;
2873 
2874 	if (unlikely(tracing_selftest_running || tracing_disabled))
2875 		return 0;
2876 
2877 	/* Don't pollute graph traces with trace_vprintk internals */
2878 	pause_graph_tracing();
2879 
2880 	pc = preempt_count();
2881 	preempt_disable_notrace();
2882 
2883 	tbuffer = get_trace_buf();
2884 	if (!tbuffer) {
2885 		len = 0;
2886 		goto out_nobuffer;
2887 	}
2888 
2889 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2890 
2891 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2892 		goto out;
2893 
2894 	local_save_flags(flags);
2895 	size = sizeof(*entry) + sizeof(u32) * len;
2896 	buffer = tr->trace_buffer.buffer;
2897 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2898 					    flags, pc);
2899 	if (!event)
2900 		goto out;
2901 	entry = ring_buffer_event_data(event);
2902 	entry->ip			= ip;
2903 	entry->fmt			= fmt;
2904 
2905 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2906 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2907 		__buffer_unlock_commit(buffer, event);
2908 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2909 	}
2910 
2911 out:
2912 	put_trace_buf();
2913 
2914 out_nobuffer:
2915 	preempt_enable_notrace();
2916 	unpause_graph_tracing();
2917 
2918 	return len;
2919 }
2920 EXPORT_SYMBOL_GPL(trace_vbprintk);
2921 
2922 static int
2923 __trace_array_vprintk(struct ring_buffer *buffer,
2924 		      unsigned long ip, const char *fmt, va_list args)
2925 {
2926 	struct trace_event_call *call = &event_print;
2927 	struct ring_buffer_event *event;
2928 	int len = 0, size, pc;
2929 	struct print_entry *entry;
2930 	unsigned long flags;
2931 	char *tbuffer;
2932 
2933 	if (tracing_disabled || tracing_selftest_running)
2934 		return 0;
2935 
2936 	/* Don't pollute graph traces with trace_vprintk internals */
2937 	pause_graph_tracing();
2938 
2939 	pc = preempt_count();
2940 	preempt_disable_notrace();
2941 
2943 	tbuffer = get_trace_buf();
2944 	if (!tbuffer) {
2945 		len = 0;
2946 		goto out_nobuffer;
2947 	}
2948 
2949 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2950 
2951 	local_save_flags(flags);
2952 	size = sizeof(*entry) + len + 1;
2953 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2954 					    flags, pc);
2955 	if (!event)
2956 		goto out;
2957 	entry = ring_buffer_event_data(event);
2958 	entry->ip = ip;
2959 
2960 	memcpy(&entry->buf, tbuffer, len + 1);
2961 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2962 		__buffer_unlock_commit(buffer, event);
2963 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2964 	}
2965 
2966 out:
2967 	put_trace_buf();
2968 
2969 out_nobuffer:
2970 	preempt_enable_notrace();
2971 	unpause_graph_tracing();
2972 
2973 	return len;
2974 }
2975 
2976 int trace_array_vprintk(struct trace_array *tr,
2977 			unsigned long ip, const char *fmt, va_list args)
2978 {
2979 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2980 }
2981 
2982 int trace_array_printk(struct trace_array *tr,
2983 		       unsigned long ip, const char *fmt, ...)
2984 {
2985 	int ret;
2986 	va_list ap;
2987 
2988 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2989 		return 0;
2990 
2991 	va_start(ap, fmt);
2992 	ret = trace_array_vprintk(tr, ip, fmt, ap);
2993 	va_end(ap);
2994 	return ret;
2995 }
2996 
2997 int trace_array_printk_buf(struct ring_buffer *buffer,
2998 			   unsigned long ip, const char *fmt, ...)
2999 {
3000 	int ret;
3001 	va_list ap;
3002 
3003 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3004 		return 0;
3005 
3006 	va_start(ap, fmt);
3007 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3008 	va_end(ap);
3009 	return ret;
3010 }
3011 
3012 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3013 {
3014 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3015 }
3016 EXPORT_SYMBOL_GPL(trace_vprintk);
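
/*
 * Note: trace_printk() with arguments ultimately lands in one of the two
 * paths above: a build-time-constant format is recorded in binary form via
 * trace_vbprintk() (only the format pointer and the arguments are stored),
 * while a runtime-built format string is formatted and recorded via
 * trace_vprintk().
 */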
3017 
3018 static void trace_iterator_increment(struct trace_iterator *iter)
3019 {
3020 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3021 
3022 	iter->idx++;
3023 	if (buf_iter)
3024 		ring_buffer_read(buf_iter, NULL);
3025 }
3026 
3027 static struct trace_entry *
3028 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3029 		unsigned long *lost_events)
3030 {
3031 	struct ring_buffer_event *event;
3032 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3033 
3034 	if (buf_iter)
3035 		event = ring_buffer_iter_peek(buf_iter, ts);
3036 	else
3037 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3038 					 lost_events);
3039 
3040 	if (event) {
3041 		iter->ent_size = ring_buffer_event_length(event);
3042 		return ring_buffer_event_data(event);
3043 	}
3044 	iter->ent_size = 0;
3045 	return NULL;
3046 }
3047 
3048 static struct trace_entry *
3049 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3050 		  unsigned long *missing_events, u64 *ent_ts)
3051 {
3052 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3053 	struct trace_entry *ent, *next = NULL;
3054 	unsigned long lost_events = 0, next_lost = 0;
3055 	int cpu_file = iter->cpu_file;
3056 	u64 next_ts = 0, ts;
3057 	int next_cpu = -1;
3058 	int next_size = 0;
3059 	int cpu;
3060 
3061 	/*
3062 	 * If we are in a per_cpu trace file, don't bother iterating over
3063 	 * all CPUs; just peek at that CPU directly.
3064 	 */
3065 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3066 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3067 			return NULL;
3068 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3069 		if (ent_cpu)
3070 			*ent_cpu = cpu_file;
3071 
3072 		return ent;
3073 	}
3074 
3075 	for_each_tracing_cpu(cpu) {
3076 
3077 		if (ring_buffer_empty_cpu(buffer, cpu))
3078 			continue;
3079 
3080 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3081 
3082 		/*
3083 		 * Pick the entry with the smallest timestamp:
3084 		 */
3085 		if (ent && (!next || ts < next_ts)) {
3086 			next = ent;
3087 			next_cpu = cpu;
3088 			next_ts = ts;
3089 			next_lost = lost_events;
3090 			next_size = iter->ent_size;
3091 		}
3092 	}
3093 
3094 	iter->ent_size = next_size;
3095 
3096 	if (ent_cpu)
3097 		*ent_cpu = next_cpu;
3098 
3099 	if (ent_ts)
3100 		*ent_ts = next_ts;
3101 
3102 	if (missing_events)
3103 		*missing_events = next_lost;
3104 
3105 	return next;
3106 }
3107 
3108 /* Find the next real entry, without updating the iterator itself */
3109 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3110 					  int *ent_cpu, u64 *ent_ts)
3111 {
3112 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3113 }
3114 
3115 /* Find the next real entry, and increment the iterator to the next entry */
3116 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3117 {
3118 	iter->ent = __find_next_entry(iter, &iter->cpu,
3119 				      &iter->lost_events, &iter->ts);
3120 
3121 	if (iter->ent)
3122 		trace_iterator_increment(iter);
3123 
3124 	return iter->ent ? iter : NULL;
3125 }
3126 
3127 static void trace_consume(struct trace_iterator *iter)
3128 {
3129 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3130 			    &iter->lost_events);
3131 }
3132 
3133 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3134 {
3135 	struct trace_iterator *iter = m->private;
3136 	int i = (int)*pos;
3137 	void *ent;
3138 
3139 	WARN_ON_ONCE(iter->leftover);
3140 
3141 	(*pos)++;
3142 
3143 	/* can't go backwards */
3144 	if (iter->idx > i)
3145 		return NULL;
3146 
3147 	if (iter->idx < 0)
3148 		ent = trace_find_next_entry_inc(iter);
3149 	else
3150 		ent = iter;
3151 
3152 	while (ent && iter->idx < i)
3153 		ent = trace_find_next_entry_inc(iter);
3154 
3155 	iter->pos = *pos;
3156 
3157 	return ent;
3158 }
3159 
3160 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3161 {
3162 	struct ring_buffer_event *event;
3163 	struct ring_buffer_iter *buf_iter;
3164 	unsigned long entries = 0;
3165 	u64 ts;
3166 
3167 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3168 
3169 	buf_iter = trace_buffer_iter(iter, cpu);
3170 	if (!buf_iter)
3171 		return;
3172 
3173 	ring_buffer_iter_reset(buf_iter);
3174 
3175 	/*
3176 	 * With the max latency tracers, it is possible that a reset
3177 	 * never took place on a CPU. This is evident when the
3178 	 * timestamp is before the start of the buffer.
3179 	 */
3180 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3181 		if (ts >= iter->trace_buffer->time_start)
3182 			break;
3183 		entries++;
3184 		ring_buffer_read(buf_iter, NULL);
3185 	}
3186 
3187 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3188 }
3189 
3190 /*
3191  * The current tracer is copied to avoid taking a global lock
3192  * all around.
3193  */
3194 static void *s_start(struct seq_file *m, loff_t *pos)
3195 {
3196 	struct trace_iterator *iter = m->private;
3197 	struct trace_array *tr = iter->tr;
3198 	int cpu_file = iter->cpu_file;
3199 	void *p = NULL;
3200 	loff_t l = 0;
3201 	int cpu;
3202 
3203 	/*
3204 	 * Copy the tracer to avoid using a global lock all around.
3205 	 * iter->trace is a copy of current_trace, so the pointer to the
3206 	 * name may be compared instead of using strcmp(), as iter->trace->name
3207 	 * will point to the same string as current_trace->name.
3208 	 */
3209 	mutex_lock(&trace_types_lock);
3210 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3211 		*iter->trace = *tr->current_trace;
3212 	mutex_unlock(&trace_types_lock);
3213 
3214 #ifdef CONFIG_TRACER_MAX_TRACE
3215 	if (iter->snapshot && iter->trace->use_max_tr)
3216 		return ERR_PTR(-EBUSY);
3217 #endif
3218 
3219 	if (!iter->snapshot)
3220 		atomic_inc(&trace_record_taskinfo_disabled);
3221 
3222 	if (*pos != iter->pos) {
3223 		iter->ent = NULL;
3224 		iter->cpu = 0;
3225 		iter->idx = -1;
3226 
3227 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3228 			for_each_tracing_cpu(cpu)
3229 				tracing_iter_reset(iter, cpu);
3230 		} else
3231 			tracing_iter_reset(iter, cpu_file);
3232 
3233 		iter->leftover = 0;
3234 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3235 			;
3236 
3237 	} else {
3238 		/*
3239 		 * If we overflowed the seq_file before, then we want
3240 		 * to just reuse the trace_seq buffer again.
3241 		 */
3242 		if (iter->leftover)
3243 			p = iter;
3244 		else {
3245 			l = *pos - 1;
3246 			p = s_next(m, p, &l);
3247 		}
3248 	}
3249 
3250 	trace_event_read_lock();
3251 	trace_access_lock(cpu_file);
3252 	return p;
3253 }
3254 
3255 static void s_stop(struct seq_file *m, void *p)
3256 {
3257 	struct trace_iterator *iter = m->private;
3258 
3259 #ifdef CONFIG_TRACER_MAX_TRACE
3260 	if (iter->snapshot && iter->trace->use_max_tr)
3261 		return;
3262 #endif
3263 
3264 	if (!iter->snapshot)
3265 		atomic_dec(&trace_record_taskinfo_disabled);
3266 
3267 	trace_access_unlock(iter->cpu_file);
3268 	trace_event_read_unlock();
3269 }
3270 
3271 static void
3272 get_total_entries(struct trace_buffer *buf,
3273 		  unsigned long *total, unsigned long *entries)
3274 {
3275 	unsigned long count;
3276 	int cpu;
3277 
3278 	*total = 0;
3279 	*entries = 0;
3280 
3281 	for_each_tracing_cpu(cpu) {
3282 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3283 		/*
3284 		 * If this buffer has skipped entries, then we hold all
3285 		 * entries for the trace and we need to ignore the
3286 		 * ones before the time stamp.
3287 		 */
3288 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3289 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3290 			/* total is the same as the entries */
3291 			*total += count;
3292 		} else
3293 			*total += count +
3294 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3295 		*entries += count;
3296 	}
3297 }
3298 
3299 static void print_lat_help_header(struct seq_file *m)
3300 {
3301 	seq_puts(m, "#                  _------=> CPU#            \n"
3302 		    "#                 / _-----=> irqs-off        \n"
3303 		    "#                | / _----=> need-resched    \n"
3304 		    "#                || / _---=> hardirq/softirq \n"
3305 		    "#                ||| / _--=> preempt-depth   \n"
3306 		    "#                |||| /     delay            \n"
3307 		    "#  cmd     pid   ||||| time  |   caller      \n"
3308 		    "#     \\   /      |||||  \\    |   /         \n");
3309 }
3310 
3311 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3312 {
3313 	unsigned long total;
3314 	unsigned long entries;
3315 
3316 	get_total_entries(buf, &total, &entries);
3317 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3318 		   entries, total, num_online_cpus());
3319 	seq_puts(m, "#\n");
3320 }
3321 
3322 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3323 				   unsigned int flags)
3324 {
3325 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3326 
3327 	print_event_info(buf, m);
3328 
3329 	seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3330 	seq_printf(m, "#              | |       |    %s     |         |\n",	 tgid ? "  |      " : "");
3331 }
3332 
3333 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3334 				       unsigned int flags)
3335 {
3336 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3337 
3338 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",	    tgid ? "          " : "");
3339 	seq_printf(m, "#                          %s / _----=> need-resched\n",	    tgid ? "          " : "");
3340 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",  tgid ? "          " : "");
3341 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",    tgid ? "          " : "");
3342 	seq_printf(m, "#                          %s||| /     delay\n",		    tgid ? "          " : "");
3343 	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3344 	seq_printf(m, "#              | |       | %s||||       |         |\n",	    tgid ? "     |    " : "");
3345 }
3346 
3347 void
3348 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3349 {
3350 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3351 	struct trace_buffer *buf = iter->trace_buffer;
3352 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3353 	struct tracer *type = iter->trace;
3354 	unsigned long entries;
3355 	unsigned long total;
3356 	const char *name = type->name;
3359 
3360 	get_total_entries(buf, &total, &entries);
3361 
3362 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3363 		   name, UTS_RELEASE);
3364 	seq_puts(m, "# -----------------------------------"
3365 		 "---------------------------------\n");
3366 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3367 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3368 		   nsecs_to_usecs(data->saved_latency),
3369 		   entries,
3370 		   total,
3371 		   buf->cpu,
3372 #if defined(CONFIG_PREEMPT_NONE)
3373 		   "server",
3374 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3375 		   "desktop",
3376 #elif defined(CONFIG_PREEMPT)
3377 		   "preempt",
3378 #else
3379 		   "unknown",
3380 #endif
3381 		   /* These are reserved for later use */
3382 		   0, 0, 0, 0);
3383 #ifdef CONFIG_SMP
3384 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3385 #else
3386 	seq_puts(m, ")\n");
3387 #endif
3388 	seq_puts(m, "#    -----------------\n");
3389 	seq_printf(m, "#    | task: %.16s-%d "
3390 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3391 		   data->comm, data->pid,
3392 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3393 		   data->policy, data->rt_priority);
3394 	seq_puts(m, "#    -----------------\n");
3395 
3396 	if (data->critical_start) {
3397 		seq_puts(m, "#  => started at: ");
3398 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3399 		trace_print_seq(m, &iter->seq);
3400 		seq_puts(m, "\n#  => ended at:   ");
3401 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3402 		trace_print_seq(m, &iter->seq);
3403 		seq_puts(m, "\n#\n");
3404 	}
3405 
3406 	seq_puts(m, "#\n");
3407 }
3408 
3409 static void test_cpu_buff_start(struct trace_iterator *iter)
3410 {
3411 	struct trace_seq *s = &iter->seq;
3412 	struct trace_array *tr = iter->tr;
3413 
3414 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3415 		return;
3416 
3417 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3418 		return;
3419 
3420 	if (cpumask_available(iter->started) &&
3421 	    cpumask_test_cpu(iter->cpu, iter->started))
3422 		return;
3423 
3424 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3425 		return;
3426 
3427 	if (cpumask_available(iter->started))
3428 		cpumask_set_cpu(iter->cpu, iter->started);
3429 
3430 	/* Don't print started cpu buffer for the first entry of the trace */
3431 	if (iter->idx > 1)
3432 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3433 				iter->cpu);
3434 }
3435 
3436 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3437 {
3438 	struct trace_array *tr = iter->tr;
3439 	struct trace_seq *s = &iter->seq;
3440 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3441 	struct trace_entry *entry;
3442 	struct trace_event *event;
3443 
3444 	entry = iter->ent;
3445 
3446 	test_cpu_buff_start(iter);
3447 
3448 	event = ftrace_find_event(entry->type);
3449 
3450 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3451 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3452 			trace_print_lat_context(iter);
3453 		else
3454 			trace_print_context(iter);
3455 	}
3456 
3457 	if (trace_seq_has_overflowed(s))
3458 		return TRACE_TYPE_PARTIAL_LINE;
3459 
3460 	if (event)
3461 		return event->funcs->trace(iter, sym_flags, event);
3462 
3463 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3464 
3465 	return trace_handle_return(s);
3466 }
3467 
3468 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3469 {
3470 	struct trace_array *tr = iter->tr;
3471 	struct trace_seq *s = &iter->seq;
3472 	struct trace_entry *entry;
3473 	struct trace_event *event;
3474 
3475 	entry = iter->ent;
3476 
3477 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3478 		trace_seq_printf(s, "%d %d %llu ",
3479 				 entry->pid, iter->cpu, iter->ts);
3480 
3481 	if (trace_seq_has_overflowed(s))
3482 		return TRACE_TYPE_PARTIAL_LINE;
3483 
3484 	event = ftrace_find_event(entry->type);
3485 	if (event)
3486 		return event->funcs->raw(iter, 0, event);
3487 
3488 	trace_seq_printf(s, "%d ?\n", entry->type);
3489 
3490 	return trace_handle_return(s);
3491 }
3492 
3493 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3494 {
3495 	struct trace_array *tr = iter->tr;
3496 	struct trace_seq *s = &iter->seq;
3497 	unsigned char newline = '\n';
3498 	struct trace_entry *entry;
3499 	struct trace_event *event;
3500 
3501 	entry = iter->ent;
3502 
3503 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3504 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3505 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3506 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3507 		if (trace_seq_has_overflowed(s))
3508 			return TRACE_TYPE_PARTIAL_LINE;
3509 	}
3510 
3511 	event = ftrace_find_event(entry->type);
3512 	if (event) {
3513 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3514 		if (ret != TRACE_TYPE_HANDLED)
3515 			return ret;
3516 	}
3517 
3518 	SEQ_PUT_FIELD(s, newline);
3519 
3520 	return trace_handle_return(s);
3521 }
3522 
3523 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3524 {
3525 	struct trace_array *tr = iter->tr;
3526 	struct trace_seq *s = &iter->seq;
3527 	struct trace_entry *entry;
3528 	struct trace_event *event;
3529 
3530 	entry = iter->ent;
3531 
3532 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3533 		SEQ_PUT_FIELD(s, entry->pid);
3534 		SEQ_PUT_FIELD(s, iter->cpu);
3535 		SEQ_PUT_FIELD(s, iter->ts);
3536 		if (trace_seq_has_overflowed(s))
3537 			return TRACE_TYPE_PARTIAL_LINE;
3538 	}
3539 
3540 	event = ftrace_find_event(entry->type);
3541 	return event ? event->funcs->binary(iter, 0, event) :
3542 		TRACE_TYPE_HANDLED;
3543 }
3544 
3545 int trace_empty(struct trace_iterator *iter)
3546 {
3547 	struct ring_buffer_iter *buf_iter;
3548 	int cpu;
3549 
3550 	/* If we are looking at one CPU buffer, only check that one */
3551 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3552 		cpu = iter->cpu_file;
3553 		buf_iter = trace_buffer_iter(iter, cpu);
3554 		if (buf_iter) {
3555 			if (!ring_buffer_iter_empty(buf_iter))
3556 				return 0;
3557 		} else {
3558 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3559 				return 0;
3560 		}
3561 		return 1;
3562 	}
3563 
3564 	for_each_tracing_cpu(cpu) {
3565 		buf_iter = trace_buffer_iter(iter, cpu);
3566 		if (buf_iter) {
3567 			if (!ring_buffer_iter_empty(buf_iter))
3568 				return 0;
3569 		} else {
3570 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3571 				return 0;
3572 		}
3573 	}
3574 
3575 	return 1;
3576 }
3577 
3578 /*  Called with trace_event_read_lock() held. */
3579 enum print_line_t print_trace_line(struct trace_iterator *iter)
3580 {
3581 	struct trace_array *tr = iter->tr;
3582 	unsigned long trace_flags = tr->trace_flags;
3583 	enum print_line_t ret;
3584 
3585 	if (iter->lost_events) {
3586 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3587 				 iter->cpu, iter->lost_events);
3588 		if (trace_seq_has_overflowed(&iter->seq))
3589 			return TRACE_TYPE_PARTIAL_LINE;
3590 	}
3591 
3592 	if (iter->trace && iter->trace->print_line) {
3593 		ret = iter->trace->print_line(iter);
3594 		if (ret != TRACE_TYPE_UNHANDLED)
3595 			return ret;
3596 	}
3597 
3598 	if (iter->ent->type == TRACE_BPUTS &&
3599 			trace_flags & TRACE_ITER_PRINTK &&
3600 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3601 		return trace_print_bputs_msg_only(iter);
3602 
3603 	if (iter->ent->type == TRACE_BPRINT &&
3604 			trace_flags & TRACE_ITER_PRINTK &&
3605 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3606 		return trace_print_bprintk_msg_only(iter);
3607 
3608 	if (iter->ent->type == TRACE_PRINT &&
3609 			trace_flags & TRACE_ITER_PRINTK &&
3610 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3611 		return trace_print_printk_msg_only(iter);
3612 
3613 	if (trace_flags & TRACE_ITER_BIN)
3614 		return print_bin_fmt(iter);
3615 
3616 	if (trace_flags & TRACE_ITER_HEX)
3617 		return print_hex_fmt(iter);
3618 
3619 	if (trace_flags & TRACE_ITER_RAW)
3620 		return print_raw_fmt(iter);
3621 
3622 	return print_trace_fmt(iter);
3623 }
3624 
3625 void trace_latency_header(struct seq_file *m)
3626 {
3627 	struct trace_iterator *iter = m->private;
3628 	struct trace_array *tr = iter->tr;
3629 
3630 	/* print nothing if the buffers are empty */
3631 	if (trace_empty(iter))
3632 		return;
3633 
3634 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3635 		print_trace_header(m, iter);
3636 
3637 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3638 		print_lat_help_header(m);
3639 }
3640 
3641 void trace_default_header(struct seq_file *m)
3642 {
3643 	struct trace_iterator *iter = m->private;
3644 	struct trace_array *tr = iter->tr;
3645 	unsigned long trace_flags = tr->trace_flags;
3646 
3647 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3648 		return;
3649 
3650 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3651 		/* print nothing if the buffers are empty */
3652 		if (trace_empty(iter))
3653 			return;
3654 		print_trace_header(m, iter);
3655 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3656 			print_lat_help_header(m);
3657 	} else {
3658 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3659 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3660 				print_func_help_header_irq(iter->trace_buffer,
3661 							   m, trace_flags);
3662 			else
3663 				print_func_help_header(iter->trace_buffer, m,
3664 						       trace_flags);
3665 		}
3666 	}
3667 }
3668 
3669 static void test_ftrace_alive(struct seq_file *m)
3670 {
3671 	if (!ftrace_is_dead())
3672 		return;
3673 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3674 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3675 }
3676 
3677 #ifdef CONFIG_TRACER_MAX_TRACE
3678 static void show_snapshot_main_help(struct seq_file *m)
3679 {
3680 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3681 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3682 		    "#                      Takes a snapshot of the main buffer.\n"
3683 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3684 		    "#                      (Doesn't have to be '2' works with any number that\n"
3685 		    "#                       is not a '0' or '1')\n");
3686 }
3687 
3688 static void show_snapshot_percpu_help(struct seq_file *m)
3689 {
3690 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3691 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3692 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3693 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3694 #else
3695 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3696 		    "#                     Must use main snapshot file to allocate.\n");
3697 #endif
3698 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3699 		    "#                      (Doesn't have to be '2' works with any number that\n"
3700 		    "#                       is not a '0' or '1')\n");
3701 }
3702 
3703 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3704 {
3705 	if (iter->tr->allocated_snapshot)
3706 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3707 	else
3708 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3709 
3710 	seq_puts(m, "# Snapshot commands:\n");
3711 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3712 		show_snapshot_main_help(m);
3713 	else
3714 		show_snapshot_percpu_help(m);
3715 }
3716 #else
3717 /* Should never be called */
3718 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3719 #endif
3720 
3721 static int s_show(struct seq_file *m, void *v)
3722 {
3723 	struct trace_iterator *iter = v;
3724 	int ret;
3725 
3726 	if (iter->ent == NULL) {
3727 		if (iter->tr) {
3728 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3729 			seq_puts(m, "#\n");
3730 			test_ftrace_alive(m);
3731 		}
3732 		if (iter->snapshot && trace_empty(iter))
3733 			print_snapshot_help(m, iter);
3734 		else if (iter->trace && iter->trace->print_header)
3735 			iter->trace->print_header(m);
3736 		else
3737 			trace_default_header(m);
3738 
3739 	} else if (iter->leftover) {
3740 		/*
3741 		 * If we filled the seq_file buffer earlier, we
3742 		 * want to just show it now.
3743 		 */
3744 		ret = trace_print_seq(m, &iter->seq);
3745 
3746 		/* ret should this time be zero, but you never know */
3747 		iter->leftover = ret;
3748 
3749 	} else {
3750 		print_trace_line(iter);
3751 		ret = trace_print_seq(m, &iter->seq);
3752 		/*
3753 		 * If we overflow the seq_file buffer, then it will
3754 		 * ask us for this data again at start up.
3755 		 * Use that instead.
3756 		 *  ret is 0 if seq_file write succeeded.
3757 		 *        -1 otherwise.
3758 		 */
3759 		iter->leftover = ret;
3760 	}
3761 
3762 	return 0;
3763 }
3764 
3765 /*
3766  * Should be used after trace_array_get(); trace_types_lock
3767  * ensures that i_cdev was already initialized.
3768  */
3769 static inline int tracing_get_cpu(struct inode *inode)
3770 {
3771 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3772 		return (long)inode->i_cdev - 1;
3773 	return RING_BUFFER_ALL_CPUS;
3774 }
3775 
3776 static const struct seq_operations tracer_seq_ops = {
3777 	.start		= s_start,
3778 	.next		= s_next,
3779 	.stop		= s_stop,
3780 	.show		= s_show,
3781 };
3782 
3783 static struct trace_iterator *
3784 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3785 {
3786 	struct trace_array *tr = inode->i_private;
3787 	struct trace_iterator *iter;
3788 	int cpu;
3789 
3790 	if (tracing_disabled)
3791 		return ERR_PTR(-ENODEV);
3792 
3793 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3794 	if (!iter)
3795 		return ERR_PTR(-ENOMEM);
3796 
3797 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3798 				    GFP_KERNEL);
3799 	if (!iter->buffer_iter)
3800 		goto release;
3801 
3802 	/*
3803 	 * We make a copy of the current tracer to avoid concurrent
3804 	 * changes to it while we are reading.
3805 	 */
3806 	mutex_lock(&trace_types_lock);
3807 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3808 	if (!iter->trace)
3809 		goto fail;
3810 
3811 	*iter->trace = *tr->current_trace;
3812 
3813 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3814 		goto fail;
3815 
3816 	iter->tr = tr;
3817 
3818 #ifdef CONFIG_TRACER_MAX_TRACE
3819 	/* Currently only the top directory has a snapshot */
3820 	if (tr->current_trace->print_max || snapshot)
3821 		iter->trace_buffer = &tr->max_buffer;
3822 	else
3823 #endif
3824 		iter->trace_buffer = &tr->trace_buffer;
3825 	iter->snapshot = snapshot;
3826 	iter->pos = -1;
3827 	iter->cpu_file = tracing_get_cpu(inode);
3828 	mutex_init(&iter->mutex);
3829 
3830 	/* Notify the tracer early; before we stop tracing. */
3831 	if (iter->trace && iter->trace->open)
3832 		iter->trace->open(iter);
3833 
3834 	/* Annotate start of buffers if we had overruns */
3835 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3836 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3837 
3838 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3839 	if (trace_clocks[tr->clock_id].in_ns)
3840 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3841 
3842 	/* stop the trace while dumping if we are not opening "snapshot" */
3843 	if (!iter->snapshot)
3844 		tracing_stop_tr(tr);
3845 
3846 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3847 		for_each_tracing_cpu(cpu) {
3848 			iter->buffer_iter[cpu] =
3849 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3850 		}
3851 		ring_buffer_read_prepare_sync();
3852 		for_each_tracing_cpu(cpu) {
3853 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3854 			tracing_iter_reset(iter, cpu);
3855 		}
3856 	} else {
3857 		cpu = iter->cpu_file;
3858 		iter->buffer_iter[cpu] =
3859 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3860 		ring_buffer_read_prepare_sync();
3861 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3862 		tracing_iter_reset(iter, cpu);
3863 	}
3864 
3865 	mutex_unlock(&trace_types_lock);
3866 
3867 	return iter;
3868 
3869  fail:
3870 	mutex_unlock(&trace_types_lock);
3871 	kfree(iter->trace);
3872 	kfree(iter->buffer_iter);
3873 release:
3874 	seq_release_private(inode, file);
3875 	return ERR_PTR(-ENOMEM);
3876 }
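
/*
 * A note on the read_prepare/read_start split used above: preparing the
 * iterators for all CPUs first and then issuing a single
 * ring_buffer_read_prepare_sync() lets the expensive synchronization be
 * paid once for the whole set of CPUs rather than once per CPU (this is
 * the intent suggested by the API split; see the ring buffer code for the
 * authoritative details).
 */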
3877 
3878 int tracing_open_generic(struct inode *inode, struct file *filp)
3879 {
3880 	if (tracing_disabled)
3881 		return -ENODEV;
3882 
3883 	filp->private_data = inode->i_private;
3884 	return 0;
3885 }
3886 
3887 bool tracing_is_disabled(void)
3888 {
3889 	return tracing_disabled;
3890 }
3891 
3892 /*
3893  * Open and update trace_array ref count.
3894  * Must have the current trace_array passed to it.
3895  */
3896 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3897 {
3898 	struct trace_array *tr = inode->i_private;
3899 
3900 	if (tracing_disabled)
3901 		return -ENODEV;
3902 
3903 	if (trace_array_get(tr) < 0)
3904 		return -ENODEV;
3905 
3906 	filp->private_data = inode->i_private;
3907 
3908 	return 0;
3909 }
3910 
3911 static int tracing_release(struct inode *inode, struct file *file)
3912 {
3913 	struct trace_array *tr = inode->i_private;
3914 	struct seq_file *m = file->private_data;
3915 	struct trace_iterator *iter;
3916 	int cpu;
3917 
3918 	if (!(file->f_mode & FMODE_READ)) {
3919 		trace_array_put(tr);
3920 		return 0;
3921 	}
3922 
3923 	/* Writes do not use seq_file */
3924 	iter = m->private;
3925 	mutex_lock(&trace_types_lock);
3926 
3927 	for_each_tracing_cpu(cpu) {
3928 		if (iter->buffer_iter[cpu])
3929 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3930 	}
3931 
3932 	if (iter->trace && iter->trace->close)
3933 		iter->trace->close(iter);
3934 
3935 	if (!iter->snapshot)
3936 		/* reenable tracing if it was previously enabled */
3937 		tracing_start_tr(tr);
3938 
3939 	__trace_array_put(tr);
3940 
3941 	mutex_unlock(&trace_types_lock);
3942 
3943 	mutex_destroy(&iter->mutex);
3944 	free_cpumask_var(iter->started);
3945 	kfree(iter->trace);
3946 	kfree(iter->buffer_iter);
3947 	seq_release_private(inode, file);
3948 
3949 	return 0;
3950 }
3951 
3952 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3953 {
3954 	struct trace_array *tr = inode->i_private;
3955 
3956 	trace_array_put(tr);
3957 	return 0;
3958 }
3959 
3960 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3961 {
3962 	struct trace_array *tr = inode->i_private;
3963 
3964 	trace_array_put(tr);
3965 
3966 	return single_release(inode, file);
3967 }
3968 
3969 static int tracing_open(struct inode *inode, struct file *file)
3970 {
3971 	struct trace_array *tr = inode->i_private;
3972 	struct trace_iterator *iter;
3973 	int ret = 0;
3974 
3975 	if (trace_array_get(tr) < 0)
3976 		return -ENODEV;
3977 
3978 	/* If this file was open for write, then erase contents */
3979 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3980 		int cpu = tracing_get_cpu(inode);
3981 
3982 		if (cpu == RING_BUFFER_ALL_CPUS)
3983 			tracing_reset_online_cpus(&tr->trace_buffer);
3984 		else
3985 			tracing_reset(&tr->trace_buffer, cpu);
3986 	}
3987 
3988 	if (file->f_mode & FMODE_READ) {
3989 		iter = __tracing_open(inode, file, false);
3990 		if (IS_ERR(iter))
3991 			ret = PTR_ERR(iter);
3992 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3993 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3994 	}
3995 
3996 	if (ret < 0)
3997 		trace_array_put(tr);
3998 
3999 	return ret;
4000 }
4001 
4002 /*
4003  * Some tracers are not suitable for instance buffers.
4004  * A tracer is always available for the global array (toplevel)
4005  * or if it explicitly states that it is.
4006  */
4007 static bool
4008 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4009 {
4010 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4011 }
4012 
4013 /* Find the next tracer that this trace array may use */
4014 static struct tracer *
4015 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4016 {
4017 	while (t && !trace_ok_for_array(t, tr))
4018 		t = t->next;
4019 
4020 	return t;
4021 }
4022 
4023 static void *
4024 t_next(struct seq_file *m, void *v, loff_t *pos)
4025 {
4026 	struct trace_array *tr = m->private;
4027 	struct tracer *t = v;
4028 
4029 	(*pos)++;
4030 
4031 	if (t)
4032 		t = get_tracer_for_array(tr, t->next);
4033 
4034 	return t;
4035 }
4036 
4037 static void *t_start(struct seq_file *m, loff_t *pos)
4038 {
4039 	struct trace_array *tr = m->private;
4040 	struct tracer *t;
4041 	loff_t l = 0;
4042 
4043 	mutex_lock(&trace_types_lock);
4044 
4045 	t = get_tracer_for_array(tr, trace_types);
4046 	for (; t && l < *pos; t = t_next(m, t, &l))
4047 			;
4048 
4049 	return t;
4050 }
4051 
4052 static void t_stop(struct seq_file *m, void *p)
4053 {
4054 	mutex_unlock(&trace_types_lock);
4055 }
4056 
4057 static int t_show(struct seq_file *m, void *v)
4058 {
4059 	struct tracer *t = v;
4060 
4061 	if (!t)
4062 		return 0;
4063 
4064 	seq_puts(m, t->name);
4065 	if (t->next)
4066 		seq_putc(m, ' ');
4067 	else
4068 		seq_putc(m, '\n');
4069 
4070 	return 0;
4071 }
4072 
4073 static const struct seq_operations show_traces_seq_ops = {
4074 	.start		= t_start,
4075 	.next		= t_next,
4076 	.stop		= t_stop,
4077 	.show		= t_show,
4078 };
4079 
4080 static int show_traces_open(struct inode *inode, struct file *file)
4081 {
4082 	struct trace_array *tr = inode->i_private;
4083 	struct seq_file *m;
4084 	int ret;
4085 
4086 	if (tracing_disabled)
4087 		return -ENODEV;
4088 
4089 	ret = seq_open(file, &show_traces_seq_ops);
4090 	if (ret)
4091 		return ret;
4092 
4093 	m = file->private_data;
4094 	m->private = tr;
4095 
4096 	return 0;
4097 }
4098 
4099 static ssize_t
4100 tracing_write_stub(struct file *filp, const char __user *ubuf,
4101 		   size_t count, loff_t *ppos)
4102 {
4103 	return count;
4104 }
4105 
4106 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4107 {
4108 	int ret;
4109 
4110 	if (file->f_mode & FMODE_READ)
4111 		ret = seq_lseek(file, offset, whence);
4112 	else
4113 		file->f_pos = ret = 0;
4114 
4115 	return ret;
4116 }
4117 
4118 static const struct file_operations tracing_fops = {
4119 	.open		= tracing_open,
4120 	.read		= seq_read,
4121 	.write		= tracing_write_stub,
4122 	.llseek		= tracing_lseek,
4123 	.release	= tracing_release,
4124 };
4125 
4126 static const struct file_operations show_traces_fops = {
4127 	.open		= show_traces_open,
4128 	.read		= seq_read,
4129 	.release	= seq_release,
4130 	.llseek		= seq_lseek,
4131 };
4132 
4133 /*
4134  * The tracer itself will not take this lock, but still we want
4135  * to provide a consistent cpumask to user-space:
4136  */
4137 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4138 
4139 /*
4140  * Temporary storage for the character representation of the
4141  * CPU bitmask (and one more byte for the newline):
4142  */
4143 static char mask_str[NR_CPUS + 1];
4144 
4145 static ssize_t
4146 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4147 		     size_t count, loff_t *ppos)
4148 {
4149 	struct trace_array *tr = file_inode(filp)->i_private;
4150 	int len;
4151 
4152 	mutex_lock(&tracing_cpumask_update_lock);
4153 
4154 	len = snprintf(mask_str, count, "%*pb\n",
4155 		       cpumask_pr_args(tr->tracing_cpumask));
4156 	if (len >= count) {
4157 		count = -EINVAL;
4158 		goto out_err;
4159 	}
4160 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
4161 
4162 out_err:
4163 	mutex_unlock(&tracing_cpumask_update_lock);
4164 
4165 	return count;
4166 }
4167 
4168 static ssize_t
4169 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4170 		      size_t count, loff_t *ppos)
4171 {
4172 	struct trace_array *tr = file_inode(filp)->i_private;
4173 	cpumask_var_t tracing_cpumask_new;
4174 	int err, cpu;
4175 
4176 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4177 		return -ENOMEM;
4178 
4179 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4180 	if (err)
4181 		goto err_unlock;
4182 
4183 	mutex_lock(&tracing_cpumask_update_lock);
4184 
4185 	local_irq_disable();
4186 	arch_spin_lock(&tr->max_lock);
4187 	for_each_tracing_cpu(cpu) {
4188 		/*
4189 		 * Increase/decrease the disabled counter if we are
4190 		 * about to flip a bit in the cpumask:
4191 		 */
4192 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4193 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4194 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4195 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4196 		}
4197 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4198 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4199 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4200 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4201 		}
4202 	}
4203 	arch_spin_unlock(&tr->max_lock);
4204 	local_irq_enable();
4205 
4206 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4207 
4208 	mutex_unlock(&tracing_cpumask_update_lock);
4209 	free_cpumask_var(tracing_cpumask_new);
4210 
4211 	return count;
4212 
4213 err_unlock:
4214 	free_cpumask_var(tracing_cpumask_new);
4215 
4216 	return err;
4217 }
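
/*
 * Usage sketch (illustrative only): the mask is written in the hex cpumask
 * format accepted by cpumask_parse_user(), e.g. on a 4-CPU system:
 *
 *	# echo 3 > tracing_cpumask		(record on CPUs 0 and 1 only)
 *	# cat tracing_cpumask
 *	3
 *
 * Clearing a bit only stops recording on that CPU via
 * ring_buffer_record_disable_cpu(); the per-cpu buffer contents are kept.
 */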
4218 
4219 static const struct file_operations tracing_cpumask_fops = {
4220 	.open		= tracing_open_generic_tr,
4221 	.read		= tracing_cpumask_read,
4222 	.write		= tracing_cpumask_write,
4223 	.release	= tracing_release_generic_tr,
4224 	.llseek		= generic_file_llseek,
4225 };
4226 
4227 static int tracing_trace_options_show(struct seq_file *m, void *v)
4228 {
4229 	struct tracer_opt *trace_opts;
4230 	struct trace_array *tr = m->private;
4231 	u32 tracer_flags;
4232 	int i;
4233 
4234 	mutex_lock(&trace_types_lock);
4235 	tracer_flags = tr->current_trace->flags->val;
4236 	trace_opts = tr->current_trace->flags->opts;
4237 
4238 	for (i = 0; trace_options[i]; i++) {
4239 		if (tr->trace_flags & (1 << i))
4240 			seq_printf(m, "%s\n", trace_options[i]);
4241 		else
4242 			seq_printf(m, "no%s\n", trace_options[i]);
4243 	}
4244 
4245 	for (i = 0; trace_opts[i].name; i++) {
4246 		if (tracer_flags & trace_opts[i].bit)
4247 			seq_printf(m, "%s\n", trace_opts[i].name);
4248 		else
4249 			seq_printf(m, "no%s\n", trace_opts[i].name);
4250 	}
4251 	mutex_unlock(&trace_types_lock);
4252 
4253 	return 0;
4254 }
4255 
4256 static int __set_tracer_option(struct trace_array *tr,
4257 			       struct tracer_flags *tracer_flags,
4258 			       struct tracer_opt *opts, int neg)
4259 {
4260 	struct tracer *trace = tracer_flags->trace;
4261 	int ret;
4262 
4263 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4264 	if (ret)
4265 		return ret;
4266 
4267 	if (neg)
4268 		tracer_flags->val &= ~opts->bit;
4269 	else
4270 		tracer_flags->val |= opts->bit;
4271 	return 0;
4272 }
4273 
4274 /* Try to assign a tracer specific option */
4275 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4276 {
4277 	struct tracer *trace = tr->current_trace;
4278 	struct tracer_flags *tracer_flags = trace->flags;
4279 	struct tracer_opt *opts = NULL;
4280 	int i;
4281 
4282 	for (i = 0; tracer_flags->opts[i].name; i++) {
4283 		opts = &tracer_flags->opts[i];
4284 
4285 		if (strcmp(cmp, opts->name) == 0)
4286 			return __set_tracer_option(tr, trace->flags, opts, neg);
4287 	}
4288 
4289 	return -EINVAL;
4290 }
4291 
4292 /* Some tracers require overwrite to stay enabled */
4293 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4294 {
4295 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4296 		return -1;
4297 
4298 	return 0;
4299 }
4300 
4301 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4302 {
4303 	/* do nothing if flag is already set */
4304 	if (!!(tr->trace_flags & mask) == !!enabled)
4305 		return 0;
4306 
4307 	/* Give the tracer a chance to approve the change */
4308 	if (tr->current_trace->flag_changed)
4309 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4310 			return -EINVAL;
4311 
4312 	if (enabled)
4313 		tr->trace_flags |= mask;
4314 	else
4315 		tr->trace_flags &= ~mask;
4316 
4317 	if (mask == TRACE_ITER_RECORD_CMD)
4318 		trace_event_enable_cmd_record(enabled);
4319 
4320 	if (mask == TRACE_ITER_RECORD_TGID) {
4321 		if (!tgid_map)
4322 			tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4323 					   GFP_KERNEL);
4324 		if (!tgid_map) {
4325 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4326 			return -ENOMEM;
4327 		}
4328 
4329 		trace_event_enable_tgid_record(enabled);
4330 	}
4331 
4332 	if (mask == TRACE_ITER_EVENT_FORK)
4333 		trace_event_follow_fork(tr, enabled);
4334 
4335 	if (mask == TRACE_ITER_FUNC_FORK)
4336 		ftrace_pid_follow_fork(tr, enabled);
4337 
4338 	if (mask == TRACE_ITER_OVERWRITE) {
4339 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4340 #ifdef CONFIG_TRACER_MAX_TRACE
4341 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4342 #endif
4343 	}
4344 
4345 	if (mask == TRACE_ITER_PRINTK) {
4346 		trace_printk_start_stop_comm(enabled);
4347 		trace_printk_control(enabled);
4348 	}
4349 
4350 	return 0;
4351 }
4352 
4353 static int trace_set_options(struct trace_array *tr, char *option)
4354 {
4355 	char *cmp;
4356 	int neg = 0;
4357 	int ret = -ENODEV;
4358 	int i;
4359 	size_t orig_len = strlen(option);
4360 
4361 	cmp = strstrip(option);
4362 
4363 	if (strncmp(cmp, "no", 2) == 0) {
4364 		neg = 1;
4365 		cmp += 2;
4366 	}
4367 
4368 	mutex_lock(&trace_types_lock);
4369 
4370 	for (i = 0; trace_options[i]; i++) {
4371 		if (strcmp(cmp, trace_options[i]) == 0) {
4372 			ret = set_tracer_flag(tr, 1 << i, !neg);
4373 			break;
4374 		}
4375 	}
4376 
4377 	/* If no option could be set, test the specific tracer options */
4378 	if (!trace_options[i])
4379 		ret = set_tracer_option(tr, cmp, neg);
4380 
4381 	mutex_unlock(&trace_types_lock);
4382 
4383 	/*
4384 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4385 	 * turn it back into a space.
4386 	 */
4387 	if (orig_len > strlen(option))
4388 		option[strlen(option)] = ' ';
4389 
4390 	return ret;
4391 }
4392 
4393 static void __init apply_trace_boot_options(void)
4394 {
4395 	char *buf = trace_boot_options_buf;
4396 	char *option;
4397 
4398 	while (true) {
4399 		option = strsep(&buf, ",");
4400 
4401 		if (!option)
4402 			break;
4403 
4404 		if (*option)
4405 			trace_set_options(&global_trace, option);
4406 
4407 		/* Put back the comma to allow this to be called again */
4408 		if (buf)
4409 			*(buf - 1) = ',';
4410 	}
4411 }
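
/*
 * Usage sketch (illustrative only): the option strings handled by
 * trace_set_options() are the same ones user space writes to the
 * trace_options file and the trace_options= boot parameter carries, e.g.:
 *
 *	# echo sym-offset > trace_options	(set an option)
 *	# echo nosym-offset > trace_options	(clear it again)
 *
 * A leading "no" clears the flag; names that do not match a generic option
 * fall through to the tracer-specific options.
 */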
4412 
4413 static ssize_t
4414 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4415 			size_t cnt, loff_t *ppos)
4416 {
4417 	struct seq_file *m = filp->private_data;
4418 	struct trace_array *tr = m->private;
4419 	char buf[64];
4420 	int ret;
4421 
4422 	if (cnt >= sizeof(buf))
4423 		return -EINVAL;
4424 
4425 	if (copy_from_user(buf, ubuf, cnt))
4426 		return -EFAULT;
4427 
4428 	buf[cnt] = 0;
4429 
4430 	ret = trace_set_options(tr, buf);
4431 	if (ret < 0)
4432 		return ret;
4433 
4434 	*ppos += cnt;
4435 
4436 	return cnt;
4437 }
4438 
4439 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4440 {
4441 	struct trace_array *tr = inode->i_private;
4442 	int ret;
4443 
4444 	if (tracing_disabled)
4445 		return -ENODEV;
4446 
4447 	if (trace_array_get(tr) < 0)
4448 		return -ENODEV;
4449 
4450 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4451 	if (ret < 0)
4452 		trace_array_put(tr);
4453 
4454 	return ret;
4455 }
4456 
4457 static const struct file_operations tracing_iter_fops = {
4458 	.open		= tracing_trace_options_open,
4459 	.read		= seq_read,
4460 	.llseek		= seq_lseek,
4461 	.release	= tracing_single_release_tr,
4462 	.write		= tracing_trace_options_write,
4463 };
4464 
4465 static const char readme_msg[] =
4466 	"tracing mini-HOWTO:\n\n"
4467 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4468 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4469 	" Important files:\n"
4470 	"  trace\t\t\t- The static contents of the buffer\n"
4471 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4472 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4473 	"  current_tracer\t- function and latency tracers\n"
4474 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4475 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4476 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4477 	"  trace_clock\t\t- change the clock used to order events\n"
4478 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4479 	"      global:   Synced across CPUs but slows tracing down.\n"
4480 	"     counter:   Not a clock, but just an increment\n"
4481 	"      uptime:   Jiffy counter from time of boot\n"
4482 	"        perf:   Same clock that perf events use\n"
4483 #ifdef CONFIG_X86_64
4484 	"     x86-tsc:   TSC cycle counter\n"
4485 #endif
4486 	"\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
4487 	"\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
4488 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4489 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4490 	"\t\t\t  Remove sub-buffer with rmdir\n"
4491 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4492 	"\t\t\t  Disable an option by adding the prefix 'no' to the\n"
4493 	"\t\t\t  option name\n"
4494 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4495 #ifdef CONFIG_DYNAMIC_FTRACE
4496 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4497 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4498 	"\t\t\t  functions\n"
4499 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4500 	"\t     modules: Can select a group via module\n"
4501 	"\t      Format: :mod:<module-name>\n"
4502 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4503 	"\t    triggers: a command to perform when function is hit\n"
4504 	"\t      Format: <function>:<trigger>[:count]\n"
4505 	"\t     trigger: traceon, traceoff\n"
4506 	"\t\t      enable_event:<system>:<event>\n"
4507 	"\t\t      disable_event:<system>:<event>\n"
4508 #ifdef CONFIG_STACKTRACE
4509 	"\t\t      stacktrace\n"
4510 #endif
4511 #ifdef CONFIG_TRACER_SNAPSHOT
4512 	"\t\t      snapshot\n"
4513 #endif
4514 	"\t\t      dump\n"
4515 	"\t\t      cpudump\n"
4516 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4517 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4518 	"\t     The first one will disable tracing every time do_fault is hit\n"
4519 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4520 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4521 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4522 	"\t       the counter will not decrement. It only decrements when the\n"
4523 	"\t       trigger did work\n"
4524 	"\t     To remove trigger without count:\n"
4525 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4526 	"\t     To remove trigger with a count:\n"
4527 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4528 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4529 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4530 	"\t    modules: Can select a group via module command :mod:\n"
4531 	"\t    Does not accept triggers\n"
4532 #endif /* CONFIG_DYNAMIC_FTRACE */
4533 #ifdef CONFIG_FUNCTION_TRACER
4534 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4535 	"\t\t    (function)\n"
4536 #endif
4537 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4538 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4539 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4540 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4541 #endif
4542 #ifdef CONFIG_TRACER_SNAPSHOT
4543 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4544 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4545 	"\t\t\t  information\n"
4546 #endif
4547 #ifdef CONFIG_STACK_TRACER
4548 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4549 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4550 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4551 	"\t\t\t  new trace)\n"
4552 #ifdef CONFIG_DYNAMIC_FTRACE
4553 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4554 	"\t\t\t  traces\n"
4555 #endif
4556 #endif /* CONFIG_STACK_TRACER */
4557 #ifdef CONFIG_KPROBE_EVENTS
4558 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4559 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4560 #endif
4561 #ifdef CONFIG_UPROBE_EVENTS
4562 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4563 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4564 #endif
4565 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4566 	"\t  accepts: event-definitions (one definition per line)\n"
4567 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4568 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4569 	"\t           -:[<group>/]<event>\n"
4570 #ifdef CONFIG_KPROBE_EVENTS
4571 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4572 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4573 #endif
4574 #ifdef CONFIG_UPROBE_EVENTS
4575 	"\t    place: <path>:<offset>\n"
4576 #endif
4577 	"\t     args: <name>=fetcharg[:type]\n"
4578 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4579 	"\t           $stack<index>, $stack, $retval, $comm\n"
4580 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4581 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4582 #endif
4583 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4584 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4585 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4586 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4587 	"\t\t\t  events\n"
4588 	"      filter\t\t- If set, only events passing filter are traced\n"
4589 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4590 	"\t\t\t  <event>:\n"
4591 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4592 	"      filter\t\t- If set, only events passing filter are traced\n"
4593 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4594 	"\t    Format: <trigger>[:count][if <filter>]\n"
4595 	"\t   trigger: traceon, traceoff\n"
4596 	"\t            enable_event:<system>:<event>\n"
4597 	"\t            disable_event:<system>:<event>\n"
4598 #ifdef CONFIG_HIST_TRIGGERS
4599 	"\t            enable_hist:<system>:<event>\n"
4600 	"\t            disable_hist:<system>:<event>\n"
4601 #endif
4602 #ifdef CONFIG_STACKTRACE
4603 	"\t\t    stacktrace\n"
4604 #endif
4605 #ifdef CONFIG_TRACER_SNAPSHOT
4606 	"\t\t    snapshot\n"
4607 #endif
4608 #ifdef CONFIG_HIST_TRIGGERS
4609 	"\t\t    hist (see below)\n"
4610 #endif
4611 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4612 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4613 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4614 	"\t                  events/block/block_unplug/trigger\n"
4615 	"\t   The first disables tracing every time block_unplug is hit.\n"
4616 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4617 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4618 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4619 	"\t   Like function triggers, the counter is only decremented if it\n"
4620 	"\t    enabled or disabled tracing.\n"
4621 	"\t   To remove a trigger without a count:\n"
4622 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4623 	"\t   To remove a trigger with a count:\n"
4624 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4625 	"\t   Filters can be ignored when removing a trigger.\n"
4626 #ifdef CONFIG_HIST_TRIGGERS
4627 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4628 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4629 	"\t            [:values=<field1[,field2,...]>]\n"
4630 	"\t            [:sort=<field1[,field2,...]>]\n"
4631 	"\t            [:size=#entries]\n"
4632 	"\t            [:pause][:continue][:clear]\n"
4633 	"\t            [:name=histname1]\n"
4634 	"\t            [if <filter>]\n\n"
4635 	"\t    When a matching event is hit, an entry is added to a hash\n"
4636 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4637 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4638 	"\t    correspond to fields in the event's format description.  Keys\n"
4639 	"\t    can be any field, or the special string 'stacktrace'.\n"
4640 	"\t    Compound keys consisting of up to two fields can be specified\n"
4641 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4642 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4643 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4644 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4645 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4646 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4647 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4648 	"\t    its histogram data will be shared with other triggers of the\n"
4649 	"\t    same name, and trigger hits will update this common data.\n\n"
4650 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4651 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4652 	"\t    triggers attached to an event, there will be a table for each\n"
4653 	"\t    trigger in the output.  The table displayed for a named\n"
4654 	"\t    trigger will be the same as any other instance having the\n"
4655 	"\t    same name.  The default format used to display a given field\n"
4656 	"\t    can be modified by appending any of the following modifiers\n"
4657 	"\t    to the field name, as applicable:\n\n"
4658 	"\t            .hex        display a number as a hex value\n"
4659 	"\t            .sym        display an address as a symbol\n"
4660 	"\t            .sym-offset display an address as a symbol and offset\n"
4661 	"\t            .execname   display a common_pid as a program name\n"
4662 	"\t            .syscall    display a syscall id as a syscall name\n"
4663 	"\t            .log2       display log2 value rather than raw number\n\n"
4664 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4665 	"\t    trigger or to start a hist trigger but not log any events\n"
4666 	"\t    until told to do so.  'continue' can be used to start or\n"
4667 	"\t    restart a paused hist trigger.\n\n"
4668 	"\t    The 'clear' parameter will clear the contents of a running\n"
4669 	"\t    hist trigger and leave its current paused/active state\n"
4670 	"\t    unchanged.\n\n"
4671 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4672 	"\t    have one event conditionally start and stop another event's\n"
4673 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4674 	"\t    the enable_event and disable_event triggers.\n"
4675 #endif
4676 ;
4677 
4678 static ssize_t
4679 tracing_readme_read(struct file *filp, char __user *ubuf,
4680 		       size_t cnt, loff_t *ppos)
4681 {
4682 	return simple_read_from_buffer(ubuf, cnt, ppos,
4683 					readme_msg, strlen(readme_msg));
4684 }
4685 
4686 static const struct file_operations tracing_readme_fops = {
4687 	.open		= tracing_open_generic,
4688 	.read		= tracing_readme_read,
4689 	.llseek		= generic_file_llseek,
4690 };
4691 
4692 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4693 {
4694 	unsigned int *ptr = v;
4695 
4696 	if (*pos || m->count)
4697 		ptr++;
4698 
4699 	(*pos)++;
4700 
4701 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4702 	     ptr++) {
4703 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4704 			continue;
4705 
4706 		return ptr;
4707 	}
4708 
4709 	return NULL;
4710 }
4711 
4712 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4713 {
4714 	void *v;
4715 	loff_t l = 0;
4716 
4717 	preempt_disable();
4718 	arch_spin_lock(&trace_cmdline_lock);
4719 
4720 	v = &savedcmd->map_cmdline_to_pid[0];
4721 	while (l <= *pos) {
4722 		v = saved_cmdlines_next(m, v, &l);
4723 		if (!v)
4724 			return NULL;
4725 	}
4726 
4727 	return v;
4728 }
4729 
4730 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4731 {
4732 	arch_spin_unlock(&trace_cmdline_lock);
4733 	preempt_enable();
4734 }
4735 
4736 static int saved_cmdlines_show(struct seq_file *m, void *v)
4737 {
4738 	char buf[TASK_COMM_LEN];
4739 	unsigned int *pid = v;
4740 
4741 	__trace_find_cmdline(*pid, buf);
4742 	seq_printf(m, "%d %s\n", *pid, buf);
4743 	return 0;
4744 }
4745 
4746 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4747 	.start		= saved_cmdlines_start,
4748 	.next		= saved_cmdlines_next,
4749 	.stop		= saved_cmdlines_stop,
4750 	.show		= saved_cmdlines_show,
4751 };
4752 
4753 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4754 {
4755 	if (tracing_disabled)
4756 		return -ENODEV;
4757 
4758 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4759 }
4760 
4761 static const struct file_operations tracing_saved_cmdlines_fops = {
4762 	.open		= tracing_saved_cmdlines_open,
4763 	.read		= seq_read,
4764 	.llseek		= seq_lseek,
4765 	.release	= seq_release,
4766 };
4767 
4768 static ssize_t
4769 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4770 				 size_t cnt, loff_t *ppos)
4771 {
4772 	char buf[64];
4773 	int r;
4774 
4775 	arch_spin_lock(&trace_cmdline_lock);
4776 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4777 	arch_spin_unlock(&trace_cmdline_lock);
4778 
4779 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4780 }
4781 
4782 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4783 {
4784 	kfree(s->saved_cmdlines);
4785 	kfree(s->map_cmdline_to_pid);
4786 	kfree(s);
4787 }
4788 
4789 static int tracing_resize_saved_cmdlines(unsigned int val)
4790 {
4791 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4792 
4793 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4794 	if (!s)
4795 		return -ENOMEM;
4796 
4797 	if (allocate_cmdlines_buffer(val, s) < 0) {
4798 		kfree(s);
4799 		return -ENOMEM;
4800 	}
4801 
4802 	arch_spin_lock(&trace_cmdline_lock);
4803 	savedcmd_temp = savedcmd;
4804 	savedcmd = s;
4805 	arch_spin_unlock(&trace_cmdline_lock);
4806 	free_saved_cmdlines_buffer(savedcmd_temp);
4807 
4808 	return 0;
4809 }
4810 
4811 static ssize_t
4812 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4813 				  size_t cnt, loff_t *ppos)
4814 {
4815 	unsigned long val;
4816 	int ret;
4817 
4818 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4819 	if (ret)
4820 		return ret;
4821 
4822 	/* must have at least 1 entry and at most PID_MAX_DEFAULT */
4823 	if (!val || val > PID_MAX_DEFAULT)
4824 		return -EINVAL;
4825 
4826 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4827 	if (ret < 0)
4828 		return ret;
4829 
4830 	*ppos += cnt;
4831 
4832 	return cnt;
4833 }
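
/*
 * Usage sketch (illustrative only):
 *
 *	# echo 1024 > saved_cmdlines_size
 *
 * resizes the pid->comm cache to 1024 entries: a new buffer is allocated,
 * swapped in under trace_cmdline_lock, and the old one is freed.  Values
 * outside 1..PID_MAX_DEFAULT are rejected with -EINVAL.
 */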
4834 
4835 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4836 	.open		= tracing_open_generic,
4837 	.read		= tracing_saved_cmdlines_size_read,
4838 	.write		= tracing_saved_cmdlines_size_write,
4839 };
4840 
4841 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4842 static union trace_eval_map_item *
4843 update_eval_map(union trace_eval_map_item *ptr)
4844 {
4845 	if (!ptr->map.eval_string) {
4846 		if (ptr->tail.next) {
4847 			ptr = ptr->tail.next;
4848 			/* Set ptr to the next real item (skip head) */
4849 			ptr++;
4850 		} else
4851 			return NULL;
4852 	}
4853 	return ptr;
4854 }
4855 
4856 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4857 {
4858 	union trace_eval_map_item *ptr = v;
4859 
4860 	/*
4861 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4862 	 * This really should never happen.
4863 	 */
4864 	ptr = update_eval_map(ptr);
4865 	if (WARN_ON_ONCE(!ptr))
4866 		return NULL;
4867 
4868 	ptr++;
4869 
4870 	(*pos)++;
4871 
4872 	ptr = update_eval_map(ptr);
4873 
4874 	return ptr;
4875 }
4876 
4877 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4878 {
4879 	union trace_eval_map_item *v;
4880 	loff_t l = 0;
4881 
4882 	mutex_lock(&trace_eval_mutex);
4883 
4884 	v = trace_eval_maps;
4885 	if (v)
4886 		v++;
4887 
4888 	while (v && l < *pos) {
4889 		v = eval_map_next(m, v, &l);
4890 	}
4891 
4892 	return v;
4893 }
4894 
4895 static void eval_map_stop(struct seq_file *m, void *v)
4896 {
4897 	mutex_unlock(&trace_eval_mutex);
4898 }
4899 
4900 static int eval_map_show(struct seq_file *m, void *v)
4901 {
4902 	union trace_eval_map_item *ptr = v;
4903 
4904 	seq_printf(m, "%s %ld (%s)\n",
4905 		   ptr->map.eval_string, ptr->map.eval_value,
4906 		   ptr->map.system);
4907 
4908 	return 0;
4909 }
4910 
4911 static const struct seq_operations tracing_eval_map_seq_ops = {
4912 	.start		= eval_map_start,
4913 	.next		= eval_map_next,
4914 	.stop		= eval_map_stop,
4915 	.show		= eval_map_show,
4916 };
4917 
4918 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
4919 {
4920 	if (tracing_disabled)
4921 		return -ENODEV;
4922 
4923 	return seq_open(filp, &tracing_eval_map_seq_ops);
4924 }
4925 
4926 static const struct file_operations tracing_eval_map_fops = {
4927 	.open		= tracing_eval_map_open,
4928 	.read		= seq_read,
4929 	.llseek		= seq_lseek,
4930 	.release	= seq_release,
4931 };
4932 
4933 static inline union trace_eval_map_item *
4934 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
4935 {
4936 	/* Return tail of array given the head */
4937 	return ptr + ptr->head.length + 1;
4938 }
4939 
4940 static void
4941 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
4942 			   int len)
4943 {
4944 	struct trace_eval_map **stop;
4945 	struct trace_eval_map **map;
4946 	union trace_eval_map_item *map_array;
4947 	union trace_eval_map_item *ptr;
4948 
4949 	stop = start + len;
4950 
4951 	/*
4952 	 * The trace_eval_maps contains the map plus a head and tail item,
4953 	 * where the head holds the module and length of array, and the
4954 	 * tail holds a pointer to the next list.
4955 	 */
4956 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4957 	if (!map_array) {
4958 		pr_warn("Unable to allocate trace eval mapping\n");
4959 		return;
4960 	}
4961 
4962 	mutex_lock(&trace_eval_mutex);
4963 
4964 	if (!trace_eval_maps)
4965 		trace_eval_maps = map_array;
4966 	else {
4967 		ptr = trace_eval_maps;
4968 		for (;;) {
4969 			ptr = trace_eval_jmp_to_tail(ptr);
4970 			if (!ptr->tail.next)
4971 				break;
4972 			ptr = ptr->tail.next;
4973 
4974 		}
4975 		ptr->tail.next = map_array;
4976 	}
4977 	map_array->head.mod = mod;
4978 	map_array->head.length = len;
4979 	map_array++;
4980 
4981 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4982 		map_array->map = **map;
4983 		map_array++;
4984 	}
4985 	memset(map_array, 0, sizeof(*map_array));
4986 
4987 	mutex_unlock(&trace_eval_mutex);
4988 }
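
/*
 * Rough picture of the list built above:
 *
 *	trace_eval_maps
 *	      |
 *	      v
 *	[ head | map 0 | ... | map len-1 | tail ] --tail.next--> next block
 *
 * head.mod and head.length describe the block, the len map entries follow,
 * and the zeroed tail item either terminates the list or points at the
 * block registered for the next module.
 */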
4989 
4990 static void trace_create_eval_file(struct dentry *d_tracer)
4991 {
4992 	trace_create_file("eval_map", 0444, d_tracer,
4993 			  NULL, &tracing_eval_map_fops);
4994 }
4995 
4996 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
4997 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
4998 static inline void trace_insert_eval_map_file(struct module *mod,
4999 			      struct trace_eval_map **start, int len) { }
5000 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5001 
5002 static void trace_insert_eval_map(struct module *mod,
5003 				  struct trace_eval_map **start, int len)
5004 {
5005 	struct trace_eval_map **map;
5006 
5007 	if (len <= 0)
5008 		return;
5009 
5010 	map = start;
5011 
5012 	trace_event_eval_update(map, len);
5013 
5014 	trace_insert_eval_map_file(mod, start, len);
5015 }
5016 
5017 static ssize_t
5018 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5019 		       size_t cnt, loff_t *ppos)
5020 {
5021 	struct trace_array *tr = filp->private_data;
5022 	char buf[MAX_TRACER_SIZE+2];
5023 	int r;
5024 
5025 	mutex_lock(&trace_types_lock);
5026 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5027 	mutex_unlock(&trace_types_lock);
5028 
5029 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5030 }
5031 
5032 int tracer_init(struct tracer *t, struct trace_array *tr)
5033 {
5034 	tracing_reset_online_cpus(&tr->trace_buffer);
5035 	return t->init(tr);
5036 }
5037 
5038 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5039 {
5040 	int cpu;
5041 
5042 	for_each_tracing_cpu(cpu)
5043 		per_cpu_ptr(buf->data, cpu)->entries = val;
5044 }
5045 
5046 #ifdef CONFIG_TRACER_MAX_TRACE
5047 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5048 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5049 					struct trace_buffer *size_buf, int cpu_id)
5050 {
5051 	int cpu, ret = 0;
5052 
5053 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5054 		for_each_tracing_cpu(cpu) {
5055 			ret = ring_buffer_resize(trace_buf->buffer,
5056 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5057 			if (ret < 0)
5058 				break;
5059 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5060 				per_cpu_ptr(size_buf->data, cpu)->entries;
5061 		}
5062 	} else {
5063 		ret = ring_buffer_resize(trace_buf->buffer,
5064 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5065 		if (ret == 0)
5066 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5067 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5068 	}
5069 
5070 	return ret;
5071 }
5072 #endif /* CONFIG_TRACER_MAX_TRACE */
5073 
5074 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5075 					unsigned long size, int cpu)
5076 {
5077 	int ret;
5078 
5079 	/*
5080 	 * If kernel or user changes the size of the ring buffer
5081 	 * we use the size that was given, and we can forget about
5082 	 * expanding it later.
5083 	 */
5084 	ring_buffer_expanded = true;
5085 
5086 	/* May be called before buffers are initialized */
5087 	if (!tr->trace_buffer.buffer)
5088 		return 0;
5089 
5090 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5091 	if (ret < 0)
5092 		return ret;
5093 
5094 #ifdef CONFIG_TRACER_MAX_TRACE
5095 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5096 	    !tr->current_trace->use_max_tr)
5097 		goto out;
5098 
5099 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5100 	if (ret < 0) {
5101 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5102 						     &tr->trace_buffer, cpu);
5103 		if (r < 0) {
5104 			/*
5105 			 * AARGH! We are left with different
5106 			 * size max buffer!!!!
5107 			 * The max buffer is our "snapshot" buffer.
5108 			 * When a tracer needs a snapshot (one of the
5109 			 * latency tracers), it swaps the max buffer
5110 			 * with the saved snapshot. We succeeded in updating
5111 			 * the size of the main buffer, but failed to update
5112 			 * the size of the max buffer. Then, when we tried to
5113 			 * reset the main buffer to the original size, we
5114 			 * failed there too. This is very unlikely to
5115 			 * happen, but if it does, warn and kill all
5116 			 * tracing.
5117 			 */
5118 			WARN_ON(1);
5119 			tracing_disabled = 1;
5120 		}
5121 		return ret;
5122 	}
5123 
5124 	if (cpu == RING_BUFFER_ALL_CPUS)
5125 		set_buffer_entries(&tr->max_buffer, size);
5126 	else
5127 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5128 
5129  out:
5130 #endif /* CONFIG_TRACER_MAX_TRACE */
5131 
5132 	if (cpu == RING_BUFFER_ALL_CPUS)
5133 		set_buffer_entries(&tr->trace_buffer, size);
5134 	else
5135 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5136 
5137 	return ret;
5138 }
5139 
5140 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5141 					  unsigned long size, int cpu_id)
5142 {
5143 	int ret = size;
5144 
5145 	mutex_lock(&trace_types_lock);
5146 
5147 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5148 		/* make sure this cpu is enabled in the mask */
5149 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5150 			ret = -EINVAL;
5151 			goto out;
5152 		}
5153 	}
5154 
5155 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5156 	if (ret < 0)
5157 		ret = -ENOMEM;
5158 
5159 out:
5160 	mutex_unlock(&trace_types_lock);
5161 
5162 	return ret;
5163 }
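
/*
 * For context (assumed call path; the tracefs file handlers live elsewhere
 * in this file): user-visible resizes such as
 *
 *	# echo 4096 > buffer_size_kb
 *	# echo 4096 > per_cpu/cpu1/buffer_size_kb
 *
 * are expected to reach tracing_resize_ring_buffer() with cpu_id set to
 * RING_BUFFER_ALL_CPUS or to the specific CPU, respectively.
 */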
5164 
5165 
5166 /**
5167  * tracing_update_buffers - used by tracing facility to expand ring buffers
5168  *
5169  * To save memory when tracing is configured in but never used, the
5170  * ring buffers are initially set to a minimum size. Once a user
5171  * starts to use the tracing facility, they need to grow to their
5172  * default size.
5173  *
5174  * This function is to be called when a tracer is about to be used.
5175  */
5176 int tracing_update_buffers(void)
5177 {
5178 	int ret = 0;
5179 
5180 	mutex_lock(&trace_types_lock);
5181 	if (!ring_buffer_expanded)
5182 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5183 						RING_BUFFER_ALL_CPUS);
5184 	mutex_unlock(&trace_types_lock);
5185 
5186 	return ret;
5187 }
5188 
5189 struct trace_option_dentry;
5190 
5191 static void
5192 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5193 
5194 /*
5195  * Used to clear out the tracer before deletion of an instance.
5196  * Must have trace_types_lock held.
5197  */
5198 static void tracing_set_nop(struct trace_array *tr)
5199 {
5200 	if (tr->current_trace == &nop_trace)
5201 		return;
5202 
5203 	tr->current_trace->enabled--;
5204 
5205 	if (tr->current_trace->reset)
5206 		tr->current_trace->reset(tr);
5207 
5208 	tr->current_trace = &nop_trace;
5209 }
5210 
5211 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5212 {
5213 	/* Only enable if the directory has been created already. */
5214 	if (!tr->dir)
5215 		return;
5216 
5217 	create_trace_option_files(tr, t);
5218 }
5219 
5220 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5221 {
5222 	struct tracer *t;
5223 #ifdef CONFIG_TRACER_MAX_TRACE
5224 	bool had_max_tr;
5225 #endif
5226 	int ret = 0;
5227 
5228 	mutex_lock(&trace_types_lock);
5229 
5230 	if (!ring_buffer_expanded) {
5231 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5232 						RING_BUFFER_ALL_CPUS);
5233 		if (ret < 0)
5234 			goto out;
5235 		ret = 0;
5236 	}
5237 
5238 	for (t = trace_types; t; t = t->next) {
5239 		if (strcmp(t->name, buf) == 0)
5240 			break;
5241 	}
5242 	if (!t) {
5243 		ret = -EINVAL;
5244 		goto out;
5245 	}
5246 	if (t == tr->current_trace)
5247 		goto out;
5248 
5249 	/* Some tracers are only allowed for the top level buffer */
5250 	if (!trace_ok_for_array(t, tr)) {
5251 		ret = -EINVAL;
5252 		goto out;
5253 	}
5254 
5255 	/* If trace pipe files are being read, we can't change the tracer */
5256 	if (tr->current_trace->ref) {
5257 		ret = -EBUSY;
5258 		goto out;
5259 	}
5260 
5261 	trace_branch_disable();
5262 
5263 	tr->current_trace->enabled--;
5264 
5265 	if (tr->current_trace->reset)
5266 		tr->current_trace->reset(tr);
5267 
5268 	/* Current trace needs to be nop_trace before synchronize_sched */
5269 	tr->current_trace = &nop_trace;
5270 
5271 #ifdef CONFIG_TRACER_MAX_TRACE
5272 	had_max_tr = tr->allocated_snapshot;
5273 
5274 	if (had_max_tr && !t->use_max_tr) {
5275 		/*
5276 		 * We need to make sure that the update_max_tr sees that
5277 		 * current_trace changed to nop_trace to keep it from
5278 		 * swapping the buffers after we resize it.
5279 		 * The update_max_tr is called from interrupts disabled
5280 		 * The update_max_tr is called with interrupts disabled,
5281 		 * so a synchronize_sched() is sufficient.
5282 		synchronize_sched();
5283 		free_snapshot(tr);
5284 	}
5285 #endif
5286 
5287 #ifdef CONFIG_TRACER_MAX_TRACE
5288 	if (t->use_max_tr && !had_max_tr) {
5289 		ret = alloc_snapshot(tr);
5290 		if (ret < 0)
5291 			goto out;
5292 	}
5293 #endif
5294 
5295 	if (t->init) {
5296 		ret = tracer_init(t, tr);
5297 		if (ret)
5298 			goto out;
5299 	}
5300 
5301 	tr->current_trace = t;
5302 	tr->current_trace->enabled++;
5303 	trace_branch_enable(tr);
5304  out:
5305 	mutex_unlock(&trace_types_lock);
5306 
5307 	return ret;
5308 }
5309 
5310 static ssize_t
5311 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5312 			size_t cnt, loff_t *ppos)
5313 {
5314 	struct trace_array *tr = filp->private_data;
5315 	char buf[MAX_TRACER_SIZE+1];
5316 	int i;
5317 	size_t ret;
5318 	int err;
5319 
5320 	ret = cnt;
5321 
5322 	if (cnt > MAX_TRACER_SIZE)
5323 		cnt = MAX_TRACER_SIZE;
5324 
5325 	if (copy_from_user(buf, ubuf, cnt))
5326 		return -EFAULT;
5327 
5328 	buf[cnt] = 0;
5329 
5330 	/* strip ending whitespace. */
5331 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5332 		buf[i] = 0;
5333 
5334 	err = tracing_set_tracer(tr, buf);
5335 	if (err)
5336 		return err;
5337 
5338 	*ppos += ret;
5339 
5340 	return ret;
5341 }
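
/*
 * Usage sketch (illustrative only; the tracefs wiring for the current_tracer
 * file lives elsewhere in this file): this write handler is what sits behind
 *
 *	# echo function > current_tracer
 *	# echo nop > current_tracer
 *
 * The trailing newline from echo is stripped above before the name is
 * matched against the registered tracers.
 */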
5342 
5343 static ssize_t
5344 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5345 		   size_t cnt, loff_t *ppos)
5346 {
5347 	char buf[64];
5348 	int r;
5349 
5350 	r = snprintf(buf, sizeof(buf), "%ld\n",
5351 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5352 	if (r > sizeof(buf))
5353 		r = sizeof(buf);
5354 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5355 }
5356 
5357 static ssize_t
5358 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5359 		    size_t cnt, loff_t *ppos)
5360 {
5361 	unsigned long val;
5362 	int ret;
5363 
5364 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5365 	if (ret)
5366 		return ret;
5367 
5368 	*ptr = val * 1000;
5369 
5370 	return cnt;
5371 }
5372 
5373 static ssize_t
5374 tracing_thresh_read(struct file *filp, char __user *ubuf,
5375 		    size_t cnt, loff_t *ppos)
5376 {
5377 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5378 }
5379 
5380 static ssize_t
5381 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5382 		     size_t cnt, loff_t *ppos)
5383 {
5384 	struct trace_array *tr = filp->private_data;
5385 	int ret;
5386 
5387 	mutex_lock(&trace_types_lock);
5388 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5389 	if (ret < 0)
5390 		goto out;
5391 
5392 	if (tr->current_trace->update_thresh) {
5393 		ret = tr->current_trace->update_thresh(tr);
5394 		if (ret < 0)
5395 			goto out;
5396 	}
5397 
5398 	ret = cnt;
5399 out:
5400 	mutex_unlock(&trace_types_lock);
5401 
5402 	return ret;
5403 }
5404 
5405 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5406 
5407 static ssize_t
5408 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5409 		     size_t cnt, loff_t *ppos)
5410 {
5411 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5412 }
5413 
5414 static ssize_t
5415 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5416 		      size_t cnt, loff_t *ppos)
5417 {
5418 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5419 }
5420 
5421 #endif
5422 
5423 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5424 {
5425 	struct trace_array *tr = inode->i_private;
5426 	struct trace_iterator *iter;
5427 	int ret = 0;
5428 
5429 	if (tracing_disabled)
5430 		return -ENODEV;
5431 
5432 	if (trace_array_get(tr) < 0)
5433 		return -ENODEV;
5434 
5435 	mutex_lock(&trace_types_lock);
5436 
5437 	/* create a buffer to store the information to pass to userspace */
5438 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5439 	if (!iter) {
5440 		ret = -ENOMEM;
5441 		__trace_array_put(tr);
5442 		goto out;
5443 	}
5444 
5445 	trace_seq_init(&iter->seq);
5446 	iter->trace = tr->current_trace;
5447 
5448 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5449 		ret = -ENOMEM;
5450 		goto fail;
5451 	}
5452 
5453 	/* trace pipe does not show start of buffer */
5454 	cpumask_setall(iter->started);
5455 
5456 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5457 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5458 
5459 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5460 	if (trace_clocks[tr->clock_id].in_ns)
5461 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5462 
5463 	iter->tr = tr;
5464 	iter->trace_buffer = &tr->trace_buffer;
5465 	iter->cpu_file = tracing_get_cpu(inode);
5466 	mutex_init(&iter->mutex);
5467 	filp->private_data = iter;
5468 
5469 	if (iter->trace->pipe_open)
5470 		iter->trace->pipe_open(iter);
5471 
5472 	nonseekable_open(inode, filp);
5473 
5474 	tr->current_trace->ref++;
5475 out:
5476 	mutex_unlock(&trace_types_lock);
5477 	return ret;
5478 
5479 fail:
5480 	kfree(iter->trace);
5481 	kfree(iter);
5482 	__trace_array_put(tr);
5483 	mutex_unlock(&trace_types_lock);
5484 	return ret;
5485 }
5486 
5487 static int tracing_release_pipe(struct inode *inode, struct file *file)
5488 {
5489 	struct trace_iterator *iter = file->private_data;
5490 	struct trace_array *tr = inode->i_private;
5491 
5492 	mutex_lock(&trace_types_lock);
5493 
5494 	tr->current_trace->ref--;
5495 
5496 	if (iter->trace->pipe_close)
5497 		iter->trace->pipe_close(iter);
5498 
5499 	mutex_unlock(&trace_types_lock);
5500 
5501 	free_cpumask_var(iter->started);
5502 	mutex_destroy(&iter->mutex);
5503 	kfree(iter);
5504 
5505 	trace_array_put(tr);
5506 
5507 	return 0;
5508 }
5509 
5510 static unsigned int
5511 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5512 {
5513 	struct trace_array *tr = iter->tr;
5514 
5515 	/* Iterators are static; they should be filled or empty */
5516 	if (trace_buffer_iter(iter, iter->cpu_file))
5517 		return POLLIN | POLLRDNORM;
5518 
5519 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5520 		/*
5521 		 * Always select as readable when in blocking mode
5522 		 */
5523 		return POLLIN | POLLRDNORM;
5524 	else
5525 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5526 					     filp, poll_table);
5527 }
5528 
5529 static unsigned int
5530 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5531 {
5532 	struct trace_iterator *iter = filp->private_data;
5533 
5534 	return trace_poll(iter, filp, poll_table);
5535 }
5536 
5537 /* Must be called with iter->mutex held. */
5538 static int tracing_wait_pipe(struct file *filp)
5539 {
5540 	struct trace_iterator *iter = filp->private_data;
5541 	int ret;
5542 
5543 	while (trace_empty(iter)) {
5544 
5545 		if ((filp->f_flags & O_NONBLOCK)) {
5546 			return -EAGAIN;
5547 		}
5548 
5549 		/*
5550 		 * We block until we read something and tracing is disabled.
5551 		 * We still block if tracing is disabled, but we have never
5552 		 * read anything. This allows a user to cat this file, and
5553 		 * then enable tracing. But after we have read something,
5554 		 * we give an EOF when tracing is again disabled.
5555 		 *
5556 		 * iter->pos will be 0 if we haven't read anything.
5557 		 */
5558 		if (!tracing_is_on() && iter->pos)
5559 			break;
5560 
5561 		mutex_unlock(&iter->mutex);
5562 
5563 		ret = wait_on_pipe(iter, false);
5564 
5565 		mutex_lock(&iter->mutex);
5566 
5567 		if (ret)
5568 			return ret;
5569 	}
5570 
5571 	return 1;
5572 }
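
/*
 * Practical effect (illustrative only): the wait loop above is what makes
 *
 *	# cat trace_pipe
 *
 * block while the buffer is empty instead of returning EOF, so a reader can
 * be attached first and tracing enabled afterwards.  Once something has been
 * read and tracing is turned off, the reader finally sees EOF.
 */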
5573 
5574 /*
5575  * Consumer reader.
5576  */
5577 static ssize_t
5578 tracing_read_pipe(struct file *filp, char __user *ubuf,
5579 		  size_t cnt, loff_t *ppos)
5580 {
5581 	struct trace_iterator *iter = filp->private_data;
5582 	ssize_t sret;
5583 
5584 	/*
5585 	 * Avoid more than one consumer on a single file descriptor
5586 	 * This is just a matter of trace coherency; the ring buffer itself
5587 	 * is protected.
5588 	 */
5589 	mutex_lock(&iter->mutex);
5590 
5591 	/* return any leftover data */
5592 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5593 	if (sret != -EBUSY)
5594 		goto out;
5595 
5596 	trace_seq_init(&iter->seq);
5597 
5598 	if (iter->trace->read) {
5599 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5600 		if (sret)
5601 			goto out;
5602 	}
5603 
5604 waitagain:
5605 	sret = tracing_wait_pipe(filp);
5606 	if (sret <= 0)
5607 		goto out;
5608 
5609 	/* stop when tracing is finished */
5610 	if (trace_empty(iter)) {
5611 		sret = 0;
5612 		goto out;
5613 	}
5614 
5615 	if (cnt >= PAGE_SIZE)
5616 		cnt = PAGE_SIZE - 1;
5617 
5618 	/* reset all but tr, trace, and overruns */
5619 	memset(&iter->seq, 0,
5620 	       sizeof(struct trace_iterator) -
5621 	       offsetof(struct trace_iterator, seq));
5622 	cpumask_clear(iter->started);
5623 	iter->pos = -1;
5624 
5625 	trace_event_read_lock();
5626 	trace_access_lock(iter->cpu_file);
5627 	while (trace_find_next_entry_inc(iter) != NULL) {
5628 		enum print_line_t ret;
5629 		int save_len = iter->seq.seq.len;
5630 
5631 		ret = print_trace_line(iter);
5632 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5633 			/* don't print partial lines */
5634 			iter->seq.seq.len = save_len;
5635 			break;
5636 		}
5637 		if (ret != TRACE_TYPE_NO_CONSUME)
5638 			trace_consume(iter);
5639 
5640 		if (trace_seq_used(&iter->seq) >= cnt)
5641 			break;
5642 
5643 		/*
5644 		 * Setting the full flag means we reached the trace_seq buffer
5645 		 * size and we should have left via the partial output condition above.
5646 		 * One of the trace_seq_* functions is not used properly.
5647 		 */
5648 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5649 			  iter->ent->type);
5650 	}
5651 	trace_access_unlock(iter->cpu_file);
5652 	trace_event_read_unlock();
5653 
5654 	/* Now copy what we have to the user */
5655 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5656 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5657 		trace_seq_init(&iter->seq);
5658 
5659 	/*
5660 	 * If there was nothing to send to user, in spite of consuming trace
5661 	 * entries, go back to wait for more entries.
5662 	 */
5663 	if (sret == -EBUSY)
5664 		goto waitagain;
5665 
5666 out:
5667 	mutex_unlock(&iter->mutex);
5668 
5669 	return sret;
5670 }
5671 
5672 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5673 				     unsigned int idx)
5674 {
5675 	__free_page(spd->pages[idx]);
5676 }
5677 
5678 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5679 	.can_merge		= 0,
5680 	.confirm		= generic_pipe_buf_confirm,
5681 	.release		= generic_pipe_buf_release,
5682 	.steal			= generic_pipe_buf_steal,
5683 	.get			= generic_pipe_buf_get,
5684 };
5685 
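/*
 * Fill iter->seq with formatted trace lines for the splice path, consuming
 * entries from the ring buffer as they are printed. Stops when the
 * page-sized seq buffer is full or the remaining splice budget (rem) is
 * used up, and returns how much of that budget is still left.
 */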
5686 static size_t
5687 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5688 {
5689 	size_t count;
5690 	int save_len;
5691 	int ret;
5692 
5693 	/* Seq buffer is page-sized, exactly what we need. */
5694 	for (;;) {
5695 		save_len = iter->seq.seq.len;
5696 		ret = print_trace_line(iter);
5697 
5698 		if (trace_seq_has_overflowed(&iter->seq)) {
5699 			iter->seq.seq.len = save_len;
5700 			break;
5701 		}
5702 
5703 		/*
5704 		 * This should not be hit, because it should only
5705 		 * be set if the iter->seq overflowed. But check it
5706 		 * anyway to be safe.
5707 		 */
5708 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5709 			iter->seq.seq.len = save_len;
5710 			break;
5711 		}
5712 
5713 		count = trace_seq_used(&iter->seq) - save_len;
5714 		if (rem < count) {
5715 			rem = 0;
5716 			iter->seq.seq.len = save_len;
5717 			break;
5718 		}
5719 
5720 		if (ret != TRACE_TYPE_NO_CONSUME)
5721 			trace_consume(iter);
5722 		rem -= count;
5723 		if (!trace_find_next_entry_inc(iter))	{
5724 			rem = 0;
5725 			iter->ent = NULL;
5726 			break;
5727 		}
5728 	}
5729 
5730 	return rem;
5731 }
5732 
5733 static ssize_t tracing_splice_read_pipe(struct file *filp,
5734 					loff_t *ppos,
5735 					struct pipe_inode_info *pipe,
5736 					size_t len,
5737 					unsigned int flags)
5738 {
5739 	struct page *pages_def[PIPE_DEF_BUFFERS];
5740 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5741 	struct trace_iterator *iter = filp->private_data;
5742 	struct splice_pipe_desc spd = {
5743 		.pages		= pages_def,
5744 		.partial	= partial_def,
5745 		.nr_pages	= 0, /* This gets updated below. */
5746 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5747 		.ops		= &tracing_pipe_buf_ops,
5748 		.spd_release	= tracing_spd_release_pipe,
5749 	};
5750 	ssize_t ret;
5751 	size_t rem;
5752 	unsigned int i;
5753 
5754 	if (splice_grow_spd(pipe, &spd))
5755 		return -ENOMEM;
5756 
5757 	mutex_lock(&iter->mutex);
5758 
5759 	if (iter->trace->splice_read) {
5760 		ret = iter->trace->splice_read(iter, filp,
5761 					       ppos, pipe, len, flags);
5762 		if (ret)
5763 			goto out_err;
5764 	}
5765 
5766 	ret = tracing_wait_pipe(filp);
5767 	if (ret <= 0)
5768 		goto out_err;
5769 
5770 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5771 		ret = -EFAULT;
5772 		goto out_err;
5773 	}
5774 
5775 	trace_event_read_lock();
5776 	trace_access_lock(iter->cpu_file);
5777 
5778 	/* Fill as many pages as possible. */
5779 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5780 		spd.pages[i] = alloc_page(GFP_KERNEL);
5781 		if (!spd.pages[i])
5782 			break;
5783 
5784 		rem = tracing_fill_pipe_page(rem, iter);
5785 
5786 		/* Copy the data into the page, so we can start over. */
5787 		ret = trace_seq_to_buffer(&iter->seq,
5788 					  page_address(spd.pages[i]),
5789 					  trace_seq_used(&iter->seq));
5790 		if (ret < 0) {
5791 			__free_page(spd.pages[i]);
5792 			break;
5793 		}
5794 		spd.partial[i].offset = 0;
5795 		spd.partial[i].len = trace_seq_used(&iter->seq);
5796 
5797 		trace_seq_init(&iter->seq);
5798 	}
5799 
5800 	trace_access_unlock(iter->cpu_file);
5801 	trace_event_read_unlock();
5802 	mutex_unlock(&iter->mutex);
5803 
5804 	spd.nr_pages = i;
5805 
5806 	if (i)
5807 		ret = splice_to_pipe(pipe, &spd);
5808 	else
5809 		ret = 0;
5810 out:
5811 	splice_shrink_spd(&spd);
5812 	return ret;
5813 
5814 out_err:
5815 	mutex_unlock(&iter->mutex);
5816 	goto out;
5817 }
5818 
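/*
 * "buffer_size_kb" read handler: report the ring buffer size in KB for one
 * CPU, or for all CPUs when read from the top level file ("X" is shown if
 * the per-cpu sizes differ).
 */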
5819 static ssize_t
5820 tracing_entries_read(struct file *filp, char __user *ubuf,
5821 		     size_t cnt, loff_t *ppos)
5822 {
5823 	struct inode *inode = file_inode(filp);
5824 	struct trace_array *tr = inode->i_private;
5825 	int cpu = tracing_get_cpu(inode);
5826 	char buf[64];
5827 	int r = 0;
5828 	ssize_t ret;
5829 
5830 	mutex_lock(&trace_types_lock);
5831 
5832 	if (cpu == RING_BUFFER_ALL_CPUS) {
5833 		int cpu, buf_size_same;
5834 		unsigned long size;
5835 
5836 		size = 0;
5837 		buf_size_same = 1;
5838 		/* check if all cpu sizes are same */
5839 		for_each_tracing_cpu(cpu) {
5840 			/* fill in the size from first enabled cpu */
5841 			if (size == 0)
5842 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5843 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5844 				buf_size_same = 0;
5845 				break;
5846 			}
5847 		}
5848 
5849 		if (buf_size_same) {
5850 			if (!ring_buffer_expanded)
5851 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5852 					    size >> 10,
5853 					    trace_buf_size >> 10);
5854 			else
5855 				r = sprintf(buf, "%lu\n", size >> 10);
5856 		} else
5857 			r = sprintf(buf, "X\n");
5858 	} else
5859 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5860 
5861 	mutex_unlock(&trace_types_lock);
5862 
5863 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5864 	return ret;
5865 }
5866 
5867 static ssize_t
5868 tracing_entries_write(struct file *filp, const char __user *ubuf,
5869 		      size_t cnt, loff_t *ppos)
5870 {
5871 	struct inode *inode = file_inode(filp);
5872 	struct trace_array *tr = inode->i_private;
5873 	unsigned long val;
5874 	int ret;
5875 
5876 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5877 	if (ret)
5878 		return ret;
5879 
5880 	/* must have at least 1 entry */
5881 	if (!val)
5882 		return -EINVAL;
5883 
5884 	/* value is in KB */
5885 	val <<= 10;
5886 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5887 	if (ret < 0)
5888 		return ret;
5889 
5890 	*ppos += cnt;
5891 
5892 	return cnt;
5893 }
5894 
5895 static ssize_t
5896 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5897 				size_t cnt, loff_t *ppos)
5898 {
5899 	struct trace_array *tr = filp->private_data;
5900 	char buf[64];
5901 	int r, cpu;
5902 	unsigned long size = 0, expanded_size = 0;
5903 
5904 	mutex_lock(&trace_types_lock);
5905 	for_each_tracing_cpu(cpu) {
5906 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5907 		if (!ring_buffer_expanded)
5908 			expanded_size += trace_buf_size >> 10;
5909 	}
5910 	if (ring_buffer_expanded)
5911 		r = sprintf(buf, "%lu\n", size);
5912 	else
5913 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5914 	mutex_unlock(&trace_types_lock);
5915 
5916 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5917 }
5918 
5919 static ssize_t
5920 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5921 			  size_t cnt, loff_t *ppos)
5922 {
5923 	/*
5924 	 * There is no need to read what the user has written. This function
5925 	 * just makes sure that "echo" does not return an error when used.
5926 	 */
5927 
5928 	*ppos += cnt;
5929 
5930 	return cnt;
5931 }
5932 
5933 static int
5934 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5935 {
5936 	struct trace_array *tr = inode->i_private;
5937 
5938 	/* disable tracing? */
5939 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5940 		tracer_tracing_off(tr);
5941 	/* resize the ring buffer to 0 */
5942 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5943 
5944 	trace_array_put(tr);
5945 
5946 	return 0;
5947 }
5948 
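/*
 * "trace_marker" write handler: copy a string from user space straight
 * into the ring buffer as a TRACE_PRINT event, e.g.:
 *
 *   echo "hello world" > trace_marker
 *
 * If the copy faults, the string "<faulted>" is recorded instead and
 * -EFAULT is returned.
 */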
5949 static ssize_t
5950 tracing_mark_write(struct file *filp, const char __user *ubuf,
5951 					size_t cnt, loff_t *fpos)
5952 {
5953 	struct trace_array *tr = filp->private_data;
5954 	struct ring_buffer_event *event;
5955 	struct ring_buffer *buffer;
5956 	struct print_entry *entry;
5957 	unsigned long irq_flags;
5958 	const char faulted[] = "<faulted>";
5959 	ssize_t written;
5960 	int size;
5961 	int len;
5962 
5963 /* Used in tracing_mark_raw_write() as well */
5964 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5965 
5966 	if (tracing_disabled)
5967 		return -EINVAL;
5968 
5969 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5970 		return -EINVAL;
5971 
5972 	if (cnt > TRACE_BUF_SIZE)
5973 		cnt = TRACE_BUF_SIZE;
5974 
5975 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5976 
5977 	local_save_flags(irq_flags);
5978 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5979 
5980 	/* If less than "<faulted>", then make sure we can still add that */
5981 	if (cnt < FAULTED_SIZE)
5982 		size += FAULTED_SIZE - cnt;
5983 
5984 	buffer = tr->trace_buffer.buffer;
5985 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5986 					    irq_flags, preempt_count());
5987 	if (unlikely(!event))
5988 		/* Ring buffer disabled, return as if not open for write */
5989 		return -EBADF;
5990 
5991 	entry = ring_buffer_event_data(event);
5992 	entry->ip = _THIS_IP_;
5993 
5994 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5995 	if (len) {
5996 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
5997 		cnt = FAULTED_SIZE;
5998 		written = -EFAULT;
5999 	} else
6000 		written = cnt;
6001 	len = cnt;
6002 
6003 	if (entry->buf[cnt - 1] != '\n') {
6004 		entry->buf[cnt] = '\n';
6005 		entry->buf[cnt + 1] = '\0';
6006 	} else
6007 		entry->buf[cnt] = '\0';
6008 
6009 	__buffer_unlock_commit(buffer, event);
6010 
6011 	if (written > 0)
6012 		*fpos += written;
6013 
6014 	return written;
6015 }
6016 
6017 /* Limit it for now to 3K (including tag) */
6018 #define RAW_DATA_MAX_SIZE (1024*3)
6019 
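/*
 * "trace_marker_raw" write handler: like trace_marker, but the payload is
 * binary and must begin with an "unsigned int" tag id. The data is stored
 * as a TRACE_RAW_DATA event; on a faulted copy the id is set to -1 and
 * "<faulted>" is recorded.
 */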
6020 static ssize_t
6021 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6022 					size_t cnt, loff_t *fpos)
6023 {
6024 	struct trace_array *tr = filp->private_data;
6025 	struct ring_buffer_event *event;
6026 	struct ring_buffer *buffer;
6027 	struct raw_data_entry *entry;
6028 	const char faulted[] = "<faulted>";
6029 	unsigned long irq_flags;
6030 	ssize_t written;
6031 	int size;
6032 	int len;
6033 
6034 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6035 
6036 	if (tracing_disabled)
6037 		return -EINVAL;
6038 
6039 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6040 		return -EINVAL;
6041 
6042 	/* The marker must at least have a tag id */
6043 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6044 		return -EINVAL;
6045 
6046 	if (cnt > TRACE_BUF_SIZE)
6047 		cnt = TRACE_BUF_SIZE;
6048 
6049 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6050 
6051 	local_save_flags(irq_flags);
6052 	size = sizeof(*entry) + cnt;
6053 	if (cnt < FAULT_SIZE_ID)
6054 		size += FAULT_SIZE_ID - cnt;
6055 
6056 	buffer = tr->trace_buffer.buffer;
6057 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6058 					    irq_flags, preempt_count());
6059 	if (!event)
6060 		/* Ring buffer disabled, return as if not open for write */
6061 		return -EBADF;
6062 
6063 	entry = ring_buffer_event_data(event);
6064 
6065 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6066 	if (len) {
6067 		entry->id = -1;
6068 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6069 		written = -EFAULT;
6070 	} else
6071 		written = cnt;
6072 
6073 	__buffer_unlock_commit(buffer, event);
6074 
6075 	if (written > 0)
6076 		*fpos += written;
6077 
6078 	return written;
6079 }
6080 
6081 static int tracing_clock_show(struct seq_file *m, void *v)
6082 {
6083 	struct trace_array *tr = m->private;
6084 	int i;
6085 
6086 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6087 		seq_printf(m,
6088 			"%s%s%s%s", i ? " " : "",
6089 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6090 			i == tr->clock_id ? "]" : "");
6091 	seq_putc(m, '\n');
6092 
6093 	return 0;
6094 }
6095 
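/*
 * Switch the clock used to timestamp events, as selected through the
 * "trace_clock" file (e.g. "echo global > trace_clock"). The buffers are
 * reset because timestamps from different clocks are not comparable.
 */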
6096 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6097 {
6098 	int i;
6099 
6100 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6101 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6102 			break;
6103 	}
6104 	if (i == ARRAY_SIZE(trace_clocks))
6105 		return -EINVAL;
6106 
6107 	mutex_lock(&trace_types_lock);
6108 
6109 	tr->clock_id = i;
6110 
6111 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6112 
6113 	/*
6114 	 * New clock may not be consistent with the previous clock.
6115 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6116 	 */
6117 	tracing_reset_online_cpus(&tr->trace_buffer);
6118 
6119 #ifdef CONFIG_TRACER_MAX_TRACE
6120 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
6121 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6122 	tracing_reset_online_cpus(&tr->max_buffer);
6123 #endif
6124 
6125 	mutex_unlock(&trace_types_lock);
6126 
6127 	return 0;
6128 }
6129 
6130 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6131 				   size_t cnt, loff_t *fpos)
6132 {
6133 	struct seq_file *m = filp->private_data;
6134 	struct trace_array *tr = m->private;
6135 	char buf[64];
6136 	const char *clockstr;
6137 	int ret;
6138 
6139 	if (cnt >= sizeof(buf))
6140 		return -EINVAL;
6141 
6142 	if (copy_from_user(buf, ubuf, cnt))
6143 		return -EFAULT;
6144 
6145 	buf[cnt] = 0;
6146 
6147 	clockstr = strstrip(buf);
6148 
6149 	ret = tracing_set_clock(tr, clockstr);
6150 	if (ret)
6151 		return ret;
6152 
6153 	*fpos += cnt;
6154 
6155 	return cnt;
6156 }
6157 
6158 static int tracing_clock_open(struct inode *inode, struct file *file)
6159 {
6160 	struct trace_array *tr = inode->i_private;
6161 	int ret;
6162 
6163 	if (tracing_disabled)
6164 		return -ENODEV;
6165 
6166 	if (trace_array_get(tr))
6167 		return -ENODEV;
6168 
6169 	ret = single_open(file, tracing_clock_show, inode->i_private);
6170 	if (ret < 0)
6171 		trace_array_put(tr);
6172 
6173 	return ret;
6174 }
6175 
6176 struct ftrace_buffer_info {
6177 	struct trace_iterator	iter;
6178 	void			*spare;
6179 	unsigned int		spare_cpu;
6180 	unsigned int		read;
6181 };
6182 
6183 #ifdef CONFIG_TRACER_SNAPSHOT
6184 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6185 {
6186 	struct trace_array *tr = inode->i_private;
6187 	struct trace_iterator *iter;
6188 	struct seq_file *m;
6189 	int ret = 0;
6190 
6191 	if (trace_array_get(tr) < 0)
6192 		return -ENODEV;
6193 
6194 	if (file->f_mode & FMODE_READ) {
6195 		iter = __tracing_open(inode, file, true);
6196 		if (IS_ERR(iter))
6197 			ret = PTR_ERR(iter);
6198 	} else {
6199 		/* Writes still need the seq_file to hold the private data */
6200 		ret = -ENOMEM;
6201 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6202 		if (!m)
6203 			goto out;
6204 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6205 		if (!iter) {
6206 			kfree(m);
6207 			goto out;
6208 		}
6209 		ret = 0;
6210 
6211 		iter->tr = tr;
6212 		iter->trace_buffer = &tr->max_buffer;
6213 		iter->cpu_file = tracing_get_cpu(inode);
6214 		m->private = iter;
6215 		file->private_data = m;
6216 	}
6217 out:
6218 	if (ret < 0)
6219 		trace_array_put(tr);
6220 
6221 	return ret;
6222 }
6223 
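/*
 * "snapshot" write handler:
 *
 *   echo 0 > snapshot  - free the snapshot buffer (not on a per_cpu file)
 *   echo 1 > snapshot  - allocate the snapshot buffer if needed and take
 *                        a snapshot (swap the buffers)
 *   echo 2 > snapshot  - clear the snapshot buffer without freeing it
 *                        (any value above 1 does this)
 */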
6224 static ssize_t
6225 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6226 		       loff_t *ppos)
6227 {
6228 	struct seq_file *m = filp->private_data;
6229 	struct trace_iterator *iter = m->private;
6230 	struct trace_array *tr = iter->tr;
6231 	unsigned long val;
6232 	int ret;
6233 
6234 	ret = tracing_update_buffers();
6235 	if (ret < 0)
6236 		return ret;
6237 
6238 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6239 	if (ret)
6240 		return ret;
6241 
6242 	mutex_lock(&trace_types_lock);
6243 
6244 	if (tr->current_trace->use_max_tr) {
6245 		ret = -EBUSY;
6246 		goto out;
6247 	}
6248 
6249 	switch (val) {
6250 	case 0:
6251 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6252 			ret = -EINVAL;
6253 			break;
6254 		}
6255 		if (tr->allocated_snapshot)
6256 			free_snapshot(tr);
6257 		break;
6258 	case 1:
6259 /* Only allow per-cpu swap if the ring buffer supports it */
6260 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6261 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6262 			ret = -EINVAL;
6263 			break;
6264 		}
6265 #endif
6266 		if (!tr->allocated_snapshot) {
6267 			ret = alloc_snapshot(tr);
6268 			if (ret < 0)
6269 				break;
6270 		}
6271 		local_irq_disable();
6272 		/* Now, we're going to swap */
6273 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6274 			update_max_tr(tr, current, smp_processor_id());
6275 		else
6276 			update_max_tr_single(tr, current, iter->cpu_file);
6277 		local_irq_enable();
6278 		break;
6279 	default:
6280 		if (tr->allocated_snapshot) {
6281 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6282 				tracing_reset_online_cpus(&tr->max_buffer);
6283 			else
6284 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6285 		}
6286 		break;
6287 	}
6288 
6289 	if (ret >= 0) {
6290 		*ppos += cnt;
6291 		ret = cnt;
6292 	}
6293 out:
6294 	mutex_unlock(&trace_types_lock);
6295 	return ret;
6296 }
6297 
6298 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6299 {
6300 	struct seq_file *m = file->private_data;
6301 	int ret;
6302 
6303 	ret = tracing_release(inode, file);
6304 
6305 	if (file->f_mode & FMODE_READ)
6306 		return ret;
6307 
6308 	/* If write only, the seq_file is just a stub */
6309 	if (m)
6310 		kfree(m->private);
6311 	kfree(m);
6312 
6313 	return 0;
6314 }
6315 
6316 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6317 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6318 				    size_t count, loff_t *ppos);
6319 static int tracing_buffers_release(struct inode *inode, struct file *file);
6320 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6321 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6322 
6323 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6324 {
6325 	struct ftrace_buffer_info *info;
6326 	int ret;
6327 
6328 	ret = tracing_buffers_open(inode, filp);
6329 	if (ret < 0)
6330 		return ret;
6331 
6332 	info = filp->private_data;
6333 
6334 	if (info->iter.trace->use_max_tr) {
6335 		tracing_buffers_release(inode, filp);
6336 		return -EBUSY;
6337 	}
6338 
6339 	info->iter.snapshot = true;
6340 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6341 
6342 	return ret;
6343 }
6344 
6345 #endif /* CONFIG_TRACER_SNAPSHOT */
6346 
6347 
6348 static const struct file_operations tracing_thresh_fops = {
6349 	.open		= tracing_open_generic,
6350 	.read		= tracing_thresh_read,
6351 	.write		= tracing_thresh_write,
6352 	.llseek		= generic_file_llseek,
6353 };
6354 
6355 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6356 static const struct file_operations tracing_max_lat_fops = {
6357 	.open		= tracing_open_generic,
6358 	.read		= tracing_max_lat_read,
6359 	.write		= tracing_max_lat_write,
6360 	.llseek		= generic_file_llseek,
6361 };
6362 #endif
6363 
6364 static const struct file_operations set_tracer_fops = {
6365 	.open		= tracing_open_generic,
6366 	.read		= tracing_set_trace_read,
6367 	.write		= tracing_set_trace_write,
6368 	.llseek		= generic_file_llseek,
6369 };
6370 
6371 static const struct file_operations tracing_pipe_fops = {
6372 	.open		= tracing_open_pipe,
6373 	.poll		= tracing_poll_pipe,
6374 	.read		= tracing_read_pipe,
6375 	.splice_read	= tracing_splice_read_pipe,
6376 	.release	= tracing_release_pipe,
6377 	.llseek		= no_llseek,
6378 };
6379 
6380 static const struct file_operations tracing_entries_fops = {
6381 	.open		= tracing_open_generic_tr,
6382 	.read		= tracing_entries_read,
6383 	.write		= tracing_entries_write,
6384 	.llseek		= generic_file_llseek,
6385 	.release	= tracing_release_generic_tr,
6386 };
6387 
6388 static const struct file_operations tracing_total_entries_fops = {
6389 	.open		= tracing_open_generic_tr,
6390 	.read		= tracing_total_entries_read,
6391 	.llseek		= generic_file_llseek,
6392 	.release	= tracing_release_generic_tr,
6393 };
6394 
6395 static const struct file_operations tracing_free_buffer_fops = {
6396 	.open		= tracing_open_generic_tr,
6397 	.write		= tracing_free_buffer_write,
6398 	.release	= tracing_free_buffer_release,
6399 };
6400 
6401 static const struct file_operations tracing_mark_fops = {
6402 	.open		= tracing_open_generic_tr,
6403 	.write		= tracing_mark_write,
6404 	.llseek		= generic_file_llseek,
6405 	.release	= tracing_release_generic_tr,
6406 };
6407 
6408 static const struct file_operations tracing_mark_raw_fops = {
6409 	.open		= tracing_open_generic_tr,
6410 	.write		= tracing_mark_raw_write,
6411 	.llseek		= generic_file_llseek,
6412 	.release	= tracing_release_generic_tr,
6413 };
6414 
6415 static const struct file_operations trace_clock_fops = {
6416 	.open		= tracing_clock_open,
6417 	.read		= seq_read,
6418 	.llseek		= seq_lseek,
6419 	.release	= tracing_single_release_tr,
6420 	.write		= tracing_clock_write,
6421 };
6422 
6423 #ifdef CONFIG_TRACER_SNAPSHOT
6424 static const struct file_operations snapshot_fops = {
6425 	.open		= tracing_snapshot_open,
6426 	.read		= seq_read,
6427 	.write		= tracing_snapshot_write,
6428 	.llseek		= tracing_lseek,
6429 	.release	= tracing_snapshot_release,
6430 };
6431 
6432 static const struct file_operations snapshot_raw_fops = {
6433 	.open		= snapshot_raw_open,
6434 	.read		= tracing_buffers_read,
6435 	.release	= tracing_buffers_release,
6436 	.splice_read	= tracing_buffers_splice_read,
6437 	.llseek		= no_llseek,
6438 };
6439 
6440 #endif /* CONFIG_TRACER_SNAPSHOT */
6441 
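/*
 * Open handler for the per-cpu "trace_pipe_raw" files, which give read and
 * splice access to the raw binary pages of the ring buffer.
 */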
6442 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6443 {
6444 	struct trace_array *tr = inode->i_private;
6445 	struct ftrace_buffer_info *info;
6446 	int ret;
6447 
6448 	if (tracing_disabled)
6449 		return -ENODEV;
6450 
6451 	if (trace_array_get(tr) < 0)
6452 		return -ENODEV;
6453 
6454 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6455 	if (!info) {
6456 		trace_array_put(tr);
6457 		return -ENOMEM;
6458 	}
6459 
6460 	mutex_lock(&trace_types_lock);
6461 
6462 	info->iter.tr		= tr;
6463 	info->iter.cpu_file	= tracing_get_cpu(inode);
6464 	info->iter.trace	= tr->current_trace;
6465 	info->iter.trace_buffer = &tr->trace_buffer;
6466 	info->spare		= NULL;
6467 	/* Force reading ring buffer for first read */
6468 	info->read		= (unsigned int)-1;
6469 
6470 	filp->private_data = info;
6471 
6472 	tr->current_trace->ref++;
6473 
6474 	mutex_unlock(&trace_types_lock);
6475 
6476 	ret = nonseekable_open(inode, filp);
6477 	if (ret < 0)
6478 		trace_array_put(tr);
6479 
6480 	return ret;
6481 }
6482 
6483 static unsigned int
6484 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6485 {
6486 	struct ftrace_buffer_info *info = filp->private_data;
6487 	struct trace_iterator *iter = &info->iter;
6488 
6489 	return trace_poll(iter, filp, poll_table);
6490 }
6491 
6492 static ssize_t
6493 tracing_buffers_read(struct file *filp, char __user *ubuf,
6494 		     size_t count, loff_t *ppos)
6495 {
6496 	struct ftrace_buffer_info *info = filp->private_data;
6497 	struct trace_iterator *iter = &info->iter;
6498 	ssize_t ret;
6499 	ssize_t size;
6500 
6501 	if (!count)
6502 		return 0;
6503 
6504 #ifdef CONFIG_TRACER_MAX_TRACE
6505 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6506 		return -EBUSY;
6507 #endif
6508 
6509 	if (!info->spare) {
6510 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6511 							  iter->cpu_file);
6512 		info->spare_cpu = iter->cpu_file;
6513 	}
6514 	if (!info->spare)
6515 		return -ENOMEM;
6516 
6517 	/* Do we have previous read data to read? */
6518 	if (info->read < PAGE_SIZE)
6519 		goto read;
6520 
6521  again:
6522 	trace_access_lock(iter->cpu_file);
6523 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6524 				    &info->spare,
6525 				    count,
6526 				    iter->cpu_file, 0);
6527 	trace_access_unlock(iter->cpu_file);
6528 
6529 	if (ret < 0) {
6530 		if (trace_empty(iter)) {
6531 			if ((filp->f_flags & O_NONBLOCK))
6532 				return -EAGAIN;
6533 
6534 			ret = wait_on_pipe(iter, false);
6535 			if (ret)
6536 				return ret;
6537 
6538 			goto again;
6539 		}
6540 		return 0;
6541 	}
6542 
6543 	info->read = 0;
6544  read:
6545 	size = PAGE_SIZE - info->read;
6546 	if (size > count)
6547 		size = count;
6548 
6549 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6550 	if (ret == size)
6551 		return -EFAULT;
6552 
6553 	size -= ret;
6554 
6555 	*ppos += size;
6556 	info->read += size;
6557 
6558 	return size;
6559 }
6560 
6561 static int tracing_buffers_release(struct inode *inode, struct file *file)
6562 {
6563 	struct ftrace_buffer_info *info = file->private_data;
6564 	struct trace_iterator *iter = &info->iter;
6565 
6566 	mutex_lock(&trace_types_lock);
6567 
6568 	iter->tr->current_trace->ref--;
6569 
6570 	__trace_array_put(iter->tr);
6571 
6572 	if (info->spare)
6573 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6574 					   info->spare_cpu, info->spare);
6575 	kfree(info);
6576 
6577 	mutex_unlock(&trace_types_lock);
6578 
6579 	return 0;
6580 }
6581 
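/*
 * Reference-counted wrapper around a ring buffer page that has been handed
 * to a pipe by tracing_buffers_splice_read(). The page is returned to the
 * ring buffer when the last reference is dropped.
 */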
6582 struct buffer_ref {
6583 	struct ring_buffer	*buffer;
6584 	void			*page;
6585 	int			cpu;
6586 	int			ref;
6587 };
6588 
6589 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6590 				    struct pipe_buffer *buf)
6591 {
6592 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6593 
6594 	if (--ref->ref)
6595 		return;
6596 
6597 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6598 	kfree(ref);
6599 	buf->private = 0;
6600 }
6601 
6602 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6603 				struct pipe_buffer *buf)
6604 {
6605 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6606 
6607 	ref->ref++;
6608 }
6609 
6610 /* Pipe buffer operations for a buffer. */
6611 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6612 	.can_merge		= 0,
6613 	.confirm		= generic_pipe_buf_confirm,
6614 	.release		= buffer_pipe_buf_release,
6615 	.steal			= generic_pipe_buf_steal,
6616 	.get			= buffer_pipe_buf_get,
6617 };
6618 
6619 /*
6620  * Callback from splice_to_pipe() to release some pages at the end
6621  * of the spd in case we errored out while filling the pipe.
6622  */
6623 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6624 {
6625 	struct buffer_ref *ref =
6626 		(struct buffer_ref *)spd->partial[i].private;
6627 
6628 	if (--ref->ref)
6629 		return;
6630 
6631 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6632 	kfree(ref);
6633 	spd->partial[i].private = 0;
6634 }
6635 
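/*
 * Splice whole ring buffer pages into a pipe (the zero-copy path of
 * trace_pipe_raw). The file offset must be page aligned and the length
 * is rounded down to whole pages.
 */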
6636 static ssize_t
6637 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6638 			    struct pipe_inode_info *pipe, size_t len,
6639 			    unsigned int flags)
6640 {
6641 	struct ftrace_buffer_info *info = file->private_data;
6642 	struct trace_iterator *iter = &info->iter;
6643 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6644 	struct page *pages_def[PIPE_DEF_BUFFERS];
6645 	struct splice_pipe_desc spd = {
6646 		.pages		= pages_def,
6647 		.partial	= partial_def,
6648 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6649 		.ops		= &buffer_pipe_buf_ops,
6650 		.spd_release	= buffer_spd_release,
6651 	};
6652 	struct buffer_ref *ref;
6653 	int entries, size, i;
6654 	ssize_t ret = 0;
6655 
6656 #ifdef CONFIG_TRACER_MAX_TRACE
6657 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6658 		return -EBUSY;
6659 #endif
6660 
6661 	if (*ppos & (PAGE_SIZE - 1))
6662 		return -EINVAL;
6663 
6664 	if (len & (PAGE_SIZE - 1)) {
6665 		if (len < PAGE_SIZE)
6666 			return -EINVAL;
6667 		len &= PAGE_MASK;
6668 	}
6669 
6670 	if (splice_grow_spd(pipe, &spd))
6671 		return -ENOMEM;
6672 
6673  again:
6674 	trace_access_lock(iter->cpu_file);
6675 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6676 
6677 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6678 		struct page *page;
6679 		int r;
6680 
6681 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6682 		if (!ref) {
6683 			ret = -ENOMEM;
6684 			break;
6685 		}
6686 
6687 		ref->ref = 1;
6688 		ref->buffer = iter->trace_buffer->buffer;
6689 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6690 		if (!ref->page) {
6691 			ret = -ENOMEM;
6692 			kfree(ref);
6693 			break;
6694 		}
6695 		ref->cpu = iter->cpu_file;
6696 
6697 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6698 					  len, iter->cpu_file, 1);
6699 		if (r < 0) {
6700 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6701 						   ref->page);
6702 			kfree(ref);
6703 			break;
6704 		}
6705 
6706 		/*
6707 		 * Zero out any leftover data; this page is going to
6708 		 * user land.
6709 		 */
6710 		size = ring_buffer_page_len(ref->page);
6711 		if (size < PAGE_SIZE)
6712 			memset(ref->page + size, 0, PAGE_SIZE - size);
6713 
6714 		page = virt_to_page(ref->page);
6715 
6716 		spd.pages[i] = page;
6717 		spd.partial[i].len = PAGE_SIZE;
6718 		spd.partial[i].offset = 0;
6719 		spd.partial[i].private = (unsigned long)ref;
6720 		spd.nr_pages++;
6721 		*ppos += PAGE_SIZE;
6722 
6723 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6724 	}
6725 
6726 	trace_access_unlock(iter->cpu_file);
6727 	spd.nr_pages = i;
6728 
6729 	/* did we read anything? */
6730 	if (!spd.nr_pages) {
6731 		if (ret)
6732 			goto out;
6733 
6734 		ret = -EAGAIN;
6735 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6736 			goto out;
6737 
6738 		ret = wait_on_pipe(iter, true);
6739 		if (ret)
6740 			goto out;
6741 
6742 		goto again;
6743 	}
6744 
6745 	ret = splice_to_pipe(pipe, &spd);
6746 out:
6747 	splice_shrink_spd(&spd);
6748 
6749 	return ret;
6750 }
6751 
6752 static const struct file_operations tracing_buffers_fops = {
6753 	.open		= tracing_buffers_open,
6754 	.read		= tracing_buffers_read,
6755 	.poll		= tracing_buffers_poll,
6756 	.release	= tracing_buffers_release,
6757 	.splice_read	= tracing_buffers_splice_read,
6758 	.llseek		= no_llseek,
6759 };
6760 
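/*
 * The per-cpu "stats" file: print ring buffer statistics for one CPU
 * (entries, overruns, bytes, oldest/current timestamps, dropped and
 * read events).
 */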
6761 static ssize_t
6762 tracing_stats_read(struct file *filp, char __user *ubuf,
6763 		   size_t count, loff_t *ppos)
6764 {
6765 	struct inode *inode = file_inode(filp);
6766 	struct trace_array *tr = inode->i_private;
6767 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6768 	int cpu = tracing_get_cpu(inode);
6769 	struct trace_seq *s;
6770 	unsigned long cnt;
6771 	unsigned long long t;
6772 	unsigned long usec_rem;
6773 
6774 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6775 	if (!s)
6776 		return -ENOMEM;
6777 
6778 	trace_seq_init(s);
6779 
6780 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6781 	trace_seq_printf(s, "entries: %ld\n", cnt);
6782 
6783 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6784 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6785 
6786 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6787 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6788 
6789 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6790 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6791 
6792 	if (trace_clocks[tr->clock_id].in_ns) {
6793 		/* local or global for trace_clock */
6794 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6795 		usec_rem = do_div(t, USEC_PER_SEC);
6796 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6797 								t, usec_rem);
6798 
6799 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6800 		usec_rem = do_div(t, USEC_PER_SEC);
6801 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6802 	} else {
6803 		/* counter or tsc mode for trace_clock */
6804 		trace_seq_printf(s, "oldest event ts: %llu\n",
6805 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6806 
6807 		trace_seq_printf(s, "now ts: %llu\n",
6808 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6809 	}
6810 
6811 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6812 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6813 
6814 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6815 	trace_seq_printf(s, "read events: %ld\n", cnt);
6816 
6817 	count = simple_read_from_buffer(ubuf, count, ppos,
6818 					s->buffer, trace_seq_used(s));
6819 
6820 	kfree(s);
6821 
6822 	return count;
6823 }
6824 
6825 static const struct file_operations tracing_stats_fops = {
6826 	.open		= tracing_open_generic_tr,
6827 	.read		= tracing_stats_read,
6828 	.llseek		= generic_file_llseek,
6829 	.release	= tracing_release_generic_tr,
6830 };
6831 
6832 #ifdef CONFIG_DYNAMIC_FTRACE
6833 
6834 static ssize_t
6835 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6836 		  size_t cnt, loff_t *ppos)
6837 {
6838 	unsigned long *p = filp->private_data;
6839 	char buf[64]; /* Not too big for a shallow stack */
6840 	int r;
6841 
6842 	r = scnprintf(buf, 63, "%ld", *p);
6843 	buf[r++] = '\n';
6844 
6845 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6846 }
6847 
6848 static const struct file_operations tracing_dyn_info_fops = {
6849 	.open		= tracing_open_generic,
6850 	.read		= tracing_read_dyn_info,
6851 	.llseek		= generic_file_llseek,
6852 };
6853 #endif /* CONFIG_DYNAMIC_FTRACE */
6854 
6855 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6856 static void
6857 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6858 		struct trace_array *tr, struct ftrace_probe_ops *ops,
6859 		void *data)
6860 {
6861 	tracing_snapshot_instance(tr);
6862 }
6863 
6864 static void
6865 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6866 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
6867 		      void *data)
6868 {
6869 	struct ftrace_func_mapper *mapper = data;
6870 	long *count = NULL;
6871 
6872 	if (mapper)
6873 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6874 
6875 	if (count) {
6876 
6877 		if (*count <= 0)
6878 			return;
6879 
6880 		(*count)--;
6881 	}
6882 
6883 	tracing_snapshot_instance(tr);
6884 }
6885 
6886 static int
6887 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6888 		      struct ftrace_probe_ops *ops, void *data)
6889 {
6890 	struct ftrace_func_mapper *mapper = data;
6891 	long *count = NULL;
6892 
6893 	seq_printf(m, "%ps:", (void *)ip);
6894 
6895 	seq_puts(m, "snapshot");
6896 
6897 	if (mapper)
6898 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6899 
6900 	if (count)
6901 		seq_printf(m, ":count=%ld\n", *count);
6902 	else
6903 		seq_puts(m, ":unlimited\n");
6904 
6905 	return 0;
6906 }
6907 
6908 static int
6909 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
6910 		     unsigned long ip, void *init_data, void **data)
6911 {
6912 	struct ftrace_func_mapper *mapper = *data;
6913 
6914 	if (!mapper) {
6915 		mapper = allocate_ftrace_func_mapper();
6916 		if (!mapper)
6917 			return -ENOMEM;
6918 		*data = mapper;
6919 	}
6920 
6921 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
6922 }
6923 
6924 static void
6925 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
6926 		     unsigned long ip, void *data)
6927 {
6928 	struct ftrace_func_mapper *mapper = data;
6929 
6930 	if (!ip) {
6931 		if (!mapper)
6932 			return;
6933 		free_ftrace_func_mapper(mapper, NULL);
6934 		return;
6935 	}
6936 
6937 	ftrace_func_mapper_remove_ip(mapper, ip);
6938 }
6939 
6940 static struct ftrace_probe_ops snapshot_probe_ops = {
6941 	.func			= ftrace_snapshot,
6942 	.print			= ftrace_snapshot_print,
6943 };
6944 
6945 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6946 	.func			= ftrace_count_snapshot,
6947 	.print			= ftrace_snapshot_print,
6948 	.init			= ftrace_snapshot_init,
6949 	.free			= ftrace_snapshot_free,
6950 };
6951 
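/*
 * Handler for the "snapshot" function command of set_ftrace_filter, e.g.:
 *
 *   echo '<function>:snapshot' > set_ftrace_filter
 *   echo '<function>:snapshot:<count>' > set_ftrace_filter
 *
 * which takes a snapshot every time <function> is hit (at most <count>
 * times if a count is given). A leading '!' removes the probe.
 */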
6952 static int
6953 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
6954 			       char *glob, char *cmd, char *param, int enable)
6955 {
6956 	struct ftrace_probe_ops *ops;
6957 	void *count = (void *)-1;
6958 	char *number;
6959 	int ret;
6960 
6961 	if (!tr)
6962 		return -ENODEV;
6963 
6964 	/* hash funcs only work with set_ftrace_filter */
6965 	if (!enable)
6966 		return -EINVAL;
6967 
6968 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6969 
6970 	if (glob[0] == '!')
6971 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
6972 
6973 	if (!param)
6974 		goto out_reg;
6975 
6976 	number = strsep(&param, ":");
6977 
6978 	if (!strlen(number))
6979 		goto out_reg;
6980 
6981 	/*
6982 	 * We use the callback data field (which is a pointer)
6983 	 * as our counter.
6984 	 */
6985 	ret = kstrtoul(number, 0, (unsigned long *)&count);
6986 	if (ret)
6987 		return ret;
6988 
6989  out_reg:
6990 	ret = alloc_snapshot(tr);
6991 	if (ret < 0)
6992 		goto out;
6993 
6994 	ret = register_ftrace_function_probe(glob, tr, ops, count);
6995 
6996  out:
6997 	return ret < 0 ? ret : 0;
6998 }
6999 
7000 static struct ftrace_func_command ftrace_snapshot_cmd = {
7001 	.name			= "snapshot",
7002 	.func			= ftrace_trace_snapshot_callback,
7003 };
7004 
7005 static __init int register_snapshot_cmd(void)
7006 {
7007 	return register_ftrace_command(&ftrace_snapshot_cmd);
7008 }
7009 #else
7010 static inline __init int register_snapshot_cmd(void) { return 0; }
7011 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7012 
7013 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7014 {
7015 	if (WARN_ON(!tr->dir))
7016 		return ERR_PTR(-ENODEV);
7017 
7018 	/* Top directory uses NULL as the parent */
7019 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7020 		return NULL;
7021 
7022 	/* All sub buffers have a descriptor */
7023 	return tr->dir;
7024 }
7025 
7026 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7027 {
7028 	struct dentry *d_tracer;
7029 
7030 	if (tr->percpu_dir)
7031 		return tr->percpu_dir;
7032 
7033 	d_tracer = tracing_get_dentry(tr);
7034 	if (IS_ERR(d_tracer))
7035 		return NULL;
7036 
7037 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7038 
7039 	WARN_ONCE(!tr->percpu_dir,
7040 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7041 
7042 	return tr->percpu_dir;
7043 }
7044 
7045 static struct dentry *
7046 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7047 		      void *data, long cpu, const struct file_operations *fops)
7048 {
7049 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7050 
7051 	if (ret) /* See tracing_get_cpu() */
7052 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7053 	return ret;
7054 }
7055 
7056 static void
7057 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7058 {
7059 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7060 	struct dentry *d_cpu;
7061 	char cpu_dir[30]; /* 30 characters should be more than enough */
7062 
7063 	if (!d_percpu)
7064 		return;
7065 
7066 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7067 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7068 	if (!d_cpu) {
7069 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7070 		return;
7071 	}
7072 
7073 	/* per cpu trace_pipe */
7074 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7075 				tr, cpu, &tracing_pipe_fops);
7076 
7077 	/* per cpu trace */
7078 	trace_create_cpu_file("trace", 0644, d_cpu,
7079 				tr, cpu, &tracing_fops);
7080 
7081 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7082 				tr, cpu, &tracing_buffers_fops);
7083 
7084 	trace_create_cpu_file("stats", 0444, d_cpu,
7085 				tr, cpu, &tracing_stats_fops);
7086 
7087 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7088 				tr, cpu, &tracing_entries_fops);
7089 
7090 #ifdef CONFIG_TRACER_SNAPSHOT
7091 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7092 				tr, cpu, &snapshot_fops);
7093 
7094 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7095 				tr, cpu, &snapshot_raw_fops);
7096 #endif
7097 }
7098 
7099 #ifdef CONFIG_FTRACE_SELFTEST
7100 /* Let selftest have access to static functions in this file */
7101 #include "trace_selftest.c"
7102 #endif
7103 
7104 static ssize_t
7105 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7106 			loff_t *ppos)
7107 {
7108 	struct trace_option_dentry *topt = filp->private_data;
7109 	char *buf;
7110 
7111 	if (topt->flags->val & topt->opt->bit)
7112 		buf = "1\n";
7113 	else
7114 		buf = "0\n";
7115 
7116 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7117 }
7118 
7119 static ssize_t
7120 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7121 			 loff_t *ppos)
7122 {
7123 	struct trace_option_dentry *topt = filp->private_data;
7124 	unsigned long val;
7125 	int ret;
7126 
7127 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7128 	if (ret)
7129 		return ret;
7130 
7131 	if (val != 0 && val != 1)
7132 		return -EINVAL;
7133 
7134 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7135 		mutex_lock(&trace_types_lock);
7136 		ret = __set_tracer_option(topt->tr, topt->flags,
7137 					  topt->opt, !val);
7138 		mutex_unlock(&trace_types_lock);
7139 		if (ret)
7140 			return ret;
7141 	}
7142 
7143 	*ppos += cnt;
7144 
7145 	return cnt;
7146 }
7147 
7148 
7149 static const struct file_operations trace_options_fops = {
7150 	.open = tracing_open_generic,
7151 	.read = trace_options_read,
7152 	.write = trace_options_write,
7153 	.llseek	= generic_file_llseek,
7154 };
7155 
7156 /*
7157  * In order to pass in both the trace_array descriptor and the index of
7158  * the flag that the trace option file represents, the trace_array has a
7159  * character array, trace_flags_index[], which holds the index
7160  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7161  * The address of an element of this character array is passed to the
7162  * flag option file read/write callbacks.
7163  *
7164  * In order to extract both the index and the trace_array descriptor,
7165  * get_tr_index() uses the following algorithm.
7166  *
7167  *   idx = *ptr;
7168  *
7169  * The pointer points at the array element holding the index, and that
7170  * element's value is the index itself (remember index[1] == 1).
7171  *
7172  * Then, to get the trace_array descriptor, subtracting that index from
7173  * the pointer takes us to the start of the index array itself:
7174  *
7175  *   ptr - idx == &index[0]
7176  *
7177  * Then a simple container_of() from that pointer gets us to the
7178  * trace_array descriptor.
7179  */
7180 static void get_tr_index(void *data, struct trace_array **ptr,
7181 			 unsigned int *pindex)
7182 {
7183 	*pindex = *(unsigned char *)data;
7184 
7185 	*ptr = container_of(data - *pindex, struct trace_array,
7186 			    trace_flags_index);
7187 }
7188 
7189 static ssize_t
7190 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7191 			loff_t *ppos)
7192 {
7193 	void *tr_index = filp->private_data;
7194 	struct trace_array *tr;
7195 	unsigned int index;
7196 	char *buf;
7197 
7198 	get_tr_index(tr_index, &tr, &index);
7199 
7200 	if (tr->trace_flags & (1 << index))
7201 		buf = "1\n";
7202 	else
7203 		buf = "0\n";
7204 
7205 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7206 }
7207 
7208 static ssize_t
7209 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7210 			 loff_t *ppos)
7211 {
7212 	void *tr_index = filp->private_data;
7213 	struct trace_array *tr;
7214 	unsigned int index;
7215 	unsigned long val;
7216 	int ret;
7217 
7218 	get_tr_index(tr_index, &tr, &index);
7219 
7220 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7221 	if (ret)
7222 		return ret;
7223 
7224 	if (val != 0 && val != 1)
7225 		return -EINVAL;
7226 
7227 	mutex_lock(&trace_types_lock);
7228 	ret = set_tracer_flag(tr, 1 << index, val);
7229 	mutex_unlock(&trace_types_lock);
7230 
7231 	if (ret < 0)
7232 		return ret;
7233 
7234 	*ppos += cnt;
7235 
7236 	return cnt;
7237 }
7238 
7239 static const struct file_operations trace_options_core_fops = {
7240 	.open = tracing_open_generic,
7241 	.read = trace_options_core_read,
7242 	.write = trace_options_core_write,
7243 	.llseek = generic_file_llseek,
7244 };
7245 
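/*
 * Wrapper around tracefs_create_file() that prints a warning when the
 * file could not be created.
 */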
7246 struct dentry *trace_create_file(const char *name,
7247 				 umode_t mode,
7248 				 struct dentry *parent,
7249 				 void *data,
7250 				 const struct file_operations *fops)
7251 {
7252 	struct dentry *ret;
7253 
7254 	ret = tracefs_create_file(name, mode, parent, data, fops);
7255 	if (!ret)
7256 		pr_warn("Could not create tracefs '%s' entry\n", name);
7257 
7258 	return ret;
7259 }
7260 
7261 
7262 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7263 {
7264 	struct dentry *d_tracer;
7265 
7266 	if (tr->options)
7267 		return tr->options;
7268 
7269 	d_tracer = tracing_get_dentry(tr);
7270 	if (IS_ERR(d_tracer))
7271 		return NULL;
7272 
7273 	tr->options = tracefs_create_dir("options", d_tracer);
7274 	if (!tr->options) {
7275 		pr_warn("Could not create tracefs directory 'options'\n");
7276 		return NULL;
7277 	}
7278 
7279 	return tr->options;
7280 }
7281 
7282 static void
7283 create_trace_option_file(struct trace_array *tr,
7284 			 struct trace_option_dentry *topt,
7285 			 struct tracer_flags *flags,
7286 			 struct tracer_opt *opt)
7287 {
7288 	struct dentry *t_options;
7289 
7290 	t_options = trace_options_init_dentry(tr);
7291 	if (!t_options)
7292 		return;
7293 
7294 	topt->flags = flags;
7295 	topt->opt = opt;
7296 	topt->tr = tr;
7297 
7298 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7299 				    &trace_options_fops);
7300 
7301 }
7302 
7303 static void
7304 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7305 {
7306 	struct trace_option_dentry *topts;
7307 	struct trace_options *tr_topts;
7308 	struct tracer_flags *flags;
7309 	struct tracer_opt *opts;
7310 	int cnt;
7311 	int i;
7312 
7313 	if (!tracer)
7314 		return;
7315 
7316 	flags = tracer->flags;
7317 
7318 	if (!flags || !flags->opts)
7319 		return;
7320 
7321 	/*
7322 	 * If this is an instance, only create flags for tracers
7323 	 * the instance may have.
7324 	 */
7325 	if (!trace_ok_for_array(tracer, tr))
7326 		return;
7327 
7328 	for (i = 0; i < tr->nr_topts; i++) {
7329 		/* Make sure there's no duplicate flags. */
7330 		/* Make sure there are no duplicate flags. */
7331 			return;
7332 	}
7333 
7334 	opts = flags->opts;
7335 
7336 	for (cnt = 0; opts[cnt].name; cnt++)
7337 		;
7338 
7339 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7340 	if (!topts)
7341 		return;
7342 
7343 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7344 			    GFP_KERNEL);
7345 	if (!tr_topts) {
7346 		kfree(topts);
7347 		return;
7348 	}
7349 
7350 	tr->topts = tr_topts;
7351 	tr->topts[tr->nr_topts].tracer = tracer;
7352 	tr->topts[tr->nr_topts].topts = topts;
7353 	tr->nr_topts++;
7354 
7355 	for (cnt = 0; opts[cnt].name; cnt++) {
7356 		create_trace_option_file(tr, &topts[cnt], flags,
7357 					 &opts[cnt]);
7358 		WARN_ONCE(topts[cnt].entry == NULL,
7359 			  "Failed to create trace option: %s",
7360 			  opts[cnt].name);
7361 	}
7362 }
7363 
7364 static struct dentry *
7365 create_trace_option_core_file(struct trace_array *tr,
7366 			      const char *option, long index)
7367 {
7368 	struct dentry *t_options;
7369 
7370 	t_options = trace_options_init_dentry(tr);
7371 	if (!t_options)
7372 		return NULL;
7373 
7374 	return trace_create_file(option, 0644, t_options,
7375 				 (void *)&tr->trace_flags_index[index],
7376 				 &trace_options_core_fops);
7377 }
7378 
7379 static void create_trace_options_dir(struct trace_array *tr)
7380 {
7381 	struct dentry *t_options;
7382 	bool top_level = tr == &global_trace;
7383 	int i;
7384 
7385 	t_options = trace_options_init_dentry(tr);
7386 	if (!t_options)
7387 		return;
7388 
7389 	for (i = 0; trace_options[i]; i++) {
7390 		if (top_level ||
7391 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7392 			create_trace_option_core_file(tr, trace_options[i], i);
7393 	}
7394 }
7395 
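/*
 * The "tracing_on" file: a 0/1 switch that turns writing to the ring
 * buffer off/on and invokes the current tracer's stop/start callbacks,
 * e.g. "echo 0 > tracing_on".
 */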
7396 static ssize_t
7397 rb_simple_read(struct file *filp, char __user *ubuf,
7398 	       size_t cnt, loff_t *ppos)
7399 {
7400 	struct trace_array *tr = filp->private_data;
7401 	char buf[64];
7402 	int r;
7403 
7404 	r = tracer_tracing_is_on(tr);
7405 	r = sprintf(buf, "%d\n", r);
7406 
7407 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7408 }
7409 
7410 static ssize_t
7411 rb_simple_write(struct file *filp, const char __user *ubuf,
7412 		size_t cnt, loff_t *ppos)
7413 {
7414 	struct trace_array *tr = filp->private_data;
7415 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7416 	unsigned long val;
7417 	int ret;
7418 
7419 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7420 	if (ret)
7421 		return ret;
7422 
7423 	if (buffer) {
7424 		mutex_lock(&trace_types_lock);
7425 		if (val) {
7426 			tracer_tracing_on(tr);
7427 			if (tr->current_trace->start)
7428 				tr->current_trace->start(tr);
7429 		} else {
7430 			tracer_tracing_off(tr);
7431 			if (tr->current_trace->stop)
7432 				tr->current_trace->stop(tr);
7433 		}
7434 		mutex_unlock(&trace_types_lock);
7435 	}
7436 
7437 	(*ppos)++;
7438 
7439 	return cnt;
7440 }
7441 
7442 static const struct file_operations rb_simple_fops = {
7443 	.open		= tracing_open_generic_tr,
7444 	.read		= rb_simple_read,
7445 	.write		= rb_simple_write,
7446 	.release	= tracing_release_generic_tr,
7447 	.llseek		= default_llseek,
7448 };
7449 
7450 struct dentry *trace_instance_dir;
7451 
7452 static void
7453 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7454 
7455 static int
7456 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7457 {
7458 	enum ring_buffer_flags rb_flags;
7459 
7460 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7461 
7462 	buf->tr = tr;
7463 
7464 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7465 	if (!buf->buffer)
7466 		return -ENOMEM;
7467 
7468 	buf->data = alloc_percpu(struct trace_array_cpu);
7469 	if (!buf->data) {
7470 		ring_buffer_free(buf->buffer);
7471 		return -ENOMEM;
7472 	}
7473 
7474 	/* Allocate the first page for all buffers */
7475 	set_buffer_entries(&tr->trace_buffer,
7476 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7477 
7478 	return 0;
7479 }
7480 
7481 static int allocate_trace_buffers(struct trace_array *tr, int size)
7482 {
7483 	int ret;
7484 
7485 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7486 	if (ret)
7487 		return ret;
7488 
7489 #ifdef CONFIG_TRACER_MAX_TRACE
7490 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7491 				    allocate_snapshot ? size : 1);
7492 	if (WARN_ON(ret)) {
7493 		ring_buffer_free(tr->trace_buffer.buffer);
7494 		free_percpu(tr->trace_buffer.data);
7495 		return -ENOMEM;
7496 	}
7497 	tr->allocated_snapshot = allocate_snapshot;
7498 
7499 	/*
7500 	 * Only the top level trace array gets its snapshot allocated
7501 	 * from the kernel command line.
7502 	 */
7503 	allocate_snapshot = false;
7504 #endif
7505 	return 0;
7506 }
7507 
7508 static void free_trace_buffer(struct trace_buffer *buf)
7509 {
7510 	if (buf->buffer) {
7511 		ring_buffer_free(buf->buffer);
7512 		buf->buffer = NULL;
7513 		free_percpu(buf->data);
7514 		buf->data = NULL;
7515 	}
7516 }
7517 
7518 static void free_trace_buffers(struct trace_array *tr)
7519 {
7520 	if (!tr)
7521 		return;
7522 
7523 	free_trace_buffer(&tr->trace_buffer);
7524 
7525 #ifdef CONFIG_TRACER_MAX_TRACE
7526 	free_trace_buffer(&tr->max_buffer);
7527 #endif
7528 }
7529 
7530 static void init_trace_flags_index(struct trace_array *tr)
7531 {
7532 	int i;
7533 
7534 	/* Used by the trace options files */
7535 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7536 		tr->trace_flags_index[i] = i;
7537 }
7538 
7539 static void __update_tracer_options(struct trace_array *tr)
7540 {
7541 	struct tracer *t;
7542 
7543 	for (t = trace_types; t; t = t->next)
7544 		add_tracer_options(tr, t);
7545 }
7546 
7547 static void update_tracer_options(struct trace_array *tr)
7548 {
7549 	mutex_lock(&trace_types_lock);
7550 	__update_tracer_options(tr);
7551 	mutex_unlock(&trace_types_lock);
7552 }
7553 
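/*
 * Called when a new directory is created under "instances": allocate a new
 * trace_array with its own ring buffer, events and tracefs files, e.g.:
 *
 *   mkdir instances/foo
 */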
7554 static int instance_mkdir(const char *name)
7555 {
7556 	struct trace_array *tr;
7557 	int ret;
7558 
7559 	mutex_lock(&trace_types_lock);
7560 
7561 	ret = -EEXIST;
7562 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7563 		if (tr->name && strcmp(tr->name, name) == 0)
7564 			goto out_unlock;
7565 	}
7566 
7567 	ret = -ENOMEM;
7568 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7569 	if (!tr)
7570 		goto out_unlock;
7571 
7572 	tr->name = kstrdup(name, GFP_KERNEL);
7573 	if (!tr->name)
7574 		goto out_free_tr;
7575 
7576 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7577 		goto out_free_tr;
7578 
7579 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7580 
7581 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7582 
7583 	raw_spin_lock_init(&tr->start_lock);
7584 
7585 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7586 
7587 	tr->current_trace = &nop_trace;
7588 
7589 	INIT_LIST_HEAD(&tr->systems);
7590 	INIT_LIST_HEAD(&tr->events);
7591 
7592 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7593 		goto out_free_tr;
7594 
7595 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7596 	if (!tr->dir)
7597 		goto out_free_tr;
7598 
7599 	ret = event_trace_add_tracer(tr->dir, tr);
7600 	if (ret) {
7601 		tracefs_remove_recursive(tr->dir);
7602 		goto out_free_tr;
7603 	}
7604 
7605 	ftrace_init_trace_array(tr);
7606 
7607 	init_tracer_tracefs(tr, tr->dir);
7608 	init_trace_flags_index(tr);
7609 	__update_tracer_options(tr);
7610 
7611 	list_add(&tr->list, &ftrace_trace_arrays);
7612 
7613 	mutex_unlock(&trace_types_lock);
7614 
7615 	return 0;
7616 
7617  out_free_tr:
7618 	free_trace_buffers(tr);
7619 	free_cpumask_var(tr->tracing_cpumask);
7620 	kfree(tr->name);
7621 	kfree(tr);
7622 
7623  out_unlock:
7624 	mutex_unlock(&trace_types_lock);
7625 
7626 	return ret;
7627 
7628 }
7629 
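/*
 * Called when a directory under "instances" is removed: tear down the
 * instance's events, tracefs files and buffers. Fails with -EBUSY while
 * the instance is still referenced (for example by an open trace file).
 */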
7630 static int instance_rmdir(const char *name)
7631 {
7632 	struct trace_array *tr;
7633 	int found = 0;
7634 	int ret;
7635 	int i;
7636 
7637 	mutex_lock(&trace_types_lock);
7638 
7639 	ret = -ENODEV;
7640 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7641 		if (tr->name && strcmp(tr->name, name) == 0) {
7642 			found = 1;
7643 			break;
7644 		}
7645 	}
7646 	if (!found)
7647 		goto out_unlock;
7648 
7649 	ret = -EBUSY;
7650 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7651 		goto out_unlock;
7652 
7653 	list_del(&tr->list);
7654 
7655 	/* Disable all the flags that were enabled coming in */
7656 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7657 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7658 			set_tracer_flag(tr, 1 << i, 0);
7659 	}
7660 
7661 	tracing_set_nop(tr);
7662 	clear_ftrace_function_probes(tr);
7663 	event_trace_del_tracer(tr);
7664 	ftrace_clear_pids(tr);
7665 	ftrace_destroy_function_files(tr);
7666 	tracefs_remove_recursive(tr->dir);
7667 	free_trace_buffers(tr);
7668 
7669 	for (i = 0; i < tr->nr_topts; i++) {
7670 		kfree(tr->topts[i].topts);
7671 	}
7672 	kfree(tr->topts);
7673 
7674 	kfree(tr->name);
7675 	kfree(tr);
7676 
7677 	ret = 0;
7678 
7679  out_unlock:
7680 	mutex_unlock(&trace_types_lock);
7681 
7682 	return ret;
7683 }
7684 
7685 static __init void create_trace_instances(struct dentry *d_tracer)
7686 {
7687 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7688 							 instance_mkdir,
7689 							 instance_rmdir);
7690 	if (WARN_ON(!trace_instance_dir))
7691 		return;
7692 }
7693 
7694 static void
7695 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7696 {
7697 	int cpu;
7698 
7699 	trace_create_file("available_tracers", 0444, d_tracer,
7700 			tr, &show_traces_fops);
7701 
7702 	trace_create_file("current_tracer", 0644, d_tracer,
7703 			tr, &set_tracer_fops);
7704 
7705 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7706 			  tr, &tracing_cpumask_fops);
7707 
7708 	trace_create_file("trace_options", 0644, d_tracer,
7709 			  tr, &tracing_iter_fops);
7710 
7711 	trace_create_file("trace", 0644, d_tracer,
7712 			  tr, &tracing_fops);
7713 
7714 	trace_create_file("trace_pipe", 0444, d_tracer,
7715 			  tr, &tracing_pipe_fops);
7716 
7717 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7718 			  tr, &tracing_entries_fops);
7719 
7720 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7721 			  tr, &tracing_total_entries_fops);
7722 
7723 	trace_create_file("free_buffer", 0200, d_tracer,
7724 			  tr, &tracing_free_buffer_fops);
7725 
7726 	trace_create_file("trace_marker", 0220, d_tracer,
7727 			  tr, &tracing_mark_fops);
7728 
7729 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7730 			  tr, &tracing_mark_raw_fops);
7731 
7732 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7733 			  &trace_clock_fops);
7734 
7735 	trace_create_file("tracing_on", 0644, d_tracer,
7736 			  tr, &rb_simple_fops);
7737 
7738 	create_trace_options_dir(tr);
7739 
7740 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7741 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7742 			&tr->max_latency, &tracing_max_lat_fops);
7743 #endif
7744 
7745 	if (ftrace_create_function_files(tr, d_tracer))
7746 		WARN(1, "Could not allocate function filter files");
7747 
7748 #ifdef CONFIG_TRACER_SNAPSHOT
7749 	trace_create_file("snapshot", 0644, d_tracer,
7750 			  tr, &snapshot_fops);
7751 #endif
7752 
7753 	for_each_tracing_cpu(cpu)
7754 		tracing_init_tracefs_percpu(tr, cpu);
7755 
7756 	ftrace_init_tracefs(tr, d_tracer);
7757 }
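/*
 * Illustration: the files created above are ordinary tracefs files, so an
 * instance can be driven with plain open()/write(). A rough userspace
 * sketch for the top-level instance, assuming the usual mount point
 * /sys/kernel/tracing:
 */
#if 0	/* userspace sketch, not part of this file */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void write_tracefs(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return;
	if (write(fd, val, strlen(val)) < 0)
		perror(path);
	close(fd);
}

int main(void)
{
	write_tracefs("/sys/kernel/tracing/current_tracer", "nop");
	write_tracefs("/sys/kernel/tracing/tracing_on", "1");
	/* Ends up in the ring buffer via the trace_marker write handler */
	write_tracefs("/sys/kernel/tracing/trace_marker", "hello from userspace");
	return 0;
}
#endif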
7758 
7759 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7760 {
7761 	struct vfsmount *mnt;
7762 	struct file_system_type *type;
7763 
7764 	/*
7765 	 * To maintain backward compatibility for tools that mount
7766 	 * debugfs to get to the tracing facility, tracefs is automatically
7767 	 * mounted to the debugfs/tracing directory.
7768 	 */
7769 	type = get_fs_type("tracefs");
7770 	if (!type)
7771 		return NULL;
7772 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7773 	put_filesystem(type);
7774 	if (IS_ERR(mnt))
7775 		return NULL;
7776 	mntget(mnt);
7777 
7778 	return mnt;
7779 }
7780 
7781 /**
7782  * tracing_init_dentry - initialize top level trace array
7783  *
7784  * This is called when creating files or directories in the tracing
7785  * directory. It is called via fs_initcall() by the boot-up code and
7786  * is expected to return the dentry of the top level tracing directory.
7787  */
7788 struct dentry *tracing_init_dentry(void)
7789 {
7790 	struct trace_array *tr = &global_trace;
7791 
7792 	/* The top level trace array uses NULL as parent */
7793 	if (tr->dir)
7794 		return NULL;
7795 
7796 	if (WARN_ON(!tracefs_initialized()) ||
7797 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
7798 		 WARN_ON(!debugfs_initialized())))
7799 		return ERR_PTR(-ENODEV);
7800 
7801 	/*
7802 	 * As there may still be users that expect the tracing
7803 	 * files to exist in debugfs/tracing, we must automount
7804 	 * the tracefs file system there, so older tools still
7805 	 * work with the newer kernel.
7806 	 */
7807 	tr->dir = debugfs_create_automount("tracing", NULL,
7808 					   trace_automount, NULL);
7809 	if (!tr->dir) {
7810 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7811 		return ERR_PTR(-ENOMEM);
7812 	}
7813 
7814 	return NULL;
7815 }
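/*
 * Illustration: thanks to the automount set up above, tools can reach the
 * same tracing files through either the tracefs mount point or the legacy
 * debugfs path. A small sketch probing both (the paths below are the
 * conventional mount points, not guaranteed on every system):
 */
#if 0	/* userspace sketch, not part of this file */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	static const char * const paths[] = {
		"/sys/kernel/tracing/trace",		/* tracefs proper */
		"/sys/kernel/debug/tracing/trace",	/* automounted for older tools */
	};
	int i;

	for (i = 0; i < 2; i++)
		printf("%s: %s\n", paths[i],
		       access(paths[i], R_OK) == 0 ? "reachable" : "not reachable");
	return 0;
}
#endif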
7816 
7817 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7818 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7819 
7820 static void __init trace_eval_init(void)
7821 {
7822 	int len;
7823 
7824 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7825 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7826 }
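/*
 * Illustration: the __start/__stop_ftrace_eval_maps section walked above is
 * populated from trace event headers. A header that wants an enum printed
 * symbolically typically carries entries like the sketch below (the enum
 * and its values are made-up placeholders):
 */
#if 0	/* belongs in a TRACE_EVENT header; shown here only as a sketch */
enum demo_state {
	DEMO_IDLE,
	DEMO_RUNNING,
};

/*
 * When the header is processed with CREATE_TRACE_POINTS, these typically
 * emit struct trace_eval_map entries that land in the _ftrace_eval_map
 * section collected by trace_eval_init() above.
 */
TRACE_DEFINE_ENUM(DEMO_IDLE);
TRACE_DEFINE_ENUM(DEMO_RUNNING);

/*
 * __print_symbolic() in the event's TP_printk() can then resolve
 * DEMO_IDLE/DEMO_RUNNING to their names in the "trace" output.
 */
#endif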
7827 
7828 #ifdef CONFIG_MODULES
7829 static void trace_module_add_evals(struct module *mod)
7830 {
7831 	if (!mod->num_trace_evals)
7832 		return;
7833 
7834 	/*
7835 	 * Modules with bad taint do not have events created; do not
7836 	 * bother with their eval maps either.
7837 	 */
7838 	if (trace_module_has_bad_taint(mod))
7839 		return;
7840 
7841 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
7842 }
7843 
7844 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
7845 static void trace_module_remove_evals(struct module *mod)
7846 {
7847 	union trace_eval_map_item *map;
7848 	union trace_eval_map_item **last = &trace_eval_maps;
7849 
7850 	if (!mod->num_trace_evals)
7851 		return;
7852 
7853 	mutex_lock(&trace_eval_mutex);
7854 
7855 	map = trace_eval_maps;
7856 
7857 	while (map) {
7858 		if (map->head.mod == mod)
7859 			break;
7860 		map = trace_eval_jmp_to_tail(map);
7861 		last = &map->tail.next;
7862 		map = map->tail.next;
7863 	}
7864 	if (!map)
7865 		goto out;
7866 
7867 	*last = trace_eval_jmp_to_tail(map)->tail.next;
7868 	kfree(map);
7869  out:
7870 	mutex_unlock(&trace_eval_mutex);
7871 }
7872 #else
7873 static inline void trace_module_remove_evals(struct module *mod) { }
7874 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
7875 
7876 static int trace_module_notify(struct notifier_block *self,
7877 			       unsigned long val, void *data)
7878 {
7879 	struct module *mod = data;
7880 
7881 	switch (val) {
7882 	case MODULE_STATE_COMING:
7883 		trace_module_add_evals(mod);
7884 		break;
7885 	case MODULE_STATE_GOING:
7886 		trace_module_remove_evals(mod);
7887 		break;
7888 	}
7889 
7890 	return 0;
7891 }
7892 
7893 static struct notifier_block trace_module_nb = {
7894 	.notifier_call = trace_module_notify,
7895 	.priority = 0,
7896 };
7897 #endif /* CONFIG_MODULES */
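/*
 * Illustration: trace_module_nb above is a standard module notifier. The
 * same pattern in a standalone module might look like the sketch below
 * (all "demo_*" names are made up):
 */
#if 0	/* standalone sketch of the same notifier pattern */
#include <linux/module.h>
#include <linux/notifier.h>

static int demo_module_notify(struct notifier_block *self,
			      unsigned long val, void *data)
{
	struct module *mod = data;

	if (val == MODULE_STATE_COMING)
		pr_info("demo: module %s is loading\n", mod->name);
	return NOTIFY_OK;
}

static struct notifier_block demo_module_nb = {
	.notifier_call = demo_module_notify,
};

static int __init demo_init(void)
{
	return register_module_notifier(&demo_module_nb);
}
module_init(demo_init);

static void __exit demo_exit(void)
{
	unregister_module_notifier(&demo_module_nb);
}
module_exit(demo_exit);

MODULE_LICENSE("GPL");
#endif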
7898 
7899 static __init int tracer_init_tracefs(void)
7900 {
7901 	struct dentry *d_tracer;
7902 
7903 	trace_access_lock_init();
7904 
7905 	d_tracer = tracing_init_dentry();
7906 	if (IS_ERR(d_tracer))
7907 		return 0;
7908 
7909 	init_tracer_tracefs(&global_trace, d_tracer);
7910 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7911 
7912 	trace_create_file("tracing_thresh", 0644, d_tracer,
7913 			&global_trace, &tracing_thresh_fops);
7914 
7915 	trace_create_file("README", 0444, d_tracer,
7916 			NULL, &tracing_readme_fops);
7917 
7918 	trace_create_file("saved_cmdlines", 0444, d_tracer,
7919 			NULL, &tracing_saved_cmdlines_fops);
7920 
7921 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7922 			  NULL, &tracing_saved_cmdlines_size_fops);
7923 
7924 	trace_eval_init();
7925 
7926 	trace_create_eval_file(d_tracer);
7927 
7928 #ifdef CONFIG_MODULES
7929 	register_module_notifier(&trace_module_nb);
7930 #endif
7931 
7932 #ifdef CONFIG_DYNAMIC_FTRACE
7933 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7934 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7935 #endif
7936 
7937 	create_trace_instances(d_tracer);
7938 
7939 	update_tracer_options(&global_trace);
7940 
7941 	return 0;
7942 }
7943 
7944 static int trace_panic_handler(struct notifier_block *this,
7945 			       unsigned long event, void *unused)
7946 {
7947 	if (ftrace_dump_on_oops)
7948 		ftrace_dump(ftrace_dump_on_oops);
7949 	return NOTIFY_OK;
7950 }
7951 
7952 static struct notifier_block trace_panic_notifier = {
7953 	.notifier_call  = trace_panic_handler,
7954 	.next           = NULL,
7955 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
7956 };
7957 
7958 static int trace_die_handler(struct notifier_block *self,
7959 			     unsigned long val,
7960 			     void *data)
7961 {
7962 	switch (val) {
7963 	case DIE_OOPS:
7964 		if (ftrace_dump_on_oops)
7965 			ftrace_dump(ftrace_dump_on_oops);
7966 		break;
7967 	default:
7968 		break;
7969 	}
7970 	return NOTIFY_OK;
7971 }
7972 
7973 static struct notifier_block trace_die_notifier = {
7974 	.notifier_call = trace_die_handler,
7975 	.priority = 200
7976 };
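/*
 * Illustration: the panic/die notifiers above only dump when
 * ftrace_dump_on_oops is non-zero. It is usually enabled on the kernel
 * command line ("ftrace_dump_on_oops" or "ftrace_dump_on_oops=orig_cpu"),
 * or at run time through the sysctl, as in this userspace sketch (assumes
 * CONFIG_TRACING and procfs mounted at /proc):
 */
#if 0	/* userspace sketch, not part of this file */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/sys/kernel/ftrace_dump_on_oops", O_WRONLY);

	if (fd >= 0) {
		if (write(fd, "1", 1) < 0)	/* dump all CPUs on the next oops */
			return 1;
		close(fd);
	}
	return 0;
}
#endif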
7977 
7978 /*
7979  * printk is limited to a maximum of 1024 characters; we really don't
7980  * need that much. Nothing should be printing 1000 characters anyway.
7981  */
7982 #define TRACE_MAX_PRINT		1000
7983 
7984 /*
7985  * Define here KERN_TRACE so that we have one place to modify
7986  * it if we decide to change what log level the ftrace dump
7987  * should be at.
7988  */
7989 #define KERN_TRACE		KERN_EMERG
7990 
7991 void
7992 trace_printk_seq(struct trace_seq *s)
7993 {
7994 	/* Probably should print a warning here. */
7995 	if (s->seq.len >= TRACE_MAX_PRINT)
7996 		s->seq.len = TRACE_MAX_PRINT;
7997 
7998 	/*
7999 	 * More paranoid code. Although the buffer size is set to
8000 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8001 	 * an extra layer of protection.
8002 	 */
8003 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8004 		s->seq.len = s->seq.size - 1;
8005 
8006 	/* should be zero terminated, but we are paranoid. */
8007 	s->buffer[s->seq.len] = 0;
8008 
8009 	printk(KERN_TRACE "%s", s->buffer);
8010 
8011 	trace_seq_init(s);
8012 }
8013 
8014 void trace_init_global_iter(struct trace_iterator *iter)
8015 {
8016 	iter->tr = &global_trace;
8017 	iter->trace = iter->tr->current_trace;
8018 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8019 	iter->trace_buffer = &global_trace.trace_buffer;
8020 
8021 	if (iter->trace && iter->trace->open)
8022 		iter->trace->open(iter);
8023 
8024 	/* Annotate start of buffers if we had overruns */
8025 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8026 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8027 
8028 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8029 	if (trace_clocks[iter->tr->clock_id].in_ns)
8030 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8031 }
8032 
8033 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8034 {
8035 	/* use static because iter can be a bit big for the stack */
8036 	static struct trace_iterator iter;
8037 	static atomic_t dump_running;
8038 	struct trace_array *tr = &global_trace;
8039 	unsigned int old_userobj;
8040 	unsigned long flags;
8041 	int cnt = 0, cpu;
8042 
8043 	/* Only allow one dump user at a time. */
8044 	if (atomic_inc_return(&dump_running) != 1) {
8045 		atomic_dec(&dump_running);
8046 		return;
8047 	}
8048 
8049 	/*
8050 	 * Always turn off tracing when we dump.
8051 	 * We don't need to show trace output of what happens
8052 	 * between multiple crashes.
8053 	 *
8054 	 * If the user does a sysrq-z, then they can re-enable
8055 	 * tracing with echo 1 > tracing_on.
8056 	 */
8057 	tracing_off();
8058 
8059 	local_irq_save(flags);
8060 
8061 	/* Simulate the iterator */
8062 	trace_init_global_iter(&iter);
8063 
8064 	for_each_tracing_cpu(cpu) {
8065 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8066 	}
8067 
8068 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8069 
8070 	/* don't look at user memory in panic mode */
8071 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8072 
8073 	switch (oops_dump_mode) {
8074 	case DUMP_ALL:
8075 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8076 		break;
8077 	case DUMP_ORIG:
8078 		iter.cpu_file = raw_smp_processor_id();
8079 		break;
8080 	case DUMP_NONE:
8081 		goto out_enable;
8082 	default:
8083 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8084 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8085 	}
8086 
8087 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8088 
8089 	/* Did function tracer already get disabled? */
8090 	if (ftrace_is_dead()) {
8091 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8092 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8093 	}
8094 
8095 	/*
8096 	 * We need to stop all tracing on all CPUs to read
8097 	 * the next buffer. This is a bit expensive, but is
8098 	 * not done often. We fill in all that we can read,
8099 	 * and then release the locks again.
8100 	 */
8101 
8102 	while (!trace_empty(&iter)) {
8103 
8104 		if (!cnt)
8105 			printk(KERN_TRACE "---------------------------------\n");
8106 
8107 		cnt++;
8108 
8109 		/* reset all but tr, trace, and overruns */
8110 		memset(&iter.seq, 0,
8111 		       sizeof(struct trace_iterator) -
8112 		       offsetof(struct trace_iterator, seq));
8113 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8114 		iter.pos = -1;
8115 
8116 		if (trace_find_next_entry_inc(&iter) != NULL) {
8117 			int ret;
8118 
8119 			ret = print_trace_line(&iter);
8120 			if (ret != TRACE_TYPE_NO_CONSUME)
8121 				trace_consume(&iter);
8122 		}
8123 		touch_nmi_watchdog();
8124 
8125 		trace_printk_seq(&iter.seq);
8126 	}
8127 
8128 	if (!cnt)
8129 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8130 	else
8131 		printk(KERN_TRACE "---------------------------------\n");
8132 
8133  out_enable:
8134 	tr->trace_flags |= old_userobj;
8135 
8136 	for_each_tracing_cpu(cpu) {
8137 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8138 	}
8139 	atomic_dec(&dump_running);
8140 	local_irq_restore(flags);
8141 }
8142 EXPORT_SYMBOL_GPL(ftrace_dump);
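/*
 * Illustration: since ftrace_dump() is exported, other kernel code can dump
 * the ring buffer at an interesting failure point. A hedged sketch; the
 * function name and condition are placeholders:
 */
#if 0	/* sketch only */
static void demo_handle_failure(bool corrupted)
{
	if (WARN_ONCE(corrupted, "demo: unexpected hardware state\n"))
		ftrace_dump(DUMP_ALL);	/* DUMP_ORIG would dump only this CPU */
}
#endif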
8143 
8144 __init static int tracer_alloc_buffers(void)
8145 {
8146 	int ring_buf_size;
8147 	int ret = -ENOMEM;
8148 
8149 	/*
8150 	 * Make sure we don't accidentally add more trace options
8151 	 * than we have bits for.
8152 	 */
8153 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8154 
8155 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8156 		goto out;
8157 
8158 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8159 		goto out_free_buffer_mask;
8160 
8161 	/* Only allocate trace_printk buffers if a trace_printk exists */
8162 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8163 		/* Must be called before global_trace.buffer is allocated */
8164 		trace_printk_init_buffers();
8165 
8166 	/* To save memory, keep the ring buffer size at its minimum */
8167 	if (ring_buffer_expanded)
8168 		ring_buf_size = trace_buf_size;
8169 	else
8170 		ring_buf_size = 1;
8171 
8172 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8173 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8174 
8175 	raw_spin_lock_init(&global_trace.start_lock);
8176 
8177 	/*
8178 	 * The prepare callback allocates some memory for the ring buffer. We
8179 	 * don't free the buffer if the CPU goes down. If we were to free
8180 	 * the buffer, then the user would lose any trace that was in the
8181 	 * buffer. The memory will be removed once the "instance" is removed.
8182 	 */
8183 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8184 				      "trace/RB:prepare", trace_rb_cpu_prepare,
8185 				      NULL);
8186 	if (ret < 0)
8187 		goto out_free_cpumask;
8188 	/* Used for event triggers */
8189 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8190 	if (!temp_buffer)
8191 		goto out_rm_hp_state;
8192 
8193 	if (trace_create_savedcmd() < 0)
8194 		goto out_free_temp_buffer;
8195 
8196 	/* TODO: make the number of buffers hot pluggable with CPUS */
8197 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8198 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8199 		WARN_ON(1);
8200 		goto out_free_savedcmd;
8201 	}
8202 
8203 	if (global_trace.buffer_disabled)
8204 		tracing_off();
8205 
8206 	if (trace_boot_clock) {
8207 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8208 		if (ret < 0)
8209 			pr_warn("Trace clock %s not defined, going back to default\n",
8210 				trace_boot_clock);
8211 	}
8212 
8213 	/*
8214 	 * register_tracer() might reference current_trace, so it
8215 	 * needs to be set before we register anything. This is
8216 	 * just a bootstrap of current_trace anyway.
8217 	 */
8218 	global_trace.current_trace = &nop_trace;
8219 
8220 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8221 
8222 	ftrace_init_global_array_ops(&global_trace);
8223 
8224 	init_trace_flags_index(&global_trace);
8225 
8226 	register_tracer(&nop_trace);
8227 
8228 	/* Function tracing may start here (via kernel command line) */
8229 	init_function_trace();
8230 
8231 	/* All seems OK, enable tracing */
8232 	tracing_disabled = 0;
8233 
8234 	atomic_notifier_chain_register(&panic_notifier_list,
8235 				       &trace_panic_notifier);
8236 
8237 	register_die_notifier(&trace_die_notifier);
8238 
8239 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8240 
8241 	INIT_LIST_HEAD(&global_trace.systems);
8242 	INIT_LIST_HEAD(&global_trace.events);
8243 	list_add(&global_trace.list, &ftrace_trace_arrays);
8244 
8245 	apply_trace_boot_options();
8246 
8247 	register_snapshot_cmd();
8248 
8249 	return 0;
8250 
8251 out_free_savedcmd:
8252 	free_saved_cmdlines_buffer(savedcmd);
8253 out_free_temp_buffer:
8254 	ring_buffer_free(temp_buffer);
8255 out_rm_hp_state:
8256 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8257 out_free_cpumask:
8258 	free_cpumask_var(global_trace.tracing_cpumask);
8259 out_free_buffer_mask:
8260 	free_cpumask_var(tracing_buffer_mask);
8261 out:
8262 	return ret;
8263 }
8264 
8265 void __init early_trace_init(void)
8266 {
8267 	if (tracepoint_printk) {
8268 		tracepoint_print_iter =
8269 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8270 		if (WARN_ON(!tracepoint_print_iter))
8271 			tracepoint_printk = 0;
8272 		else
8273 			static_key_enable(&tracepoint_printk_key.key);
8274 	}
8275 	tracer_alloc_buffers();
8276 }
8277 
8278 void __init trace_init(void)
8279 {
8280 	trace_event_init();
8281 }
8282 
8283 __init static int clear_boot_tracer(void)
8284 {
8285 	/*
8286 	 * The buffer holding the default boot-up tracer name lives in
8287 	 * an init section. This function is called as a late initcall;
8288 	 * if the boot tracer was never found, clear the pointer to
8289 	 * prevent a later registration from accessing the buffer that
8290 	 * is about to be freed.
8291 	 */
8292 	if (!default_bootup_tracer)
8293 		return 0;
8294 
8295 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8296 	       default_bootup_tracer);
8297 	default_bootup_tracer = NULL;
8298 
8299 	return 0;
8300 }
8301 
8302 fs_initcall(tracer_init_tracefs);
8303 late_initcall(clear_boot_tracer);
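/*
 * Illustration: the boot-time paths above (early_trace_init(), trace_init(),
 * tracer_alloc_buffers() and clear_boot_tracer()) are driven by kernel
 * command-line options. One plausible combination:
 *
 *	ftrace=function trace_buf_size=1M trace_clock=global tp_printk
 *
 * "ftrace=" selects default_bootup_tracer, "trace_buf_size=" expands the
 * ring buffer early, "trace_clock=" feeds trace_boot_clock, and "tp_printk"
 * sets tracepoint_printk, which early_trace_init() above turns into the
 * tracepoint_printk_key static key.
 */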
8304