1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 
49 #include "trace.h"
50 #include "trace_output.h"
51 
52 /*
53  * On boot up, the ring buffer is set to the minimum size, so that
54  * we do not waste memory on systems that are not using tracing.
55  */
56 bool ring_buffer_expanded;
57 
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will lurk into the ring-buffer to count the
61  * entries inserted during the selftest although some concurrent
62  * insertions into the ring-buffer such as trace_printk could occurred
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66 
67 /*
68  * If a tracer is running, we do not want to run SELFTEST.
69  */
70 bool __read_mostly tracing_selftest_disabled;
71 
72 /* Pipe tracepoints to printk */
73 struct trace_iterator *tracepoint_print_iter;
74 int tracepoint_printk;
75 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
76 
77 /* For tracers that don't implement custom flags */
78 static struct tracer_opt dummy_tracer_opt[] = {
79 	{ }
80 };
81 
82 static int
83 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84 {
85 	return 0;
86 }
87 
88 /*
89  * To prevent the comm cache from being overwritten when no
90  * tracing is active, only save the comm when a trace event
91  * occurred.
92  */
93 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
94 
95 /*
96  * Kill all tracing for good (never come back).
97  * It is initialized to 1 but is set back to zero if the initialization
98  * of the tracer is successful. That is the only place that sets
99  * this back to zero.
100  */
101 static int tracing_disabled = 1;
102 
103 cpumask_var_t __read_mostly	tracing_buffer_mask;
104 
105 /*
106  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
107  *
108  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
109  * is set, then ftrace_dump is called. This will output the contents
110  * of the ftrace buffers to the console.  This is very useful for
111  * capturing traces that lead to crashes and outputing it to a
112  * serial console.
113  *
114  * It is default off, but you can enable it with either specifying
115  * "ftrace_dump_on_oops" in the kernel command line, or setting
116  * /proc/sys/kernel/ftrace_dump_on_oops
117  * Set 1 if you want to dump buffers of all CPUs
118  * Set 2 if you want to dump the buffer of the CPU that triggered oops
119  */
120 
121 enum ftrace_dump_mode ftrace_dump_on_oops;
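/*
 * Illustrative usage (a sketch, not part of the original source), based
 * on the description above and the boot parameter handler below:
 *
 *   ftrace_dump_on_oops=orig_cpu    # kernel command line, dump only
 *                                   # the CPU that triggered the oops
 *
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops   # run time, all CPUs
 */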
122 
123 /* When set, tracing will stop when a WARN*() is hit */
124 int __disable_trace_on_warning;
125 
126 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
127 /* Map of enums to their values, for "eval_map" file */
128 struct trace_eval_map_head {
129 	struct module			*mod;
130 	unsigned long			length;
131 };
132 
133 union trace_eval_map_item;
134 
135 struct trace_eval_map_tail {
136 	/*
137 	 * "end" is first and points to NULL as it must be different
138 	 * than "mod" or "eval_string"
139 	 */
140 	union trace_eval_map_item	*next;
141 	const char			*end;	/* points to NULL */
142 };
143 
144 static DEFINE_MUTEX(trace_eval_mutex);
145 
146 /*
147  * The trace_eval_maps are saved in an array with two extra elements,
148  * one at the beginning, and one at the end. The beginning item contains
149  * the count of the saved maps (head.length), and the module they
150  * belong to if not built in (head.mod). The ending item contains a
151  * pointer to the next array of saved eval_map items.
152  */
153 union trace_eval_map_item {
154 	struct trace_eval_map		map;
155 	struct trace_eval_map_head	head;
156 	struct trace_eval_map_tail	tail;
157 };
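/*
 * Illustrative layout (a sketch with made-up names, not part of the
 * original source) for a module that saved three eval maps:
 *
 *   trace_eval_maps -> [0] head  (length = 3, mod = owning module)
 *                      [1] map   ("FOO_STATE" = 1)
 *                      [2] map   ("BAR_STATE" = 2)
 *                      [3] map   ("BAZ_STATE" = 3)
 *                      [4] tail  (next = next saved array, end = NULL)
 */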
158 
159 static union trace_eval_map_item *trace_eval_maps;
160 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
161 
162 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
163 static void ftrace_trace_userstack(struct ring_buffer *buffer,
164 				   unsigned long flags, int pc);
165 
166 #define MAX_TRACER_SIZE		100
167 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
168 static char *default_bootup_tracer;
169 
170 static bool allocate_snapshot;
171 
172 static int __init set_cmdline_ftrace(char *str)
173 {
174 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
175 	default_bootup_tracer = bootup_tracer_buf;
176 	/* We are using ftrace early, expand it */
177 	ring_buffer_expanded = true;
178 	return 1;
179 }
180 __setup("ftrace=", set_cmdline_ftrace);
181 
182 static int __init set_ftrace_dump_on_oops(char *str)
183 {
184 	if (*str++ != '=' || !*str) {
185 		ftrace_dump_on_oops = DUMP_ALL;
186 		return 1;
187 	}
188 
189 	if (!strcmp("orig_cpu", str)) {
190 		ftrace_dump_on_oops = DUMP_ORIG;
191 		return 1;
192 	}
193 
194 	return 0;
195 }
196 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
197 
198 static int __init stop_trace_on_warning(char *str)
199 {
200 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
201 		__disable_trace_on_warning = 1;
202 	return 1;
203 }
204 __setup("traceoff_on_warning", stop_trace_on_warning);
205 
206 static int __init boot_alloc_snapshot(char *str)
207 {
208 	allocate_snapshot = true;
209 	/* We also need the main ring buffer expanded */
210 	ring_buffer_expanded = true;
211 	return 1;
212 }
213 __setup("alloc_snapshot", boot_alloc_snapshot);
214 
215 
216 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
217 
218 static int __init set_trace_boot_options(char *str)
219 {
220 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
221 	return 0;
222 }
223 __setup("trace_options=", set_trace_boot_options);
224 
225 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
226 static char *trace_boot_clock __initdata;
227 
228 static int __init set_trace_boot_clock(char *str)
229 {
230 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
231 	trace_boot_clock = trace_boot_clock_buf;
232 	return 0;
233 }
234 __setup("trace_clock=", set_trace_boot_clock);
235 
236 static int __init set_tracepoint_printk(char *str)
237 {
238 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
239 		tracepoint_printk = 1;
240 	return 1;
241 }
242 __setup("tp_printk", set_tracepoint_printk);
243 
244 unsigned long long ns2usecs(u64 nsec)
245 {
246 	nsec += 500;
247 	do_div(nsec, 1000);
248 	return nsec;
249 }
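/*
 * Example (illustrative, not part of the original source): the +500 makes
 * the integer division round to the nearest microsecond, so
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */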
250 
251 /* trace_flags holds trace_options default values */
252 #define TRACE_DEFAULT_FLAGS						\
253 	(FUNCTION_DEFAULT_FLAGS |					\
254 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
255 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
256 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
257 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
258 
259 /* trace_options that are only supported by global_trace */
260 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
261 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
262 
263 /* trace_flags that are default zero for instances */
264 #define ZEROED_TRACE_FLAGS \
265 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
266 
267 /*
268  * The global_trace is the descriptor that holds the top-level tracing
269  * buffers for the live tracing.
270  */
271 static struct trace_array global_trace = {
272 	.trace_flags = TRACE_DEFAULT_FLAGS,
273 };
274 
275 LIST_HEAD(ftrace_trace_arrays);
276 
277 int trace_array_get(struct trace_array *this_tr)
278 {
279 	struct trace_array *tr;
280 	int ret = -ENODEV;
281 
282 	mutex_lock(&trace_types_lock);
283 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
284 		if (tr == this_tr) {
285 			tr->ref++;
286 			ret = 0;
287 			break;
288 		}
289 	}
290 	mutex_unlock(&trace_types_lock);
291 
292 	return ret;
293 }
294 
295 static void __trace_array_put(struct trace_array *this_tr)
296 {
297 	WARN_ON(!this_tr->ref);
298 	this_tr->ref--;
299 }
300 
301 void trace_array_put(struct trace_array *this_tr)
302 {
303 	mutex_lock(&trace_types_lock);
304 	__trace_array_put(this_tr);
305 	mutex_unlock(&trace_types_lock);
306 }
307 
308 int tracing_check_open_get_tr(struct trace_array *tr)
309 {
310 	int ret;
311 
312 	ret = security_locked_down(LOCKDOWN_TRACEFS);
313 	if (ret)
314 		return ret;
315 
316 	if (tracing_disabled)
317 		return -ENODEV;
318 
319 	if (tr && trace_array_get(tr) < 0)
320 		return -ENODEV;
321 
322 	return 0;
323 }
324 
325 int call_filter_check_discard(struct trace_event_call *call, void *rec,
326 			      struct ring_buffer *buffer,
327 			      struct ring_buffer_event *event)
328 {
329 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
330 	    !filter_match_preds(call->filter, rec)) {
331 		__trace_event_discard_commit(buffer, event);
332 		return 1;
333 	}
334 
335 	return 0;
336 }
337 
338 void trace_free_pid_list(struct trace_pid_list *pid_list)
339 {
340 	vfree(pid_list->pids);
341 	kfree(pid_list);
342 }
343 
344 /**
345  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
346  * @filtered_pids: The list of pids to check
347  * @search_pid: The PID to find in @filtered_pids
348  *
349  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
350  */
351 bool
352 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
353 {
354 	/*
355 	 * If pid_max changed after filtered_pids was created, we
356 	 * by default ignore all pids greater than the previous pid_max.
357 	 */
358 	if (search_pid >= filtered_pids->pid_max)
359 		return false;
360 
361 	return test_bit(search_pid, filtered_pids->pids);
362 }
363 
364 /**
365  * trace_ignore_this_task - should a task be ignored for tracing
366  * @filtered_pids: The list of pids to check
367  * @task: The task that should be ignored if not filtered
368  *
369  * Checks if @task should be traced or not from @filtered_pids.
370  * Returns true if @task should *NOT* be traced.
371  * Returns false if @task should be traced.
372  */
373 bool
374 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
375 {
376 	/*
377 	 * Return false, because if filtered_pids does not exist,
378 	 * all pids are good to trace.
379 	 */
380 	if (!filtered_pids)
381 		return false;
382 
383 	return !trace_find_filtered_pid(filtered_pids, task->pid);
384 }
385 
386 /**
387  * trace_filter_add_remove_task - Add or remove a task from a pid_list
388  * @pid_list: The list to modify
389  * @self: The current task for fork or NULL for exit
390  * @task: The task to add or remove
391  *
392  * If adding a task, if @self is defined, the task is only added if @self
393  * is also included in @pid_list. This happens on fork and tasks should
394  * only be added when the parent is listed. If @self is NULL, then the
395  * @task pid will be removed from the list, which would happen on exit
396  * of a task.
397  */
398 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
399 				  struct task_struct *self,
400 				  struct task_struct *task)
401 {
402 	if (!pid_list)
403 		return;
404 
405 	/* For forks, we only add if the forking task is listed */
406 	if (self) {
407 		if (!trace_find_filtered_pid(pid_list, self->pid))
408 			return;
409 	}
410 
411 	/* Sorry, but we don't support pid_max changing after setting */
412 	if (task->pid >= pid_list->pid_max)
413 		return;
414 
415 	/* "self" is set for forks, and NULL for exits */
416 	if (self)
417 		set_bit(task->pid, pid_list->pids);
418 	else
419 		clear_bit(task->pid, pid_list->pids);
420 }
421 
422 /**
423  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
424  * @pid_list: The pid list to show
425  * @v: The last pid that was shown (+1 of the actual pid, to let zero be displayed)
426  * @pos: The position of the file
427  *
428  * This is used by the seq_file "next" operation to iterate the pids
429  * listed in a trace_pid_list structure.
430  *
431  * Returns the pid+1 as we want to display pid of zero, but NULL would
432  * stop the iteration.
433  */
434 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
435 {
436 	unsigned long pid = (unsigned long)v;
437 
438 	(*pos)++;
439 
440 	/* pid already is +1 of the actual previous bit */
441 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
442 
443 	/* Return pid + 1 to allow zero to be represented */
444 	if (pid < pid_list->pid_max)
445 		return (void *)(pid + 1);
446 
447 	return NULL;
448 }
449 
450 /**
451  * trace_pid_start - Used for seq_file to start reading pid lists
452  * @pid_list: The pid list to show
453  * @pos: The position of the file
454  *
455  * This is used by seq_file "start" operation to start the iteration
456  * of listing pids.
457  *
458  * Returns the pid+1 as we want to display pid of zero, but NULL would
459  * stop the iteration.
460  */
461 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
462 {
463 	unsigned long pid;
464 	loff_t l = 0;
465 
466 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
467 	if (pid >= pid_list->pid_max)
468 		return NULL;
469 
470 	/* Return pid + 1 so that zero can be the exit value */
471 	for (pid++; pid && l < *pos;
472 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
473 		;
474 	return (void *)pid;
475 }
476 
477 /**
478  * trace_pid_show - show the current pid in seq_file processing
479  * @m: The seq_file structure to write into
480  * @v: A void pointer of the pid (+1) value to display
481  *
482  * Can be directly used by seq_file operations to display the current
483  * pid value.
484  */
485 int trace_pid_show(struct seq_file *m, void *v)
486 {
487 	unsigned long pid = (unsigned long)v - 1;
488 
489 	seq_printf(m, "%lu\n", pid);
490 	return 0;
491 }
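/*
 * Illustrative sketch of the +1 encoding shared by trace_pid_start(),
 * trace_pid_next() and trace_pid_show() (not part of the original
 * source): seq_file treats a NULL iterator value as "stop", so pid 0 is
 * passed around as (void *)1, pid 37 as (void *)38, and trace_pid_show()
 * subtracts the 1 again before printing.
 */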
492 
493 /* 128 should be much more than enough */
494 #define PID_BUF_SIZE		127
495 
496 int trace_pid_write(struct trace_pid_list *filtered_pids,
497 		    struct trace_pid_list **new_pid_list,
498 		    const char __user *ubuf, size_t cnt)
499 {
500 	struct trace_pid_list *pid_list;
501 	struct trace_parser parser;
502 	unsigned long val;
503 	int nr_pids = 0;
504 	ssize_t read = 0;
505 	ssize_t ret = 0;
506 	loff_t pos;
507 	pid_t pid;
508 
509 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
510 		return -ENOMEM;
511 
512 	/*
513 	 * Always recreate a new array. The write is an all or nothing
514 	 * operation. Always create a new array when adding new pids by
515 	 * the user. If the operation fails, then the current list is
516 	 * not modified.
517 	 */
518 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
519 	if (!pid_list) {
520 		trace_parser_put(&parser);
521 		return -ENOMEM;
522 	}
523 
524 	pid_list->pid_max = READ_ONCE(pid_max);
525 
526 	/* Only truncating will shrink pid_max */
527 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
528 		pid_list->pid_max = filtered_pids->pid_max;
529 
530 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
531 	if (!pid_list->pids) {
532 		trace_parser_put(&parser);
533 		kfree(pid_list);
534 		return -ENOMEM;
535 	}
536 
537 	if (filtered_pids) {
538 		/* copy the current bits to the new max */
539 		for_each_set_bit(pid, filtered_pids->pids,
540 				 filtered_pids->pid_max) {
541 			set_bit(pid, pid_list->pids);
542 			nr_pids++;
543 		}
544 	}
545 
546 	while (cnt > 0) {
547 
548 		pos = 0;
549 
550 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
551 		if (ret < 0 || !trace_parser_loaded(&parser))
552 			break;
553 
554 		read += ret;
555 		ubuf += ret;
556 		cnt -= ret;
557 
558 		ret = -EINVAL;
559 		if (kstrtoul(parser.buffer, 0, &val))
560 			break;
561 		if (val >= pid_list->pid_max)
562 			break;
563 
564 		pid = (pid_t)val;
565 
566 		set_bit(pid, pid_list->pids);
567 		nr_pids++;
568 
569 		trace_parser_clear(&parser);
570 		ret = 0;
571 	}
572 	trace_parser_put(&parser);
573 
574 	if (ret < 0) {
575 		trace_free_pid_list(pid_list);
576 		return ret;
577 	}
578 
579 	if (!nr_pids) {
580 		/* Cleared the list of pids */
581 		trace_free_pid_list(pid_list);
582 		read = ret;
583 		pid_list = NULL;
584 	}
585 
586 	*new_pid_list = pid_list;
587 
588 	return read;
589 }
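/*
 * Illustrative use from user space (a sketch, not part of the original
 * source): the tracefs pid filter files funnel their writes into
 * trace_pid_write(), and as noted above the write is all-or-nothing:
 *
 *   echo "123 456" > /sys/kernel/debug/tracing/set_event_pid   # replace list
 *   echo > /sys/kernel/debug/tracing/set_event_pid             # clear list
 */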
590 
591 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
592 {
593 	u64 ts;
594 
595 	/* Early boot up does not have a buffer yet */
596 	if (!buf->buffer)
597 		return trace_clock_local();
598 
599 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
600 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
601 
602 	return ts;
603 }
604 
605 u64 ftrace_now(int cpu)
606 {
607 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
608 }
609 
610 /**
611  * tracing_is_enabled - Show if global_trace has been disabled
612  *
613  * Shows if the global trace has been enabled or not. It uses the
614  * mirror flag "buffer_disabled" so that it can be used in fast paths
615  * such as by the irqsoff tracer. But it may be inaccurate due to races. If you
616  * need to know the accurate state, use tracing_is_on() which is a little
617  * slower, but accurate.
618  */
619 int tracing_is_enabled(void)
620 {
621 	/*
622 	 * For quick access (irqsoff uses this in fast path), just
623 	 * return the mirror variable of the state of the ring buffer.
624 	 * It's a little racy, but we don't really care.
625 	 */
626 	smp_rmb();
627 	return !global_trace.buffer_disabled;
628 }
629 
630 /*
631  * trace_buf_size is the size in bytes that is allocated
632  * for a buffer. Note, the number of bytes is always rounded
633  * to page size.
634  *
635  * This number is purposely set to the low value of 16384.
636  * If a dump on oops happens, it will be much appreciated
637  * not to have to wait for all that output. Anyway, this is
638  * configurable both at boot time and at run time.
639  */
640 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
641 
642 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
643 
644 /* trace_types holds a link list of available tracers. */
645 static struct tracer		*trace_types __read_mostly;
646 
647 /*
648  * trace_types_lock is used to protect the trace_types list.
649  */
650 DEFINE_MUTEX(trace_types_lock);
651 
652 /*
653  * Serialize access to the ring buffer.
654  *
655  * The ring buffer serializes readers, but that is only low-level protection.
656  * The validity of the events (returned by ring_buffer_peek() etc.)
657  * is not protected by the ring buffer.
658  *
659  * The content of events may become garbage if we allow other processes
660  * to consume these events concurrently:
661  *   A) the page of the consumed events may become a normal page
662  *      (not a reader page) in the ring buffer, and this page will be
663  *      rewritten by the event producer.
664  *   B) the page of the consumed events may become a page for splice_read,
665  *      and this page will be returned to the system.
666  *
667  * These primitives allow multiple processes to access different per-CPU
668  * ring buffers concurrently.
669  *
670  * These primitives don't distinguish read-only and read-consume access.
671  * Multiple read-only accesses are also serialized.
672  */
673 
674 #ifdef CONFIG_SMP
675 static DECLARE_RWSEM(all_cpu_access_lock);
676 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
677 
678 static inline void trace_access_lock(int cpu)
679 {
680 	if (cpu == RING_BUFFER_ALL_CPUS) {
681 		/* gain it for accessing the whole ring buffer. */
682 		down_write(&all_cpu_access_lock);
683 	} else {
684 		/* gain it for accessing a cpu ring buffer. */
685 
686 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
687 		down_read(&all_cpu_access_lock);
688 
689 		/* Secondly block other access to this @cpu ring buffer. */
690 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
691 	}
692 }
693 
694 static inline void trace_access_unlock(int cpu)
695 {
696 	if (cpu == RING_BUFFER_ALL_CPUS) {
697 		up_write(&all_cpu_access_lock);
698 	} else {
699 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
700 		up_read(&all_cpu_access_lock);
701 	}
702 }
703 
704 static inline void trace_access_lock_init(void)
705 {
706 	int cpu;
707 
708 	for_each_possible_cpu(cpu)
709 		mutex_init(&per_cpu(cpu_access_lock, cpu));
710 }
711 
712 #else
713 
714 static DEFINE_MUTEX(access_lock);
715 
716 static inline void trace_access_lock(int cpu)
717 {
718 	(void)cpu;
719 	mutex_lock(&access_lock);
720 }
721 
722 static inline void trace_access_unlock(int cpu)
723 {
724 	(void)cpu;
725 	mutex_unlock(&access_lock);
726 }
727 
728 static inline void trace_access_lock_init(void)
729 {
730 }
731 
732 #endif
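/*
 * Illustrative usage of the primitives above (a sketch, not part of the
 * original source): a reader of a single per-cpu buffer brackets its
 * access like this, while RING_BUFFER_ALL_CPUS takes the lock that
 * excludes all per-cpu readers:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	... use the event ...
 *	trace_access_unlock(cpu);
 */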
733 
734 #ifdef CONFIG_STACKTRACE
735 static void __ftrace_trace_stack(struct ring_buffer *buffer,
736 				 unsigned long flags,
737 				 int skip, int pc, struct pt_regs *regs);
738 static inline void ftrace_trace_stack(struct trace_array *tr,
739 				      struct ring_buffer *buffer,
740 				      unsigned long flags,
741 				      int skip, int pc, struct pt_regs *regs);
742 
743 #else
744 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
745 					unsigned long flags,
746 					int skip, int pc, struct pt_regs *regs)
747 {
748 }
749 static inline void ftrace_trace_stack(struct trace_array *tr,
750 				      struct ring_buffer *buffer,
751 				      unsigned long flags,
752 				      int skip, int pc, struct pt_regs *regs)
753 {
754 }
755 
756 #endif
757 
758 static __always_inline void
759 trace_event_setup(struct ring_buffer_event *event,
760 		  int type, unsigned long flags, int pc)
761 {
762 	struct trace_entry *ent = ring_buffer_event_data(event);
763 
764 	tracing_generic_entry_update(ent, type, flags, pc);
765 }
766 
767 static __always_inline struct ring_buffer_event *
768 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
769 			  int type,
770 			  unsigned long len,
771 			  unsigned long flags, int pc)
772 {
773 	struct ring_buffer_event *event;
774 
775 	event = ring_buffer_lock_reserve(buffer, len);
776 	if (event != NULL)
777 		trace_event_setup(event, type, flags, pc);
778 
779 	return event;
780 }
781 
782 void tracer_tracing_on(struct trace_array *tr)
783 {
784 	if (tr->trace_buffer.buffer)
785 		ring_buffer_record_on(tr->trace_buffer.buffer);
786 	/*
787 	 * This flag is looked at when buffers haven't been allocated
788 	 * yet, or by some tracers (like irqsoff) that just want to
789 	 * know if the ring buffer has been disabled, but they can handle
790 	 * races where it gets disabled while we still do a record.
791 	 * As the check is in the fast path of the tracers, it is more
792 	 * important to be fast than accurate.
793 	 */
794 	tr->buffer_disabled = 0;
795 	/* Make the flag seen by readers */
796 	smp_wmb();
797 }
798 
799 /**
800  * tracing_on - enable tracing buffers
801  *
802  * This function enables tracing buffers that may have been
803  * disabled with tracing_off.
804  */
805 void tracing_on(void)
806 {
807 	tracer_tracing_on(&global_trace);
808 }
809 EXPORT_SYMBOL_GPL(tracing_on);
810 
811 
812 static __always_inline void
813 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
814 {
815 	__this_cpu_write(trace_taskinfo_save, true);
816 
817 	/* If this is the temp buffer, we need to commit fully */
818 	if (this_cpu_read(trace_buffered_event) == event) {
819 		/* Length is in event->array[0] */
820 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
821 		/* Release the temp buffer */
822 		this_cpu_dec(trace_buffered_event_cnt);
823 	} else
824 		ring_buffer_unlock_commit(buffer, event);
825 }
826 
827 /**
828  * __trace_puts - write a constant string into the trace buffer.
829  * @ip:	   The address of the caller
830  * @str:   The constant string to write
831  * @size:  The size of the string.
832  */
833 int __trace_puts(unsigned long ip, const char *str, int size)
834 {
835 	struct ring_buffer_event *event;
836 	struct ring_buffer *buffer;
837 	struct print_entry *entry;
838 	unsigned long irq_flags;
839 	int alloc;
840 	int pc;
841 
842 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
843 		return 0;
844 
845 	pc = preempt_count();
846 
847 	if (unlikely(tracing_selftest_running || tracing_disabled))
848 		return 0;
849 
850 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
851 
852 	local_save_flags(irq_flags);
853 	buffer = global_trace.trace_buffer.buffer;
854 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
855 					    irq_flags, pc);
856 	if (!event)
857 		return 0;
858 
859 	entry = ring_buffer_event_data(event);
860 	entry->ip = ip;
861 
862 	memcpy(&entry->buf, str, size);
863 
864 	/* Add a newline if necessary */
865 	if (entry->buf[size - 1] != '\n') {
866 		entry->buf[size] = '\n';
867 		entry->buf[size + 1] = '\0';
868 	} else
869 		entry->buf[size] = '\0';
870 
871 	__buffer_unlock_commit(buffer, event);
872 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
873 
874 	return size;
875 }
876 EXPORT_SYMBOL_GPL(__trace_puts);
877 
878 /**
879  * __trace_bputs - write the pointer to a constant string into trace buffer
880  * @ip:	   The address of the caller
881  * @str:   The constant string to write to the buffer to
882  */
883 int __trace_bputs(unsigned long ip, const char *str)
884 {
885 	struct ring_buffer_event *event;
886 	struct ring_buffer *buffer;
887 	struct bputs_entry *entry;
888 	unsigned long irq_flags;
889 	int size = sizeof(struct bputs_entry);
890 	int pc;
891 
892 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
893 		return 0;
894 
895 	pc = preempt_count();
896 
897 	if (unlikely(tracing_selftest_running || tracing_disabled))
898 		return 0;
899 
900 	local_save_flags(irq_flags);
901 	buffer = global_trace.trace_buffer.buffer;
902 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
903 					    irq_flags, pc);
904 	if (!event)
905 		return 0;
906 
907 	entry = ring_buffer_event_data(event);
908 	entry->ip			= ip;
909 	entry->str			= str;
910 
911 	__buffer_unlock_commit(buffer, event);
912 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
913 
914 	return 1;
915 }
916 EXPORT_SYMBOL_GPL(__trace_bputs);
917 
918 #ifdef CONFIG_TRACER_SNAPSHOT
919 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
920 {
921 	struct tracer *tracer = tr->current_trace;
922 	unsigned long flags;
923 
924 	if (in_nmi()) {
925 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
926 		internal_trace_puts("*** snapshot is being ignored        ***\n");
927 		return;
928 	}
929 
930 	if (!tr->allocated_snapshot) {
931 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
932 		internal_trace_puts("*** stopping trace here!   ***\n");
933 		tracing_off();
934 		return;
935 	}
936 
937 	/* Note, snapshot can not be used when the tracer uses it */
938 	if (tracer->use_max_tr) {
939 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
940 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
941 		return;
942 	}
943 
944 	local_irq_save(flags);
945 	update_max_tr(tr, current, smp_processor_id(), cond_data);
946 	local_irq_restore(flags);
947 }
948 
949 void tracing_snapshot_instance(struct trace_array *tr)
950 {
951 	tracing_snapshot_instance_cond(tr, NULL);
952 }
953 
954 /**
955  * tracing_snapshot - take a snapshot of the current buffer.
956  *
957  * This causes a swap between the snapshot buffer and the current live
958  * tracing buffer. You can use this to take snapshots of the live
959  * trace when some condition is triggered, but continue to trace.
960  *
961  * Note, make sure to allocate the snapshot either with
962  * tracing_snapshot_alloc(), or by doing it manually
963  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
964  *
965  * If the snapshot buffer is not allocated, this will stop tracing,
966  * basically making a permanent snapshot.
967  */
968 void tracing_snapshot(void)
969 {
970 	struct trace_array *tr = &global_trace;
971 
972 	tracing_snapshot_instance(tr);
973 }
974 EXPORT_SYMBOL_GPL(tracing_snapshot);
975 
976 /**
977  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
978  * @tr:		The tracing instance to snapshot
979  * @cond_data:	The data to be tested conditionally, and possibly saved
980  *
981  * This is the same as tracing_snapshot() except that the snapshot is
982  * conditional - the snapshot will only happen if the
983  * cond_snapshot.update() implementation receiving the cond_data
984  * returns true, which means that the trace array's cond_snapshot
985  * update() operation used the cond_data to determine whether the
986  * snapshot should be taken, and if it was, presumably saved it along
987  * with the snapshot.
988  */
989 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
990 {
991 	tracing_snapshot_instance_cond(tr, cond_data);
992 }
993 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
994 
995 /**
996  * tracing_snapshot_cond_data - get the user data associated with a snapshot
997  * @tr:		The tracing instance
998  *
999  * When the user enables a conditional snapshot using
1000  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1001  * with the snapshot.  This accessor is used to retrieve it.
1002  *
1003  * Should not be called from cond_snapshot.update(), since it takes
1004  * the tr->max_lock lock, which the code calling
1005  * cond_snapshot.update() has already done.
1006  *
1007  * Returns the cond_data associated with the trace array's snapshot.
1008  */
1009 void *tracing_cond_snapshot_data(struct trace_array *tr)
1010 {
1011 	void *cond_data = NULL;
1012 
1013 	arch_spin_lock(&tr->max_lock);
1014 
1015 	if (tr->cond_snapshot)
1016 		cond_data = tr->cond_snapshot->cond_data;
1017 
1018 	arch_spin_unlock(&tr->max_lock);
1019 
1020 	return cond_data;
1021 }
1022 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1023 
1024 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1025 					struct trace_buffer *size_buf, int cpu_id);
1026 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1027 
1028 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1029 {
1030 	int ret;
1031 
1032 	if (!tr->allocated_snapshot) {
1033 
1034 		/* allocate spare buffer */
1035 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1036 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1037 		if (ret < 0)
1038 			return ret;
1039 
1040 		tr->allocated_snapshot = true;
1041 	}
1042 
1043 	return 0;
1044 }
1045 
1046 static void free_snapshot(struct trace_array *tr)
1047 {
1048 	/*
1049 	 * We don't free the ring buffer; instead, we resize it because
1050 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1051 	 * we want to preserve it.
1052 	 */
1053 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1054 	set_buffer_entries(&tr->max_buffer, 1);
1055 	tracing_reset_online_cpus(&tr->max_buffer);
1056 	tr->allocated_snapshot = false;
1057 }
1058 
1059 /**
1060  * tracing_alloc_snapshot - allocate snapshot buffer.
1061  *
1062  * This only allocates the snapshot buffer if it isn't already
1063  * allocated - it doesn't also take a snapshot.
1064  *
1065  * This is meant to be used in cases where the snapshot buffer needs
1066  * to be set up for events that can't sleep but need to be able to
1067  * trigger a snapshot.
1068  */
1069 int tracing_alloc_snapshot(void)
1070 {
1071 	struct trace_array *tr = &global_trace;
1072 	int ret;
1073 
1074 	ret = tracing_alloc_snapshot_instance(tr);
1075 	WARN_ON(ret < 0);
1076 
1077 	return ret;
1078 }
1079 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1080 
1081 /**
1082  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1083  *
1084  * This is similar to tracing_snapshot(), but it will allocate the
1085  * snapshot buffer if it isn't already allocated. Use this only
1086  * where it is safe to sleep, as the allocation may sleep.
1087  *
1088  * This causes a swap between the snapshot buffer and the current live
1089  * tracing buffer. You can use this to take snapshots of the live
1090  * trace when some condition is triggered, but continue to trace.
1091  */
1092 void tracing_snapshot_alloc(void)
1093 {
1094 	int ret;
1095 
1096 	ret = tracing_alloc_snapshot();
1097 	if (ret < 0)
1098 		return;
1099 
1100 	tracing_snapshot();
1101 }
1102 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1103 
1104 /**
1105  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1106  * @tr:		The tracing instance
1107  * @cond_data:	User data to associate with the snapshot
1108  * @update:	Implementation of the cond_snapshot update function
1109  *
1110  * Check whether the conditional snapshot for the given instance has
1111  * already been enabled, or if the current tracer is already using a
1112  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1113  * save the cond_data and update function inside.
1114  *
1115  * Returns 0 if successful, error otherwise.
1116  */
1117 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1118 				 cond_update_fn_t update)
1119 {
1120 	struct cond_snapshot *cond_snapshot;
1121 	int ret = 0;
1122 
1123 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1124 	if (!cond_snapshot)
1125 		return -ENOMEM;
1126 
1127 	cond_snapshot->cond_data = cond_data;
1128 	cond_snapshot->update = update;
1129 
1130 	mutex_lock(&trace_types_lock);
1131 
1132 	ret = tracing_alloc_snapshot_instance(tr);
1133 	if (ret)
1134 		goto fail_unlock;
1135 
1136 	if (tr->current_trace->use_max_tr) {
1137 		ret = -EBUSY;
1138 		goto fail_unlock;
1139 	}
1140 
1141 	/*
1142 	 * The cond_snapshot can only change to NULL without the
1143 	 * trace_types_lock. We don't care if we race with it going
1144 	 * to NULL, but we want to make sure that it's not set to
1145 	 * something other than NULL when we get here, which we can
1146 	 * do safely with only holding the trace_types_lock and not
1147 	 * having to take the max_lock.
1148 	 */
1149 	if (tr->cond_snapshot) {
1150 		ret = -EBUSY;
1151 		goto fail_unlock;
1152 	}
1153 
1154 	arch_spin_lock(&tr->max_lock);
1155 	tr->cond_snapshot = cond_snapshot;
1156 	arch_spin_unlock(&tr->max_lock);
1157 
1158 	mutex_unlock(&trace_types_lock);
1159 
1160 	return ret;
1161 
1162  fail_unlock:
1163 	mutex_unlock(&trace_types_lock);
1164 	kfree(cond_snapshot);
1165 	return ret;
1166 }
1167 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1168 
1169 /**
1170  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1171  * @tr:		The tracing instance
1172  *
1173  * Check whether the conditional snapshot for the given instance is
1174  * enabled; if so, free the cond_snapshot associated with it,
1175  * otherwise return -EINVAL.
1176  *
1177  * Returns 0 if successful, error otherwise.
1178  */
1179 int tracing_snapshot_cond_disable(struct trace_array *tr)
1180 {
1181 	int ret = 0;
1182 
1183 	arch_spin_lock(&tr->max_lock);
1184 
1185 	if (!tr->cond_snapshot)
1186 		ret = -EINVAL;
1187 	else {
1188 		kfree(tr->cond_snapshot);
1189 		tr->cond_snapshot = NULL;
1190 	}
1191 
1192 	arch_spin_unlock(&tr->max_lock);
1193 
1194 	return ret;
1195 }
1196 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1197 #else
1198 void tracing_snapshot(void)
1199 {
1200 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_snapshot);
1203 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1204 {
1205 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1208 int tracing_alloc_snapshot(void)
1209 {
1210 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1211 	return -ENODEV;
1212 }
1213 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1214 void tracing_snapshot_alloc(void)
1215 {
1216 	/* Give warning */
1217 	tracing_snapshot();
1218 }
1219 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1220 void *tracing_cond_snapshot_data(struct trace_array *tr)
1221 {
1222 	return NULL;
1223 }
1224 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1225 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1226 {
1227 	return -ENODEV;
1228 }
1229 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1230 int tracing_snapshot_cond_disable(struct trace_array *tr)
1231 {
1232 	return false;
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1235 #endif /* CONFIG_TRACER_SNAPSHOT */
1236 
1237 void tracer_tracing_off(struct trace_array *tr)
1238 {
1239 	if (tr->trace_buffer.buffer)
1240 		ring_buffer_record_off(tr->trace_buffer.buffer);
1241 	/*
1242 	 * This flag is looked at when buffers haven't been allocated
1243 	 * yet, or by some tracers (like irqsoff) that just want to
1244 	 * know if the ring buffer has been disabled, but they can handle
1245 	 * races where it gets disabled while we still do a record.
1246 	 * As the check is in the fast path of the tracers, it is more
1247 	 * important to be fast than accurate.
1248 	 */
1249 	tr->buffer_disabled = 1;
1250 	/* Make the flag seen by readers */
1251 	smp_wmb();
1252 }
1253 
1254 /**
1255  * tracing_off - turn off tracing buffers
1256  *
1257  * This function stops the tracing buffers from recording data.
1258  * It does not disable any overhead the tracers themselves may
1259  * be causing. This function simply causes all recording to
1260  * the ring buffers to fail.
1261  */
1262 void tracing_off(void)
1263 {
1264 	tracer_tracing_off(&global_trace);
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_off);
1267 
1268 void disable_trace_on_warning(void)
1269 {
1270 	if (__disable_trace_on_warning)
1271 		tracing_off();
1272 }
1273 
1274 /**
1275  * tracer_tracing_is_on - show the real state of the ring buffer
1276  * @tr: the trace array whose ring buffer state is reported
1277  *
1278  * Shows the real state of the ring buffer: whether it is enabled or not.
1279  */
1280 bool tracer_tracing_is_on(struct trace_array *tr)
1281 {
1282 	if (tr->trace_buffer.buffer)
1283 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1284 	return !tr->buffer_disabled;
1285 }
1286 
1287 /**
1288  * tracing_is_on - show state of ring buffers enabled
1289  */
1290 int tracing_is_on(void)
1291 {
1292 	return tracer_tracing_is_on(&global_trace);
1293 }
1294 EXPORT_SYMBOL_GPL(tracing_is_on);
1295 
1296 static int __init set_buf_size(char *str)
1297 {
1298 	unsigned long buf_size;
1299 
1300 	if (!str)
1301 		return 0;
1302 	buf_size = memparse(str, &str);
1303 	/* nr_entries can not be zero */
1304 	if (buf_size == 0)
1305 		return 0;
1306 	trace_buf_size = buf_size;
1307 	return 1;
1308 }
1309 __setup("trace_buf_size=", set_buf_size);
1310 
1311 static int __init set_tracing_thresh(char *str)
1312 {
1313 	unsigned long threshold;
1314 	int ret;
1315 
1316 	if (!str)
1317 		return 0;
1318 	ret = kstrtoul(str, 0, &threshold);
1319 	if (ret < 0)
1320 		return 0;
1321 	tracing_thresh = threshold * 1000;
1322 	return 1;
1323 }
1324 __setup("tracing_thresh=", set_tracing_thresh);
1325 
1326 unsigned long nsecs_to_usecs(unsigned long nsecs)
1327 {
1328 	return nsecs / 1000;
1329 }
1330 
1331 /*
1332  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1333  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1334  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1335  * of strings in the order that the evals (enum) were defined.
1336  */
1337 #undef C
1338 #define C(a, b) b
1339 
1340 /* These must match the bit positions in trace_iterator_flags */
1341 static const char *trace_options[] = {
1342 	TRACE_FLAGS
1343 	NULL
1344 };
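/*
 * Illustrative expansion (a sketch, not the actual TRACE_FLAGS list): if
 * TRACE_FLAGS were defined as
 *
 *	C(PRINT_PARENT, "print-parent"),
 *	C(SYM_OFFSET,   "sym-offset"),
 *
 * then with "#define C(a, b) b" the initializer above becomes
 *
 *	{ "print-parent", "sym-offset", NULL }
 *
 * while the same list, expanded with a different C(), produces the
 * matching TRACE_ITER_* bit definitions (see trace.h).
 */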
1345 
1346 static struct {
1347 	u64 (*func)(void);
1348 	const char *name;
1349 	int in_ns;		/* is this clock in nanoseconds? */
1350 } trace_clocks[] = {
1351 	{ trace_clock_local,		"local",	1 },
1352 	{ trace_clock_global,		"global",	1 },
1353 	{ trace_clock_counter,		"counter",	0 },
1354 	{ trace_clock_jiffies,		"uptime",	0 },
1355 	{ trace_clock,			"perf",		1 },
1356 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1357 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1358 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1359 	ARCH_TRACE_CLOCKS
1360 };
1361 
1362 bool trace_clock_in_ns(struct trace_array *tr)
1363 {
1364 	if (trace_clocks[tr->clock_id].in_ns)
1365 		return true;
1366 
1367 	return false;
1368 }
1369 
1370 /*
1371  * trace_parser_get_init - gets the buffer for trace parser
1372  */
1373 int trace_parser_get_init(struct trace_parser *parser, int size)
1374 {
1375 	memset(parser, 0, sizeof(*parser));
1376 
1377 	parser->buffer = kmalloc(size, GFP_KERNEL);
1378 	if (!parser->buffer)
1379 		return 1;
1380 
1381 	parser->size = size;
1382 	return 0;
1383 }
1384 
1385 /*
1386  * trace_parser_put - frees the buffer for trace parser
1387  */
1388 void trace_parser_put(struct trace_parser *parser)
1389 {
1390 	kfree(parser->buffer);
1391 	parser->buffer = NULL;
1392 }
1393 
1394 /*
1395  * trace_get_user - reads the user input string separated by space
1396  * (matched by isspace(ch))
1397  *
1398  * For each string found the 'struct trace_parser' is updated,
1399  * and the function returns.
1400  *
1401  * Returns number of bytes read.
1402  *
1403  * See kernel/trace/trace.h for 'struct trace_parser' details.
1404  */
1405 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1406 	size_t cnt, loff_t *ppos)
1407 {
1408 	char ch;
1409 	size_t read = 0;
1410 	ssize_t ret;
1411 
1412 	if (!*ppos)
1413 		trace_parser_clear(parser);
1414 
1415 	ret = get_user(ch, ubuf++);
1416 	if (ret)
1417 		goto out;
1418 
1419 	read++;
1420 	cnt--;
1421 
1422 	/*
1423 	 * The parser is not finished with the last write,
1424 	 * continue reading the user input without skipping spaces.
1425 	 */
1426 	if (!parser->cont) {
1427 		/* skip white space */
1428 		while (cnt && isspace(ch)) {
1429 			ret = get_user(ch, ubuf++);
1430 			if (ret)
1431 				goto out;
1432 			read++;
1433 			cnt--;
1434 		}
1435 
1436 		parser->idx = 0;
1437 
1438 		/* only spaces were written */
1439 		if (isspace(ch) || !ch) {
1440 			*ppos += read;
1441 			ret = read;
1442 			goto out;
1443 		}
1444 	}
1445 
1446 	/* read the non-space input */
1447 	while (cnt && !isspace(ch) && ch) {
1448 		if (parser->idx < parser->size - 1)
1449 			parser->buffer[parser->idx++] = ch;
1450 		else {
1451 			ret = -EINVAL;
1452 			goto out;
1453 		}
1454 		ret = get_user(ch, ubuf++);
1455 		if (ret)
1456 			goto out;
1457 		read++;
1458 		cnt--;
1459 	}
1460 
1461 	/* We either got finished input or we have to wait for another call. */
1462 	if (isspace(ch) || !ch) {
1463 		parser->buffer[parser->idx] = 0;
1464 		parser->cont = false;
1465 	} else if (parser->idx < parser->size - 1) {
1466 		parser->cont = true;
1467 		parser->buffer[parser->idx++] = ch;
1468 		/* Make sure the parsed string always terminates with '\0'. */
1469 		parser->buffer[parser->idx] = 0;
1470 	} else {
1471 		ret = -EINVAL;
1472 		goto out;
1473 	}
1474 
1475 	*ppos += read;
1476 	ret = read;
1477 
1478 out:
1479 	return ret;
1480 }
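/*
 * Illustrative behaviour (a sketch, not part of the original source): a
 * write of "func1 func2\n" is consumed by two successive calls; the first
 * fills parser->buffer with "func1", the second with "func2".  A word
 * longer than the parser buffer fails with -EINVAL, and a word cut short
 * by the end of the write is kept with parser->cont == true so the next
 * call can finish it.
 */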
1481 
1482 /* TODO add a seq_buf_to_buffer() */
1483 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1484 {
1485 	int len;
1486 
1487 	if (trace_seq_used(s) <= s->seq.readpos)
1488 		return -EBUSY;
1489 
1490 	len = trace_seq_used(s) - s->seq.readpos;
1491 	if (cnt > len)
1492 		cnt = len;
1493 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1494 
1495 	s->seq.readpos += cnt;
1496 	return cnt;
1497 }
1498 
1499 unsigned long __read_mostly	tracing_thresh;
1500 
1501 #ifdef CONFIG_TRACER_MAX_TRACE
1502 /*
1503  * Copy the new maximum trace into the separate maximum-trace
1504  * structure. (this way the maximum trace is permanently saved,
1505  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1506  */
1507 static void
1508 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1509 {
1510 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1511 	struct trace_buffer *max_buf = &tr->max_buffer;
1512 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1513 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1514 
1515 	max_buf->cpu = cpu;
1516 	max_buf->time_start = data->preempt_timestamp;
1517 
1518 	max_data->saved_latency = tr->max_latency;
1519 	max_data->critical_start = data->critical_start;
1520 	max_data->critical_end = data->critical_end;
1521 
1522 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1523 	max_data->pid = tsk->pid;
1524 	/*
1525 	 * If tsk == current, then use current_uid(), as that does not use
1526 	 * RCU. The irq tracer can be called out of RCU scope.
1527 	 */
1528 	if (tsk == current)
1529 		max_data->uid = current_uid();
1530 	else
1531 		max_data->uid = task_uid(tsk);
1532 
1533 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1534 	max_data->policy = tsk->policy;
1535 	max_data->rt_priority = tsk->rt_priority;
1536 
1537 	/* record this task's comm */
1538 	tracing_record_cmdline(tsk);
1539 }
1540 
1541 /**
1542  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1543  * @tr: tracer
1544  * @tsk: the task with the latency
1545  * @cpu: The cpu that initiated the trace.
1546  * @cond_data: User data associated with a conditional snapshot
1547  *
1548  * Flip the buffers between the @tr and the max_tr and record information
1549  * about which task was the cause of this latency.
1550  */
1551 void
1552 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1553 	      void *cond_data)
1554 {
1555 	if (tr->stop_count)
1556 		return;
1557 
1558 	WARN_ON_ONCE(!irqs_disabled());
1559 
1560 	if (!tr->allocated_snapshot) {
1561 		/* Only the nop tracer should hit this when disabling */
1562 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1563 		return;
1564 	}
1565 
1566 	arch_spin_lock(&tr->max_lock);
1567 
1568 	/* Inherit the recordable setting from trace_buffer */
1569 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1570 		ring_buffer_record_on(tr->max_buffer.buffer);
1571 	else
1572 		ring_buffer_record_off(tr->max_buffer.buffer);
1573 
1574 #ifdef CONFIG_TRACER_SNAPSHOT
1575 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1576 		goto out_unlock;
1577 #endif
1578 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1579 
1580 	__update_max_tr(tr, tsk, cpu);
1581 
1582  out_unlock:
1583 	arch_spin_unlock(&tr->max_lock);
1584 }
1585 
1586 /**
1587  * update_max_tr_single - only copy one trace over, and reset the rest
1588  * @tr: tracer
1589  * @tsk: task with the latency
1590  * @cpu: the cpu of the buffer to copy.
1591  *
1592  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1593  */
1594 void
1595 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1596 {
1597 	int ret;
1598 
1599 	if (tr->stop_count)
1600 		return;
1601 
1602 	WARN_ON_ONCE(!irqs_disabled());
1603 	if (!tr->allocated_snapshot) {
1604 		/* Only the nop tracer should hit this when disabling */
1605 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1606 		return;
1607 	}
1608 
1609 	arch_spin_lock(&tr->max_lock);
1610 
1611 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1612 
1613 	if (ret == -EBUSY) {
1614 		/*
1615 		 * We failed to swap the buffer due to a commit taking
1616 		 * place on this CPU. We fail to record, but we reset
1617 		 * the max trace buffer (no one writes directly to it)
1618 		 * and flag that it failed.
1619 		 */
1620 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1621 			"Failed to swap buffers due to commit in progress\n");
1622 	}
1623 
1624 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1625 
1626 	__update_max_tr(tr, tsk, cpu);
1627 	arch_spin_unlock(&tr->max_lock);
1628 }
1629 #endif /* CONFIG_TRACER_MAX_TRACE */
1630 
1631 static int wait_on_pipe(struct trace_iterator *iter, int full)
1632 {
1633 	/* Iterators are static, they should be filled or empty */
1634 	if (trace_buffer_iter(iter, iter->cpu_file))
1635 		return 0;
1636 
1637 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1638 				full);
1639 }
1640 
1641 #ifdef CONFIG_FTRACE_STARTUP_TEST
1642 static bool selftests_can_run;
1643 
1644 struct trace_selftests {
1645 	struct list_head		list;
1646 	struct tracer			*type;
1647 };
1648 
1649 static LIST_HEAD(postponed_selftests);
1650 
1651 static int save_selftest(struct tracer *type)
1652 {
1653 	struct trace_selftests *selftest;
1654 
1655 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1656 	if (!selftest)
1657 		return -ENOMEM;
1658 
1659 	selftest->type = type;
1660 	list_add(&selftest->list, &postponed_selftests);
1661 	return 0;
1662 }
1663 
1664 static int run_tracer_selftest(struct tracer *type)
1665 {
1666 	struct trace_array *tr = &global_trace;
1667 	struct tracer *saved_tracer = tr->current_trace;
1668 	int ret;
1669 
1670 	if (!type->selftest || tracing_selftest_disabled)
1671 		return 0;
1672 
1673 	/*
1674 	 * If a tracer registers early in boot up (before scheduling is
1675 	 * initialized and such), then do not run its selftests yet.
1676 	 * Instead, run it a little later in the boot process.
1677 	 */
1678 	if (!selftests_can_run)
1679 		return save_selftest(type);
1680 
1681 	/*
1682 	 * Run a selftest on this tracer.
1683 	 * Here we reset the trace buffer, and set the current
1684 	 * tracer to be this tracer. The tracer can then run some
1685 	 * internal tracing to verify that everything is in order.
1686 	 * If we fail, we do not register this tracer.
1687 	 */
1688 	tracing_reset_online_cpus(&tr->trace_buffer);
1689 
1690 	tr->current_trace = type;
1691 
1692 #ifdef CONFIG_TRACER_MAX_TRACE
1693 	if (type->use_max_tr) {
1694 		/* If we expanded the buffers, make sure the max is expanded too */
1695 		if (ring_buffer_expanded)
1696 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1697 					   RING_BUFFER_ALL_CPUS);
1698 		tr->allocated_snapshot = true;
1699 	}
1700 #endif
1701 
1702 	/* the test is responsible for initializing and enabling */
1703 	pr_info("Testing tracer %s: ", type->name);
1704 	ret = type->selftest(type, tr);
1705 	/* the test is responsible for resetting too */
1706 	tr->current_trace = saved_tracer;
1707 	if (ret) {
1708 		printk(KERN_CONT "FAILED!\n");
1709 		/* Add the warning after printing 'FAILED' */
1710 		WARN_ON(1);
1711 		return -1;
1712 	}
1713 	/* Only reset on passing, to avoid touching corrupted buffers */
1714 	tracing_reset_online_cpus(&tr->trace_buffer);
1715 
1716 #ifdef CONFIG_TRACER_MAX_TRACE
1717 	if (type->use_max_tr) {
1718 		tr->allocated_snapshot = false;
1719 
1720 		/* Shrink the max buffer again */
1721 		if (ring_buffer_expanded)
1722 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1723 					   RING_BUFFER_ALL_CPUS);
1724 	}
1725 #endif
1726 
1727 	printk(KERN_CONT "PASSED\n");
1728 	return 0;
1729 }
1730 
1731 static __init int init_trace_selftests(void)
1732 {
1733 	struct trace_selftests *p, *n;
1734 	struct tracer *t, **last;
1735 	int ret;
1736 
1737 	selftests_can_run = true;
1738 
1739 	mutex_lock(&trace_types_lock);
1740 
1741 	if (list_empty(&postponed_selftests))
1742 		goto out;
1743 
1744 	pr_info("Running postponed tracer tests:\n");
1745 
1746 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1747 		/* This loop can take minutes when sanitizers are enabled, so
1748 		 * let's make sure we allow RCU processing.
1749 		 */
1750 		cond_resched();
1751 		ret = run_tracer_selftest(p->type);
1752 		/* If the test fails, then warn and remove from available_tracers */
1753 		if (ret < 0) {
1754 			WARN(1, "tracer: %s failed selftest, disabling\n",
1755 			     p->type->name);
1756 			last = &trace_types;
1757 			for (t = trace_types; t; t = t->next) {
1758 				if (t == p->type) {
1759 					*last = t->next;
1760 					break;
1761 				}
1762 				last = &t->next;
1763 			}
1764 		}
1765 		list_del(&p->list);
1766 		kfree(p);
1767 	}
1768 
1769  out:
1770 	mutex_unlock(&trace_types_lock);
1771 
1772 	return 0;
1773 }
1774 core_initcall(init_trace_selftests);
1775 #else
1776 static inline int run_tracer_selftest(struct tracer *type)
1777 {
1778 	return 0;
1779 }
1780 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1781 
1782 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1783 
1784 static void __init apply_trace_boot_options(void);
1785 
1786 /**
1787  * register_tracer - register a tracer with the ftrace system.
1788  * @type: the plugin for the tracer
1789  *
1790  * Register a new plugin tracer.
1791  */
1792 int __init register_tracer(struct tracer *type)
1793 {
1794 	struct tracer *t;
1795 	int ret = 0;
1796 
1797 	if (!type->name) {
1798 		pr_info("Tracer must have a name\n");
1799 		return -1;
1800 	}
1801 
1802 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1803 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1804 		return -1;
1805 	}
1806 
1807 	mutex_lock(&trace_types_lock);
1808 
1809 	tracing_selftest_running = true;
1810 
1811 	for (t = trace_types; t; t = t->next) {
1812 		if (strcmp(type->name, t->name) == 0) {
1813 			/* already found */
1814 			pr_info("Tracer %s already registered\n",
1815 				type->name);
1816 			ret = -1;
1817 			goto out;
1818 		}
1819 	}
1820 
1821 	if (!type->set_flag)
1822 		type->set_flag = &dummy_set_flag;
1823 	if (!type->flags) {
1824 		/* allocate a dummy tracer_flags */
1825 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1826 		if (!type->flags) {
1827 			ret = -ENOMEM;
1828 			goto out;
1829 		}
1830 		type->flags->val = 0;
1831 		type->flags->opts = dummy_tracer_opt;
1832 	} else
1833 		if (!type->flags->opts)
1834 			type->flags->opts = dummy_tracer_opt;
1835 
1836 	/* store the tracer for __set_tracer_option */
1837 	type->flags->trace = type;
1838 
1839 	ret = run_tracer_selftest(type);
1840 	if (ret < 0)
1841 		goto out;
1842 
1843 	type->next = trace_types;
1844 	trace_types = type;
1845 	add_tracer_options(&global_trace, type);
1846 
1847  out:
1848 	tracing_selftest_running = false;
1849 	mutex_unlock(&trace_types_lock);
1850 
1851 	if (ret || !default_bootup_tracer)
1852 		goto out_unlock;
1853 
1854 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1855 		goto out_unlock;
1856 
1857 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1858 	/* Do we want this tracer to start on bootup? */
1859 	tracing_set_tracer(&global_trace, type->name);
1860 	default_bootup_tracer = NULL;
1861 
1862 	apply_trace_boot_options();
1863 
1864 	/* disable other selftests, since this will break them. */
1865 	tracing_selftest_disabled = true;
1866 #ifdef CONFIG_FTRACE_STARTUP_TEST
1867 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1868 	       type->name);
1869 #endif
1870 
1871  out_unlock:
1872 	return ret;
1873 }
1874 
1875 static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1876 {
1877 	struct ring_buffer *buffer = buf->buffer;
1878 
1879 	if (!buffer)
1880 		return;
1881 
1882 	ring_buffer_record_disable(buffer);
1883 
1884 	/* Make sure all commits have finished */
1885 	synchronize_rcu();
1886 	ring_buffer_reset_cpu(buffer, cpu);
1887 
1888 	ring_buffer_record_enable(buffer);
1889 }
1890 
1891 void tracing_reset_online_cpus(struct trace_buffer *buf)
1892 {
1893 	struct ring_buffer *buffer = buf->buffer;
1894 	int cpu;
1895 
1896 	if (!buffer)
1897 		return;
1898 
1899 	ring_buffer_record_disable(buffer);
1900 
1901 	/* Make sure all commits have finished */
1902 	synchronize_rcu();
1903 
1904 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1905 
1906 	for_each_online_cpu(cpu)
1907 		ring_buffer_reset_cpu(buffer, cpu);
1908 
1909 	ring_buffer_record_enable(buffer);
1910 }
1911 
1912 /* Must have trace_types_lock held */
1913 void tracing_reset_all_online_cpus(void)
1914 {
1915 	struct trace_array *tr;
1916 
1917 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1918 		if (!tr->clear_trace)
1919 			continue;
1920 		tr->clear_trace = false;
1921 		tracing_reset_online_cpus(&tr->trace_buffer);
1922 #ifdef CONFIG_TRACER_MAX_TRACE
1923 		tracing_reset_online_cpus(&tr->max_buffer);
1924 #endif
1925 	}
1926 }
1927 
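/*
 * Maps a PID to the TGID it was last recorded with (see trace_save_tgid()).
 * May be NULL when TGID recording is not enabled.
 */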
1928 static int *tgid_map;
1929 
1930 #define SAVED_CMDLINES_DEFAULT 128
1931 #define NO_CMDLINE_MAP UINT_MAX
1932 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
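/*
 * Cache of the comm of recently traced tasks: map_pid_to_cmdline[] maps
 * a PID to a slot in saved_cmdlines[], and map_cmdline_to_pid[] records
 * which PID currently owns each slot so that a recycled slot can be
 * unmapped from its previous PID.
 */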
1933 struct saved_cmdlines_buffer {
1934 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1935 	unsigned *map_cmdline_to_pid;
1936 	unsigned cmdline_num;
1937 	int cmdline_idx;
1938 	char *saved_cmdlines;
1939 };
1940 static struct saved_cmdlines_buffer *savedcmd;
1941 
1942 /* temporarily disable recording */
1943 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1944 
1945 static inline char *get_saved_cmdlines(int idx)
1946 {
1947 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1948 }
1949 
1950 static inline void set_cmdline(int idx, const char *cmdline)
1951 {
1952 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1953 }
1954 
1955 static int allocate_cmdlines_buffer(unsigned int val,
1956 				    struct saved_cmdlines_buffer *s)
1957 {
1958 	s->map_cmdline_to_pid = kmalloc_array(val,
1959 					      sizeof(*s->map_cmdline_to_pid),
1960 					      GFP_KERNEL);
1961 	if (!s->map_cmdline_to_pid)
1962 		return -ENOMEM;
1963 
1964 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1965 	if (!s->saved_cmdlines) {
1966 		kfree(s->map_cmdline_to_pid);
1967 		return -ENOMEM;
1968 	}
1969 
1970 	s->cmdline_idx = 0;
1971 	s->cmdline_num = val;
1972 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1973 	       sizeof(s->map_pid_to_cmdline));
1974 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1975 	       val * sizeof(*s->map_cmdline_to_pid));
1976 
1977 	return 0;
1978 }
1979 
1980 static int trace_create_savedcmd(void)
1981 {
1982 	int ret;
1983 
1984 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1985 	if (!savedcmd)
1986 		return -ENOMEM;
1987 
1988 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1989 	if (ret < 0) {
1990 		kfree(savedcmd);
1991 		savedcmd = NULL;
1992 		return -ENOMEM;
1993 	}
1994 
1995 	return 0;
1996 }
1997 
1998 int is_tracing_stopped(void)
1999 {
2000 	return global_trace.stop_count;
2001 }
2002 
2003 /**
2004  * tracing_start - quick start of the tracer
2005  *
2006  * If tracing is enabled but was stopped by tracing_stop,
2007  * this will start the tracer back up.
2008  */
2009 void tracing_start(void)
2010 {
2011 	struct ring_buffer *buffer;
2012 	unsigned long flags;
2013 
2014 	if (tracing_disabled)
2015 		return;
2016 
2017 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2018 	if (--global_trace.stop_count) {
2019 		if (global_trace.stop_count < 0) {
2020 			/* Someone screwed up their debugging */
2021 			WARN_ON_ONCE(1);
2022 			global_trace.stop_count = 0;
2023 		}
2024 		goto out;
2025 	}
2026 
2027 	/* Prevent the buffers from switching */
2028 	arch_spin_lock(&global_trace.max_lock);
2029 
2030 	buffer = global_trace.trace_buffer.buffer;
2031 	if (buffer)
2032 		ring_buffer_record_enable(buffer);
2033 
2034 #ifdef CONFIG_TRACER_MAX_TRACE
2035 	buffer = global_trace.max_buffer.buffer;
2036 	if (buffer)
2037 		ring_buffer_record_enable(buffer);
2038 #endif
2039 
2040 	arch_spin_unlock(&global_trace.max_lock);
2041 
2042  out:
2043 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2044 }
2045 
2046 static void tracing_start_tr(struct trace_array *tr)
2047 {
2048 	struct ring_buffer *buffer;
2049 	unsigned long flags;
2050 
2051 	if (tracing_disabled)
2052 		return;
2053 
2054 	/* If global, we need to also start the max tracer */
2055 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2056 		return tracing_start();
2057 
2058 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2059 
2060 	if (--tr->stop_count) {
2061 		if (tr->stop_count < 0) {
2062 			/* Someone screwed up their debugging */
2063 			WARN_ON_ONCE(1);
2064 			tr->stop_count = 0;
2065 		}
2066 		goto out;
2067 	}
2068 
2069 	buffer = tr->trace_buffer.buffer;
2070 	if (buffer)
2071 		ring_buffer_record_enable(buffer);
2072 
2073  out:
2074 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2075 }
2076 
2077 /**
2078  * tracing_stop - quick stop of the tracer
2079  *
2080  * Light weight way to stop tracing. Use in conjunction with
2081  * Lightweight way to stop tracing. Use in conjunction with
2082  */
2083 void tracing_stop(void)
2084 {
2085 	struct ring_buffer *buffer;
2086 	unsigned long flags;
2087 
2088 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2089 	if (global_trace.stop_count++)
2090 		goto out;
2091 
2092 	/* Prevent the buffers from switching */
2093 	arch_spin_lock(&global_trace.max_lock);
2094 
2095 	buffer = global_trace.trace_buffer.buffer;
2096 	if (buffer)
2097 		ring_buffer_record_disable(buffer);
2098 
2099 #ifdef CONFIG_TRACER_MAX_TRACE
2100 	buffer = global_trace.max_buffer.buffer;
2101 	if (buffer)
2102 		ring_buffer_record_disable(buffer);
2103 #endif
2104 
2105 	arch_spin_unlock(&global_trace.max_lock);
2106 
2107  out:
2108 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2109 }
2110 
2111 static void tracing_stop_tr(struct trace_array *tr)
2112 {
2113 	struct ring_buffer *buffer;
2114 	unsigned long flags;
2115 
2116 	/* If global, we need to also stop the max tracer */
2117 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2118 		return tracing_stop();
2119 
2120 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2121 	if (tr->stop_count++)
2122 		goto out;
2123 
2124 	buffer = tr->trace_buffer.buffer;
2125 	if (buffer)
2126 		ring_buffer_record_disable(buffer);
2127 
2128  out:
2129 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2130 }
2131 
2132 static int trace_save_cmdline(struct task_struct *tsk)
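/*
 * Save tsk->comm in the cmdline cache. Returns 1 on success (the idle
 * task always counts as a success) and 0 if the PID is out of range or
 * the cmdline lock could not be taken, in which case the caller is
 * expected to retry on a later event.
 */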
2133 {
2134 	unsigned pid, idx;
2135 
2136 	/* treat recording of idle task as a success */
2137 	if (!tsk->pid)
2138 		return 1;
2139 
2140 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2141 		return 0;
2142 
2143 	/*
2144 	 * It's not the end of the world if we don't get
2145 	 * the lock, but we also don't want to spin
2146 	 * nor do we want to disable interrupts,
2147 	 * so if we miss here, then better luck next time.
2148 	 */
2149 	if (!arch_spin_trylock(&trace_cmdline_lock))
2150 		return 0;
2151 
2152 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2153 	if (idx == NO_CMDLINE_MAP) {
2154 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2155 
2156 		/*
2157 		 * Check whether the cmdline buffer at idx has a pid
2158 		 * mapped. We are going to overwrite that entry so we
2159 		 * need to clear the map_pid_to_cmdline. Otherwise we
2160 		 * would read the new comm for the old pid.
2161 		 */
2162 		pid = savedcmd->map_cmdline_to_pid[idx];
2163 		if (pid != NO_CMDLINE_MAP)
2164 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2165 
2166 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2167 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2168 
2169 		savedcmd->cmdline_idx = idx;
2170 	}
2171 
2172 	set_cmdline(idx, tsk->comm);
2173 
2174 	arch_spin_unlock(&trace_cmdline_lock);
2175 
2176 	return 1;
2177 }
2178 
2179 static void __trace_find_cmdline(int pid, char comm[])
2180 {
2181 	unsigned map;
2182 
2183 	if (!pid) {
2184 		strcpy(comm, "<idle>");
2185 		return;
2186 	}
2187 
2188 	if (WARN_ON_ONCE(pid < 0)) {
2189 		strcpy(comm, "<XXX>");
2190 		return;
2191 	}
2192 
2193 	if (pid > PID_MAX_DEFAULT) {
2194 		strcpy(comm, "<...>");
2195 		return;
2196 	}
2197 
2198 	map = savedcmd->map_pid_to_cmdline[pid];
2199 	if (map != NO_CMDLINE_MAP)
2200 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2201 	else
2202 		strcpy(comm, "<...>");
2203 }
2204 
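/*
 * trace_find_cmdline - copy the saved comm for @pid into @comm
 *
 * Takes the cmdline lock with preemption disabled. If no comm was ever
 * saved for @pid, "<...>" is reported instead.
 */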
2205 void trace_find_cmdline(int pid, char comm[])
2206 {
2207 	preempt_disable();
2208 	arch_spin_lock(&trace_cmdline_lock);
2209 
2210 	__trace_find_cmdline(pid, comm);
2211 
2212 	arch_spin_unlock(&trace_cmdline_lock);
2213 	preempt_enable();
2214 }
2215 
2216 int trace_find_tgid(int pid)
2217 {
2218 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2219 		return 0;
2220 
2221 	return tgid_map[pid];
2222 }
2223 
2224 static int trace_save_tgid(struct task_struct *tsk)
2225 {
2226 	/* treat recording of idle task as a success */
2227 	if (!tsk->pid)
2228 		return 1;
2229 
2230 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2231 		return 0;
2232 
2233 	tgid_map[tsk->pid] = tsk->tgid;
2234 	return 1;
2235 }
2236 
2237 static bool tracing_record_taskinfo_skip(int flags)
2238 {
2239 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2240 		return true;
2241 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2242 		return true;
2243 	if (!__this_cpu_read(trace_taskinfo_save))
2244 		return true;
2245 	return false;
2246 }
2247 
2248 /**
2249  * tracing_record_taskinfo - record the task info of a task
2250  *
2251  * @task:  task to record
2252  * @flags: TRACE_RECORD_CMDLINE for recording comm
2253  *         TRACE_RECORD_TGID for recording tgid
2254  */
2255 void tracing_record_taskinfo(struct task_struct *task, int flags)
2256 {
2257 	bool done;
2258 
2259 	if (tracing_record_taskinfo_skip(flags))
2260 		return;
2261 
2262 	/*
2263 	 * Record as much task information as possible. If some fail, continue
2264 	 * to try to record the others.
2265 	 */
2266 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2267 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2268 
2269 	/* If recording any information failed, try again soon. */
2270 	if (!done)
2271 		return;
2272 
2273 	__this_cpu_write(trace_taskinfo_save, false);
2274 }
2275 
2276 /**
2277  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2278  *
2279  * @prev: previous task during sched_switch
2280  * @next: next task during sched_switch
2281  * @flags: TRACE_RECORD_CMDLINE for recording comm
2282  *         TRACE_RECORD_TGID for recording tgid
2283  */
2284 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2285 					  struct task_struct *next, int flags)
2286 {
2287 	bool done;
2288 
2289 	if (tracing_record_taskinfo_skip(flags))
2290 		return;
2291 
2292 	/*
2293 	 * Record as much task information as possible. If some fail, continue
2294 	 * to try to record the others.
2295 	 */
2296 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2297 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2298 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2299 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2300 
2301 	/* If recording any information failed, try again soon. */
2302 	if (!done)
2303 		return;
2304 
2305 	__this_cpu_write(trace_taskinfo_save, false);
2306 }
2307 
2308 /* Helpers to record a specific task information */
2309 void tracing_record_cmdline(struct task_struct *task)
2310 {
2311 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2312 }
2313 
2314 void tracing_record_tgid(struct task_struct *task)
2315 {
2316 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2317 }
2318 
2319 /*
2320  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2321  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2322  * simplifies those functions and keeps them in sync.
2323  */
2324 enum print_line_t trace_handle_return(struct trace_seq *s)
2325 {
2326 	return trace_seq_has_overflowed(s) ?
2327 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2328 }
2329 EXPORT_SYMBOL_GPL(trace_handle_return);
2330 
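/*
 * Fill in the fields common to all trace entries: the type, the current
 * PID, the preempt count, and the irq/softirq/NMI/need-resched state
 * flags derived from @flags and @pc.
 */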
2331 void
2332 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2333 			     unsigned long flags, int pc)
2334 {
2335 	struct task_struct *tsk = current;
2336 
2337 	entry->preempt_count		= pc & 0xff;
2338 	entry->pid			= (tsk) ? tsk->pid : 0;
2339 	entry->type			= type;
2340 	entry->flags =
2341 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2342 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2343 #else
2344 		TRACE_FLAG_IRQS_NOSUPPORT |
2345 #endif
2346 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2347 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2348 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2349 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2350 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2351 }
2352 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2353 
2354 struct ring_buffer_event *
2355 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2356 			  int type,
2357 			  unsigned long len,
2358 			  unsigned long flags, int pc)
2359 {
2360 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2361 }
2362 
2363 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2364 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2365 static int trace_buffered_event_ref;
2366 
2367 /**
2368  * trace_buffered_event_enable - enable buffering events
2369  *
2370  * When events are being filtered, it is quicker to use a temporary
2371  * buffer to write the event data into if there's a likely chance
2372  * that it will not be committed. Discarding a reserved ring buffer
2373  * event is not as fast as committing it, and is much slower than
2374  * the extra copy that a buffered commit requires.
2375  *
2376  * When an event is to be filtered, allocate per-CPU buffers to
2377  * write the event data into; if the event is filtered and discarded
2378  * it is simply dropped, otherwise the entire data is committed
2379  * in one shot.
2380  */
2381 void trace_buffered_event_enable(void)
2382 {
2383 	struct ring_buffer_event *event;
2384 	struct page *page;
2385 	int cpu;
2386 
2387 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2388 
2389 	if (trace_buffered_event_ref++)
2390 		return;
2391 
2392 	for_each_tracing_cpu(cpu) {
2393 		page = alloc_pages_node(cpu_to_node(cpu),
2394 					GFP_KERNEL | __GFP_NORETRY, 0);
2395 		if (!page)
2396 			goto failed;
2397 
2398 		event = page_address(page);
2399 		memset(event, 0, sizeof(*event));
2400 
2401 		per_cpu(trace_buffered_event, cpu) = event;
2402 
2403 		preempt_disable();
2404 		if (cpu == smp_processor_id() &&
2405 		    this_cpu_read(trace_buffered_event) !=
2406 		    per_cpu(trace_buffered_event, cpu))
2407 			WARN_ON_ONCE(1);
2408 		preempt_enable();
2409 	}
2410 
2411 	return;
2412  failed:
2413 	trace_buffered_event_disable();
2414 }
2415 
2416 static void enable_trace_buffered_event(void *data)
2417 {
2418 	/* Probably not needed, but do it anyway */
2419 	smp_rmb();
2420 	this_cpu_dec(trace_buffered_event_cnt);
2421 }
2422 
2423 static void disable_trace_buffered_event(void *data)
2424 {
2425 	this_cpu_inc(trace_buffered_event_cnt);
2426 }
2427 
2428 /**
2429  * trace_buffered_event_disable - disable buffering events
2430  *
2431  * When a filter is removed, it is faster to not use the buffered
2432  * events, and to commit directly into the ring buffer. Free up
2433  * the temp buffers when there are no more users. This requires
2434  * special synchronization with current events.
2435  */
2436 void trace_buffered_event_disable(void)
2437 {
2438 	int cpu;
2439 
2440 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2441 
2442 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2443 		return;
2444 
2445 	if (--trace_buffered_event_ref)
2446 		return;
2447 
2448 	preempt_disable();
2449 	/* For each CPU, set the buffer as used. */
2450 	smp_call_function_many(tracing_buffer_mask,
2451 			       disable_trace_buffered_event, NULL, 1);
2452 	preempt_enable();
2453 
2454 	/* Wait for all current users to finish */
2455 	synchronize_rcu();
2456 
2457 	for_each_tracing_cpu(cpu) {
2458 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2459 		per_cpu(trace_buffered_event, cpu) = NULL;
2460 	}
2461 	/*
2462 	 * Make sure trace_buffered_event is NULL before clearing
2463 	 * trace_buffered_event_cnt.
2464 	 */
2465 	smp_wmb();
2466 
2467 	preempt_disable();
2468 	/* Do the work on each cpu */
2469 	smp_call_function_many(tracing_buffer_mask,
2470 			       enable_trace_buffered_event, NULL, 1);
2471 	preempt_enable();
2472 }
2473 
2474 static struct ring_buffer *temp_buffer;
2475 
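/*
 * Reserve space for a trace event. When the event is soft disabled or may
 * be filtered (and absolute timestamps are not in use), the per-CPU
 * buffered event is tried first so that a discard stays cheap; otherwise
 * space is reserved directly in the trace file's ring buffer, or in
 * temp_buffer when only a trigger needs to see the data.
 */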
2476 struct ring_buffer_event *
2477 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2478 			  struct trace_event_file *trace_file,
2479 			  int type, unsigned long len,
2480 			  unsigned long flags, int pc)
2481 {
2482 	struct ring_buffer_event *entry;
2483 	int val;
2484 
2485 	*current_rb = trace_file->tr->trace_buffer.buffer;
2486 
2487 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2488 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2489 	    (entry = this_cpu_read(trace_buffered_event))) {
2490 		/* Try to use the per cpu buffer first */
2491 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2492 		if (val == 1) {
2493 			trace_event_setup(entry, type, flags, pc);
2494 			entry->array[0] = len;
2495 			return entry;
2496 		}
2497 		this_cpu_dec(trace_buffered_event_cnt);
2498 	}
2499 
2500 	entry = __trace_buffer_lock_reserve(*current_rb,
2501 					    type, len, flags, pc);
2502 	/*
2503 	 * If tracing is off, but we have triggers enabled,
2504 	 * we still need to look at the event data. Use the temp_buffer
2505 	 * to store the trace event for the trigger to use. It's recursion
2506 	 * safe and will not be recorded anywhere.
2507 	 */
2508 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2509 		*current_rb = temp_buffer;
2510 		entry = __trace_buffer_lock_reserve(*current_rb,
2511 						    type, len, flags, pc);
2512 	}
2513 	return entry;
2514 }
2515 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2516 
2517 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2518 static DEFINE_MUTEX(tracepoint_printk_mutex);
2519 
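/*
 * Print a single trace event straight to the console via printk(), using
 * the event's registered trace() callback and the shared
 * tracepoint_print_iter iterator. Used when the tracepoint_printk sysctl
 * is enabled.
 */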
2520 static void output_printk(struct trace_event_buffer *fbuffer)
2521 {
2522 	struct trace_event_call *event_call;
2523 	struct trace_event *event;
2524 	unsigned long flags;
2525 	struct trace_iterator *iter = tracepoint_print_iter;
2526 
2527 	/* We should never get here if iter is NULL */
2528 	if (WARN_ON_ONCE(!iter))
2529 		return;
2530 
2531 	event_call = fbuffer->trace_file->event_call;
2532 	if (!event_call || !event_call->event.funcs ||
2533 	    !event_call->event.funcs->trace)
2534 		return;
2535 
2536 	event = &fbuffer->trace_file->event_call->event;
2537 
2538 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2539 	trace_seq_init(&iter->seq);
2540 	iter->ent = fbuffer->entry;
2541 	event_call->event.funcs->trace(iter, 0, event);
2542 	trace_seq_putc(&iter->seq, 0);
2543 	printk("%s", iter->seq.buffer);
2544 
2545 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2546 }
2547 
2548 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2549 			     void __user *buffer, size_t *lenp,
2550 			     loff_t *ppos)
2551 {
2552 	int save_tracepoint_printk;
2553 	int ret;
2554 
2555 	mutex_lock(&tracepoint_printk_mutex);
2556 	save_tracepoint_printk = tracepoint_printk;
2557 
2558 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2559 
2560 	/*
2561 	 * This will force exiting early, as tracepoint_printk
2562 	 * is always zero when tracepoint_print_iter is not allocated.
2563 	 */
2564 	if (!tracepoint_print_iter)
2565 		tracepoint_printk = 0;
2566 
2567 	if (save_tracepoint_printk == tracepoint_printk)
2568 		goto out;
2569 
2570 	if (tracepoint_printk)
2571 		static_key_enable(&tracepoint_printk_key.key);
2572 	else
2573 		static_key_disable(&tracepoint_printk_key.key);
2574 
2575  out:
2576 	mutex_unlock(&tracepoint_printk_mutex);
2577 
2578 	return ret;
2579 }
2580 
2581 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2582 {
2583 	if (static_key_false(&tracepoint_printk_key.key))
2584 		output_printk(fbuffer);
2585 
2586 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2587 				    fbuffer->event, fbuffer->entry,
2588 				    fbuffer->flags, fbuffer->pc);
2589 }
2590 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2591 
2592 /*
2593  * Skip 3:
2594  *
2595  *   trace_buffer_unlock_commit_regs()
2596  *   trace_event_buffer_commit()
2597  *   trace_event_raw_event_xxx()
2598  */
2599 # define STACK_SKIP 3
2600 
2601 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2602 				     struct ring_buffer *buffer,
2603 				     struct ring_buffer_event *event,
2604 				     unsigned long flags, int pc,
2605 				     struct pt_regs *regs)
2606 {
2607 	__buffer_unlock_commit(buffer, event);
2608 
2609 	/*
2610 	 * If regs is not set, then skip the necessary functions.
2611 	 * Note, we can still get here via blktrace, wakeup tracer
2612 	 * and mmiotrace, but that's ok if they lose a function or
2613 	 * two. They are not that meaningful.
2614 	 */
2615 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2616 	ftrace_trace_userstack(buffer, flags, pc);
2617 }
2618 
2619 /*
2620  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2621  */
2622 void
2623 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2624 				   struct ring_buffer_event *event)
2625 {
2626 	__buffer_unlock_commit(buffer, event);
2627 }
2628 
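/*
 * Hand one ring buffer event (its raw entry data and length) to a
 * registered trace_export's ->write() callback.
 */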
2629 static void
2630 trace_process_export(struct trace_export *export,
2631 	       struct ring_buffer_event *event)
2632 {
2633 	struct trace_entry *entry;
2634 	unsigned int size = 0;
2635 
2636 	entry = ring_buffer_event_data(event);
2637 	size = ring_buffer_event_length(event);
2638 	export->write(export, entry, size);
2639 }
2640 
2641 static DEFINE_MUTEX(ftrace_export_lock);
2642 
2643 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2644 
2645 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2646 
2647 static inline void ftrace_exports_enable(void)
2648 {
2649 	static_branch_enable(&ftrace_exports_enabled);
2650 }
2651 
2652 static inline void ftrace_exports_disable(void)
2653 {
2654 	static_branch_disable(&ftrace_exports_enabled);
2655 }
2656 
2657 static void ftrace_exports(struct ring_buffer_event *event)
2658 {
2659 	struct trace_export *export;
2660 
2661 	preempt_disable_notrace();
2662 
2663 	export = rcu_dereference_raw_check(ftrace_exports_list);
2664 	while (export) {
2665 		trace_process_export(export, event);
2666 		export = rcu_dereference_raw_check(export->next);
2667 	}
2668 
2669 	preempt_enable_notrace();
2670 }
2671 
2672 static inline void
2673 add_trace_export(struct trace_export **list, struct trace_export *export)
2674 {
2675 	rcu_assign_pointer(export->next, *list);
2676 	/*
2677 	 * We are adding the export to the list, but another
2678 	 * CPU might be walking that list. We need to make sure
2679 	 * the export->next pointer is valid before another CPU sees
2680 	 * the export pointer added to the list.
2681 	 */
2682 	rcu_assign_pointer(*list, export);
2683 }
2684 
2685 static inline int
2686 rm_trace_export(struct trace_export **list, struct trace_export *export)
2687 {
2688 	struct trace_export **p;
2689 
2690 	for (p = list; *p != NULL; p = &(*p)->next)
2691 		if (*p == export)
2692 			break;
2693 
2694 	if (*p != export)
2695 		return -1;
2696 
2697 	rcu_assign_pointer(*p, (*p)->next);
2698 
2699 	return 0;
2700 }
2701 
2702 static inline void
2703 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2704 {
2705 	if (*list == NULL)
2706 		ftrace_exports_enable();
2707 
2708 	add_trace_export(list, export);
2709 }
2710 
2711 static inline int
2712 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2713 {
2714 	int ret;
2715 
2716 	ret = rm_trace_export(list, export);
2717 	if (*list == NULL)
2718 		ftrace_exports_disable();
2719 
2720 	return ret;
2721 }
2722 
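/*
 * Minimal usage sketch (my_export and my_write are hypothetical; the
 * ->write() callback receives the raw entry and its size, as handed
 * over by trace_process_export() above):
 *
 *	static struct trace_export my_export = { .write = my_write };
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */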
2723 int register_ftrace_export(struct trace_export *export)
2724 {
2725 	if (WARN_ON_ONCE(!export->write))
2726 		return -1;
2727 
2728 	mutex_lock(&ftrace_export_lock);
2729 
2730 	add_ftrace_export(&ftrace_exports_list, export);
2731 
2732 	mutex_unlock(&ftrace_export_lock);
2733 
2734 	return 0;
2735 }
2736 EXPORT_SYMBOL_GPL(register_ftrace_export);
2737 
2738 int unregister_ftrace_export(struct trace_export *export)
2739 {
2740 	int ret;
2741 
2742 	mutex_lock(&ftrace_export_lock);
2743 
2744 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2745 
2746 	mutex_unlock(&ftrace_export_lock);
2747 
2748 	return ret;
2749 }
2750 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2751 
2752 void
2753 trace_function(struct trace_array *tr,
2754 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2755 	       int pc)
2756 {
2757 	struct trace_event_call *call = &event_function;
2758 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2759 	struct ring_buffer_event *event;
2760 	struct ftrace_entry *entry;
2761 
2762 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2763 					    flags, pc);
2764 	if (!event)
2765 		return;
2766 	entry	= ring_buffer_event_data(event);
2767 	entry->ip			= ip;
2768 	entry->parent_ip		= parent_ip;
2769 
2770 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2771 		if (static_branch_unlikely(&ftrace_exports_enabled))
2772 			ftrace_exports(event);
2773 		__buffer_unlock_commit(buffer, event);
2774 	}
2775 }
2776 
2777 #ifdef CONFIG_STACKTRACE
2778 
2779 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2780 #define FTRACE_KSTACK_NESTING	4
2781 
2782 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2783 
2784 struct ftrace_stack {
2785 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2786 };
2787 
2788 
2789 struct ftrace_stacks {
2790 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2791 };
2792 
2793 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2794 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2795 
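/*
 * Record a kernel stack trace into the ring buffer. The per-CPU
 * ftrace_stacks slots (one per nesting level) are used as temporary
 * storage for the saved entries before they are copied into the event.
 */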
2796 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2797 				 unsigned long flags,
2798 				 int skip, int pc, struct pt_regs *regs)
2799 {
2800 	struct trace_event_call *call = &event_kernel_stack;
2801 	struct ring_buffer_event *event;
2802 	unsigned int size, nr_entries;
2803 	struct ftrace_stack *fstack;
2804 	struct stack_entry *entry;
2805 	int stackidx;
2806 
2807 	/*
2808 	 * Add one, for this function and the call to stack_trace_save().
2809 	 * If regs is set, then these functions will not be in the way.
2810 	 */
2811 #ifndef CONFIG_UNWINDER_ORC
2812 	if (!regs)
2813 		skip++;
2814 #endif
2815 
2816 	/*
2817 	 * Since events can happen in NMIs there's no safe way to
2818 	 * use the per cpu ftrace_stacks. We reserve a nesting slot, and if
2819 	 * an interrupt or NMI comes in, it will just have to use the next
2820 	 * slot, up to FTRACE_KSTACK_NESTING levels deep.
2821 	 */
2822 	preempt_disable_notrace();
2823 
2824 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2825 
2826 	/* This should never happen. If it does, yell once and skip */
2827 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2828 		goto out;
2829 
2830 	/*
2831 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2832 	 * interrupt will either see the value pre-increment or post-
2833 	 * increment. If the interrupt happens pre-increment it will have
2834 	 * restored the counter when it returns. We just need a barrier to
2835 	 * keep gcc from moving things around.
2836 	 */
2837 	barrier();
2838 
2839 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2840 	size = ARRAY_SIZE(fstack->calls);
2841 
2842 	if (regs) {
2843 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2844 						   size, skip);
2845 	} else {
2846 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2847 	}
2848 
2849 	size = nr_entries * sizeof(unsigned long);
2850 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2851 					    sizeof(*entry) + size, flags, pc);
2852 	if (!event)
2853 		goto out;
2854 	entry = ring_buffer_event_data(event);
2855 
2856 	memcpy(&entry->caller, fstack->calls, size);
2857 	entry->size = nr_entries;
2858 
2859 	if (!call_filter_check_discard(call, entry, buffer, event))
2860 		__buffer_unlock_commit(buffer, event);
2861 
2862  out:
2863 	/* Again, don't let gcc optimize things here */
2864 	barrier();
2865 	__this_cpu_dec(ftrace_stack_reserve);
2866 	preempt_enable_notrace();
2867 
2868 }
2869 
2870 static inline void ftrace_trace_stack(struct trace_array *tr,
2871 				      struct ring_buffer *buffer,
2872 				      unsigned long flags,
2873 				      int skip, int pc, struct pt_regs *regs)
2874 {
2875 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2876 		return;
2877 
2878 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2879 }
2880 
2881 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2882 		   int pc)
2883 {
2884 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2885 
2886 	if (rcu_is_watching()) {
2887 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2888 		return;
2889 	}
2890 
2891 	/*
2892 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2893 	 * but if the above rcu_is_watching() failed, then the NMI
2894 	 * triggered someplace critical, and rcu_irq_enter() should
2895 	 * not be called from NMI.
2896 	 */
2897 	if (unlikely(in_nmi()))
2898 		return;
2899 
2900 	rcu_irq_enter_irqson();
2901 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2902 	rcu_irq_exit_irqson();
2903 }
2904 
2905 /**
2906  * trace_dump_stack - record a stack back trace in the trace buffer
2907  * @skip: Number of functions to skip (helper handlers)
2908  */
2909 void trace_dump_stack(int skip)
2910 {
2911 	unsigned long flags;
2912 
2913 	if (tracing_disabled || tracing_selftest_running)
2914 		return;
2915 
2916 	local_save_flags(flags);
2917 
2918 #ifndef CONFIG_UNWINDER_ORC
2919 	/* Skip 1 to skip this function. */
2920 	skip++;
2921 #endif
2922 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2923 			     flags, skip, preempt_count(), NULL);
2924 }
2925 EXPORT_SYMBOL_GPL(trace_dump_stack);
2926 
2927 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2928 static DEFINE_PER_CPU(int, user_stack_count);
2929 
2930 static void
2931 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2932 {
2933 	struct trace_event_call *call = &event_user_stack;
2934 	struct ring_buffer_event *event;
2935 	struct userstack_entry *entry;
2936 
2937 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2938 		return;
2939 
2940 	/*
2941 	 * NMIs cannot handle page faults, even with fixups.
2942 	 * Saving the user stack can (and often does) fault.
2943 	 */
2944 	if (unlikely(in_nmi()))
2945 		return;
2946 
2947 	/*
2948 	 * prevent recursion, since the user stack tracing may
2949 	 * trigger other kernel events.
2950 	 */
2951 	preempt_disable();
2952 	if (__this_cpu_read(user_stack_count))
2953 		goto out;
2954 
2955 	__this_cpu_inc(user_stack_count);
2956 
2957 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2958 					    sizeof(*entry), flags, pc);
2959 	if (!event)
2960 		goto out_drop_count;
2961 	entry	= ring_buffer_event_data(event);
2962 
2963 	entry->tgid		= current->tgid;
2964 	memset(&entry->caller, 0, sizeof(entry->caller));
2965 
2966 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2967 	if (!call_filter_check_discard(call, entry, buffer, event))
2968 		__buffer_unlock_commit(buffer, event);
2969 
2970  out_drop_count:
2971 	__this_cpu_dec(user_stack_count);
2972  out:
2973 	preempt_enable();
2974 }
2975 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
2976 static void ftrace_trace_userstack(struct ring_buffer *buffer,
2977 				   unsigned long flags, int pc)
2978 {
2979 }
2980 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2981 
2982 #endif /* CONFIG_STACKTRACE */
2983 
2984 /* created for use with alloc_percpu */
2985 struct trace_buffer_struct {
2986 	int nesting;
2987 	char buffer[4][TRACE_BUF_SIZE];
2988 };
2989 
2990 static struct trace_buffer_struct *trace_percpu_buffer;
2991 
2992 /*
2993  * This allows for lockless recording.  If we're nested too deeply, then
2994  * this returns NULL.
2995  */
2996 static char *get_trace_buf(void)
2997 {
2998 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2999 
3000 	if (!buffer || buffer->nesting >= 4)
3001 		return NULL;
3002 
3003 	buffer->nesting++;
3004 
3005 	/* Interrupts must see nesting incremented before we use the buffer */
3006 	barrier();
3007 	return &buffer->buffer[buffer->nesting][0];
3008 }
3009 
3010 static void put_trace_buf(void)
3011 {
3012 	/* Don't let the decrement of nesting leak before this */
3013 	barrier();
3014 	this_cpu_dec(trace_percpu_buffer->nesting);
3015 }
3016 
3017 static int alloc_percpu_trace_buffer(void)
3018 {
3019 	struct trace_buffer_struct *buffers;
3020 
3021 	buffers = alloc_percpu(struct trace_buffer_struct);
3022 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3023 		return -ENOMEM;
3024 
3025 	trace_percpu_buffer = buffers;
3026 	return 0;
3027 }
3028 
3029 static int buffers_allocated;
3030 
3031 void trace_printk_init_buffers(void)
3032 {
3033 	if (buffers_allocated)
3034 		return;
3035 
3036 	if (alloc_percpu_trace_buffer())
3037 		return;
3038 
3039 	/* trace_printk() is for debug use only. Don't use it in production. */
3040 
3041 	pr_warn("\n");
3042 	pr_warn("**********************************************************\n");
3043 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3044 	pr_warn("**                                                      **\n");
3045 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3046 	pr_warn("**                                                      **\n");
3047 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3048 	pr_warn("** unsafe for production use.                           **\n");
3049 	pr_warn("**                                                      **\n");
3050 	pr_warn("** If you see this message and you are not debugging    **\n");
3051 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3052 	pr_warn("**                                                      **\n");
3053 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3054 	pr_warn("**********************************************************\n");
3055 
3056 	/* Expand the buffers to set size */
3057 	tracing_update_buffers();
3058 
3059 	buffers_allocated = 1;
3060 
3061 	/*
3062 	 * trace_printk_init_buffers() can be called by modules.
3063 	 * If that happens, then we need to start cmdline recording
3064 	 * directly here. If global_trace.trace_buffer.buffer is already
3065 	 * allocated, then this was called by module code.
3066 	 */
3067 	if (global_trace.trace_buffer.buffer)
3068 		tracing_start_cmdline_record();
3069 }
3070 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3071 
3072 void trace_printk_start_comm(void)
3073 {
3074 	/* Start tracing comms if trace printk is set */
3075 	if (!buffers_allocated)
3076 		return;
3077 	tracing_start_cmdline_record();
3078 }
3079 
3080 static void trace_printk_start_stop_comm(int enabled)
3081 {
3082 	if (!buffers_allocated)
3083 		return;
3084 
3085 	if (enabled)
3086 		tracing_start_cmdline_record();
3087 	else
3088 		tracing_stop_cmdline_record();
3089 }
3090 
3091 /**
3092  * trace_vbprintk - write binary msg to tracing buffer
3093  * @ip:    The address of the caller
3094  * @fmt:   The string format to write to the buffer
3095  * @args:  Arguments for @fmt
3096  */
3097 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3098 {
3099 	struct trace_event_call *call = &event_bprint;
3100 	struct ring_buffer_event *event;
3101 	struct ring_buffer *buffer;
3102 	struct trace_array *tr = &global_trace;
3103 	struct bprint_entry *entry;
3104 	unsigned long flags;
3105 	char *tbuffer;
3106 	int len = 0, size, pc;
3107 
3108 	if (unlikely(tracing_selftest_running || tracing_disabled))
3109 		return 0;
3110 
3111 	/* Don't pollute graph traces with trace_vprintk internals */
3112 	pause_graph_tracing();
3113 
3114 	pc = preempt_count();
3115 	preempt_disable_notrace();
3116 
3117 	tbuffer = get_trace_buf();
3118 	if (!tbuffer) {
3119 		len = 0;
3120 		goto out_nobuffer;
3121 	}
3122 
3123 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3124 
3125 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3126 		goto out;
3127 
3128 	local_save_flags(flags);
3129 	size = sizeof(*entry) + sizeof(u32) * len;
3130 	buffer = tr->trace_buffer.buffer;
3131 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3132 					    flags, pc);
3133 	if (!event)
3134 		goto out;
3135 	entry = ring_buffer_event_data(event);
3136 	entry->ip			= ip;
3137 	entry->fmt			= fmt;
3138 
3139 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3140 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3141 		__buffer_unlock_commit(buffer, event);
3142 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3143 	}
3144 
3145 out:
3146 	put_trace_buf();
3147 
3148 out_nobuffer:
3149 	preempt_enable_notrace();
3150 	unpause_graph_tracing();
3151 
3152 	return len;
3153 }
3154 EXPORT_SYMBOL_GPL(trace_vbprintk);
3155 
3156 __printf(3, 0)
3157 static int
3158 __trace_array_vprintk(struct ring_buffer *buffer,
3159 		      unsigned long ip, const char *fmt, va_list args)
3160 {
3161 	struct trace_event_call *call = &event_print;
3162 	struct ring_buffer_event *event;
3163 	int len = 0, size, pc;
3164 	struct print_entry *entry;
3165 	unsigned long flags;
3166 	char *tbuffer;
3167 
3168 	if (tracing_disabled || tracing_selftest_running)
3169 		return 0;
3170 
3171 	/* Don't pollute graph traces with trace_vprintk internals */
3172 	pause_graph_tracing();
3173 
3174 	pc = preempt_count();
3175 	preempt_disable_notrace();
3176 
3177 
3178 	tbuffer = get_trace_buf();
3179 	if (!tbuffer) {
3180 		len = 0;
3181 		goto out_nobuffer;
3182 	}
3183 
3184 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3185 
3186 	local_save_flags(flags);
3187 	size = sizeof(*entry) + len + 1;
3188 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3189 					    flags, pc);
3190 	if (!event)
3191 		goto out;
3192 	entry = ring_buffer_event_data(event);
3193 	entry->ip = ip;
3194 
3195 	memcpy(&entry->buf, tbuffer, len + 1);
3196 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3197 		__buffer_unlock_commit(buffer, event);
3198 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3199 	}
3200 
3201 out:
3202 	put_trace_buf();
3203 
3204 out_nobuffer:
3205 	preempt_enable_notrace();
3206 	unpause_graph_tracing();
3207 
3208 	return len;
3209 }
3210 
3211 __printf(3, 0)
3212 int trace_array_vprintk(struct trace_array *tr,
3213 			unsigned long ip, const char *fmt, va_list args)
3214 {
3215 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3216 }
3217 
3218 __printf(3, 0)
3219 int trace_array_printk(struct trace_array *tr,
3220 		       unsigned long ip, const char *fmt, ...)
3221 {
3222 	int ret;
3223 	va_list ap;
3224 
3225 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3226 		return 0;
3227 
3228 	va_start(ap, fmt);
3229 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3230 	va_end(ap);
3231 	return ret;
3232 }
3233 EXPORT_SYMBOL_GPL(trace_array_printk);
3234 
3235 __printf(3, 4)
3236 int trace_array_printk_buf(struct ring_buffer *buffer,
3237 			   unsigned long ip, const char *fmt, ...)
3238 {
3239 	int ret;
3240 	va_list ap;
3241 
3242 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3243 		return 0;
3244 
3245 	va_start(ap, fmt);
3246 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3247 	va_end(ap);
3248 	return ret;
3249 }
3250 
3251 __printf(2, 0)
3252 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3253 {
3254 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3255 }
3256 EXPORT_SYMBOL_GPL(trace_vprintk);
3257 
3258 static void trace_iterator_increment(struct trace_iterator *iter)
3259 {
3260 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3261 
3262 	iter->idx++;
3263 	if (buf_iter)
3264 		ring_buffer_read(buf_iter, NULL);
3265 }
3266 
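/*
 * Peek at the next entry for @cpu without consuming it. The entry's
 * timestamp is returned in @ts and iter->ent_size is updated; NULL is
 * returned if that CPU's buffer is empty.
 */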
3267 static struct trace_entry *
3268 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3269 		unsigned long *lost_events)
3270 {
3271 	struct ring_buffer_event *event;
3272 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3273 
3274 	if (buf_iter)
3275 		event = ring_buffer_iter_peek(buf_iter, ts);
3276 	else
3277 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3278 					 lost_events);
3279 
3280 	if (event) {
3281 		iter->ent_size = ring_buffer_event_length(event);
3282 		return ring_buffer_event_data(event);
3283 	}
3284 	iter->ent_size = 0;
3285 	return NULL;
3286 }
3287 
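/*
 * Find the oldest entry across all traced CPUs (or only iter->cpu_file
 * for a per-CPU trace file) by comparing the peeked timestamps, and
 * report its CPU, timestamp and any lost-event count.
 */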
3288 static struct trace_entry *
3289 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3290 		  unsigned long *missing_events, u64 *ent_ts)
3291 {
3292 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3293 	struct trace_entry *ent, *next = NULL;
3294 	unsigned long lost_events = 0, next_lost = 0;
3295 	int cpu_file = iter->cpu_file;
3296 	u64 next_ts = 0, ts;
3297 	int next_cpu = -1;
3298 	int next_size = 0;
3299 	int cpu;
3300 
3301 	/*
3302 	 * If we are in a per_cpu trace file, don't bother iterating over
3303 	 * all CPUs; just peek at that one CPU directly.
3304 	 */
3305 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3306 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3307 			return NULL;
3308 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3309 		if (ent_cpu)
3310 			*ent_cpu = cpu_file;
3311 
3312 		return ent;
3313 	}
3314 
3315 	for_each_tracing_cpu(cpu) {
3316 
3317 		if (ring_buffer_empty_cpu(buffer, cpu))
3318 			continue;
3319 
3320 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3321 
3322 		/*
3323 		 * Pick the entry with the smallest timestamp:
3324 		 */
3325 		if (ent && (!next || ts < next_ts)) {
3326 			next = ent;
3327 			next_cpu = cpu;
3328 			next_ts = ts;
3329 			next_lost = lost_events;
3330 			next_size = iter->ent_size;
3331 		}
3332 	}
3333 
3334 	iter->ent_size = next_size;
3335 
3336 	if (ent_cpu)
3337 		*ent_cpu = next_cpu;
3338 
3339 	if (ent_ts)
3340 		*ent_ts = next_ts;
3341 
3342 	if (missing_events)
3343 		*missing_events = next_lost;
3344 
3345 	return next;
3346 }
3347 
3348 /* Find the next real entry, without updating the iterator itself */
3349 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3350 					  int *ent_cpu, u64 *ent_ts)
3351 {
3352 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3353 }
3354 
3355 /* Find the next real entry, and increment the iterator to the next entry */
3356 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3357 {
3358 	iter->ent = __find_next_entry(iter, &iter->cpu,
3359 				      &iter->lost_events, &iter->ts);
3360 
3361 	if (iter->ent)
3362 		trace_iterator_increment(iter);
3363 
3364 	return iter->ent ? iter : NULL;
3365 }
3366 
3367 static void trace_consume(struct trace_iterator *iter)
3368 {
3369 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3370 			    &iter->lost_events);
3371 }
3372 
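/*
 * seq_file ->next() callback: advance the iterator to the entry at the
 * requested position. The iterator only moves forward; seeking backwards
 * returns NULL.
 */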
3373 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3374 {
3375 	struct trace_iterator *iter = m->private;
3376 	int i = (int)*pos;
3377 	void *ent;
3378 
3379 	WARN_ON_ONCE(iter->leftover);
3380 
3381 	(*pos)++;
3382 
3383 	/* can't go backwards */
3384 	if (iter->idx > i)
3385 		return NULL;
3386 
3387 	if (iter->idx < 0)
3388 		ent = trace_find_next_entry_inc(iter);
3389 	else
3390 		ent = iter;
3391 
3392 	while (ent && iter->idx < i)
3393 		ent = trace_find_next_entry_inc(iter);
3394 
3395 	iter->pos = *pos;
3396 
3397 	return ent;
3398 }
3399 
3400 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3401 {
3402 	struct ring_buffer_event *event;
3403 	struct ring_buffer_iter *buf_iter;
3404 	unsigned long entries = 0;
3405 	u64 ts;
3406 
3407 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3408 
3409 	buf_iter = trace_buffer_iter(iter, cpu);
3410 	if (!buf_iter)
3411 		return;
3412 
3413 	ring_buffer_iter_reset(buf_iter);
3414 
3415 	/*
3416 	 * We could have the case with the max latency tracers
3417 	 * that a reset never took place on a cpu. This is evidenced
3418 	 * by the timestamp being before the start of the buffer.
3419 	 */
3420 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3421 		if (ts >= iter->trace_buffer->time_start)
3422 			break;
3423 		entries++;
3424 		ring_buffer_read(buf_iter, NULL);
3425 	}
3426 
3427 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3428 }
3429 
3430 /*
3431  * The current tracer is copied to avoid taking a global lock
3432  * all around.
3433  */
3434 static void *s_start(struct seq_file *m, loff_t *pos)
3435 {
3436 	struct trace_iterator *iter = m->private;
3437 	struct trace_array *tr = iter->tr;
3438 	int cpu_file = iter->cpu_file;
3439 	void *p = NULL;
3440 	loff_t l = 0;
3441 	int cpu;
3442 
3443 	/*
3444 	 * copy the tracer to avoid using a global lock all around.
3445 	 * iter->trace is a copy of current_trace; the name pointers can
3446 	 * be compared instead of using strcmp(), as iter->trace->name
3447 	 * will point to the same string as current_trace->name.
3448 	 */
3449 	mutex_lock(&trace_types_lock);
3450 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3451 		*iter->trace = *tr->current_trace;
3452 	mutex_unlock(&trace_types_lock);
3453 
3454 #ifdef CONFIG_TRACER_MAX_TRACE
3455 	if (iter->snapshot && iter->trace->use_max_tr)
3456 		return ERR_PTR(-EBUSY);
3457 #endif
3458 
3459 	if (!iter->snapshot)
3460 		atomic_inc(&trace_record_taskinfo_disabled);
3461 
3462 	if (*pos != iter->pos) {
3463 		iter->ent = NULL;
3464 		iter->cpu = 0;
3465 		iter->idx = -1;
3466 
3467 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3468 			for_each_tracing_cpu(cpu)
3469 				tracing_iter_reset(iter, cpu);
3470 		} else
3471 			tracing_iter_reset(iter, cpu_file);
3472 
3473 		iter->leftover = 0;
3474 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3475 			;
3476 
3477 	} else {
3478 		/*
3479 		 * If we overflowed the seq_file before, then we want
3480 		 * to just reuse the trace_seq buffer again.
3481 		 */
3482 		if (iter->leftover)
3483 			p = iter;
3484 		else {
3485 			l = *pos - 1;
3486 			p = s_next(m, p, &l);
3487 		}
3488 	}
3489 
3490 	trace_event_read_lock();
3491 	trace_access_lock(cpu_file);
3492 	return p;
3493 }
3494 
3495 static void s_stop(struct seq_file *m, void *p)
3496 {
3497 	struct trace_iterator *iter = m->private;
3498 
3499 #ifdef CONFIG_TRACER_MAX_TRACE
3500 	if (iter->snapshot && iter->trace->use_max_tr)
3501 		return;
3502 #endif
3503 
3504 	if (!iter->snapshot)
3505 		atomic_dec(&trace_record_taskinfo_disabled);
3506 
3507 	trace_access_unlock(iter->cpu_file);
3508 	trace_event_read_unlock();
3509 }
3510 
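/*
 * Count the entries currently in a CPU's buffer. *entries is what is
 * still readable; *total also accounts for entries lost to overruns,
 * except when this CPU had skipped (pre-reset) entries, in which case
 * total equals entries.
 */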
3511 static void
3512 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3513 		      unsigned long *entries, int cpu)
3514 {
3515 	unsigned long count;
3516 
3517 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3518 	/*
3519 	 * If this buffer has skipped entries, then we hold all
3520 	 * entries for the trace and we need to ignore the
3521 	 * ones before the time stamp.
3522 	 */
3523 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3524 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3525 		/* total is the same as the entries */
3526 		*total = count;
3527 	} else
3528 		*total = count +
3529 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3530 	*entries = count;
3531 }
3532 
3533 static void
3534 get_total_entries(struct trace_buffer *buf,
3535 		  unsigned long *total, unsigned long *entries)
3536 {
3537 	unsigned long t, e;
3538 	int cpu;
3539 
3540 	*total = 0;
3541 	*entries = 0;
3542 
3543 	for_each_tracing_cpu(cpu) {
3544 		get_total_entries_cpu(buf, &t, &e, cpu);
3545 		*total += t;
3546 		*entries += e;
3547 	}
3548 }
3549 
3550 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3551 {
3552 	unsigned long total, entries;
3553 
3554 	if (!tr)
3555 		tr = &global_trace;
3556 
3557 	get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3558 
3559 	return entries;
3560 }
3561 
3562 unsigned long trace_total_entries(struct trace_array *tr)
3563 {
3564 	unsigned long total, entries;
3565 
3566 	if (!tr)
3567 		tr = &global_trace;
3568 
3569 	get_total_entries(&tr->trace_buffer, &total, &entries);
3570 
3571 	return entries;
3572 }
3573 
3574 static void print_lat_help_header(struct seq_file *m)
3575 {
3576 	seq_puts(m, "#                  _------=> CPU#            \n"
3577 		    "#                 / _-----=> irqs-off        \n"
3578 		    "#                | / _----=> need-resched    \n"
3579 		    "#                || / _---=> hardirq/softirq \n"
3580 		    "#                ||| / _--=> preempt-depth   \n"
3581 		    "#                |||| /     delay            \n"
3582 		    "#  cmd     pid   ||||| time  |   caller      \n"
3583 		    "#     \\   /      |||||  \\    |   /         \n");
3584 }
3585 
3586 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3587 {
3588 	unsigned long total;
3589 	unsigned long entries;
3590 
3591 	get_total_entries(buf, &total, &entries);
3592 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3593 		   entries, total, num_online_cpus());
3594 	seq_puts(m, "#\n");
3595 }
3596 
3597 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3598 				   unsigned int flags)
3599 {
3600 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3601 
3602 	print_event_info(buf, m);
3603 
3604 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3605 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3606 }
3607 
3608 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3609 				       unsigned int flags)
3610 {
3611 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3612 	const char *space = "          ";
3613 	int prec = tgid ? 10 : 2;
3614 
3615 	print_event_info(buf, m);
3616 
3617 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3618 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3619 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3620 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3621 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3622 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3623 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3624 }
3625 
3626 void
3627 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3628 {
3629 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3630 	struct trace_buffer *buf = iter->trace_buffer;
3631 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3632 	struct tracer *type = iter->trace;
3633 	unsigned long entries;
3634 	unsigned long total;
3635 	const char *name = "preemption";
3636 
3637 	name = type->name;
3638 
3639 	get_total_entries(buf, &total, &entries);
3640 
3641 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3642 		   name, UTS_RELEASE);
3643 	seq_puts(m, "# -----------------------------------"
3644 		 "---------------------------------\n");
3645 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3646 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3647 		   nsecs_to_usecs(data->saved_latency),
3648 		   entries,
3649 		   total,
3650 		   buf->cpu,
3651 #if defined(CONFIG_PREEMPT_NONE)
3652 		   "server",
3653 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3654 		   "desktop",
3655 #elif defined(CONFIG_PREEMPT)
3656 		   "preempt",
3657 #else
3658 		   "unknown",
3659 #endif
3660 		   /* These are reserved for later use */
3661 		   0, 0, 0, 0);
3662 #ifdef CONFIG_SMP
3663 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3664 #else
3665 	seq_puts(m, ")\n");
3666 #endif
3667 	seq_puts(m, "#    -----------------\n");
3668 	seq_printf(m, "#    | task: %.16s-%d "
3669 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3670 		   data->comm, data->pid,
3671 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3672 		   data->policy, data->rt_priority);
3673 	seq_puts(m, "#    -----------------\n");
3674 
3675 	if (data->critical_start) {
3676 		seq_puts(m, "#  => started at: ");
3677 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3678 		trace_print_seq(m, &iter->seq);
3679 		seq_puts(m, "\n#  => ended at:   ");
3680 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3681 		trace_print_seq(m, &iter->seq);
3682 		seq_puts(m, "\n#\n");
3683 	}
3684 
3685 	seq_puts(m, "#\n");
3686 }
3687 
3688 static void test_cpu_buff_start(struct trace_iterator *iter)
3689 {
3690 	struct trace_seq *s = &iter->seq;
3691 	struct trace_array *tr = iter->tr;
3692 
3693 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3694 		return;
3695 
3696 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3697 		return;
3698 
3699 	if (cpumask_available(iter->started) &&
3700 	    cpumask_test_cpu(iter->cpu, iter->started))
3701 		return;
3702 
3703 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3704 		return;
3705 
3706 	if (cpumask_available(iter->started))
3707 		cpumask_set_cpu(iter->cpu, iter->started);
3708 
3709 	/* Don't print started cpu buffer for the first entry of the trace */
3710 	if (iter->idx > 1)
3711 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3712 				iter->cpu);
3713 }
3714 
3715 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3716 {
3717 	struct trace_array *tr = iter->tr;
3718 	struct trace_seq *s = &iter->seq;
3719 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3720 	struct trace_entry *entry;
3721 	struct trace_event *event;
3722 
3723 	entry = iter->ent;
3724 
3725 	test_cpu_buff_start(iter);
3726 
3727 	event = ftrace_find_event(entry->type);
3728 
3729 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3730 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3731 			trace_print_lat_context(iter);
3732 		else
3733 			trace_print_context(iter);
3734 	}
3735 
3736 	if (trace_seq_has_overflowed(s))
3737 		return TRACE_TYPE_PARTIAL_LINE;
3738 
3739 	if (event)
3740 		return event->funcs->trace(iter, sym_flags, event);
3741 
3742 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3743 
3744 	return trace_handle_return(s);
3745 }
3746 
3747 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3748 {
3749 	struct trace_array *tr = iter->tr;
3750 	struct trace_seq *s = &iter->seq;
3751 	struct trace_entry *entry;
3752 	struct trace_event *event;
3753 
3754 	entry = iter->ent;
3755 
3756 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3757 		trace_seq_printf(s, "%d %d %llu ",
3758 				 entry->pid, iter->cpu, iter->ts);
3759 
3760 	if (trace_seq_has_overflowed(s))
3761 		return TRACE_TYPE_PARTIAL_LINE;
3762 
3763 	event = ftrace_find_event(entry->type);
3764 	if (event)
3765 		return event->funcs->raw(iter, 0, event);
3766 
3767 	trace_seq_printf(s, "%d ?\n", entry->type);
3768 
3769 	return trace_handle_return(s);
3770 }
3771 
3772 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3773 {
3774 	struct trace_array *tr = iter->tr;
3775 	struct trace_seq *s = &iter->seq;
3776 	unsigned char newline = '\n';
3777 	struct trace_entry *entry;
3778 	struct trace_event *event;
3779 
3780 	entry = iter->ent;
3781 
3782 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3783 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3784 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3785 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3786 		if (trace_seq_has_overflowed(s))
3787 			return TRACE_TYPE_PARTIAL_LINE;
3788 	}
3789 
3790 	event = ftrace_find_event(entry->type);
3791 	if (event) {
3792 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3793 		if (ret != TRACE_TYPE_HANDLED)
3794 			return ret;
3795 	}
3796 
3797 	SEQ_PUT_FIELD(s, newline);
3798 
3799 	return trace_handle_return(s);
3800 }
3801 
3802 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3803 {
3804 	struct trace_array *tr = iter->tr;
3805 	struct trace_seq *s = &iter->seq;
3806 	struct trace_entry *entry;
3807 	struct trace_event *event;
3808 
3809 	entry = iter->ent;
3810 
3811 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3812 		SEQ_PUT_FIELD(s, entry->pid);
3813 		SEQ_PUT_FIELD(s, iter->cpu);
3814 		SEQ_PUT_FIELD(s, iter->ts);
3815 		if (trace_seq_has_overflowed(s))
3816 			return TRACE_TYPE_PARTIAL_LINE;
3817 	}
3818 
3819 	event = ftrace_find_event(entry->type);
3820 	return event ? event->funcs->binary(iter, 0, event) :
3821 		TRACE_TYPE_HANDLED;
3822 }
3823 
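/*
 * Return 1 if the buffer(s) covered by this iterator (one CPU or all
 * CPUs) have nothing left to read, 0 otherwise.
 */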
3824 int trace_empty(struct trace_iterator *iter)
3825 {
3826 	struct ring_buffer_iter *buf_iter;
3827 	int cpu;
3828 
3829 	/* If we are looking at one CPU buffer, only check that one */
3830 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3831 		cpu = iter->cpu_file;
3832 		buf_iter = trace_buffer_iter(iter, cpu);
3833 		if (buf_iter) {
3834 			if (!ring_buffer_iter_empty(buf_iter))
3835 				return 0;
3836 		} else {
3837 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3838 				return 0;
3839 		}
3840 		return 1;
3841 	}
3842 
3843 	for_each_tracing_cpu(cpu) {
3844 		buf_iter = trace_buffer_iter(iter, cpu);
3845 		if (buf_iter) {
3846 			if (!ring_buffer_iter_empty(buf_iter))
3847 				return 0;
3848 		} else {
3849 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3850 				return 0;
3851 		}
3852 	}
3853 
3854 	return 1;
3855 }
3856 
3857 /* Called with trace_event_read_lock() held. */
3858 enum print_line_t print_trace_line(struct trace_iterator *iter)
3859 {
3860 	struct trace_array *tr = iter->tr;
3861 	unsigned long trace_flags = tr->trace_flags;
3862 	enum print_line_t ret;
3863 
3864 	if (iter->lost_events) {
3865 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3866 				 iter->cpu, iter->lost_events);
3867 		if (trace_seq_has_overflowed(&iter->seq))
3868 			return TRACE_TYPE_PARTIAL_LINE;
3869 	}
3870 
3871 	if (iter->trace && iter->trace->print_line) {
3872 		ret = iter->trace->print_line(iter);
3873 		if (ret != TRACE_TYPE_UNHANDLED)
3874 			return ret;
3875 	}
3876 
3877 	if (iter->ent->type == TRACE_BPUTS &&
3878 			trace_flags & TRACE_ITER_PRINTK &&
3879 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3880 		return trace_print_bputs_msg_only(iter);
3881 
3882 	if (iter->ent->type == TRACE_BPRINT &&
3883 			trace_flags & TRACE_ITER_PRINTK &&
3884 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3885 		return trace_print_bprintk_msg_only(iter);
3886 
3887 	if (iter->ent->type == TRACE_PRINT &&
3888 			trace_flags & TRACE_ITER_PRINTK &&
3889 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3890 		return trace_print_printk_msg_only(iter);
3891 
3892 	if (trace_flags & TRACE_ITER_BIN)
3893 		return print_bin_fmt(iter);
3894 
3895 	if (trace_flags & TRACE_ITER_HEX)
3896 		return print_hex_fmt(iter);
3897 
3898 	if (trace_flags & TRACE_ITER_RAW)
3899 		return print_raw_fmt(iter);
3900 
3901 	return print_trace_fmt(iter);
3902 }
3903 
3904 void trace_latency_header(struct seq_file *m)
3905 {
3906 	struct trace_iterator *iter = m->private;
3907 	struct trace_array *tr = iter->tr;
3908 
3909 	/* print nothing if the buffers are empty */
3910 	if (trace_empty(iter))
3911 		return;
3912 
3913 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3914 		print_trace_header(m, iter);
3915 
3916 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3917 		print_lat_help_header(m);
3918 }
3919 
3920 void trace_default_header(struct seq_file *m)
3921 {
3922 	struct trace_iterator *iter = m->private;
3923 	struct trace_array *tr = iter->tr;
3924 	unsigned long trace_flags = tr->trace_flags;
3925 
3926 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3927 		return;
3928 
3929 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3930 		/* print nothing if the buffers are empty */
3931 		if (trace_empty(iter))
3932 			return;
3933 		print_trace_header(m, iter);
3934 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3935 			print_lat_help_header(m);
3936 	} else {
3937 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3938 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3939 				print_func_help_header_irq(iter->trace_buffer,
3940 							   m, trace_flags);
3941 			else
3942 				print_func_help_header(iter->trace_buffer, m,
3943 						       trace_flags);
3944 		}
3945 	}
3946 }
3947 
3948 static void test_ftrace_alive(struct seq_file *m)
3949 {
3950 	if (!ftrace_is_dead())
3951 		return;
3952 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3953 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3954 }
3955 
3956 #ifdef CONFIG_TRACER_MAX_TRACE
3957 static void show_snapshot_main_help(struct seq_file *m)
3958 {
3959 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3960 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3961 		    "#                      Takes a snapshot of the main buffer.\n"
3962 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3963 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3964 		    "#                       is not a '0' or '1')\n");
3965 }
3966 
3967 static void show_snapshot_percpu_help(struct seq_file *m)
3968 {
3969 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3970 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3971 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3972 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3973 #else
3974 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3975 		    "#                     Must use main snapshot file to allocate.\n");
3976 #endif
3977 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3978 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3979 		    "#                       is not a '0' or '1')\n");
3980 }
3981 
3982 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3983 {
3984 	if (iter->tr->allocated_snapshot)
3985 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3986 	else
3987 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3988 
3989 	seq_puts(m, "# Snapshot commands:\n");
3990 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3991 		show_snapshot_main_help(m);
3992 	else
3993 		show_snapshot_percpu_help(m);
3994 }
3995 #else
3996 /* Should never be called */
3997 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3998 #endif
3999 
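/* seq_file ->show() callback for the "trace" file */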
4000 static int s_show(struct seq_file *m, void *v)
4001 {
4002 	struct trace_iterator *iter = v;
4003 	int ret;
4004 
4005 	if (iter->ent == NULL) {
4006 		if (iter->tr) {
4007 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4008 			seq_puts(m, "#\n");
4009 			test_ftrace_alive(m);
4010 		}
4011 		if (iter->snapshot && trace_empty(iter))
4012 			print_snapshot_help(m, iter);
4013 		else if (iter->trace && iter->trace->print_header)
4014 			iter->trace->print_header(m);
4015 		else
4016 			trace_default_header(m);
4017 
4018 	} else if (iter->leftover) {
4019 		/*
4020 		 * If we filled the seq_file buffer earlier, we
4021 		 * want to just show it now.
4022 		 */
4023 		ret = trace_print_seq(m, &iter->seq);
4024 
4025 		/* ret should this time be zero, but you never know */
4026 		iter->leftover = ret;
4027 
4028 	} else {
4029 		print_trace_line(iter);
4030 		ret = trace_print_seq(m, &iter->seq);
4031 		/*
4032 		 * If we overflow the seq_file buffer, then it will
4033 		 * ask us for this data again at start up.
4034 		 * Use that instead.
4035 		 *  ret is 0 if seq_file write succeeded.
4036 		 *        -1 otherwise.
4037 		 */
4038 		iter->leftover = ret;
4039 	}
4040 
4041 	return 0;
4042 }
4043 
4044 /*
4045  * Should be used after trace_array_get(); trace_types_lock
4046  * ensures that i_cdev has already been initialized.
4047  */
4048 static inline int tracing_get_cpu(struct inode *inode)
4049 {
4050 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4051 		return (long)inode->i_cdev - 1;
4052 	return RING_BUFFER_ALL_CPUS;
4053 }
4054 
4055 static const struct seq_operations tracer_seq_ops = {
4056 	.start		= s_start,
4057 	.next		= s_next,
4058 	.stop		= s_stop,
4059 	.show		= s_show,
4060 };
4061 
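/*
 * Create and set up the iterator used to read the trace buffer.
 * Unless the snapshot buffer is being read, tracing is stopped while
 * the file is held open and restarted on release.
 */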
4062 static struct trace_iterator *
4063 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4064 {
4065 	struct trace_array *tr = inode->i_private;
4066 	struct trace_iterator *iter;
4067 	int cpu;
4068 
4069 	if (tracing_disabled)
4070 		return ERR_PTR(-ENODEV);
4071 
4072 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4073 	if (!iter)
4074 		return ERR_PTR(-ENOMEM);
4075 
4076 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4077 				    GFP_KERNEL);
4078 	if (!iter->buffer_iter)
4079 		goto release;
4080 
4081 	/*
4082 	 * We make a copy of the current tracer to avoid concurrent
4083 	 * changes on it while we are reading.
4084 	 */
4085 	mutex_lock(&trace_types_lock);
4086 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4087 	if (!iter->trace)
4088 		goto fail;
4089 
4090 	*iter->trace = *tr->current_trace;
4091 
4092 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4093 		goto fail;
4094 
4095 	iter->tr = tr;
4096 
4097 #ifdef CONFIG_TRACER_MAX_TRACE
4098 	/* Currently only the top directory has a snapshot */
4099 	if (tr->current_trace->print_max || snapshot)
4100 		iter->trace_buffer = &tr->max_buffer;
4101 	else
4102 #endif
4103 		iter->trace_buffer = &tr->trace_buffer;
4104 	iter->snapshot = snapshot;
4105 	iter->pos = -1;
4106 	iter->cpu_file = tracing_get_cpu(inode);
4107 	mutex_init(&iter->mutex);
4108 
4109 	/* Notify the tracer early, before we stop tracing. */
4110 	if (iter->trace && iter->trace->open)
4111 		iter->trace->open(iter);
4112 
4113 	/* Annotate start of buffers if we had overruns */
4114 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4115 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4116 
4117 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4118 	if (trace_clocks[tr->clock_id].in_ns)
4119 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4120 
4121 	/* stop the trace while dumping if we are not opening "snapshot" */
4122 	if (!iter->snapshot)
4123 		tracing_stop_tr(tr);
4124 
4125 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4126 		for_each_tracing_cpu(cpu) {
4127 			iter->buffer_iter[cpu] =
4128 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
4129 							 cpu, GFP_KERNEL);
4130 		}
4131 		ring_buffer_read_prepare_sync();
4132 		for_each_tracing_cpu(cpu) {
4133 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4134 			tracing_iter_reset(iter, cpu);
4135 		}
4136 	} else {
4137 		cpu = iter->cpu_file;
4138 		iter->buffer_iter[cpu] =
4139 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
4140 						 cpu, GFP_KERNEL);
4141 		ring_buffer_read_prepare_sync();
4142 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4143 		tracing_iter_reset(iter, cpu);
4144 	}
4145 
4146 	mutex_unlock(&trace_types_lock);
4147 
4148 	return iter;
4149 
4150  fail:
4151 	mutex_unlock(&trace_types_lock);
4152 	kfree(iter->trace);
4153 	kfree(iter->buffer_iter);
4154 release:
4155 	seq_release_private(inode, file);
4156 	return ERR_PTR(-ENOMEM);
4157 }
4158 
4159 int tracing_open_generic(struct inode *inode, struct file *filp)
4160 {
4161 	int ret;
4162 
4163 	ret = tracing_check_open_get_tr(NULL);
4164 	if (ret)
4165 		return ret;
4166 
4167 	filp->private_data = inode->i_private;
4168 	return 0;
4169 }
4170 
4171 bool tracing_is_disabled(void)
4172 {
4173 	return tracing_disabled ? true : false;
4174 }
4175 
4176 /*
4177  * Open and update trace_array ref count.
4178  * Must have the current trace_array passed to it.
4179  */
4180 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4181 {
4182 	struct trace_array *tr = inode->i_private;
4183 	int ret;
4184 
4185 	ret = tracing_check_open_get_tr(tr);
4186 	if (ret)
4187 		return ret;
4188 
4189 	filp->private_data = inode->i_private;
4190 
4191 	return 0;
4192 }
4193 
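/*
 * Tear down the iterator built by __tracing_open() and restart
 * tracing if it was stopped when the file was opened.
 */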
4194 static int tracing_release(struct inode *inode, struct file *file)
4195 {
4196 	struct trace_array *tr = inode->i_private;
4197 	struct seq_file *m = file->private_data;
4198 	struct trace_iterator *iter;
4199 	int cpu;
4200 
4201 	if (!(file->f_mode & FMODE_READ)) {
4202 		trace_array_put(tr);
4203 		return 0;
4204 	}
4205 
4206 	/* Writes do not use seq_file */
4207 	iter = m->private;
4208 	mutex_lock(&trace_types_lock);
4209 
4210 	for_each_tracing_cpu(cpu) {
4211 		if (iter->buffer_iter[cpu])
4212 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4213 	}
4214 
4215 	if (iter->trace && iter->trace->close)
4216 		iter->trace->close(iter);
4217 
4218 	if (!iter->snapshot)
4219 		/* reenable tracing if it was previously enabled */
4220 		tracing_start_tr(tr);
4221 
4222 	__trace_array_put(tr);
4223 
4224 	mutex_unlock(&trace_types_lock);
4225 
4226 	mutex_destroy(&iter->mutex);
4227 	free_cpumask_var(iter->started);
4228 	kfree(iter->trace);
4229 	kfree(iter->buffer_iter);
4230 	seq_release_private(inode, file);
4231 
4232 	return 0;
4233 }
4234 
4235 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4236 {
4237 	struct trace_array *tr = inode->i_private;
4238 
4239 	trace_array_put(tr);
4240 	return 0;
4241 }
4242 
4243 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4244 {
4245 	struct trace_array *tr = inode->i_private;
4246 
4247 	trace_array_put(tr);
4248 
4249 	return single_release(inode, file);
4250 }
4251 
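/*
 * Open the trace file: a write open with O_TRUNC clears the buffer,
 * a read open builds a full iterator via __tracing_open().
 */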
4252 static int tracing_open(struct inode *inode, struct file *file)
4253 {
4254 	struct trace_array *tr = inode->i_private;
4255 	struct trace_iterator *iter;
4256 	int ret;
4257 
4258 	ret = tracing_check_open_get_tr(tr);
4259 	if (ret)
4260 		return ret;
4261 
4262 	/* If this file was open for write, then erase contents */
4263 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4264 		int cpu = tracing_get_cpu(inode);
4265 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4266 
4267 #ifdef CONFIG_TRACER_MAX_TRACE
4268 		if (tr->current_trace->print_max)
4269 			trace_buf = &tr->max_buffer;
4270 #endif
4271 
4272 		if (cpu == RING_BUFFER_ALL_CPUS)
4273 			tracing_reset_online_cpus(trace_buf);
4274 		else
4275 			tracing_reset_cpu(trace_buf, cpu);
4276 	}
4277 
4278 	if (file->f_mode & FMODE_READ) {
4279 		iter = __tracing_open(inode, file, false);
4280 		if (IS_ERR(iter))
4281 			ret = PTR_ERR(iter);
4282 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4283 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4284 	}
4285 
4286 	if (ret < 0)
4287 		trace_array_put(tr);
4288 
4289 	return ret;
4290 }
4291 
4292 /*
4293  * Some tracers are not suitable for instance buffers.
4294  * A tracer is always available for the global array (toplevel)
4295  * or if it explicitly states that it is.
4296  */
4297 static bool
4298 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4299 {
4300 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4301 }
4302 
4303 /* Find the next tracer that this trace array may use */
4304 static struct tracer *
4305 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4306 {
4307 	while (t && !trace_ok_for_array(t, tr))
4308 		t = t->next;
4309 
4310 	return t;
4311 }
4312 
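/* seq_file iteration over the list of registered tracers */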
4313 static void *
4314 t_next(struct seq_file *m, void *v, loff_t *pos)
4315 {
4316 	struct trace_array *tr = m->private;
4317 	struct tracer *t = v;
4318 
4319 	(*pos)++;
4320 
4321 	if (t)
4322 		t = get_tracer_for_array(tr, t->next);
4323 
4324 	return t;
4325 }
4326 
4327 static void *t_start(struct seq_file *m, loff_t *pos)
4328 {
4329 	struct trace_array *tr = m->private;
4330 	struct tracer *t;
4331 	loff_t l = 0;
4332 
4333 	mutex_lock(&trace_types_lock);
4334 
4335 	t = get_tracer_for_array(tr, trace_types);
4336 	for (; t && l < *pos; t = t_next(m, t, &l))
4337 		;
4338 
4339 	return t;
4340 }
4341 
4342 static void t_stop(struct seq_file *m, void *p)
4343 {
4344 	mutex_unlock(&trace_types_lock);
4345 }
4346 
4347 static int t_show(struct seq_file *m, void *v)
4348 {
4349 	struct tracer *t = v;
4350 
4351 	if (!t)
4352 		return 0;
4353 
4354 	seq_puts(m, t->name);
4355 	if (t->next)
4356 		seq_putc(m, ' ');
4357 	else
4358 		seq_putc(m, '\n');
4359 
4360 	return 0;
4361 }
4362 
4363 static const struct seq_operations show_traces_seq_ops = {
4364 	.start		= t_start,
4365 	.next		= t_next,
4366 	.stop		= t_stop,
4367 	.show		= t_show,
4368 };
4369 
4370 static int show_traces_open(struct inode *inode, struct file *file)
4371 {
4372 	struct trace_array *tr = inode->i_private;
4373 	struct seq_file *m;
4374 	int ret;
4375 
4376 	ret = tracing_check_open_get_tr(tr);
4377 	if (ret)
4378 		return ret;
4379 
4380 	ret = seq_open(file, &show_traces_seq_ops);
4381 	if (ret) {
4382 		trace_array_put(tr);
4383 		return ret;
4384 	}
4385 
4386 	m = file->private_data;
4387 	m->private = tr;
4388 
4389 	return 0;
4390 }
4391 
4392 static int show_traces_release(struct inode *inode, struct file *file)
4393 {
4394 	struct trace_array *tr = inode->i_private;
4395 
4396 	trace_array_put(tr);
4397 	return seq_release(inode, file);
4398 }
4399 
4400 static ssize_t
4401 tracing_write_stub(struct file *filp, const char __user *ubuf,
4402 		   size_t count, loff_t *ppos)
4403 {
4404 	return count;
4405 }
4406 
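/*
 * llseek helper for tracing files: use seq_lseek() when the file was
 * opened for reading, otherwise just reset the file position.
 */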
4407 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4408 {
4409 	int ret;
4410 
4411 	if (file->f_mode & FMODE_READ)
4412 		ret = seq_lseek(file, offset, whence);
4413 	else
4414 		file->f_pos = ret = 0;
4415 
4416 	return ret;
4417 }
4418 
4419 static const struct file_operations tracing_fops = {
4420 	.open		= tracing_open,
4421 	.read		= seq_read,
4422 	.write		= tracing_write_stub,
4423 	.llseek		= tracing_lseek,
4424 	.release	= tracing_release,
4425 };
4426 
4427 static const struct file_operations show_traces_fops = {
4428 	.open		= show_traces_open,
4429 	.read		= seq_read,
4430 	.llseek		= seq_lseek,
4431 	.release	= show_traces_release,
4432 };
4433 
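/* Show the cpumask that limits which CPUs are traced */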
4434 static ssize_t
4435 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4436 		     size_t count, loff_t *ppos)
4437 {
4438 	struct trace_array *tr = file_inode(filp)->i_private;
4439 	char *mask_str;
4440 	int len;
4441 
4442 	len = snprintf(NULL, 0, "%*pb\n",
4443 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4444 	mask_str = kmalloc(len, GFP_KERNEL);
4445 	if (!mask_str)
4446 		return -ENOMEM;
4447 
4448 	len = snprintf(mask_str, len, "%*pb\n",
4449 		       cpumask_pr_args(tr->tracing_cpumask));
4450 	if (len >= count) {
4451 		count = -EINVAL;
4452 		goto out_err;
4453 	}
4454 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4455 
4456 out_err:
4457 	kfree(mask_str);
4458 
4459 	return count;
4460 }
4461 
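/*
 * Update the tracing cpumask: disable recording on CPUs being removed
 * from the mask and re-enable it on CPUs being added.
 */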
4462 static ssize_t
4463 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4464 		      size_t count, loff_t *ppos)
4465 {
4466 	struct trace_array *tr = file_inode(filp)->i_private;
4467 	cpumask_var_t tracing_cpumask_new;
4468 	int err, cpu;
4469 
4470 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4471 		return -ENOMEM;
4472 
4473 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4474 	if (err)
4475 		goto err_unlock;
4476 
4477 	local_irq_disable();
4478 	arch_spin_lock(&tr->max_lock);
4479 	for_each_tracing_cpu(cpu) {
4480 		/*
4481 		 * Increase/decrease the disabled counter if we are
4482 		 * about to flip a bit in the cpumask:
4483 		 */
4484 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4485 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4486 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4487 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4488 		}
4489 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4490 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4491 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4492 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4493 		}
4494 	}
4495 	arch_spin_unlock(&tr->max_lock);
4496 	local_irq_enable();
4497 
4498 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4499 	free_cpumask_var(tracing_cpumask_new);
4500 
4501 	return count;
4502 
4503 err_unlock:
4504 	free_cpumask_var(tracing_cpumask_new);
4505 
4506 	return err;
4507 }
4508 
4509 static const struct file_operations tracing_cpumask_fops = {
4510 	.open		= tracing_open_generic_tr,
4511 	.read		= tracing_cpumask_read,
4512 	.write		= tracing_cpumask_write,
4513 	.release	= tracing_release_generic_tr,
4514 	.llseek		= generic_file_llseek,
4515 };
4516 
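/* List the global trace options followed by the current tracer's options */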
4517 static int tracing_trace_options_show(struct seq_file *m, void *v)
4518 {
4519 	struct tracer_opt *trace_opts;
4520 	struct trace_array *tr = m->private;
4521 	u32 tracer_flags;
4522 	int i;
4523 
4524 	mutex_lock(&trace_types_lock);
4525 	tracer_flags = tr->current_trace->flags->val;
4526 	trace_opts = tr->current_trace->flags->opts;
4527 
4528 	for (i = 0; trace_options[i]; i++) {
4529 		if (tr->trace_flags & (1 << i))
4530 			seq_printf(m, "%s\n", trace_options[i]);
4531 		else
4532 			seq_printf(m, "no%s\n", trace_options[i]);
4533 	}
4534 
4535 	for (i = 0; trace_opts[i].name; i++) {
4536 		if (tracer_flags & trace_opts[i].bit)
4537 			seq_printf(m, "%s\n", trace_opts[i].name);
4538 		else
4539 			seq_printf(m, "no%s\n", trace_opts[i].name);
4540 	}
4541 	mutex_unlock(&trace_types_lock);
4542 
4543 	return 0;
4544 }
4545 
4546 static int __set_tracer_option(struct trace_array *tr,
4547 			       struct tracer_flags *tracer_flags,
4548 			       struct tracer_opt *opts, int neg)
4549 {
4550 	struct tracer *trace = tracer_flags->trace;
4551 	int ret;
4552 
4553 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4554 	if (ret)
4555 		return ret;
4556 
4557 	if (neg)
4558 		tracer_flags->val &= ~opts->bit;
4559 	else
4560 		tracer_flags->val |= opts->bit;
4561 	return 0;
4562 }
4563 
4564 /* Try to assign a tracer specific option */
4565 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4566 {
4567 	struct tracer *trace = tr->current_trace;
4568 	struct tracer_flags *tracer_flags = trace->flags;
4569 	struct tracer_opt *opts = NULL;
4570 	int i;
4571 
4572 	for (i = 0; tracer_flags->opts[i].name; i++) {
4573 		opts = &tracer_flags->opts[i];
4574 
4575 		if (strcmp(cmp, opts->name) == 0)
4576 			return __set_tracer_option(tr, trace->flags, opts, neg);
4577 	}
4578 
4579 	return -EINVAL;
4580 }
4581 
4582 /* Some tracers require overwrite to stay enabled */
4583 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4584 {
4585 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4586 		return -1;
4587 
4588 	return 0;
4589 }
4590 
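/*
 * Set or clear one of the TRACE_ITER_* flags on @tr and apply any
 * side effects the flag requires (cmdline/tgid recording, overwrite
 * mode, printk control, ...).
 */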
4591 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4592 {
4593 	/* do nothing if flag is already set */
4594 	if (!!(tr->trace_flags & mask) == !!enabled)
4595 		return 0;
4596 
4597 	/* Give the tracer a chance to approve the change */
4598 	if (tr->current_trace->flag_changed)
4599 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4600 			return -EINVAL;
4601 
4602 	if (enabled)
4603 		tr->trace_flags |= mask;
4604 	else
4605 		tr->trace_flags &= ~mask;
4606 
4607 	if (mask == TRACE_ITER_RECORD_CMD)
4608 		trace_event_enable_cmd_record(enabled);
4609 
4610 	if (mask == TRACE_ITER_RECORD_TGID) {
4611 		if (!tgid_map)
4612 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4613 					   sizeof(*tgid_map),
4614 					   GFP_KERNEL);
4615 		if (!tgid_map) {
4616 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4617 			return -ENOMEM;
4618 		}
4619 
4620 		trace_event_enable_tgid_record(enabled);
4621 	}
4622 
4623 	if (mask == TRACE_ITER_EVENT_FORK)
4624 		trace_event_follow_fork(tr, enabled);
4625 
4626 	if (mask == TRACE_ITER_FUNC_FORK)
4627 		ftrace_pid_follow_fork(tr, enabled);
4628 
4629 	if (mask == TRACE_ITER_OVERWRITE) {
4630 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4631 #ifdef CONFIG_TRACER_MAX_TRACE
4632 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4633 #endif
4634 	}
4635 
4636 	if (mask == TRACE_ITER_PRINTK) {
4637 		trace_printk_start_stop_comm(enabled);
4638 		trace_printk_control(enabled);
4639 	}
4640 
4641 	return 0;
4642 }
4643 
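/*
 * Apply a single option string, optionally prefixed with "no" to
 * negate it; unknown names are tried as tracer-specific options.
 */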
4644 static int trace_set_options(struct trace_array *tr, char *option)
4645 {
4646 	char *cmp;
4647 	int neg = 0;
4648 	int ret;
4649 	size_t orig_len = strlen(option);
4650 	int len;
4651 
4652 	cmp = strstrip(option);
4653 
4654 	len = str_has_prefix(cmp, "no");
4655 	if (len)
4656 		neg = 1;
4657 
4658 	cmp += len;
4659 
4660 	mutex_lock(&trace_types_lock);
4661 
4662 	ret = match_string(trace_options, -1, cmp);
4663 	/* If no option could be set, test the specific tracer options */
4664 	if (ret < 0)
4665 		ret = set_tracer_option(tr, cmp, neg);
4666 	else
4667 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4668 
4669 	mutex_unlock(&trace_types_lock);
4670 
4671 	/*
4672 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4673 	 * turn it back into a space.
4674 	 */
4675 	if (orig_len > strlen(option))
4676 		option[strlen(option)] = ' ';
4677 
4678 	return ret;
4679 }
4680 
4681 static void __init apply_trace_boot_options(void)
4682 {
4683 	char *buf = trace_boot_options_buf;
4684 	char *option;
4685 
4686 	while (true) {
4687 		option = strsep(&buf, ",");
4688 
4689 		if (!option)
4690 			break;
4691 
4692 		if (*option)
4693 			trace_set_options(&global_trace, option);
4694 
4695 		/* Put back the comma to allow this to be called again */
4696 		if (buf)
4697 			*(buf - 1) = ',';
4698 	}
4699 }
4700 
4701 static ssize_t
4702 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4703 			size_t cnt, loff_t *ppos)
4704 {
4705 	struct seq_file *m = filp->private_data;
4706 	struct trace_array *tr = m->private;
4707 	char buf[64];
4708 	int ret;
4709 
4710 	if (cnt >= sizeof(buf))
4711 		return -EINVAL;
4712 
4713 	if (copy_from_user(buf, ubuf, cnt))
4714 		return -EFAULT;
4715 
4716 	buf[cnt] = 0;
4717 
4718 	ret = trace_set_options(tr, buf);
4719 	if (ret < 0)
4720 		return ret;
4721 
4722 	*ppos += cnt;
4723 
4724 	return cnt;
4725 }
4726 
4727 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4728 {
4729 	struct trace_array *tr = inode->i_private;
4730 	int ret;
4731 
4732 	ret = tracing_check_open_get_tr(tr);
4733 	if (ret)
4734 		return ret;
4735 
4736 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4737 	if (ret < 0)
4738 		trace_array_put(tr);
4739 
4740 	return ret;
4741 }
4742 
4743 static const struct file_operations tracing_iter_fops = {
4744 	.open		= tracing_trace_options_open,
4745 	.read		= seq_read,
4746 	.llseek		= seq_lseek,
4747 	.release	= tracing_single_release_tr,
4748 	.write		= tracing_trace_options_write,
4749 };
4750 
4751 static const char readme_msg[] =
4752 	"tracing mini-HOWTO:\n\n"
4753 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4754 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4755 	" Important files:\n"
4756 	"  trace\t\t\t- The static contents of the buffer\n"
4757 	"\t\t\t  To clear the buffer, write into this file: echo > trace\n"
4758 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4759 	"  current_tracer\t- function and latency tracers\n"
4760 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4761 	"  error_log\t- error log for failed commands (that support it)\n"
4762 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4763 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4764 	"  trace_clock\t\t- change the clock used to order events\n"
4765 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4766 	"      global:   Synced across CPUs but slows tracing down.\n"
4767 	"     counter:   Not a clock, but just an increment\n"
4768 	"      uptime:   Jiffy counter from time of boot\n"
4769 	"        perf:   Same clock that perf events use\n"
4770 #ifdef CONFIG_X86_64
4771 	"     x86-tsc:   TSC cycle counter\n"
4772 #endif
4773 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4774 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4775 	"    absolute:   Absolute (standalone) timestamp\n"
4776 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4777 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4778 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4779 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4780 	"\t\t\t  Remove sub-buffer with rmdir\n"
4781 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4782 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4783 	"\t\t\t  option name\n"
4784 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4785 #ifdef CONFIG_DYNAMIC_FTRACE
4786 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4787 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4788 	"\t\t\t  functions\n"
4789 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4790 	"\t     modules: Can select a group via module\n"
4791 	"\t      Format: :mod:<module-name>\n"
4792 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4793 	"\t    triggers: a command to perform when function is hit\n"
4794 	"\t      Format: <function>:<trigger>[:count]\n"
4795 	"\t     trigger: traceon, traceoff\n"
4796 	"\t\t      enable_event:<system>:<event>\n"
4797 	"\t\t      disable_event:<system>:<event>\n"
4798 #ifdef CONFIG_STACKTRACE
4799 	"\t\t      stacktrace\n"
4800 #endif
4801 #ifdef CONFIG_TRACER_SNAPSHOT
4802 	"\t\t      snapshot\n"
4803 #endif
4804 	"\t\t      dump\n"
4805 	"\t\t      cpudump\n"
4806 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4807 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4808 	"\t     The first one will disable tracing every time do_fault is hit\n"
4809 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4810 	"\t       The first time do trap is hit and it disables tracing, the\n"
4811 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4812 	"\t       the counter will not decrement. It only decrements when the\n"
4813 	"\t       trigger did work\n"
4814 	"\t     To remove trigger without count:\n"
4815 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4816 	"\t     To remove trigger with a count:\n"
4817 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4818 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4819 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4820 	"\t    modules: Can select a group via module command :mod:\n"
4821 	"\t    Does not accept triggers\n"
4822 #endif /* CONFIG_DYNAMIC_FTRACE */
4823 #ifdef CONFIG_FUNCTION_TRACER
4824 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4825 	"\t\t    (function)\n"
4826 #endif
4827 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4828 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4829 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4830 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4831 #endif
4832 #ifdef CONFIG_TRACER_SNAPSHOT
4833 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4834 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4835 	"\t\t\t  information\n"
4836 #endif
4837 #ifdef CONFIG_STACK_TRACER
4838 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4839 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4840 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4841 	"\t\t\t  new trace)\n"
4842 #ifdef CONFIG_DYNAMIC_FTRACE
4843 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4844 	"\t\t\t  traces\n"
4845 #endif
4846 #endif /* CONFIG_STACK_TRACER */
4847 #ifdef CONFIG_DYNAMIC_EVENTS
4848 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4849 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4850 #endif
4851 #ifdef CONFIG_KPROBE_EVENTS
4852 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4853 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4854 #endif
4855 #ifdef CONFIG_UPROBE_EVENTS
4856 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4857 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4858 #endif
4859 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4860 	"\t  accepts: event-definitions (one definition per line)\n"
4861 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4862 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4863 #ifdef CONFIG_HIST_TRIGGERS
4864 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4865 #endif
4866 	"\t           -:[<group>/]<event>\n"
4867 #ifdef CONFIG_KPROBE_EVENTS
4868 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4869   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4870 #endif
4871 #ifdef CONFIG_UPROBE_EVENTS
4872   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4873 #endif
4874 	"\t     args: <name>=fetcharg[:type]\n"
4875 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4876 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4877 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4878 #else
4879 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4880 #endif
4881 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4882 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4883 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4884 	"\t           <type>\\[<array-size>\\]\n"
4885 #ifdef CONFIG_HIST_TRIGGERS
4886 	"\t    field: <stype> <name>;\n"
4887 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4888 	"\t           [unsigned] char/int/long\n"
4889 #endif
4890 #endif
4891 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4892 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4893 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4894 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4895 	"\t\t\t  events\n"
4896 	"      filter\t\t- If set, only events passing filter are traced\n"
4897 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4898 	"\t\t\t  <event>:\n"
4899 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4900 	"      filter\t\t- If set, only events passing filter are traced\n"
4901 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4902 	"\t    Format: <trigger>[:count][if <filter>]\n"
4903 	"\t   trigger: traceon, traceoff\n"
4904 	"\t            enable_event:<system>:<event>\n"
4905 	"\t            disable_event:<system>:<event>\n"
4906 #ifdef CONFIG_HIST_TRIGGERS
4907 	"\t            enable_hist:<system>:<event>\n"
4908 	"\t            disable_hist:<system>:<event>\n"
4909 #endif
4910 #ifdef CONFIG_STACKTRACE
4911 	"\t\t    stacktrace\n"
4912 #endif
4913 #ifdef CONFIG_TRACER_SNAPSHOT
4914 	"\t\t    snapshot\n"
4915 #endif
4916 #ifdef CONFIG_HIST_TRIGGERS
4917 	"\t\t    hist (see below)\n"
4918 #endif
4919 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4920 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4921 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4922 	"\t                  events/block/block_unplug/trigger\n"
4923 	"\t   The first disables tracing every time block_unplug is hit.\n"
4924 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4925 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4926 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4927 	"\t   Like function triggers, the counter is only decremented if it\n"
4928 	"\t    enabled or disabled tracing.\n"
4929 	"\t   To remove a trigger without a count:\n"
4930 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4931 	"\t   To remove a trigger with a count:\n"
4932 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4933 	"\t   Filters can be ignored when removing a trigger.\n"
4934 #ifdef CONFIG_HIST_TRIGGERS
4935 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4936 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4937 	"\t            [:values=<field1[,field2,...]>]\n"
4938 	"\t            [:sort=<field1[,field2,...]>]\n"
4939 	"\t            [:size=#entries]\n"
4940 	"\t            [:pause][:continue][:clear]\n"
4941 	"\t            [:name=histname1]\n"
4942 	"\t            [:<handler>.<action>]\n"
4943 	"\t            [if <filter>]\n\n"
4944 	"\t    When a matching event is hit, an entry is added to a hash\n"
4945 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4946 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4947 	"\t    correspond to fields in the event's format description.  Keys\n"
4948 	"\t    can be any field, or the special string 'stacktrace'.\n"
4949 	"\t    Compound keys consisting of up to two fields can be specified\n"
4950 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4951 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4952 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4953 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4954 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4955 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4956 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4957 	"\t    its histogram data will be shared with other triggers of the\n"
4958 	"\t    same name, and trigger hits will update this common data.\n\n"
4959 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4960 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4961 	"\t    triggers attached to an event, there will be a table for each\n"
4962 	"\t    trigger in the output.  The table displayed for a named\n"
4963 	"\t    trigger will be the same as any other instance having the\n"
4964 	"\t    same name.  The default format used to display a given field\n"
4965 	"\t    can be modified by appending any of the following modifiers\n"
4966 	"\t    to the field name, as applicable:\n\n"
4967 	"\t            .hex        display a number as a hex value\n"
4968 	"\t            .sym        display an address as a symbol\n"
4969 	"\t            .sym-offset display an address as a symbol and offset\n"
4970 	"\t            .execname   display a common_pid as a program name\n"
4971 	"\t            .syscall    display a syscall id as a syscall name\n"
4972 	"\t            .log2       display log2 value rather than raw number\n"
4973 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4974 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4975 	"\t    trigger or to start a hist trigger but not log any events\n"
4976 	"\t    until told to do so.  'continue' can be used to start or\n"
4977 	"\t    restart a paused hist trigger.\n\n"
4978 	"\t    The 'clear' parameter will clear the contents of a running\n"
4979 	"\t    hist trigger and leave its current paused/active state\n"
4980 	"\t    unchanged.\n\n"
4981 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4982 	"\t    have one event conditionally start and stop another event's\n"
4983 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4984 	"\t    the enable_event and disable_event triggers.\n\n"
4985 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4986 	"\t    histogram entry is added or updated.  They take the form:\n\n"
4987 	"\t        <handler>.<action>\n\n"
4988 	"\t    The available handlers are:\n\n"
4989 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4990 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4991 	"\t        onchange(var)            - invoke action if var changes\n\n"
4992 	"\t    The available actions are:\n\n"
4993 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4994 	"\t        save(field,...)                      - save current event fields\n"
4995 #ifdef CONFIG_TRACER_SNAPSHOT
4996 	"\t        snapshot()                           - snapshot the trace buffer\n"
4997 #endif
4998 #endif
4999 ;
5000 
5001 static ssize_t
5002 tracing_readme_read(struct file *filp, char __user *ubuf,
5003 		       size_t cnt, loff_t *ppos)
5004 {
5005 	return simple_read_from_buffer(ubuf, cnt, ppos,
5006 					readme_msg, strlen(readme_msg));
5007 }
5008 
5009 static const struct file_operations tracing_readme_fops = {
5010 	.open		= tracing_open_generic,
5011 	.read		= tracing_readme_read,
5012 	.llseek		= generic_file_llseek,
5013 };
5014 
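/* seq_file iteration over the saved pid to tgid mappings */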
5015 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5016 {
5017 	int *ptr = v;
5018 
5019 	if (*pos || m->count)
5020 		ptr++;
5021 
5022 	(*pos)++;
5023 
5024 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5025 		if (trace_find_tgid(*ptr))
5026 			return ptr;
5027 	}
5028 
5029 	return NULL;
5030 }
5031 
5032 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5033 {
5034 	void *v;
5035 	loff_t l = 0;
5036 
5037 	if (!tgid_map)
5038 		return NULL;
5039 
5040 	v = &tgid_map[0];
5041 	while (l <= *pos) {
5042 		v = saved_tgids_next(m, v, &l);
5043 		if (!v)
5044 			return NULL;
5045 	}
5046 
5047 	return v;
5048 }
5049 
5050 static void saved_tgids_stop(struct seq_file *m, void *v)
5051 {
5052 }
5053 
5054 static int saved_tgids_show(struct seq_file *m, void *v)
5055 {
5056 	int pid = (int *)v - tgid_map;
5057 
5058 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5059 	return 0;
5060 }
5061 
5062 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5063 	.start		= saved_tgids_start,
5064 	.stop		= saved_tgids_stop,
5065 	.next		= saved_tgids_next,
5066 	.show		= saved_tgids_show,
5067 };
5068 
5069 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5070 {
5071 	int ret;
5072 
5073 	ret = tracing_check_open_get_tr(NULL);
5074 	if (ret)
5075 		return ret;
5076 
5077 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5078 }
5079 
5080 
5081 static const struct file_operations tracing_saved_tgids_fops = {
5082 	.open		= tracing_saved_tgids_open,
5083 	.read		= seq_read,
5084 	.llseek		= seq_lseek,
5085 	.release	= seq_release,
5086 };
5087 
5088 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5089 {
5090 	unsigned int *ptr = v;
5091 
5092 	if (*pos || m->count)
5093 		ptr++;
5094 
5095 	(*pos)++;
5096 
5097 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5098 	     ptr++) {
5099 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5100 			continue;
5101 
5102 		return ptr;
5103 	}
5104 
5105 	return NULL;
5106 }
5107 
5108 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5109 {
5110 	void *v;
5111 	loff_t l = 0;
5112 
5113 	preempt_disable();
5114 	arch_spin_lock(&trace_cmdline_lock);
5115 
5116 	v = &savedcmd->map_cmdline_to_pid[0];
5117 	while (l <= *pos) {
5118 		v = saved_cmdlines_next(m, v, &l);
5119 		if (!v)
5120 			return NULL;
5121 	}
5122 
5123 	return v;
5124 }
5125 
5126 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5127 {
5128 	arch_spin_unlock(&trace_cmdline_lock);
5129 	preempt_enable();
5130 }
5131 
5132 static int saved_cmdlines_show(struct seq_file *m, void *v)
5133 {
5134 	char buf[TASK_COMM_LEN];
5135 	unsigned int *pid = v;
5136 
5137 	__trace_find_cmdline(*pid, buf);
5138 	seq_printf(m, "%d %s\n", *pid, buf);
5139 	return 0;
5140 }
5141 
5142 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5143 	.start		= saved_cmdlines_start,
5144 	.next		= saved_cmdlines_next,
5145 	.stop		= saved_cmdlines_stop,
5146 	.show		= saved_cmdlines_show,
5147 };
5148 
5149 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5150 {
5151 	int ret;
5152 
5153 	ret = tracing_check_open_get_tr(NULL);
5154 	if (ret)
5155 		return ret;
5156 
5157 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5158 }
5159 
5160 static const struct file_operations tracing_saved_cmdlines_fops = {
5161 	.open		= tracing_saved_cmdlines_open,
5162 	.read		= seq_read,
5163 	.llseek		= seq_lseek,
5164 	.release	= seq_release,
5165 };
5166 
5167 static ssize_t
5168 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5169 				 size_t cnt, loff_t *ppos)
5170 {
5171 	char buf[64];
5172 	int r;
5173 
5174 	arch_spin_lock(&trace_cmdline_lock);
5175 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5176 	arch_spin_unlock(&trace_cmdline_lock);
5177 
5178 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5179 }
5180 
5181 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5182 {
5183 	kfree(s->saved_cmdlines);
5184 	kfree(s->map_cmdline_to_pid);
5185 	kfree(s);
5186 }
5187 
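/* Replace the saved_cmdlines buffer with one holding @val entries */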
5188 static int tracing_resize_saved_cmdlines(unsigned int val)
5189 {
5190 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5191 
5192 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5193 	if (!s)
5194 		return -ENOMEM;
5195 
5196 	if (allocate_cmdlines_buffer(val, s) < 0) {
5197 		kfree(s);
5198 		return -ENOMEM;
5199 	}
5200 
5201 	arch_spin_lock(&trace_cmdline_lock);
5202 	savedcmd_temp = savedcmd;
5203 	savedcmd = s;
5204 	arch_spin_unlock(&trace_cmdline_lock);
5205 	free_saved_cmdlines_buffer(savedcmd_temp);
5206 
5207 	return 0;
5208 }
5209 
5210 static ssize_t
5211 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5212 				  size_t cnt, loff_t *ppos)
5213 {
5214 	unsigned long val;
5215 	int ret;
5216 
5217 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5218 	if (ret)
5219 		return ret;
5220 
5221 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5222 	if (!val || val > PID_MAX_DEFAULT)
5223 		return -EINVAL;
5224 
5225 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5226 	if (ret < 0)
5227 		return ret;
5228 
5229 	*ppos += cnt;
5230 
5231 	return cnt;
5232 }
5233 
5234 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5235 	.open		= tracing_open_generic,
5236 	.read		= tracing_saved_cmdlines_size_read,
5237 	.write		= tracing_saved_cmdlines_size_write,
5238 };
5239 
5240 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
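/*
 * If @ptr sits on a tail marker, follow it to the next map array and
 * skip that array's head item; return NULL when the list ends.
 */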
5241 static union trace_eval_map_item *
5242 update_eval_map(union trace_eval_map_item *ptr)
5243 {
5244 	if (!ptr->map.eval_string) {
5245 		if (ptr->tail.next) {
5246 			ptr = ptr->tail.next;
5247 			/* Set ptr to the next real item (skip head) */
5248 			ptr++;
5249 		} else
5250 			return NULL;
5251 	}
5252 	return ptr;
5253 }
5254 
5255 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5256 {
5257 	union trace_eval_map_item *ptr = v;
5258 
5259 	/*
5260 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5261 	 * This really should never happen.
5262 	 */
5263 	ptr = update_eval_map(ptr);
5264 	if (WARN_ON_ONCE(!ptr))
5265 		return NULL;
5266 
5267 	ptr++;
5268 
5269 	(*pos)++;
5270 
5271 	ptr = update_eval_map(ptr);
5272 
5273 	return ptr;
5274 }
5275 
5276 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5277 {
5278 	union trace_eval_map_item *v;
5279 	loff_t l = 0;
5280 
5281 	mutex_lock(&trace_eval_mutex);
5282 
5283 	v = trace_eval_maps;
5284 	if (v)
5285 		v++;
5286 
5287 	while (v && l < *pos) {
5288 		v = eval_map_next(m, v, &l);
5289 	}
5290 
5291 	return v;
5292 }
5293 
5294 static void eval_map_stop(struct seq_file *m, void *v)
5295 {
5296 	mutex_unlock(&trace_eval_mutex);
5297 }
5298 
5299 static int eval_map_show(struct seq_file *m, void *v)
5300 {
5301 	union trace_eval_map_item *ptr = v;
5302 
5303 	seq_printf(m, "%s %ld (%s)\n",
5304 		   ptr->map.eval_string, ptr->map.eval_value,
5305 		   ptr->map.system);
5306 
5307 	return 0;
5308 }
5309 
5310 static const struct seq_operations tracing_eval_map_seq_ops = {
5311 	.start		= eval_map_start,
5312 	.next		= eval_map_next,
5313 	.stop		= eval_map_stop,
5314 	.show		= eval_map_show,
5315 };
5316 
5317 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5318 {
5319 	int ret;
5320 
5321 	ret = tracing_check_open_get_tr(NULL);
5322 	if (ret)
5323 		return ret;
5324 
5325 	return seq_open(filp, &tracing_eval_map_seq_ops);
5326 }
5327 
5328 static const struct file_operations tracing_eval_map_fops = {
5329 	.open		= tracing_eval_map_open,
5330 	.read		= seq_read,
5331 	.llseek		= seq_lseek,
5332 	.release	= seq_release,
5333 };
5334 
5335 static inline union trace_eval_map_item *
5336 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5337 {
5338 	/* Return tail of array given the head */
5339 	return ptr + ptr->head.length + 1;
5340 }
5341 
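/*
 * Copy a module's eval maps into a new array, bracketed by a head
 * item (module and length) and a tail item, and append it to the
 * trace_eval_maps list.
 */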
5342 static void
5343 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5344 			   int len)
5345 {
5346 	struct trace_eval_map **stop;
5347 	struct trace_eval_map **map;
5348 	union trace_eval_map_item *map_array;
5349 	union trace_eval_map_item *ptr;
5350 
5351 	stop = start + len;
5352 
5353 	/*
5354 	 * The trace_eval_maps contains the map plus a head and tail item,
5355 	 * where the head holds the module and length of array, and the
5356 	 * tail holds a pointer to the next list.
5357 	 */
5358 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5359 	if (!map_array) {
5360 		pr_warn("Unable to allocate trace eval mapping\n");
5361 		return;
5362 	}
5363 
5364 	mutex_lock(&trace_eval_mutex);
5365 
5366 	if (!trace_eval_maps)
5367 		trace_eval_maps = map_array;
5368 	else {
5369 		ptr = trace_eval_maps;
5370 		for (;;) {
5371 			ptr = trace_eval_jmp_to_tail(ptr);
5372 			if (!ptr->tail.next)
5373 				break;
5374 			ptr = ptr->tail.next;
5376 		}
5377 		ptr->tail.next = map_array;
5378 	}
5379 	map_array->head.mod = mod;
5380 	map_array->head.length = len;
5381 	map_array++;
5382 
5383 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5384 		map_array->map = **map;
5385 		map_array++;
5386 	}
5387 	memset(map_array, 0, sizeof(*map_array));
5388 
5389 	mutex_unlock(&trace_eval_mutex);
5390 }
5391 
5392 static void trace_create_eval_file(struct dentry *d_tracer)
5393 {
5394 	trace_create_file("eval_map", 0444, d_tracer,
5395 			  NULL, &tracing_eval_map_fops);
5396 }
5397 
5398 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5399 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5400 static inline void trace_insert_eval_map_file(struct module *mod,
5401 			      struct trace_eval_map **start, int len) { }
5402 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5403 
5404 static void trace_insert_eval_map(struct module *mod,
5405 				  struct trace_eval_map **start, int len)
5406 {
5407 	struct trace_eval_map **map;
5408 
5409 	if (len <= 0)
5410 		return;
5411 
5412 	map = start;
5413 
5414 	trace_event_eval_update(map, len);
5415 
5416 	trace_insert_eval_map_file(mod, start, len);
5417 }
5418 
5419 static ssize_t
5420 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5421 		       size_t cnt, loff_t *ppos)
5422 {
5423 	struct trace_array *tr = filp->private_data;
5424 	char buf[MAX_TRACER_SIZE+2];
5425 	int r;
5426 
5427 	mutex_lock(&trace_types_lock);
5428 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5429 	mutex_unlock(&trace_types_lock);
5430 
5431 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5432 }
5433 
5434 int tracer_init(struct tracer *t, struct trace_array *tr)
5435 {
5436 	tracing_reset_online_cpus(&tr->trace_buffer);
5437 	return t->init(tr);
5438 }
5439 
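/* Record @val as the per-cpu entry count for every CPU of @buf */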
5440 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5441 {
5442 	int cpu;
5443 
5444 	for_each_tracing_cpu(cpu)
5445 		per_cpu_ptr(buf->data, cpu)->entries = val;
5446 }
5447 
5448 #ifdef CONFIG_TRACER_MAX_TRACE
5449 /* resize @trace_buf to match the per-cpu entry counts of @size_buf */
5450 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5451 					struct trace_buffer *size_buf, int cpu_id)
5452 {
5453 	int cpu, ret = 0;
5454 
5455 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5456 		for_each_tracing_cpu(cpu) {
5457 			ret = ring_buffer_resize(trace_buf->buffer,
5458 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5459 			if (ret < 0)
5460 				break;
5461 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5462 				per_cpu_ptr(size_buf->data, cpu)->entries;
5463 		}
5464 	} else {
5465 		ret = ring_buffer_resize(trace_buf->buffer,
5466 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5467 		if (ret == 0)
5468 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5469 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5470 	}
5471 
5472 	return ret;
5473 }
5474 #endif /* CONFIG_TRACER_MAX_TRACE */
5475 
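/*
 * Resize the trace buffer (and the max/snapshot buffer when the
 * current tracer uses it) for one CPU or for all CPUs.
 * Called with trace_types_lock held.
 */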
5476 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5477 					unsigned long size, int cpu)
5478 {
5479 	int ret;
5480 
5481 	/*
5482 	 * If kernel or user changes the size of the ring buffer
5483 	 * we use the size that was given, and we can forget about
5484 	 * expanding it later.
5485 	 */
5486 	ring_buffer_expanded = true;
5487 
5488 	/* May be called before buffers are initialized */
5489 	if (!tr->trace_buffer.buffer)
5490 		return 0;
5491 
5492 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5493 	if (ret < 0)
5494 		return ret;
5495 
5496 #ifdef CONFIG_TRACER_MAX_TRACE
5497 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5498 	    !tr->current_trace->use_max_tr)
5499 		goto out;
5500 
5501 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5502 	if (ret < 0) {
5503 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5504 						     &tr->trace_buffer, cpu);
5505 		if (r < 0) {
5506 			/*
5507 			 * AARGH! We are left with different
5508 			 * size max buffer!!!!
5509 			 * The max buffer is our "snapshot" buffer.
5510 			 * When a tracer needs a snapshot (one of the
5511 			 * latency tracers), it swaps the max buffer
5512 			 * with the saved snapshot. We succeeded in updating
5513 			 * the size of the main buffer, but failed to update
5514 			 * the size of the max buffer. Then when we tried
5515 			 * to reset the main buffer to the original size, we
5516 			 * failed there too. This is very unlikely to
5517 			 * happen, but if it does, warn and kill all
5518 			 * tracing.
5519 			 */
5520 			WARN_ON(1);
5521 			tracing_disabled = 1;
5522 		}
5523 		return ret;
5524 	}
5525 
5526 	if (cpu == RING_BUFFER_ALL_CPUS)
5527 		set_buffer_entries(&tr->max_buffer, size);
5528 	else
5529 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5530 
5531  out:
5532 #endif /* CONFIG_TRACER_MAX_TRACE */
5533 
5534 	if (cpu == RING_BUFFER_ALL_CPUS)
5535 		set_buffer_entries(&tr->trace_buffer, size);
5536 	else
5537 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5538 
5539 	return ret;
5540 }
5541 
5542 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5543 					  unsigned long size, int cpu_id)
5544 {
5545 	int ret = size;
5546 
5547 	mutex_lock(&trace_types_lock);
5548 
5549 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5550 		/* make sure this cpu is enabled in the mask */
5551 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5552 			ret = -EINVAL;
5553 			goto out;
5554 		}
5555 	}
5556 
5557 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5558 	if (ret < 0)
5559 		ret = -ENOMEM;
5560 
5561 out:
5562 	mutex_unlock(&trace_types_lock);
5563 
5564 	return ret;
5565 }
5566 
5567 
5568 /**
5569  * tracing_update_buffers - used by tracing facility to expand ring buffers
5570  *
5571  * To save memory when tracing is configured in but never used, the
5572  * ring buffers are initially set to a minimum size. Once a user
5573  * starts to use the tracing facility, they need to grow to their
5574  * default size.
5575  *
5576  * This function is to be called when a tracer is about to be used.
5577  */
5578 int tracing_update_buffers(void)
5579 {
5580 	int ret = 0;
5581 
5582 	mutex_lock(&trace_types_lock);
5583 	if (!ring_buffer_expanded)
5584 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5585 						RING_BUFFER_ALL_CPUS);
5586 	mutex_unlock(&trace_types_lock);
5587 
5588 	return ret;
5589 }
5590 
5591 struct trace_option_dentry;
5592 
5593 static void
5594 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5595 
5596 /*
5597  * Used to clear out the tracer before deletion of an instance.
5598  * Must have trace_types_lock held.
5599  */
5600 static void tracing_set_nop(struct trace_array *tr)
5601 {
5602 	if (tr->current_trace == &nop_trace)
5603 		return;
5604 
5605 	tr->current_trace->enabled--;
5606 
5607 	if (tr->current_trace->reset)
5608 		tr->current_trace->reset(tr);
5609 
5610 	tr->current_trace = &nop_trace;
5611 }
5612 
5613 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5614 {
5615 	/* Only enable if the directory has been created already. */
5616 	if (!tr->dir)
5617 		return;
5618 
5619 	create_trace_option_files(tr, t);
5620 }
5621 
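/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it is
 * still at its boot-time minimum, tear down the current tracer, allocate
 * or free the snapshot buffer as the new tracer requires, and initialize
 * the new tracer. Fails with -EBUSY while trace_pipe readers hold a
 * reference on the current tracer.
 */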
5622 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5623 {
5624 	struct tracer *t;
5625 #ifdef CONFIG_TRACER_MAX_TRACE
5626 	bool had_max_tr;
5627 #endif
5628 	int ret = 0;
5629 
5630 	mutex_lock(&trace_types_lock);
5631 
5632 	if (!ring_buffer_expanded) {
5633 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5634 						RING_BUFFER_ALL_CPUS);
5635 		if (ret < 0)
5636 			goto out;
5637 		ret = 0;
5638 	}
5639 
5640 	for (t = trace_types; t; t = t->next) {
5641 		if (strcmp(t->name, buf) == 0)
5642 			break;
5643 	}
5644 	if (!t) {
5645 		ret = -EINVAL;
5646 		goto out;
5647 	}
5648 	if (t == tr->current_trace)
5649 		goto out;
5650 
5651 #ifdef CONFIG_TRACER_SNAPSHOT
5652 	if (t->use_max_tr) {
5653 		arch_spin_lock(&tr->max_lock);
5654 		if (tr->cond_snapshot)
5655 			ret = -EBUSY;
5656 		arch_spin_unlock(&tr->max_lock);
5657 		if (ret)
5658 			goto out;
5659 	}
5660 #endif
5661 	/* Some tracers won't work when enabled from the kernel command line */
5662 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5663 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5664 			t->name);
5665 		goto out;
5666 	}
5667 
5668 	/* Some tracers are only allowed for the top level buffer */
5669 	if (!trace_ok_for_array(t, tr)) {
5670 		ret = -EINVAL;
5671 		goto out;
5672 	}
5673 
5674 	/* If trace pipe files are being read, we can't change the tracer */
5675 	if (tr->current_trace->ref) {
5676 		ret = -EBUSY;
5677 		goto out;
5678 	}
5679 
5680 	trace_branch_disable();
5681 
5682 	tr->current_trace->enabled--;
5683 
5684 	if (tr->current_trace->reset)
5685 		tr->current_trace->reset(tr);
5686 
5687 	/* Current trace needs to be nop_trace before synchronize_rcu */
5688 	tr->current_trace = &nop_trace;
5689 
5690 #ifdef CONFIG_TRACER_MAX_TRACE
5691 	had_max_tr = tr->allocated_snapshot;
5692 
5693 	if (had_max_tr && !t->use_max_tr) {
5694 		/*
5695 		 * We need to make sure that update_max_tr sees that
5696 		 * current_trace changed to nop_trace to keep it from
5697 		 * swapping the buffers while we free the snapshot below.
5698 		 * update_max_tr is called with interrupts disabled,
5699 		 * so a synchronize_rcu() is sufficient.
5700 		 */
5701 		synchronize_rcu();
5702 		free_snapshot(tr);
5703 	}
5704 #endif
5705 
5706 #ifdef CONFIG_TRACER_MAX_TRACE
5707 	if (t->use_max_tr && !had_max_tr) {
5708 		ret = tracing_alloc_snapshot_instance(tr);
5709 		if (ret < 0)
5710 			goto out;
5711 	}
5712 #endif
5713 
5714 	if (t->init) {
5715 		ret = tracer_init(t, tr);
5716 		if (ret)
5717 			goto out;
5718 	}
5719 
5720 	tr->current_trace = t;
5721 	tr->current_trace->enabled++;
5722 	trace_branch_enable(tr);
5723  out:
5724 	mutex_unlock(&trace_types_lock);
5725 
5726 	return ret;
5727 }
5728 
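/* Write handler for the "current_tracer" tracefs file. */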
5729 static ssize_t
5730 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5731 			size_t cnt, loff_t *ppos)
5732 {
5733 	struct trace_array *tr = filp->private_data;
5734 	char buf[MAX_TRACER_SIZE+1];
5735 	int i;
5736 	size_t ret;
5737 	int err;
5738 
5739 	ret = cnt;
5740 
5741 	if (cnt > MAX_TRACER_SIZE)
5742 		cnt = MAX_TRACER_SIZE;
5743 
5744 	if (copy_from_user(buf, ubuf, cnt))
5745 		return -EFAULT;
5746 
5747 	buf[cnt] = 0;
5748 
5749 	/* strip trailing whitespace. */
5750 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5751 		buf[i] = 0;
5752 
5753 	err = tracing_set_tracer(tr, buf);
5754 	if (err)
5755 		return err;
5756 
5757 	*ppos += ret;
5758 
5759 	return ret;
5760 }
5761 
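/*
 * Helpers for files that keep a latency in nanoseconds internally but
 * expose it to user space in microseconds (tracing_thresh and
 * tracing_max_latency use these).
 */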
5762 static ssize_t
5763 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5764 		   size_t cnt, loff_t *ppos)
5765 {
5766 	char buf[64];
5767 	int r;
5768 
5769 	r = snprintf(buf, sizeof(buf), "%ld\n",
5770 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5771 	if (r > sizeof(buf))
5772 		r = sizeof(buf);
5773 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5774 }
5775 
5776 static ssize_t
5777 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5778 		    size_t cnt, loff_t *ppos)
5779 {
5780 	unsigned long val;
5781 	int ret;
5782 
5783 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5784 	if (ret)
5785 		return ret;
5786 
5787 	*ptr = val * 1000;
5788 
5789 	return cnt;
5790 }
5791 
5792 static ssize_t
5793 tracing_thresh_read(struct file *filp, char __user *ubuf,
5794 		    size_t cnt, loff_t *ppos)
5795 {
5796 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5797 }
5798 
5799 static ssize_t
5800 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5801 		     size_t cnt, loff_t *ppos)
5802 {
5803 	struct trace_array *tr = filp->private_data;
5804 	int ret;
5805 
5806 	mutex_lock(&trace_types_lock);
5807 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5808 	if (ret < 0)
5809 		goto out;
5810 
5811 	if (tr->current_trace->update_thresh) {
5812 		ret = tr->current_trace->update_thresh(tr);
5813 		if (ret < 0)
5814 			goto out;
5815 	}
5816 
5817 	ret = cnt;
5818 out:
5819 	mutex_unlock(&trace_types_lock);
5820 
5821 	return ret;
5822 }
5823 
5824 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5825 
5826 static ssize_t
5827 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5828 		     size_t cnt, loff_t *ppos)
5829 {
5830 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5831 }
5832 
5833 static ssize_t
5834 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5835 		      size_t cnt, loff_t *ppos)
5836 {
5837 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5838 }
5839 
5840 #endif
5841 
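/*
 * Open handler for the "trace_pipe" file: allocate a consuming iterator
 * that reads (and removes) events from the ring buffer as they arrive.
 */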
5842 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5843 {
5844 	struct trace_array *tr = inode->i_private;
5845 	struct trace_iterator *iter;
5846 	int ret;
5847 
5848 	ret = tracing_check_open_get_tr(tr);
5849 	if (ret)
5850 		return ret;
5851 
5852 	mutex_lock(&trace_types_lock);
5853 
5854 	/* create a buffer to store the information to pass to userspace */
5855 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5856 	if (!iter) {
5857 		ret = -ENOMEM;
5858 		__trace_array_put(tr);
5859 		goto out;
5860 	}
5861 
5862 	trace_seq_init(&iter->seq);
5863 	iter->trace = tr->current_trace;
5864 
5865 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5866 		ret = -ENOMEM;
5867 		goto fail;
5868 	}
5869 
5870 	/* trace_pipe does not show the start of the buffer */
5871 	cpumask_setall(iter->started);
5872 
5873 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5874 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5875 
5876 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5877 	if (trace_clocks[tr->clock_id].in_ns)
5878 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5879 
5880 	iter->tr = tr;
5881 	iter->trace_buffer = &tr->trace_buffer;
5882 	iter->cpu_file = tracing_get_cpu(inode);
5883 	mutex_init(&iter->mutex);
5884 	filp->private_data = iter;
5885 
5886 	if (iter->trace->pipe_open)
5887 		iter->trace->pipe_open(iter);
5888 
5889 	nonseekable_open(inode, filp);
5890 
5891 	tr->current_trace->ref++;
5892 out:
5893 	mutex_unlock(&trace_types_lock);
5894 	return ret;
5895 
5896 fail:
5897 	kfree(iter);
5898 	__trace_array_put(tr);
5899 	mutex_unlock(&trace_types_lock);
5900 	return ret;
5901 }
5902 
5903 static int tracing_release_pipe(struct inode *inode, struct file *file)
5904 {
5905 	struct trace_iterator *iter = file->private_data;
5906 	struct trace_array *tr = inode->i_private;
5907 
5908 	mutex_lock(&trace_types_lock);
5909 
5910 	tr->current_trace->ref--;
5911 
5912 	if (iter->trace->pipe_close)
5913 		iter->trace->pipe_close(iter);
5914 
5915 	mutex_unlock(&trace_types_lock);
5916 
5917 	free_cpumask_var(iter->started);
5918 	mutex_destroy(&iter->mutex);
5919 	kfree(iter);
5920 
5921 	trace_array_put(tr);
5922 
5923 	return 0;
5924 }
5925 
5926 static __poll_t
5927 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5928 {
5929 	struct trace_array *tr = iter->tr;
5930 
5931 	/* Iterators are static; they should be either filled or empty */
5932 	if (trace_buffer_iter(iter, iter->cpu_file))
5933 		return EPOLLIN | EPOLLRDNORM;
5934 
5935 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5936 		/*
5937 		 * Always select as readable when in blocking mode
5938 		 */
5939 		return EPOLLIN | EPOLLRDNORM;
5940 	else
5941 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5942 					     filp, poll_table);
5943 }
5944 
5945 static __poll_t
5946 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5947 {
5948 	struct trace_iterator *iter = filp->private_data;
5949 
5950 	return trace_poll(iter, filp, poll_table);
5951 }
5952 
5953 /* Must be called with iter->mutex held. */
5954 static int tracing_wait_pipe(struct file *filp)
5955 {
5956 	struct trace_iterator *iter = filp->private_data;
5957 	int ret;
5958 
5959 	while (trace_empty(iter)) {
5960 
5961 		if ((filp->f_flags & O_NONBLOCK)) {
5962 			return -EAGAIN;
5963 		}
5964 
5965 		/*
5966 		 * We return EOF only once we have read something and tracing
5967 		 * is disabled. We still block when tracing is disabled if we
5968 		 * have never read anything. This allows a user to cat this
5969 		 * file, and then enable tracing. But after we have read
5970 		 * something, we give an EOF when tracing is disabled again.
5971 		 *
5972 		 * iter->pos will be 0 if we haven't read anything.
5973 		 */
5974 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5975 			break;
5976 
5977 		mutex_unlock(&iter->mutex);
5978 
5979 		ret = wait_on_pipe(iter, 0);
5980 
5981 		mutex_lock(&iter->mutex);
5982 
5983 		if (ret)
5984 			return ret;
5985 	}
5986 
5987 	return 1;
5988 }
5989 
5990 /*
5991  * Consumer reader.
5992  */
5993 static ssize_t
5994 tracing_read_pipe(struct file *filp, char __user *ubuf,
5995 		  size_t cnt, loff_t *ppos)
5996 {
5997 	struct trace_iterator *iter = filp->private_data;
5998 	ssize_t sret;
5999 
6000 	/*
6001 	 * Avoid more than one consumer on a single file descriptor.
6002 	 * This is just a matter of trace coherency; the ring buffer itself
6003 	 * is protected.
6004 	 */
6005 	mutex_lock(&iter->mutex);
6006 
6007 	/* return any leftover data */
6008 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6009 	if (sret != -EBUSY)
6010 		goto out;
6011 
6012 	trace_seq_init(&iter->seq);
6013 
6014 	if (iter->trace->read) {
6015 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6016 		if (sret)
6017 			goto out;
6018 	}
6019 
6020 waitagain:
6021 	sret = tracing_wait_pipe(filp);
6022 	if (sret <= 0)
6023 		goto out;
6024 
6025 	/* stop when tracing is finished */
6026 	if (trace_empty(iter)) {
6027 		sret = 0;
6028 		goto out;
6029 	}
6030 
6031 	if (cnt >= PAGE_SIZE)
6032 		cnt = PAGE_SIZE - 1;
6033 
6034 	/* reset all but tr, trace, and overruns */
6035 	memset(&iter->seq, 0,
6036 	       sizeof(struct trace_iterator) -
6037 	       offsetof(struct trace_iterator, seq));
6038 	cpumask_clear(iter->started);
6039 	trace_seq_init(&iter->seq);
6040 	iter->pos = -1;
6041 
6042 	trace_event_read_lock();
6043 	trace_access_lock(iter->cpu_file);
6044 	while (trace_find_next_entry_inc(iter) != NULL) {
6045 		enum print_line_t ret;
6046 		int save_len = iter->seq.seq.len;
6047 
6048 		ret = print_trace_line(iter);
6049 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6050 			/* don't print partial lines */
6051 			iter->seq.seq.len = save_len;
6052 			break;
6053 		}
6054 		if (ret != TRACE_TYPE_NO_CONSUME)
6055 			trace_consume(iter);
6056 
6057 		if (trace_seq_used(&iter->seq) >= cnt)
6058 			break;
6059 
6060 		/*
6061 		 * Setting the full flag means we reached the trace_seq buffer
6062 		 * size and should have left via the partial output condition
6063 		 * above. One of the trace_seq_* functions is not used properly.
6064 		 */
6065 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6066 			  iter->ent->type);
6067 	}
6068 	trace_access_unlock(iter->cpu_file);
6069 	trace_event_read_unlock();
6070 
6071 	/* Now copy what we have to the user */
6072 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6073 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6074 		trace_seq_init(&iter->seq);
6075 
6076 	/*
6077 	 * If there was nothing to send to user, in spite of consuming trace
6078 	 * entries, go back to wait for more entries.
6079 	 */
6080 	if (sret == -EBUSY)
6081 		goto waitagain;
6082 
6083 out:
6084 	mutex_unlock(&iter->mutex);
6085 
6086 	return sret;
6087 }
6088 
6089 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6090 				     unsigned int idx)
6091 {
6092 	__free_page(spd->pages[idx]);
6093 }
6094 
6095 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6096 	.confirm		= generic_pipe_buf_confirm,
6097 	.release		= generic_pipe_buf_release,
6098 	.steal			= generic_pipe_buf_steal,
6099 	.get			= generic_pipe_buf_get,
6100 };
6101 
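/*
 * Format trace entries into iter->seq until the page-sized seq buffer
 * fills up or @rem bytes have been produced, consuming each entry that
 * is fully printed. Returns how many of the requested bytes are still
 * outstanding.
 */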
6102 static size_t
6103 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6104 {
6105 	size_t count;
6106 	int save_len;
6107 	int ret;
6108 
6109 	/* Seq buffer is page-sized, exactly what we need. */
6110 	for (;;) {
6111 		save_len = iter->seq.seq.len;
6112 		ret = print_trace_line(iter);
6113 
6114 		if (trace_seq_has_overflowed(&iter->seq)) {
6115 			iter->seq.seq.len = save_len;
6116 			break;
6117 		}
6118 
6119 		/*
6120 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6121 		 * should only be returned if the iter->seq overflowed. But
6122 		 * check it anyway to be safe.
6123 		 */
6124 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6125 			iter->seq.seq.len = save_len;
6126 			break;
6127 		}
6128 
6129 		count = trace_seq_used(&iter->seq) - save_len;
6130 		if (rem < count) {
6131 			rem = 0;
6132 			iter->seq.seq.len = save_len;
6133 			break;
6134 		}
6135 
6136 		if (ret != TRACE_TYPE_NO_CONSUME)
6137 			trace_consume(iter);
6138 		rem -= count;
6139 		if (!trace_find_next_entry_inc(iter))	{
6140 			rem = 0;
6141 			iter->ent = NULL;
6142 			break;
6143 		}
6144 	}
6145 
6146 	return rem;
6147 }
6148 
6149 static ssize_t tracing_splice_read_pipe(struct file *filp,
6150 					loff_t *ppos,
6151 					struct pipe_inode_info *pipe,
6152 					size_t len,
6153 					unsigned int flags)
6154 {
6155 	struct page *pages_def[PIPE_DEF_BUFFERS];
6156 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6157 	struct trace_iterator *iter = filp->private_data;
6158 	struct splice_pipe_desc spd = {
6159 		.pages		= pages_def,
6160 		.partial	= partial_def,
6161 		.nr_pages	= 0, /* This gets updated below. */
6162 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6163 		.ops		= &tracing_pipe_buf_ops,
6164 		.spd_release	= tracing_spd_release_pipe,
6165 	};
6166 	ssize_t ret;
6167 	size_t rem;
6168 	unsigned int i;
6169 
6170 	if (splice_grow_spd(pipe, &spd))
6171 		return -ENOMEM;
6172 
6173 	mutex_lock(&iter->mutex);
6174 
6175 	if (iter->trace->splice_read) {
6176 		ret = iter->trace->splice_read(iter, filp,
6177 					       ppos, pipe, len, flags);
6178 		if (ret)
6179 			goto out_err;
6180 	}
6181 
6182 	ret = tracing_wait_pipe(filp);
6183 	if (ret <= 0)
6184 		goto out_err;
6185 
6186 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6187 		ret = -EFAULT;
6188 		goto out_err;
6189 	}
6190 
6191 	trace_event_read_lock();
6192 	trace_access_lock(iter->cpu_file);
6193 
6194 	/* Fill as many pages as possible. */
6195 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6196 		spd.pages[i] = alloc_page(GFP_KERNEL);
6197 		if (!spd.pages[i])
6198 			break;
6199 
6200 		rem = tracing_fill_pipe_page(rem, iter);
6201 
6202 		/* Copy the data into the page, so we can start over. */
6203 		ret = trace_seq_to_buffer(&iter->seq,
6204 					  page_address(spd.pages[i]),
6205 					  trace_seq_used(&iter->seq));
6206 		if (ret < 0) {
6207 			__free_page(spd.pages[i]);
6208 			break;
6209 		}
6210 		spd.partial[i].offset = 0;
6211 		spd.partial[i].len = trace_seq_used(&iter->seq);
6212 
6213 		trace_seq_init(&iter->seq);
6214 	}
6215 
6216 	trace_access_unlock(iter->cpu_file);
6217 	trace_event_read_unlock();
6218 	mutex_unlock(&iter->mutex);
6219 
6220 	spd.nr_pages = i;
6221 
6222 	if (i)
6223 		ret = splice_to_pipe(pipe, &spd);
6224 	else
6225 		ret = 0;
6226 out:
6227 	splice_shrink_spd(&spd);
6228 	return ret;
6229 
6230 out_err:
6231 	mutex_unlock(&iter->mutex);
6232 	goto out;
6233 }
6234 
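/*
 * Read handler for "buffer_size_kb": report the per-CPU ring buffer size
 * in KB. On the top-level file, a single value is shown if every CPU has
 * the same size, otherwise "X" is printed.
 */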
6235 static ssize_t
6236 tracing_entries_read(struct file *filp, char __user *ubuf,
6237 		     size_t cnt, loff_t *ppos)
6238 {
6239 	struct inode *inode = file_inode(filp);
6240 	struct trace_array *tr = inode->i_private;
6241 	int cpu = tracing_get_cpu(inode);
6242 	char buf[64];
6243 	int r = 0;
6244 	ssize_t ret;
6245 
6246 	mutex_lock(&trace_types_lock);
6247 
6248 	if (cpu == RING_BUFFER_ALL_CPUS) {
6249 		int cpu, buf_size_same;
6250 		unsigned long size;
6251 
6252 		size = 0;
6253 		buf_size_same = 1;
6254 		/* check if all cpu sizes are same */
6255 		for_each_tracing_cpu(cpu) {
6256 			/* fill in the size from first enabled cpu */
6257 			if (size == 0)
6258 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6259 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6260 				buf_size_same = 0;
6261 				break;
6262 			}
6263 		}
6264 
6265 		if (buf_size_same) {
6266 			if (!ring_buffer_expanded)
6267 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6268 					    size >> 10,
6269 					    trace_buf_size >> 10);
6270 			else
6271 				r = sprintf(buf, "%lu\n", size >> 10);
6272 		} else
6273 			r = sprintf(buf, "X\n");
6274 	} else
6275 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6276 
6277 	mutex_unlock(&trace_types_lock);
6278 
6279 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6280 	return ret;
6281 }
6282 
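/*
 * Write handler for "buffer_size_kb". The value is in KB and applies to
 * each per-CPU buffer, e.g. "echo 4096 > buffer_size_kb" gives every CPU
 * a 4 MB buffer.
 */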
6283 static ssize_t
6284 tracing_entries_write(struct file *filp, const char __user *ubuf,
6285 		      size_t cnt, loff_t *ppos)
6286 {
6287 	struct inode *inode = file_inode(filp);
6288 	struct trace_array *tr = inode->i_private;
6289 	unsigned long val;
6290 	int ret;
6291 
6292 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6293 	if (ret)
6294 		return ret;
6295 
6296 	/* must have at least 1 entry */
6297 	if (!val)
6298 		return -EINVAL;
6299 
6300 	/* value is in KB */
6301 	val <<= 10;
6302 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6303 	if (ret < 0)
6304 		return ret;
6305 
6306 	*ppos += cnt;
6307 
6308 	return cnt;
6309 }
6310 
6311 static ssize_t
6312 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6313 				size_t cnt, loff_t *ppos)
6314 {
6315 	struct trace_array *tr = filp->private_data;
6316 	char buf[64];
6317 	int r, cpu;
6318 	unsigned long size = 0, expanded_size = 0;
6319 
6320 	mutex_lock(&trace_types_lock);
6321 	for_each_tracing_cpu(cpu) {
6322 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6323 		if (!ring_buffer_expanded)
6324 			expanded_size += trace_buf_size >> 10;
6325 	}
6326 	if (ring_buffer_expanded)
6327 		r = sprintf(buf, "%lu\n", size);
6328 	else
6329 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6330 	mutex_unlock(&trace_types_lock);
6331 
6332 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6333 }
6334 
6335 static ssize_t
6336 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6337 			  size_t cnt, loff_t *ppos)
6338 {
6339 	/*
6340 	 * There is no need to read what the user has written; this function
6341 	 * exists just to make sure that "echo" does not return an error.
6342 	 */
6343 
6344 	*ppos += cnt;
6345 
6346 	return cnt;
6347 }
6348 
6349 static int
6350 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6351 {
6352 	struct trace_array *tr = inode->i_private;
6353 
6354 	/* disable tracing ? */
6355 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6356 		tracer_tracing_off(tr);
6357 	/* resize the ring buffer to 0 */
6358 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6359 
6360 	trace_array_put(tr);
6361 
6362 	return 0;
6363 }
6364 
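/*
 * Write handler for the "trace_marker" file. A string written here, e.g.
 * "echo hello > trace_marker", is recorded in the ring buffer as a
 * TRACE_PRINT event, which is useful for correlating user space activity
 * with kernel events.
 */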
6365 static ssize_t
6366 tracing_mark_write(struct file *filp, const char __user *ubuf,
6367 					size_t cnt, loff_t *fpos)
6368 {
6369 	struct trace_array *tr = filp->private_data;
6370 	struct ring_buffer_event *event;
6371 	enum event_trigger_type tt = ETT_NONE;
6372 	struct ring_buffer *buffer;
6373 	struct print_entry *entry;
6374 	unsigned long irq_flags;
6375 	ssize_t written;
6376 	int size;
6377 	int len;
6378 
6379 /* Used in tracing_mark_raw_write() as well */
6380 #define FAULTED_STR "<faulted>"
6381 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6382 
6383 	if (tracing_disabled)
6384 		return -EINVAL;
6385 
6386 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6387 		return -EINVAL;
6388 
6389 	if (cnt > TRACE_BUF_SIZE)
6390 		cnt = TRACE_BUF_SIZE;
6391 
6392 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6393 
6394 	local_save_flags(irq_flags);
6395 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6396 
6397 	/* If less than "<faulted>", then make sure we can still add that */
6398 	if (cnt < FAULTED_SIZE)
6399 		size += FAULTED_SIZE - cnt;
6400 
6401 	buffer = tr->trace_buffer.buffer;
6402 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6403 					    irq_flags, preempt_count());
6404 	if (unlikely(!event))
6405 		/* Ring buffer disabled, return as if not open for write */
6406 		return -EBADF;
6407 
6408 	entry = ring_buffer_event_data(event);
6409 	entry->ip = _THIS_IP_;
6410 
6411 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6412 	if (len) {
6413 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6414 		cnt = FAULTED_SIZE;
6415 		written = -EFAULT;
6416 	} else
6417 		written = cnt;
6418 	len = cnt;
6419 
6420 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6421 		/* do not add \n before testing triggers, but add \0 */
6422 		entry->buf[cnt] = '\0';
6423 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6424 	}
6425 
6426 	if (entry->buf[cnt - 1] != '\n') {
6427 		entry->buf[cnt] = '\n';
6428 		entry->buf[cnt + 1] = '\0';
6429 	} else
6430 		entry->buf[cnt] = '\0';
6431 
6432 	__buffer_unlock_commit(buffer, event);
6433 
6434 	if (tt)
6435 		event_triggers_post_call(tr->trace_marker_file, tt);
6436 
6437 	if (written > 0)
6438 		*fpos += written;
6439 
6440 	return written;
6441 }
6442 
6443 /* Limit it for now to 3K (including tag) */
6444 #define RAW_DATA_MAX_SIZE (1024*3)
6445 
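/*
 * Write handler for the "trace_marker_raw" file: like trace_marker, but
 * the payload is binary and must start with an integer tag id.
 */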
6446 static ssize_t
6447 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6448 					size_t cnt, loff_t *fpos)
6449 {
6450 	struct trace_array *tr = filp->private_data;
6451 	struct ring_buffer_event *event;
6452 	struct ring_buffer *buffer;
6453 	struct raw_data_entry *entry;
6454 	unsigned long irq_flags;
6455 	ssize_t written;
6456 	int size;
6457 	int len;
6458 
6459 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6460 
6461 	if (tracing_disabled)
6462 		return -EINVAL;
6463 
6464 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6465 		return -EINVAL;
6466 
6467 	/* The marker must at least have a tag id */
6468 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6469 		return -EINVAL;
6470 
6471 	if (cnt > TRACE_BUF_SIZE)
6472 		cnt = TRACE_BUF_SIZE;
6473 
6474 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6475 
6476 	local_save_flags(irq_flags);
6477 	size = sizeof(*entry) + cnt;
6478 	if (cnt < FAULT_SIZE_ID)
6479 		size += FAULT_SIZE_ID - cnt;
6480 
6481 	buffer = tr->trace_buffer.buffer;
6482 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6483 					    irq_flags, preempt_count());
6484 	if (!event)
6485 		/* Ring buffer disabled, return as if not open for write */
6486 		return -EBADF;
6487 
6488 	entry = ring_buffer_event_data(event);
6489 
6490 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6491 	if (len) {
6492 		entry->id = -1;
6493 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6494 		written = -EFAULT;
6495 	} else
6496 		written = cnt;
6497 
6498 	__buffer_unlock_commit(buffer, event);
6499 
6500 	if (written > 0)
6501 		*fpos += written;
6502 
6503 	return written;
6504 }
6505 
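/*
 * Show handler for the "trace_clock" file: list the available clocks,
 * with the currently selected one in brackets.
 */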
6506 static int tracing_clock_show(struct seq_file *m, void *v)
6507 {
6508 	struct trace_array *tr = m->private;
6509 	int i;
6510 
6511 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6512 		seq_printf(m,
6513 			"%s%s%s%s", i ? " " : "",
6514 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6515 			i == tr->clock_id ? "]" : "");
6516 	seq_putc(m, '\n');
6517 
6518 	return 0;
6519 }
6520 
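/*
 * Select the trace clock for @tr by name (e.g. "local", "global" or
 * "mono"). The buffers are reset afterwards because timestamps from the
 * old clock would not be comparable with the new one.
 */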
6521 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6522 {
6523 	int i;
6524 
6525 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6526 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6527 			break;
6528 	}
6529 	if (i == ARRAY_SIZE(trace_clocks))
6530 		return -EINVAL;
6531 
6532 	mutex_lock(&trace_types_lock);
6533 
6534 	tr->clock_id = i;
6535 
6536 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6537 
6538 	/*
6539 	 * New clock may not be consistent with the previous clock.
6540 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6541 	 */
6542 	tracing_reset_online_cpus(&tr->trace_buffer);
6543 
6544 #ifdef CONFIG_TRACER_MAX_TRACE
6545 	if (tr->max_buffer.buffer)
6546 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6547 	tracing_reset_online_cpus(&tr->max_buffer);
6548 #endif
6549 
6550 	mutex_unlock(&trace_types_lock);
6551 
6552 	return 0;
6553 }
6554 
6555 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6556 				   size_t cnt, loff_t *fpos)
6557 {
6558 	struct seq_file *m = filp->private_data;
6559 	struct trace_array *tr = m->private;
6560 	char buf[64];
6561 	const char *clockstr;
6562 	int ret;
6563 
6564 	if (cnt >= sizeof(buf))
6565 		return -EINVAL;
6566 
6567 	if (copy_from_user(buf, ubuf, cnt))
6568 		return -EFAULT;
6569 
6570 	buf[cnt] = 0;
6571 
6572 	clockstr = strstrip(buf);
6573 
6574 	ret = tracing_set_clock(tr, clockstr);
6575 	if (ret)
6576 		return ret;
6577 
6578 	*fpos += cnt;
6579 
6580 	return cnt;
6581 }
6582 
6583 static int tracing_clock_open(struct inode *inode, struct file *file)
6584 {
6585 	struct trace_array *tr = inode->i_private;
6586 	int ret;
6587 
6588 	ret = tracing_check_open_get_tr(tr);
6589 	if (ret)
6590 		return ret;
6591 
6592 	ret = single_open(file, tracing_clock_show, inode->i_private);
6593 	if (ret < 0)
6594 		trace_array_put(tr);
6595 
6596 	return ret;
6597 }
6598 
6599 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6600 {
6601 	struct trace_array *tr = m->private;
6602 
6603 	mutex_lock(&trace_types_lock);
6604 
6605 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6606 		seq_puts(m, "delta [absolute]\n");
6607 	else
6608 		seq_puts(m, "[delta] absolute\n");
6609 
6610 	mutex_unlock(&trace_types_lock);
6611 
6612 	return 0;
6613 }
6614 
6615 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6616 {
6617 	struct trace_array *tr = inode->i_private;
6618 	int ret;
6619 
6620 	ret = tracing_check_open_get_tr(tr);
6621 	if (ret)
6622 		return ret;
6623 
6624 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6625 	if (ret < 0)
6626 		trace_array_put(tr);
6627 
6628 	return ret;
6629 }
6630 
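/*
 * Enable or disable absolute (rather than delta) timestamps in the ring
 * buffer of @tr. Enable requests are reference counted so several users
 * (hist triggers, for example) can require absolute timestamps at once.
 */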
6631 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6632 {
6633 	int ret = 0;
6634 
6635 	mutex_lock(&trace_types_lock);
6636 
6637 	if (abs && tr->time_stamp_abs_ref++)
6638 		goto out;
6639 
6640 	if (!abs) {
6641 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6642 			ret = -EINVAL;
6643 			goto out;
6644 		}
6645 
6646 		if (--tr->time_stamp_abs_ref)
6647 			goto out;
6648 	}
6649 
6650 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6651 
6652 #ifdef CONFIG_TRACER_MAX_TRACE
6653 	if (tr->max_buffer.buffer)
6654 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6655 #endif
6656  out:
6657 	mutex_unlock(&trace_types_lock);
6658 
6659 	return ret;
6660 }
6661 
6662 struct ftrace_buffer_info {
6663 	struct trace_iterator	iter;
6664 	void			*spare;
6665 	unsigned int		spare_cpu;
6666 	unsigned int		read;
6667 };
6668 
6669 #ifdef CONFIG_TRACER_SNAPSHOT
6670 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6671 {
6672 	struct trace_array *tr = inode->i_private;
6673 	struct trace_iterator *iter;
6674 	struct seq_file *m;
6675 	int ret;
6676 
6677 	ret = tracing_check_open_get_tr(tr);
6678 	if (ret)
6679 		return ret;
6680 
6681 	if (file->f_mode & FMODE_READ) {
6682 		iter = __tracing_open(inode, file, true);
6683 		if (IS_ERR(iter))
6684 			ret = PTR_ERR(iter);
6685 	} else {
6686 		/* Writes still need the seq_file to hold the private data */
6687 		ret = -ENOMEM;
6688 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6689 		if (!m)
6690 			goto out;
6691 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6692 		if (!iter) {
6693 			kfree(m);
6694 			goto out;
6695 		}
6696 		ret = 0;
6697 
6698 		iter->tr = tr;
6699 		iter->trace_buffer = &tr->max_buffer;
6700 		iter->cpu_file = tracing_get_cpu(inode);
6701 		m->private = iter;
6702 		file->private_data = m;
6703 	}
6704 out:
6705 	if (ret < 0)
6706 		trace_array_put(tr);
6707 
6708 	return ret;
6709 }
6710 
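/*
 * Write handler for the "snapshot" file:
 *   echo 0 > snapshot  - free the snapshot buffer (all-CPUs file only)
 *   echo 1 > snapshot  - allocate the buffer if needed and take a snapshot
 *   echo 2 > snapshot  - clear the snapshot contents without freeing it
 * (any value greater than 1 behaves like 2).
 */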
6711 static ssize_t
6712 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6713 		       loff_t *ppos)
6714 {
6715 	struct seq_file *m = filp->private_data;
6716 	struct trace_iterator *iter = m->private;
6717 	struct trace_array *tr = iter->tr;
6718 	unsigned long val;
6719 	int ret;
6720 
6721 	ret = tracing_update_buffers();
6722 	if (ret < 0)
6723 		return ret;
6724 
6725 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6726 	if (ret)
6727 		return ret;
6728 
6729 	mutex_lock(&trace_types_lock);
6730 
6731 	if (tr->current_trace->use_max_tr) {
6732 		ret = -EBUSY;
6733 		goto out;
6734 	}
6735 
6736 	arch_spin_lock(&tr->max_lock);
6737 	if (tr->cond_snapshot)
6738 		ret = -EBUSY;
6739 	arch_spin_unlock(&tr->max_lock);
6740 	if (ret)
6741 		goto out;
6742 
6743 	switch (val) {
6744 	case 0:
6745 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6746 			ret = -EINVAL;
6747 			break;
6748 		}
6749 		if (tr->allocated_snapshot)
6750 			free_snapshot(tr);
6751 		break;
6752 	case 1:
6753 /* Only allow per-cpu swap if the ring buffer supports it */
6754 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6755 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6756 			ret = -EINVAL;
6757 			break;
6758 		}
6759 #endif
6760 		if (tr->allocated_snapshot)
6761 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6762 					&tr->trace_buffer, iter->cpu_file);
6763 		else
6764 			ret = tracing_alloc_snapshot_instance(tr);
6765 		if (ret < 0)
6766 			break;
6767 		local_irq_disable();
6768 		/* Now, we're going to swap */
6769 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6770 			update_max_tr(tr, current, smp_processor_id(), NULL);
6771 		else
6772 			update_max_tr_single(tr, current, iter->cpu_file);
6773 		local_irq_enable();
6774 		break;
6775 	default:
6776 		if (tr->allocated_snapshot) {
6777 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6778 				tracing_reset_online_cpus(&tr->max_buffer);
6779 			else
6780 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6781 		}
6782 		break;
6783 	}
6784 
6785 	if (ret >= 0) {
6786 		*ppos += cnt;
6787 		ret = cnt;
6788 	}
6789 out:
6790 	mutex_unlock(&trace_types_lock);
6791 	return ret;
6792 }
6793 
6794 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6795 {
6796 	struct seq_file *m = file->private_data;
6797 	int ret;
6798 
6799 	ret = tracing_release(inode, file);
6800 
6801 	if (file->f_mode & FMODE_READ)
6802 		return ret;
6803 
6804 	/* If write only, the seq_file is just a stub */
6805 	if (m)
6806 		kfree(m->private);
6807 	kfree(m);
6808 
6809 	return 0;
6810 }
6811 
6812 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6813 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6814 				    size_t count, loff_t *ppos);
6815 static int tracing_buffers_release(struct inode *inode, struct file *file);
6816 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6817 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6818 
6819 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6820 {
6821 	struct ftrace_buffer_info *info;
6822 	int ret;
6823 
6824 	/* The following checks for tracefs lockdown */
6825 	ret = tracing_buffers_open(inode, filp);
6826 	if (ret < 0)
6827 		return ret;
6828 
6829 	info = filp->private_data;
6830 
6831 	if (info->iter.trace->use_max_tr) {
6832 		tracing_buffers_release(inode, filp);
6833 		return -EBUSY;
6834 	}
6835 
6836 	info->iter.snapshot = true;
6837 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6838 
6839 	return ret;
6840 }
6841 
6842 #endif /* CONFIG_TRACER_SNAPSHOT */
6843 
6844 
6845 static const struct file_operations tracing_thresh_fops = {
6846 	.open		= tracing_open_generic,
6847 	.read		= tracing_thresh_read,
6848 	.write		= tracing_thresh_write,
6849 	.llseek		= generic_file_llseek,
6850 };
6851 
6852 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6853 static const struct file_operations tracing_max_lat_fops = {
6854 	.open		= tracing_open_generic,
6855 	.read		= tracing_max_lat_read,
6856 	.write		= tracing_max_lat_write,
6857 	.llseek		= generic_file_llseek,
6858 };
6859 #endif
6860 
6861 static const struct file_operations set_tracer_fops = {
6862 	.open		= tracing_open_generic,
6863 	.read		= tracing_set_trace_read,
6864 	.write		= tracing_set_trace_write,
6865 	.llseek		= generic_file_llseek,
6866 };
6867 
6868 static const struct file_operations tracing_pipe_fops = {
6869 	.open		= tracing_open_pipe,
6870 	.poll		= tracing_poll_pipe,
6871 	.read		= tracing_read_pipe,
6872 	.splice_read	= tracing_splice_read_pipe,
6873 	.release	= tracing_release_pipe,
6874 	.llseek		= no_llseek,
6875 };
6876 
6877 static const struct file_operations tracing_entries_fops = {
6878 	.open		= tracing_open_generic_tr,
6879 	.read		= tracing_entries_read,
6880 	.write		= tracing_entries_write,
6881 	.llseek		= generic_file_llseek,
6882 	.release	= tracing_release_generic_tr,
6883 };
6884 
6885 static const struct file_operations tracing_total_entries_fops = {
6886 	.open		= tracing_open_generic_tr,
6887 	.read		= tracing_total_entries_read,
6888 	.llseek		= generic_file_llseek,
6889 	.release	= tracing_release_generic_tr,
6890 };
6891 
6892 static const struct file_operations tracing_free_buffer_fops = {
6893 	.open		= tracing_open_generic_tr,
6894 	.write		= tracing_free_buffer_write,
6895 	.release	= tracing_free_buffer_release,
6896 };
6897 
6898 static const struct file_operations tracing_mark_fops = {
6899 	.open		= tracing_open_generic_tr,
6900 	.write		= tracing_mark_write,
6901 	.llseek		= generic_file_llseek,
6902 	.release	= tracing_release_generic_tr,
6903 };
6904 
6905 static const struct file_operations tracing_mark_raw_fops = {
6906 	.open		= tracing_open_generic_tr,
6907 	.write		= tracing_mark_raw_write,
6908 	.llseek		= generic_file_llseek,
6909 	.release	= tracing_release_generic_tr,
6910 };
6911 
6912 static const struct file_operations trace_clock_fops = {
6913 	.open		= tracing_clock_open,
6914 	.read		= seq_read,
6915 	.llseek		= seq_lseek,
6916 	.release	= tracing_single_release_tr,
6917 	.write		= tracing_clock_write,
6918 };
6919 
6920 static const struct file_operations trace_time_stamp_mode_fops = {
6921 	.open		= tracing_time_stamp_mode_open,
6922 	.read		= seq_read,
6923 	.llseek		= seq_lseek,
6924 	.release	= tracing_single_release_tr,
6925 };
6926 
6927 #ifdef CONFIG_TRACER_SNAPSHOT
6928 static const struct file_operations snapshot_fops = {
6929 	.open		= tracing_snapshot_open,
6930 	.read		= seq_read,
6931 	.write		= tracing_snapshot_write,
6932 	.llseek		= tracing_lseek,
6933 	.release	= tracing_snapshot_release,
6934 };
6935 
6936 static const struct file_operations snapshot_raw_fops = {
6937 	.open		= snapshot_raw_open,
6938 	.read		= tracing_buffers_read,
6939 	.release	= tracing_buffers_release,
6940 	.splice_read	= tracing_buffers_splice_read,
6941 	.llseek		= no_llseek,
6942 };
6943 
6944 #endif /* CONFIG_TRACER_SNAPSHOT */
6945 
6946 #define TRACING_LOG_ERRS_MAX	8
6947 #define TRACING_LOG_LOC_MAX	128
6948 
6949 #define CMD_PREFIX "  Command: "
6950 
6951 struct err_info {
6952 	const char	**errs;	/* ptr to loc-specific array of err strings */
6953 	u8		type;	/* index into errs -> specific err string */
6954 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
6955 	u64		ts;
6956 };
6957 
6958 struct tracing_log_err {
6959 	struct list_head	list;
6960 	struct err_info		info;
6961 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
6962 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
6963 };
6964 
6965 static DEFINE_MUTEX(tracing_err_log_lock);
6966 
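/*
 * Return a slot for a new error log entry: allocate fresh entries until
 * TRACING_LOG_ERRS_MAX exist, then recycle the oldest one. Caller must
 * hold tracing_err_log_lock.
 */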
6967 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
6968 {
6969 	struct tracing_log_err *err;
6970 
6971 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
6972 		err = kzalloc(sizeof(*err), GFP_KERNEL);
6973 		if (!err)
6974 			err = ERR_PTR(-ENOMEM);
6975 		tr->n_err_log_entries++;
6976 
6977 		return err;
6978 	}
6979 
6980 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
6981 	list_del(&err->list);
6982 
6983 	return err;
6984 }
6985 
6986 /**
6987  * err_pos - find the position of a string within a command for error careting
6988  * @cmd: The tracing command that caused the error
6989  * @str: The string to position the caret at within @cmd
6990  *
6991  * Finds the position of the first occurrence of @str within @cmd.  The
6992  * return value can be passed to tracing_log_err() for caret placement
6993  * within @cmd.
6994  *
6995  * Returns the index within @cmd of the first occurrence of @str or 0
6996  * if @str was not found.
6997  */
6998 unsigned int err_pos(char *cmd, const char *str)
6999 {
7000 	char *found;
7001 
7002 	if (WARN_ON(!strlen(cmd)))
7003 		return 0;
7004 
7005 	found = strstr(cmd, str);
7006 	if (found)
7007 		return found - cmd;
7008 
7009 	return 0;
7010 }
7011 
7012 /**
7013  * tracing_log_err - write an error to the tracing error log
7014  * @tr: The associated trace array for the error (NULL for top level array)
7015  * @loc: A string describing where the error occurred
7016  * @cmd: The tracing command that caused the error
7017  * @errs: The array of loc-specific static error strings
7018  * @type: The index into errs[], which produces the specific static err string
7019  * @pos: The position the caret should be placed in the cmd
7020  *
7021  * Writes an error into tracing/error_log of the form:
7022  *
7023  * <loc>: error: <text>
7024  *   Command: <cmd>
7025  *              ^
7026  *
7027  * tracing/error_log is a small log file containing the last
7028  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7029  * unless there has been a tracing error, and the error log can be
7030  * cleared and have its memory freed by writing the empty string in
7031  * truncation mode to it, i.e. echo > tracing/error_log.
7032  *
7033  * NOTE: the @errs array along with the @type param are used to
7034  * NOTE: the @errs array along with the @type param is used to
7035  * when the error is logged - only a pointer to it is saved.  See
7036  * existing callers for examples of how static strings are typically
7037  * defined for use with tracing_log_err().
7038  */
7039 void tracing_log_err(struct trace_array *tr,
7040 		     const char *loc, const char *cmd,
7041 		     const char **errs, u8 type, u8 pos)
7042 {
7043 	struct tracing_log_err *err;
7044 
7045 	if (!tr)
7046 		tr = &global_trace;
7047 
7048 	mutex_lock(&tracing_err_log_lock);
7049 	err = get_tracing_log_err(tr);
7050 	if (PTR_ERR(err) == -ENOMEM) {
7051 		mutex_unlock(&tracing_err_log_lock);
7052 		return;
7053 	}
7054 
7055 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7056 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7057 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7058 	err->info.errs = errs;
7059 	err->info.type = type;
7060 	err->info.pos = pos;
7061 	err->info.ts = local_clock();
7062 
7063 	list_add_tail(&err->list, &tr->err_log);
7064 	mutex_unlock(&tracing_err_log_lock);
7065 }
7066 
7067 static void clear_tracing_err_log(struct trace_array *tr)
7068 {
7069 	struct tracing_log_err *err, *next;
7070 
7071 	mutex_lock(&tracing_err_log_lock);
7072 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7073 		list_del(&err->list);
7074 		kfree(err);
7075 	}
7076 
7077 	tr->n_err_log_entries = 0;
7078 	mutex_unlock(&tracing_err_log_lock);
7079 }
7080 
7081 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7082 {
7083 	struct trace_array *tr = m->private;
7084 
7085 	mutex_lock(&tracing_err_log_lock);
7086 
7087 	return seq_list_start(&tr->err_log, *pos);
7088 }
7089 
7090 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7091 {
7092 	struct trace_array *tr = m->private;
7093 
7094 	return seq_list_next(v, &tr->err_log, pos);
7095 }
7096 
7097 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7098 {
7099 	mutex_unlock(&tracing_err_log_lock);
7100 }
7101 
7102 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7103 {
7104 	u8 i;
7105 
7106 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7107 		seq_putc(m, ' ');
7108 	for (i = 0; i < pos; i++)
7109 		seq_putc(m, ' ');
7110 	seq_puts(m, "^\n");
7111 }
7112 
7113 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7114 {
7115 	struct tracing_log_err *err = v;
7116 
7117 	if (err) {
7118 		const char *err_text = err->info.errs[err->info.type];
7119 		u64 sec = err->info.ts;
7120 		u32 nsec;
7121 
7122 		nsec = do_div(sec, NSEC_PER_SEC);
7123 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7124 			   err->loc, err_text);
7125 		seq_printf(m, "%s", err->cmd);
7126 		tracing_err_log_show_pos(m, err->info.pos);
7127 	}
7128 
7129 	return 0;
7130 }
7131 
7132 static const struct seq_operations tracing_err_log_seq_ops = {
7133 	.start  = tracing_err_log_seq_start,
7134 	.next   = tracing_err_log_seq_next,
7135 	.stop   = tracing_err_log_seq_stop,
7136 	.show   = tracing_err_log_seq_show
7137 };
7138 
7139 static int tracing_err_log_open(struct inode *inode, struct file *file)
7140 {
7141 	struct trace_array *tr = inode->i_private;
7142 	int ret = 0;
7143 
7144 	ret = tracing_check_open_get_tr(tr);
7145 	if (ret)
7146 		return ret;
7147 
7148 	/* If this file was opened for write, then erase contents */
7149 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7150 		clear_tracing_err_log(tr);
7151 
7152 	if (file->f_mode & FMODE_READ) {
7153 		ret = seq_open(file, &tracing_err_log_seq_ops);
7154 		if (!ret) {
7155 			struct seq_file *m = file->private_data;
7156 			m->private = tr;
7157 		} else {
7158 			trace_array_put(tr);
7159 		}
7160 	}
7161 	return ret;
7162 }
7163 
7164 static ssize_t tracing_err_log_write(struct file *file,
7165 				     const char __user *buffer,
7166 				     size_t count, loff_t *ppos)
7167 {
7168 	return count;
7169 }
7170 
7171 static int tracing_err_log_release(struct inode *inode, struct file *file)
7172 {
7173 	struct trace_array *tr = inode->i_private;
7174 
7175 	trace_array_put(tr);
7176 
7177 	if (file->f_mode & FMODE_READ)
7178 		seq_release(inode, file);
7179 
7180 	return 0;
7181 }
7182 
7183 static const struct file_operations tracing_err_log_fops = {
7184 	.open           = tracing_err_log_open,
7185 	.write		= tracing_err_log_write,
7186 	.read           = seq_read,
7187 	.llseek         = seq_lseek,
7188 	.release        = tracing_err_log_release,
7189 };
7190 
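/*
 * Open handler for the per-CPU "trace_pipe_raw" file, which exposes raw
 * ring buffer pages to user space via read() and splice().
 */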
7191 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7192 {
7193 	struct trace_array *tr = inode->i_private;
7194 	struct ftrace_buffer_info *info;
7195 	int ret;
7196 
7197 	ret = tracing_check_open_get_tr(tr);
7198 	if (ret)
7199 		return ret;
7200 
7201 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7202 	if (!info) {
7203 		trace_array_put(tr);
7204 		return -ENOMEM;
7205 	}
7206 
7207 	mutex_lock(&trace_types_lock);
7208 
7209 	info->iter.tr		= tr;
7210 	info->iter.cpu_file	= tracing_get_cpu(inode);
7211 	info->iter.trace	= tr->current_trace;
7212 	info->iter.trace_buffer = &tr->trace_buffer;
7213 	info->spare		= NULL;
7214 	/* Force reading ring buffer for first read */
7215 	info->read		= (unsigned int)-1;
7216 
7217 	filp->private_data = info;
7218 
7219 	tr->current_trace->ref++;
7220 
7221 	mutex_unlock(&trace_types_lock);
7222 
7223 	ret = nonseekable_open(inode, filp);
7224 	if (ret < 0)
7225 		trace_array_put(tr);
7226 
7227 	return ret;
7228 }
7229 
7230 static __poll_t
7231 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7232 {
7233 	struct ftrace_buffer_info *info = filp->private_data;
7234 	struct trace_iterator *iter = &info->iter;
7235 
7236 	return trace_poll(iter, filp, poll_table);
7237 }
7238 
7239 static ssize_t
7240 tracing_buffers_read(struct file *filp, char __user *ubuf,
7241 		     size_t count, loff_t *ppos)
7242 {
7243 	struct ftrace_buffer_info *info = filp->private_data;
7244 	struct trace_iterator *iter = &info->iter;
7245 	ssize_t ret = 0;
7246 	ssize_t size;
7247 
7248 	if (!count)
7249 		return 0;
7250 
7251 #ifdef CONFIG_TRACER_MAX_TRACE
7252 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7253 		return -EBUSY;
7254 #endif
7255 
7256 	if (!info->spare) {
7257 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7258 							  iter->cpu_file);
7259 		if (IS_ERR(info->spare)) {
7260 			ret = PTR_ERR(info->spare);
7261 			info->spare = NULL;
7262 		} else {
7263 			info->spare_cpu = iter->cpu_file;
7264 		}
7265 	}
7266 	if (!info->spare)
7267 		return ret;
7268 
7269 	/* Do we have previous read data to read? */
7270 	if (info->read < PAGE_SIZE)
7271 		goto read;
7272 
7273  again:
7274 	trace_access_lock(iter->cpu_file);
7275 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7276 				    &info->spare,
7277 				    count,
7278 				    iter->cpu_file, 0);
7279 	trace_access_unlock(iter->cpu_file);
7280 
7281 	if (ret < 0) {
7282 		if (trace_empty(iter)) {
7283 			if ((filp->f_flags & O_NONBLOCK))
7284 				return -EAGAIN;
7285 
7286 			ret = wait_on_pipe(iter, 0);
7287 			if (ret)
7288 				return ret;
7289 
7290 			goto again;
7291 		}
7292 		return 0;
7293 	}
7294 
7295 	info->read = 0;
7296  read:
7297 	size = PAGE_SIZE - info->read;
7298 	if (size > count)
7299 		size = count;
7300 
7301 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7302 	if (ret == size)
7303 		return -EFAULT;
7304 
7305 	size -= ret;
7306 
7307 	*ppos += size;
7308 	info->read += size;
7309 
7310 	return size;
7311 }
7312 
7313 static int tracing_buffers_release(struct inode *inode, struct file *file)
7314 {
7315 	struct ftrace_buffer_info *info = file->private_data;
7316 	struct trace_iterator *iter = &info->iter;
7317 
7318 	mutex_lock(&trace_types_lock);
7319 
7320 	iter->tr->current_trace->ref--;
7321 
7322 	__trace_array_put(iter->tr);
7323 
7324 	if (info->spare)
7325 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7326 					   info->spare_cpu, info->spare);
7327 	kfree(info);
7328 
7329 	mutex_unlock(&trace_types_lock);
7330 
7331 	return 0;
7332 }
7333 
7334 struct buffer_ref {
7335 	struct ring_buffer	*buffer;
7336 	void			*page;
7337 	int			cpu;
7338 	refcount_t		refcount;
7339 };
7340 
7341 static void buffer_ref_release(struct buffer_ref *ref)
7342 {
7343 	if (!refcount_dec_and_test(&ref->refcount))
7344 		return;
7345 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7346 	kfree(ref);
7347 }
7348 
7349 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7350 				    struct pipe_buffer *buf)
7351 {
7352 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7353 
7354 	buffer_ref_release(ref);
7355 	buf->private = 0;
7356 }
7357 
7358 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7359 				struct pipe_buffer *buf)
7360 {
7361 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7362 
7363 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7364 		return false;
7365 
7366 	refcount_inc(&ref->refcount);
7367 	return true;
7368 }
7369 
7370 /* Pipe buffer operations for a buffer. */
7371 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7372 	.confirm		= generic_pipe_buf_confirm,
7373 	.release		= buffer_pipe_buf_release,
7374 	.steal			= generic_pipe_buf_nosteal,
7375 	.get			= buffer_pipe_buf_get,
7376 };
7377 
7378 /*
7379  * Callback from splice_to_pipe(): release pages left at the end
7380  * of the spd in case we errored out while filling the pipe.
7381  */
7382 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7383 {
7384 	struct buffer_ref *ref =
7385 		(struct buffer_ref *)spd->partial[i].private;
7386 
7387 	buffer_ref_release(ref);
7388 	spd->partial[i].private = 0;
7389 }
7390 
7391 static ssize_t
7392 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7393 			    struct pipe_inode_info *pipe, size_t len,
7394 			    unsigned int flags)
7395 {
7396 	struct ftrace_buffer_info *info = file->private_data;
7397 	struct trace_iterator *iter = &info->iter;
7398 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7399 	struct page *pages_def[PIPE_DEF_BUFFERS];
7400 	struct splice_pipe_desc spd = {
7401 		.pages		= pages_def,
7402 		.partial	= partial_def,
7403 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7404 		.ops		= &buffer_pipe_buf_ops,
7405 		.spd_release	= buffer_spd_release,
7406 	};
7407 	struct buffer_ref *ref;
7408 	int entries, i;
7409 	ssize_t ret = 0;
7410 
7411 #ifdef CONFIG_TRACER_MAX_TRACE
7412 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7413 		return -EBUSY;
7414 #endif
7415 
7416 	if (*ppos & (PAGE_SIZE - 1))
7417 		return -EINVAL;
7418 
7419 	if (len & (PAGE_SIZE - 1)) {
7420 		if (len < PAGE_SIZE)
7421 			return -EINVAL;
7422 		len &= PAGE_MASK;
7423 	}
7424 
7425 	if (splice_grow_spd(pipe, &spd))
7426 		return -ENOMEM;
7427 
7428  again:
7429 	trace_access_lock(iter->cpu_file);
7430 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7431 
7432 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7433 		struct page *page;
7434 		int r;
7435 
7436 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7437 		if (!ref) {
7438 			ret = -ENOMEM;
7439 			break;
7440 		}
7441 
7442 		refcount_set(&ref->refcount, 1);
7443 		ref->buffer = iter->trace_buffer->buffer;
7444 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7445 		if (IS_ERR(ref->page)) {
7446 			ret = PTR_ERR(ref->page);
7447 			ref->page = NULL;
7448 			kfree(ref);
7449 			break;
7450 		}
7451 		ref->cpu = iter->cpu_file;
7452 
7453 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7454 					  len, iter->cpu_file, 1);
7455 		if (r < 0) {
7456 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7457 						   ref->page);
7458 			kfree(ref);
7459 			break;
7460 		}
7461 
7462 		page = virt_to_page(ref->page);
7463 
7464 		spd.pages[i] = page;
7465 		spd.partial[i].len = PAGE_SIZE;
7466 		spd.partial[i].offset = 0;
7467 		spd.partial[i].private = (unsigned long)ref;
7468 		spd.nr_pages++;
7469 		*ppos += PAGE_SIZE;
7470 
7471 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7472 	}
7473 
7474 	trace_access_unlock(iter->cpu_file);
7475 	spd.nr_pages = i;
7476 
7477 	/* did we read anything? */
7478 	if (!spd.nr_pages) {
7479 		if (ret)
7480 			goto out;
7481 
7482 		ret = -EAGAIN;
7483 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7484 			goto out;
7485 
7486 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7487 		if (ret)
7488 			goto out;
7489 
7490 		goto again;
7491 	}
7492 
7493 	ret = splice_to_pipe(pipe, &spd);
7494 out:
7495 	splice_shrink_spd(&spd);
7496 
7497 	return ret;
7498 }
7499 
7500 static const struct file_operations tracing_buffers_fops = {
7501 	.open		= tracing_buffers_open,
7502 	.read		= tracing_buffers_read,
7503 	.poll		= tracing_buffers_poll,
7504 	.release	= tracing_buffers_release,
7505 	.splice_read	= tracing_buffers_splice_read,
7506 	.llseek		= no_llseek,
7507 };
7508 
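/*
 * Read handler for the per-CPU "stats" file: print entry, overrun, byte
 * and timestamp statistics for one CPU's ring buffer.
 */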
7509 static ssize_t
7510 tracing_stats_read(struct file *filp, char __user *ubuf,
7511 		   size_t count, loff_t *ppos)
7512 {
7513 	struct inode *inode = file_inode(filp);
7514 	struct trace_array *tr = inode->i_private;
7515 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7516 	int cpu = tracing_get_cpu(inode);
7517 	struct trace_seq *s;
7518 	unsigned long cnt;
7519 	unsigned long long t;
7520 	unsigned long usec_rem;
7521 
7522 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7523 	if (!s)
7524 		return -ENOMEM;
7525 
7526 	trace_seq_init(s);
7527 
7528 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7529 	trace_seq_printf(s, "entries: %ld\n", cnt);
7530 
7531 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7532 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7533 
7534 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7535 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7536 
7537 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7538 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7539 
7540 	if (trace_clocks[tr->clock_id].in_ns) {
7541 		/* local or global for trace_clock */
7542 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7543 		usec_rem = do_div(t, USEC_PER_SEC);
7544 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7545 								t, usec_rem);
7546 
7547 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7548 		usec_rem = do_div(t, USEC_PER_SEC);
7549 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7550 	} else {
7551 		/* counter or tsc mode for trace_clock */
7552 		trace_seq_printf(s, "oldest event ts: %llu\n",
7553 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7554 
7555 		trace_seq_printf(s, "now ts: %llu\n",
7556 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7557 	}
7558 
7559 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7560 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7561 
7562 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7563 	trace_seq_printf(s, "read events: %ld\n", cnt);
7564 
7565 	count = simple_read_from_buffer(ubuf, count, ppos,
7566 					s->buffer, trace_seq_used(s));
7567 
7568 	kfree(s);
7569 
7570 	return count;
7571 }
7572 
7573 static const struct file_operations tracing_stats_fops = {
7574 	.open		= tracing_open_generic_tr,
7575 	.read		= tracing_stats_read,
7576 	.llseek		= generic_file_llseek,
7577 	.release	= tracing_release_generic_tr,
7578 };
7579 
7580 #ifdef CONFIG_DYNAMIC_FTRACE
7581 
7582 static ssize_t
7583 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7584 		  size_t cnt, loff_t *ppos)
7585 {
7586 	unsigned long *p = filp->private_data;
7587 	char buf[64]; /* Not too big for a shallow stack */
7588 	int r;
7589 
7590 	r = scnprintf(buf, 63, "%ld", *p);
7591 	buf[r++] = '\n';
7592 
7593 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7594 }
7595 
7596 static const struct file_operations tracing_dyn_info_fops = {
7597 	.open		= tracing_open_generic,
7598 	.read		= tracing_read_dyn_info,
7599 	.llseek		= generic_file_llseek,
7600 };
7601 #endif /* CONFIG_DYNAMIC_FTRACE */
7602 
7603 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7604 static void
7605 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7606 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7607 		void *data)
7608 {
7609 	tracing_snapshot_instance(tr);
7610 }
7611 
7612 static void
7613 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7614 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7615 		      void *data)
7616 {
7617 	struct ftrace_func_mapper *mapper = data;
7618 	long *count = NULL;
7619 
7620 	if (mapper)
7621 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7622 
7623 	if (count) {
7624 
7625 		if (*count <= 0)
7626 			return;
7627 
7628 		(*count)--;
7629 	}
7630 
7631 	tracing_snapshot_instance(tr);
7632 }
7633 
7634 static int
7635 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7636 		      struct ftrace_probe_ops *ops, void *data)
7637 {
7638 	struct ftrace_func_mapper *mapper = data;
7639 	long *count = NULL;
7640 
7641 	seq_printf(m, "%ps:", (void *)ip);
7642 
7643 	seq_puts(m, "snapshot");
7644 
7645 	if (mapper)
7646 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7647 
7648 	if (count)
7649 		seq_printf(m, ":count=%ld\n", *count);
7650 	else
7651 		seq_puts(m, ":unlimited\n");
7652 
7653 	return 0;
7654 }
7655 
7656 static int
7657 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7658 		     unsigned long ip, void *init_data, void **data)
7659 {
7660 	struct ftrace_func_mapper *mapper = *data;
7661 
7662 	if (!mapper) {
7663 		mapper = allocate_ftrace_func_mapper();
7664 		if (!mapper)
7665 			return -ENOMEM;
7666 		*data = mapper;
7667 	}
7668 
7669 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7670 }
7671 
7672 static void
7673 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7674 		     unsigned long ip, void *data)
7675 {
7676 	struct ftrace_func_mapper *mapper = data;
7677 
7678 	if (!ip) {
7679 		if (!mapper)
7680 			return;
7681 		free_ftrace_func_mapper(mapper, NULL);
7682 		return;
7683 	}
7684 
7685 	ftrace_func_mapper_remove_ip(mapper, ip);
7686 }
7687 
7688 static struct ftrace_probe_ops snapshot_probe_ops = {
7689 	.func			= ftrace_snapshot,
7690 	.print			= ftrace_snapshot_print,
7691 };
7692 
7693 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7694 	.func			= ftrace_count_snapshot,
7695 	.print			= ftrace_snapshot_print,
7696 	.init			= ftrace_snapshot_init,
7697 	.free			= ftrace_snapshot_free,
7698 };
7699 
7700 static int
7701 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7702 			       char *glob, char *cmd, char *param, int enable)
7703 {
7704 	struct ftrace_probe_ops *ops;
7705 	void *count = (void *)-1;
7706 	char *number;
7707 	int ret;
7708 
7709 	if (!tr)
7710 		return -ENODEV;
7711 
7712 	/* hash funcs only work with set_ftrace_filter */
7713 	if (!enable)
7714 		return -EINVAL;
7715 
7716 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7717 
7718 	if (glob[0] == '!')
7719 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7720 
7721 	if (!param)
7722 		goto out_reg;
7723 
7724 	number = strsep(&param, ":");
7725 
7726 	if (!strlen(number))
7727 		goto out_reg;
7728 
7729 	/*
7730 	 * We use the callback data field (which is a pointer)
7731 	 * as our counter.
7732 	 */
7733 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7734 	if (ret)
7735 		return ret;
7736 
7737  out_reg:
7738 	ret = tracing_alloc_snapshot_instance(tr);
7739 	if (ret < 0)
7740 		goto out;
7741 
7742 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7743 
7744  out:
7745 	return ret < 0 ? ret : 0;
7746 }
7747 
7748 static struct ftrace_func_command ftrace_snapshot_cmd = {
7749 	.name			= "snapshot",
7750 	.func			= ftrace_trace_snapshot_callback,
7751 };
7752 
7753 static __init int register_snapshot_cmd(void)
7754 {
7755 	return register_ftrace_command(&ftrace_snapshot_cmd);
7756 }
7757 #else
7758 static inline __init int register_snapshot_cmd(void) { return 0; }
7759 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
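/*
 * Usage sketch for the "snapshot" command registered above (standard
 * ftrace command syntax written to set_ftrace_filter; the function
 * name is only an example):
 *
 *   # echo 'schedule:snapshot' > set_ftrace_filter      snapshot on every hit
 *   # echo 'schedule:snapshot:5' > set_ftrace_filter    only the first 5 hits
 *   # echo '!schedule:snapshot' >> set_ftrace_filter    remove the probe
 *
 * The optional count is the "param" parsed in
 * ftrace_trace_snapshot_callback() and stored per ip in the func mapper.
 */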
7760 
7761 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7762 {
7763 	if (WARN_ON(!tr->dir))
7764 		return ERR_PTR(-ENODEV);
7765 
7766 	/* Top directory uses NULL as the parent */
7767 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7768 		return NULL;
7769 
7770 	/* All sub buffers have a descriptor */
7771 	return tr->dir;
7772 }
7773 
7774 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7775 {
7776 	struct dentry *d_tracer;
7777 
7778 	if (tr->percpu_dir)
7779 		return tr->percpu_dir;
7780 
7781 	d_tracer = tracing_get_dentry(tr);
7782 	if (IS_ERR(d_tracer))
7783 		return NULL;
7784 
7785 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7786 
7787 	WARN_ONCE(!tr->percpu_dir,
7788 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7789 
7790 	return tr->percpu_dir;
7791 }
7792 
7793 static struct dentry *
7794 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7795 		      void *data, long cpu, const struct file_operations *fops)
7796 {
7797 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7798 
7799 	if (ret) /* See tracing_get_cpu() */
7800 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7801 	return ret;
7802 }
7803 
7804 static void
7805 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7806 {
7807 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7808 	struct dentry *d_cpu;
7809 	char cpu_dir[30]; /* 30 characters should be more than enough */
7810 
7811 	if (!d_percpu)
7812 		return;
7813 
7814 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7815 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7816 	if (!d_cpu) {
7817 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7818 		return;
7819 	}
7820 
7821 	/* per cpu trace_pipe */
7822 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7823 				tr, cpu, &tracing_pipe_fops);
7824 
7825 	/* per cpu trace */
7826 	trace_create_cpu_file("trace", 0644, d_cpu,
7827 				tr, cpu, &tracing_fops);
7828 
7829 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7830 				tr, cpu, &tracing_buffers_fops);
7831 
7832 	trace_create_cpu_file("stats", 0444, d_cpu,
7833 				tr, cpu, &tracing_stats_fops);
7834 
7835 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7836 				tr, cpu, &tracing_entries_fops);
7837 
7838 #ifdef CONFIG_TRACER_SNAPSHOT
7839 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7840 				tr, cpu, &snapshot_fops);
7841 
7842 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7843 				tr, cpu, &snapshot_raw_fops);
7844 #endif
7845 }
7846 
7847 #ifdef CONFIG_FTRACE_SELFTEST
7848 /* Let selftest have access to static functions in this file */
7849 #include "trace_selftest.c"
7850 #endif
7851 
7852 static ssize_t
7853 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7854 			loff_t *ppos)
7855 {
7856 	struct trace_option_dentry *topt = filp->private_data;
7857 	char *buf;
7858 
7859 	if (topt->flags->val & topt->opt->bit)
7860 		buf = "1\n";
7861 	else
7862 		buf = "0\n";
7863 
7864 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7865 }
7866 
7867 static ssize_t
7868 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7869 			 loff_t *ppos)
7870 {
7871 	struct trace_option_dentry *topt = filp->private_data;
7872 	unsigned long val;
7873 	int ret;
7874 
7875 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7876 	if (ret)
7877 		return ret;
7878 
7879 	if (val != 0 && val != 1)
7880 		return -EINVAL;
7881 
7882 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7883 		mutex_lock(&trace_types_lock);
7884 		ret = __set_tracer_option(topt->tr, topt->flags,
7885 					  topt->opt, !val);
7886 		mutex_unlock(&trace_types_lock);
7887 		if (ret)
7888 			return ret;
7889 	}
7890 
7891 	*ppos += cnt;
7892 
7893 	return cnt;
7894 }
7895 
7896 
7897 static const struct file_operations trace_options_fops = {
7898 	.open = tracing_open_generic,
7899 	.read = trace_options_read,
7900 	.write = trace_options_write,
7901 	.llseek	= generic_file_llseek,
7902 };
7903 
7904 /*
7905  * In order to pass in both the trace_array descriptor as well as the index
7906  * to the flag that the trace option file represents, the trace_array
7907  * has a character array of trace_flags_index[], which holds the index
7908  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7909  * The address of this character array is passed to the flag option file
7910  * read/write callbacks.
7911  *
7912  * In order to extract both the index and the trace_array descriptor,
7913  * get_tr_index() uses the following algorithm.
7914  *
7915  *   idx = *ptr;
7916  *
7917  * As the pointer points at index[idx] and index[idx] == idx (remember
7918  * index[1] == 1), dereferencing it yields the index.
7919  *
7920  * Then, to get the trace_array descriptor, we subtract that index
7921  * from the pointer, which lands us at the start of the array itself.
7922  *
7923  *   ptr - idx == &index[0]
7924  *
7925  * Then a simple container_of() from that pointer gets us to the
7926  * trace_array descriptor.
7927  */
7928 static void get_tr_index(void *data, struct trace_array **ptr,
7929 			 unsigned int *pindex)
7930 {
7931 	*pindex = *(unsigned char *)data;
7932 
7933 	*ptr = container_of(data - *pindex, struct trace_array,
7934 			    trace_flags_index);
7935 }
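/*
 * A concrete walk-through of the above: if data points at
 * tr->trace_flags_index[3], then *data == 3 (init_trace_flags_index()
 * below sets index[i] = i), and data - 3 == &tr->trace_flags_index[0],
 * so container_of() on that address recovers the enclosing trace_array.
 */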
7936 
7937 static ssize_t
7938 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7939 			loff_t *ppos)
7940 {
7941 	void *tr_index = filp->private_data;
7942 	struct trace_array *tr;
7943 	unsigned int index;
7944 	char *buf;
7945 
7946 	get_tr_index(tr_index, &tr, &index);
7947 
7948 	if (tr->trace_flags & (1 << index))
7949 		buf = "1\n";
7950 	else
7951 		buf = "0\n";
7952 
7953 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7954 }
7955 
7956 static ssize_t
7957 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7958 			 loff_t *ppos)
7959 {
7960 	void *tr_index = filp->private_data;
7961 	struct trace_array *tr;
7962 	unsigned int index;
7963 	unsigned long val;
7964 	int ret;
7965 
7966 	get_tr_index(tr_index, &tr, &index);
7967 
7968 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7969 	if (ret)
7970 		return ret;
7971 
7972 	if (val != 0 && val != 1)
7973 		return -EINVAL;
7974 
7975 	mutex_lock(&trace_types_lock);
7976 	ret = set_tracer_flag(tr, 1 << index, val);
7977 	mutex_unlock(&trace_types_lock);
7978 
7979 	if (ret < 0)
7980 		return ret;
7981 
7982 	*ppos += cnt;
7983 
7984 	return cnt;
7985 }
7986 
7987 static const struct file_operations trace_options_core_fops = {
7988 	.open = tracing_open_generic,
7989 	.read = trace_options_core_read,
7990 	.write = trace_options_core_write,
7991 	.llseek = generic_file_llseek,
7992 };
7993 
7994 struct dentry *trace_create_file(const char *name,
7995 				 umode_t mode,
7996 				 struct dentry *parent,
7997 				 void *data,
7998 				 const struct file_operations *fops)
7999 {
8000 	struct dentry *ret;
8001 
8002 	ret = tracefs_create_file(name, mode, parent, data, fops);
8003 	if (!ret)
8004 		pr_warn("Could not create tracefs '%s' entry\n", name);
8005 
8006 	return ret;
8007 }
8008 
8009 
8010 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8011 {
8012 	struct dentry *d_tracer;
8013 
8014 	if (tr->options)
8015 		return tr->options;
8016 
8017 	d_tracer = tracing_get_dentry(tr);
8018 	if (IS_ERR(d_tracer))
8019 		return NULL;
8020 
8021 	tr->options = tracefs_create_dir("options", d_tracer);
8022 	if (!tr->options) {
8023 		pr_warn("Could not create tracefs directory 'options'\n");
8024 		return NULL;
8025 	}
8026 
8027 	return tr->options;
8028 }
8029 
8030 static void
8031 create_trace_option_file(struct trace_array *tr,
8032 			 struct trace_option_dentry *topt,
8033 			 struct tracer_flags *flags,
8034 			 struct tracer_opt *opt)
8035 {
8036 	struct dentry *t_options;
8037 
8038 	t_options = trace_options_init_dentry(tr);
8039 	if (!t_options)
8040 		return;
8041 
8042 	topt->flags = flags;
8043 	topt->opt = opt;
8044 	topt->tr = tr;
8045 
8046 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8047 				    &trace_options_fops);
8048 
8049 }
8050 
8051 static void
8052 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8053 {
8054 	struct trace_option_dentry *topts;
8055 	struct trace_options *tr_topts;
8056 	struct tracer_flags *flags;
8057 	struct tracer_opt *opts;
8058 	int cnt;
8059 	int i;
8060 
8061 	if (!tracer)
8062 		return;
8063 
8064 	flags = tracer->flags;
8065 
8066 	if (!flags || !flags->opts)
8067 		return;
8068 
8069 	/*
8070 	 * If this is an instance, only create flags for tracers
8071 	 * the instance may have.
8072 	 */
8073 	if (!trace_ok_for_array(tracer, tr))
8074 		return;
8075 
8076 	for (i = 0; i < tr->nr_topts; i++) {
8077 		/* Make sure there are no duplicate flags. */
8078 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8079 			return;
8080 	}
8081 
8082 	opts = flags->opts;
8083 
8084 	for (cnt = 0; opts[cnt].name; cnt++)
8085 		;
8086 
8087 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8088 	if (!topts)
8089 		return;
8090 
8091 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8092 			    GFP_KERNEL);
8093 	if (!tr_topts) {
8094 		kfree(topts);
8095 		return;
8096 	}
8097 
8098 	tr->topts = tr_topts;
8099 	tr->topts[tr->nr_topts].tracer = tracer;
8100 	tr->topts[tr->nr_topts].topts = topts;
8101 	tr->nr_topts++;
8102 
8103 	for (cnt = 0; opts[cnt].name; cnt++) {
8104 		create_trace_option_file(tr, &topts[cnt], flags,
8105 					 &opts[cnt]);
8106 		WARN_ONCE(topts[cnt].entry == NULL,
8107 			  "Failed to create trace option: %s",
8108 			  opts[cnt].name);
8109 	}
8110 }
8111 
8112 static struct dentry *
8113 create_trace_option_core_file(struct trace_array *tr,
8114 			      const char *option, long index)
8115 {
8116 	struct dentry *t_options;
8117 
8118 	t_options = trace_options_init_dentry(tr);
8119 	if (!t_options)
8120 		return NULL;
8121 
8122 	return trace_create_file(option, 0644, t_options,
8123 				 (void *)&tr->trace_flags_index[index],
8124 				 &trace_options_core_fops);
8125 }
8126 
8127 static void create_trace_options_dir(struct trace_array *tr)
8128 {
8129 	struct dentry *t_options;
8130 	bool top_level = tr == &global_trace;
8131 	int i;
8132 
8133 	t_options = trace_options_init_dentry(tr);
8134 	if (!t_options)
8135 		return;
8136 
8137 	for (i = 0; trace_options[i]; i++) {
8138 		if (top_level ||
8139 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8140 			create_trace_option_core_file(tr, trace_options[i], i);
8141 	}
8142 }
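/*
 * The net result is an options/ directory with one file per core trace
 * flag (e.g. options/overwrite), backed by trace_options_core_fops,
 * plus any tracer-specific options added by create_trace_option_files(),
 * backed by trace_options_fops. Rough usage, assuming the usual tracefs
 * mount point:
 *
 *   # cat /sys/kernel/tracing/options/overwrite
 *   # echo 0 > /sys/kernel/tracing/options/overwrite
 */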
8143 
8144 static ssize_t
8145 rb_simple_read(struct file *filp, char __user *ubuf,
8146 	       size_t cnt, loff_t *ppos)
8147 {
8148 	struct trace_array *tr = filp->private_data;
8149 	char buf[64];
8150 	int r;
8151 
8152 	r = tracer_tracing_is_on(tr);
8153 	r = sprintf(buf, "%d\n", r);
8154 
8155 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8156 }
8157 
8158 static ssize_t
8159 rb_simple_write(struct file *filp, const char __user *ubuf,
8160 		size_t cnt, loff_t *ppos)
8161 {
8162 	struct trace_array *tr = filp->private_data;
8163 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
8164 	unsigned long val;
8165 	int ret;
8166 
8167 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8168 	if (ret)
8169 		return ret;
8170 
8171 	if (buffer) {
8172 		mutex_lock(&trace_types_lock);
8173 		if (!!val == tracer_tracing_is_on(tr)) {
8174 			val = 0; /* do nothing */
8175 		} else if (val) {
8176 			tracer_tracing_on(tr);
8177 			if (tr->current_trace->start)
8178 				tr->current_trace->start(tr);
8179 		} else {
8180 			tracer_tracing_off(tr);
8181 			if (tr->current_trace->stop)
8182 				tr->current_trace->stop(tr);
8183 		}
8184 		mutex_unlock(&trace_types_lock);
8185 	}
8186 
8187 	(*ppos)++;
8188 
8189 	return cnt;
8190 }
8191 
8192 static const struct file_operations rb_simple_fops = {
8193 	.open		= tracing_open_generic_tr,
8194 	.read		= rb_simple_read,
8195 	.write		= rb_simple_write,
8196 	.release	= tracing_release_generic_tr,
8197 	.llseek		= default_llseek,
8198 };
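/*
 * rb_simple_fops backs the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below. Illustrative use:
 *
 *   # echo 0 > /sys/kernel/tracing/tracing_on    stop recording, keep buffers
 *   # echo 1 > /sys/kernel/tracing/tracing_on    resume recording
 *   # cat /sys/kernel/tracing/tracing_on         current state, "0" or "1"
 */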
8199 
8200 static ssize_t
8201 buffer_percent_read(struct file *filp, char __user *ubuf,
8202 		    size_t cnt, loff_t *ppos)
8203 {
8204 	struct trace_array *tr = filp->private_data;
8205 	char buf[64];
8206 	int r;
8207 
8208 	r = tr->buffer_percent;
8209 	r = sprintf(buf, "%d\n", r);
8210 
8211 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8212 }
8213 
8214 static ssize_t
8215 buffer_percent_write(struct file *filp, const char __user *ubuf,
8216 		     size_t cnt, loff_t *ppos)
8217 {
8218 	struct trace_array *tr = filp->private_data;
8219 	unsigned long val;
8220 	int ret;
8221 
8222 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8223 	if (ret)
8224 		return ret;
8225 
8226 	if (val > 100)
8227 		return -EINVAL;
8228 
8229 	if (!val)
8230 		val = 1;
8231 
8232 	tr->buffer_percent = val;
8233 
8234 	(*ppos)++;
8235 
8236 	return cnt;
8237 }
8238 
8239 static const struct file_operations buffer_percent_fops = {
8240 	.open		= tracing_open_generic_tr,
8241 	.read		= buffer_percent_read,
8242 	.write		= buffer_percent_write,
8243 	.release	= tracing_release_generic_tr,
8244 	.llseek		= default_llseek,
8245 };
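/*
 * buffer_percent_fops backs the "buffer_percent" file created in
 * init_tracer_tracefs() below (default 50). The value is how full the
 * ring buffer should be before waiting readers are woken; the wake-up
 * logic itself lives elsewhere in this file. Writes above 100 are
 * rejected and a write of 0 is treated as 1 here. For example:
 *
 *   # echo 100 > /sys/kernel/tracing/buffer_percent
 */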
8246 
8247 static struct dentry *trace_instance_dir;
8248 
8249 static void
8250 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8251 
8252 static int
8253 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8254 {
8255 	enum ring_buffer_flags rb_flags;
8256 
8257 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8258 
8259 	buf->tr = tr;
8260 
8261 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8262 	if (!buf->buffer)
8263 		return -ENOMEM;
8264 
8265 	buf->data = alloc_percpu(struct trace_array_cpu);
8266 	if (!buf->data) {
8267 		ring_buffer_free(buf->buffer);
8268 		buf->buffer = NULL;
8269 		return -ENOMEM;
8270 	}
8271 
8272 	/* Allocate the first page for all buffers */
8273 	set_buffer_entries(&tr->trace_buffer,
8274 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
8275 
8276 	return 0;
8277 }
8278 
8279 static int allocate_trace_buffers(struct trace_array *tr, int size)
8280 {
8281 	int ret;
8282 
8283 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8284 	if (ret)
8285 		return ret;
8286 
8287 #ifdef CONFIG_TRACER_MAX_TRACE
8288 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8289 				    allocate_snapshot ? size : 1);
8290 	if (WARN_ON(ret)) {
8291 		ring_buffer_free(tr->trace_buffer.buffer);
8292 		tr->trace_buffer.buffer = NULL;
8293 		free_percpu(tr->trace_buffer.data);
8294 		tr->trace_buffer.data = NULL;
8295 		return -ENOMEM;
8296 	}
8297 	tr->allocated_snapshot = allocate_snapshot;
8298 
8299 	/*
8300 	 * Only the top level trace array gets its snapshot allocated
8301 	 * from the kernel command line.
8302 	 */
8303 	allocate_snapshot = false;
8304 #endif
8305 	return 0;
8306 }
8307 
8308 static void free_trace_buffer(struct trace_buffer *buf)
8309 {
8310 	if (buf->buffer) {
8311 		ring_buffer_free(buf->buffer);
8312 		buf->buffer = NULL;
8313 		free_percpu(buf->data);
8314 		buf->data = NULL;
8315 	}
8316 }
8317 
8318 static void free_trace_buffers(struct trace_array *tr)
8319 {
8320 	if (!tr)
8321 		return;
8322 
8323 	free_trace_buffer(&tr->trace_buffer);
8324 
8325 #ifdef CONFIG_TRACER_MAX_TRACE
8326 	free_trace_buffer(&tr->max_buffer);
8327 #endif
8328 }
8329 
8330 static void init_trace_flags_index(struct trace_array *tr)
8331 {
8332 	int i;
8333 
8334 	/* Used by the trace options files */
8335 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8336 		tr->trace_flags_index[i] = i;
8337 }
8338 
8339 static void __update_tracer_options(struct trace_array *tr)
8340 {
8341 	struct tracer *t;
8342 
8343 	for (t = trace_types; t; t = t->next)
8344 		add_tracer_options(tr, t);
8345 }
8346 
8347 static void update_tracer_options(struct trace_array *tr)
8348 {
8349 	mutex_lock(&trace_types_lock);
8350 	__update_tracer_options(tr);
8351 	mutex_unlock(&trace_types_lock);
8352 }
8353 
8354 struct trace_array *trace_array_create(const char *name)
8355 {
8356 	struct trace_array *tr;
8357 	int ret;
8358 
8359 	mutex_lock(&event_mutex);
8360 	mutex_lock(&trace_types_lock);
8361 
8362 	ret = -EEXIST;
8363 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8364 		if (tr->name && strcmp(tr->name, name) == 0)
8365 			goto out_unlock;
8366 	}
8367 
8368 	ret = -ENOMEM;
8369 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8370 	if (!tr)
8371 		goto out_unlock;
8372 
8373 	tr->name = kstrdup(name, GFP_KERNEL);
8374 	if (!tr->name)
8375 		goto out_free_tr;
8376 
8377 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8378 		goto out_free_tr;
8379 
8380 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8381 
8382 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8383 
8384 	raw_spin_lock_init(&tr->start_lock);
8385 
8386 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8387 
8388 	tr->current_trace = &nop_trace;
8389 
8390 	INIT_LIST_HEAD(&tr->systems);
8391 	INIT_LIST_HEAD(&tr->events);
8392 	INIT_LIST_HEAD(&tr->hist_vars);
8393 	INIT_LIST_HEAD(&tr->err_log);
8394 
8395 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8396 		goto out_free_tr;
8397 
8398 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8399 	if (!tr->dir)
8400 		goto out_free_tr;
8401 
8402 	ret = event_trace_add_tracer(tr->dir, tr);
8403 	if (ret) {
8404 		tracefs_remove_recursive(tr->dir);
8405 		goto out_free_tr;
8406 	}
8407 
8408 	ftrace_init_trace_array(tr);
8409 
8410 	init_tracer_tracefs(tr, tr->dir);
8411 	init_trace_flags_index(tr);
8412 	__update_tracer_options(tr);
8413 
8414 	list_add(&tr->list, &ftrace_trace_arrays);
8415 
8416 	mutex_unlock(&trace_types_lock);
8417 	mutex_unlock(&event_mutex);
8418 
8419 	return tr;
8420 
8421  out_free_tr:
8422 	free_trace_buffers(tr);
8423 	free_cpumask_var(tr->tracing_cpumask);
8424 	kfree(tr->name);
8425 	kfree(tr);
8426 
8427  out_unlock:
8428 	mutex_unlock(&trace_types_lock);
8429 	mutex_unlock(&event_mutex);
8430 
8431 	return ERR_PTR(ret);
8432 }
8433 EXPORT_SYMBOL_GPL(trace_array_create);
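/*
 * This is reached two ways: a module may call trace_array_create("foo")
 * directly (hence the export above), and a mkdir in the tracefs
 * instances directory ends up here via instance_mkdir() below, e.g.:
 *
 *   # mkdir /sys/kernel/tracing/instances/foo
 *
 * which gives "foo" its own buffers, events and tracefs files; rmdir
 * tears it down again via instance_rmdir().
 */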
8434 
8435 static int instance_mkdir(const char *name)
8436 {
8437 	return PTR_ERR_OR_ZERO(trace_array_create(name));
8438 }
8439 
8440 static int __remove_instance(struct trace_array *tr)
8441 {
8442 	int i;
8443 
8444 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8445 		return -EBUSY;
8446 
8447 	list_del(&tr->list);
8448 
8449 	/* Disable all the flags that were enabled coming in */
8450 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8451 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8452 			set_tracer_flag(tr, 1 << i, 0);
8453 	}
8454 
8455 	tracing_set_nop(tr);
8456 	clear_ftrace_function_probes(tr);
8457 	event_trace_del_tracer(tr);
8458 	ftrace_clear_pids(tr);
8459 	ftrace_destroy_function_files(tr);
8460 	tracefs_remove_recursive(tr->dir);
8461 	free_trace_buffers(tr);
8462 
8463 	for (i = 0; i < tr->nr_topts; i++) {
8464 		kfree(tr->topts[i].topts);
8465 	}
8466 	kfree(tr->topts);
8467 
8468 	free_cpumask_var(tr->tracing_cpumask);
8469 	kfree(tr->name);
8470 	kfree(tr);
8471 	tr = NULL;
8472 
8473 	return 0;
8474 }
8475 
8476 int trace_array_destroy(struct trace_array *tr)
8477 {
8478 	int ret;
8479 
8480 	if (!tr)
8481 		return -EINVAL;
8482 
8483 	mutex_lock(&event_mutex);
8484 	mutex_lock(&trace_types_lock);
8485 
8486 	ret = __remove_instance(tr);
8487 
8488 	mutex_unlock(&trace_types_lock);
8489 	mutex_unlock(&event_mutex);
8490 
8491 	return ret;
8492 }
8493 EXPORT_SYMBOL_GPL(trace_array_destroy);
8494 
8495 static int instance_rmdir(const char *name)
8496 {
8497 	struct trace_array *tr;
8498 	int ret;
8499 
8500 	mutex_lock(&event_mutex);
8501 	mutex_lock(&trace_types_lock);
8502 
8503 	ret = -ENODEV;
8504 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8505 		if (tr->name && strcmp(tr->name, name) == 0) {
8506 			ret = __remove_instance(tr);
8507 			break;
8508 		}
8509 	}
8510 
8511 	mutex_unlock(&trace_types_lock);
8512 	mutex_unlock(&event_mutex);
8513 
8514 	return ret;
8515 }
8516 
8517 static __init void create_trace_instances(struct dentry *d_tracer)
8518 {
8519 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8520 							 instance_mkdir,
8521 							 instance_rmdir);
8522 	if (WARN_ON(!trace_instance_dir))
8523 		return;
8524 }
8525 
8526 static void
8527 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8528 {
8529 	struct trace_event_file *file;
8530 	int cpu;
8531 
8532 	trace_create_file("available_tracers", 0444, d_tracer,
8533 			tr, &show_traces_fops);
8534 
8535 	trace_create_file("current_tracer", 0644, d_tracer,
8536 			tr, &set_tracer_fops);
8537 
8538 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8539 			  tr, &tracing_cpumask_fops);
8540 
8541 	trace_create_file("trace_options", 0644, d_tracer,
8542 			  tr, &tracing_iter_fops);
8543 
8544 	trace_create_file("trace", 0644, d_tracer,
8545 			  tr, &tracing_fops);
8546 
8547 	trace_create_file("trace_pipe", 0444, d_tracer,
8548 			  tr, &tracing_pipe_fops);
8549 
8550 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8551 			  tr, &tracing_entries_fops);
8552 
8553 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8554 			  tr, &tracing_total_entries_fops);
8555 
8556 	trace_create_file("free_buffer", 0200, d_tracer,
8557 			  tr, &tracing_free_buffer_fops);
8558 
8559 	trace_create_file("trace_marker", 0220, d_tracer,
8560 			  tr, &tracing_mark_fops);
8561 
8562 	file = __find_event_file(tr, "ftrace", "print");
8563 	if (file && file->dir)
8564 		trace_create_file("trigger", 0644, file->dir, file,
8565 				  &event_trigger_fops);
8566 	tr->trace_marker_file = file;
8567 
8568 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8569 			  tr, &tracing_mark_raw_fops);
8570 
8571 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8572 			  &trace_clock_fops);
8573 
8574 	trace_create_file("tracing_on", 0644, d_tracer,
8575 			  tr, &rb_simple_fops);
8576 
8577 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8578 			  &trace_time_stamp_mode_fops);
8579 
8580 	tr->buffer_percent = 50;
8581 
8582 	trace_create_file("buffer_percent", 0444, d_tracer,
8583 			tr, &buffer_percent_fops);
8584 
8585 	create_trace_options_dir(tr);
8586 
8587 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8588 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8589 			&tr->max_latency, &tracing_max_lat_fops);
8590 #endif
8591 
8592 	if (ftrace_create_function_files(tr, d_tracer))
8593 		WARN(1, "Could not allocate function filter files");
8594 
8595 #ifdef CONFIG_TRACER_SNAPSHOT
8596 	trace_create_file("snapshot", 0644, d_tracer,
8597 			  tr, &snapshot_fops);
8598 #endif
8599 
8600 	trace_create_file("error_log", 0644, d_tracer,
8601 			  tr, &tracing_err_log_fops);
8602 
8603 	for_each_tracing_cpu(cpu)
8604 		tracing_init_tracefs_percpu(tr, cpu);
8605 
8606 	ftrace_init_tracefs(tr, d_tracer);
8607 }
8608 
8609 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8610 {
8611 	struct vfsmount *mnt;
8612 	struct file_system_type *type;
8613 
8614 	/*
8615 	 * To maintain backward compatibility for tools that mount
8616 	 * debugfs to get to the tracing facility, tracefs is automatically
8617 	 * mounted to the debugfs/tracing directory.
8618 	 */
8619 	type = get_fs_type("tracefs");
8620 	if (!type)
8621 		return NULL;
8622 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8623 	put_filesystem(type);
8624 	if (IS_ERR(mnt))
8625 		return NULL;
8626 	mntget(mnt);
8627 
8628 	return mnt;
8629 }
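/*
 * trace_automount() is the callback installed by tracing_init_dentry()
 * below: the first access to debugfs' "tracing" directory mounts
 * tracefs there. Assuming the usual mount points, both of these end up
 * showing the same files:
 *
 *   # ls /sys/kernel/tracing
 *   # ls /sys/kernel/debug/tracing
 */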
8630 
8631 /**
8632  * tracing_init_dentry - initialize top level trace array
8633  *
8634  * This is called when creating files or directories in the tracing
8635  * directory. It is called via fs_initcall() by any of the boot up code
8636  * and expects to return the dentry of the top level tracing directory.
8637  */
8638 struct dentry *tracing_init_dentry(void)
8639 {
8640 	struct trace_array *tr = &global_trace;
8641 
8642 	/* The top level trace array uses NULL as parent */
8643 	if (tr->dir)
8644 		return NULL;
8645 
8646 	if (WARN_ON(!tracefs_initialized()) ||
8647 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8648 		 WARN_ON(!debugfs_initialized())))
8649 		return ERR_PTR(-ENODEV);
8650 
8651 	/*
8652 	 * As there may still be users that expect the tracing
8653 	 * files to exist in debugfs/tracing, we must automount
8654 	 * the tracefs file system there, so older tools still
8655 	 * work with the newer kernel.
8656 	 */
8657 	tr->dir = debugfs_create_automount("tracing", NULL,
8658 					   trace_automount, NULL);
8659 
8660 	return NULL;
8661 }
8662 
8663 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8664 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8665 
8666 static void __init trace_eval_init(void)
8667 {
8668 	int len;
8669 
8670 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8671 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8672 }
8673 
8674 #ifdef CONFIG_MODULES
8675 static void trace_module_add_evals(struct module *mod)
8676 {
8677 	if (!mod->num_trace_evals)
8678 		return;
8679 
8680 	/*
8681 	 * Modules with bad taint do not have events created; do
8682 	 * not bother with their eval maps either.
8683 	 */
8684 	if (trace_module_has_bad_taint(mod))
8685 		return;
8686 
8687 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8688 }
8689 
8690 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8691 static void trace_module_remove_evals(struct module *mod)
8692 {
8693 	union trace_eval_map_item *map;
8694 	union trace_eval_map_item **last = &trace_eval_maps;
8695 
8696 	if (!mod->num_trace_evals)
8697 		return;
8698 
8699 	mutex_lock(&trace_eval_mutex);
8700 
8701 	map = trace_eval_maps;
8702 
8703 	while (map) {
8704 		if (map->head.mod == mod)
8705 			break;
8706 		map = trace_eval_jmp_to_tail(map);
8707 		last = &map->tail.next;
8708 		map = map->tail.next;
8709 	}
8710 	if (!map)
8711 		goto out;
8712 
8713 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8714 	kfree(map);
8715  out:
8716 	mutex_unlock(&trace_eval_mutex);
8717 }
8718 #else
8719 static inline void trace_module_remove_evals(struct module *mod) { }
8720 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8721 
8722 static int trace_module_notify(struct notifier_block *self,
8723 			       unsigned long val, void *data)
8724 {
8725 	struct module *mod = data;
8726 
8727 	switch (val) {
8728 	case MODULE_STATE_COMING:
8729 		trace_module_add_evals(mod);
8730 		break;
8731 	case MODULE_STATE_GOING:
8732 		trace_module_remove_evals(mod);
8733 		break;
8734 	}
8735 
8736 	return 0;
8737 }
8738 
8739 static struct notifier_block trace_module_nb = {
8740 	.notifier_call = trace_module_notify,
8741 	.priority = 0,
8742 };
8743 #endif /* CONFIG_MODULES */
8744 
8745 static __init int tracer_init_tracefs(void)
8746 {
8747 	struct dentry *d_tracer;
8748 
8749 	trace_access_lock_init();
8750 
8751 	d_tracer = tracing_init_dentry();
8752 	if (IS_ERR(d_tracer))
8753 		return 0;
8754 
8755 	event_trace_init();
8756 
8757 	init_tracer_tracefs(&global_trace, d_tracer);
8758 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8759 
8760 	trace_create_file("tracing_thresh", 0644, d_tracer,
8761 			&global_trace, &tracing_thresh_fops);
8762 
8763 	trace_create_file("README", 0444, d_tracer,
8764 			NULL, &tracing_readme_fops);
8765 
8766 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8767 			NULL, &tracing_saved_cmdlines_fops);
8768 
8769 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8770 			  NULL, &tracing_saved_cmdlines_size_fops);
8771 
8772 	trace_create_file("saved_tgids", 0444, d_tracer,
8773 			NULL, &tracing_saved_tgids_fops);
8774 
8775 	trace_eval_init();
8776 
8777 	trace_create_eval_file(d_tracer);
8778 
8779 #ifdef CONFIG_MODULES
8780 	register_module_notifier(&trace_module_nb);
8781 #endif
8782 
8783 #ifdef CONFIG_DYNAMIC_FTRACE
8784 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8785 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8786 #endif
8787 
8788 	create_trace_instances(d_tracer);
8789 
8790 	update_tracer_options(&global_trace);
8791 
8792 	return 0;
8793 }
8794 
8795 static int trace_panic_handler(struct notifier_block *this,
8796 			       unsigned long event, void *unused)
8797 {
8798 	if (ftrace_dump_on_oops)
8799 		ftrace_dump(ftrace_dump_on_oops);
8800 	return NOTIFY_OK;
8801 }
8802 
8803 static struct notifier_block trace_panic_notifier = {
8804 	.notifier_call  = trace_panic_handler,
8805 	.next           = NULL,
8806 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8807 };
8808 
8809 static int trace_die_handler(struct notifier_block *self,
8810 			     unsigned long val,
8811 			     void *data)
8812 {
8813 	switch (val) {
8814 	case DIE_OOPS:
8815 		if (ftrace_dump_on_oops)
8816 			ftrace_dump(ftrace_dump_on_oops);
8817 		break;
8818 	default:
8819 		break;
8820 	}
8821 	return NOTIFY_OK;
8822 }
8823 
8824 static struct notifier_block trace_die_notifier = {
8825 	.notifier_call = trace_die_handler,
8826 	.priority = 200
8827 };
8828 
8829 /*
8830  * printk is set to a max of 1024, but we really don't need it that big.
8831  * Nothing should be printing 1000 characters anyway.
8832  */
8833 #define TRACE_MAX_PRINT		1000
8834 
8835 /*
8836  * Define here KERN_TRACE so that we have one place to modify
8837  * it if we decide to change what log level the ftrace dump
8838  * should be at.
8839  */
8840 #define KERN_TRACE		KERN_EMERG
8841 
8842 void
8843 trace_printk_seq(struct trace_seq *s)
8844 {
8845 	/* Probably should print a warning here. */
8846 	if (s->seq.len >= TRACE_MAX_PRINT)
8847 		s->seq.len = TRACE_MAX_PRINT;
8848 
8849 	/*
8850 	 * More paranoid code. Although the buffer size is set to
8851 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8852 	 * an extra layer of protection.
8853 	 */
8854 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8855 		s->seq.len = s->seq.size - 1;
8856 
8857 	/* should already be NUL terminated, but we are paranoid. */
8858 	s->buffer[s->seq.len] = 0;
8859 
8860 	printk(KERN_TRACE "%s", s->buffer);
8861 
8862 	trace_seq_init(s);
8863 }
8864 
8865 void trace_init_global_iter(struct trace_iterator *iter)
8866 {
8867 	iter->tr = &global_trace;
8868 	iter->trace = iter->tr->current_trace;
8869 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8870 	iter->trace_buffer = &global_trace.trace_buffer;
8871 
8872 	if (iter->trace && iter->trace->open)
8873 		iter->trace->open(iter);
8874 
8875 	/* Annotate start of buffers if we had overruns */
8876 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8877 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8878 
8879 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8880 	if (trace_clocks[iter->tr->clock_id].in_ns)
8881 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8882 }
8883 
8884 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8885 {
8886 	/* use static because iter can be a bit big for the stack */
8887 	static struct trace_iterator iter;
8888 	static atomic_t dump_running;
8889 	struct trace_array *tr = &global_trace;
8890 	unsigned int old_userobj;
8891 	unsigned long flags;
8892 	int cnt = 0, cpu;
8893 
8894 	/* Only allow one dump user at a time. */
8895 	if (atomic_inc_return(&dump_running) != 1) {
8896 		atomic_dec(&dump_running);
8897 		return;
8898 	}
8899 
8900 	/*
8901 	 * Always turn off tracing when we dump.
8902 	 * We don't need to show trace output of what happens
8903 	 * between multiple crashes.
8904 	 *
8905 	 * If the user does a sysrq-z, then they can re-enable
8906 	 * tracing with echo 1 > tracing_on.
8907 	 */
8908 	tracing_off();
8909 
8910 	local_irq_save(flags);
8911 	printk_nmi_direct_enter();
8912 
8913 	/* Simulate the iterator */
8914 	trace_init_global_iter(&iter);
8915 
8916 	for_each_tracing_cpu(cpu) {
8917 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8918 	}
8919 
8920 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8921 
8922 	/* don't look at user memory in panic mode */
8923 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8924 
8925 	switch (oops_dump_mode) {
8926 	case DUMP_ALL:
8927 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8928 		break;
8929 	case DUMP_ORIG:
8930 		iter.cpu_file = raw_smp_processor_id();
8931 		break;
8932 	case DUMP_NONE:
8933 		goto out_enable;
8934 	default:
8935 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8936 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8937 	}
8938 
8939 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8940 
8941 	/* Did function tracer already get disabled? */
8942 	if (ftrace_is_dead()) {
8943 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8944 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8945 	}
8946 
8947 	/*
8948 	 * We need to stop all tracing on all CPUs to read
8949 	 * the next buffer. This is a bit expensive, but it is
8950 	 * not done often. We fill in all that we can read,
8951 	 * and then release the locks again.
8952 	 */
8953 
8954 	while (!trace_empty(&iter)) {
8955 
8956 		if (!cnt)
8957 			printk(KERN_TRACE "---------------------------------\n");
8958 
8959 		cnt++;
8960 
8961 		trace_iterator_reset(&iter);
8962 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8963 
8964 		if (trace_find_next_entry_inc(&iter) != NULL) {
8965 			int ret;
8966 
8967 			ret = print_trace_line(&iter);
8968 			if (ret != TRACE_TYPE_NO_CONSUME)
8969 				trace_consume(&iter);
8970 		}
8971 		touch_nmi_watchdog();
8972 
8973 		trace_printk_seq(&iter.seq);
8974 	}
8975 
8976 	if (!cnt)
8977 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8978 	else
8979 		printk(KERN_TRACE "---------------------------------\n");
8980 
8981  out_enable:
8982 	tr->trace_flags |= old_userobj;
8983 
8984 	for_each_tracing_cpu(cpu) {
8985 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8986 	}
8987 	atomic_dec(&dump_running);
8988 	printk_nmi_direct_exit();
8989 	local_irq_restore(flags);
8990 }
8991 EXPORT_SYMBOL_GPL(ftrace_dump);
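/*
 * ftrace_dump() is reached from the panic/die notifiers above when
 * ftrace_dump_on_oops is set, and can also be triggered by hand via
 * sysrq-z (if sysrq is available), e.g.:
 *
 *   # echo z > /proc/sysrq-trigger
 *
 * As noted in the function, tracing stays off afterwards until it is
 * re-enabled with "echo 1 > tracing_on".
 */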
8992 
8993 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8994 {
8995 	char **argv;
8996 	int argc, ret;
8997 
8998 	argc = 0;
8999 	ret = 0;
9000 	argv = argv_split(GFP_KERNEL, buf, &argc);
9001 	if (!argv)
9002 		return -ENOMEM;
9003 
9004 	if (argc)
9005 		ret = createfn(argc, argv);
9006 
9007 	argv_free(argv);
9008 
9009 	return ret;
9010 }
9011 
9012 #define WRITE_BUFSIZE  4096
9013 
9014 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9015 				size_t count, loff_t *ppos,
9016 				int (*createfn)(int, char **))
9017 {
9018 	char *kbuf, *buf, *tmp;
9019 	int ret = 0;
9020 	size_t done = 0;
9021 	size_t size;
9022 
9023 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9024 	if (!kbuf)
9025 		return -ENOMEM;
9026 
9027 	while (done < count) {
9028 		size = count - done;
9029 
9030 		if (size >= WRITE_BUFSIZE)
9031 			size = WRITE_BUFSIZE - 1;
9032 
9033 		if (copy_from_user(kbuf, buffer + done, size)) {
9034 			ret = -EFAULT;
9035 			goto out;
9036 		}
9037 		kbuf[size] = '\0';
9038 		buf = kbuf;
9039 		do {
9040 			tmp = strchr(buf, '\n');
9041 			if (tmp) {
9042 				*tmp = '\0';
9043 				size = tmp - buf + 1;
9044 			} else {
9045 				size = strlen(buf);
9046 				if (done + size < count) {
9047 					if (buf != kbuf)
9048 						break;
9049 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9050 					pr_warn("Line length is too long: Should be less than %d\n",
9051 						WRITE_BUFSIZE - 2);
9052 					ret = -EINVAL;
9053 					goto out;
9054 				}
9055 			}
9056 			done += size;
9057 
9058 			/* Remove comments */
9059 			tmp = strchr(buf, '#');
9060 
9061 			if (tmp)
9062 				*tmp = '\0';
9063 
9064 			ret = trace_run_command(buf, createfn);
9065 			if (ret)
9066 				goto out;
9067 			buf += size;
9068 
9069 		} while (done < count);
9070 	}
9071 	ret = done;
9072 
9073 out:
9074 	kfree(kbuf);
9075 
9076 	return ret;
9077 }
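/*
 * trace_parse_run_command() is the write path used by command-style
 * tracefs files (e.g. the dynamic kprobe event interface): input is
 * split on newlines, '#' comments are stripped, and each remaining
 * line is argv-split and handed to createfn. A hypothetical caller
 * (names made up for illustration) would look roughly like:
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       my_create_fn);
 *	}
 *
 * where my_create_fn(int argc, char **argv) parses one command line.
 */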
9078 
9079 __init static int tracer_alloc_buffers(void)
9080 {
9081 	int ring_buf_size;
9082 	int ret = -ENOMEM;
9083 
9084 	/*
9085 	 * Make sure we don't accidentally add more trace options
9086 	 * than we have bits for.
9087 	 */
9088 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9089 
9090 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9091 		goto out;
9092 
9093 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9094 		goto out_free_buffer_mask;
9095 
9096 	/* Only allocate trace_printk buffers if a trace_printk exists */
9097 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9098 		/* Must be called before global_trace.buffer is allocated */
9099 		trace_printk_init_buffers();
9100 
9101 	/* To save memory, keep the ring buffer size to its minimum */
9102 	if (ring_buffer_expanded)
9103 		ring_buf_size = trace_buf_size;
9104 	else
9105 		ring_buf_size = 1;
9106 
9107 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9108 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9109 
9110 	raw_spin_lock_init(&global_trace.start_lock);
9111 
9112 	/*
9113 	 * The prepare callback allocates some memory for the ring buffer. We
9114 	 * don't free the buffer if the CPU goes down. If we were to free
9115 	 * the buffer, then the user would lose any trace that was in the
9116 	 * buffer. The memory will be removed once the "instance" is removed.
9117 	 */
9118 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9119 				      "trace/RB:preapre", trace_rb_cpu_prepare,
9120 				      NULL);
9121 	if (ret < 0)
9122 		goto out_free_cpumask;
9123 	/* Used for event triggers */
9124 	ret = -ENOMEM;
9125 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9126 	if (!temp_buffer)
9127 		goto out_rm_hp_state;
9128 
9129 	if (trace_create_savedcmd() < 0)
9130 		goto out_free_temp_buffer;
9131 
9132 	/* TODO: make the number of buffers hot pluggable with CPUS */
9133 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9134 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9135 		WARN_ON(1);
9136 		goto out_free_savedcmd;
9137 	}
9138 
9139 	if (global_trace.buffer_disabled)
9140 		tracing_off();
9141 
9142 	if (trace_boot_clock) {
9143 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9144 		if (ret < 0)
9145 			pr_warn("Trace clock %s not defined, going back to default\n",
9146 				trace_boot_clock);
9147 	}
9148 
9149 	/*
9150 	 * register_tracer() might reference current_trace, so it
9151 	 * needs to be set before we register anything. This is
9152 	 * just a bootstrap of current_trace anyway.
9153 	 */
9154 	global_trace.current_trace = &nop_trace;
9155 
9156 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9157 
9158 	ftrace_init_global_array_ops(&global_trace);
9159 
9160 	init_trace_flags_index(&global_trace);
9161 
9162 	register_tracer(&nop_trace);
9163 
9164 	/* Function tracing may start here (via kernel command line) */
9165 	init_function_trace();
9166 
9167 	/* All seems OK, enable tracing */
9168 	tracing_disabled = 0;
9169 
9170 	atomic_notifier_chain_register(&panic_notifier_list,
9171 				       &trace_panic_notifier);
9172 
9173 	register_die_notifier(&trace_die_notifier);
9174 
9175 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9176 
9177 	INIT_LIST_HEAD(&global_trace.systems);
9178 	INIT_LIST_HEAD(&global_trace.events);
9179 	INIT_LIST_HEAD(&global_trace.hist_vars);
9180 	INIT_LIST_HEAD(&global_trace.err_log);
9181 	list_add(&global_trace.list, &ftrace_trace_arrays);
9182 
9183 	apply_trace_boot_options();
9184 
9185 	register_snapshot_cmd();
9186 
9187 	return 0;
9188 
9189 out_free_savedcmd:
9190 	free_saved_cmdlines_buffer(savedcmd);
9191 out_free_temp_buffer:
9192 	ring_buffer_free(temp_buffer);
9193 out_rm_hp_state:
9194 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9195 out_free_cpumask:
9196 	free_cpumask_var(global_trace.tracing_cpumask);
9197 out_free_buffer_mask:
9198 	free_cpumask_var(tracing_buffer_mask);
9199 out:
9200 	return ret;
9201 }
9202 
9203 void __init early_trace_init(void)
9204 {
9205 	if (tracepoint_printk) {
9206 		tracepoint_print_iter =
9207 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9208 		if (WARN_ON(!tracepoint_print_iter))
9209 			tracepoint_printk = 0;
9210 		else
9211 			static_key_enable(&tracepoint_printk_key.key);
9212 	}
9213 	tracer_alloc_buffers();
9214 }
9215 
9216 void __init trace_init(void)
9217 {
9218 	trace_event_init();
9219 }
9220 
9221 __init static int clear_boot_tracer(void)
9222 {
9223 	 * The default bootup tracer name is stored in an init section buffer.
9224 	 * This function is called at late init time. If we did not
9225 	 * find the boot tracer by then, clear it out, to prevent
9226 	 * find the boot tracer, then clear it out, to prevent
9227 	 * later registration from accessing the buffer that is
9228 	 * about to be freed.
9229 	 */
9230 	if (!default_bootup_tracer)
9231 		return 0;
9232 
9233 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9234 	       default_bootup_tracer);
9235 	default_bootup_tracer = NULL;
9236 
9237 	return 0;
9238 }
9239 
9240 fs_initcall(tracer_init_tracefs);
9241 late_initcall_sync(clear_boot_tracer);
9242 
9243 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9244 __init static int tracing_set_default_clock(void)
9245 {
9246 	/* sched_clock_stable() is determined in late_initcall */
9247 	if (!trace_boot_clock && !sched_clock_stable()) {
9248 		printk(KERN_WARNING
9249 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9250 		       "If you want to keep using the local clock, then add:\n"
9251 		       "  \"trace_clock=local\"\n"
9252 		       "on the kernel command line\n");
9253 		tracing_set_clock(&global_trace, "global");
9254 	}
9255 
9256 	return 0;
9257 }
9258 late_initcall_sync(tracing_set_default_clock);
9259 #endif
9260