xref: /linux/kernel/trace/trace.c (revision ce615f5c1f73537c8267035d58b3d0c70e19b8da)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79 
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82 	{ }
83 };
84 
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88 	return 0;
89 }
90 
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97 
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105 
106 cpumask_var_t __read_mostly	tracing_buffer_mask;
107 
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 to dump the buffers of all CPUs.
121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123 
124 enum ftrace_dump_mode ftrace_dump_on_oops;
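/*
 * Illustrative usage (matches the setup handler below; not part of the
 * file itself): at run time
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * or on the kernel command line
 *
 *	ftrace_dump_on_oops=orig_cpu
 *
 * to dump only the buffer of the CPU that triggered the oops.
 */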
125 
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128 
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132 	struct module			*mod;
133 	unsigned long			length;
134 };
135 
136 union trace_eval_map_item;
137 
138 struct trace_eval_map_tail {
139 	/*
140 	 * "end" is first and points to NULL as it must be different
141 	 * than "mod" or "eval_string"
142 	 */
143 	union trace_eval_map_item	*next;
144 	const char			*end;	/* points to NULL */
145 };
146 
147 static DEFINE_MUTEX(trace_eval_mutex);
148 
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157 	struct trace_eval_map		map;
158 	struct trace_eval_map_head	head;
159 	struct trace_eval_map_tail	tail;
160 };
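/*
 * A sketch of the resulting layout for one saved array of N eval maps
 * (illustration only, derived from the comment above):
 *
 *	item[0]     -> head:  { .mod = owning module (or NULL), .length = N }
 *	item[1..N]  -> map:   the actual trace_eval_map entries
 *	item[N+1]   -> tail:  { .next = next saved array, or NULL at the end }
 */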
161 
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164 
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167 				   unsigned long flags, int pc);
168 
169 #define MAX_TRACER_SIZE		100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172 
173 static bool allocate_snapshot;
174 
175 static int __init set_cmdline_ftrace(char *str)
176 {
177 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178 	default_bootup_tracer = bootup_tracer_buf;
179 	/* We are using ftrace early, expand it */
180 	ring_buffer_expanded = true;
181 	return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184 
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187 	if (*str++ != '=' || !*str) {
188 		ftrace_dump_on_oops = DUMP_ALL;
189 		return 1;
190 	}
191 
192 	if (!strcmp("orig_cpu", str)) {
193 		ftrace_dump_on_oops = DUMP_ORIG;
194 		return 1;
195 	}
196
197 	return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200 
201 static int __init stop_trace_on_warning(char *str)
202 {
203 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204 		__disable_trace_on_warning = 1;
205 	return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208 
209 static int __init boot_alloc_snapshot(char *str)
210 {
211 	allocate_snapshot = true;
212 	/* We also need the main ring buffer expanded */
213 	ring_buffer_expanded = true;
214 	return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217 
218 
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220 
221 static int __init set_trace_boot_options(char *str)
222 {
223 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224 	return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227 
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230 
231 static int __init set_trace_boot_clock(char *str)
232 {
233 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234 	trace_boot_clock = trace_boot_clock_buf;
235 	return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238 
239 static int __init set_tracepoint_printk(char *str)
240 {
241 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 		tracepoint_printk = 1;
243 	return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246 
247 unsigned long long ns2usecs(u64 nsec)
248 {
249 	nsec += 500;
250 	do_div(nsec, 1000);
251 	return nsec;
252 }
253 
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS						\
256 	(FUNCTION_DEFAULT_FLAGS |					\
257 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
258 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
259 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
260 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261 
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
264 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265 
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269 
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275 	.trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277 
278 LIST_HEAD(ftrace_trace_arrays);
279 
280 int trace_array_get(struct trace_array *this_tr)
281 {
282 	struct trace_array *tr;
283 	int ret = -ENODEV;
284 
285 	mutex_lock(&trace_types_lock);
286 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287 		if (tr == this_tr) {
288 			tr->ref++;
289 			ret = 0;
290 			break;
291 		}
292 	}
293 	mutex_unlock(&trace_types_lock);
294 
295 	return ret;
296 }
297 
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300 	WARN_ON(!this_tr->ref);
301 	this_tr->ref--;
302 }
303 
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314 	if (!this_tr)
315 		return;
316 
317 	mutex_lock(&trace_types_lock);
318 	__trace_array_put(this_tr);
319 	mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
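/*
 * Typical get/put pairing (sketch; trace_array_get_by_name() is the
 * getter referenced in the kernel-doc above):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr) {
 *		... use tr: the reference keeps it from being destroyed ...
 *		trace_array_put(tr);
 *	}
 */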
322 
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325 	int ret;
326 
327 	ret = security_locked_down(LOCKDOWN_TRACEFS);
328 	if (ret)
329 		return ret;
330 
331 	if (tracing_disabled)
332 		return -ENODEV;
333 
334 	if (tr && trace_array_get(tr) < 0)
335 		return -ENODEV;
336 
337 	return 0;
338 }
339 
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341 			      struct trace_buffer *buffer,
342 			      struct ring_buffer_event *event)
343 {
344 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345 	    !filter_match_preds(call->filter, rec)) {
346 		__trace_event_discard_commit(buffer, event);
347 		return 1;
348 	}
349 
350 	return 0;
351 }
352 
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355 	vfree(pid_list->pids);
356 	kfree(pid_list);
357 }
358 
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369 	/*
370 	 * If pid_max changed after filtered_pids was created, we
371 	 * by default ignore all pids greater than the previous pid_max.
372 	 */
373 	if (search_pid >= filtered_pids->pid_max)
374 		return false;
375 
376 	return test_bit(search_pid, filtered_pids->pids);
377 }
378 
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390 		       struct trace_pid_list *filtered_no_pids,
391 		       struct task_struct *task)
392 {
393 	/*
394 	 * If filtered_no_pids is not empty, and the task's pid is listed
395 	 * in filtered_no_pids, then return true.
396 	 * Otherwise, if filtered_pids is empty, that means we can
397 	 * trace all tasks. If it has content, then only trace pids
398 	 * within filtered_pids.
399 	 */
400 
401 	return (filtered_pids &&
402 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
403 		(filtered_no_pids &&
404 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
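/*
 * Sketch of a (hypothetical) caller, showing the intended polarity of
 * the return value:
 *
 *	if (trace_ignore_this_task(pid_list, no_pid_list, current))
 *		return;		do not record an event for this task
 */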
406 
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420 				  struct task_struct *self,
421 				  struct task_struct *task)
422 {
423 	if (!pid_list)
424 		return;
425 
426 	/* For forks, we only add if the forking task is listed */
427 	if (self) {
428 		if (!trace_find_filtered_pid(pid_list, self->pid))
429 			return;
430 	}
431 
432 	/* Sorry, but we don't support pid_max changing after setting */
433 	if (task->pid >= pid_list->pid_max)
434 		return;
435 
436 	/* "self" is set for forks, and NULL for exits */
437 	if (self)
438 		set_bit(task->pid, pid_list->pids);
439 	else
440 		clear_bit(task->pid, pid_list->pids);
441 }
442 
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457 	unsigned long pid = (unsigned long)v;
458 
459 	(*pos)++;
460 
461 	/* pid already is +1 of the actual previous bit */
462 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463 
464 	/* Return pid + 1 to allow zero to be represented */
465 	if (pid < pid_list->pid_max)
466 		return (void *)(pid + 1);
467 
468 	return NULL;
469 }
470 
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484 	unsigned long pid;
485 	loff_t l = 0;
486 
487 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488 	if (pid >= pid_list->pid_max)
489 		return NULL;
490 
491 	/* Return pid + 1 so that zero can be the exit value */
492 	for (pid++; pid && l < *pos;
493 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494 		;
495 	return (void *)pid;
496 }
497 
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508 	unsigned long pid = (unsigned long)v - 1;
509 
510 	seq_printf(m, "%lu\n", pid);
511 	return 0;
512 }
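/*
 * trace_pid_start/next/show are meant to back a seq_file. A minimal
 * wiring sketch (hypothetical ops struct; the real users live in the
 * event and function tracing code):
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= my_pid_start,		calls trace_pid_start()
 *		.next	= my_pid_next,		calls trace_pid_next()
 *		.stop	= my_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */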
513 
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE		127
516 
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518 		    struct trace_pid_list **new_pid_list,
519 		    const char __user *ubuf, size_t cnt)
520 {
521 	struct trace_pid_list *pid_list;
522 	struct trace_parser parser;
523 	unsigned long val;
524 	int nr_pids = 0;
525 	ssize_t read = 0;
526 	ssize_t ret = 0;
527 	loff_t pos;
528 	pid_t pid;
529 
530 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531 		return -ENOMEM;
532 
533 	/*
534 	 * Always create a new array. The write is an all-or-nothing
535 	 * operation: a fresh array is always built when the user adds
536 	 * pids, and if the operation fails, the current list is left
537 	 * unmodified.
538 	 */
539 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540 	if (!pid_list) {
541 		trace_parser_put(&parser);
542 		return -ENOMEM;
543 	}
544 
545 	pid_list->pid_max = READ_ONCE(pid_max);
546 
547 	/* Only truncating will shrink pid_max */
548 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549 		pid_list->pid_max = filtered_pids->pid_max;
550 
551 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552 	if (!pid_list->pids) {
553 		trace_parser_put(&parser);
554 		kfree(pid_list);
555 		return -ENOMEM;
556 	}
557 
558 	if (filtered_pids) {
559 		/* copy the current bits to the new max */
560 		for_each_set_bit(pid, filtered_pids->pids,
561 				 filtered_pids->pid_max) {
562 			set_bit(pid, pid_list->pids);
563 			nr_pids++;
564 		}
565 	}
566 
567 	while (cnt > 0) {
568 
569 		pos = 0;
570 
571 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
572 		if (ret < 0 || !trace_parser_loaded(&parser))
573 			break;
574 
575 		read += ret;
576 		ubuf += ret;
577 		cnt -= ret;
578 
579 		ret = -EINVAL;
580 		if (kstrtoul(parser.buffer, 0, &val))
581 			break;
582 		if (val >= pid_list->pid_max)
583 			break;
584 
585 		pid = (pid_t)val;
586 
587 		set_bit(pid, pid_list->pids);
588 		nr_pids++;
589 
590 		trace_parser_clear(&parser);
591 		ret = 0;
592 	}
593 	trace_parser_put(&parser);
594 
595 	if (ret < 0) {
596 		trace_free_pid_list(pid_list);
597 		return ret;
598 	}
599 
600 	if (!nr_pids) {
601 		/* Cleared the list of pids */
602 		trace_free_pid_list(pid_list);
603 		read = ret;
604 		pid_list = NULL;
605 	}
606 
607 	*new_pid_list = pid_list;
608 
609 	return read;
610 }
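/*
 * The accepted input is a whitespace-separated list of pids, appended
 * to any previously set pids, e.g. (illustration only; set_event_pid is
 * one of the tracefs files whose write handler lands here):
 *
 *	echo "23 45 678" > set_event_pid
 */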
611 
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614 	u64 ts;
615 
616 	/* Early boot up does not have a buffer yet */
617 	if (!buf->buffer)
618 		return trace_clock_local();
619 
620 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
621 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622 
623 	return ts;
624 }
625 
626 u64 ftrace_now(int cpu)
627 {
628 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630 
631 /**
632  * tracing_is_enabled - Show if global_trace has been disabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" so it can be used in fast paths, such
636  * as by the irqsoff tracer. But it may be inaccurate due to races. If
637  * you need to know the accurate state, use tracing_is_on(), which is a
638  * little slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642 	/*
643 	 * For quick access (irqsoff uses this in fast path), just
644 	 * return the mirror variable of the state of the ring buffer.
645 	 * It's a little racy, but we don't really care.
646 	 */
647 	smp_rmb();
648 	return !global_trace.buffer_disabled;
649 }
650 
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low number of 16384.
657  * If a dump on oops happens, it is much appreciated not to
658  * have to wait for all that output. In any case, this is
659  * configurable at both boot time and run time.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
662 
663 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664 
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer		*trace_types __read_mostly;
667 
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672 
673 /*
674  * Serialize access to the ring buffer.
675  *
676  * The ring buffer serializes readers, but that is only low-level
677  * protection. The validity of the events (returned by
678  * ring_buffer_peek() etc.) is not protected by the ring buffer.
679  *
680  * The content of events may become garbage if we allow another process
681  * to consume these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not a reader page) in the ring buffer, and this page will be
684  *      rewritten by the event producer.
685  *   B) the page of the consumed events may become a page for
686  *      splice_read, and this page will be returned to the system.
687  *
688  * These primitives allow multiple processes to access different per-cpu
689  * ring buffers concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multiple read-only accesses are also serialized.
693  */
694 
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	if (cpu == RING_BUFFER_ALL_CPUS) {
702 		/* gain it for accessing the whole ring buffer. */
703 		down_write(&all_cpu_access_lock);
704 	} else {
705 		/* gain it for accessing a cpu ring buffer. */
706 
707 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708 		down_read(&all_cpu_access_lock);
709 
710 		/* Secondly block other access to this @cpu ring buffer. */
711 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
712 	}
713 }
714 
715 static inline void trace_access_unlock(int cpu)
716 {
717 	if (cpu == RING_BUFFER_ALL_CPUS) {
718 		up_write(&all_cpu_access_lock);
719 	} else {
720 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721 		up_read(&all_cpu_access_lock);
722 	}
723 }
724 
725 static inline void trace_access_lock_init(void)
726 {
727 	int cpu;
728 
729 	for_each_possible_cpu(cpu)
730 		mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732 
733 #else
734 
735 static DEFINE_MUTEX(access_lock);
736 
737 static inline void trace_access_lock(int cpu)
738 {
739 	(void)cpu;
740 	mutex_lock(&access_lock);
741 }
742 
743 static inline void trace_access_unlock(int cpu)
744 {
745 	(void)cpu;
746 	mutex_unlock(&access_lock);
747 }
748 
749 static inline void trace_access_lock_init(void)
750 {
751 }
752 
753 #endif
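/*
 * Typical reader-side pairing, as a sketch:
 *
 *	trace_access_lock(cpu);		cpu, or RING_BUFFER_ALL_CPUS
 *	... consume events from that cpu's buffer (or from all of them) ...
 *	trace_access_unlock(cpu);
 */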
754 
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757 				 unsigned long flags,
758 				 int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760 				      struct trace_buffer *buffer,
761 				      unsigned long flags,
762 				      int skip, int pc, struct pt_regs *regs);
763 
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766 					unsigned long flags,
767 					int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771 				      struct trace_buffer *buffer,
772 				      unsigned long flags,
773 				      int skip, int pc, struct pt_regs *regs)
774 {
775 }
776 
777 #endif
778 
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781 		  int type, unsigned long flags, int pc)
782 {
783 	struct trace_entry *ent = ring_buffer_event_data(event);
784 
785 	tracing_generic_entry_update(ent, type, flags, pc);
786 }
787 
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790 			  int type,
791 			  unsigned long len,
792 			  unsigned long flags, int pc)
793 {
794 	struct ring_buffer_event *event;
795 
796 	event = ring_buffer_lock_reserve(buffer, len);
797 	if (event != NULL)
798 		trace_event_setup(event, type, flags, pc);
799 
800 	return event;
801 }
802 
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805 	if (tr->array_buffer.buffer)
806 		ring_buffer_record_on(tr->array_buffer.buffer);
807 	/*
808 	 * This flag is looked at when buffers haven't been allocated
809 	 * yet, or by some tracers (like irqsoff) that just want to
810 	 * know if the ring buffer has been disabled, but it can handle
811 	 * races where it gets disabled while we still do a record.
812 	 * As the check is in the fast path of the tracers, it is more
813 	 * important to be fast than accurate.
814 	 */
815 	tr->buffer_disabled = 0;
816 	/* Make the flag seen by readers */
817 	smp_wmb();
818 }
819 
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828 	tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831 
832 
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836 	__this_cpu_write(trace_taskinfo_save, true);
837 
838 	/* If this is the temp buffer, we need to commit fully */
839 	if (this_cpu_read(trace_buffered_event) == event) {
840 		/* Length is in event->array[0] */
841 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
842 		/* Release the temp buffer */
843 		this_cpu_dec(trace_buffered_event_cnt);
844 	} else
845 		ring_buffer_unlock_commit(buffer, event);
846 }
847 
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:	   The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856 	struct ring_buffer_event *event;
857 	struct trace_buffer *buffer;
858 	struct print_entry *entry;
859 	unsigned long irq_flags;
860 	int alloc;
861 	int pc;
862 
863 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864 		return 0;
865 
866 	pc = preempt_count();
867 
868 	if (unlikely(tracing_selftest_running || tracing_disabled))
869 		return 0;
870 
871 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
872 
873 	local_save_flags(irq_flags);
874 	buffer = global_trace.array_buffer.buffer;
875 	ring_buffer_nest_start(buffer);
876 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
877 					    irq_flags, pc);
878 	if (!event) {
879 		size = 0;
880 		goto out;
881 	}
882 
883 	entry = ring_buffer_event_data(event);
884 	entry->ip = ip;
885 
886 	memcpy(&entry->buf, str, size);
887 
888 	/* Add a newline if necessary */
889 	if (entry->buf[size - 1] != '\n') {
890 		entry->buf[size] = '\n';
891 		entry->buf[size + 1] = '\0';
892 	} else
893 		entry->buf[size] = '\0';
894 
895 	__buffer_unlock_commit(buffer, event);
896 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898 	ring_buffer_nest_end(buffer);
899 	return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
902 
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:	   The address of the caller
906  * @str:   The constant string to write to the buffer
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910 	struct ring_buffer_event *event;
911 	struct trace_buffer *buffer;
912 	struct bputs_entry *entry;
913 	unsigned long irq_flags;
914 	int size = sizeof(struct bputs_entry);
915 	int ret = 0;
916 	int pc;
917 
918 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919 		return 0;
920 
921 	pc = preempt_count();
922 
923 	if (unlikely(tracing_selftest_running || tracing_disabled))
924 		return 0;
925 
926 	local_save_flags(irq_flags);
927 	buffer = global_trace.array_buffer.buffer;
928 
929 	ring_buffer_nest_start(buffer);
930 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931 					    irq_flags, pc);
932 	if (!event)
933 		goto out;
934 
935 	entry = ring_buffer_event_data(event);
936 	entry->ip			= ip;
937 	entry->str			= str;
938 
939 	__buffer_unlock_commit(buffer, event);
940 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941 
942 	ret = 1;
943  out:
944 	ring_buffer_nest_end(buffer);
945 	return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948 
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951 					   void *cond_data)
952 {
953 	struct tracer *tracer = tr->current_trace;
954 	unsigned long flags;
955 
956 	if (in_nmi()) {
957 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958 		internal_trace_puts("*** snapshot is being ignored        ***\n");
959 		return;
960 	}
961 
962 	if (!tr->allocated_snapshot) {
963 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964 		internal_trace_puts("*** stopping trace here!   ***\n");
965 		tracing_off();
966 		return;
967 	}
968 
969 	/* Note, snapshot can not be used when the tracer uses it */
970 	if (tracer->use_max_tr) {
971 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973 		return;
974 	}
975 
976 	local_irq_save(flags);
977 	update_max_tr(tr, current, smp_processor_id(), cond_data);
978 	local_irq_restore(flags);
979 }
980 
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983 	tracing_snapshot_instance_cond(tr, NULL);
984 }
985 
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002 	struct trace_array *tr = &global_trace;
1003 
1004 	tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
1007 
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:		The tracing instance to snapshot
1011  * @cond_data:	The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023 	tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026 
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:		The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already taken.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043 	void *cond_data = NULL;
1044 
1045 	arch_spin_lock(&tr->max_lock);
1046 
1047 	if (tr->cond_snapshot)
1048 		cond_data = tr->cond_snapshot->cond_data;
1049 
1050 	arch_spin_unlock(&tr->max_lock);
1051 
1052 	return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055 
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057 					struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059 
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062 	int ret;
1063 
1064 	if (!tr->allocated_snapshot) {
1065 
1066 		/* allocate spare buffer */
1067 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069 		if (ret < 0)
1070 			return ret;
1071 
1072 		tr->allocated_snapshot = true;
1073 	}
1074 
1075 	return 0;
1076 }
1077 
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080 	/*
1081 	 * We don't free the ring buffer; instead, we resize it because
1082 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1083 	 * we want to preserve it.
1084 	 */
1085 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086 	set_buffer_entries(&tr->max_buffer, 1);
1087 	tracing_reset_online_cpus(&tr->max_buffer);
1088 	tr->allocated_snapshot = false;
1089 }
1090 
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103 	struct trace_array *tr = &global_trace;
1104 	int ret;
1105 
1106 	ret = tracing_alloc_snapshot_instance(tr);
1107 	WARN_ON(ret < 0);
1108 
1109 	return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112 
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126 	int ret;
1127 
1128 	ret = tracing_alloc_snapshot();
1129 	if (ret < 0)
1130 		return;
1131 
1132 	tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1135 
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:		The tracing instance
1139  * @cond_data:	User data to associate with the snapshot
1140  * @update:	Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150 				 cond_update_fn_t update)
1151 {
1152 	struct cond_snapshot *cond_snapshot;
1153 	int ret = 0;
1154 
1155 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156 	if (!cond_snapshot)
1157 		return -ENOMEM;
1158 
1159 	cond_snapshot->cond_data = cond_data;
1160 	cond_snapshot->update = update;
1161 
1162 	mutex_lock(&trace_types_lock);
1163 
1164 	ret = tracing_alloc_snapshot_instance(tr);
1165 	if (ret)
1166 		goto fail_unlock;
1167 
1168 	if (tr->current_trace->use_max_tr) {
1169 		ret = -EBUSY;
1170 		goto fail_unlock;
1171 	}
1172 
1173 	/*
1174 	 * The cond_snapshot can only change to NULL without the
1175 	 * trace_types_lock. We don't care if we race with it going
1176 	 * to NULL, but we want to make sure that it's not set to
1177 	 * something other than NULL when we get here, which we can
1178 	 * do safely with only holding the trace_types_lock and not
1179 	 * having to take the max_lock.
1180 	 */
1181 	if (tr->cond_snapshot) {
1182 		ret = -EBUSY;
1183 		goto fail_unlock;
1184 	}
1185 
1186 	arch_spin_lock(&tr->max_lock);
1187 	tr->cond_snapshot = cond_snapshot;
1188 	arch_spin_unlock(&tr->max_lock);
1189 
1190 	mutex_unlock(&trace_types_lock);
1191 
1192 	return ret;
1193 
1194  fail_unlock:
1195 	mutex_unlock(&trace_types_lock);
1196 	kfree(cond_snapshot);
1197 	return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
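/*
 * Hedged usage sketch of the conditional snapshot API (hypothetical
 * callback and data; the callback type matches the update() use in
 * update_max_tr()):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return ...;	take the snapshot only when this is true
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);	snapshots only if my_update() agreed
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */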
1200 
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:		The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213 	int ret = 0;
1214 
1215 	arch_spin_lock(&tr->max_lock);
1216 
1217 	if (!tr->cond_snapshot)
1218 		ret = -EINVAL;
1219 	else {
1220 		kfree(tr->cond_snapshot);
1221 		tr->cond_snapshot = NULL;
1222 	}
1223 
1224 	arch_spin_unlock(&tr->max_lock);
1225 
1226 	return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243 	return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248 	/* Give warning */
1249 	tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254 	return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259 	return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264 	return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268 
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271 	if (tr->array_buffer.buffer)
1272 		ring_buffer_record_off(tr->array_buffer.buffer);
1273 	/*
1274 	 * This flag is looked at when buffers haven't been allocated
1275 	 * yet, or by some tracers (like irqsoff) that just want to
1276 	 * know if the ring buffer has been disabled, but it can handle
1277 	 * races where it gets disabled while we still do a record.
1278 	 * As the check is in the fast path of the tracers, it is more
1279 	 * important to be fast than accurate.
1280 	 */
1281 	tr->buffer_disabled = 1;
1282 	/* Make the flag seen by readers */
1283 	smp_wmb();
1284 }
1285 
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296 	tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299 
1300 void disable_trace_on_warning(void)
1301 {
1302 	if (__disable_trace_on_warning) {
1303 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304 			"Disabling tracing due to warning\n");
1305 		tracing_off();
1306 	}
1307 }
1308 
1309 /**
1310  * tracer_tracing_is_on - show real state of ring buffer enabled
1311  * @tr: the trace array to know if the ring buffer is enabled
1312  *
1313  * Shows real state of the ring buffer if it is enabled or not.
1314  */
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1316 {
1317 	if (tr->array_buffer.buffer)
1318 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319 	return !tr->buffer_disabled;
1320 }
1321 
1322 /**
1323  * tracing_is_on - show state of ring buffers enabled
1324  */
1325 int tracing_is_on(void)
1326 {
1327 	return tracer_tracing_is_on(&global_trace);
1328 }
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1330 
1331 static int __init set_buf_size(char *str)
1332 {
1333 	unsigned long buf_size;
1334 
1335 	if (!str)
1336 		return 0;
1337 	buf_size = memparse(str, &str);
1338 	/* nr_entries can not be zero */
1339 	if (buf_size == 0)
1340 		return 0;
1341 	trace_buf_size = buf_size;
1342 	return 1;
1343 }
1344 __setup("trace_buf_size=", set_buf_size);
1345 
1346 static int __init set_tracing_thresh(char *str)
1347 {
1348 	unsigned long threshold;
1349 	int ret;
1350 
1351 	if (!str)
1352 		return 0;
1353 	ret = kstrtoul(str, 0, &threshold);
1354 	if (ret < 0)
1355 		return 0;
1356 	tracing_thresh = threshold * 1000;
1357 	return 1;
1358 }
1359 __setup("tracing_thresh=", set_tracing_thresh);
1360 
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1362 {
1363 	return nsecs / 1000;
1364 }
1365 
1366 /*
1367  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370  * of strings in the order that the evals (enum) were defined.
1371  */
1372 #undef C
1373 #define C(a, b) b
1374 
1375 /* These must match the bit positions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1377 	TRACE_FLAGS
1378 	NULL
1379 };
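/*
 * Illustration of the expansion (hypothetical two-entry TRACE_FLAGS):
 * if trace.h provided
 *
 *	#define TRACE_FLAGS C(PRINTK, "printk"), C(MARKERS, "markers"),
 *
 * then with "#define C(a, b) b" the array above becomes
 *
 *	{ "printk", "markers", NULL }
 *
 * while the headers reuse the same list with a different C() to build
 * the matching TRACE_ITER_* bit masks.
 */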
1380 
1381 static struct {
1382 	u64 (*func)(void);
1383 	const char *name;
1384 	int in_ns;		/* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386 	{ trace_clock_local,		"local",	1 },
1387 	{ trace_clock_global,		"global",	1 },
1388 	{ trace_clock_counter,		"counter",	0 },
1389 	{ trace_clock_jiffies,		"uptime",	0 },
1390 	{ trace_clock,			"perf",		1 },
1391 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1392 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1393 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1394 	ARCH_TRACE_CLOCKS
1395 };
1396 
1397 bool trace_clock_in_ns(struct trace_array *tr)
1398 {
1399 	if (trace_clocks[tr->clock_id].in_ns)
1400 		return true;
1401 
1402 	return false;
1403 }
1404 
1405 /*
1406  * trace_parser_get_init - gets the buffer for trace parser
1407  */
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1409 {
1410 	memset(parser, 0, sizeof(*parser));
1411 
1412 	parser->buffer = kmalloc(size, GFP_KERNEL);
1413 	if (!parser->buffer)
1414 		return 1;
1415 
1416 	parser->size = size;
1417 	return 0;
1418 }
1419 
1420 /*
1421  * trace_parser_put - frees the buffer for trace parser
1422  */
1423 void trace_parser_put(struct trace_parser *parser)
1424 {
1425 	kfree(parser->buffer);
1426 	parser->buffer = NULL;
1427 }
1428 
1429 /*
1430  * trace_get_user - reads the user input string separated by space
1431  * (matched by isspace(ch))
1432  *
1433  * For each string found the 'struct trace_parser' is updated,
1434  * and the function returns.
1435  *
1436  * Returns number of bytes read.
1437  *
1438  * See kernel/trace/trace.h for 'struct trace_parser' details.
1439  */
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441 	size_t cnt, loff_t *ppos)
1442 {
1443 	char ch;
1444 	size_t read = 0;
1445 	ssize_t ret;
1446 
1447 	if (!*ppos)
1448 		trace_parser_clear(parser);
1449 
1450 	ret = get_user(ch, ubuf++);
1451 	if (ret)
1452 		goto out;
1453 
1454 	read++;
1455 	cnt--;
1456 
1457 	/*
1458 	 * The parser is not finished with the last write,
1459 	 * continue reading the user input without skipping spaces.
1460 	 */
1461 	if (!parser->cont) {
1462 		/* skip white space */
1463 		while (cnt && isspace(ch)) {
1464 			ret = get_user(ch, ubuf++);
1465 			if (ret)
1466 				goto out;
1467 			read++;
1468 			cnt--;
1469 		}
1470 
1471 		parser->idx = 0;
1472 
1473 		/* only spaces were written */
1474 		if (isspace(ch) || !ch) {
1475 			*ppos += read;
1476 			ret = read;
1477 			goto out;
1478 		}
1479 	}
1480 
1481 	/* read the non-space input */
1482 	while (cnt && !isspace(ch) && ch) {
1483 		if (parser->idx < parser->size - 1)
1484 			parser->buffer[parser->idx++] = ch;
1485 		else {
1486 			ret = -EINVAL;
1487 			goto out;
1488 		}
1489 		ret = get_user(ch, ubuf++);
1490 		if (ret)
1491 			goto out;
1492 		read++;
1493 		cnt--;
1494 	}
1495 
1496 	/* We either got finished input or we have to wait for another call. */
1497 	if (isspace(ch) || !ch) {
1498 		parser->buffer[parser->idx] = 0;
1499 		parser->cont = false;
1500 	} else if (parser->idx < parser->size - 1) {
1501 		parser->cont = true;
1502 		parser->buffer[parser->idx++] = ch;
1503 		/* Make sure the parsed string always terminates with '\0'. */
1504 		parser->buffer[parser->idx] = 0;
1505 	} else {
1506 		ret = -EINVAL;
1507 		goto out;
1508 	}
1509 
1510 	*ppos += read;
1511 	ret = read;
1512 
1513 out:
1514 	return ret;
1515 }
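/*
 * Usage sketch, mirroring trace_pid_write() above: pull one
 * whitespace-separated token per call until the user buffer is drained.
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		... use parser.buffer ...
 *		trace_parser_clear(&parser);
 *	}
 */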
1516 
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1519 {
1520 	int len;
1521 
1522 	if (trace_seq_used(s) <= s->seq.readpos)
1523 		return -EBUSY;
1524 
1525 	len = trace_seq_used(s) - s->seq.readpos;
1526 	if (cnt > len)
1527 		cnt = len;
1528 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1529 
1530 	s->seq.readpos += cnt;
1531 	return cnt;
1532 }
1533 
1534 unsigned long __read_mostly	tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1536 
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538 	defined(CONFIG_FSNOTIFY)
1539 
1540 static struct workqueue_struct *fsnotify_wq;
1541 
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1543 {
1544 	struct trace_array *tr = container_of(work, struct trace_array,
1545 					      fsnotify_work);
1546 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1547 }
1548 
1549 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1550 {
1551 	struct trace_array *tr = container_of(iwork, struct trace_array,
1552 					      fsnotify_irqwork);
1553 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1554 }
1555 
1556 static void trace_create_maxlat_file(struct trace_array *tr,
1557 				     struct dentry *d_tracer)
1558 {
1559 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1560 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1561 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1562 					      d_tracer, &tr->max_latency,
1563 					      &tracing_max_lat_fops);
1564 }
1565 
1566 __init static int latency_fsnotify_init(void)
1567 {
1568 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1569 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1570 	if (!fsnotify_wq) {
1571 		pr_err("Unable to allocate tr_max_lat_wq\n");
1572 		return -ENOMEM;
1573 	}
1574 	return 0;
1575 }
1576 
1577 late_initcall_sync(latency_fsnotify_init);
1578 
1579 void latency_fsnotify(struct trace_array *tr)
1580 {
1581 	if (!fsnotify_wq)
1582 		return;
1583 	/*
1584 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1585 	 * possible that we are called from __schedule() or do_idle(), which
1586 	 * could cause a deadlock.
1587 	 */
1588 	irq_work_queue(&tr->fsnotify_irqwork);
1589 }
1590 
1591 /*
1592  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1593  *  defined(CONFIG_FSNOTIFY)
1594  */
1595 #else
1596 
1597 #define trace_create_maxlat_file(tr, d_tracer)				\
1598 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1599 			  &tr->max_latency, &tracing_max_lat_fops)
1600 
1601 #endif
1602 
1603 #ifdef CONFIG_TRACER_MAX_TRACE
1604 /*
1605  * Copy the new maximum trace into the separate maximum-trace
1606  * structure. (this way the maximum trace is permanently saved,
1607  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1608  */
1609 static void
1610 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1611 {
1612 	struct array_buffer *trace_buf = &tr->array_buffer;
1613 	struct array_buffer *max_buf = &tr->max_buffer;
1614 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1615 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1616 
1617 	max_buf->cpu = cpu;
1618 	max_buf->time_start = data->preempt_timestamp;
1619 
1620 	max_data->saved_latency = tr->max_latency;
1621 	max_data->critical_start = data->critical_start;
1622 	max_data->critical_end = data->critical_end;
1623 
1624 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1625 	max_data->pid = tsk->pid;
1626 	/*
1627 	 * If tsk == current, then use current_uid(), as that does not use
1628 	 * RCU. The irq tracer can be called out of RCU scope.
1629 	 */
1630 	if (tsk == current)
1631 		max_data->uid = current_uid();
1632 	else
1633 		max_data->uid = task_uid(tsk);
1634 
1635 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1636 	max_data->policy = tsk->policy;
1637 	max_data->rt_priority = tsk->rt_priority;
1638 
1639 	/* record this task's comm */
1640 	tracing_record_cmdline(tsk);
1641 	latency_fsnotify(tr);
1642 }
1643 
1644 /**
1645  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1646  * @tr: tracer
1647  * @tsk: the task with the latency
1648  * @cpu: The cpu that initiated the trace.
1649  * @cond_data: User data associated with a conditional snapshot
1650  *
1651  * Flip the buffers between the @tr and the max_tr and record information
1652  * about which task was the cause of this latency.
1653  */
1654 void
1655 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1656 	      void *cond_data)
1657 {
1658 	if (tr->stop_count)
1659 		return;
1660 
1661 	WARN_ON_ONCE(!irqs_disabled());
1662 
1663 	if (!tr->allocated_snapshot) {
1664 		/* Only the nop tracer should hit this when disabling */
1665 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1666 		return;
1667 	}
1668 
1669 	arch_spin_lock(&tr->max_lock);
1670 
1671 	/* Inherit the recordable setting from array_buffer */
1672 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1673 		ring_buffer_record_on(tr->max_buffer.buffer);
1674 	else
1675 		ring_buffer_record_off(tr->max_buffer.buffer);
1676 
1677 #ifdef CONFIG_TRACER_SNAPSHOT
1678 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1679 		goto out_unlock;
1680 #endif
1681 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1682 
1683 	__update_max_tr(tr, tsk, cpu);
1684 
1685  out_unlock:
1686 	arch_spin_unlock(&tr->max_lock);
1687 }
1688 
1689 /**
1690  * update_max_tr_single - only copy one trace over, and reset the rest
1691  * @tr: tracer
1692  * @tsk: task with the latency
1693  * @cpu: the cpu of the buffer to copy.
1694  *
1695  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1696  */
1697 void
1698 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1699 {
1700 	int ret;
1701 
1702 	if (tr->stop_count)
1703 		return;
1704 
1705 	WARN_ON_ONCE(!irqs_disabled());
1706 	if (!tr->allocated_snapshot) {
1707 		/* Only the nop tracer should hit this when disabling */
1708 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1709 		return;
1710 	}
1711 
1712 	arch_spin_lock(&tr->max_lock);
1713 
1714 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1715 
1716 	if (ret == -EBUSY) {
1717 		/*
1718 		 * We failed to swap the buffer due to a commit taking
1719 		 * place on this CPU. We fail to record, but we reset
1720 		 * the max trace buffer (no one writes directly to it)
1721 		 * and flag that it failed.
1722 		 */
1723 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1724 			"Failed to swap buffers due to commit in progress\n");
1725 	}
1726 
1727 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1728 
1729 	__update_max_tr(tr, tsk, cpu);
1730 	arch_spin_unlock(&tr->max_lock);
1731 }
1732 #endif /* CONFIG_TRACER_MAX_TRACE */
1733 
1734 static int wait_on_pipe(struct trace_iterator *iter, int full)
1735 {
1736 	/* Iterators are static, they should be filled or empty */
1737 	if (trace_buffer_iter(iter, iter->cpu_file))
1738 		return 0;
1739 
1740 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1741 				full);
1742 }
1743 
1744 #ifdef CONFIG_FTRACE_STARTUP_TEST
1745 static bool selftests_can_run;
1746 
1747 struct trace_selftests {
1748 	struct list_head		list;
1749 	struct tracer			*type;
1750 };
1751 
1752 static LIST_HEAD(postponed_selftests);
1753 
1754 static int save_selftest(struct tracer *type)
1755 {
1756 	struct trace_selftests *selftest;
1757 
1758 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1759 	if (!selftest)
1760 		return -ENOMEM;
1761 
1762 	selftest->type = type;
1763 	list_add(&selftest->list, &postponed_selftests);
1764 	return 0;
1765 }
1766 
1767 static int run_tracer_selftest(struct tracer *type)
1768 {
1769 	struct trace_array *tr = &global_trace;
1770 	struct tracer *saved_tracer = tr->current_trace;
1771 	int ret;
1772 
1773 	if (!type->selftest || tracing_selftest_disabled)
1774 		return 0;
1775 
1776 	/*
1777 	 * If a tracer registers early in boot up (before scheduling is
1778 	 * initialized and such), then do not run its selftests yet.
1779 	 * Instead, run it a little later in the boot process.
1780 	 */
1781 	if (!selftests_can_run)
1782 		return save_selftest(type);
1783 
1784 	/*
1785 	 * Run a selftest on this tracer.
1786 	 * Here we reset the trace buffer, and set the current
1787 	 * tracer to be this tracer. The tracer can then run some
1788 	 * internal tracing to verify that everything is in order.
1789 	 * If we fail, we do not register this tracer.
1790 	 */
1791 	tracing_reset_online_cpus(&tr->array_buffer);
1792 
1793 	tr->current_trace = type;
1794 
1795 #ifdef CONFIG_TRACER_MAX_TRACE
1796 	if (type->use_max_tr) {
1797 		/* If we expanded the buffers, make sure the max is expanded too */
1798 		if (ring_buffer_expanded)
1799 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1800 					   RING_BUFFER_ALL_CPUS);
1801 		tr->allocated_snapshot = true;
1802 	}
1803 #endif
1804 
1805 	/* the test is responsible for initializing and enabling */
1806 	pr_info("Testing tracer %s: ", type->name);
1807 	ret = type->selftest(type, tr);
1808 	/* the test is responsible for resetting too */
1809 	tr->current_trace = saved_tracer;
1810 	if (ret) {
1811 		printk(KERN_CONT "FAILED!\n");
1812 		/* Add the warning after printing 'FAILED' */
1813 		WARN_ON(1);
1814 		return -1;
1815 	}
1816 	/* Only reset on passing, to avoid touching corrupted buffers */
1817 	tracing_reset_online_cpus(&tr->array_buffer);
1818 
1819 #ifdef CONFIG_TRACER_MAX_TRACE
1820 	if (type->use_max_tr) {
1821 		tr->allocated_snapshot = false;
1822 
1823 		/* Shrink the max buffer again */
1824 		if (ring_buffer_expanded)
1825 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1826 					   RING_BUFFER_ALL_CPUS);
1827 	}
1828 #endif
1829 
1830 	printk(KERN_CONT "PASSED\n");
1831 	return 0;
1832 }
1833 
1834 static __init int init_trace_selftests(void)
1835 {
1836 	struct trace_selftests *p, *n;
1837 	struct tracer *t, **last;
1838 	int ret;
1839 
1840 	selftests_can_run = true;
1841 
1842 	mutex_lock(&trace_types_lock);
1843 
1844 	if (list_empty(&postponed_selftests))
1845 		goto out;
1846 
1847 	pr_info("Running postponed tracer tests:\n");
1848 
1849 	tracing_selftest_running = true;
1850 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1851 		/* This loop can take minutes when sanitizers are enabled, so
1852 		 * let's make sure we allow RCU processing.
1853 		 */
1854 		cond_resched();
1855 		ret = run_tracer_selftest(p->type);
1856 		/* If the test fails, then warn and remove from available_tracers */
1857 		if (ret < 0) {
1858 			WARN(1, "tracer: %s failed selftest, disabling\n",
1859 			     p->type->name);
1860 			last = &trace_types;
1861 			for (t = trace_types; t; t = t->next) {
1862 				if (t == p->type) {
1863 					*last = t->next;
1864 					break;
1865 				}
1866 				last = &t->next;
1867 			}
1868 		}
1869 		list_del(&p->list);
1870 		kfree(p);
1871 	}
1872 	tracing_selftest_running = false;
1873 
1874  out:
1875 	mutex_unlock(&trace_types_lock);
1876 
1877 	return 0;
1878 }
1879 core_initcall(init_trace_selftests);
1880 #else
1881 static inline int run_tracer_selftest(struct tracer *type)
1882 {
1883 	return 0;
1884 }
1885 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1886 
1887 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1888 
1889 static void __init apply_trace_boot_options(void);
1890 
1891 /**
1892  * register_tracer - register a tracer with the ftrace system.
1893  * @type: the plugin for the tracer
1894  *
1895  * Register a new plugin tracer.
1896  */
1897 int __init register_tracer(struct tracer *type)
1898 {
1899 	struct tracer *t;
1900 	int ret = 0;
1901 
1902 	if (!type->name) {
1903 		pr_info("Tracer must have a name\n");
1904 		return -1;
1905 	}
1906 
1907 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1908 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1909 		return -1;
1910 	}
1911 
1912 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1913 		pr_warn("Can not register tracer %s due to lockdown\n",
1914 			   type->name);
1915 		return -EPERM;
1916 	}
1917 
1918 	mutex_lock(&trace_types_lock);
1919 
1920 	tracing_selftest_running = true;
1921 
1922 	for (t = trace_types; t; t = t->next) {
1923 		if (strcmp(type->name, t->name) == 0) {
1924 			/* already found */
1925 			pr_info("Tracer %s already registered\n",
1926 				type->name);
1927 			ret = -1;
1928 			goto out;
1929 		}
1930 	}
1931 
1932 	if (!type->set_flag)
1933 		type->set_flag = &dummy_set_flag;
1934 	if (!type->flags) {
1935 		/* allocate a dummy tracer_flags */
1936 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1937 		if (!type->flags) {
1938 			ret = -ENOMEM;
1939 			goto out;
1940 		}
1941 		type->flags->val = 0;
1942 		type->flags->opts = dummy_tracer_opt;
1943 	} else
1944 		if (!type->flags->opts)
1945 			type->flags->opts = dummy_tracer_opt;
1946 
1947 	/* store the tracer for __set_tracer_option */
1948 	type->flags->trace = type;
1949 
1950 	ret = run_tracer_selftest(type);
1951 	if (ret < 0)
1952 		goto out;
1953 
1954 	type->next = trace_types;
1955 	trace_types = type;
1956 	add_tracer_options(&global_trace, type);
1957 
1958  out:
1959 	tracing_selftest_running = false;
1960 	mutex_unlock(&trace_types_lock);
1961 
1962 	if (ret || !default_bootup_tracer)
1963 		goto out_unlock;
1964 
1965 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1966 		goto out_unlock;
1967 
1968 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1969 	/* Do we want this tracer to start on bootup? */
1970 	tracing_set_tracer(&global_trace, type->name);
1971 	default_bootup_tracer = NULL;
1972 
1973 	apply_trace_boot_options();
1974 
1975 	/* Disable other selftests, since this will break them. */
1976 	tracing_selftest_disabled = true;
1977 #ifdef CONFIG_FTRACE_STARTUP_TEST
1978 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1979 	       type->name);
1980 #endif
1981 
1982  out_unlock:
1983 	return ret;
1984 }
1985 
1986 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1987 {
1988 	struct trace_buffer *buffer = buf->buffer;
1989 
1990 	if (!buffer)
1991 		return;
1992 
1993 	ring_buffer_record_disable(buffer);
1994 
1995 	/* Make sure all commits have finished */
1996 	synchronize_rcu();
1997 	ring_buffer_reset_cpu(buffer, cpu);
1998 
1999 	ring_buffer_record_enable(buffer);
2000 }
2001 
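/*
 * Reset the buffers of all online CPUs: disable recording, wait for
 * in-flight commits to finish, then reset each per-CPU buffer and
 * re-enable recording.
 */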
2002 void tracing_reset_online_cpus(struct array_buffer *buf)
2003 {
2004 	struct trace_buffer *buffer = buf->buffer;
2005 	int cpu;
2006 
2007 	if (!buffer)
2008 		return;
2009 
2010 	ring_buffer_record_disable(buffer);
2011 
2012 	/* Make sure all commits have finished */
2013 	synchronize_rcu();
2014 
2015 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2016 
2017 	for_each_online_cpu(cpu)
2018 		ring_buffer_reset_cpu(buffer, cpu);
2019 
2020 	ring_buffer_record_enable(buffer);
2021 }
2022 
2023 /* Must have trace_types_lock held */
2024 void tracing_reset_all_online_cpus(void)
2025 {
2026 	struct trace_array *tr;
2027 
2028 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2029 		if (!tr->clear_trace)
2030 			continue;
2031 		tr->clear_trace = false;
2032 		tracing_reset_online_cpus(&tr->array_buffer);
2033 #ifdef CONFIG_TRACER_MAX_TRACE
2034 		tracing_reset_online_cpus(&tr->max_buffer);
2035 #endif
2036 	}
2037 }
2038 
2039 static int *tgid_map;
2040 
2041 #define SAVED_CMDLINES_DEFAULT 128
2042 #define NO_CMDLINE_MAP UINT_MAX
2043 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2044 struct saved_cmdlines_buffer {
2045 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2046 	unsigned *map_cmdline_to_pid;
2047 	unsigned cmdline_num;
2048 	int cmdline_idx;
2049 	char *saved_cmdlines;
2050 };
2051 static struct saved_cmdlines_buffer *savedcmd;
2052 
2053 /* temporarily disable recording */
2054 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2055 
2056 static inline char *get_saved_cmdlines(int idx)
2057 {
2058 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2059 }
2060 
2061 static inline void set_cmdline(int idx, const char *cmdline)
2062 {
2063 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2064 }
2065 
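/*
 * Allocate the arrays used to map PIDs to saved comms. @val is the
 * number of cmdline entries to keep; every slot starts out as
 * NO_CMDLINE_MAP.
 */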
2066 static int allocate_cmdlines_buffer(unsigned int val,
2067 				    struct saved_cmdlines_buffer *s)
2068 {
2069 	s->map_cmdline_to_pid = kmalloc_array(val,
2070 					      sizeof(*s->map_cmdline_to_pid),
2071 					      GFP_KERNEL);
2072 	if (!s->map_cmdline_to_pid)
2073 		return -ENOMEM;
2074 
2075 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2076 	if (!s->saved_cmdlines) {
2077 		kfree(s->map_cmdline_to_pid);
2078 		return -ENOMEM;
2079 	}
2080 
2081 	s->cmdline_idx = 0;
2082 	s->cmdline_num = val;
2083 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2084 	       sizeof(s->map_pid_to_cmdline));
2085 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2086 	       val * sizeof(*s->map_cmdline_to_pid));
2087 
2088 	return 0;
2089 }
2090 
2091 static int trace_create_savedcmd(void)
2092 {
2093 	int ret;
2094 
2095 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2096 	if (!savedcmd)
2097 		return -ENOMEM;
2098 
2099 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2100 	if (ret < 0) {
2101 		kfree(savedcmd);
2102 		savedcmd = NULL;
2103 		return -ENOMEM;
2104 	}
2105 
2106 	return 0;
2107 }
2108 
2109 int is_tracing_stopped(void)
2110 {
2111 	return global_trace.stop_count;
2112 }
2113 
2114 /**
2115  * tracing_start - quick start of the tracer
2116  *
2117  * If tracing is enabled but was stopped by tracing_stop,
2118  * this will start the tracer back up.
2119  */
2120 void tracing_start(void)
2121 {
2122 	struct trace_buffer *buffer;
2123 	unsigned long flags;
2124 
2125 	if (tracing_disabled)
2126 		return;
2127 
2128 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2129 	if (--global_trace.stop_count) {
2130 		if (global_trace.stop_count < 0) {
2131 			/* Someone screwed up their debugging */
2132 			WARN_ON_ONCE(1);
2133 			global_trace.stop_count = 0;
2134 		}
2135 		goto out;
2136 	}
2137 
2138 	/* Prevent the buffers from switching */
2139 	arch_spin_lock(&global_trace.max_lock);
2140 
2141 	buffer = global_trace.array_buffer.buffer;
2142 	if (buffer)
2143 		ring_buffer_record_enable(buffer);
2144 
2145 #ifdef CONFIG_TRACER_MAX_TRACE
2146 	buffer = global_trace.max_buffer.buffer;
2147 	if (buffer)
2148 		ring_buffer_record_enable(buffer);
2149 #endif
2150 
2151 	arch_spin_unlock(&global_trace.max_lock);
2152 
2153  out:
2154 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2155 }
2156 
2157 static void tracing_start_tr(struct trace_array *tr)
2158 {
2159 	struct trace_buffer *buffer;
2160 	unsigned long flags;
2161 
2162 	if (tracing_disabled)
2163 		return;
2164 
2165 	/* If global, we need to also start the max tracer */
2166 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2167 		return tracing_start();
2168 
2169 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2170 
2171 	if (--tr->stop_count) {
2172 		if (tr->stop_count < 0) {
2173 			/* Someone screwed up their debugging */
2174 			WARN_ON_ONCE(1);
2175 			tr->stop_count = 0;
2176 		}
2177 		goto out;
2178 	}
2179 
2180 	buffer = tr->array_buffer.buffer;
2181 	if (buffer)
2182 		ring_buffer_record_enable(buffer);
2183 
2184  out:
2185 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2186 }
2187 
2188 /**
2189  * tracing_stop - quick stop of the tracer
2190  *
2191  * Light weight way to stop tracing. Use in conjunction with
2192  * tracing_start.
2193  */
2194 void tracing_stop(void)
2195 {
2196 	struct trace_buffer *buffer;
2197 	unsigned long flags;
2198 
2199 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2200 	if (global_trace.stop_count++)
2201 		goto out;
2202 
2203 	/* Prevent the buffers from switching */
2204 	arch_spin_lock(&global_trace.max_lock);
2205 
2206 	buffer = global_trace.array_buffer.buffer;
2207 	if (buffer)
2208 		ring_buffer_record_disable(buffer);
2209 
2210 #ifdef CONFIG_TRACER_MAX_TRACE
2211 	buffer = global_trace.max_buffer.buffer;
2212 	if (buffer)
2213 		ring_buffer_record_disable(buffer);
2214 #endif
2215 
2216 	arch_spin_unlock(&global_trace.max_lock);
2217 
2218  out:
2219 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2220 }
2221 
2222 static void tracing_stop_tr(struct trace_array *tr)
2223 {
2224 	struct trace_buffer *buffer;
2225 	unsigned long flags;
2226 
2227 	/* If global, we need to also stop the max tracer */
2228 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2229 		return tracing_stop();
2230 
2231 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2232 	if (tr->stop_count++)
2233 		goto out;
2234 
2235 	buffer = tr->array_buffer.buffer;
2236 	if (buffer)
2237 		ring_buffer_record_disable(buffer);
2238 
2239  out:
2240 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2241 }
2242 
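/*
 * Save the comm of @tsk in the cmdline cache, evicting the previous
 * owner of the slot if needed. Returns 1 on success (the idle task is
 * treated as a success), 0 if the PID is out of range or the cmdline
 * lock could not be taken.
 */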
2243 static int trace_save_cmdline(struct task_struct *tsk)
2244 {
2245 	unsigned pid, idx;
2246 
2247 	/* treat recording of idle task as a success */
2248 	if (!tsk->pid)
2249 		return 1;
2250 
2251 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2252 		return 0;
2253 
2254 	/*
2255 	 * It's not the end of the world if we don't get
2256 	 * the lock, but we also don't want to spin
2257 	 * nor do we want to disable interrupts,
2258 	 * so if we miss here, then better luck next time.
2259 	 */
2260 	if (!arch_spin_trylock(&trace_cmdline_lock))
2261 		return 0;
2262 
2263 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2264 	if (idx == NO_CMDLINE_MAP) {
2265 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2266 
2267 		/*
2268 		 * Check whether the cmdline buffer at idx has a pid
2269 		 * mapped. We are going to overwrite that entry so we
2270 		 * need to clear the map_pid_to_cmdline. Otherwise we
2271 		 * would read the new comm for the old pid.
2272 		 */
2273 		pid = savedcmd->map_cmdline_to_pid[idx];
2274 		if (pid != NO_CMDLINE_MAP)
2275 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2276 
2277 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2278 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2279 
2280 		savedcmd->cmdline_idx = idx;
2281 	}
2282 
2283 	set_cmdline(idx, tsk->comm);
2284 
2285 	arch_spin_unlock(&trace_cmdline_lock);
2286 
2287 	return 1;
2288 }
2289 
2290 static void __trace_find_cmdline(int pid, char comm[])
2291 {
2292 	unsigned map;
2293 
2294 	if (!pid) {
2295 		strcpy(comm, "<idle>");
2296 		return;
2297 	}
2298 
2299 	if (WARN_ON_ONCE(pid < 0)) {
2300 		strcpy(comm, "<XXX>");
2301 		return;
2302 	}
2303 
2304 	if (pid > PID_MAX_DEFAULT) {
2305 		strcpy(comm, "<...>");
2306 		return;
2307 	}
2308 
2309 	map = savedcmd->map_pid_to_cmdline[pid];
2310 	if (map != NO_CMDLINE_MAP)
2311 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2312 	else
2313 		strcpy(comm, "<...>");
2314 }
2315 
2316 void trace_find_cmdline(int pid, char comm[])
2317 {
2318 	preempt_disable();
2319 	arch_spin_lock(&trace_cmdline_lock);
2320 
2321 	__trace_find_cmdline(pid, comm);
2322 
2323 	arch_spin_unlock(&trace_cmdline_lock);
2324 	preempt_enable();
2325 }
2326 
2327 int trace_find_tgid(int pid)
2328 {
2329 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2330 		return 0;
2331 
2332 	return tgid_map[pid];
2333 }
2334 
2335 static int trace_save_tgid(struct task_struct *tsk)
2336 {
2337 	/* treat recording of idle task as a success */
2338 	if (!tsk->pid)
2339 		return 1;
2340 
2341 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2342 		return 0;
2343 
2344 	tgid_map[tsk->pid] = tsk->tgid;
2345 	return 1;
2346 }
2347 
2348 static bool tracing_record_taskinfo_skip(int flags)
2349 {
2350 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2351 		return true;
2352 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2353 		return true;
2354 	if (!__this_cpu_read(trace_taskinfo_save))
2355 		return true;
2356 	return false;
2357 }
2358 
2359 /**
2360  * tracing_record_taskinfo - record the task info of a task
2361  *
2362  * @task:  task to record
2363  * @flags: TRACE_RECORD_CMDLINE for recording comm
2364  *         TRACE_RECORD_TGID for recording tgid
2365  */
2366 void tracing_record_taskinfo(struct task_struct *task, int flags)
2367 {
2368 	bool done;
2369 
2370 	if (tracing_record_taskinfo_skip(flags))
2371 		return;
2372 
2373 	/*
2374 	 * Record as much task information as possible. If some fail, continue
2375 	 * to try to record the others.
2376 	 */
2377 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2378 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2379 
2380 	/* If recording any information failed, retry again soon. */
2381 	/* If recording any information failed, try again soon. */
2382 		return;
2383 
2384 	__this_cpu_write(trace_taskinfo_save, false);
2385 }
2386 
2387 /**
2388  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2389  *
2390  * @prev: previous task during sched_switch
2391  * @next: next task during sched_switch
2392  * @flags: TRACE_RECORD_CMDLINE for recording comm
2393  *         TRACE_RECORD_TGID for recording tgid
2394  */
2395 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2396 					  struct task_struct *next, int flags)
2397 {
2398 	bool done;
2399 
2400 	if (tracing_record_taskinfo_skip(flags))
2401 		return;
2402 
2403 	/*
2404 	 * Record as much task information as possible. If some fail, continue
2405 	 * to try to record the others.
2406 	 */
2407 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2408 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2409 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2410 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2411 
2412 	/* If recording any information failed, retry again soon. */
2413 	/* If recording any information failed, try again soon. */
2414 		return;
2415 
2416 	__this_cpu_write(trace_taskinfo_save, false);
2417 }
2418 
2419 /* Helpers to record a specific task information */
2420 void tracing_record_cmdline(struct task_struct *task)
2421 {
2422 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2423 }
2424 
2425 void tracing_record_tgid(struct task_struct *task)
2426 {
2427 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2428 }
2429 
2430 /*
2431  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2432  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2433  * simplifies those functions and keeps them in sync.
2434  */
2435 enum print_line_t trace_handle_return(struct trace_seq *s)
2436 {
2437 	return trace_seq_has_overflowed(s) ?
2438 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2439 }
2440 EXPORT_SYMBOL_GPL(trace_handle_return);
2441 
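/*
 * Fill in the fields common to all trace entries (pid, type, preempt
 * count and the irq/softirq/NMI/resched flags) from the current
 * context.
 */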
2442 void
2443 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2444 			     unsigned long flags, int pc)
2445 {
2446 	struct task_struct *tsk = current;
2447 
2448 	entry->preempt_count		= pc & 0xff;
2449 	entry->pid			= (tsk) ? tsk->pid : 0;
2450 	entry->type			= type;
2451 	entry->flags =
2452 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2453 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2454 #else
2455 		TRACE_FLAG_IRQS_NOSUPPORT |
2456 #endif
2457 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2458 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2459 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2460 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2461 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2462 }
2463 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2464 
2465 struct ring_buffer_event *
2466 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2467 			  int type,
2468 			  unsigned long len,
2469 			  unsigned long flags, int pc)
2470 {
2471 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2472 }
2473 
2474 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2475 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2476 static int trace_buffered_event_ref;
2477 
2478 /**
2479  * trace_buffered_event_enable - enable buffering events
2480  *
2481  * When events are being filtered, it is quicker to write the event
2482  * data into a temporary buffer if there is a good chance that the
2483  * event will not be committed. Discarding an event from the ring
2484  * buffer is not as fast as committing it, and is much slower than
2485  * copying the data and then committing the copy.
2486  *
2487  * When events may be filtered, allocate per-CPU buffers to write the
2488  * event data into. If an event is filtered, it is simply dropped;
2489  * otherwise, the entire data is committed to the ring buffer in one
2490  * shot.
2491  */
2492 void trace_buffered_event_enable(void)
2493 {
2494 	struct ring_buffer_event *event;
2495 	struct page *page;
2496 	int cpu;
2497 
2498 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2499 
2500 	if (trace_buffered_event_ref++)
2501 		return;
2502 
2503 	for_each_tracing_cpu(cpu) {
2504 		page = alloc_pages_node(cpu_to_node(cpu),
2505 					GFP_KERNEL | __GFP_NORETRY, 0);
2506 		if (!page)
2507 			goto failed;
2508 
2509 		event = page_address(page);
2510 		memset(event, 0, sizeof(*event));
2511 
2512 		per_cpu(trace_buffered_event, cpu) = event;
2513 
2514 		preempt_disable();
2515 		if (cpu == smp_processor_id() &&
2516 		    this_cpu_read(trace_buffered_event) !=
2517 		    per_cpu(trace_buffered_event, cpu))
2518 			WARN_ON_ONCE(1);
2519 		preempt_enable();
2520 	}
2521 
2522 	return;
2523  failed:
2524 	trace_buffered_event_disable();
2525 }
2526 
2527 static void enable_trace_buffered_event(void *data)
2528 {
2529 	/* Probably not needed, but do it anyway */
2530 	smp_rmb();
2531 	this_cpu_dec(trace_buffered_event_cnt);
2532 }
2533 
2534 static void disable_trace_buffered_event(void *data)
2535 {
2536 	this_cpu_inc(trace_buffered_event_cnt);
2537 }
2538 
2539 /**
2540  * trace_buffered_event_disable - disable buffering events
2541  *
2542  * When a filter is removed, it is faster to not use the buffered
2543  * events, and to commit directly into the ring buffer. Free up
2544  * the temp buffers when there are no more users. This requires
2545  * special synchronization with current events.
2546  */
2547 void trace_buffered_event_disable(void)
2548 {
2549 	int cpu;
2550 
2551 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2552 
2553 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2554 		return;
2555 
2556 	if (--trace_buffered_event_ref)
2557 		return;
2558 
2559 	preempt_disable();
2560 	/* For each CPU, set the buffer as used. */
2561 	smp_call_function_many(tracing_buffer_mask,
2562 			       disable_trace_buffered_event, NULL, 1);
2563 	preempt_enable();
2564 
2565 	/* Wait for all current users to finish */
2566 	synchronize_rcu();
2567 
2568 	for_each_tracing_cpu(cpu) {
2569 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2570 		per_cpu(trace_buffered_event, cpu) = NULL;
2571 	}
2572 	/*
2573 	 * Make sure trace_buffered_event is NULL before clearing
2574 	 * trace_buffered_event_cnt.
2575 	 */
2576 	smp_wmb();
2577 
2578 	preempt_disable();
2579 	/* Do the work on each cpu */
2580 	smp_call_function_many(tracing_buffer_mask,
2581 			       enable_trace_buffered_event, NULL, 1);
2582 	preempt_enable();
2583 }
2584 
2585 static struct trace_buffer *temp_buffer;
2586 
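/*
 * Reserve space for a trace event. If the event may be filtered or is
 * soft disabled (and absolute timestamps are not in use), try the
 * per-CPU buffered event first so that a discard stays cheap.
 * Otherwise reserve directly on the ring buffer, falling back to
 * temp_buffer when tracing is off but triggers still need to see the
 * event data.
 */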
2587 struct ring_buffer_event *
2588 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2589 			  struct trace_event_file *trace_file,
2590 			  int type, unsigned long len,
2591 			  unsigned long flags, int pc)
2592 {
2593 	struct ring_buffer_event *entry;
2594 	int val;
2595 
2596 	*current_rb = trace_file->tr->array_buffer.buffer;
2597 
2598 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2599 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2600 	    (entry = this_cpu_read(trace_buffered_event))) {
2601 		/* Try to use the per cpu buffer first */
2602 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2603 		if (val == 1) {
2604 			trace_event_setup(entry, type, flags, pc);
2605 			entry->array[0] = len;
2606 			return entry;
2607 		}
2608 		this_cpu_dec(trace_buffered_event_cnt);
2609 	}
2610 
2611 	entry = __trace_buffer_lock_reserve(*current_rb,
2612 					    type, len, flags, pc);
2613 	/*
2614 	 * If tracing is off, but we have triggers enabled,
2615 	 * we still need to look at the event data. Use the temp_buffer
2616 	 * to store the trace event for the trigger to use. It's recursion
2617 	 * safe and will not be recorded anywhere.
2618 	 */
2619 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2620 		*current_rb = temp_buffer;
2621 		entry = __trace_buffer_lock_reserve(*current_rb,
2622 						    type, len, flags, pc);
2623 	}
2624 	return entry;
2625 }
2626 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2627 
2628 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2629 static DEFINE_MUTEX(tracepoint_printk_mutex);
2630 
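/*
 * Print a trace event through printk() using the global
 * tracepoint_print_iter. Used when the tracepoint_printk sysctl is
 * enabled.
 */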
2631 static void output_printk(struct trace_event_buffer *fbuffer)
2632 {
2633 	struct trace_event_call *event_call;
2634 	struct trace_event_file *file;
2635 	struct trace_event *event;
2636 	unsigned long flags;
2637 	struct trace_iterator *iter = tracepoint_print_iter;
2638 
2639 	/* We should never get here if iter is NULL */
2640 	if (WARN_ON_ONCE(!iter))
2641 		return;
2642 
2643 	event_call = fbuffer->trace_file->event_call;
2644 	if (!event_call || !event_call->event.funcs ||
2645 	    !event_call->event.funcs->trace)
2646 		return;
2647 
2648 	file = fbuffer->trace_file;
2649 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2650 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2651 	     !filter_match_preds(file->filter, fbuffer->entry)))
2652 		return;
2653 
2654 	event = &fbuffer->trace_file->event_call->event;
2655 
2656 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2657 	trace_seq_init(&iter->seq);
2658 	iter->ent = fbuffer->entry;
2659 	event_call->event.funcs->trace(iter, 0, event);
2660 	trace_seq_putc(&iter->seq, 0);
2661 	printk("%s", iter->seq.buffer);
2662 
2663 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2664 }
2665 
2666 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2667 			     void *buffer, size_t *lenp,
2668 			     loff_t *ppos)
2669 {
2670 	int save_tracepoint_printk;
2671 	int ret;
2672 
2673 	mutex_lock(&tracepoint_printk_mutex);
2674 	save_tracepoint_printk = tracepoint_printk;
2675 
2676 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2677 
2678 	/*
2679 	 * This will force an early exit, as tracepoint_printk
2680 	 * is always zero when tracepoint_print_iter is not allocated.
2681 	 */
2682 	if (!tracepoint_print_iter)
2683 		tracepoint_printk = 0;
2684 
2685 	if (save_tracepoint_printk == tracepoint_printk)
2686 		goto out;
2687 
2688 	if (tracepoint_printk)
2689 		static_key_enable(&tracepoint_printk_key.key);
2690 	else
2691 		static_key_disable(&tracepoint_printk_key.key);
2692 
2693  out:
2694 	mutex_unlock(&tracepoint_printk_mutex);
2695 
2696 	return ret;
2697 }
2698 
2699 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2700 {
2701 	if (static_key_false(&tracepoint_printk_key.key))
2702 		output_printk(fbuffer);
2703 
2704 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2705 				    fbuffer->event, fbuffer->entry,
2706 				    fbuffer->flags, fbuffer->pc, fbuffer->regs);
2707 }
2708 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2709 
2710 /*
2711  * Skip 3:
2712  *
2713  *   trace_buffer_unlock_commit_regs()
2714  *   trace_event_buffer_commit()
2715  *   trace_event_raw_event_xxx()
2716  */
2717 # define STACK_SKIP 3
2718 
2719 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2720 				     struct trace_buffer *buffer,
2721 				     struct ring_buffer_event *event,
2722 				     unsigned long flags, int pc,
2723 				     struct pt_regs *regs)
2724 {
2725 	__buffer_unlock_commit(buffer, event);
2726 
2727 	/*
2728 	 * If regs is not set, then skip the necessary functions.
2729 	 * Note, we can still get here via blktrace, wakeup tracer
2730 	 * and mmiotrace, but that's ok if they lose a function or
2731 	 * two. They are not that meaningful.
2732 	 */
2733 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2734 	ftrace_trace_userstack(buffer, flags, pc);
2735 }
2736 
2737 /*
2738  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2739  */
2740 void
2741 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2742 				   struct ring_buffer_event *event)
2743 {
2744 	__buffer_unlock_commit(buffer, event);
2745 }
2746 
2747 static void
2748 trace_process_export(struct trace_export *export,
2749 	       struct ring_buffer_event *event)
2750 {
2751 	struct trace_entry *entry;
2752 	unsigned int size = 0;
2753 
2754 	entry = ring_buffer_event_data(event);
2755 	size = ring_buffer_event_length(event);
2756 	export->write(export, entry, size);
2757 }
2758 
2759 static DEFINE_MUTEX(ftrace_export_lock);
2760 
2761 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2762 
2763 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2764 
2765 static inline void ftrace_exports_enable(void)
2766 {
2767 	static_branch_enable(&ftrace_exports_enabled);
2768 }
2769 
2770 static inline void ftrace_exports_disable(void)
2771 {
2772 	static_branch_disable(&ftrace_exports_enabled);
2773 }
2774 
2775 static void ftrace_exports(struct ring_buffer_event *event)
2776 {
2777 	struct trace_export *export;
2778 
2779 	preempt_disable_notrace();
2780 
2781 	export = rcu_dereference_raw_check(ftrace_exports_list);
2782 	while (export) {
2783 		trace_process_export(export, event);
2784 		export = rcu_dereference_raw_check(export->next);
2785 	}
2786 
2787 	preempt_enable_notrace();
2788 }
2789 
2790 static inline void
2791 add_trace_export(struct trace_export **list, struct trace_export *export)
2792 {
2793 	rcu_assign_pointer(export->next, *list);
2794 	/*
2795 	 * We are adding export to the list, but another
2796 	 * CPU might be walking that list. We need to make sure
2797 	 * the export->next pointer is valid before another CPU sees
2798 	 * the export pointer included in the list.
2799 	 */
2800 	rcu_assign_pointer(*list, export);
2801 }
2802 
2803 static inline int
2804 rm_trace_export(struct trace_export **list, struct trace_export *export)
2805 {
2806 	struct trace_export **p;
2807 
2808 	for (p = list; *p != NULL; p = &(*p)->next)
2809 		if (*p == export)
2810 			break;
2811 
2812 	if (*p != export)
2813 		return -1;
2814 
2815 	rcu_assign_pointer(*p, (*p)->next);
2816 
2817 	return 0;
2818 }
2819 
2820 static inline void
2821 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2822 {
2823 	if (*list == NULL)
2824 		ftrace_exports_enable();
2825 
2826 	add_trace_export(list, export);
2827 }
2828 
2829 static inline int
2830 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2831 {
2832 	int ret;
2833 
2834 	ret = rm_trace_export(list, export);
2835 	if (*list == NULL)
2836 		ftrace_exports_disable();
2837 
2838 	return ret;
2839 }
2840 
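/**
 * register_ftrace_export - add an export callback for function events
 * @export: The trace_export to register
 *
 * While registered, @export->write() is called for each function trace
 * event that is recorded. Returns 0 on success, or -1 if @export has
 * no write callback.
 */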
2841 int register_ftrace_export(struct trace_export *export)
2842 {
2843 	if (WARN_ON_ONCE(!export->write))
2844 		return -1;
2845 
2846 	mutex_lock(&ftrace_export_lock);
2847 
2848 	add_ftrace_export(&ftrace_exports_list, export);
2849 
2850 	mutex_unlock(&ftrace_export_lock);
2851 
2852 	return 0;
2853 }
2854 EXPORT_SYMBOL_GPL(register_ftrace_export);
2855 
2856 int unregister_ftrace_export(struct trace_export *export)
2857 {
2858 	int ret;
2859 
2860 	mutex_lock(&ftrace_export_lock);
2861 
2862 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2863 
2864 	mutex_unlock(&ftrace_export_lock);
2865 
2866 	return ret;
2867 }
2868 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2869 
2870 void
2871 trace_function(struct trace_array *tr,
2872 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2873 	       int pc)
2874 {
2875 	struct trace_event_call *call = &event_function;
2876 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2877 	struct ring_buffer_event *event;
2878 	struct ftrace_entry *entry;
2879 
2880 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2881 					    flags, pc);
2882 	if (!event)
2883 		return;
2884 	entry	= ring_buffer_event_data(event);
2885 	entry->ip			= ip;
2886 	entry->parent_ip		= parent_ip;
2887 
2888 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2889 		if (static_branch_unlikely(&ftrace_exports_enabled))
2890 			ftrace_exports(event);
2891 		__buffer_unlock_commit(buffer, event);
2892 	}
2893 }
2894 
2895 #ifdef CONFIG_STACKTRACE
2896 
2897 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2898 #define FTRACE_KSTACK_NESTING	4
2899 
2900 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2901 
2902 struct ftrace_stack {
2903 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2904 };
2905 
2906 
2907 struct ftrace_stacks {
2908 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2909 };
2910 
2911 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2912 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2913 
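/*
 * Record a kernel stack trace into the ring buffer. A small per-CPU
 * set of stacks, indexed by nesting level, is used so that a trace
 * taken from interrupt or NMI context does not clobber one being
 * saved in the interrupted context.
 */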
2914 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2915 				 unsigned long flags,
2916 				 int skip, int pc, struct pt_regs *regs)
2917 {
2918 	struct trace_event_call *call = &event_kernel_stack;
2919 	struct ring_buffer_event *event;
2920 	unsigned int size, nr_entries;
2921 	struct ftrace_stack *fstack;
2922 	struct stack_entry *entry;
2923 	int stackidx;
2924 
2925 	/*
2926 	 * Add one, for this function and the call to stack_trace_save().
2927 	 * If regs is set, then these functions will not be in the way.
2928 	 */
2929 #ifndef CONFIG_UNWINDER_ORC
2930 	if (!regs)
2931 		skip++;
2932 #endif
2933 
2934 	/*
2935 	 * Since events can happen in NMIs there's no safe way to
2936 	 * use the per-CPU ftrace_stacks without reserving a slot. We
2937 	 * reserve one, and if an interrupt or NMI comes in, it will
2938 	 * just use the next nesting level's stack.
2939 	 */
2940 	preempt_disable_notrace();
2941 
2942 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2943 
2944 	/* This should never happen. If it does, yell once and skip */
2945 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2946 		goto out;
2947 
2948 	/*
2949 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2950 	 * interrupt will either see the value pre increment or post
2951 	 * increment. If the interrupt happens pre increment it will have
2952 	 * restored the counter when it returns.  We just need a barrier to
2953 	 * keep gcc from moving things around.
2954 	 */
2955 	barrier();
2956 
2957 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2958 	size = ARRAY_SIZE(fstack->calls);
2959 
2960 	if (regs) {
2961 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2962 						   size, skip);
2963 	} else {
2964 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2965 	}
2966 
2967 	size = nr_entries * sizeof(unsigned long);
2968 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2969 					    sizeof(*entry) + size, flags, pc);
2970 	if (!event)
2971 		goto out;
2972 	entry = ring_buffer_event_data(event);
2973 
2974 	memcpy(&entry->caller, fstack->calls, size);
2975 	entry->size = nr_entries;
2976 
2977 	if (!call_filter_check_discard(call, entry, buffer, event))
2978 		__buffer_unlock_commit(buffer, event);
2979 
2980  out:
2981 	/* Again, don't let gcc optimize things here */
2982 	barrier();
2983 	__this_cpu_dec(ftrace_stack_reserve);
2984 	preempt_enable_notrace();
2985 
2986 }
2987 
2988 static inline void ftrace_trace_stack(struct trace_array *tr,
2989 				      struct trace_buffer *buffer,
2990 				      unsigned long flags,
2991 				      int skip, int pc, struct pt_regs *regs)
2992 {
2993 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2994 		return;
2995 
2996 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2997 }
2998 
2999 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3000 		   int pc)
3001 {
3002 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3003 
3004 	if (rcu_is_watching()) {
3005 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3006 		return;
3007 	}
3008 
3009 	/*
3010 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3011 	 * but if the above rcu_is_watching() failed, then the NMI
3012 	 * triggered someplace critical, and rcu_irq_enter() should
3013 	 * not be called from NMI.
3014 	 */
3015 	if (unlikely(in_nmi()))
3016 		return;
3017 
3018 	rcu_irq_enter_irqson();
3019 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3020 	rcu_irq_exit_irqson();
3021 }
3022 
3023 /**
3024  * trace_dump_stack - record a stack back trace in the trace buffer
3025  * @skip: Number of functions to skip (helper handlers)
3026  */
3027 void trace_dump_stack(int skip)
3028 {
3029 	unsigned long flags;
3030 
3031 	if (tracing_disabled || tracing_selftest_running)
3032 		return;
3033 
3034 	local_save_flags(flags);
3035 
3036 #ifndef CONFIG_UNWINDER_ORC
3037 	/* Skip 1 to skip this function. */
3038 	skip++;
3039 #endif
3040 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3041 			     flags, skip, preempt_count(), NULL);
3042 }
3043 EXPORT_SYMBOL_GPL(trace_dump_stack);
3044 
3045 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3046 static DEFINE_PER_CPU(int, user_stack_count);
3047 
3048 static void
3049 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3050 {
3051 	struct trace_event_call *call = &event_user_stack;
3052 	struct ring_buffer_event *event;
3053 	struct userstack_entry *entry;
3054 
3055 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3056 		return;
3057 
3058 	/*
3059 	 * NMIs cannot handle page faults, even with fixups.
3060 	 * Saving the user stack can (and often does) fault.
3061 	 */
3062 	if (unlikely(in_nmi()))
3063 		return;
3064 
3065 	/*
3066 	 * prevent recursion, since the user stack tracing may
3067 	 * trigger other kernel events.
3068 	 */
3069 	preempt_disable();
3070 	if (__this_cpu_read(user_stack_count))
3071 		goto out;
3072 
3073 	__this_cpu_inc(user_stack_count);
3074 
3075 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3076 					    sizeof(*entry), flags, pc);
3077 	if (!event)
3078 		goto out_drop_count;
3079 	entry	= ring_buffer_event_data(event);
3080 
3081 	entry->tgid		= current->tgid;
3082 	memset(&entry->caller, 0, sizeof(entry->caller));
3083 
3084 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3085 	if (!call_filter_check_discard(call, entry, buffer, event))
3086 		__buffer_unlock_commit(buffer, event);
3087 
3088  out_drop_count:
3089 	__this_cpu_dec(user_stack_count);
3090  out:
3091 	preempt_enable();
3092 }
3093 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3094 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3095 				   unsigned long flags, int pc)
3096 {
3097 }
3098 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3099 
3100 #endif /* CONFIG_STACKTRACE */
3101 
3102 /* created for use with alloc_percpu */
3103 struct trace_buffer_struct {
3104 	int nesting;
3105 	char buffer[4][TRACE_BUF_SIZE];
3106 };
3107 
3108 static struct trace_buffer_struct *trace_percpu_buffer;
3109 
3110 /*
3111  * This allows for lockless recording.  If we're nested too deeply, then
3112  * this returns NULL.
3113  */
3114 static char *get_trace_buf(void)
3115 {
3116 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3117 
3118 	if (!buffer || buffer->nesting >= 4)
3119 		return NULL;
3120 
3121 	buffer->nesting++;
3122 
3123 	/* Interrupts must see nesting incremented before we use the buffer */
3124 	barrier();
3125 	return &buffer->buffer[buffer->nesting][0];
3126 }
3127 
3128 static void put_trace_buf(void)
3129 {
3130 	/* Don't let the decrement of nesting leak before this */
3131 	barrier();
3132 	this_cpu_dec(trace_percpu_buffer->nesting);
3133 }
3134 
3135 static int alloc_percpu_trace_buffer(void)
3136 {
3137 	struct trace_buffer_struct *buffers;
3138 
3139 	buffers = alloc_percpu(struct trace_buffer_struct);
3140 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3141 		return -ENOMEM;
3142 
3143 	trace_percpu_buffer = buffers;
3144 	return 0;
3145 }
3146 
3147 static int buffers_allocated;
3148 
3149 void trace_printk_init_buffers(void)
3150 {
3151 	if (buffers_allocated)
3152 		return;
3153 
3154 	if (alloc_percpu_trace_buffer())
3155 		return;
3156 
3157 	/* trace_printk() is for debug use only. Don't use it in production. */
3158 
3159 	pr_warn("\n");
3160 	pr_warn("**********************************************************\n");
3161 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3162 	pr_warn("**                                                      **\n");
3163 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3164 	pr_warn("**                                                      **\n");
3165 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3166 	pr_warn("** unsafe for production use.                           **\n");
3167 	pr_warn("**                                                      **\n");
3168 	pr_warn("** If you see this message and you are not debugging    **\n");
3169 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3170 	pr_warn("**                                                      **\n");
3171 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3172 	pr_warn("**********************************************************\n");
3173 
3174 	/* Expand the buffers to the set size */
3175 	tracing_update_buffers();
3176 
3177 	buffers_allocated = 1;
3178 
3179 	/*
3180 	 * trace_printk_init_buffers() can be called by modules.
3181 	 * If that happens, then we need to start cmdline recording
3182 	 * directly here. If the global_trace.array_buffer.buffer is already
3183 	 * allocated here, then this was called by module code.
3184 	 */
3185 	if (global_trace.array_buffer.buffer)
3186 		tracing_start_cmdline_record();
3187 }
3188 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3189 
3190 void trace_printk_start_comm(void)
3191 {
3192 	/* Start tracing comms if trace printk is set */
3193 	if (!buffers_allocated)
3194 		return;
3195 	tracing_start_cmdline_record();
3196 }
3197 
3198 static void trace_printk_start_stop_comm(int enabled)
3199 {
3200 	if (!buffers_allocated)
3201 		return;
3202 
3203 	if (enabled)
3204 		tracing_start_cmdline_record();
3205 	else
3206 		tracing_stop_cmdline_record();
3207 }
3208 
3209 /**
3210  * trace_vbprintk - write binary msg to tracing buffer
3211  * @ip:    The address of the caller
3212  * @fmt:   The string format to write to the buffer
3213  * @args:  Arguments for @fmt
3214  */
3215 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3216 {
3217 	struct trace_event_call *call = &event_bprint;
3218 	struct ring_buffer_event *event;
3219 	struct trace_buffer *buffer;
3220 	struct trace_array *tr = &global_trace;
3221 	struct bprint_entry *entry;
3222 	unsigned long flags;
3223 	char *tbuffer;
3224 	int len = 0, size, pc;
3225 
3226 	if (unlikely(tracing_selftest_running || tracing_disabled))
3227 		return 0;
3228 
3229 	/* Don't pollute graph traces with trace_vprintk internals */
3230 	pause_graph_tracing();
3231 
3232 	pc = preempt_count();
3233 	preempt_disable_notrace();
3234 
3235 	tbuffer = get_trace_buf();
3236 	if (!tbuffer) {
3237 		len = 0;
3238 		goto out_nobuffer;
3239 	}
3240 
3241 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3242 
3243 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3244 		goto out_put;
3245 
3246 	local_save_flags(flags);
3247 	size = sizeof(*entry) + sizeof(u32) * len;
3248 	buffer = tr->array_buffer.buffer;
3249 	ring_buffer_nest_start(buffer);
3250 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3251 					    flags, pc);
3252 	if (!event)
3253 		goto out;
3254 	entry = ring_buffer_event_data(event);
3255 	entry->ip			= ip;
3256 	entry->fmt			= fmt;
3257 
3258 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3259 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3260 		__buffer_unlock_commit(buffer, event);
3261 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3262 	}
3263 
3264 out:
3265 	ring_buffer_nest_end(buffer);
3266 out_put:
3267 	put_trace_buf();
3268 
3269 out_nobuffer:
3270 	preempt_enable_notrace();
3271 	unpause_graph_tracing();
3272 
3273 	return len;
3274 }
3275 EXPORT_SYMBOL_GPL(trace_vbprintk);
3276 
3277 __printf(3, 0)
3278 static int
3279 __trace_array_vprintk(struct trace_buffer *buffer,
3280 		      unsigned long ip, const char *fmt, va_list args)
3281 {
3282 	struct trace_event_call *call = &event_print;
3283 	struct ring_buffer_event *event;
3284 	int len = 0, size, pc;
3285 	struct print_entry *entry;
3286 	unsigned long flags;
3287 	char *tbuffer;
3288 
3289 	if (tracing_disabled || tracing_selftest_running)
3290 		return 0;
3291 
3292 	/* Don't pollute graph traces with trace_vprintk internals */
3293 	pause_graph_tracing();
3294 
3295 	pc = preempt_count();
3296 	preempt_disable_notrace();
3297 
3298 
3299 	tbuffer = get_trace_buf();
3300 	if (!tbuffer) {
3301 		len = 0;
3302 		goto out_nobuffer;
3303 	}
3304 
3305 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3306 
3307 	local_save_flags(flags);
3308 	size = sizeof(*entry) + len + 1;
3309 	ring_buffer_nest_start(buffer);
3310 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3311 					    flags, pc);
3312 	if (!event)
3313 		goto out;
3314 	entry = ring_buffer_event_data(event);
3315 	entry->ip = ip;
3316 
3317 	memcpy(&entry->buf, tbuffer, len + 1);
3318 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3319 		__buffer_unlock_commit(buffer, event);
3320 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3321 	}
3322 
3323 out:
3324 	ring_buffer_nest_end(buffer);
3325 	put_trace_buf();
3326 
3327 out_nobuffer:
3328 	preempt_enable_notrace();
3329 	unpause_graph_tracing();
3330 
3331 	return len;
3332 }
3333 
3334 __printf(3, 0)
3335 int trace_array_vprintk(struct trace_array *tr,
3336 			unsigned long ip, const char *fmt, va_list args)
3337 {
3338 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3339 }
3340 
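/**
 * trace_array_printk - Print a message to a specific trace instance
 * @tr:  The trace array instance to write to
 * @ip:  The instruction pointer that this was called from
 * @fmt: The format to print (printf format)
 *
 * Returns the number of bytes written, 0 if the printk trace option
 * is not set, or -ENOENT if @tr is NULL.
 */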
3341 __printf(3, 0)
3342 int trace_array_printk(struct trace_array *tr,
3343 		       unsigned long ip, const char *fmt, ...)
3344 {
3345 	int ret;
3346 	va_list ap;
3347 
3348 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3349 		return 0;
3350 
3351 	if (!tr)
3352 		return -ENOENT;
3353 
3354 	va_start(ap, fmt);
3355 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3356 	va_end(ap);
3357 	return ret;
3358 }
3359 EXPORT_SYMBOL_GPL(trace_array_printk);
3360 
3361 __printf(3, 4)
3362 int trace_array_printk_buf(struct trace_buffer *buffer,
3363 			   unsigned long ip, const char *fmt, ...)
3364 {
3365 	int ret;
3366 	va_list ap;
3367 
3368 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3369 		return 0;
3370 
3371 	va_start(ap, fmt);
3372 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3373 	va_end(ap);
3374 	return ret;
3375 }
3376 
3377 __printf(2, 0)
3378 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3379 {
3380 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3381 }
3382 EXPORT_SYMBOL_GPL(trace_vprintk);
3383 
3384 static void trace_iterator_increment(struct trace_iterator *iter)
3385 {
3386 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3387 
3388 	iter->idx++;
3389 	if (buf_iter)
3390 		ring_buffer_iter_advance(buf_iter);
3391 }
3392 
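/*
 * Peek at the next entry on @cpu without consuming it. The entry's
 * timestamp is returned in *ts, any dropped events in *lost_events,
 * and iter->ent_size is set to the entry's length.
 */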
3393 static struct trace_entry *
3394 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3395 		unsigned long *lost_events)
3396 {
3397 	struct ring_buffer_event *event;
3398 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3399 
3400 	if (buf_iter) {
3401 		event = ring_buffer_iter_peek(buf_iter, ts);
3402 		if (lost_events)
3403 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3404 				(unsigned long)-1 : 0;
3405 	} else {
3406 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3407 					 lost_events);
3408 	}
3409 
3410 	if (event) {
3411 		iter->ent_size = ring_buffer_event_length(event);
3412 		return ring_buffer_event_data(event);
3413 	}
3414 	iter->ent_size = 0;
3415 	return NULL;
3416 }
3417 
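/*
 * Find the entry with the oldest timestamp across all traced CPUs (or
 * on the single CPU for a per-CPU trace file) and return it, along
 * with its CPU, timestamp and lost event count.
 */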
3418 static struct trace_entry *
3419 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3420 		  unsigned long *missing_events, u64 *ent_ts)
3421 {
3422 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3423 	struct trace_entry *ent, *next = NULL;
3424 	unsigned long lost_events = 0, next_lost = 0;
3425 	int cpu_file = iter->cpu_file;
3426 	u64 next_ts = 0, ts;
3427 	int next_cpu = -1;
3428 	int next_size = 0;
3429 	int cpu;
3430 
3431 	/*
3432 	 * If we are in a per_cpu trace file, don't bother iterating over
3433 	 * all CPUs; peek directly.
3434 	 */
3435 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3436 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3437 			return NULL;
3438 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3439 		if (ent_cpu)
3440 			*ent_cpu = cpu_file;
3441 
3442 		return ent;
3443 	}
3444 
3445 	for_each_tracing_cpu(cpu) {
3446 
3447 		if (ring_buffer_empty_cpu(buffer, cpu))
3448 			continue;
3449 
3450 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3451 
3452 		/*
3453 		 * Pick the entry with the smallest timestamp:
3454 		 */
3455 		if (ent && (!next || ts < next_ts)) {
3456 			next = ent;
3457 			next_cpu = cpu;
3458 			next_ts = ts;
3459 			next_lost = lost_events;
3460 			next_size = iter->ent_size;
3461 		}
3462 	}
3463 
3464 	iter->ent_size = next_size;
3465 
3466 	if (ent_cpu)
3467 		*ent_cpu = next_cpu;
3468 
3469 	if (ent_ts)
3470 		*ent_ts = next_ts;
3471 
3472 	if (missing_events)
3473 		*missing_events = next_lost;
3474 
3475 	return next;
3476 }
3477 
3478 #define STATIC_TEMP_BUF_SIZE	128
3479 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3480 
3481 /* Find the next real entry, without updating the iterator itself */
3482 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3483 					  int *ent_cpu, u64 *ent_ts)
3484 {
3485 	/* __find_next_entry will reset ent_size */
3486 	int ent_size = iter->ent_size;
3487 	struct trace_entry *entry;
3488 
3489 	/*
3490 	 * If called from ftrace_dump(), then the iter->temp buffer
3491 	 * will be the static_temp_buf and not created from kmalloc.
3492 	 * If the entry size is greater than the buffer, we cannot
3493 	 * save it. Just return NULL in that case. This is only
3494 	 * used to add markers when two consecutive events' time
3495 	 * stamps have a large delta. See trace_print_lat_context().
3496 	 */
3497 	if (iter->temp == static_temp_buf &&
3498 	    STATIC_TEMP_BUF_SIZE < ent_size)
3499 		return NULL;
3500 
3501 	/*
3502 	 * The __find_next_entry() may call peek_next_entry(), which may
3503 	 * call ring_buffer_peek() that may make the contents of iter->ent
3504 	 * undefined. Need to copy iter->ent now.
3505 	 */
3506 	if (iter->ent && iter->ent != iter->temp) {
3507 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3508 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3509 			kfree(iter->temp);
3510 			iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3511 			if (!iter->temp)
3512 				return NULL;
3513 		}
3514 		memcpy(iter->temp, iter->ent, iter->ent_size);
3515 		iter->temp_size = iter->ent_size;
3516 		iter->ent = iter->temp;
3517 	}
3518 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3519 	/* Put back the original ent_size */
3520 	iter->ent_size = ent_size;
3521 
3522 	return entry;
3523 }
3524 
3525 /* Find the next real entry, and increment the iterator to the next entry */
3526 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3527 {
3528 	iter->ent = __find_next_entry(iter, &iter->cpu,
3529 				      &iter->lost_events, &iter->ts);
3530 
3531 	if (iter->ent)
3532 		trace_iterator_increment(iter);
3533 
3534 	return iter->ent ? iter : NULL;
3535 }
3536 
3537 static void trace_consume(struct trace_iterator *iter)
3538 {
3539 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3540 			    &iter->lost_events);
3541 }
3542 
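/* seq_file ->next() callback: advance the trace iterator to *pos */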
3543 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3544 {
3545 	struct trace_iterator *iter = m->private;
3546 	int i = (int)*pos;
3547 	void *ent;
3548 
3549 	WARN_ON_ONCE(iter->leftover);
3550 
3551 	(*pos)++;
3552 
3553 	/* can't go backwards */
3554 	if (iter->idx > i)
3555 		return NULL;
3556 
3557 	if (iter->idx < 0)
3558 		ent = trace_find_next_entry_inc(iter);
3559 	else
3560 		ent = iter;
3561 
3562 	while (ent && iter->idx < i)
3563 		ent = trace_find_next_entry_inc(iter);
3564 
3565 	iter->pos = *pos;
3566 
3567 	return ent;
3568 }
3569 
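/*
 * Reset the ring buffer iterator for @cpu, skipping entries with a
 * timestamp before the buffer's time_start and recording how many
 * were skipped.
 */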
3570 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3571 {
3572 	struct ring_buffer_iter *buf_iter;
3573 	unsigned long entries = 0;
3574 	u64 ts;
3575 
3576 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3577 
3578 	buf_iter = trace_buffer_iter(iter, cpu);
3579 	if (!buf_iter)
3580 		return;
3581 
3582 	ring_buffer_iter_reset(buf_iter);
3583 
3584 	/*
3585 	 * With the max latency tracers, we could have the case that
3586 	 * a reset never took place on a CPU. This is evidenced by
3587 	 * the timestamp being before the start of the buffer.
3588 	 */
3589 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3590 		if (ts >= iter->array_buffer->time_start)
3591 			break;
3592 		entries++;
3593 		ring_buffer_iter_advance(buf_iter);
3594 	}
3595 
3596 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3597 }
3598 
3599 /*
3600  * The current tracer is copied to avoid taking a global lock
3601  * all around.
3602  */
3603 static void *s_start(struct seq_file *m, loff_t *pos)
3604 {
3605 	struct trace_iterator *iter = m->private;
3606 	struct trace_array *tr = iter->tr;
3607 	int cpu_file = iter->cpu_file;
3608 	void *p = NULL;
3609 	loff_t l = 0;
3610 	int cpu;
3611 
3612 	/*
3613 	 * Copy the tracer to avoid using a global lock all around.
3614 	 * iter->trace is a copy of current_trace; the pointer to the
3615 	 * name may be used instead of a strcmp(), as iter->trace->name
3616 	 * will point to the same string as current_trace->name.
3617 	 */
3618 	mutex_lock(&trace_types_lock);
3619 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3620 		*iter->trace = *tr->current_trace;
3621 	mutex_unlock(&trace_types_lock);
3622 
3623 #ifdef CONFIG_TRACER_MAX_TRACE
3624 	if (iter->snapshot && iter->trace->use_max_tr)
3625 		return ERR_PTR(-EBUSY);
3626 #endif
3627 
3628 	if (!iter->snapshot)
3629 		atomic_inc(&trace_record_taskinfo_disabled);
3630 
3631 	if (*pos != iter->pos) {
3632 		iter->ent = NULL;
3633 		iter->cpu = 0;
3634 		iter->idx = -1;
3635 
3636 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3637 			for_each_tracing_cpu(cpu)
3638 				tracing_iter_reset(iter, cpu);
3639 		} else
3640 			tracing_iter_reset(iter, cpu_file);
3641 
3642 		iter->leftover = 0;
3643 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3644 			;
3645 
3646 	} else {
3647 		/*
3648 		 * If we overflowed the seq_file before, then we want
3649 		 * to just reuse the trace_seq buffer again.
3650 		 */
3651 		if (iter->leftover)
3652 			p = iter;
3653 		else {
3654 			l = *pos - 1;
3655 			p = s_next(m, p, &l);
3656 		}
3657 	}
3658 
3659 	trace_event_read_lock();
3660 	trace_access_lock(cpu_file);
3661 	return p;
3662 }
3663 
3664 static void s_stop(struct seq_file *m, void *p)
3665 {
3666 	struct trace_iterator *iter = m->private;
3667 
3668 #ifdef CONFIG_TRACER_MAX_TRACE
3669 	if (iter->snapshot && iter->trace->use_max_tr)
3670 		return;
3671 #endif
3672 
3673 	if (!iter->snapshot)
3674 		atomic_dec(&trace_record_taskinfo_disabled);
3675 
3676 	trace_access_unlock(iter->cpu_file);
3677 	trace_event_read_unlock();
3678 }
3679 
3680 static void
3681 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3682 		      unsigned long *entries, int cpu)
3683 {
3684 	unsigned long count;
3685 
3686 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3687 	/*
3688 	 * If this buffer has skipped entries, then we hold all
3689 	 * entries for the trace and we need to ignore the
3690 	 * ones before the time stamp.
3691 	 */
3692 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3693 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3694 		/* total is the same as the entries */
3695 		*total = count;
3696 	} else
3697 		*total = count +
3698 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3699 	*entries = count;
3700 }
3701 
3702 static void
3703 get_total_entries(struct array_buffer *buf,
3704 		  unsigned long *total, unsigned long *entries)
3705 {
3706 	unsigned long t, e;
3707 	int cpu;
3708 
3709 	*total = 0;
3710 	*entries = 0;
3711 
3712 	for_each_tracing_cpu(cpu) {
3713 		get_total_entries_cpu(buf, &t, &e, cpu);
3714 		*total += t;
3715 		*entries += e;
3716 	}
3717 }
3718 
3719 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3720 {
3721 	unsigned long total, entries;
3722 
3723 	if (!tr)
3724 		tr = &global_trace;
3725 
3726 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3727 
3728 	return entries;
3729 }
3730 
3731 unsigned long trace_total_entries(struct trace_array *tr)
3732 {
3733 	unsigned long total, entries;
3734 
3735 	if (!tr)
3736 		tr = &global_trace;
3737 
3738 	get_total_entries(&tr->array_buffer, &total, &entries);
3739 
3740 	return entries;
3741 }
3742 
3743 static void print_lat_help_header(struct seq_file *m)
3744 {
3745 	seq_puts(m, "#                  _------=> CPU#            \n"
3746 		    "#                 / _-----=> irqs-off        \n"
3747 		    "#                | / _----=> need-resched    \n"
3748 		    "#                || / _---=> hardirq/softirq \n"
3749 		    "#                ||| / _--=> preempt-depth   \n"
3750 		    "#                |||| /     delay            \n"
3751 		    "#  cmd     pid   ||||| time  |   caller      \n"
3752 		    "#     \\   /      |||||  \\    |   /         \n");
3753 }
3754 
3755 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3756 {
3757 	unsigned long total;
3758 	unsigned long entries;
3759 
3760 	get_total_entries(buf, &total, &entries);
3761 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3762 		   entries, total, num_online_cpus());
3763 	seq_puts(m, "#\n");
3764 }
3765 
3766 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3767 				   unsigned int flags)
3768 {
3769 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3770 
3771 	print_event_info(buf, m);
3772 
3773 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3774 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3775 }
3776 
3777 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3778 				       unsigned int flags)
3779 {
3780 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3781 	const char *space = "          ";
3782 	int prec = tgid ? 10 : 2;
3783 
3784 	print_event_info(buf, m);
3785 
3786 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3787 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3788 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3789 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3790 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3791 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3792 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3793 }
3794 
3795 void
3796 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3797 {
3798 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3799 	struct array_buffer *buf = iter->array_buffer;
3800 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3801 	struct tracer *type = iter->trace;
3802 	unsigned long entries;
3803 	unsigned long total;
3804 	const char *name = type->name;
3807 
3808 	get_total_entries(buf, &total, &entries);
3809 
3810 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3811 		   name, UTS_RELEASE);
3812 	seq_puts(m, "# -----------------------------------"
3813 		 "---------------------------------\n");
3814 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3815 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3816 		   nsecs_to_usecs(data->saved_latency),
3817 		   entries,
3818 		   total,
3819 		   buf->cpu,
3820 #if defined(CONFIG_PREEMPT_NONE)
3821 		   "server",
3822 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3823 		   "desktop",
3824 #elif defined(CONFIG_PREEMPT)
3825 		   "preempt",
3826 #elif defined(CONFIG_PREEMPT_RT)
3827 		   "preempt_rt",
3828 #else
3829 		   "unknown",
3830 #endif
3831 		   /* These are reserved for later use */
3832 		   0, 0, 0, 0);
3833 #ifdef CONFIG_SMP
3834 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3835 #else
3836 	seq_puts(m, ")\n");
3837 #endif
3838 	seq_puts(m, "#    -----------------\n");
3839 	seq_printf(m, "#    | task: %.16s-%d "
3840 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3841 		   data->comm, data->pid,
3842 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3843 		   data->policy, data->rt_priority);
3844 	seq_puts(m, "#    -----------------\n");
3845 
3846 	if (data->critical_start) {
3847 		seq_puts(m, "#  => started at: ");
3848 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3849 		trace_print_seq(m, &iter->seq);
3850 		seq_puts(m, "\n#  => ended at:   ");
3851 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3852 		trace_print_seq(m, &iter->seq);
3853 		seq_puts(m, "\n#\n");
3854 	}
3855 
3856 	seq_puts(m, "#\n");
3857 }
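
/*
 * Example of the banner print_trace_header() emits (illustrative, with
 * made-up values, in the style of the irqsoff example from
 * Documentation/trace/ftrace.rst):
 *
 *	# irqsoff latency trace v1.1.5 on 5.7.0
 *	# --------------------------------------------------------------------
 *	# latency: 259 us, #4/4, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
 *	#    -----------------
 *	#    | task: ps-6143 (uid:0 nice:0 policy:0 rt_prio:0)
 *	#    -----------------
 *	#  => started at: __lock_task_sighand
 *	#  => ended at:   _raw_spin_unlock_irqrestore
 */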
3858 
3859 static void test_cpu_buff_start(struct trace_iterator *iter)
3860 {
3861 	struct trace_seq *s = &iter->seq;
3862 	struct trace_array *tr = iter->tr;
3863 
3864 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3865 		return;
3866 
3867 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3868 		return;
3869 
3870 	if (cpumask_available(iter->started) &&
3871 	    cpumask_test_cpu(iter->cpu, iter->started))
3872 		return;
3873 
3874 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3875 		return;
3876 
3877 	if (cpumask_available(iter->started))
3878 		cpumask_set_cpu(iter->cpu, iter->started);
3879 
3880 	/* Don't print started cpu buffer for the first entry of the trace */
3881 	if (iter->idx > 1)
3882 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3883 				iter->cpu);
3884 }
3885 
3886 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3887 {
3888 	struct trace_array *tr = iter->tr;
3889 	struct trace_seq *s = &iter->seq;
3890 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3891 	struct trace_entry *entry;
3892 	struct trace_event *event;
3893 
3894 	entry = iter->ent;
3895 
3896 	test_cpu_buff_start(iter);
3897 
3898 	event = ftrace_find_event(entry->type);
3899 
3900 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3901 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3902 			trace_print_lat_context(iter);
3903 		else
3904 			trace_print_context(iter);
3905 	}
3906 
3907 	if (trace_seq_has_overflowed(s))
3908 		return TRACE_TYPE_PARTIAL_LINE;
3909 
3910 	if (event)
3911 		return event->funcs->trace(iter, sym_flags, event);
3912 
3913 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3914 
3915 	return trace_handle_return(s);
3916 }
3917 
3918 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3919 {
3920 	struct trace_array *tr = iter->tr;
3921 	struct trace_seq *s = &iter->seq;
3922 	struct trace_entry *entry;
3923 	struct trace_event *event;
3924 
3925 	entry = iter->ent;
3926 
3927 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3928 		trace_seq_printf(s, "%d %d %llu ",
3929 				 entry->pid, iter->cpu, iter->ts);
3930 
3931 	if (trace_seq_has_overflowed(s))
3932 		return TRACE_TYPE_PARTIAL_LINE;
3933 
3934 	event = ftrace_find_event(entry->type);
3935 	if (event)
3936 		return event->funcs->raw(iter, 0, event);
3937 
3938 	trace_seq_printf(s, "%d ?\n", entry->type);
3939 
3940 	return trace_handle_return(s);
3941 }
3942 
3943 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3944 {
3945 	struct trace_array *tr = iter->tr;
3946 	struct trace_seq *s = &iter->seq;
3947 	unsigned char newline = '\n';
3948 	struct trace_entry *entry;
3949 	struct trace_event *event;
3950 
3951 	entry = iter->ent;
3952 
3953 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3954 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3955 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3956 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3957 		if (trace_seq_has_overflowed(s))
3958 			return TRACE_TYPE_PARTIAL_LINE;
3959 	}
3960 
3961 	event = ftrace_find_event(entry->type);
3962 	if (event) {
3963 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3964 		if (ret != TRACE_TYPE_HANDLED)
3965 			return ret;
3966 	}
3967 
3968 	SEQ_PUT_FIELD(s, newline);
3969 
3970 	return trace_handle_return(s);
3971 }
3972 
3973 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3974 {
3975 	struct trace_array *tr = iter->tr;
3976 	struct trace_seq *s = &iter->seq;
3977 	struct trace_entry *entry;
3978 	struct trace_event *event;
3979 
3980 	entry = iter->ent;
3981 
3982 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3983 		SEQ_PUT_FIELD(s, entry->pid);
3984 		SEQ_PUT_FIELD(s, iter->cpu);
3985 		SEQ_PUT_FIELD(s, iter->ts);
3986 		if (trace_seq_has_overflowed(s))
3987 			return TRACE_TYPE_PARTIAL_LINE;
3988 	}
3989 
3990 	event = ftrace_find_event(entry->type);
3991 	return event ? event->funcs->binary(iter, 0, event) :
3992 		TRACE_TYPE_HANDLED;
3993 }
3994 
3995 int trace_empty(struct trace_iterator *iter)
3996 {
3997 	struct ring_buffer_iter *buf_iter;
3998 	int cpu;
3999 
4000 	/* If we are looking at one CPU buffer, only check that one */
4001 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4002 		cpu = iter->cpu_file;
4003 		buf_iter = trace_buffer_iter(iter, cpu);
4004 		if (buf_iter) {
4005 			if (!ring_buffer_iter_empty(buf_iter))
4006 				return 0;
4007 		} else {
4008 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4009 				return 0;
4010 		}
4011 		return 1;
4012 	}
4013 
4014 	for_each_tracing_cpu(cpu) {
4015 		buf_iter = trace_buffer_iter(iter, cpu);
4016 		if (buf_iter) {
4017 			if (!ring_buffer_iter_empty(buf_iter))
4018 				return 0;
4019 		} else {
4020 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4021 				return 0;
4022 		}
4023 	}
4024 
4025 	return 1;
4026 }
4027 
4028 /*  Called with trace_event_read_lock() held. */
4029 enum print_line_t print_trace_line(struct trace_iterator *iter)
4030 {
4031 	struct trace_array *tr = iter->tr;
4032 	unsigned long trace_flags = tr->trace_flags;
4033 	enum print_line_t ret;
4034 
4035 	if (iter->lost_events) {
4036 		if (iter->lost_events == (unsigned long)-1)
4037 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4038 					 iter->cpu);
4039 		else
4040 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4041 					 iter->cpu, iter->lost_events);
4042 		if (trace_seq_has_overflowed(&iter->seq))
4043 			return TRACE_TYPE_PARTIAL_LINE;
4044 	}
4045 
4046 	if (iter->trace && iter->trace->print_line) {
4047 		ret = iter->trace->print_line(iter);
4048 		if (ret != TRACE_TYPE_UNHANDLED)
4049 			return ret;
4050 	}
4051 
4052 	if (iter->ent->type == TRACE_BPUTS &&
4053 			trace_flags & TRACE_ITER_PRINTK &&
4054 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4055 		return trace_print_bputs_msg_only(iter);
4056 
4057 	if (iter->ent->type == TRACE_BPRINT &&
4058 			trace_flags & TRACE_ITER_PRINTK &&
4059 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4060 		return trace_print_bprintk_msg_only(iter);
4061 
4062 	if (iter->ent->type == TRACE_PRINT &&
4063 			trace_flags & TRACE_ITER_PRINTK &&
4064 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4065 		return trace_print_printk_msg_only(iter);
4066 
4067 	if (trace_flags & TRACE_ITER_BIN)
4068 		return print_bin_fmt(iter);
4069 
4070 	if (trace_flags & TRACE_ITER_HEX)
4071 		return print_hex_fmt(iter);
4072 
4073 	if (trace_flags & TRACE_ITER_RAW)
4074 		return print_raw_fmt(iter);
4075 
4076 	return print_trace_fmt(iter);
4077 }
4078 
4079 void trace_latency_header(struct seq_file *m)
4080 {
4081 	struct trace_iterator *iter = m->private;
4082 	struct trace_array *tr = iter->tr;
4083 
4084 	/* print nothing if the buffers are empty */
4085 	if (trace_empty(iter))
4086 		return;
4087 
4088 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4089 		print_trace_header(m, iter);
4090 
4091 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4092 		print_lat_help_header(m);
4093 }
4094 
4095 void trace_default_header(struct seq_file *m)
4096 {
4097 	struct trace_iterator *iter = m->private;
4098 	struct trace_array *tr = iter->tr;
4099 	unsigned long trace_flags = tr->trace_flags;
4100 
4101 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4102 		return;
4103 
4104 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4105 		/* print nothing if the buffers are empty */
4106 		if (trace_empty(iter))
4107 			return;
4108 		print_trace_header(m, iter);
4109 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4110 			print_lat_help_header(m);
4111 	} else {
4112 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4113 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4114 				print_func_help_header_irq(iter->array_buffer,
4115 							   m, trace_flags);
4116 			else
4117 				print_func_help_header(iter->array_buffer, m,
4118 						       trace_flags);
4119 		}
4120 	}
4121 }
4122 
4123 static void test_ftrace_alive(struct seq_file *m)
4124 {
4125 	if (!ftrace_is_dead())
4126 		return;
4127 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4128 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4129 }
4130 
4131 #ifdef CONFIG_TRACER_MAX_TRACE
4132 static void show_snapshot_main_help(struct seq_file *m)
4133 {
4134 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4135 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4136 		    "#                      Takes a snapshot of the main buffer.\n"
4137 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4138 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4139 		    "#                       is not a '0' or '1')\n");
4140 }
4141 
4142 static void show_snapshot_percpu_help(struct seq_file *m)
4143 {
4144 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4145 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4146 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4147 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4148 #else
4149 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4150 		    "#                     Must use main snapshot file to allocate.\n");
4151 #endif
4152 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4153 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4154 		    "#                       is not a '0' or '1')\n");
4155 }
4156 
4157 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4158 {
4159 	if (iter->tr->allocated_snapshot)
4160 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4161 	else
4162 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4163 
4164 	seq_puts(m, "# Snapshot commands:\n");
4165 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4166 		show_snapshot_main_help(m);
4167 	else
4168 		show_snapshot_percpu_help(m);
4169 }
4170 #else
4171 /* Should never be called */
4172 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4173 #endif
4174 
4175 static int s_show(struct seq_file *m, void *v)
4176 {
4177 	struct trace_iterator *iter = v;
4178 	int ret;
4179 
4180 	if (iter->ent == NULL) {
4181 		if (iter->tr) {
4182 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4183 			seq_puts(m, "#\n");
4184 			test_ftrace_alive(m);
4185 		}
4186 		if (iter->snapshot && trace_empty(iter))
4187 			print_snapshot_help(m, iter);
4188 		else if (iter->trace && iter->trace->print_header)
4189 			iter->trace->print_header(m);
4190 		else
4191 			trace_default_header(m);
4192 
4193 	} else if (iter->leftover) {
4194 		/*
4195 		 * If we filled the seq_file buffer earlier, we
4196 		 * want to just show it now.
4197 		 */
4198 		ret = trace_print_seq(m, &iter->seq);
4199 
4200 		/* ret should this time be zero, but you never know */
4201 		iter->leftover = ret;
4202 
4203 	} else {
4204 		print_trace_line(iter);
4205 		ret = trace_print_seq(m, &iter->seq);
4206 		/*
4207 		 * If we overflow the seq_file buffer, then it will
4208 		 * ask us for this data again at start up.
4209 		 * Use that instead.
4210 		 *  ret is 0 if seq_file write succeeded.
4211 		 *        -1 otherwise.
4212 		 */
4213 		iter->leftover = ret;
4214 	}
4215 
4216 	return 0;
4217 }
4218 
4219 /*
4220  * Should be used after trace_array_get(); trace_types_lock
4221  * ensures that i_cdev was already initialized.
4222  */
4223 static inline int tracing_get_cpu(struct inode *inode)
4224 {
4225 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4226 		return (long)inode->i_cdev - 1;
4227 	return RING_BUFFER_ALL_CPUS;
4228 }
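
/*
 * Illustrative example of the i_cdev encoding decoded above (assumption:
 * trace_create_cpu_file(), referenced in the comment above, stores
 * cpu + 1 so that a NULL i_cdev can stand for "all CPUs"):
 *
 *	i_cdev == NULL		-> RING_BUFFER_ALL_CPUS
 *	i_cdev == (void *)1	-> CPU 0
 *	i_cdev == (void *)3	-> CPU 2
 */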
4229 
4230 static const struct seq_operations tracer_seq_ops = {
4231 	.start		= s_start,
4232 	.next		= s_next,
4233 	.stop		= s_stop,
4234 	.show		= s_show,
4235 };
4236 
4237 static struct trace_iterator *
4238 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4239 {
4240 	struct trace_array *tr = inode->i_private;
4241 	struct trace_iterator *iter;
4242 	int cpu;
4243 
4244 	if (tracing_disabled)
4245 		return ERR_PTR(-ENODEV);
4246 
4247 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4248 	if (!iter)
4249 		return ERR_PTR(-ENOMEM);
4250 
4251 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4252 				    GFP_KERNEL);
4253 	if (!iter->buffer_iter)
4254 		goto release;
4255 
4256 	/*
4257 	 * trace_find_next_entry() may need to save off iter->ent.
4258 	 * It will place it into the iter->temp buffer. As most
4259 	 * events are less than 128 bytes, allocate a buffer of that size.
4260 	 * If one is greater, then trace_find_next_entry() will
4261 	 * allocate a new buffer to adjust for the bigger iter->ent.
4262 	 * It's not critical if it fails to get allocated here.
4263 	 */
4264 	iter->temp = kmalloc(128, GFP_KERNEL);
4265 	if (iter->temp)
4266 		iter->temp_size = 128;
4267 
4268 	/*
4269 	 * We make a copy of the current tracer to avoid concurrent
4270 	 * changes on it while we are reading.
4271 	 */
4272 	mutex_lock(&trace_types_lock);
4273 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4274 	if (!iter->trace)
4275 		goto fail;
4276 
4277 	*iter->trace = *tr->current_trace;
4278 
4279 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4280 		goto fail;
4281 
4282 	iter->tr = tr;
4283 
4284 #ifdef CONFIG_TRACER_MAX_TRACE
4285 	/* Currently only the top directory has a snapshot */
4286 	if (tr->current_trace->print_max || snapshot)
4287 		iter->array_buffer = &tr->max_buffer;
4288 	else
4289 #endif
4290 		iter->array_buffer = &tr->array_buffer;
4291 	iter->snapshot = snapshot;
4292 	iter->pos = -1;
4293 	iter->cpu_file = tracing_get_cpu(inode);
4294 	mutex_init(&iter->mutex);
4295 
4296 	/* Notify the tracer early, before we stop tracing. */
4297 	if (iter->trace->open)
4298 		iter->trace->open(iter);
4299 
4300 	/* Annotate start of buffers if we had overruns */
4301 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4302 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4303 
4304 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4305 	if (trace_clocks[tr->clock_id].in_ns)
4306 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4307 
4308 	/*
4309 	 * If pause-on-trace is enabled, then stop the trace while
4310 	 * dumping, unless this is the "snapshot" file
4311 	 */
4312 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4313 		tracing_stop_tr(tr);
4314 
4315 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4316 		for_each_tracing_cpu(cpu) {
4317 			iter->buffer_iter[cpu] =
4318 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4319 							 cpu, GFP_KERNEL);
4320 		}
4321 		ring_buffer_read_prepare_sync();
4322 		for_each_tracing_cpu(cpu) {
4323 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4324 			tracing_iter_reset(iter, cpu);
4325 		}
4326 	} else {
4327 		cpu = iter->cpu_file;
4328 		iter->buffer_iter[cpu] =
4329 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4330 						 cpu, GFP_KERNEL);
4331 		ring_buffer_read_prepare_sync();
4332 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4333 		tracing_iter_reset(iter, cpu);
4334 	}
4335 
4336 	mutex_unlock(&trace_types_lock);
4337 
4338 	return iter;
4339 
4340  fail:
4341 	mutex_unlock(&trace_types_lock);
4342 	kfree(iter->trace);
4343 	kfree(iter->temp);
4344 	kfree(iter->buffer_iter);
4345 release:
4346 	seq_release_private(inode, file);
4347 	return ERR_PTR(-ENOMEM);
4348 }
4349 
4350 int tracing_open_generic(struct inode *inode, struct file *filp)
4351 {
4352 	int ret;
4353 
4354 	ret = tracing_check_open_get_tr(NULL);
4355 	if (ret)
4356 		return ret;
4357 
4358 	filp->private_data = inode->i_private;
4359 	return 0;
4360 }
4361 
4362 bool tracing_is_disabled(void)
4363 {
4364 	return tracing_disabled ? true : false;
4365 }
4366 
4367 /*
4368  * Open and update trace_array ref count.
4369  * Must have the current trace_array passed to it.
4370  */
4371 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4372 {
4373 	struct trace_array *tr = inode->i_private;
4374 	int ret;
4375 
4376 	ret = tracing_check_open_get_tr(tr);
4377 	if (ret)
4378 		return ret;
4379 
4380 	filp->private_data = inode->i_private;
4381 
4382 	return 0;
4383 }
4384 
4385 static int tracing_release(struct inode *inode, struct file *file)
4386 {
4387 	struct trace_array *tr = inode->i_private;
4388 	struct seq_file *m = file->private_data;
4389 	struct trace_iterator *iter;
4390 	int cpu;
4391 
4392 	if (!(file->f_mode & FMODE_READ)) {
4393 		trace_array_put(tr);
4394 		return 0;
4395 	}
4396 
4397 	/* Writes do not use seq_file */
4398 	iter = m->private;
4399 	mutex_lock(&trace_types_lock);
4400 
4401 	for_each_tracing_cpu(cpu) {
4402 		if (iter->buffer_iter[cpu])
4403 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4404 	}
4405 
4406 	if (iter->trace && iter->trace->close)
4407 		iter->trace->close(iter);
4408 
4409 	if (!iter->snapshot && tr->stop_count)
4410 		/* reenable tracing if it was previously enabled */
4411 		tracing_start_tr(tr);
4412 
4413 	__trace_array_put(tr);
4414 
4415 	mutex_unlock(&trace_types_lock);
4416 
4417 	mutex_destroy(&iter->mutex);
4418 	free_cpumask_var(iter->started);
4419 	kfree(iter->temp);
4420 	kfree(iter->trace);
4421 	kfree(iter->buffer_iter);
4422 	seq_release_private(inode, file);
4423 
4424 	return 0;
4425 }
4426 
4427 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4428 {
4429 	struct trace_array *tr = inode->i_private;
4430 
4431 	trace_array_put(tr);
4432 	return 0;
4433 }
4434 
4435 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4436 {
4437 	struct trace_array *tr = inode->i_private;
4438 
4439 	trace_array_put(tr);
4440 
4441 	return single_release(inode, file);
4442 }
4443 
4444 static int tracing_open(struct inode *inode, struct file *file)
4445 {
4446 	struct trace_array *tr = inode->i_private;
4447 	struct trace_iterator *iter;
4448 	int ret;
4449 
4450 	ret = tracing_check_open_get_tr(tr);
4451 	if (ret)
4452 		return ret;
4453 
4454 	/* If this file was open for write, then erase contents */
4455 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4456 		int cpu = tracing_get_cpu(inode);
4457 		struct array_buffer *trace_buf = &tr->array_buffer;
4458 
4459 #ifdef CONFIG_TRACER_MAX_TRACE
4460 		if (tr->current_trace->print_max)
4461 			trace_buf = &tr->max_buffer;
4462 #endif
4463 
4464 		if (cpu == RING_BUFFER_ALL_CPUS)
4465 			tracing_reset_online_cpus(trace_buf);
4466 		else
4467 			tracing_reset_cpu(trace_buf, cpu);
4468 	}
4469 
4470 	if (file->f_mode & FMODE_READ) {
4471 		iter = __tracing_open(inode, file, false);
4472 		if (IS_ERR(iter))
4473 			ret = PTR_ERR(iter);
4474 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4475 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4476 	}
4477 
4478 	if (ret < 0)
4479 		trace_array_put(tr);
4480 
4481 	return ret;
4482 }
4483 
4484 /*
4485  * Some tracers are not suitable for instance buffers.
4486  * A tracer is always available for the global array (toplevel),
4487  * or for an instance if it explicitly allows instances.
4488  */
4489 static bool
4490 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4491 {
4492 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4493 }
4494 
4495 /* Find the next tracer that this trace array may use */
4496 static struct tracer *
4497 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4498 {
4499 	while (t && !trace_ok_for_array(t, tr))
4500 		t = t->next;
4501 
4502 	return t;
4503 }
4504 
4505 static void *
4506 t_next(struct seq_file *m, void *v, loff_t *pos)
4507 {
4508 	struct trace_array *tr = m->private;
4509 	struct tracer *t = v;
4510 
4511 	(*pos)++;
4512 
4513 	if (t)
4514 		t = get_tracer_for_array(tr, t->next);
4515 
4516 	return t;
4517 }
4518 
4519 static void *t_start(struct seq_file *m, loff_t *pos)
4520 {
4521 	struct trace_array *tr = m->private;
4522 	struct tracer *t;
4523 	loff_t l = 0;
4524 
4525 	mutex_lock(&trace_types_lock);
4526 
4527 	t = get_tracer_for_array(tr, trace_types);
4528 	for (; t && l < *pos; t = t_next(m, t, &l))
4529 			;
4530 
4531 	return t;
4532 }
4533 
4534 static void t_stop(struct seq_file *m, void *p)
4535 {
4536 	mutex_unlock(&trace_types_lock);
4537 }
4538 
4539 static int t_show(struct seq_file *m, void *v)
4540 {
4541 	struct tracer *t = v;
4542 
4543 	if (!t)
4544 		return 0;
4545 
4546 	seq_puts(m, t->name);
4547 	if (t->next)
4548 		seq_putc(m, ' ');
4549 	else
4550 		seq_putc(m, '\n');
4551 
4552 	return 0;
4553 }
4554 
4555 static const struct seq_operations show_traces_seq_ops = {
4556 	.start		= t_start,
4557 	.next		= t_next,
4558 	.stop		= t_stop,
4559 	.show		= t_show,
4560 };
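
/*
 * Example (illustrative): these seq_ops (via show_traces_fops below) back
 * the "available_tracers" file, so a read walks trace_types and prints the
 * usable tracers on one space-separated line, e.g.:
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	function_graph function nop
 *
 * (the exact list depends on the kernel configuration).
 */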
4561 
4562 static int show_traces_open(struct inode *inode, struct file *file)
4563 {
4564 	struct trace_array *tr = inode->i_private;
4565 	struct seq_file *m;
4566 	int ret;
4567 
4568 	ret = tracing_check_open_get_tr(tr);
4569 	if (ret)
4570 		return ret;
4571 
4572 	ret = seq_open(file, &show_traces_seq_ops);
4573 	if (ret) {
4574 		trace_array_put(tr);
4575 		return ret;
4576 	}
4577 
4578 	m = file->private_data;
4579 	m->private = tr;
4580 
4581 	return 0;
4582 }
4583 
4584 static int show_traces_release(struct inode *inode, struct file *file)
4585 {
4586 	struct trace_array *tr = inode->i_private;
4587 
4588 	trace_array_put(tr);
4589 	return seq_release(inode, file);
4590 }
4591 
4592 static ssize_t
4593 tracing_write_stub(struct file *filp, const char __user *ubuf,
4594 		   size_t count, loff_t *ppos)
4595 {
4596 	return count;
4597 }
4598 
4599 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4600 {
4601 	int ret;
4602 
4603 	if (file->f_mode & FMODE_READ)
4604 		ret = seq_lseek(file, offset, whence);
4605 	else
4606 		file->f_pos = ret = 0;
4607 
4608 	return ret;
4609 }
4610 
4611 static const struct file_operations tracing_fops = {
4612 	.open		= tracing_open,
4613 	.read		= seq_read,
4614 	.write		= tracing_write_stub,
4615 	.llseek		= tracing_lseek,
4616 	.release	= tracing_release,
4617 };
4618 
4619 static const struct file_operations show_traces_fops = {
4620 	.open		= show_traces_open,
4621 	.read		= seq_read,
4622 	.llseek		= seq_lseek,
4623 	.release	= show_traces_release,
4624 };
4625 
4626 static ssize_t
4627 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4628 		     size_t count, loff_t *ppos)
4629 {
4630 	struct trace_array *tr = file_inode(filp)->i_private;
4631 	char *mask_str;
4632 	int len;
4633 
4634 	len = snprintf(NULL, 0, "%*pb\n",
4635 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4636 	mask_str = kmalloc(len, GFP_KERNEL);
4637 	if (!mask_str)
4638 		return -ENOMEM;
4639 
4640 	len = snprintf(mask_str, len, "%*pb\n",
4641 		       cpumask_pr_args(tr->tracing_cpumask));
4642 	if (len >= count) {
4643 		count = -EINVAL;
4644 		goto out_err;
4645 	}
4646 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4647 
4648 out_err:
4649 	kfree(mask_str);
4650 
4651 	return count;
4652 }
4653 
4654 int tracing_set_cpumask(struct trace_array *tr,
4655 			cpumask_var_t tracing_cpumask_new)
4656 {
4657 	int cpu;
4658 
4659 	if (!tr)
4660 		return -EINVAL;
4661 
4662 	local_irq_disable();
4663 	arch_spin_lock(&tr->max_lock);
4664 	for_each_tracing_cpu(cpu) {
4665 		/*
4666 		 * Increase/decrease the disabled counter if we are
4667 		 * about to flip a bit in the cpumask:
4668 		 */
4669 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4670 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4671 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4672 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4673 		}
4674 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4675 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4676 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4677 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4678 		}
4679 	}
4680 	arch_spin_unlock(&tr->max_lock);
4681 	local_irq_enable();
4682 
4683 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4684 
4685 	return 0;
4686 }
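
/*
 * Usage sketch (illustrative): from user space the same effect comes from
 * writing a hex mask to the tracing_cpumask file handled below, e.g. to
 * restrict tracing to CPUs 0 and 1:
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * An in-kernel caller would build the mask itself:
 *
 *	cpumask_var_t new_mask;
 *
 *	if (alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
 *		cpumask_clear(new_mask);
 *		cpumask_set_cpu(0, new_mask);
 *		cpumask_set_cpu(1, new_mask);
 *		tracing_set_cpumask(tr, new_mask);	// tr: some struct trace_array *
 *		free_cpumask_var(new_mask);
 *	}
 */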
4687 
4688 static ssize_t
4689 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4690 		      size_t count, loff_t *ppos)
4691 {
4692 	struct trace_array *tr = file_inode(filp)->i_private;
4693 	cpumask_var_t tracing_cpumask_new;
4694 	int err;
4695 
4696 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4697 		return -ENOMEM;
4698 
4699 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4700 	if (err)
4701 		goto err_free;
4702 
4703 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4704 	if (err)
4705 		goto err_free;
4706 
4707 	free_cpumask_var(tracing_cpumask_new);
4708 
4709 	return count;
4710 
4711 err_free:
4712 	free_cpumask_var(tracing_cpumask_new);
4713 
4714 	return err;
4715 }
4716 
4717 static const struct file_operations tracing_cpumask_fops = {
4718 	.open		= tracing_open_generic_tr,
4719 	.read		= tracing_cpumask_read,
4720 	.write		= tracing_cpumask_write,
4721 	.release	= tracing_release_generic_tr,
4722 	.llseek		= generic_file_llseek,
4723 };
4724 
4725 static int tracing_trace_options_show(struct seq_file *m, void *v)
4726 {
4727 	struct tracer_opt *trace_opts;
4728 	struct trace_array *tr = m->private;
4729 	u32 tracer_flags;
4730 	int i;
4731 
4732 	mutex_lock(&trace_types_lock);
4733 	tracer_flags = tr->current_trace->flags->val;
4734 	trace_opts = tr->current_trace->flags->opts;
4735 
4736 	for (i = 0; trace_options[i]; i++) {
4737 		if (tr->trace_flags & (1 << i))
4738 			seq_printf(m, "%s\n", trace_options[i]);
4739 		else
4740 			seq_printf(m, "no%s\n", trace_options[i]);
4741 	}
4742 
4743 	for (i = 0; trace_opts[i].name; i++) {
4744 		if (tracer_flags & trace_opts[i].bit)
4745 			seq_printf(m, "%s\n", trace_opts[i].name);
4746 		else
4747 			seq_printf(m, "no%s\n", trace_opts[i].name);
4748 	}
4749 	mutex_unlock(&trace_types_lock);
4750 
4751 	return 0;
4752 }
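
/*
 * Example (illustrative): reading the trace_options file produces one line
 * per flag, prefixed with "no" when the flag is cleared, followed by the
 * current tracer's private options, e.g.:
 *
 *	# cat /sys/kernel/tracing/trace_options
 *	print-parent
 *	nosym-offset
 *	nosym-addr
 *	noverbose
 *	...
 */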
4753 
4754 static int __set_tracer_option(struct trace_array *tr,
4755 			       struct tracer_flags *tracer_flags,
4756 			       struct tracer_opt *opts, int neg)
4757 {
4758 	struct tracer *trace = tracer_flags->trace;
4759 	int ret;
4760 
4761 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4762 	if (ret)
4763 		return ret;
4764 
4765 	if (neg)
4766 		tracer_flags->val &= ~opts->bit;
4767 	else
4768 		tracer_flags->val |= opts->bit;
4769 	return 0;
4770 }
4771 
4772 /* Try to assign a tracer specific option */
4773 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4774 {
4775 	struct tracer *trace = tr->current_trace;
4776 	struct tracer_flags *tracer_flags = trace->flags;
4777 	struct tracer_opt *opts = NULL;
4778 	int i;
4779 
4780 	for (i = 0; tracer_flags->opts[i].name; i++) {
4781 		opts = &tracer_flags->opts[i];
4782 
4783 		if (strcmp(cmp, opts->name) == 0)
4784 			return __set_tracer_option(tr, trace->flags, opts, neg);
4785 	}
4786 
4787 	return -EINVAL;
4788 }
4789 
4790 /* Some tracers require overwrite to stay enabled */
4791 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4792 {
4793 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4794 		return -1;
4795 
4796 	return 0;
4797 }
4798 
4799 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4800 {
4801 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4802 	    (mask == TRACE_ITER_RECORD_CMD))
4803 		lockdep_assert_held(&event_mutex);
4804 
4805 	/* do nothing if flag is already set */
4806 	if (!!(tr->trace_flags & mask) == !!enabled)
4807 		return 0;
4808 
4809 	/* Give the tracer a chance to approve the change */
4810 	if (tr->current_trace->flag_changed)
4811 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4812 			return -EINVAL;
4813 
4814 	if (enabled)
4815 		tr->trace_flags |= mask;
4816 	else
4817 		tr->trace_flags &= ~mask;
4818 
4819 	if (mask == TRACE_ITER_RECORD_CMD)
4820 		trace_event_enable_cmd_record(enabled);
4821 
4822 	if (mask == TRACE_ITER_RECORD_TGID) {
4823 		if (!tgid_map)
4824 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4825 					   sizeof(*tgid_map),
4826 					   GFP_KERNEL);
4827 		if (!tgid_map) {
4828 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4829 			return -ENOMEM;
4830 		}
4831 
4832 		trace_event_enable_tgid_record(enabled);
4833 	}
4834 
4835 	if (mask == TRACE_ITER_EVENT_FORK)
4836 		trace_event_follow_fork(tr, enabled);
4837 
4838 	if (mask == TRACE_ITER_FUNC_FORK)
4839 		ftrace_pid_follow_fork(tr, enabled);
4840 
4841 	if (mask == TRACE_ITER_OVERWRITE) {
4842 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4843 #ifdef CONFIG_TRACER_MAX_TRACE
4844 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4845 #endif
4846 	}
4847 
4848 	if (mask == TRACE_ITER_PRINTK) {
4849 		trace_printk_start_stop_comm(enabled);
4850 		trace_printk_control(enabled);
4851 	}
4852 
4853 	return 0;
4854 }
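
/*
 * Usage sketch (illustrative): an in-kernel caller enabling TGID recording
 * would mirror trace_set_options() below and take both locks, since the
 * lockdep assertion above expects event_mutex for the RECORD_TGID and
 * RECORD_CMD masks:
 *
 *	mutex_lock(&event_mutex);
 *	mutex_lock(&trace_types_lock);
 *	ret = set_tracer_flag(tr, TRACE_ITER_RECORD_TGID, 1);
 *	mutex_unlock(&trace_types_lock);
 *	mutex_unlock(&event_mutex);
 */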
4855 
4856 int trace_set_options(struct trace_array *tr, char *option)
4857 {
4858 	char *cmp;
4859 	int neg = 0;
4860 	int ret;
4861 	size_t orig_len = strlen(option);
4862 	int len;
4863 
4864 	cmp = strstrip(option);
4865 
4866 	len = str_has_prefix(cmp, "no");
4867 	if (len)
4868 		neg = 1;
4869 
4870 	cmp += len;
4871 
4872 	mutex_lock(&event_mutex);
4873 	mutex_lock(&trace_types_lock);
4874 
4875 	ret = match_string(trace_options, -1, cmp);
4876 	/* If no option could be set, test the specific tracer options */
4877 	if (ret < 0)
4878 		ret = set_tracer_option(tr, cmp, neg);
4879 	else
4880 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4881 
4882 	mutex_unlock(&trace_types_lock);
4883 	mutex_unlock(&event_mutex);
4884 
4885 	/*
4886 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4887 	 * turn it back into a space.
4888 	 */
4889 	if (orig_len > strlen(option))
4890 		option[strlen(option)] = ' ';
4891 
4892 	return ret;
4893 }
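
/*
 * Example (illustrative): trace_set_options(tr, "noirq-info") strips the
 * "no" prefix, so cmp is "irq-info" with neg set, and the matching entry
 * in trace_options[] is cleared through set_tracer_flag().  A string that
 * matches no core option, such as a tracer-private flag, falls through to
 * set_tracer_option() instead.
 */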
4894 
4895 static void __init apply_trace_boot_options(void)
4896 {
4897 	char *buf = trace_boot_options_buf;
4898 	char *option;
4899 
4900 	while (true) {
4901 		option = strsep(&buf, ",");
4902 
4903 		if (!option)
4904 			break;
4905 
4906 		if (*option)
4907 			trace_set_options(&global_trace, option);
4908 
4909 		/* Put back the comma to allow this to be called again */
4910 		if (buf)
4911 			*(buf - 1) = ',';
4912 	}
4913 }
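
/*
 * Example (illustrative): booting with
 *
 *	trace_options=sym-offset,noprint-parent
 *
 * leaves that string in trace_boot_options_buf, and the loop above feeds
 * "sym-offset" and "noprint-parent" to trace_set_options() one by one,
 * restoring the commas so the buffer can be parsed again later.
 */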
4914 
4915 static ssize_t
4916 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4917 			size_t cnt, loff_t *ppos)
4918 {
4919 	struct seq_file *m = filp->private_data;
4920 	struct trace_array *tr = m->private;
4921 	char buf[64];
4922 	int ret;
4923 
4924 	if (cnt >= sizeof(buf))
4925 		return -EINVAL;
4926 
4927 	if (copy_from_user(buf, ubuf, cnt))
4928 		return -EFAULT;
4929 
4930 	buf[cnt] = 0;
4931 
4932 	ret = trace_set_options(tr, buf);
4933 	if (ret < 0)
4934 		return ret;
4935 
4936 	*ppos += cnt;
4937 
4938 	return cnt;
4939 }
4940 
4941 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4942 {
4943 	struct trace_array *tr = inode->i_private;
4944 	int ret;
4945 
4946 	ret = tracing_check_open_get_tr(tr);
4947 	if (ret)
4948 		return ret;
4949 
4950 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4951 	if (ret < 0)
4952 		trace_array_put(tr);
4953 
4954 	return ret;
4955 }
4956 
4957 static const struct file_operations tracing_iter_fops = {
4958 	.open		= tracing_trace_options_open,
4959 	.read		= seq_read,
4960 	.llseek		= seq_lseek,
4961 	.release	= tracing_single_release_tr,
4962 	.write		= tracing_trace_options_write,
4963 };
4964 
4965 static const char readme_msg[] =
4966 	"tracing mini-HOWTO:\n\n"
4967 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4968 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4969 	" Important files:\n"
4970 	"  trace\t\t\t- The static contents of the buffer\n"
4971 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4972 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4973 	"  current_tracer\t- function and latency tracers\n"
4974 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4975 	"  error_log\t- error log for failed commands (that support it)\n"
4976 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4977 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4978 	"  trace_clock\t\t- change the clock used to order events\n"
4979 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4980 	"      global:   Synced across CPUs but slows tracing down.\n"
4981 	"     counter:   Not a clock, but just an increment\n"
4982 	"      uptime:   Jiffy counter from time of boot\n"
4983 	"        perf:   Same clock that perf events use\n"
4984 #ifdef CONFIG_X86_64
4985 	"     x86-tsc:   TSC cycle counter\n"
4986 #endif
4987 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4988 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4989 	"    absolute:   Absolute (standalone) timestamp\n"
4990 	"\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
4991 	"\n  trace_marker_raw\t\t- Writes to this file are inserted as binary data into the kernel buffer\n"
4992 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4993 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4994 	"\t\t\t  Remove sub-buffer with rmdir\n"
4995 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4996 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4997 	"\t\t\t  option name\n"
4998 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4999 #ifdef CONFIG_DYNAMIC_FTRACE
5000 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5001 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5002 	"\t\t\t  functions\n"
5003 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5004 	"\t     modules: Can select a group via module\n"
5005 	"\t      Format: :mod:<module-name>\n"
5006 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5007 	"\t    triggers: a command to perform when function is hit\n"
5008 	"\t      Format: <function>:<trigger>[:count]\n"
5009 	"\t     trigger: traceon, traceoff\n"
5010 	"\t\t      enable_event:<system>:<event>\n"
5011 	"\t\t      disable_event:<system>:<event>\n"
5012 #ifdef CONFIG_STACKTRACE
5013 	"\t\t      stacktrace\n"
5014 #endif
5015 #ifdef CONFIG_TRACER_SNAPSHOT
5016 	"\t\t      snapshot\n"
5017 #endif
5018 	"\t\t      dump\n"
5019 	"\t\t      cpudump\n"
5020 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5021 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5022 	"\t     The first one will disable tracing every time do_fault is hit\n"
5023 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5024 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5025 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5026 	"\t       the counter will not decrement. It only decrements when the\n"
5027 	"\t       trigger did work\n"
5028 	"\t     To remove trigger without count:\n"
5029 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5030 	"\t     To remove trigger with a count:\n"
5031 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5032 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5033 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5034 	"\t    modules: Can select a group via module command :mod:\n"
5035 	"\t    Does not accept triggers\n"
5036 #endif /* CONFIG_DYNAMIC_FTRACE */
5037 #ifdef CONFIG_FUNCTION_TRACER
5038 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5039 	"\t\t    (function)\n"
5040 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5041 	"\t\t    (function)\n"
5042 #endif
5043 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5044 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5045 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5046 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5047 #endif
5048 #ifdef CONFIG_TRACER_SNAPSHOT
5049 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5050 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5051 	"\t\t\t  information\n"
5052 #endif
5053 #ifdef CONFIG_STACK_TRACER
5054 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5055 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5056 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5057 	"\t\t\t  new trace)\n"
5058 #ifdef CONFIG_DYNAMIC_FTRACE
5059 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5060 	"\t\t\t  traces\n"
5061 #endif
5062 #endif /* CONFIG_STACK_TRACER */
5063 #ifdef CONFIG_DYNAMIC_EVENTS
5064 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5065 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5066 #endif
5067 #ifdef CONFIG_KPROBE_EVENTS
5068 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5069 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5070 #endif
5071 #ifdef CONFIG_UPROBE_EVENTS
5072 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5073 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5074 #endif
5075 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5076 	"\t  accepts: event-definitions (one definition per line)\n"
5077 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5078 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5079 #ifdef CONFIG_HIST_TRIGGERS
5080 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5081 #endif
5082 	"\t           -:[<group>/]<event>\n"
5083 #ifdef CONFIG_KPROBE_EVENTS
5084 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5085 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5086 #endif
5087 #ifdef CONFIG_UPROBE_EVENTS
5088 	"\t    place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5089 #endif
5090 	"\t     args: <name>=fetcharg[:type]\n"
5091 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5092 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5093 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5094 #else
5095 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5096 #endif
5097 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5098 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5099 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5100 	"\t           <type>\\[<array-size>\\]\n"
5101 #ifdef CONFIG_HIST_TRIGGERS
5102 	"\t    field: <stype> <name>;\n"
5103 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5104 	"\t           [unsigned] char/int/long\n"
5105 #endif
5106 #endif
5107 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5108 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5109 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5110 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5111 	"\t\t\t  events\n"
5112 	"      filter\t\t- If set, only events passing filter are traced\n"
5113 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5114 	"\t\t\t  <event>:\n"
5115 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5116 	"      filter\t\t- If set, only events passing filter are traced\n"
5117 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5118 	"\t    Format: <trigger>[:count][if <filter>]\n"
5119 	"\t   trigger: traceon, traceoff\n"
5120 	"\t            enable_event:<system>:<event>\n"
5121 	"\t            disable_event:<system>:<event>\n"
5122 #ifdef CONFIG_HIST_TRIGGERS
5123 	"\t            enable_hist:<system>:<event>\n"
5124 	"\t            disable_hist:<system>:<event>\n"
5125 #endif
5126 #ifdef CONFIG_STACKTRACE
5127 	"\t\t    stacktrace\n"
5128 #endif
5129 #ifdef CONFIG_TRACER_SNAPSHOT
5130 	"\t\t    snapshot\n"
5131 #endif
5132 #ifdef CONFIG_HIST_TRIGGERS
5133 	"\t\t    hist (see below)\n"
5134 #endif
5135 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5136 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5137 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5138 	"\t                  events/block/block_unplug/trigger\n"
5139 	"\t   The first disables tracing every time block_unplug is hit.\n"
5140 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5141 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5142 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5143 	"\t   Like function triggers, the counter is only decremented if it\n"
5144 	"\t    enabled or disabled tracing.\n"
5145 	"\t   To remove a trigger without a count:\n"
5146 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5147 	"\t   To remove a trigger with a count:\n"
5148 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5149 	"\t   Filters can be ignored when removing a trigger.\n"
5150 #ifdef CONFIG_HIST_TRIGGERS
5151 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5152 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5153 	"\t            [:values=<field1[,field2,...]>]\n"
5154 	"\t            [:sort=<field1[,field2,...]>]\n"
5155 	"\t            [:size=#entries]\n"
5156 	"\t            [:pause][:continue][:clear]\n"
5157 	"\t            [:name=histname1]\n"
5158 	"\t            [:<handler>.<action>]\n"
5159 	"\t            [if <filter>]\n\n"
5160 	"\t    When a matching event is hit, an entry is added to a hash\n"
5161 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5162 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5163 	"\t    correspond to fields in the event's format description.  Keys\n"
5164 	"\t    can be any field, or the special string 'stacktrace'.\n"
5165 	"\t    Compound keys consisting of up to two fields can be specified\n"
5166 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5167 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5168 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5169 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5170 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5171 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5172 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5173 	"\t    its histogram data will be shared with other triggers of the\n"
5174 	"\t    same name, and trigger hits will update this common data.\n\n"
5175 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5176 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5177 	"\t    triggers attached to an event, there will be a table for each\n"
5178 	"\t    trigger in the output.  The table displayed for a named\n"
5179 	"\t    trigger will be the same as any other instance having the\n"
5180 	"\t    same name.  The default format used to display a given field\n"
5181 	"\t    can be modified by appending any of the following modifiers\n"
5182 	"\t    to the field name, as applicable:\n\n"
5183 	"\t            .hex        display a number as a hex value\n"
5184 	"\t            .sym        display an address as a symbol\n"
5185 	"\t            .sym-offset display an address as a symbol and offset\n"
5186 	"\t            .execname   display a common_pid as a program name\n"
5187 	"\t            .syscall    display a syscall id as a syscall name\n"
5188 	"\t            .log2       display log2 value rather than raw number\n"
5189 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5190 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5191 	"\t    trigger or to start a hist trigger but not log any events\n"
5192 	"\t    until told to do so.  'continue' can be used to start or\n"
5193 	"\t    restart a paused hist trigger.\n\n"
5194 	"\t    The 'clear' parameter will clear the contents of a running\n"
5195 	"\t    hist trigger and leave its current paused/active state\n"
5196 	"\t    unchanged.\n\n"
5197 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5198 	"\t    have one event conditionally start and stop another event's\n"
5199 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5200 	"\t    the enable_event and disable_event triggers.\n\n"
5201 	"\t    Hist trigger handlers and actions are executed whenever\n"
5202 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5203 	"\t        <handler>.<action>\n\n"
5204 	"\t    The available handlers are:\n\n"
5205 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5206 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5207 	"\t        onchange(var)            - invoke action if var changes\n\n"
5208 	"\t    The available actions are:\n\n"
5209 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5210 	"\t        save(field,...)                      - save current event fields\n"
5211 #ifdef CONFIG_TRACER_SNAPSHOT
5212 	"\t        snapshot()                           - snapshot the trace buffer\n"
5213 #endif
5214 #endif
5215 ;
5216 
5217 static ssize_t
5218 tracing_readme_read(struct file *filp, char __user *ubuf,
5219 		       size_t cnt, loff_t *ppos)
5220 {
5221 	return simple_read_from_buffer(ubuf, cnt, ppos,
5222 					readme_msg, strlen(readme_msg));
5223 }
5224 
5225 static const struct file_operations tracing_readme_fops = {
5226 	.open		= tracing_open_generic,
5227 	.read		= tracing_readme_read,
5228 	.llseek		= generic_file_llseek,
5229 };
5230 
5231 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5232 {
5233 	int *ptr = v;
5234 
5235 	if (*pos || m->count)
5236 		ptr++;
5237 
5238 	(*pos)++;
5239 
5240 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5241 		if (trace_find_tgid(*ptr))
5242 			return ptr;
5243 	}
5244 
5245 	return NULL;
5246 }
5247 
5248 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5249 {
5250 	void *v;
5251 	loff_t l = 0;
5252 
5253 	if (!tgid_map)
5254 		return NULL;
5255 
5256 	v = &tgid_map[0];
5257 	while (l <= *pos) {
5258 		v = saved_tgids_next(m, v, &l);
5259 		if (!v)
5260 			return NULL;
5261 	}
5262 
5263 	return v;
5264 }
5265 
5266 static void saved_tgids_stop(struct seq_file *m, void *v)
5267 {
5268 }
5269 
5270 static int saved_tgids_show(struct seq_file *m, void *v)
5271 {
5272 	int pid = (int *)v - tgid_map;
5273 
5274 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5275 	return 0;
5276 }
5277 
5278 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5279 	.start		= saved_tgids_start,
5280 	.stop		= saved_tgids_stop,
5281 	.next		= saved_tgids_next,
5282 	.show		= saved_tgids_show,
5283 };
5284 
5285 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5286 {
5287 	int ret;
5288 
5289 	ret = tracing_check_open_get_tr(NULL);
5290 	if (ret)
5291 		return ret;
5292 
5293 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5294 }
5295 
5296 
5297 static const struct file_operations tracing_saved_tgids_fops = {
5298 	.open		= tracing_saved_tgids_open,
5299 	.read		= seq_read,
5300 	.llseek		= seq_lseek,
5301 	.release	= seq_release,
5302 };
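
/*
 * Example (illustrative): each saved_tgids line is "<pid> <tgid>", so
 * threads of the same process share the second column:
 *
 *	# cat /sys/kernel/tracing/saved_tgids
 *	1023 1023
 *	1024 1023
 */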
5303 
5304 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5305 {
5306 	unsigned int *ptr = v;
5307 
5308 	if (*pos || m->count)
5309 		ptr++;
5310 
5311 	(*pos)++;
5312 
5313 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5314 	     ptr++) {
5315 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5316 			continue;
5317 
5318 		return ptr;
5319 	}
5320 
5321 	return NULL;
5322 }
5323 
5324 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5325 {
5326 	void *v;
5327 	loff_t l = 0;
5328 
5329 	preempt_disable();
5330 	arch_spin_lock(&trace_cmdline_lock);
5331 
5332 	v = &savedcmd->map_cmdline_to_pid[0];
5333 	while (l <= *pos) {
5334 		v = saved_cmdlines_next(m, v, &l);
5335 		if (!v)
5336 			return NULL;
5337 	}
5338 
5339 	return v;
5340 }
5341 
5342 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5343 {
5344 	arch_spin_unlock(&trace_cmdline_lock);
5345 	preempt_enable();
5346 }
5347 
5348 static int saved_cmdlines_show(struct seq_file *m, void *v)
5349 {
5350 	char buf[TASK_COMM_LEN];
5351 	unsigned int *pid = v;
5352 
5353 	__trace_find_cmdline(*pid, buf);
5354 	seq_printf(m, "%d %s\n", *pid, buf);
5355 	return 0;
5356 }
5357 
5358 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5359 	.start		= saved_cmdlines_start,
5360 	.next		= saved_cmdlines_next,
5361 	.stop		= saved_cmdlines_stop,
5362 	.show		= saved_cmdlines_show,
5363 };
5364 
5365 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5366 {
5367 	int ret;
5368 
5369 	ret = tracing_check_open_get_tr(NULL);
5370 	if (ret)
5371 		return ret;
5372 
5373 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5374 }
5375 
5376 static const struct file_operations tracing_saved_cmdlines_fops = {
5377 	.open		= tracing_saved_cmdlines_open,
5378 	.read		= seq_read,
5379 	.llseek		= seq_lseek,
5380 	.release	= seq_release,
5381 };
5382 
5383 static ssize_t
5384 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5385 				 size_t cnt, loff_t *ppos)
5386 {
5387 	char buf[64];
5388 	int r;
5389 
5390 	arch_spin_lock(&trace_cmdline_lock);
5391 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5392 	arch_spin_unlock(&trace_cmdline_lock);
5393 
5394 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5395 }
5396 
5397 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5398 {
5399 	kfree(s->saved_cmdlines);
5400 	kfree(s->map_cmdline_to_pid);
5401 	kfree(s);
5402 }
5403 
5404 static int tracing_resize_saved_cmdlines(unsigned int val)
5405 {
5406 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5407 
5408 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5409 	if (!s)
5410 		return -ENOMEM;
5411 
5412 	if (allocate_cmdlines_buffer(val, s) < 0) {
5413 		kfree(s);
5414 		return -ENOMEM;
5415 	}
5416 
5417 	arch_spin_lock(&trace_cmdline_lock);
5418 	savedcmd_temp = savedcmd;
5419 	savedcmd = s;
5420 	arch_spin_unlock(&trace_cmdline_lock);
5421 	free_saved_cmdlines_buffer(savedcmd_temp);
5422 
5423 	return 0;
5424 }
5425 
5426 static ssize_t
5427 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5428 				  size_t cnt, loff_t *ppos)
5429 {
5430 	unsigned long val;
5431 	int ret;
5432 
5433 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5434 	if (ret)
5435 		return ret;
5436 
5437 	/* must have at least 1 entry and no more than PID_MAX_DEFAULT */
5438 	if (!val || val > PID_MAX_DEFAULT)
5439 		return -EINVAL;
5440 
5441 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5442 	if (ret < 0)
5443 		return ret;
5444 
5445 	*ppos += cnt;
5446 
5447 	return cnt;
5448 }
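
/*
 * Usage sketch (illustrative), assuming tracefs is mounted at
 * /sys/kernel/tracing: resizing the saved comm cache goes through the
 * write handler above, e.g.:
 *
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *	# cat /sys/kernel/tracing/saved_cmdlines_size
 *	1024
 */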
5449 
5450 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5451 	.open		= tracing_open_generic,
5452 	.read		= tracing_saved_cmdlines_size_read,
5453 	.write		= tracing_saved_cmdlines_size_write,
5454 };
5455 
5456 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5457 static union trace_eval_map_item *
5458 update_eval_map(union trace_eval_map_item *ptr)
5459 {
5460 	if (!ptr->map.eval_string) {
5461 		if (ptr->tail.next) {
5462 			ptr = ptr->tail.next;
5463 			/* Set ptr to the next real item (skip head) */
5464 			ptr++;
5465 		} else
5466 			return NULL;
5467 	}
5468 	return ptr;
5469 }
5470 
5471 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5472 {
5473 	union trace_eval_map_item *ptr = v;
5474 
5475 	/*
5476 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5477 	 * This really should never happen.
5478 	 */
5479 	(*pos)++;
5480 	ptr = update_eval_map(ptr);
5481 	if (WARN_ON_ONCE(!ptr))
5482 		return NULL;
5483 
5484 	ptr++;
5485 	ptr = update_eval_map(ptr);
5486 
5487 	return ptr;
5488 }
5489 
5490 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5491 {
5492 	union trace_eval_map_item *v;
5493 	loff_t l = 0;
5494 
5495 	mutex_lock(&trace_eval_mutex);
5496 
5497 	v = trace_eval_maps;
5498 	if (v)
5499 		v++;
5500 
5501 	while (v && l < *pos) {
5502 		v = eval_map_next(m, v, &l);
5503 	}
5504 
5505 	return v;
5506 }
5507 
5508 static void eval_map_stop(struct seq_file *m, void *v)
5509 {
5510 	mutex_unlock(&trace_eval_mutex);
5511 }
5512 
5513 static int eval_map_show(struct seq_file *m, void *v)
5514 {
5515 	union trace_eval_map_item *ptr = v;
5516 
5517 	seq_printf(m, "%s %ld (%s)\n",
5518 		   ptr->map.eval_string, ptr->map.eval_value,
5519 		   ptr->map.system);
5520 
5521 	return 0;
5522 }
5523 
5524 static const struct seq_operations tracing_eval_map_seq_ops = {
5525 	.start		= eval_map_start,
5526 	.next		= eval_map_next,
5527 	.stop		= eval_map_stop,
5528 	.show		= eval_map_show,
5529 };
5530 
5531 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5532 {
5533 	int ret;
5534 
5535 	ret = tracing_check_open_get_tr(NULL);
5536 	if (ret)
5537 		return ret;
5538 
5539 	return seq_open(filp, &tracing_eval_map_seq_ops);
5540 }
5541 
5542 static const struct file_operations tracing_eval_map_fops = {
5543 	.open		= tracing_eval_map_open,
5544 	.read		= seq_read,
5545 	.llseek		= seq_lseek,
5546 	.release	= seq_release,
5547 };
5548 
5549 static inline union trace_eval_map_item *
5550 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5551 {
5552 	/* Return tail of array given the head */
5553 	return ptr + ptr->head.length + 1;
5554 }
5555 
5556 static void
5557 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5558 			   int len)
5559 {
5560 	struct trace_eval_map **stop;
5561 	struct trace_eval_map **map;
5562 	union trace_eval_map_item *map_array;
5563 	union trace_eval_map_item *ptr;
5564 
5565 	stop = start + len;
5566 
5567 	/*
5568 	 * The trace_eval_maps list contains the map array plus a head and a
5569 	 * tail item, where the head holds the module and the length of the
5570 	 * array, and the tail holds a pointer to the next list.
5571 	 */
5572 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5573 	if (!map_array) {
5574 		pr_warn("Unable to allocate trace eval mapping\n");
5575 		return;
5576 	}
5577 
5578 	mutex_lock(&trace_eval_mutex);
5579 
5580 	if (!trace_eval_maps)
5581 		trace_eval_maps = map_array;
5582 	else {
5583 		ptr = trace_eval_maps;
5584 		for (;;) {
5585 			ptr = trace_eval_jmp_to_tail(ptr);
5586 			if (!ptr->tail.next)
5587 				break;
5588 			ptr = ptr->tail.next;
5589 
5590 		}
5591 		ptr->tail.next = map_array;
5592 	}
5593 	map_array->head.mod = mod;
5594 	map_array->head.length = len;
5595 	map_array++;
5596 
5597 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5598 		map_array->map = **map;
5599 		map_array++;
5600 	}
5601 	memset(map_array, 0, sizeof(*map_array));
5602 
5603 	mutex_unlock(&trace_eval_mutex);
5604 }
5605 
5606 static void trace_create_eval_file(struct dentry *d_tracer)
5607 {
5608 	trace_create_file("eval_map", 0444, d_tracer,
5609 			  NULL, &tracing_eval_map_fops);
5610 }
5611 
5612 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5613 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5614 static inline void trace_insert_eval_map_file(struct module *mod,
5615 			      struct trace_eval_map **start, int len) { }
5616 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5617 
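/*
 * Register the eval maps with the trace events and, when
 * CONFIG_TRACE_EVAL_MAP_FILE is enabled, save them for the
 * "eval_map" file as well.
 */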
5618 static void trace_insert_eval_map(struct module *mod,
5619 				  struct trace_eval_map **start, int len)
5620 {
5621 	struct trace_eval_map **map;
5622 
5623 	if (len <= 0)
5624 		return;
5625 
5626 	map = start;
5627 
5628 	trace_event_eval_update(map, len);
5629 
5630 	trace_insert_eval_map_file(mod, start, len);
5631 }
5632 
5633 static ssize_t
5634 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5635 		       size_t cnt, loff_t *ppos)
5636 {
5637 	struct trace_array *tr = filp->private_data;
5638 	char buf[MAX_TRACER_SIZE+2];
5639 	int r;
5640 
5641 	mutex_lock(&trace_types_lock);
5642 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5643 	mutex_unlock(&trace_types_lock);
5644 
5645 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5646 }
5647 
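/* Reset the per-cpu buffers of @tr and run the tracer's init callback. */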
5648 int tracer_init(struct tracer *t, struct trace_array *tr)
5649 {
5650 	tracing_reset_online_cpus(&tr->array_buffer);
5651 	return t->init(tr);
5652 }
5653 
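/* Record @val as the per-cpu entry count for every tracing CPU of @buf. */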
5654 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5655 {
5656 	int cpu;
5657 
5658 	for_each_tracing_cpu(cpu)
5659 		per_cpu_ptr(buf->data, cpu)->entries = val;
5660 }
5661 
5662 #ifdef CONFIG_TRACER_MAX_TRACE
5663 /* resize @trace_buf to the per-cpu entry counts recorded in @size_buf */
5664 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5665 					struct array_buffer *size_buf, int cpu_id)
5666 {
5667 	int cpu, ret = 0;
5668 
5669 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5670 		for_each_tracing_cpu(cpu) {
5671 			ret = ring_buffer_resize(trace_buf->buffer,
5672 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5673 			if (ret < 0)
5674 				break;
5675 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5676 				per_cpu_ptr(size_buf->data, cpu)->entries;
5677 		}
5678 	} else {
5679 		ret = ring_buffer_resize(trace_buf->buffer,
5680 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5681 		if (ret == 0)
5682 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5683 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5684 	}
5685 
5686 	return ret;
5687 }
5688 #endif /* CONFIG_TRACER_MAX_TRACE */
5689 
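/*
 * Resize the ring buffer of @tr (and, for tracers that use it, the max
 * "snapshot" buffer) for @cpu or for all CPUs, keeping the per-cpu
 * entry accounting in sync.  Must be called with trace_types_lock held.
 */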
5690 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5691 					unsigned long size, int cpu)
5692 {
5693 	int ret;
5694 
5695 	/*
5696 	 * If the kernel or the user changes the size of the ring buffer,
5697 	 * we use the size that was given and can forget about expanding
5698 	 * it later.
5699 	 */
5700 	ring_buffer_expanded = true;
5701 
5702 	/* May be called before buffers are initialized */
5703 	if (!tr->array_buffer.buffer)
5704 		return 0;
5705 
5706 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5707 	if (ret < 0)
5708 		return ret;
5709 
5710 #ifdef CONFIG_TRACER_MAX_TRACE
5711 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5712 	    !tr->current_trace->use_max_tr)
5713 		goto out;
5714 
5715 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5716 	if (ret < 0) {
5717 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5718 						     &tr->array_buffer, cpu);
5719 		if (r < 0) {
5720 			/*
5721 			 * AARGH! We are left with a max buffer of a
5722 			 * different size!
5723 			 * The max buffer is our "snapshot" buffer.
5724 			 * When a tracer needs a snapshot (one of the
5725 			 * latency tracers), it swaps the max buffer
5726 			 * with the saved snapshot. We succeeded in
5727 			 * updating the size of the main buffer, but
5728 			 * failed to update the size of the max buffer.
5729 			 * Then, when we tried to reset the main buffer
5730 			 * to its original size, we failed there too.
5731 			 * This is very unlikely to happen, but if it
5732 			 * does, warn and kill all tracing.
5733 			 */
5734 			WARN_ON(1);
5735 			tracing_disabled = 1;
5736 		}
5737 		return ret;
5738 	}
5739 
5740 	if (cpu == RING_BUFFER_ALL_CPUS)
5741 		set_buffer_entries(&tr->max_buffer, size);
5742 	else
5743 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5744 
5745  out:
5746 #endif /* CONFIG_TRACER_MAX_TRACE */
5747 
5748 	if (cpu == RING_BUFFER_ALL_CPUS)
5749 		set_buffer_entries(&tr->array_buffer, size);
5750 	else
5751 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5752 
5753 	return ret;
5754 }
5755 
5756 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5757 				  unsigned long size, int cpu_id)
5758 {
5759 	int ret = size;
5760 
5761 	mutex_lock(&trace_types_lock);
5762 
5763 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5764 		/* make sure, this cpu is enabled in the mask */
5765 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5766 			ret = -EINVAL;
5767 			goto out;
5768 		}
5769 	}
5770 
5771 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5772 	if (ret < 0)
5773 		ret = -ENOMEM;
5774 
5775 out:
5776 	mutex_unlock(&trace_types_lock);
5777 
5778 	return ret;
5779 }
5780 
5781 
5782 /**
5783  * tracing_update_buffers - used by tracing facility to expand ring buffers
5784  *
5785  * To save on memory when the tracing is never used on a system with it
5786  * To save memory when tracing is configured in but never used, the
5787  * ring buffers are set to a minimum size at boot. Once a user starts
5788  * to use the tracing facility, they need to grow to their default
5789  * size.
5790  * This function is to be called when a tracer is about to be used.
5791  */
5792 int tracing_update_buffers(void)
5793 {
5794 	int ret = 0;
5795 
5796 	mutex_lock(&trace_types_lock);
5797 	if (!ring_buffer_expanded)
5798 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5799 						RING_BUFFER_ALL_CPUS);
5800 	mutex_unlock(&trace_types_lock);
5801 
5802 	return ret;
5803 }
5804 
5805 struct trace_option_dentry;
5806 
5807 static void
5808 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5809 
5810 /*
5811  * Used to clear out the tracer before deletion of an instance.
5812  * Must have trace_types_lock held.
5813  */
5814 static void tracing_set_nop(struct trace_array *tr)
5815 {
5816 	if (tr->current_trace == &nop_trace)
5817 		return;
5818 
5819 	tr->current_trace->enabled--;
5820 
5821 	if (tr->current_trace->reset)
5822 		tr->current_trace->reset(tr);
5823 
5824 	tr->current_trace = &nop_trace;
5825 }
5826 
5827 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5828 {
5829 	/* Only enable if the directory has been created already. */
5830 	if (!tr->dir)
5831 		return;
5832 
5833 	create_trace_option_files(tr, t);
5834 }
5835 
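/*
 * Switch @tr to the tracer named @buf.  Expands the ring buffer if it
 * has not been expanded yet, tears down the current tracer, and sets up
 * the new one.  Fails if the name is unknown, the tracer is not allowed
 * for this trace array, or trace pipe readers hold a reference.
 */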
5836 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5837 {
5838 	struct tracer *t;
5839 #ifdef CONFIG_TRACER_MAX_TRACE
5840 	bool had_max_tr;
5841 #endif
5842 	int ret = 0;
5843 
5844 	mutex_lock(&trace_types_lock);
5845 
5846 	if (!ring_buffer_expanded) {
5847 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5848 						RING_BUFFER_ALL_CPUS);
5849 		if (ret < 0)
5850 			goto out;
5851 		ret = 0;
5852 	}
5853 
5854 	for (t = trace_types; t; t = t->next) {
5855 		if (strcmp(t->name, buf) == 0)
5856 			break;
5857 	}
5858 	if (!t) {
5859 		ret = -EINVAL;
5860 		goto out;
5861 	}
5862 	if (t == tr->current_trace)
5863 		goto out;
5864 
5865 #ifdef CONFIG_TRACER_SNAPSHOT
5866 	if (t->use_max_tr) {
5867 		arch_spin_lock(&tr->max_lock);
5868 		if (tr->cond_snapshot)
5869 			ret = -EBUSY;
5870 		arch_spin_unlock(&tr->max_lock);
5871 		if (ret)
5872 			goto out;
5873 	}
5874 #endif
5875 	/* Some tracers cannot be enabled from the kernel command line (before boot finishes) */
5876 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5877 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5878 			t->name);
5879 		goto out;
5880 	}
5881 
5882 	/* Some tracers are only allowed for the top level buffer */
5883 	if (!trace_ok_for_array(t, tr)) {
5884 		ret = -EINVAL;
5885 		goto out;
5886 	}
5887 
5888 	/* If trace pipe files are being read, we can't change the tracer */
5889 	if (tr->current_trace->ref) {
5890 		ret = -EBUSY;
5891 		goto out;
5892 	}
5893 
5894 	trace_branch_disable();
5895 
5896 	tr->current_trace->enabled--;
5897 
5898 	if (tr->current_trace->reset)
5899 		tr->current_trace->reset(tr);
5900 
5901 	/* Current trace needs to be nop_trace before synchronize_rcu */
5902 	tr->current_trace = &nop_trace;
5903 
5904 #ifdef CONFIG_TRACER_MAX_TRACE
5905 	had_max_tr = tr->allocated_snapshot;
5906 
5907 	if (had_max_tr && !t->use_max_tr) {
5908 		/*
5909 		 * We need to make sure that update_max_tr() sees that
5910 		 * current_trace changed to nop_trace, to keep it from
5911 		 * swapping the buffers after we resize them.
5912 		 * update_max_tr() is called with interrupts disabled,
5913 		 * so synchronize_rcu() is sufficient.
5914 		 */
5915 		synchronize_rcu();
5916 		free_snapshot(tr);
5917 	}
5918 #endif
5919 
5920 #ifdef CONFIG_TRACER_MAX_TRACE
5921 	if (t->use_max_tr && !had_max_tr) {
5922 		ret = tracing_alloc_snapshot_instance(tr);
5923 		if (ret < 0)
5924 			goto out;
5925 	}
5926 #endif
5927 
5928 	if (t->init) {
5929 		ret = tracer_init(t, tr);
5930 		if (ret)
5931 			goto out;
5932 	}
5933 
5934 	tr->current_trace = t;
5935 	tr->current_trace->enabled++;
5936 	trace_branch_enable(tr);
5937  out:
5938 	mutex_unlock(&trace_types_lock);
5939 
5940 	return ret;
5941 }
5942 
5943 static ssize_t
5944 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5945 			size_t cnt, loff_t *ppos)
5946 {
5947 	struct trace_array *tr = filp->private_data;
5948 	char buf[MAX_TRACER_SIZE+1];
5949 	int i;
5950 	size_t ret;
5951 	int err;
5952 
5953 	ret = cnt;
5954 
5955 	if (cnt > MAX_TRACER_SIZE)
5956 		cnt = MAX_TRACER_SIZE;
5957 
5958 	if (copy_from_user(buf, ubuf, cnt))
5959 		return -EFAULT;
5960 
5961 	buf[cnt] = 0;
5962 
5963 	/* strip trailing whitespace */
5964 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5965 		buf[i] = 0;
5966 
5967 	err = tracing_set_tracer(tr, buf);
5968 	if (err)
5969 		return err;
5970 
5971 	*ppos += ret;
5972 
5973 	return ret;
5974 }
5975 
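/*
 * The latency/threshold values below are kept in nanoseconds internally
 * but exposed through their files in microseconds: reads convert with
 * nsecs_to_usecs() (-1 is shown as-is), writes multiply by 1000.
 */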
5976 static ssize_t
5977 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5978 		   size_t cnt, loff_t *ppos)
5979 {
5980 	char buf[64];
5981 	int r;
5982 
5983 	r = snprintf(buf, sizeof(buf), "%ld\n",
5984 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5985 	if (r > sizeof(buf))
5986 		r = sizeof(buf);
5987 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5988 }
5989 
5990 static ssize_t
5991 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5992 		    size_t cnt, loff_t *ppos)
5993 {
5994 	unsigned long val;
5995 	int ret;
5996 
5997 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5998 	if (ret)
5999 		return ret;
6000 
6001 	*ptr = val * 1000;
6002 
6003 	return cnt;
6004 }
6005 
6006 static ssize_t
6007 tracing_thresh_read(struct file *filp, char __user *ubuf,
6008 		    size_t cnt, loff_t *ppos)
6009 {
6010 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6011 }
6012 
6013 static ssize_t
6014 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6015 		     size_t cnt, loff_t *ppos)
6016 {
6017 	struct trace_array *tr = filp->private_data;
6018 	int ret;
6019 
6020 	mutex_lock(&trace_types_lock);
6021 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6022 	if (ret < 0)
6023 		goto out;
6024 
6025 	if (tr->current_trace->update_thresh) {
6026 		ret = tr->current_trace->update_thresh(tr);
6027 		if (ret < 0)
6028 			goto out;
6029 	}
6030 
6031 	ret = cnt;
6032 out:
6033 	mutex_unlock(&trace_types_lock);
6034 
6035 	return ret;
6036 }
6037 
6038 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6039 
6040 static ssize_t
6041 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6042 		     size_t cnt, loff_t *ppos)
6043 {
6044 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6045 }
6046 
6047 static ssize_t
6048 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6049 		      size_t cnt, loff_t *ppos)
6050 {
6051 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6052 }
6053 
6054 #endif
6055 
6056 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6057 {
6058 	struct trace_array *tr = inode->i_private;
6059 	struct trace_iterator *iter;
6060 	int ret;
6061 
6062 	ret = tracing_check_open_get_tr(tr);
6063 	if (ret)
6064 		return ret;
6065 
6066 	mutex_lock(&trace_types_lock);
6067 
6068 	/* create a buffer to store the information to pass to userspace */
6069 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6070 	if (!iter) {
6071 		ret = -ENOMEM;
6072 		__trace_array_put(tr);
6073 		goto out;
6074 	}
6075 
6076 	trace_seq_init(&iter->seq);
6077 	iter->trace = tr->current_trace;
6078 
6079 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6080 		ret = -ENOMEM;
6081 		goto fail;
6082 	}
6083 
6084 	/* trace pipe does not show start of buffer */
6085 	cpumask_setall(iter->started);
6086 
6087 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6088 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6089 
6090 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6091 	if (trace_clocks[tr->clock_id].in_ns)
6092 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6093 
6094 	iter->tr = tr;
6095 	iter->array_buffer = &tr->array_buffer;
6096 	iter->cpu_file = tracing_get_cpu(inode);
6097 	mutex_init(&iter->mutex);
6098 	filp->private_data = iter;
6099 
6100 	if (iter->trace->pipe_open)
6101 		iter->trace->pipe_open(iter);
6102 
6103 	nonseekable_open(inode, filp);
6104 
6105 	tr->current_trace->ref++;
6106 out:
6107 	mutex_unlock(&trace_types_lock);
6108 	return ret;
6109 
6110 fail:
6111 	kfree(iter);
6112 	__trace_array_put(tr);
6113 	mutex_unlock(&trace_types_lock);
6114 	return ret;
6115 }
6116 
6117 static int tracing_release_pipe(struct inode *inode, struct file *file)
6118 {
6119 	struct trace_iterator *iter = file->private_data;
6120 	struct trace_array *tr = inode->i_private;
6121 
6122 	mutex_lock(&trace_types_lock);
6123 
6124 	tr->current_trace->ref--;
6125 
6126 	if (iter->trace->pipe_close)
6127 		iter->trace->pipe_close(iter);
6128 
6129 	mutex_unlock(&trace_types_lock);
6130 
6131 	free_cpumask_var(iter->started);
6132 	mutex_destroy(&iter->mutex);
6133 	kfree(iter);
6134 
6135 	trace_array_put(tr);
6136 
6137 	return 0;
6138 }
6139 
6140 static __poll_t
6141 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6142 {
6143 	struct trace_array *tr = iter->tr;
6144 
6145 	/* Iterators are static, they should be filled or empty */
6146 	if (trace_buffer_iter(iter, iter->cpu_file))
6147 		return EPOLLIN | EPOLLRDNORM;
6148 
6149 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6150 		/*
6151 		 * Always select as readable when in blocking mode
6152 		 */
6153 		return EPOLLIN | EPOLLRDNORM;
6154 	else
6155 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6156 					     filp, poll_table);
6157 }
6158 
6159 static __poll_t
6160 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6161 {
6162 	struct trace_iterator *iter = filp->private_data;
6163 
6164 	return trace_poll(iter, filp, poll_table);
6165 }
6166 
6167 /* Must be called with iter->mutex held. */
6168 static int tracing_wait_pipe(struct file *filp)
6169 {
6170 	struct trace_iterator *iter = filp->private_data;
6171 	int ret;
6172 
6173 	while (trace_empty(iter)) {
6174 
6175 		if ((filp->f_flags & O_NONBLOCK)) {
6176 			return -EAGAIN;
6177 		}
6178 
6179 		/*
6180 		 * We block until we have read something and tracing is
6181 		 * disabled. We keep blocking if tracing is disabled but we
6182 		 * have never read anything: this allows a user to cat this
6183 		 * file and then enable tracing. But once we have read
6184 		 * something, we give an EOF when tracing is disabled again.
6185 		 *
6186 		 * iter->pos will be 0 if we haven't read anything.
6187 		 */
6188 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6189 			break;
6190 
6191 		mutex_unlock(&iter->mutex);
6192 
6193 		ret = wait_on_pipe(iter, 0);
6194 
6195 		mutex_lock(&iter->mutex);
6196 
6197 		if (ret)
6198 			return ret;
6199 	}
6200 
6201 	return 1;
6202 }
6203 
6204 /*
6205  * Consumer reader.
6206  */
6207 static ssize_t
6208 tracing_read_pipe(struct file *filp, char __user *ubuf,
6209 		  size_t cnt, loff_t *ppos)
6210 {
6211 	struct trace_iterator *iter = filp->private_data;
6212 	ssize_t sret;
6213 
6214 	/*
6215 	 * Avoid more than one consumer on a single file descriptor.
6216 	 * This is just a matter of trace coherency; the ring buffer itself
6217 	 * is protected.
6218 	 */
6219 	mutex_lock(&iter->mutex);
6220 
6221 	/* return any leftover data */
6222 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6223 	if (sret != -EBUSY)
6224 		goto out;
6225 
6226 	trace_seq_init(&iter->seq);
6227 
6228 	if (iter->trace->read) {
6229 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6230 		if (sret)
6231 			goto out;
6232 	}
6233 
6234 waitagain:
6235 	sret = tracing_wait_pipe(filp);
6236 	if (sret <= 0)
6237 		goto out;
6238 
6239 	/* stop when tracing is finished */
6240 	if (trace_empty(iter)) {
6241 		sret = 0;
6242 		goto out;
6243 	}
6244 
6245 	if (cnt >= PAGE_SIZE)
6246 		cnt = PAGE_SIZE - 1;
6247 
6248 	/* reset all but tr, trace, and overruns */
6249 	memset(&iter->seq, 0,
6250 	       sizeof(struct trace_iterator) -
6251 	       offsetof(struct trace_iterator, seq));
6252 	cpumask_clear(iter->started);
6253 	trace_seq_init(&iter->seq);
6254 	iter->pos = -1;
6255 
6256 	trace_event_read_lock();
6257 	trace_access_lock(iter->cpu_file);
6258 	while (trace_find_next_entry_inc(iter) != NULL) {
6259 		enum print_line_t ret;
6260 		int save_len = iter->seq.seq.len;
6261 
6262 		ret = print_trace_line(iter);
6263 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6264 			/* don't print partial lines */
6265 			iter->seq.seq.len = save_len;
6266 			break;
6267 		}
6268 		if (ret != TRACE_TYPE_NO_CONSUME)
6269 			trace_consume(iter);
6270 
6271 		if (trace_seq_used(&iter->seq) >= cnt)
6272 			break;
6273 
6274 		/*
6275 		 * The full flag being set means we reached the trace_seq buffer
6276 		 * size and should have left via the partial-line check above.
6277 		 * One of the trace_seq_* functions is not being used properly.
6278 		 */
6279 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6280 			  iter->ent->type);
6281 	}
6282 	trace_access_unlock(iter->cpu_file);
6283 	trace_event_read_unlock();
6284 
6285 	/* Now copy what we have to the user */
6286 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6287 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6288 		trace_seq_init(&iter->seq);
6289 
6290 	/*
6291 	 * If there was nothing to send to user, in spite of consuming trace
6292 	 * entries, go back to wait for more entries.
6293 	 */
6294 	if (sret == -EBUSY)
6295 		goto waitagain;
6296 
6297 out:
6298 	mutex_unlock(&iter->mutex);
6299 
6300 	return sret;
6301 }
6302 
6303 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6304 				     unsigned int idx)
6305 {
6306 	__free_page(spd->pages[idx]);
6307 }
6308 
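/*
 * Fill iter->seq with formatted trace lines until either the page-sized
 * seq buffer fills up or @rem bytes' worth of output has been produced.
 * Consumed entries are removed from the ring buffer.  Returns how many
 * of the requested @rem bytes are still left to fill.
 */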
6309 static size_t
6310 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6311 {
6312 	size_t count;
6313 	int save_len;
6314 	int ret;
6315 
6316 	/* Seq buffer is page-sized, exactly what we need. */
6317 	for (;;) {
6318 		save_len = iter->seq.seq.len;
6319 		ret = print_trace_line(iter);
6320 
6321 		if (trace_seq_has_overflowed(&iter->seq)) {
6322 			iter->seq.seq.len = save_len;
6323 			break;
6324 		}
6325 
6326 		/*
6327 		 * This should not be hit, because a partial line should only
6328 		 * be returned if iter->seq overflowed, which is handled
6329 		 * above. But check anyway to be safe.
6330 		 */
6331 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6332 			iter->seq.seq.len = save_len;
6333 			break;
6334 		}
6335 
6336 		count = trace_seq_used(&iter->seq) - save_len;
6337 		if (rem < count) {
6338 			rem = 0;
6339 			iter->seq.seq.len = save_len;
6340 			break;
6341 		}
6342 
6343 		if (ret != TRACE_TYPE_NO_CONSUME)
6344 			trace_consume(iter);
6345 		rem -= count;
6346 		if (!trace_find_next_entry_inc(iter))	{
6347 			rem = 0;
6348 			iter->ent = NULL;
6349 			break;
6350 		}
6351 	}
6352 
6353 	return rem;
6354 }
6355 
6356 static ssize_t tracing_splice_read_pipe(struct file *filp,
6357 					loff_t *ppos,
6358 					struct pipe_inode_info *pipe,
6359 					size_t len,
6360 					unsigned int flags)
6361 {
6362 	struct page *pages_def[PIPE_DEF_BUFFERS];
6363 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6364 	struct trace_iterator *iter = filp->private_data;
6365 	struct splice_pipe_desc spd = {
6366 		.pages		= pages_def,
6367 		.partial	= partial_def,
6368 		.nr_pages	= 0, /* This gets updated below. */
6369 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6370 		.ops		= &default_pipe_buf_ops,
6371 		.spd_release	= tracing_spd_release_pipe,
6372 	};
6373 	ssize_t ret;
6374 	size_t rem;
6375 	unsigned int i;
6376 
6377 	if (splice_grow_spd(pipe, &spd))
6378 		return -ENOMEM;
6379 
6380 	mutex_lock(&iter->mutex);
6381 
6382 	if (iter->trace->splice_read) {
6383 		ret = iter->trace->splice_read(iter, filp,
6384 					       ppos, pipe, len, flags);
6385 		if (ret)
6386 			goto out_err;
6387 	}
6388 
6389 	ret = tracing_wait_pipe(filp);
6390 	if (ret <= 0)
6391 		goto out_err;
6392 
6393 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6394 		ret = -EFAULT;
6395 		goto out_err;
6396 	}
6397 
6398 	trace_event_read_lock();
6399 	trace_access_lock(iter->cpu_file);
6400 
6401 	/* Fill as many pages as possible. */
6402 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6403 		spd.pages[i] = alloc_page(GFP_KERNEL);
6404 		if (!spd.pages[i])
6405 			break;
6406 
6407 		rem = tracing_fill_pipe_page(rem, iter);
6408 
6409 		/* Copy the data into the page, so we can start over. */
6410 		ret = trace_seq_to_buffer(&iter->seq,
6411 					  page_address(spd.pages[i]),
6412 					  trace_seq_used(&iter->seq));
6413 		if (ret < 0) {
6414 			__free_page(spd.pages[i]);
6415 			break;
6416 		}
6417 		spd.partial[i].offset = 0;
6418 		spd.partial[i].len = trace_seq_used(&iter->seq);
6419 
6420 		trace_seq_init(&iter->seq);
6421 	}
6422 
6423 	trace_access_unlock(iter->cpu_file);
6424 	trace_event_read_unlock();
6425 	mutex_unlock(&iter->mutex);
6426 
6427 	spd.nr_pages = i;
6428 
6429 	if (i)
6430 		ret = splice_to_pipe(pipe, &spd);
6431 	else
6432 		ret = 0;
6433 out:
6434 	splice_shrink_spd(&spd);
6435 	return ret;
6436 
6437 out_err:
6438 	mutex_unlock(&iter->mutex);
6439 	goto out;
6440 }
6441 
6442 static ssize_t
6443 tracing_entries_read(struct file *filp, char __user *ubuf,
6444 		     size_t cnt, loff_t *ppos)
6445 {
6446 	struct inode *inode = file_inode(filp);
6447 	struct trace_array *tr = inode->i_private;
6448 	int cpu = tracing_get_cpu(inode);
6449 	char buf[64];
6450 	int r = 0;
6451 	ssize_t ret;
6452 
6453 	mutex_lock(&trace_types_lock);
6454 
6455 	if (cpu == RING_BUFFER_ALL_CPUS) {
6456 		int cpu, buf_size_same;
6457 		unsigned long size;
6458 
6459 		size = 0;
6460 		buf_size_same = 1;
6461 		/* check if all per-cpu buffer sizes are the same */
6462 		for_each_tracing_cpu(cpu) {
6463 			/* fill in the size from the first enabled cpu */
6464 			if (size == 0)
6465 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6466 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6467 				buf_size_same = 0;
6468 				break;
6469 			}
6470 		}
6471 
6472 		if (buf_size_same) {
6473 			if (!ring_buffer_expanded)
6474 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6475 					    size >> 10,
6476 					    trace_buf_size >> 10);
6477 			else
6478 				r = sprintf(buf, "%lu\n", size >> 10);
6479 		} else
6480 			r = sprintf(buf, "X\n");
6481 	} else
6482 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6483 
6484 	mutex_unlock(&trace_types_lock);
6485 
6486 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6487 	return ret;
6488 }
6489 
6490 static ssize_t
6491 tracing_entries_write(struct file *filp, const char __user *ubuf,
6492 		      size_t cnt, loff_t *ppos)
6493 {
6494 	struct inode *inode = file_inode(filp);
6495 	struct trace_array *tr = inode->i_private;
6496 	unsigned long val;
6497 	int ret;
6498 
6499 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6500 	if (ret)
6501 		return ret;
6502 
6503 	/* must have at least 1 entry */
6504 	if (!val)
6505 		return -EINVAL;
6506 
6507 	/* value is in KB */
6508 	val <<= 10;
6509 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6510 	if (ret < 0)
6511 		return ret;
6512 
6513 	*ppos += cnt;
6514 
6515 	return cnt;
6516 }
6517 
6518 static ssize_t
6519 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6520 				size_t cnt, loff_t *ppos)
6521 {
6522 	struct trace_array *tr = filp->private_data;
6523 	char buf[64];
6524 	int r, cpu;
6525 	unsigned long size = 0, expanded_size = 0;
6526 
6527 	mutex_lock(&trace_types_lock);
6528 	for_each_tracing_cpu(cpu) {
6529 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6530 		if (!ring_buffer_expanded)
6531 			expanded_size += trace_buf_size >> 10;
6532 	}
6533 	if (ring_buffer_expanded)
6534 		r = sprintf(buf, "%lu\n", size);
6535 	else
6536 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6537 	mutex_unlock(&trace_types_lock);
6538 
6539 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6540 }
6541 
6542 static ssize_t
6543 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6544 			  size_t cnt, loff_t *ppos)
6545 {
6546 	/*
6547 	 * There is no need to read what the user has written; this function
6548 	 * exists only so that "echo" to this file does not return an error.
6549 	 */
6550 
6551 	*ppos += cnt;
6552 
6553 	return cnt;
6554 }
6555 
6556 static int
6557 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6558 {
6559 	struct trace_array *tr = inode->i_private;
6560 
6561 	/* disable tracing ? */
6562 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6563 		tracer_tracing_off(tr);
6564 	/* resize the ring buffer to 0 */
6565 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6566 
6567 	trace_array_put(tr);
6568 
6569 	return 0;
6570 }
6571 
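/*
 * Write handler for the trace_marker file: reserve a TRACE_PRINT event
 * in the ring buffer, copy the user string into it (substituting
 * "<faulted>" if the copy faults), run any triggers attached to the
 * trace_marker event, and commit the event.
 */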
6572 static ssize_t
6573 tracing_mark_write(struct file *filp, const char __user *ubuf,
6574 					size_t cnt, loff_t *fpos)
6575 {
6576 	struct trace_array *tr = filp->private_data;
6577 	struct ring_buffer_event *event;
6578 	enum event_trigger_type tt = ETT_NONE;
6579 	struct trace_buffer *buffer;
6580 	struct print_entry *entry;
6581 	unsigned long irq_flags;
6582 	ssize_t written;
6583 	int size;
6584 	int len;
6585 
6586 /* Used in tracing_mark_raw_write() as well */
6587 #define FAULTED_STR "<faulted>"
6588 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6589 
6590 	if (tracing_disabled)
6591 		return -EINVAL;
6592 
6593 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6594 		return -EINVAL;
6595 
6596 	if (cnt > TRACE_BUF_SIZE)
6597 		cnt = TRACE_BUF_SIZE;
6598 
6599 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6600 
6601 	local_save_flags(irq_flags);
6602 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6603 
6604 	/* If the write is shorter than "<faulted>", make sure we can still fit that string */
6605 	if (cnt < FAULTED_SIZE)
6606 		size += FAULTED_SIZE - cnt;
6607 
6608 	buffer = tr->array_buffer.buffer;
6609 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6610 					    irq_flags, preempt_count());
6611 	if (unlikely(!event))
6612 		/* Ring buffer disabled, return as if not open for write */
6613 		return -EBADF;
6614 
6615 	entry = ring_buffer_event_data(event);
6616 	entry->ip = _THIS_IP_;
6617 
6618 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6619 	if (len) {
6620 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6621 		cnt = FAULTED_SIZE;
6622 		written = -EFAULT;
6623 	} else
6624 		written = cnt;
6625 	len = cnt;
6626 
6627 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6628 		/* do not add \n before testing triggers, but add \0 */
6629 		entry->buf[cnt] = '\0';
6630 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6631 	}
6632 
6633 	if (entry->buf[cnt - 1] != '\n') {
6634 		entry->buf[cnt] = '\n';
6635 		entry->buf[cnt + 1] = '\0';
6636 	} else
6637 		entry->buf[cnt] = '\0';
6638 
6639 	__buffer_unlock_commit(buffer, event);
6640 
6641 	if (tt)
6642 		event_triggers_post_call(tr->trace_marker_file, tt);
6643 
6644 	if (written > 0)
6645 		*fpos += written;
6646 
6647 	return written;
6648 }
6649 
6650 /* Limit it for now to 3K (including tag) */
6651 #define RAW_DATA_MAX_SIZE (1024*3)
6652 
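/*
 * Write handler for the trace_marker_raw file: like tracing_mark_write(),
 * but records a TRACE_RAW_DATA event whose payload must start with a
 * binary tag id (an unsigned int) followed by raw data.
 */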
6653 static ssize_t
6654 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6655 					size_t cnt, loff_t *fpos)
6656 {
6657 	struct trace_array *tr = filp->private_data;
6658 	struct ring_buffer_event *event;
6659 	struct trace_buffer *buffer;
6660 	struct raw_data_entry *entry;
6661 	unsigned long irq_flags;
6662 	ssize_t written;
6663 	int size;
6664 	int len;
6665 
6666 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6667 
6668 	if (tracing_disabled)
6669 		return -EINVAL;
6670 
6671 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6672 		return -EINVAL;
6673 
6674 	/* The marker must at least have a tag id */
6675 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6676 		return -EINVAL;
6677 
6678 	if (cnt > TRACE_BUF_SIZE)
6679 		cnt = TRACE_BUF_SIZE;
6680 
6681 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6682 
6683 	local_save_flags(irq_flags);
6684 	size = sizeof(*entry) + cnt;
6685 	if (cnt < FAULT_SIZE_ID)
6686 		size += FAULT_SIZE_ID - cnt;
6687 
6688 	buffer = tr->array_buffer.buffer;
6689 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6690 					    irq_flags, preempt_count());
6691 	if (!event)
6692 		/* Ring buffer disabled, return as if not open for write */
6693 		return -EBADF;
6694 
6695 	entry = ring_buffer_event_data(event);
6696 
6697 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6698 	if (len) {
6699 		entry->id = -1;
6700 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6701 		written = -EFAULT;
6702 	} else
6703 		written = cnt;
6704 
6705 	__buffer_unlock_commit(buffer, event);
6706 
6707 	if (written > 0)
6708 		*fpos += written;
6709 
6710 	return written;
6711 }
6712 
6713 static int tracing_clock_show(struct seq_file *m, void *v)
6714 {
6715 	struct trace_array *tr = m->private;
6716 	int i;
6717 
6718 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6719 		seq_printf(m,
6720 			"%s%s%s%s", i ? " " : "",
6721 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6722 			i == tr->clock_id ? "]" : "");
6723 	seq_putc(m, '\n');
6724 
6725 	return 0;
6726 }
6727 
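/*
 * Switch the trace clock of @tr to the clock named @clockstr.  The
 * buffers are reset because timestamps taken with different clocks
 * cannot be compared.
 */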
6728 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6729 {
6730 	int i;
6731 
6732 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6733 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6734 			break;
6735 	}
6736 	if (i == ARRAY_SIZE(trace_clocks))
6737 		return -EINVAL;
6738 
6739 	mutex_lock(&trace_types_lock);
6740 
6741 	tr->clock_id = i;
6742 
6743 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6744 
6745 	/*
6746 	 * New clock may not be consistent with the previous clock.
6747 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6748 	 */
6749 	tracing_reset_online_cpus(&tr->array_buffer);
6750 
6751 #ifdef CONFIG_TRACER_MAX_TRACE
6752 	if (tr->max_buffer.buffer)
6753 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6754 	tracing_reset_online_cpus(&tr->max_buffer);
6755 #endif
6756 
6757 	mutex_unlock(&trace_types_lock);
6758 
6759 	return 0;
6760 }
6761 
6762 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6763 				   size_t cnt, loff_t *fpos)
6764 {
6765 	struct seq_file *m = filp->private_data;
6766 	struct trace_array *tr = m->private;
6767 	char buf[64];
6768 	const char *clockstr;
6769 	int ret;
6770 
6771 	if (cnt >= sizeof(buf))
6772 		return -EINVAL;
6773 
6774 	if (copy_from_user(buf, ubuf, cnt))
6775 		return -EFAULT;
6776 
6777 	buf[cnt] = 0;
6778 
6779 	clockstr = strstrip(buf);
6780 
6781 	ret = tracing_set_clock(tr, clockstr);
6782 	if (ret)
6783 		return ret;
6784 
6785 	*fpos += cnt;
6786 
6787 	return cnt;
6788 }
6789 
6790 static int tracing_clock_open(struct inode *inode, struct file *file)
6791 {
6792 	struct trace_array *tr = inode->i_private;
6793 	int ret;
6794 
6795 	ret = tracing_check_open_get_tr(tr);
6796 	if (ret)
6797 		return ret;
6798 
6799 	ret = single_open(file, tracing_clock_show, inode->i_private);
6800 	if (ret < 0)
6801 		trace_array_put(tr);
6802 
6803 	return ret;
6804 }
6805 
6806 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6807 {
6808 	struct trace_array *tr = m->private;
6809 
6810 	mutex_lock(&trace_types_lock);
6811 
6812 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6813 		seq_puts(m, "delta [absolute]\n");
6814 	else
6815 		seq_puts(m, "[delta] absolute\n");
6816 
6817 	mutex_unlock(&trace_types_lock);
6818 
6819 	return 0;
6820 }
6821 
6822 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6823 {
6824 	struct trace_array *tr = inode->i_private;
6825 	int ret;
6826 
6827 	ret = tracing_check_open_get_tr(tr);
6828 	if (ret)
6829 		return ret;
6830 
6831 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6832 	if (ret < 0)
6833 		trace_array_put(tr);
6834 
6835 	return ret;
6836 }
6837 
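/*
 * Reference-counted enable/disable of absolute timestamps for the
 * buffers of @tr: the mode is only switched on the first enable and
 * the last disable.
 */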
6838 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6839 {
6840 	int ret = 0;
6841 
6842 	mutex_lock(&trace_types_lock);
6843 
6844 	if (abs && tr->time_stamp_abs_ref++)
6845 		goto out;
6846 
6847 	if (!abs) {
6848 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6849 			ret = -EINVAL;
6850 			goto out;
6851 		}
6852 
6853 		if (--tr->time_stamp_abs_ref)
6854 			goto out;
6855 	}
6856 
6857 	ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6858 
6859 #ifdef CONFIG_TRACER_MAX_TRACE
6860 	if (tr->max_buffer.buffer)
6861 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6862 #endif
6863  out:
6864 	mutex_unlock(&trace_types_lock);
6865 
6866 	return ret;
6867 }
6868 
6869 struct ftrace_buffer_info {
6870 	struct trace_iterator	iter;
6871 	void			*spare;
6872 	unsigned int		spare_cpu;
6873 	unsigned int		read;
6874 };
6875 
6876 #ifdef CONFIG_TRACER_SNAPSHOT
6877 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6878 {
6879 	struct trace_array *tr = inode->i_private;
6880 	struct trace_iterator *iter;
6881 	struct seq_file *m;
6882 	int ret;
6883 
6884 	ret = tracing_check_open_get_tr(tr);
6885 	if (ret)
6886 		return ret;
6887 
6888 	if (file->f_mode & FMODE_READ) {
6889 		iter = __tracing_open(inode, file, true);
6890 		if (IS_ERR(iter))
6891 			ret = PTR_ERR(iter);
6892 	} else {
6893 		/* Writes still need the seq_file to hold the private data */
6894 		ret = -ENOMEM;
6895 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6896 		if (!m)
6897 			goto out;
6898 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6899 		if (!iter) {
6900 			kfree(m);
6901 			goto out;
6902 		}
6903 		ret = 0;
6904 
6905 		iter->tr = tr;
6906 		iter->array_buffer = &tr->max_buffer;
6907 		iter->cpu_file = tracing_get_cpu(inode);
6908 		m->private = iter;
6909 		file->private_data = m;
6910 	}
6911 out:
6912 	if (ret < 0)
6913 		trace_array_put(tr);
6914 
6915 	return ret;
6916 }
6917 
6918 static ssize_t
6919 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6920 		       loff_t *ppos)
6921 {
6922 	struct seq_file *m = filp->private_data;
6923 	struct trace_iterator *iter = m->private;
6924 	struct trace_array *tr = iter->tr;
6925 	unsigned long val;
6926 	int ret;
6927 
6928 	ret = tracing_update_buffers();
6929 	if (ret < 0)
6930 		return ret;
6931 
6932 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6933 	if (ret)
6934 		return ret;
6935 
6936 	mutex_lock(&trace_types_lock);
6937 
6938 	if (tr->current_trace->use_max_tr) {
6939 		ret = -EBUSY;
6940 		goto out;
6941 	}
6942 
6943 	arch_spin_lock(&tr->max_lock);
6944 	if (tr->cond_snapshot)
6945 		ret = -EBUSY;
6946 	arch_spin_unlock(&tr->max_lock);
6947 	if (ret)
6948 		goto out;
6949 
6950 	switch (val) {
6951 	case 0:
6952 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6953 			ret = -EINVAL;
6954 			break;
6955 		}
6956 		if (tr->allocated_snapshot)
6957 			free_snapshot(tr);
6958 		break;
6959 	case 1:
6960 /* Only allow per-cpu swap if the ring buffer supports it */
6961 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6962 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6963 			ret = -EINVAL;
6964 			break;
6965 		}
6966 #endif
6967 		if (tr->allocated_snapshot)
6968 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6969 					&tr->array_buffer, iter->cpu_file);
6970 		else
6971 			ret = tracing_alloc_snapshot_instance(tr);
6972 		if (ret < 0)
6973 			break;
6974 		local_irq_disable();
6975 		/* Now, we're going to swap */
6976 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6977 			update_max_tr(tr, current, smp_processor_id(), NULL);
6978 		else
6979 			update_max_tr_single(tr, current, iter->cpu_file);
6980 		local_irq_enable();
6981 		break;
6982 	default:
6983 		if (tr->allocated_snapshot) {
6984 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6985 				tracing_reset_online_cpus(&tr->max_buffer);
6986 			else
6987 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6988 		}
6989 		break;
6990 	}
6991 
6992 	if (ret >= 0) {
6993 		*ppos += cnt;
6994 		ret = cnt;
6995 	}
6996 out:
6997 	mutex_unlock(&trace_types_lock);
6998 	return ret;
6999 }
7000 
7001 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7002 {
7003 	struct seq_file *m = file->private_data;
7004 	int ret;
7005 
7006 	ret = tracing_release(inode, file);
7007 
7008 	if (file->f_mode & FMODE_READ)
7009 		return ret;
7010 
7011 	/* If write only, the seq_file is just a stub */
7012 	if (m)
7013 		kfree(m->private);
7014 	kfree(m);
7015 
7016 	return 0;
7017 }
7018 
7019 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7020 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7021 				    size_t count, loff_t *ppos);
7022 static int tracing_buffers_release(struct inode *inode, struct file *file);
7023 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7024 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7025 
7026 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7027 {
7028 	struct ftrace_buffer_info *info;
7029 	int ret;
7030 
7031 	/* The following checks for tracefs lockdown */
7032 	ret = tracing_buffers_open(inode, filp);
7033 	if (ret < 0)
7034 		return ret;
7035 
7036 	info = filp->private_data;
7037 
7038 	if (info->iter.trace->use_max_tr) {
7039 		tracing_buffers_release(inode, filp);
7040 		return -EBUSY;
7041 	}
7042 
7043 	info->iter.snapshot = true;
7044 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7045 
7046 	return ret;
7047 }
7048 
7049 #endif /* CONFIG_TRACER_SNAPSHOT */
7050 
7051 
7052 static const struct file_operations tracing_thresh_fops = {
7053 	.open		= tracing_open_generic,
7054 	.read		= tracing_thresh_read,
7055 	.write		= tracing_thresh_write,
7056 	.llseek		= generic_file_llseek,
7057 };
7058 
7059 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7060 static const struct file_operations tracing_max_lat_fops = {
7061 	.open		= tracing_open_generic,
7062 	.read		= tracing_max_lat_read,
7063 	.write		= tracing_max_lat_write,
7064 	.llseek		= generic_file_llseek,
7065 };
7066 #endif
7067 
7068 static const struct file_operations set_tracer_fops = {
7069 	.open		= tracing_open_generic,
7070 	.read		= tracing_set_trace_read,
7071 	.write		= tracing_set_trace_write,
7072 	.llseek		= generic_file_llseek,
7073 };
7074 
7075 static const struct file_operations tracing_pipe_fops = {
7076 	.open		= tracing_open_pipe,
7077 	.poll		= tracing_poll_pipe,
7078 	.read		= tracing_read_pipe,
7079 	.splice_read	= tracing_splice_read_pipe,
7080 	.release	= tracing_release_pipe,
7081 	.llseek		= no_llseek,
7082 };
7083 
7084 static const struct file_operations tracing_entries_fops = {
7085 	.open		= tracing_open_generic_tr,
7086 	.read		= tracing_entries_read,
7087 	.write		= tracing_entries_write,
7088 	.llseek		= generic_file_llseek,
7089 	.release	= tracing_release_generic_tr,
7090 };
7091 
7092 static const struct file_operations tracing_total_entries_fops = {
7093 	.open		= tracing_open_generic_tr,
7094 	.read		= tracing_total_entries_read,
7095 	.llseek		= generic_file_llseek,
7096 	.release	= tracing_release_generic_tr,
7097 };
7098 
7099 static const struct file_operations tracing_free_buffer_fops = {
7100 	.open		= tracing_open_generic_tr,
7101 	.write		= tracing_free_buffer_write,
7102 	.release	= tracing_free_buffer_release,
7103 };
7104 
7105 static const struct file_operations tracing_mark_fops = {
7106 	.open		= tracing_open_generic_tr,
7107 	.write		= tracing_mark_write,
7108 	.llseek		= generic_file_llseek,
7109 	.release	= tracing_release_generic_tr,
7110 };
7111 
7112 static const struct file_operations tracing_mark_raw_fops = {
7113 	.open		= tracing_open_generic_tr,
7114 	.write		= tracing_mark_raw_write,
7115 	.llseek		= generic_file_llseek,
7116 	.release	= tracing_release_generic_tr,
7117 };
7118 
7119 static const struct file_operations trace_clock_fops = {
7120 	.open		= tracing_clock_open,
7121 	.read		= seq_read,
7122 	.llseek		= seq_lseek,
7123 	.release	= tracing_single_release_tr,
7124 	.write		= tracing_clock_write,
7125 };
7126 
7127 static const struct file_operations trace_time_stamp_mode_fops = {
7128 	.open		= tracing_time_stamp_mode_open,
7129 	.read		= seq_read,
7130 	.llseek		= seq_lseek,
7131 	.release	= tracing_single_release_tr,
7132 };
7133 
7134 #ifdef CONFIG_TRACER_SNAPSHOT
7135 static const struct file_operations snapshot_fops = {
7136 	.open		= tracing_snapshot_open,
7137 	.read		= seq_read,
7138 	.write		= tracing_snapshot_write,
7139 	.llseek		= tracing_lseek,
7140 	.release	= tracing_snapshot_release,
7141 };
7142 
7143 static const struct file_operations snapshot_raw_fops = {
7144 	.open		= snapshot_raw_open,
7145 	.read		= tracing_buffers_read,
7146 	.release	= tracing_buffers_release,
7147 	.splice_read	= tracing_buffers_splice_read,
7148 	.llseek		= no_llseek,
7149 };
7150 
7151 #endif /* CONFIG_TRACER_SNAPSHOT */
7152 
7153 #define TRACING_LOG_ERRS_MAX	8
7154 #define TRACING_LOG_LOC_MAX	128
7155 
7156 #define CMD_PREFIX "  Command: "
7157 
7158 struct err_info {
7159 	const char	**errs;	/* ptr to loc-specific array of err strings */
7160 	u8		type;	/* index into errs -> specific err string */
7161 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7162 	u64		ts;
7163 };
7164 
7165 struct tracing_log_err {
7166 	struct list_head	list;
7167 	struct err_info		info;
7168 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7169 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7170 };
7171 
7172 static DEFINE_MUTEX(tracing_err_log_lock);
7173 
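/*
 * Return a new error-log entry, or, once TRACING_LOG_ERRS_MAX entries
 * exist, recycle the oldest entry on tr->err_log.  Called with
 * tracing_err_log_lock held.
 */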
7174 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7175 {
7176 	struct tracing_log_err *err;
7177 
7178 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7179 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7180 		if (!err)
7181 			err = ERR_PTR(-ENOMEM);
7182 		tr->n_err_log_entries++;
7183 
7184 		return err;
7185 	}
7186 
7187 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7188 	list_del(&err->list);
7189 
7190 	return err;
7191 }
7192 
7193 /**
7194  * err_pos - find the position of a string within a command for error careting
7195  * @cmd: The tracing command that caused the error
7196  * @str: The string to position the caret at within @cmd
7197  *
7198  * Finds the position of the first occurrence of @str within @cmd.  The
7199  * return value can be passed to tracing_log_err() for caret placement
7200  * within @cmd.
7201  *
7202  * Returns the index within @cmd of the first occurrence of @str or 0
7203  * if @str was not found.
7204  */
7205 unsigned int err_pos(char *cmd, const char *str)
7206 {
7207 	char *found;
7208 
7209 	if (WARN_ON(!strlen(cmd)))
7210 		return 0;
7211 
7212 	found = strstr(cmd, str);
7213 	if (found)
7214 		return found - cmd;
7215 
7216 	return 0;
7217 }
7218 
7219 /**
7220  * tracing_log_err - write an error to the tracing error log
7221  * @tr: The associated trace array for the error (NULL for top level array)
7222  * @loc: A string describing where the error occurred
7223  * @cmd: The tracing command that caused the error
7224  * @errs: The array of loc-specific static error strings
7225  * @type: The index into errs[], which produces the specific static err string
7226  * @pos: The position the caret should be placed in the cmd
7227  *
7228  * Writes an error into tracing/error_log of the form:
7229  *
7230  * <loc>: error: <text>
7231  *   Command: <cmd>
7232  *              ^
7233  *
7234  * tracing/error_log is a small log file containing the last
7235  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7236  * unless there has been a tracing error, and the error log can be
7237  * cleared and have its memory freed by writing the empty string in
7238  * truncation mode to it i.e. echo > tracing/error_log.
7239  *
7240  * NOTE: the @errs array along with the @type param are used to
7241  * produce a static error string - this string is not copied and saved
7242  * when the error is logged - only a pointer to it is saved.  See
7243  * existing callers for examples of how static strings are typically
7244  * defined for use with tracing_log_err().
7245  */
7246 void tracing_log_err(struct trace_array *tr,
7247 		     const char *loc, const char *cmd,
7248 		     const char **errs, u8 type, u8 pos)
7249 {
7250 	struct tracing_log_err *err;
7251 
7252 	if (!tr)
7253 		tr = &global_trace;
7254 
7255 	mutex_lock(&tracing_err_log_lock);
7256 	err = get_tracing_log_err(tr);
7257 	if (PTR_ERR(err) == -ENOMEM) {
7258 		mutex_unlock(&tracing_err_log_lock);
7259 		return;
7260 	}
7261 
7262 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7263 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7264 
7265 	err->info.errs = errs;
7266 	err->info.type = type;
7267 	err->info.pos = pos;
7268 	err->info.ts = local_clock();
7269 
7270 	list_add_tail(&err->list, &tr->err_log);
7271 	mutex_unlock(&tracing_err_log_lock);
7272 }
7273 
7274 static void clear_tracing_err_log(struct trace_array *tr)
7275 {
7276 	struct tracing_log_err *err, *next;
7277 
7278 	mutex_lock(&tracing_err_log_lock);
7279 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7280 		list_del(&err->list);
7281 		kfree(err);
7282 	}
7283 
7284 	tr->n_err_log_entries = 0;
7285 	mutex_unlock(&tracing_err_log_lock);
7286 }
7287 
7288 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7289 {
7290 	struct trace_array *tr = m->private;
7291 
7292 	mutex_lock(&tracing_err_log_lock);
7293 
7294 	return seq_list_start(&tr->err_log, *pos);
7295 }
7296 
7297 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7298 {
7299 	struct trace_array *tr = m->private;
7300 
7301 	return seq_list_next(v, &tr->err_log, pos);
7302 }
7303 
7304 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7305 {
7306 	mutex_unlock(&tracing_err_log_lock);
7307 }
7308 
7309 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7310 {
7311 	u8 i;
7312 
7313 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7314 		seq_putc(m, ' ');
7315 	for (i = 0; i < pos; i++)
7316 		seq_putc(m, ' ');
7317 	seq_puts(m, "^\n");
7318 }
7319 
7320 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7321 {
7322 	struct tracing_log_err *err = v;
7323 
7324 	if (err) {
7325 		const char *err_text = err->info.errs[err->info.type];
7326 		u64 sec = err->info.ts;
7327 		u32 nsec;
7328 
7329 		nsec = do_div(sec, NSEC_PER_SEC);
7330 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7331 			   err->loc, err_text);
7332 		seq_printf(m, "%s", err->cmd);
7333 		tracing_err_log_show_pos(m, err->info.pos);
7334 	}
7335 
7336 	return 0;
7337 }
7338 
7339 static const struct seq_operations tracing_err_log_seq_ops = {
7340 	.start  = tracing_err_log_seq_start,
7341 	.next   = tracing_err_log_seq_next,
7342 	.stop   = tracing_err_log_seq_stop,
7343 	.show   = tracing_err_log_seq_show
7344 };
7345 
7346 static int tracing_err_log_open(struct inode *inode, struct file *file)
7347 {
7348 	struct trace_array *tr = inode->i_private;
7349 	int ret = 0;
7350 
7351 	ret = tracing_check_open_get_tr(tr);
7352 	if (ret)
7353 		return ret;
7354 
7355 	/* If this file was opened for write, then erase contents */
7356 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7357 		clear_tracing_err_log(tr);
7358 
7359 	if (file->f_mode & FMODE_READ) {
7360 		ret = seq_open(file, &tracing_err_log_seq_ops);
7361 		if (!ret) {
7362 			struct seq_file *m = file->private_data;
7363 			m->private = tr;
7364 		} else {
7365 			trace_array_put(tr);
7366 		}
7367 	}
7368 	return ret;
7369 }
7370 
7371 static ssize_t tracing_err_log_write(struct file *file,
7372 				     const char __user *buffer,
7373 				     size_t count, loff_t *ppos)
7374 {
7375 	return count;
7376 }
7377 
7378 static int tracing_err_log_release(struct inode *inode, struct file *file)
7379 {
7380 	struct trace_array *tr = inode->i_private;
7381 
7382 	trace_array_put(tr);
7383 
7384 	if (file->f_mode & FMODE_READ)
7385 		seq_release(inode, file);
7386 
7387 	return 0;
7388 }
7389 
7390 static const struct file_operations tracing_err_log_fops = {
7391 	.open           = tracing_err_log_open,
7392 	.write		= tracing_err_log_write,
7393 	.read           = seq_read,
7394 	.llseek         = seq_lseek,
7395 	.release        = tracing_err_log_release,
7396 };
7397 
7398 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7399 {
7400 	struct trace_array *tr = inode->i_private;
7401 	struct ftrace_buffer_info *info;
7402 	int ret;
7403 
7404 	ret = tracing_check_open_get_tr(tr);
7405 	if (ret)
7406 		return ret;
7407 
7408 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7409 	if (!info) {
7410 		trace_array_put(tr);
7411 		return -ENOMEM;
7412 	}
7413 
7414 	mutex_lock(&trace_types_lock);
7415 
7416 	info->iter.tr		= tr;
7417 	info->iter.cpu_file	= tracing_get_cpu(inode);
7418 	info->iter.trace	= tr->current_trace;
7419 	info->iter.array_buffer = &tr->array_buffer;
7420 	info->spare		= NULL;
7421 	/* Force reading ring buffer for first read */
7422 	info->read		= (unsigned int)-1;
7423 
7424 	filp->private_data = info;
7425 
7426 	tr->current_trace->ref++;
7427 
7428 	mutex_unlock(&trace_types_lock);
7429 
7430 	ret = nonseekable_open(inode, filp);
7431 	if (ret < 0)
7432 		trace_array_put(tr);
7433 
7434 	return ret;
7435 }
7436 
7437 static __poll_t
7438 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7439 {
7440 	struct ftrace_buffer_info *info = filp->private_data;
7441 	struct trace_iterator *iter = &info->iter;
7442 
7443 	return trace_poll(iter, filp, poll_table);
7444 }
7445 
7446 static ssize_t
7447 tracing_buffers_read(struct file *filp, char __user *ubuf,
7448 		     size_t count, loff_t *ppos)
7449 {
7450 	struct ftrace_buffer_info *info = filp->private_data;
7451 	struct trace_iterator *iter = &info->iter;
7452 	ssize_t ret = 0;
7453 	ssize_t size;
7454 
7455 	if (!count)
7456 		return 0;
7457 
7458 #ifdef CONFIG_TRACER_MAX_TRACE
7459 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7460 		return -EBUSY;
7461 #endif
7462 
7463 	if (!info->spare) {
7464 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7465 							  iter->cpu_file);
7466 		if (IS_ERR(info->spare)) {
7467 			ret = PTR_ERR(info->spare);
7468 			info->spare = NULL;
7469 		} else {
7470 			info->spare_cpu = iter->cpu_file;
7471 		}
7472 	}
7473 	if (!info->spare)
7474 		return ret;
7475 
7476 	/* Do we still have data left over from a previous read? */
7477 	if (info->read < PAGE_SIZE)
7478 		goto read;
7479 
7480  again:
7481 	trace_access_lock(iter->cpu_file);
7482 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7483 				    &info->spare,
7484 				    count,
7485 				    iter->cpu_file, 0);
7486 	trace_access_unlock(iter->cpu_file);
7487 
7488 	if (ret < 0) {
7489 		if (trace_empty(iter)) {
7490 			if ((filp->f_flags & O_NONBLOCK))
7491 				return -EAGAIN;
7492 
7493 			ret = wait_on_pipe(iter, 0);
7494 			if (ret)
7495 				return ret;
7496 
7497 			goto again;
7498 		}
7499 		return 0;
7500 	}
7501 
7502 	info->read = 0;
7503  read:
7504 	size = PAGE_SIZE - info->read;
7505 	if (size > count)
7506 		size = count;
7507 
7508 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7509 	if (ret == size)
7510 		return -EFAULT;
7511 
7512 	size -= ret;
7513 
7514 	*ppos += size;
7515 	info->read += size;
7516 
7517 	return size;
7518 }
7519 
7520 static int tracing_buffers_release(struct inode *inode, struct file *file)
7521 {
7522 	struct ftrace_buffer_info *info = file->private_data;
7523 	struct trace_iterator *iter = &info->iter;
7524 
7525 	mutex_lock(&trace_types_lock);
7526 
7527 	iter->tr->current_trace->ref--;
7528 
7529 	__trace_array_put(iter->tr);
7530 
7531 	if (info->spare)
7532 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7533 					   info->spare_cpu, info->spare);
7534 	kfree(info);
7535 
7536 	mutex_unlock(&trace_types_lock);
7537 
7538 	return 0;
7539 }
7540 
7541 struct buffer_ref {
7542 	struct trace_buffer	*buffer;
7543 	void			*page;
7544 	int			cpu;
7545 	refcount_t		refcount;
7546 };
7547 
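/*
 * Drop a reference on @ref; when the last reference is gone, return the
 * page to the ring buffer and free the ref itself.
 */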
7548 static void buffer_ref_release(struct buffer_ref *ref)
7549 {
7550 	if (!refcount_dec_and_test(&ref->refcount))
7551 		return;
7552 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7553 	kfree(ref);
7554 }
7555 
7556 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7557 				    struct pipe_buffer *buf)
7558 {
7559 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7560 
7561 	buffer_ref_release(ref);
7562 	buf->private = 0;
7563 }
7564 
7565 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7566 				struct pipe_buffer *buf)
7567 {
7568 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7569 
7570 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7571 		return false;
7572 
7573 	refcount_inc(&ref->refcount);
7574 	return true;
7575 }
7576 
7577 /* Pipe buffer operations for a buffer. */
7578 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7579 	.release		= buffer_pipe_buf_release,
7580 	.get			= buffer_pipe_buf_get,
7581 };
7582 
7583 /*
7584  * Callback from splice_to_pipe(); releases the pages left in the spd
7585  * if we errored out while filling the pipe.
7586  */
7587 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7588 {
7589 	struct buffer_ref *ref =
7590 		(struct buffer_ref *)spd->partial[i].private;
7591 
7592 	buffer_ref_release(ref);
7593 	spd->partial[i].private = 0;
7594 }
7595 
7596 static ssize_t
7597 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7598 			    struct pipe_inode_info *pipe, size_t len,
7599 			    unsigned int flags)
7600 {
7601 	struct ftrace_buffer_info *info = file->private_data;
7602 	struct trace_iterator *iter = &info->iter;
7603 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7604 	struct page *pages_def[PIPE_DEF_BUFFERS];
7605 	struct splice_pipe_desc spd = {
7606 		.pages		= pages_def,
7607 		.partial	= partial_def,
7608 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7609 		.ops		= &buffer_pipe_buf_ops,
7610 		.spd_release	= buffer_spd_release,
7611 	};
7612 	struct buffer_ref *ref;
7613 	int entries, i;
7614 	ssize_t ret = 0;
7615 
7616 #ifdef CONFIG_TRACER_MAX_TRACE
7617 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7618 		return -EBUSY;
7619 #endif
7620 
7621 	if (*ppos & (PAGE_SIZE - 1))
7622 		return -EINVAL;
7623 
7624 	if (len & (PAGE_SIZE - 1)) {
7625 		if (len < PAGE_SIZE)
7626 			return -EINVAL;
7627 		len &= PAGE_MASK;
7628 	}
7629 
7630 	if (splice_grow_spd(pipe, &spd))
7631 		return -ENOMEM;
7632 
7633  again:
7634 	trace_access_lock(iter->cpu_file);
7635 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7636 
7637 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7638 		struct page *page;
7639 		int r;
7640 
7641 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7642 		if (!ref) {
7643 			ret = -ENOMEM;
7644 			break;
7645 		}
7646 
7647 		refcount_set(&ref->refcount, 1);
7648 		ref->buffer = iter->array_buffer->buffer;
7649 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7650 		if (IS_ERR(ref->page)) {
7651 			ret = PTR_ERR(ref->page);
7652 			ref->page = NULL;
7653 			kfree(ref);
7654 			break;
7655 		}
7656 		ref->cpu = iter->cpu_file;
7657 
7658 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7659 					  len, iter->cpu_file, 1);
7660 		if (r < 0) {
7661 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7662 						   ref->page);
7663 			kfree(ref);
7664 			break;
7665 		}
7666 
7667 		page = virt_to_page(ref->page);
7668 
7669 		spd.pages[i] = page;
7670 		spd.partial[i].len = PAGE_SIZE;
7671 		spd.partial[i].offset = 0;
7672 		spd.partial[i].private = (unsigned long)ref;
7673 		spd.nr_pages++;
7674 		*ppos += PAGE_SIZE;
7675 
7676 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7677 	}
7678 
7679 	trace_access_unlock(iter->cpu_file);
7680 	spd.nr_pages = i;
7681 
7682 	/* did we read anything? */
7683 	if (!spd.nr_pages) {
7684 		if (ret)
7685 			goto out;
7686 
7687 		ret = -EAGAIN;
7688 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7689 			goto out;
7690 
7691 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7692 		if (ret)
7693 			goto out;
7694 
7695 		goto again;
7696 	}
7697 
7698 	ret = splice_to_pipe(pipe, &spd);
7699 out:
7700 	splice_shrink_spd(&spd);
7701 
7702 	return ret;
7703 }
7704 
7705 static const struct file_operations tracing_buffers_fops = {
7706 	.open		= tracing_buffers_open,
7707 	.read		= tracing_buffers_read,
7708 	.poll		= tracing_buffers_poll,
7709 	.release	= tracing_buffers_release,
7710 	.splice_read	= tracing_buffers_splice_read,
7711 	.llseek		= no_llseek,
7712 };
7713 
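/*
 * Back the per_cpu/cpuN/stats file: print a text summary of the ring
 * buffer statistics for one CPU (entries, overruns, byte count,
 * oldest/current timestamps, dropped events and read events).
 */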
7714 static ssize_t
7715 tracing_stats_read(struct file *filp, char __user *ubuf,
7716 		   size_t count, loff_t *ppos)
7717 {
7718 	struct inode *inode = file_inode(filp);
7719 	struct trace_array *tr = inode->i_private;
7720 	struct array_buffer *trace_buf = &tr->array_buffer;
7721 	int cpu = tracing_get_cpu(inode);
7722 	struct trace_seq *s;
7723 	unsigned long cnt;
7724 	unsigned long long t;
7725 	unsigned long usec_rem;
7726 
7727 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7728 	if (!s)
7729 		return -ENOMEM;
7730 
7731 	trace_seq_init(s);
7732 
7733 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7734 	trace_seq_printf(s, "entries: %ld\n", cnt);
7735 
7736 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7737 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7738 
7739 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7740 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7741 
7742 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7743 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7744 
7745 	if (trace_clocks[tr->clock_id].in_ns) {
7746 		/* local or global for trace_clock */
7747 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7748 		usec_rem = do_div(t, USEC_PER_SEC);
7749 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7750 								t, usec_rem);
7751 
7752 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7753 		usec_rem = do_div(t, USEC_PER_SEC);
7754 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7755 	} else {
7756 		/* counter or tsc mode for trace_clock */
7757 		trace_seq_printf(s, "oldest event ts: %llu\n",
7758 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7759 
7760 		trace_seq_printf(s, "now ts: %llu\n",
7761 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7762 	}
7763 
7764 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7765 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7766 
7767 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7768 	trace_seq_printf(s, "read events: %ld\n", cnt);
7769 
7770 	count = simple_read_from_buffer(ubuf, count, ppos,
7771 					s->buffer, trace_seq_used(s));
7772 
7773 	kfree(s);
7774 
7775 	return count;
7776 }
7777 
7778 static const struct file_operations tracing_stats_fops = {
7779 	.open		= tracing_open_generic_tr,
7780 	.read		= tracing_stats_read,
7781 	.llseek		= generic_file_llseek,
7782 	.release	= tracing_release_generic_tr,
7783 };
7784 
7785 #ifdef CONFIG_DYNAMIC_FTRACE
7786 
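/*
 * Back the dyn_ftrace_total_info file: report the total number of
 * functions recorded by dynamic ftrace, along with the number of pages
 * and page groups used to hold their dyn_ftrace records.
 */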
7787 static ssize_t
7788 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7789 		  size_t cnt, loff_t *ppos)
7790 {
7791 	ssize_t ret;
7792 	char *buf;
7793 	int r;
7794 
7795 	/* 256 should be plenty to hold the amount needed */
7796 	buf = kmalloc(256, GFP_KERNEL);
7797 	if (!buf)
7798 		return -ENOMEM;
7799 
7800 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7801 		      ftrace_update_tot_cnt,
7802 		      ftrace_number_of_pages,
7803 		      ftrace_number_of_groups);
7804 
7805 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7806 	kfree(buf);
7807 	return ret;
7808 }
7809 
7810 static const struct file_operations tracing_dyn_info_fops = {
7811 	.open		= tracing_open_generic,
7812 	.read		= tracing_read_dyn_info,
7813 	.llseek		= generic_file_llseek,
7814 };
7815 #endif /* CONFIG_DYNAMIC_FTRACE */
7816 
7817 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7818 static void
7819 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7820 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7821 		void *data)
7822 {
7823 	tracing_snapshot_instance(tr);
7824 }
7825 
7826 static void
7827 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7828 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7829 		      void *data)
7830 {
7831 	struct ftrace_func_mapper *mapper = data;
7832 	long *count = NULL;
7833 
7834 	if (mapper)
7835 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7836 
7837 	if (count) {
7839 		if (*count <= 0)
7840 			return;
7841 
7842 		(*count)--;
7843 	}
7844 
7845 	tracing_snapshot_instance(tr);
7846 }
7847 
7848 static int
7849 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7850 		      struct ftrace_probe_ops *ops, void *data)
7851 {
7852 	struct ftrace_func_mapper *mapper = data;
7853 	long *count = NULL;
7854 
7855 	seq_printf(m, "%ps:", (void *)ip);
7856 
7857 	seq_puts(m, "snapshot");
7858 
7859 	if (mapper)
7860 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7861 
7862 	if (count)
7863 		seq_printf(m, ":count=%ld\n", *count);
7864 	else
7865 		seq_puts(m, ":unlimited\n");
7866 
7867 	return 0;
7868 }
7869 
7870 static int
7871 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7872 		     unsigned long ip, void *init_data, void **data)
7873 {
7874 	struct ftrace_func_mapper *mapper = *data;
7875 
7876 	if (!mapper) {
7877 		mapper = allocate_ftrace_func_mapper();
7878 		if (!mapper)
7879 			return -ENOMEM;
7880 		*data = mapper;
7881 	}
7882 
7883 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7884 }
7885 
7886 static void
7887 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7888 		     unsigned long ip, void *data)
7889 {
7890 	struct ftrace_func_mapper *mapper = data;
7891 
7892 	if (!ip) {
7893 		if (!mapper)
7894 			return;
7895 		free_ftrace_func_mapper(mapper, NULL);
7896 		return;
7897 	}
7898 
7899 	ftrace_func_mapper_remove_ip(mapper, ip);
7900 }
7901 
7902 static struct ftrace_probe_ops snapshot_probe_ops = {
7903 	.func			= ftrace_snapshot,
7904 	.print			= ftrace_snapshot_print,
7905 };
7906 
7907 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7908 	.func			= ftrace_count_snapshot,
7909 	.print			= ftrace_snapshot_print,
7910 	.init			= ftrace_snapshot_init,
7911 	.free			= ftrace_snapshot_free,
7912 };
7913 
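/*
 * Handle the "snapshot" function command set via set_ftrace_filter,
 * for example:
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter
 *   echo 'schedule:snapshot:5' > set_ftrace_filter
 *
 * The optional count limits how many times a snapshot is taken, and
 * prefixing the glob with '!' removes the probe again.
 */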
7914 static int
7915 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7916 			       char *glob, char *cmd, char *param, int enable)
7917 {
7918 	struct ftrace_probe_ops *ops;
7919 	void *count = (void *)-1;
7920 	char *number;
7921 	int ret;
7922 
7923 	if (!tr)
7924 		return -ENODEV;
7925 
7926 	/* hash funcs only work with set_ftrace_filter */
7927 	if (!enable)
7928 		return -EINVAL;
7929 
7930 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7931 
7932 	if (glob[0] == '!')
7933 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7934 
7935 	if (!param)
7936 		goto out_reg;
7937 
7938 	number = strsep(&param, ":");
7939 
7940 	if (!strlen(number))
7941 		goto out_reg;
7942 
7943 	/*
7944 	 * We use the callback data field (which is a pointer)
7945 	 * as our counter.
7946 	 */
7947 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7948 	if (ret)
7949 		return ret;
7950 
7951  out_reg:
7952 	ret = tracing_alloc_snapshot_instance(tr);
7953 	if (ret < 0)
7954 		goto out;
7955 
7956 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7957 
7958  out:
7959 	return ret < 0 ? ret : 0;
7960 }
7961 
7962 static struct ftrace_func_command ftrace_snapshot_cmd = {
7963 	.name			= "snapshot",
7964 	.func			= ftrace_trace_snapshot_callback,
7965 };
7966 
7967 static __init int register_snapshot_cmd(void)
7968 {
7969 	return register_ftrace_command(&ftrace_snapshot_cmd);
7970 }
7971 #else
7972 static inline __init int register_snapshot_cmd(void) { return 0; }
7973 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7974 
7975 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7976 {
7977 	if (WARN_ON(!tr->dir))
7978 		return ERR_PTR(-ENODEV);
7979 
7980 	/* Top directory uses NULL as the parent */
7981 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7982 		return NULL;
7983 
7984 	/* All sub buffers have a descriptor */
7985 	return tr->dir;
7986 }
7987 
7988 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7989 {
7990 	struct dentry *d_tracer;
7991 
7992 	if (tr->percpu_dir)
7993 		return tr->percpu_dir;
7994 
7995 	d_tracer = tracing_get_dentry(tr);
7996 	if (IS_ERR(d_tracer))
7997 		return NULL;
7998 
7999 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8000 
8001 	MEM_FAIL(!tr->percpu_dir,
8002 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8003 
8004 	return tr->percpu_dir;
8005 }
8006 
8007 static struct dentry *
8008 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8009 		      void *data, long cpu, const struct file_operations *fops)
8010 {
8011 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8012 
8013 	if (ret) /* See tracing_get_cpu() */
8014 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8015 	return ret;
8016 }
8017 
8018 static void
8019 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8020 {
8021 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8022 	struct dentry *d_cpu;
8023 	char cpu_dir[30]; /* 30 characters should be more than enough */
8024 
8025 	if (!d_percpu)
8026 		return;
8027 
8028 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8029 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8030 	if (!d_cpu) {
8031 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8032 		return;
8033 	}
8034 
8035 	/* per cpu trace_pipe */
8036 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8037 				tr, cpu, &tracing_pipe_fops);
8038 
8039 	/* per cpu trace */
8040 	trace_create_cpu_file("trace", 0644, d_cpu,
8041 				tr, cpu, &tracing_fops);
8042 
8043 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8044 				tr, cpu, &tracing_buffers_fops);
8045 
8046 	trace_create_cpu_file("stats", 0444, d_cpu,
8047 				tr, cpu, &tracing_stats_fops);
8048 
8049 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8050 				tr, cpu, &tracing_entries_fops);
8051 
8052 #ifdef CONFIG_TRACER_SNAPSHOT
8053 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8054 				tr, cpu, &snapshot_fops);
8055 
8056 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8057 				tr, cpu, &snapshot_raw_fops);
8058 #endif
8059 }
8060 
8061 #ifdef CONFIG_FTRACE_SELFTEST
8062 /* Let selftest have access to static functions in this file */
8063 #include "trace_selftest.c"
8064 #endif
8065 
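/*
 * The handlers below back the flag files in the per-instance "options"
 * directory: trace_options_read()/trace_options_write() operate on
 * tracer-specific options (struct tracer_opt), while
 * trace_options_core_read()/trace_options_core_write() further down
 * operate on the global trace_flags bits.
 */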
8066 static ssize_t
8067 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8068 			loff_t *ppos)
8069 {
8070 	struct trace_option_dentry *topt = filp->private_data;
8071 	char *buf;
8072 
8073 	if (topt->flags->val & topt->opt->bit)
8074 		buf = "1\n";
8075 	else
8076 		buf = "0\n";
8077 
8078 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8079 }
8080 
8081 static ssize_t
8082 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8083 			 loff_t *ppos)
8084 {
8085 	struct trace_option_dentry *topt = filp->private_data;
8086 	unsigned long val;
8087 	int ret;
8088 
8089 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8090 	if (ret)
8091 		return ret;
8092 
8093 	if (val != 0 && val != 1)
8094 		return -EINVAL;
8095 
8096 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8097 		mutex_lock(&trace_types_lock);
8098 		ret = __set_tracer_option(topt->tr, topt->flags,
8099 					  topt->opt, !val);
8100 		mutex_unlock(&trace_types_lock);
8101 		if (ret)
8102 			return ret;
8103 	}
8104 
8105 	*ppos += cnt;
8106 
8107 	return cnt;
8108 }
8109 
8111 static const struct file_operations trace_options_fops = {
8112 	.open = tracing_open_generic,
8113 	.read = trace_options_read,
8114 	.write = trace_options_write,
8115 	.llseek	= generic_file_llseek,
8116 };
8117 
8118 /*
8119  * In order to pass in both the trace_array descriptor as well as the index
8120  * to the flag that the trace option file represents, the trace_array
8121  * has a character array of trace_flags_index[], which holds the index
8122  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8123  * The address of this character array is passed to the flag option file
8124  * read/write callbacks.
8125  *
8126  * In order to extract both the index and the trace_array descriptor,
8127  * get_tr_index() uses the following algorithm.
8128  *
8129  *   idx = *ptr;
8130  *
8131  * The pointer points at one entry of the index array, and since
8132  * index[i] == i, dereferencing it yields the index value itself.
8133  *
8134  * Then, to get the trace_array descriptor, subtract that index from
8135  * the pointer to reach the start of the index array:
8136  *
8137  *   ptr - idx == &index[0]
8138  *
8139  * A simple container_of() on that pointer then gets us to the
8140  * trace_array descriptor.
8141  */
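/*
 * For example, if data points at tr->trace_flags_index[3], then
 * *pindex == 3 and data - 3 == tr->trace_flags_index, which
 * container_of() maps back to tr.
 */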
8142 static void get_tr_index(void *data, struct trace_array **ptr,
8143 			 unsigned int *pindex)
8144 {
8145 	*pindex = *(unsigned char *)data;
8146 
8147 	*ptr = container_of(data - *pindex, struct trace_array,
8148 			    trace_flags_index);
8149 }
8150 
8151 static ssize_t
8152 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8153 			loff_t *ppos)
8154 {
8155 	void *tr_index = filp->private_data;
8156 	struct trace_array *tr;
8157 	unsigned int index;
8158 	char *buf;
8159 
8160 	get_tr_index(tr_index, &tr, &index);
8161 
8162 	if (tr->trace_flags & (1 << index))
8163 		buf = "1\n";
8164 	else
8165 		buf = "0\n";
8166 
8167 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8168 }
8169 
8170 static ssize_t
8171 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8172 			 loff_t *ppos)
8173 {
8174 	void *tr_index = filp->private_data;
8175 	struct trace_array *tr;
8176 	unsigned int index;
8177 	unsigned long val;
8178 	int ret;
8179 
8180 	get_tr_index(tr_index, &tr, &index);
8181 
8182 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8183 	if (ret)
8184 		return ret;
8185 
8186 	if (val != 0 && val != 1)
8187 		return -EINVAL;
8188 
8189 	mutex_lock(&event_mutex);
8190 	mutex_lock(&trace_types_lock);
8191 	ret = set_tracer_flag(tr, 1 << index, val);
8192 	mutex_unlock(&trace_types_lock);
8193 	mutex_unlock(&event_mutex);
8194 
8195 	if (ret < 0)
8196 		return ret;
8197 
8198 	*ppos += cnt;
8199 
8200 	return cnt;
8201 }
8202 
8203 static const struct file_operations trace_options_core_fops = {
8204 	.open = tracing_open_generic,
8205 	.read = trace_options_core_read,
8206 	.write = trace_options_core_write,
8207 	.llseek = generic_file_llseek,
8208 };
8209 
8210 struct dentry *trace_create_file(const char *name,
8211 				 umode_t mode,
8212 				 struct dentry *parent,
8213 				 void *data,
8214 				 const struct file_operations *fops)
8215 {
8216 	struct dentry *ret;
8217 
8218 	ret = tracefs_create_file(name, mode, parent, data, fops);
8219 	if (!ret)
8220 		pr_warn("Could not create tracefs '%s' entry\n", name);
8221 
8222 	return ret;
8223 }
8224 
8226 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8227 {
8228 	struct dentry *d_tracer;
8229 
8230 	if (tr->options)
8231 		return tr->options;
8232 
8233 	d_tracer = tracing_get_dentry(tr);
8234 	if (IS_ERR(d_tracer))
8235 		return NULL;
8236 
8237 	tr->options = tracefs_create_dir("options", d_tracer);
8238 	if (!tr->options) {
8239 		pr_warn("Could not create tracefs directory 'options'\n");
8240 		return NULL;
8241 	}
8242 
8243 	return tr->options;
8244 }
8245 
8246 static void
8247 create_trace_option_file(struct trace_array *tr,
8248 			 struct trace_option_dentry *topt,
8249 			 struct tracer_flags *flags,
8250 			 struct tracer_opt *opt)
8251 {
8252 	struct dentry *t_options;
8253 
8254 	t_options = trace_options_init_dentry(tr);
8255 	if (!t_options)
8256 		return;
8257 
8258 	topt->flags = flags;
8259 	topt->opt = opt;
8260 	topt->tr = tr;
8261 
8262 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8263 					&trace_options_fops);
8265 }
8266 
8267 static void
8268 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8269 {
8270 	struct trace_option_dentry *topts;
8271 	struct trace_options *tr_topts;
8272 	struct tracer_flags *flags;
8273 	struct tracer_opt *opts;
8274 	int cnt;
8275 	int i;
8276 
8277 	if (!tracer)
8278 		return;
8279 
8280 	flags = tracer->flags;
8281 
8282 	if (!flags || !flags->opts)
8283 		return;
8284 
8285 	/*
8286 	 * If this is an instance, only create flags for tracers
8287 	 * the instance may have.
8288 	 */
8289 	if (!trace_ok_for_array(tracer, tr))
8290 		return;
8291 
8292 	for (i = 0; i < tr->nr_topts; i++) {
8293 		/* Make sure there are no duplicate flags. */
8294 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8295 			return;
8296 	}
8297 
8298 	opts = flags->opts;
8299 
8300 	for (cnt = 0; opts[cnt].name; cnt++)
8301 		;
8302 
8303 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8304 	if (!topts)
8305 		return;
8306 
8307 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8308 			    GFP_KERNEL);
8309 	if (!tr_topts) {
8310 		kfree(topts);
8311 		return;
8312 	}
8313 
8314 	tr->topts = tr_topts;
8315 	tr->topts[tr->nr_topts].tracer = tracer;
8316 	tr->topts[tr->nr_topts].topts = topts;
8317 	tr->nr_topts++;
8318 
8319 	for (cnt = 0; opts[cnt].name; cnt++) {
8320 		create_trace_option_file(tr, &topts[cnt], flags,
8321 					 &opts[cnt]);
8322 		MEM_FAIL(topts[cnt].entry == NULL,
8323 			  "Failed to create trace option: %s",
8324 			  opts[cnt].name);
8325 	}
8326 }
8327 
8328 static struct dentry *
8329 create_trace_option_core_file(struct trace_array *tr,
8330 			      const char *option, long index)
8331 {
8332 	struct dentry *t_options;
8333 
8334 	t_options = trace_options_init_dentry(tr);
8335 	if (!t_options)
8336 		return NULL;
8337 
8338 	return trace_create_file(option, 0644, t_options,
8339 				 (void *)&tr->trace_flags_index[index],
8340 				 &trace_options_core_fops);
8341 }
8342 
8343 static void create_trace_options_dir(struct trace_array *tr)
8344 {
8345 	struct dentry *t_options;
8346 	bool top_level = tr == &global_trace;
8347 	int i;
8348 
8349 	t_options = trace_options_init_dentry(tr);
8350 	if (!t_options)
8351 		return;
8352 
8353 	for (i = 0; trace_options[i]; i++) {
8354 		if (top_level ||
8355 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8356 			create_trace_option_core_file(tr, trace_options[i], i);
8357 	}
8358 }
8359 
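/*
 * rb_simple_read()/rb_simple_write() back the "tracing_on" file:
 * reading returns 0 or 1, and writing 0 or 1 turns the ring buffer
 * (and the current tracer) off or on.
 */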
8360 static ssize_t
8361 rb_simple_read(struct file *filp, char __user *ubuf,
8362 	       size_t cnt, loff_t *ppos)
8363 {
8364 	struct trace_array *tr = filp->private_data;
8365 	char buf[64];
8366 	int r;
8367 
8368 	r = tracer_tracing_is_on(tr);
8369 	r = sprintf(buf, "%d\n", r);
8370 
8371 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8372 }
8373 
8374 static ssize_t
8375 rb_simple_write(struct file *filp, const char __user *ubuf,
8376 		size_t cnt, loff_t *ppos)
8377 {
8378 	struct trace_array *tr = filp->private_data;
8379 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8380 	unsigned long val;
8381 	int ret;
8382 
8383 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8384 	if (ret)
8385 		return ret;
8386 
8387 	if (buffer) {
8388 		mutex_lock(&trace_types_lock);
8389 		if (!!val == tracer_tracing_is_on(tr)) {
8390 			val = 0; /* do nothing */
8391 		} else if (val) {
8392 			tracer_tracing_on(tr);
8393 			if (tr->current_trace->start)
8394 				tr->current_trace->start(tr);
8395 		} else {
8396 			tracer_tracing_off(tr);
8397 			if (tr->current_trace->stop)
8398 				tr->current_trace->stop(tr);
8399 		}
8400 		mutex_unlock(&trace_types_lock);
8401 	}
8402 
8403 	(*ppos)++;
8404 
8405 	return cnt;
8406 }
8407 
8408 static const struct file_operations rb_simple_fops = {
8409 	.open		= tracing_open_generic_tr,
8410 	.read		= rb_simple_read,
8411 	.write		= rb_simple_write,
8412 	.release	= tracing_release_generic_tr,
8413 	.llseek		= default_llseek,
8414 };
8415 
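/*
 * These handlers back the "buffer_percent" file: how full the ring
 * buffer must be before a reader blocked in splice is woken up (the
 * value is handed to wait_on_pipe() by the splice reader above).
 */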
8416 static ssize_t
8417 buffer_percent_read(struct file *filp, char __user *ubuf,
8418 		    size_t cnt, loff_t *ppos)
8419 {
8420 	struct trace_array *tr = filp->private_data;
8421 	char buf[64];
8422 	int r;
8423 
8424 	r = tr->buffer_percent;
8425 	r = sprintf(buf, "%d\n", r);
8426 
8427 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8428 }
8429 
8430 static ssize_t
8431 buffer_percent_write(struct file *filp, const char __user *ubuf,
8432 		     size_t cnt, loff_t *ppos)
8433 {
8434 	struct trace_array *tr = filp->private_data;
8435 	unsigned long val;
8436 	int ret;
8437 
8438 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8439 	if (ret)
8440 		return ret;
8441 
8442 	if (val > 100)
8443 		return -EINVAL;
8444 
8445 	if (!val)
8446 		val = 1;
8447 
8448 	tr->buffer_percent = val;
8449 
8450 	(*ppos)++;
8451 
8452 	return cnt;
8453 }
8454 
8455 static const struct file_operations buffer_percent_fops = {
8456 	.open		= tracing_open_generic_tr,
8457 	.read		= buffer_percent_read,
8458 	.write		= buffer_percent_write,
8459 	.release	= tracing_release_generic_tr,
8460 	.llseek		= default_llseek,
8461 };
8462 
8463 static struct dentry *trace_instance_dir;
8464 
8465 static void
8466 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8467 
8468 static int
8469 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8470 {
8471 	enum ring_buffer_flags rb_flags;
8472 
8473 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8474 
8475 	buf->tr = tr;
8476 
8477 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8478 	if (!buf->buffer)
8479 		return -ENOMEM;
8480 
8481 	buf->data = alloc_percpu(struct trace_array_cpu);
8482 	if (!buf->data) {
8483 		ring_buffer_free(buf->buffer);
8484 		buf->buffer = NULL;
8485 		return -ENOMEM;
8486 	}
8487 
8488 	/* Allocate the first page for all buffers */
8489 	set_buffer_entries(&tr->array_buffer,
8490 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8491 
8492 	return 0;
8493 }
8494 
8495 static int allocate_trace_buffers(struct trace_array *tr, int size)
8496 {
8497 	int ret;
8498 
8499 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8500 	if (ret)
8501 		return ret;
8502 
8503 #ifdef CONFIG_TRACER_MAX_TRACE
8504 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8505 				    allocate_snapshot ? size : 1);
8506 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8507 		ring_buffer_free(tr->array_buffer.buffer);
8508 		tr->array_buffer.buffer = NULL;
8509 		free_percpu(tr->array_buffer.data);
8510 		tr->array_buffer.data = NULL;
8511 		return -ENOMEM;
8512 	}
8513 	tr->allocated_snapshot = allocate_snapshot;
8514 
8515 	/*
8516 	 * Only the top level trace array gets its snapshot allocated
8517 	 * from the kernel command line.
8518 	 */
8519 	allocate_snapshot = false;
8520 #endif
8521 
8522 	return 0;
8523 }
8524 
8525 static void free_trace_buffer(struct array_buffer *buf)
8526 {
8527 	if (buf->buffer) {
8528 		ring_buffer_free(buf->buffer);
8529 		buf->buffer = NULL;
8530 		free_percpu(buf->data);
8531 		buf->data = NULL;
8532 	}
8533 }
8534 
8535 static void free_trace_buffers(struct trace_array *tr)
8536 {
8537 	if (!tr)
8538 		return;
8539 
8540 	free_trace_buffer(&tr->array_buffer);
8541 
8542 #ifdef CONFIG_TRACER_MAX_TRACE
8543 	free_trace_buffer(&tr->max_buffer);
8544 #endif
8545 }
8546 
8547 static void init_trace_flags_index(struct trace_array *tr)
8548 {
8549 	int i;
8550 
8551 	/* Used by the trace options files */
8552 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8553 		tr->trace_flags_index[i] = i;
8554 }
8555 
8556 static void __update_tracer_options(struct trace_array *tr)
8557 {
8558 	struct tracer *t;
8559 
8560 	for (t = trace_types; t; t = t->next)
8561 		add_tracer_options(tr, t);
8562 }
8563 
8564 static void update_tracer_options(struct trace_array *tr)
8565 {
8566 	mutex_lock(&trace_types_lock);
8567 	__update_tracer_options(tr);
8568 	mutex_unlock(&trace_types_lock);
8569 }
8570 
8571 /* Must have trace_types_lock held */
8572 struct trace_array *trace_array_find(const char *instance)
8573 {
8574 	struct trace_array *tr, *found = NULL;
8575 
8576 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8577 		if (tr->name && strcmp(tr->name, instance) == 0) {
8578 			found = tr;
8579 			break;
8580 		}
8581 	}
8582 
8583 	return found;
8584 }
8585 
8586 struct trace_array *trace_array_find_get(const char *instance)
8587 {
8588 	struct trace_array *tr;
8589 
8590 	mutex_lock(&trace_types_lock);
8591 	tr = trace_array_find(instance);
8592 	if (tr)
8593 		tr->ref++;
8594 	mutex_unlock(&trace_types_lock);
8595 
8596 	return tr;
8597 }
8598 
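/*
 * Allocate and set up a new trace instance. Called with both
 * event_mutex and trace_types_lock held (see instance_mkdir() and
 * trace_array_get_by_name()).
 */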
8599 static struct trace_array *trace_array_create(const char *name)
8600 {
8601 	struct trace_array *tr;
8602 	int ret;
8603 
8604 	ret = -ENOMEM;
8605 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8606 	if (!tr)
8607 		return ERR_PTR(ret);
8608 
8609 	tr->name = kstrdup(name, GFP_KERNEL);
8610 	if (!tr->name)
8611 		goto out_free_tr;
8612 
8613 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8614 		goto out_free_tr;
8615 
8616 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8617 
8618 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8619 
8620 	raw_spin_lock_init(&tr->start_lock);
8621 
8622 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8623 
8624 	tr->current_trace = &nop_trace;
8625 
8626 	INIT_LIST_HEAD(&tr->systems);
8627 	INIT_LIST_HEAD(&tr->events);
8628 	INIT_LIST_HEAD(&tr->hist_vars);
8629 	INIT_LIST_HEAD(&tr->err_log);
8630 
8631 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8632 		goto out_free_tr;
8633 
8634 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8635 	if (!tr->dir)
8636 		goto out_free_tr;
8637 
8638 	ret = event_trace_add_tracer(tr->dir, tr);
8639 	if (ret) {
8640 		tracefs_remove(tr->dir);
8641 		goto out_free_tr;
8642 	}
8643 
8644 	ftrace_init_trace_array(tr);
8645 
8646 	init_tracer_tracefs(tr, tr->dir);
8647 	init_trace_flags_index(tr);
8648 	__update_tracer_options(tr);
8649 
8650 	list_add(&tr->list, &ftrace_trace_arrays);
8651 
8652 	tr->ref++;
8653 
8655 	return tr;
8656 
8657  out_free_tr:
8658 	free_trace_buffers(tr);
8659 	free_cpumask_var(tr->tracing_cpumask);
8660 	kfree(tr->name);
8661 	kfree(tr);
8662 
8663 	return ERR_PTR(ret);
8664 }
8665 
8666 static int instance_mkdir(const char *name)
8667 {
8668 	struct trace_array *tr;
8669 	int ret;
8670 
8671 	mutex_lock(&event_mutex);
8672 	mutex_lock(&trace_types_lock);
8673 
8674 	ret = -EEXIST;
8675 	if (trace_array_find(name))
8676 		goto out_unlock;
8677 
8678 	tr = trace_array_create(name);
8679 
8680 	ret = PTR_ERR_OR_ZERO(tr);
8681 
8682 out_unlock:
8683 	mutex_unlock(&trace_types_lock);
8684 	mutex_unlock(&event_mutex);
8685 	return ret;
8686 }
8687 
8688 /**
8689  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8690  * @name: The name of the trace array to be looked up/created.
8691  *
8692  * Returns a pointer to the trace array with the given name, or NULL
8693  * if it cannot be created.
8694  *
8695  * NOTE: This function increments the reference counter associated with the
8696  * trace array returned. This makes sure it cannot be freed while in use.
8697  * Use trace_array_put() once the trace array is no longer needed.
8698  * If the trace_array is to be freed, trace_array_destroy() needs to
8699  * be called after the trace_array_put(), or simply let user space delete
8700  * it from the tracefs instances directory. But until the
8701  * trace_array_put() is called, user space cannot delete it.
8702  *
8703  */
8704 struct trace_array *trace_array_get_by_name(const char *name)
8705 {
8706 	struct trace_array *tr;
8707 
8708 	mutex_lock(&event_mutex);
8709 	mutex_lock(&trace_types_lock);
8710 
8711 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8712 		if (tr->name && strcmp(tr->name, name) == 0)
8713 			goto out_unlock;
8714 	}
8715 
8716 	tr = trace_array_create(name);
8717 
8718 	if (IS_ERR(tr))
8719 		tr = NULL;
8720 out_unlock:
8721 	if (tr)
8722 		tr->ref++;
8723 
8724 	mutex_unlock(&trace_types_lock);
8725 	mutex_unlock(&event_mutex);
8726 	return tr;
8727 }
8728 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8729 
8730 static int __remove_instance(struct trace_array *tr)
8731 {
8732 	int i;
8733 
8734 	/* Reference counter for a newly created trace array = 1. */
8735 	if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8736 		return -EBUSY;
8737 
8738 	list_del(&tr->list);
8739 
8740 	/* Disable all the flags that were enabled coming in */
8741 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8742 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8743 			set_tracer_flag(tr, 1 << i, 0);
8744 	}
8745 
8746 	tracing_set_nop(tr);
8747 	clear_ftrace_function_probes(tr);
8748 	event_trace_del_tracer(tr);
8749 	ftrace_clear_pids(tr);
8750 	ftrace_destroy_function_files(tr);
8751 	tracefs_remove(tr->dir);
8752 	free_trace_buffers(tr);
8753 
8754 	for (i = 0; i < tr->nr_topts; i++)
8755 		kfree(tr->topts[i].topts);
8757 	kfree(tr->topts);
8758 
8759 	free_cpumask_var(tr->tracing_cpumask);
8760 	kfree(tr->name);
8761 	kfree(tr);
8762 	tr = NULL;
8763 
8764 	return 0;
8765 }
8766 
8767 int trace_array_destroy(struct trace_array *this_tr)
8768 {
8769 	struct trace_array *tr;
8770 	int ret;
8771 
8772 	if (!this_tr)
8773 		return -EINVAL;
8774 
8775 	mutex_lock(&event_mutex);
8776 	mutex_lock(&trace_types_lock);
8777 
8778 	ret = -ENODEV;
8779 
8780 	/* Make sure the trace array exists before destroying it. */
8781 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8782 		if (tr == this_tr) {
8783 			ret = __remove_instance(tr);
8784 			break;
8785 		}
8786 	}
8787 
8788 	mutex_unlock(&trace_types_lock);
8789 	mutex_unlock(&event_mutex);
8790 
8791 	return ret;
8792 }
8793 EXPORT_SYMBOL_GPL(trace_array_destroy);
8794 
8795 static int instance_rmdir(const char *name)
8796 {
8797 	struct trace_array *tr;
8798 	int ret;
8799 
8800 	mutex_lock(&event_mutex);
8801 	mutex_lock(&trace_types_lock);
8802 
8803 	ret = -ENODEV;
8804 	tr = trace_array_find(name);
8805 	if (tr)
8806 		ret = __remove_instance(tr);
8807 
8808 	mutex_unlock(&trace_types_lock);
8809 	mutex_unlock(&event_mutex);
8810 
8811 	return ret;
8812 }
8813 
8814 static __init void create_trace_instances(struct dentry *d_tracer)
8815 {
8816 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8817 							 instance_mkdir,
8818 							 instance_rmdir);
8819 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8820 		return;
8821 }
8822 
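/*
 * Create the tracefs control files for a trace array. Used both for
 * the top level tracing directory and for every instance created
 * under instances/.
 */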
8823 static void
8824 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8825 {
8826 	struct trace_event_file *file;
8827 	int cpu;
8828 
8829 	trace_create_file("available_tracers", 0444, d_tracer,
8830 			tr, &show_traces_fops);
8831 
8832 	trace_create_file("current_tracer", 0644, d_tracer,
8833 			tr, &set_tracer_fops);
8834 
8835 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8836 			  tr, &tracing_cpumask_fops);
8837 
8838 	trace_create_file("trace_options", 0644, d_tracer,
8839 			  tr, &tracing_iter_fops);
8840 
8841 	trace_create_file("trace", 0644, d_tracer,
8842 			  tr, &tracing_fops);
8843 
8844 	trace_create_file("trace_pipe", 0444, d_tracer,
8845 			  tr, &tracing_pipe_fops);
8846 
8847 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8848 			  tr, &tracing_entries_fops);
8849 
8850 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8851 			  tr, &tracing_total_entries_fops);
8852 
8853 	trace_create_file("free_buffer", 0200, d_tracer,
8854 			  tr, &tracing_free_buffer_fops);
8855 
8856 	trace_create_file("trace_marker", 0220, d_tracer,
8857 			  tr, &tracing_mark_fops);
8858 
8859 	file = __find_event_file(tr, "ftrace", "print");
8860 	if (file && file->dir)
8861 		trace_create_file("trigger", 0644, file->dir, file,
8862 				  &event_trigger_fops);
8863 	tr->trace_marker_file = file;
8864 
8865 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8866 			  tr, &tracing_mark_raw_fops);
8867 
8868 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8869 			  &trace_clock_fops);
8870 
8871 	trace_create_file("tracing_on", 0644, d_tracer,
8872 			  tr, &rb_simple_fops);
8873 
8874 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8875 			  &trace_time_stamp_mode_fops);
8876 
8877 	tr->buffer_percent = 50;
8878 
8879 	trace_create_file("buffer_percent", 0444, d_tracer,
8880 			tr, &buffer_percent_fops);
8881 
8882 	create_trace_options_dir(tr);
8883 
8884 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8885 	trace_create_maxlat_file(tr, d_tracer);
8886 #endif
8887 
8888 	if (ftrace_create_function_files(tr, d_tracer))
8889 		MEM_FAIL(1, "Could not allocate function filter files");
8890 
8891 #ifdef CONFIG_TRACER_SNAPSHOT
8892 	trace_create_file("snapshot", 0644, d_tracer,
8893 			  tr, &snapshot_fops);
8894 #endif
8895 
8896 	trace_create_file("error_log", 0644, d_tracer,
8897 			  tr, &tracing_err_log_fops);
8898 
8899 	for_each_tracing_cpu(cpu)
8900 		tracing_init_tracefs_percpu(tr, cpu);
8901 
8902 	ftrace_init_tracefs(tr, d_tracer);
8903 }
8904 
8905 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8906 {
8907 	struct vfsmount *mnt;
8908 	struct file_system_type *type;
8909 
8910 	/*
8911 	 * To maintain backward compatibility for tools that mount
8912 	 * debugfs to get to the tracing facility, tracefs is automatically
8913 	 * mounted to the debugfs/tracing directory.
8914 	 */
8915 	type = get_fs_type("tracefs");
8916 	if (!type)
8917 		return NULL;
8918 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8919 	put_filesystem(type);
8920 	if (IS_ERR(mnt))
8921 		return NULL;
8922 	mntget(mnt);
8923 
8924 	return mnt;
8925 }
8926 
8927 /**
8928  * tracing_init_dentry - initialize top level trace array
8929  *
8930  * This is called when creating files or directories in the tracing
8931  * directory. It is called via fs_initcall() by any of the boot up code
8932  * and expects to return the dentry of the top level tracing directory.
8933  */
8934 struct dentry *tracing_init_dentry(void)
8935 {
8936 	struct trace_array *tr = &global_trace;
8937 
8938 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
8939 		pr_warn("Tracing disabled due to lockdown\n");
8940 		return ERR_PTR(-EPERM);
8941 	}
8942 
8943 	/* The top level trace array uses NULL as parent */
8944 	if (tr->dir)
8945 		return NULL;
8946 
8947 	if (WARN_ON(!tracefs_initialized()))
8948 		return ERR_PTR(-ENODEV);
8949 
8950 	/*
8951 	 * As there may still be users that expect the tracing
8952 	 * files to exist in debugfs/tracing, we must automount
8953 	 * the tracefs file system there, so older tools still
8954 	 * work with the newer kernel.
8955 	 */
8956 	tr->dir = debugfs_create_automount("tracing", NULL,
8957 					   trace_automount, NULL);
8958 
8959 	return NULL;
8960 }
8961 
8962 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8963 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8964 
8965 static void __init trace_eval_init(void)
8966 {
8967 	int len;
8968 
8969 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8970 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8971 }
8972 
8973 #ifdef CONFIG_MODULES
8974 static void trace_module_add_evals(struct module *mod)
8975 {
8976 	if (!mod->num_trace_evals)
8977 		return;
8978 
8979 	/*
8980 	 * Modules with bad taint do not have events created, do
8981 	 * not bother with enums either.
8982 	 */
8983 	if (trace_module_has_bad_taint(mod))
8984 		return;
8985 
8986 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8987 }
8988 
8989 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8990 static void trace_module_remove_evals(struct module *mod)
8991 {
8992 	union trace_eval_map_item *map;
8993 	union trace_eval_map_item **last = &trace_eval_maps;
8994 
8995 	if (!mod->num_trace_evals)
8996 		return;
8997 
8998 	mutex_lock(&trace_eval_mutex);
8999 
9000 	map = trace_eval_maps;
9001 
9002 	while (map) {
9003 		if (map->head.mod == mod)
9004 			break;
9005 		map = trace_eval_jmp_to_tail(map);
9006 		last = &map->tail.next;
9007 		map = map->tail.next;
9008 	}
9009 	if (!map)
9010 		goto out;
9011 
9012 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9013 	kfree(map);
9014  out:
9015 	mutex_unlock(&trace_eval_mutex);
9016 }
9017 #else
9018 static inline void trace_module_remove_evals(struct module *mod) { }
9019 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9020 
9021 static int trace_module_notify(struct notifier_block *self,
9022 			       unsigned long val, void *data)
9023 {
9024 	struct module *mod = data;
9025 
9026 	switch (val) {
9027 	case MODULE_STATE_COMING:
9028 		trace_module_add_evals(mod);
9029 		break;
9030 	case MODULE_STATE_GOING:
9031 		trace_module_remove_evals(mod);
9032 		break;
9033 	}
9034 
9035 	return 0;
9036 }
9037 
9038 static struct notifier_block trace_module_nb = {
9039 	.notifier_call = trace_module_notify,
9040 	.priority = 0,
9041 };
9042 #endif /* CONFIG_MODULES */
9043 
9044 static __init int tracer_init_tracefs(void)
9045 {
9046 	struct dentry *d_tracer;
9047 
9048 	trace_access_lock_init();
9049 
9050 	d_tracer = tracing_init_dentry();
9051 	if (IS_ERR(d_tracer))
9052 		return 0;
9053 
9054 	event_trace_init();
9055 
9056 	init_tracer_tracefs(&global_trace, d_tracer);
9057 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9058 
9059 	trace_create_file("tracing_thresh", 0644, d_tracer,
9060 			&global_trace, &tracing_thresh_fops);
9061 
9062 	trace_create_file("README", 0444, d_tracer,
9063 			NULL, &tracing_readme_fops);
9064 
9065 	trace_create_file("saved_cmdlines", 0444, d_tracer,
9066 			NULL, &tracing_saved_cmdlines_fops);
9067 
9068 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9069 			  NULL, &tracing_saved_cmdlines_size_fops);
9070 
9071 	trace_create_file("saved_tgids", 0444, d_tracer,
9072 			NULL, &tracing_saved_tgids_fops);
9073 
9074 	trace_eval_init();
9075 
9076 	trace_create_eval_file(d_tracer);
9077 
9078 #ifdef CONFIG_MODULES
9079 	register_module_notifier(&trace_module_nb);
9080 #endif
9081 
9082 #ifdef CONFIG_DYNAMIC_FTRACE
9083 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9084 			NULL, &tracing_dyn_info_fops);
9085 #endif
9086 
9087 	create_trace_instances(d_tracer);
9088 
9089 	update_tracer_options(&global_trace);
9090 
9091 	return 0;
9092 }
9093 
9094 static int trace_panic_handler(struct notifier_block *this,
9095 			       unsigned long event, void *unused)
9096 {
9097 	if (ftrace_dump_on_oops)
9098 		ftrace_dump(ftrace_dump_on_oops);
9099 	return NOTIFY_OK;
9100 }
9101 
9102 static struct notifier_block trace_panic_notifier = {
9103 	.notifier_call  = trace_panic_handler,
9104 	.next           = NULL,
9105 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9106 };
9107 
9108 static int trace_die_handler(struct notifier_block *self,
9109 			     unsigned long val,
9110 			     void *data)
9111 {
9112 	switch (val) {
9113 	case DIE_OOPS:
9114 		if (ftrace_dump_on_oops)
9115 			ftrace_dump(ftrace_dump_on_oops);
9116 		break;
9117 	default:
9118 		break;
9119 	}
9120 	return NOTIFY_OK;
9121 }
9122 
9123 static struct notifier_block trace_die_notifier = {
9124 	.notifier_call = trace_die_handler,
9125 	.priority = 200
9126 };
9127 
9128 /*
9129  * printk is set to a max of 1024; we really don't need it that big.
9130  * Nothing should be printing 1000 characters anyway.
9131  */
9132 #define TRACE_MAX_PRINT		1000
9133 
9134 /*
9135  * Define here KERN_TRACE so that we have one place to modify
9136  * it if we decide to change what log level the ftrace dump
9137  * should be at.
9138  */
9139 #define KERN_TRACE		KERN_EMERG
9140 
9141 void
9142 trace_printk_seq(struct trace_seq *s)
9143 {
9144 	/* Probably should print a warning here. */
9145 	if (s->seq.len >= TRACE_MAX_PRINT)
9146 		s->seq.len = TRACE_MAX_PRINT;
9147 
9148 	/*
9149 	 * More paranoid code. Although the buffer size is set to
9150 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9151 	 * an extra layer of protection.
9152 	 */
9153 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9154 		s->seq.len = s->seq.size - 1;
9155 
9156 	/* Should be zero terminated, but we are paranoid. */
9157 	s->buffer[s->seq.len] = 0;
9158 
9159 	printk(KERN_TRACE "%s", s->buffer);
9160 
9161 	trace_seq_init(s);
9162 }
9163 
9164 void trace_init_global_iter(struct trace_iterator *iter)
9165 {
9166 	iter->tr = &global_trace;
9167 	iter->trace = iter->tr->current_trace;
9168 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9169 	iter->array_buffer = &global_trace.array_buffer;
9170 
9171 	if (iter->trace && iter->trace->open)
9172 		iter->trace->open(iter);
9173 
9174 	/* Annotate start of buffers if we had overruns */
9175 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9176 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9177 
9178 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9179 	if (trace_clocks[iter->tr->clock_id].in_ns)
9180 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9181 }
9182 
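/*
 * Dump the contents of the ring buffer to the console with printk.
 * Called from the panic and die notifiers above (and from sysrq-z),
 * so the last trace data is still visible when the kernel goes down.
 */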
9183 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9184 {
9185 	/* use static because iter can be a bit big for the stack */
9186 	static struct trace_iterator iter;
9187 	static atomic_t dump_running;
9188 	struct trace_array *tr = &global_trace;
9189 	unsigned int old_userobj;
9190 	unsigned long flags;
9191 	int cnt = 0, cpu;
9192 
9193 	/* Only allow one dump user at a time. */
9194 	if (atomic_inc_return(&dump_running) != 1) {
9195 		atomic_dec(&dump_running);
9196 		return;
9197 	}
9198 
9199 	/*
9200 	 * Always turn off tracing when we dump.
9201 	 * We don't need to show trace output of what happens
9202 	 * between multiple crashes.
9203 	 *
9204 	 * If the user does a sysrq-z, then they can re-enable
9205 	 * tracing with echo 1 > tracing_on.
9206 	 */
9207 	tracing_off();
9208 
9209 	local_irq_save(flags);
9210 	printk_nmi_direct_enter();
9211 
9212 	/* Simulate the iterator */
9213 	trace_init_global_iter(&iter);
9214 	/* Cannot use kmalloc for iter.temp */
9215 	iter.temp = static_temp_buf;
9216 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9217 
9218 	for_each_tracing_cpu(cpu) {
9219 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9220 	}
9221 
9222 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9223 
9224 	/* don't look at user memory in panic mode */
9225 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9226 
9227 	switch (oops_dump_mode) {
9228 	case DUMP_ALL:
9229 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9230 		break;
9231 	case DUMP_ORIG:
9232 		iter.cpu_file = raw_smp_processor_id();
9233 		break;
9234 	case DUMP_NONE:
9235 		goto out_enable;
9236 	default:
9237 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9238 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9239 	}
9240 
9241 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9242 
9243 	/* Did function tracer already get disabled? */
9244 	if (ftrace_is_dead()) {
9245 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9246 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9247 	}
9248 
9249 	/*
9250 	 * We need to stop all tracing on all CPUs to read
9251 	 * the next buffer. This is a bit expensive, but is
9252 	 * not done often. We fill all that we can read,
9253 	 * and then release the locks again.
9254 	 */
9255 
9256 	while (!trace_empty(&iter)) {
9257 
9258 		if (!cnt)
9259 			printk(KERN_TRACE "---------------------------------\n");
9260 
9261 		cnt++;
9262 
9263 		trace_iterator_reset(&iter);
9264 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9265 
9266 		if (trace_find_next_entry_inc(&iter) != NULL) {
9267 			int ret;
9268 
9269 			ret = print_trace_line(&iter);
9270 			if (ret != TRACE_TYPE_NO_CONSUME)
9271 				trace_consume(&iter);
9272 		}
9273 		touch_nmi_watchdog();
9274 
9275 		trace_printk_seq(&iter.seq);
9276 	}
9277 
9278 	if (!cnt)
9279 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9280 	else
9281 		printk(KERN_TRACE "---------------------------------\n");
9282 
9283  out_enable:
9284 	tr->trace_flags |= old_userobj;
9285 
9286 	for_each_tracing_cpu(cpu) {
9287 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9288 	}
9289 	atomic_dec(&dump_running);
9290 	printk_nmi_direct_exit();
9291 	local_irq_restore(flags);
9292 }
9293 EXPORT_SYMBOL_GPL(ftrace_dump);
9294 
9295 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9296 {
9297 	char **argv;
9298 	int argc, ret;
9299 
9300 	argc = 0;
9301 	ret = 0;
9302 	argv = argv_split(GFP_KERNEL, buf, &argc);
9303 	if (!argv)
9304 		return -ENOMEM;
9305 
9306 	if (argc)
9307 		ret = createfn(argc, argv);
9308 
9309 	argv_free(argv);
9310 
9311 	return ret;
9312 }
9313 
9314 #define WRITE_BUFSIZE  4096
9315 
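/*
 * Parse a user supplied command buffer line by line: '#' starts a
 * comment, and each remaining line is split into words and handed to
 * the createfn callback (used e.g. by the dynamic probe event files
 * such as kprobe_events).
 */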
9316 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9317 				size_t count, loff_t *ppos,
9318 				int (*createfn)(int, char **))
9319 {
9320 	char *kbuf, *buf, *tmp;
9321 	int ret = 0;
9322 	size_t done = 0;
9323 	size_t size;
9324 
9325 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9326 	if (!kbuf)
9327 		return -ENOMEM;
9328 
9329 	while (done < count) {
9330 		size = count - done;
9331 
9332 		if (size >= WRITE_BUFSIZE)
9333 			size = WRITE_BUFSIZE - 1;
9334 
9335 		if (copy_from_user(kbuf, buffer + done, size)) {
9336 			ret = -EFAULT;
9337 			goto out;
9338 		}
9339 		kbuf[size] = '\0';
9340 		buf = kbuf;
9341 		do {
9342 			tmp = strchr(buf, '\n');
9343 			if (tmp) {
9344 				*tmp = '\0';
9345 				size = tmp - buf + 1;
9346 			} else {
9347 				size = strlen(buf);
9348 				if (done + size < count) {
9349 					if (buf != kbuf)
9350 						break;
9351 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9352 					pr_warn("Line length is too long: Should be less than %d\n",
9353 						WRITE_BUFSIZE - 2);
9354 					ret = -EINVAL;
9355 					goto out;
9356 				}
9357 			}
9358 			done += size;
9359 
9360 			/* Remove comments */
9361 			tmp = strchr(buf, '#');
9362 
9363 			if (tmp)
9364 				*tmp = '\0';
9365 
9366 			ret = trace_run_command(buf, createfn);
9367 			if (ret)
9368 				goto out;
9369 			buf += size;
9370 
9371 		} while (done < count);
9372 	}
9373 	ret = done;
9374 
9375 out:
9376 	kfree(kbuf);
9377 
9378 	return ret;
9379 }
9380 
9381 __init static int tracer_alloc_buffers(void)
9382 {
9383 	int ring_buf_size;
9384 	int ret = -ENOMEM;
9385 
9387 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9388 		pr_warn("Tracing disabled due to lockdown\n");
9389 		return -EPERM;
9390 	}
9391 
9392 	/*
9393 	 * Make sure we don't accidentally add more trace options
9394 	 * than we have bits for.
9395 	 */
9396 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9397 
9398 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9399 		goto out;
9400 
9401 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9402 		goto out_free_buffer_mask;
9403 
9404 	/* Only allocate trace_printk buffers if a trace_printk exists */
9405 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9406 		/* Must be called before global_trace.buffer is allocated */
9407 		trace_printk_init_buffers();
9408 
9409 	/* To save memory, keep the ring buffer size to its minimum */
9410 	if (ring_buffer_expanded)
9411 		ring_buf_size = trace_buf_size;
9412 	else
9413 		ring_buf_size = 1;
9414 
9415 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9416 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9417 
9418 	raw_spin_lock_init(&global_trace.start_lock);
9419 
9420 	/*
9421 	 * The prepare callback allocates some memory for the ring buffer. We
9422 	 * don't free the buffer if the CPU goes down. If we were to free
9423 	 * the buffer, then the user would lose any trace that was in the
9424 	 * buffer. The memory will be removed once the "instance" is removed.
9425 	 */
9426 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9427 				      "trace/RB:preapre", trace_rb_cpu_prepare,
9428 				      NULL);
9429 	if (ret < 0)
9430 		goto out_free_cpumask;
9431 	/* Used for event triggers */
9432 	ret = -ENOMEM;
9433 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9434 	if (!temp_buffer)
9435 		goto out_rm_hp_state;
9436 
9437 	if (trace_create_savedcmd() < 0)
9438 		goto out_free_temp_buffer;
9439 
9440 	/* TODO: make the number of buffers hot pluggable with CPUS */
9441 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9442 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9443 		goto out_free_savedcmd;
9444 	}
9445 
9446 	if (global_trace.buffer_disabled)
9447 		tracing_off();
9448 
9449 	if (trace_boot_clock) {
9450 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9451 		if (ret < 0)
9452 			pr_warn("Trace clock %s not defined, going back to default\n",
9453 				trace_boot_clock);
9454 	}
9455 
9456 	/*
9457 	 * register_tracer() might reference current_trace, so it
9458 	 * needs to be set before we register anything. This is
9459 	 * just a bootstrap of current_trace anyway.
9460 	 */
9461 	global_trace.current_trace = &nop_trace;
9462 
9463 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9464 
9465 	ftrace_init_global_array_ops(&global_trace);
9466 
9467 	init_trace_flags_index(&global_trace);
9468 
9469 	register_tracer(&nop_trace);
9470 
9471 	/* Function tracing may start here (via kernel command line) */
9472 	init_function_trace();
9473 
9474 	/* All seems OK, enable tracing */
9475 	tracing_disabled = 0;
9476 
9477 	atomic_notifier_chain_register(&panic_notifier_list,
9478 				       &trace_panic_notifier);
9479 
9480 	register_die_notifier(&trace_die_notifier);
9481 
9482 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9483 
9484 	INIT_LIST_HEAD(&global_trace.systems);
9485 	INIT_LIST_HEAD(&global_trace.events);
9486 	INIT_LIST_HEAD(&global_trace.hist_vars);
9487 	INIT_LIST_HEAD(&global_trace.err_log);
9488 	list_add(&global_trace.list, &ftrace_trace_arrays);
9489 
9490 	apply_trace_boot_options();
9491 
9492 	register_snapshot_cmd();
9493 
9494 	return 0;
9495 
9496 out_free_savedcmd:
9497 	free_saved_cmdlines_buffer(savedcmd);
9498 out_free_temp_buffer:
9499 	ring_buffer_free(temp_buffer);
9500 out_rm_hp_state:
9501 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9502 out_free_cpumask:
9503 	free_cpumask_var(global_trace.tracing_cpumask);
9504 out_free_buffer_mask:
9505 	free_cpumask_var(tracing_buffer_mask);
9506 out:
9507 	return ret;
9508 }
9509 
9510 void __init early_trace_init(void)
9511 {
9512 	if (tracepoint_printk) {
9513 		tracepoint_print_iter =
9514 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9515 		if (MEM_FAIL(!tracepoint_print_iter,
9516 			     "Failed to allocate trace iterator\n"))
9517 			tracepoint_printk = 0;
9518 		else
9519 			static_key_enable(&tracepoint_printk_key.key);
9520 	}
9521 	tracer_alloc_buffers();
9522 }
9523 
9524 void __init trace_init(void)
9525 {
9526 	trace_event_init();
9527 }
9528 
9529 __init static int clear_boot_tracer(void)
9530 {
9531 	/*
9532 	 * The default bootup tracer name is stored in a buffer that lives
9533 	 * in an init section. This function is called as a late initcall.
9534 	 * If the boot tracer was not found by then, clear it out, to
9535 	 * prevent later tracer registration from accessing the buffer
9536 	 * that is about to be freed.
9537 	 */
9538 	if (!default_bootup_tracer)
9539 		return 0;
9540 
9541 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9542 	       default_bootup_tracer);
9543 	default_bootup_tracer = NULL;
9544 
9545 	return 0;
9546 }
9547 
9548 fs_initcall(tracer_init_tracefs);
9549 late_initcall_sync(clear_boot_tracer);
9550 
9551 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9552 __init static int tracing_set_default_clock(void)
9553 {
9554 	/* sched_clock_stable() is determined in late_initcall */
9555 	if (!trace_boot_clock && !sched_clock_stable()) {
9556 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9557 			pr_warn("Can not set tracing clock due to lockdown\n");
9558 			return -EPERM;
9559 		}
9560 
9561 		printk(KERN_WARNING
9562 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9563 		       "If you want to keep using the local clock, then add:\n"
9564 		       "  \"trace_clock=local\"\n"
9565 		       "on the kernel command line\n");
9566 		tracing_set_clock(&global_trace, "global");
9567 	}
9568 
9569 	return 0;
9570 }
9571 late_initcall_sync(tracing_set_default_clock);
9572 #endif
9573