xref: /linux/kernel/trace/trace.c (revision cea0f76a483d1270ac6f6513964e3e75193dda48)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest. Concurrent insertions
65  * into the ring buffer, such as trace_printk(), could occur at
66  * the same time and give false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79 
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82 	{ }
83 };
84 
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88 	return 0;
89 }
90 
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97 
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105 
106 cpumask_var_t __read_mostly	tracing_buffer_mask;
107 
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 to dump the buffers of all CPUs, or
121  * set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123 
124 enum ftrace_dump_mode ftrace_dump_on_oops;
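/*
 * Editor's example (hedged, not part of the original source): based on the
 * comment above and on set_ftrace_dump_on_oops() below, the dump mode can be
 * selected on the kernel command line or at run time, e.g.:
 *
 *	ftrace_dump_on_oops			(boot: dump all CPU buffers)
 *	ftrace_dump_on_oops=orig_cpu		(boot: dump only the oopsing CPU)
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */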
125 
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128 
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132 	struct module			*mod;
133 	unsigned long			length;
134 };
135 
136 union trace_eval_map_item;
137 
138 struct trace_eval_map_tail {
139 	/*
140 	 * "end" is first and points to NULL as it must be different
141 	 * than "mod" or "eval_string"
142 	 */
143 	union trace_eval_map_item	*next;
144 	const char			*end;	/* points to NULL */
145 };
146 
147 static DEFINE_MUTEX(trace_eval_mutex);
148 
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157 	struct trace_eval_map		map;
158 	struct trace_eval_map_head	head;
159 	struct trace_eval_map_tail	tail;
160 };
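/*
 * Editor's illustration (derived from the comment above, not part of the
 * original source): a saved block of N eval maps occupies N + 2 array slots,
 *
 *	[ head: mod, length = N ][ map 0 ] ... [ map N-1 ][ tail: next, end ]
 *
 * where tail.next chains to the head of the next saved block, if any.
 */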
161 
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164 
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167 				   unsigned long flags, int pc);
168 
169 #define MAX_TRACER_SIZE		100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172 
173 static bool allocate_snapshot;
174 
175 static int __init set_cmdline_ftrace(char *str)
176 {
177 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178 	default_bootup_tracer = bootup_tracer_buf;
179 	/* We are using ftrace early, expand it */
180 	ring_buffer_expanded = true;
181 	return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
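/*
 * Editor's example (hedged, not part of the original source): selecting a
 * boot-up tracer on the kernel command line. The name must match a tracer
 * later registered via register_tracer(); "function" is only illustrative:
 *
 *	ftrace=function
 */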
184 
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187 	if (*str++ != '=' || !*str) {
188 		ftrace_dump_on_oops = DUMP_ALL;
189 		return 1;
190 	}
191 
192 	if (!strcmp("orig_cpu", str)) {
193 		ftrace_dump_on_oops = DUMP_ORIG;
194 		return 1;
195 	}
196 
197 	return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200 
201 static int __init stop_trace_on_warning(char *str)
202 {
203 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204 		__disable_trace_on_warning = 1;
205 	return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208 
209 static int __init boot_alloc_snapshot(char *str)
210 {
211 	allocate_snapshot = true;
212 	/* We also need the main ring buffer expanded */
213 	ring_buffer_expanded = true;
214 	return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217 
218 
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220 
221 static int __init set_trace_boot_options(char *str)
222 {
223 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224 	return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227 
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230 
231 static int __init set_trace_boot_clock(char *str)
232 {
233 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234 	trace_boot_clock = trace_boot_clock_buf;
235 	return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238 
239 static int __init set_tracepoint_printk(char *str)
240 {
241 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 		tracepoint_printk = 1;
243 	return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246 
247 unsigned long long ns2usecs(u64 nsec)
248 {
249 	nsec += 500;
250 	do_div(nsec, 1000);
251 	return nsec;
252 }
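/*
 * Editor's note (worked example, not part of the original source): the +500
 * rounds to the nearest microsecond instead of truncating, so
 * ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */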
253 
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS						\
256 	(FUNCTION_DEFAULT_FLAGS |					\
257 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
258 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
259 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
260 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261 
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
264 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265 
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269 
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275 	.trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277 
278 LIST_HEAD(ftrace_trace_arrays);
279 
280 int trace_array_get(struct trace_array *this_tr)
281 {
282 	struct trace_array *tr;
283 	int ret = -ENODEV;
284 
285 	mutex_lock(&trace_types_lock);
286 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287 		if (tr == this_tr) {
288 			tr->ref++;
289 			ret = 0;
290 			break;
291 		}
292 	}
293 	mutex_unlock(&trace_types_lock);
294 
295 	return ret;
296 }
297 
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300 	WARN_ON(!this_tr->ref);
301 	this_tr->ref--;
302 }
303 
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  * @this_tr : pointer to the trace array to be decremented
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314 	if (!this_tr)
315 		return;
316 
317 	mutex_lock(&trace_types_lock);
318 	__trace_array_put(this_tr);
319 	mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
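/*
 * Editor's sketch (hedged, not part of the original source): the typical
 * pairing with trace_array_get_by_name(), defined later in this file. The
 * instance name "foo" is a placeholder:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("foo");
 *	if (tr) {
 *		... use the instance ...
 *		trace_array_put(tr);
 *	}
 */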
322 
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325 	int ret;
326 
327 	ret = security_locked_down(LOCKDOWN_TRACEFS);
328 	if (ret)
329 		return ret;
330 
331 	if (tracing_disabled)
332 		return -ENODEV;
333 
334 	if (tr && trace_array_get(tr) < 0)
335 		return -ENODEV;
336 
337 	return 0;
338 }
339 
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341 			      struct trace_buffer *buffer,
342 			      struct ring_buffer_event *event)
343 {
344 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345 	    !filter_match_preds(call->filter, rec)) {
346 		__trace_event_discard_commit(buffer, event);
347 		return 1;
348 	}
349 
350 	return 0;
351 }
352 
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355 	vfree(pid_list->pids);
356 	kfree(pid_list);
357 }
358 
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369 	/*
370 	 * If pid_max changed after filtered_pids was created, we
371 	 * by default ignore all pids greater than the previous pid_max.
372 	 */
373 	if (search_pid >= filtered_pids->pid_max)
374 		return false;
375 
376 	return test_bit(search_pid, filtered_pids->pids);
377 }
378 
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not, based on @filtered_pids and @filtered_no_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390 		       struct trace_pid_list *filtered_no_pids,
391 		       struct task_struct *task)
392 {
393 	/*
394 	 * If filtered_no_pids is not empty, and the task's pid is listed
395 	 * in filtered_no_pids, then return true.
396 	 * Otherwise, if filtered_pids is empty, that means we can
397 	 * trace all tasks. If it has content, then only trace pids
398 	 * within filtered_pids.
399 	 */
400 
401 	return (filtered_pids &&
402 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
403 		(filtered_no_pids &&
404 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406 
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420 				  struct task_struct *self,
421 				  struct task_struct *task)
422 {
423 	if (!pid_list)
424 		return;
425 
426 	/* For forks, we only add if the forking task is listed */
427 	if (self) {
428 		if (!trace_find_filtered_pid(pid_list, self->pid))
429 			return;
430 	}
431 
432 	/* Sorry, but we don't support pid_max changing after setting */
433 	if (task->pid >= pid_list->pid_max)
434 		return;
435 
436 	/* "self" is set for forks, and NULL for exits */
437 	if (self)
438 		set_bit(task->pid, pid_list->pids);
439 	else
440 		clear_bit(task->pid, pid_list->pids);
441 }
442 
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457 	unsigned long pid = (unsigned long)v;
458 
459 	(*pos)++;
460 
461 	/* pid is already +1 of the actual previous bit */
462 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463 
464 	/* Return pid + 1 to allow zero to be represented */
465 	if (pid < pid_list->pid_max)
466 		return (void *)(pid + 1);
467 
468 	return NULL;
469 }
470 
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484 	unsigned long pid;
485 	loff_t l = 0;
486 
487 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488 	if (pid >= pid_list->pid_max)
489 		return NULL;
490 
491 	/* Return pid + 1 so that zero can be the exit value */
492 	for (pid++; pid && l < *pos;
493 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494 		;
495 	return (void *)pid;
496 }
497 
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508 	unsigned long pid = (unsigned long)v - 1;
509 
510 	seq_printf(m, "%lu\n", pid);
511 	return 0;
512 }
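/*
 * Editor's sketch (hedged, not part of the original source): how the helpers
 * above are typically wired into a seq_file interface by the pid-filter code
 * elsewhere in the tracing subsystem. The example_pid_start/next/stop
 * callbacks stand for thin wrappers in such a caller that look up the
 * pid_list and then call trace_pid_start()/trace_pid_next():
 *
 *	static const struct seq_operations example_pid_seq_ops = {
 *		.start	= example_pid_start,
 *		.next	= example_pid_next,
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */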
513 
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE		127
516 
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518 		    struct trace_pid_list **new_pid_list,
519 		    const char __user *ubuf, size_t cnt)
520 {
521 	struct trace_pid_list *pid_list;
522 	struct trace_parser parser;
523 	unsigned long val;
524 	int nr_pids = 0;
525 	ssize_t read = 0;
526 	ssize_t ret = 0;
527 	loff_t pos;
528 	pid_t pid;
529 
530 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531 		return -ENOMEM;
532 
533 	/*
534 	 * Always create a new array: the write is an all-or-nothing
535 	 * operation. A fresh array is built for the pids the user
536 	 * writes, so that if the operation fails, the current list is
537 	 * not modified.
538 	 */
539 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540 	if (!pid_list) {
541 		trace_parser_put(&parser);
542 		return -ENOMEM;
543 	}
544 
545 	pid_list->pid_max = READ_ONCE(pid_max);
546 
547 	/* Only truncating will shrink pid_max */
548 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549 		pid_list->pid_max = filtered_pids->pid_max;
550 
551 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552 	if (!pid_list->pids) {
553 		trace_parser_put(&parser);
554 		kfree(pid_list);
555 		return -ENOMEM;
556 	}
557 
558 	if (filtered_pids) {
559 		/* copy the current bits to the new max */
560 		for_each_set_bit(pid, filtered_pids->pids,
561 				 filtered_pids->pid_max) {
562 			set_bit(pid, pid_list->pids);
563 			nr_pids++;
564 		}
565 	}
566 
567 	while (cnt > 0) {
568 
569 		pos = 0;
570 
571 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
572 		if (ret < 0 || !trace_parser_loaded(&parser))
573 			break;
574 
575 		read += ret;
576 		ubuf += ret;
577 		cnt -= ret;
578 
579 		ret = -EINVAL;
580 		if (kstrtoul(parser.buffer, 0, &val))
581 			break;
582 		if (val >= pid_list->pid_max)
583 			break;
584 
585 		pid = (pid_t)val;
586 
587 		set_bit(pid, pid_list->pids);
588 		nr_pids++;
589 
590 		trace_parser_clear(&parser);
591 		ret = 0;
592 	}
593 	trace_parser_put(&parser);
594 
595 	if (ret < 0) {
596 		trace_free_pid_list(pid_list);
597 		return ret;
598 	}
599 
600 	if (!nr_pids) {
601 		/* Cleared the list of pids */
602 		trace_free_pid_list(pid_list);
603 		read = ret;
604 		pid_list = NULL;
605 	}
606 
607 	*new_pid_list = pid_list;
608 
609 	return read;
610 }
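/*
 * Editor's example (hedged, not part of the original source): this helper
 * backs the tracefs pid-filter files, which accept whitespace-separated pid
 * numbers. The file name below is illustrative of one such consumer:
 *
 *	echo 123 456 > set_event_pid
 */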
611 
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614 	u64 ts;
615 
616 	/* Early boot up does not have a buffer yet */
617 	if (!buf->buffer)
618 		return trace_clock_local();
619 
620 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
621 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622 
623 	return ts;
624 }
625 
626 u64 ftrace_now(int cpu)
627 {
628 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630 
631 /**
632  * tracing_is_enabled - Show if global_trace has been disabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" to be used in fast paths such as for
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642 	/*
643 	 * For quick access (irqsoff uses this in fast path), just
644 	 * return the mirror variable of the state of the ring buffer.
645 	 * It's a little racy, but we don't really care.
646 	 */
647 	smp_rmb();
648 	return !global_trace.buffer_disabled;
649 }
650 
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low number of 16384.
657  * If the dump on oops happens, it is much appreciated not to
658  * have to wait for all that output. In any case, this is
659  * configurable at both boot time and run time.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
662 
663 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
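/*
 * Editor's example (hedged, not part of the original source): the buffer size
 * can be set at boot via the "trace_buf_size=" parameter handled by
 * set_buf_size() later in this file, which uses memparse() and so accepts
 * size suffixes, e.g.:
 *
 *	trace_buf_size=10M
 */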
664 
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer		*trace_types __read_mostly;
667 
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672 
673 /*
674  * Serialize access to the ring buffer.
675  *
676  * The ring buffer serializes readers, but that is only low-level protection.
677  * The validity of the events (returned by ring_buffer_peek() etc.)
678  * is not protected by the ring buffer.
679  *
680  * The content of events may become garbage if we allow other processes to
681  * consume these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not a reader page) in the ring buffer, and this page will be
684  *      rewritten by the event producer.
685  *   B) the page of the consumed events may become a page for splice_read,
686  *      and this page will be returned to the system.
687  *
688  * These primitives allow multiple processes to access different cpu ring
689  * buffers concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multiple read-only accesses are also serialized.
693  */
694 
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	if (cpu == RING_BUFFER_ALL_CPUS) {
702 		/* gain it for accessing the whole ring buffer. */
703 		down_write(&all_cpu_access_lock);
704 	} else {
705 		/* gain it for accessing a cpu ring buffer. */
706 
707 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708 		down_read(&all_cpu_access_lock);
709 
710 		/* Secondly block other access to this @cpu ring buffer. */
711 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
712 	}
713 }
714 
715 static inline void trace_access_unlock(int cpu)
716 {
717 	if (cpu == RING_BUFFER_ALL_CPUS) {
718 		up_write(&all_cpu_access_lock);
719 	} else {
720 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721 		up_read(&all_cpu_access_lock);
722 	}
723 }
724 
725 static inline void trace_access_lock_init(void)
726 {
727 	int cpu;
728 
729 	for_each_possible_cpu(cpu)
730 		mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732 
733 #else
734 
735 static DEFINE_MUTEX(access_lock);
736 
737 static inline void trace_access_lock(int cpu)
738 {
739 	(void)cpu;
740 	mutex_lock(&access_lock);
741 }
742 
743 static inline void trace_access_unlock(int cpu)
744 {
745 	(void)cpu;
746 	mutex_unlock(&access_lock);
747 }
748 
749 static inline void trace_access_lock_init(void)
750 {
751 }
752 
753 #endif
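/*
 * Editor's sketch (hedged, not part of the original source): the intended
 * usage pattern of the primitives above when consuming events from a single
 * cpu buffer, per the comment that precedes them:
 *
 *	trace_access_lock(cpu);
 *	... ring_buffer_peek()/ring_buffer_consume() on that cpu ...
 *	trace_access_unlock(cpu);
 *
 * A reader of all cpu buffers passes RING_BUFFER_ALL_CPUS instead, which (in
 * the SMP case) takes the rwsem for write and excludes the per-cpu readers.
 */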
754 
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757 				 unsigned long flags,
758 				 int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760 				      struct trace_buffer *buffer,
761 				      unsigned long flags,
762 				      int skip, int pc, struct pt_regs *regs);
763 
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766 					unsigned long flags,
767 					int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771 				      struct trace_buffer *buffer,
772 				      unsigned long flags,
773 				      int skip, int pc, struct pt_regs *regs)
774 {
775 }
776 
777 #endif
778 
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781 		  int type, unsigned long flags, int pc)
782 {
783 	struct trace_entry *ent = ring_buffer_event_data(event);
784 
785 	tracing_generic_entry_update(ent, type, flags, pc);
786 }
787 
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790 			  int type,
791 			  unsigned long len,
792 			  unsigned long flags, int pc)
793 {
794 	struct ring_buffer_event *event;
795 
796 	event = ring_buffer_lock_reserve(buffer, len);
797 	if (event != NULL)
798 		trace_event_setup(event, type, flags, pc);
799 
800 	return event;
801 }
802 
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805 	if (tr->array_buffer.buffer)
806 		ring_buffer_record_on(tr->array_buffer.buffer);
807 	/*
808 	 * This flag is looked at when buffers haven't been allocated
809 	 * yet, or by some tracers (like irqsoff), that just want to
810 	 * know if the ring buffer has been disabled, but it can handle
811 	 * races where it gets disabled but we still do a record.
812 	 * As the check is in the fast path of the tracers, it is more
813 	 * important to be fast than accurate.
814 	 */
815 	tr->buffer_disabled = 0;
816 	/* Make the flag seen by readers */
817 	smp_wmb();
818 }
819 
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828 	tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831 
832 
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836 	__this_cpu_write(trace_taskinfo_save, true);
837 
838 	/* If this is the temp buffer, we need to commit fully */
839 	if (this_cpu_read(trace_buffered_event) == event) {
840 		/* Length is in event->array[0] */
841 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
842 		/* Release the temp buffer */
843 		this_cpu_dec(trace_buffered_event_cnt);
844 	} else
845 		ring_buffer_unlock_commit(buffer, event);
846 }
847 
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:	   The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856 	struct ring_buffer_event *event;
857 	struct trace_buffer *buffer;
858 	struct print_entry *entry;
859 	unsigned long irq_flags;
860 	int alloc;
861 	int pc;
862 
863 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864 		return 0;
865 
866 	pc = preempt_count();
867 
868 	if (unlikely(tracing_selftest_running || tracing_disabled))
869 		return 0;
870 
871 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
872 
873 	local_save_flags(irq_flags);
874 	buffer = global_trace.array_buffer.buffer;
875 	ring_buffer_nest_start(buffer);
876 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
877 					    irq_flags, pc);
878 	if (!event) {
879 		size = 0;
880 		goto out;
881 	}
882 
883 	entry = ring_buffer_event_data(event);
884 	entry->ip = ip;
885 
886 	memcpy(&entry->buf, str, size);
887 
888 	/* Add a newline if necessary */
889 	if (entry->buf[size - 1] != '\n') {
890 		entry->buf[size] = '\n';
891 		entry->buf[size + 1] = '\0';
892 	} else
893 		entry->buf[size] = '\0';
894 
895 	__buffer_unlock_commit(buffer, event);
896 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898 	ring_buffer_nest_end(buffer);
899 	return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
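/*
 * Editor's note (hedged, not part of the original source): callers normally
 * do not invoke __trace_puts() directly but use the trace_puts() macro,
 * which supplies the caller's ip and the string length, roughly:
 *
 *	trace_puts("reached the interesting code path\n");
 */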
902 
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:	   The address of the caller
906  * @str:   The constant string to write to the buffer to
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910 	struct ring_buffer_event *event;
911 	struct trace_buffer *buffer;
912 	struct bputs_entry *entry;
913 	unsigned long irq_flags;
914 	int size = sizeof(struct bputs_entry);
915 	int ret = 0;
916 	int pc;
917 
918 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919 		return 0;
920 
921 	pc = preempt_count();
922 
923 	if (unlikely(tracing_selftest_running || tracing_disabled))
924 		return 0;
925 
926 	local_save_flags(irq_flags);
927 	buffer = global_trace.array_buffer.buffer;
928 
929 	ring_buffer_nest_start(buffer);
930 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931 					    irq_flags, pc);
932 	if (!event)
933 		goto out;
934 
935 	entry = ring_buffer_event_data(event);
936 	entry->ip			= ip;
937 	entry->str			= str;
938 
939 	__buffer_unlock_commit(buffer, event);
940 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941 
942 	ret = 1;
943  out:
944 	ring_buffer_nest_end(buffer);
945 	return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948 
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951 					   void *cond_data)
952 {
953 	struct tracer *tracer = tr->current_trace;
954 	unsigned long flags;
955 
956 	if (in_nmi()) {
957 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958 		internal_trace_puts("*** snapshot is being ignored        ***\n");
959 		return;
960 	}
961 
962 	if (!tr->allocated_snapshot) {
963 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964 		internal_trace_puts("*** stopping trace here!   ***\n");
965 		tracing_off();
966 		return;
967 	}
968 
969 	/* Note, snapshot can not be used when the tracer uses it */
970 	if (tracer->use_max_tr) {
971 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973 		return;
974 	}
975 
976 	local_irq_save(flags);
977 	update_max_tr(tr, current, smp_processor_id(), cond_data);
978 	local_irq_restore(flags);
979 }
980 
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983 	tracing_snapshot_instance_cond(tr, NULL);
984 }
985 
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002 	struct trace_array *tr = &global_trace;
1003 
1004 	tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
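/*
 * Editor's example (hedged, not part of the original source): the usage
 * described in the comment above. Allocate once where sleeping is allowed,
 * then take the snapshot when the event of interest occurs:
 *
 *	tracing_snapshot_alloc();
 *	...
 *	if (condition)
 *		tracing_snapshot();
 *
 * Alternatively, allocate from user space with
 * "echo 1 > /sys/kernel/debug/tracing/snapshot" as noted above.
 */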
1007 
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:		The tracing instance to snapshot
1011  * @cond_data:	The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023 	tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026 
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:		The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already taken.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043 	void *cond_data = NULL;
1044 
1045 	arch_spin_lock(&tr->max_lock);
1046 
1047 	if (tr->cond_snapshot)
1048 		cond_data = tr->cond_snapshot->cond_data;
1049 
1050 	arch_spin_unlock(&tr->max_lock);
1051 
1052 	return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055 
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057 					struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059 
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062 	int ret;
1063 
1064 	if (!tr->allocated_snapshot) {
1065 
1066 		/* allocate spare buffer */
1067 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069 		if (ret < 0)
1070 			return ret;
1071 
1072 		tr->allocated_snapshot = true;
1073 	}
1074 
1075 	return 0;
1076 }
1077 
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080 	/*
1081 	 * We don't free the ring buffer. Instead, we resize it because
1082 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1083 	 * we want to preserve it.
1084 	 */
1085 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086 	set_buffer_entries(&tr->max_buffer, 1);
1087 	tracing_reset_online_cpus(&tr->max_buffer);
1088 	tr->allocated_snapshot = false;
1089 }
1090 
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103 	struct trace_array *tr = &global_trace;
1104 	int ret;
1105 
1106 	ret = tracing_alloc_snapshot_instance(tr);
1107 	WARN_ON(ret < 0);
1108 
1109 	return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112 
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126 	int ret;
1127 
1128 	ret = tracing_alloc_snapshot();
1129 	if (ret < 0)
1130 		return;
1131 
1132 	tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1135 
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:		The tracing instance
1139  * @cond_data:	User data to associate with the snapshot
1140  * @update:	Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150 				 cond_update_fn_t update)
1151 {
1152 	struct cond_snapshot *cond_snapshot;
1153 	int ret = 0;
1154 
1155 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156 	if (!cond_snapshot)
1157 		return -ENOMEM;
1158 
1159 	cond_snapshot->cond_data = cond_data;
1160 	cond_snapshot->update = update;
1161 
1162 	mutex_lock(&trace_types_lock);
1163 
1164 	ret = tracing_alloc_snapshot_instance(tr);
1165 	if (ret)
1166 		goto fail_unlock;
1167 
1168 	if (tr->current_trace->use_max_tr) {
1169 		ret = -EBUSY;
1170 		goto fail_unlock;
1171 	}
1172 
1173 	/*
1174 	 * The cond_snapshot can only change to NULL without the
1175 	 * trace_types_lock. We don't care if we race with it going
1176 	 * to NULL, but we want to make sure that it's not set to
1177 	 * something other than NULL when we get here, which we can
1178 	 * do safely with only holding the trace_types_lock and not
1179 	 * having to take the max_lock.
1180 	 */
1181 	if (tr->cond_snapshot) {
1182 		ret = -EBUSY;
1183 		goto fail_unlock;
1184 	}
1185 
1186 	arch_spin_lock(&tr->max_lock);
1187 	tr->cond_snapshot = cond_snapshot;
1188 	arch_spin_unlock(&tr->max_lock);
1189 
1190 	mutex_unlock(&trace_types_lock);
1191 
1192 	return ret;
1193 
1194  fail_unlock:
1195 	mutex_unlock(&trace_types_lock);
1196 	kfree(cond_snapshot);
1197 	return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
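/*
 * Editor's sketch (hedged, not part of the original source): a minimal update
 * callback and enablement, assuming the cond_update_fn_t signature from
 * trace.h, bool (*)(struct trace_array *tr, void *cond_data). Returning true
 * from the callback means the snapshot is taken:
 *
 *	static struct example_state state;
 *
 *	static bool example_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct example_state *s = cond_data;
 *
 *		return s->hit_threshold;
 *	}
 *
 *	ret = tracing_snapshot_cond_enable(tr, &state, example_update);
 *	...
 *	tracing_snapshot_cond(tr, &state);
 */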
1200 
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:		The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213 	int ret = 0;
1214 
1215 	arch_spin_lock(&tr->max_lock);
1216 
1217 	if (!tr->cond_snapshot)
1218 		ret = -EINVAL;
1219 	else {
1220 		kfree(tr->cond_snapshot);
1221 		tr->cond_snapshot = NULL;
1222 	}
1223 
1224 	arch_spin_unlock(&tr->max_lock);
1225 
1226 	return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243 	return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248 	/* Give warning */
1249 	tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254 	return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259 	return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264 	return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268 
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271 	if (tr->array_buffer.buffer)
1272 		ring_buffer_record_off(tr->array_buffer.buffer);
1273 	/*
1274 	 * This flag is looked at when buffers haven't been allocated
1275 	 * yet, or by some tracers (like irqsoff), that just want to
1276 	 * know if the ring buffer has been disabled, but it can handle
1277 	 * races where it gets disabled but we still do a record.
1278 	 * As the check is in the fast path of the tracers, it is more
1279 	 * important to be fast than accurate.
1280 	 */
1281 	tr->buffer_disabled = 1;
1282 	/* Make the flag seen by readers */
1283 	smp_wmb();
1284 }
1285 
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296 	tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299 
1300 void disable_trace_on_warning(void)
1301 {
1302 	if (__disable_trace_on_warning) {
1303 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304 			"Disabling tracing due to warning\n");
1305 		tracing_off();
1306 	}
1307 }
1308 
1309 /**
1310  * tracer_tracing_is_on - show real state of ring buffer enabled
1311  * @tr : the trace array to know if ring buffer is enabled
1312  *
1313  * Shows real state of the ring buffer if it is enabled or not.
1314  */
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1316 {
1317 	if (tr->array_buffer.buffer)
1318 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319 	return !tr->buffer_disabled;
1320 }
1321 
1322 /**
1323  * tracing_is_on - show state of ring buffers enabled
1324  */
1325 int tracing_is_on(void)
1326 {
1327 	return tracer_tracing_is_on(&global_trace);
1328 }
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1330 
1331 static int __init set_buf_size(char *str)
1332 {
1333 	unsigned long buf_size;
1334 
1335 	if (!str)
1336 		return 0;
1337 	buf_size = memparse(str, &str);
1338 	/* nr_entries can not be zero */
1339 	if (buf_size == 0)
1340 		return 0;
1341 	trace_buf_size = buf_size;
1342 	return 1;
1343 }
1344 __setup("trace_buf_size=", set_buf_size);
1345 
1346 static int __init set_tracing_thresh(char *str)
1347 {
1348 	unsigned long threshold;
1349 	int ret;
1350 
1351 	if (!str)
1352 		return 0;
1353 	ret = kstrtoul(str, 0, &threshold);
1354 	if (ret < 0)
1355 		return 0;
1356 	tracing_thresh = threshold * 1000;
1357 	return 1;
1358 }
1359 __setup("tracing_thresh=", set_tracing_thresh);
1360 
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1362 {
1363 	return nsecs / 1000;
1364 }
1365 
1366 /*
1367  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370  * of strings in the order that the evals (enum) were defined.
1371  */
1372 #undef C
1373 #define C(a, b) b
1374 
1375 /* These must match the bit positions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1377 	TRACE_FLAGS
1378 	NULL
1379 };
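/*
 * Editor's illustration (hedged, not part of the original source): for a
 * hypothetical two-entry list
 *
 *	#define TRACE_FLAGS C(PRINTK, "trace_printk"), C(MARKERS, "markers"),
 *
 * the "#define C(a, b) b" above makes trace_options[] expand to
 * { "trace_printk", "markers", NULL }, in the same order as the bit
 * definitions that trace.h generates from the same list.
 */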
1380 
1381 static struct {
1382 	u64 (*func)(void);
1383 	const char *name;
1384 	int in_ns;		/* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386 	{ trace_clock_local,		"local",	1 },
1387 	{ trace_clock_global,		"global",	1 },
1388 	{ trace_clock_counter,		"counter",	0 },
1389 	{ trace_clock_jiffies,		"uptime",	0 },
1390 	{ trace_clock,			"perf",		1 },
1391 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1392 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1393 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1394 	ARCH_TRACE_CLOCKS
1395 };
1396 
1397 bool trace_clock_in_ns(struct trace_array *tr)
1398 {
1399 	if (trace_clocks[tr->clock_id].in_ns)
1400 		return true;
1401 
1402 	return false;
1403 }
1404 
1405 /*
1406  * trace_parser_get_init - gets the buffer for trace parser
1407  */
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1409 {
1410 	memset(parser, 0, sizeof(*parser));
1411 
1412 	parser->buffer = kmalloc(size, GFP_KERNEL);
1413 	if (!parser->buffer)
1414 		return 1;
1415 
1416 	parser->size = size;
1417 	return 0;
1418 }
1419 
1420 /*
1421  * trace_parser_put - frees the buffer for trace parser
1422  */
1423 void trace_parser_put(struct trace_parser *parser)
1424 {
1425 	kfree(parser->buffer);
1426 	parser->buffer = NULL;
1427 }
1428 
1429 /*
1430  * trace_get_user - reads the user input string separated by space
1431  * (matched by isspace(ch))
1432  *
1433  * For each string found the 'struct trace_parser' is updated,
1434  * and the function returns.
1435  *
1436  * Returns number of bytes read.
1437  *
1438  * See kernel/trace/trace.h for 'struct trace_parser' details.
1439  */
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441 	size_t cnt, loff_t *ppos)
1442 {
1443 	char ch;
1444 	size_t read = 0;
1445 	ssize_t ret;
1446 
1447 	if (!*ppos)
1448 		trace_parser_clear(parser);
1449 
1450 	ret = get_user(ch, ubuf++);
1451 	if (ret)
1452 		goto out;
1453 
1454 	read++;
1455 	cnt--;
1456 
1457 	/*
1458 	 * The parser is not finished with the last write,
1459 	 * continue reading the user input without skipping spaces.
1460 	 */
1461 	if (!parser->cont) {
1462 		/* skip white space */
1463 		while (cnt && isspace(ch)) {
1464 			ret = get_user(ch, ubuf++);
1465 			if (ret)
1466 				goto out;
1467 			read++;
1468 			cnt--;
1469 		}
1470 
1471 		parser->idx = 0;
1472 
1473 		/* only spaces were written */
1474 		if (isspace(ch) || !ch) {
1475 			*ppos += read;
1476 			ret = read;
1477 			goto out;
1478 		}
1479 	}
1480 
1481 	/* read the non-space input */
1482 	while (cnt && !isspace(ch) && ch) {
1483 		if (parser->idx < parser->size - 1)
1484 			parser->buffer[parser->idx++] = ch;
1485 		else {
1486 			ret = -EINVAL;
1487 			goto out;
1488 		}
1489 		ret = get_user(ch, ubuf++);
1490 		if (ret)
1491 			goto out;
1492 		read++;
1493 		cnt--;
1494 	}
1495 
1496 	/* We either got finished input or we have to wait for another call. */
1497 	if (isspace(ch) || !ch) {
1498 		parser->buffer[parser->idx] = 0;
1499 		parser->cont = false;
1500 	} else if (parser->idx < parser->size - 1) {
1501 		parser->cont = true;
1502 		parser->buffer[parser->idx++] = ch;
1503 		/* Make sure the parsed string always terminates with '\0'. */
1504 		parser->buffer[parser->idx] = 0;
1505 	} else {
1506 		ret = -EINVAL;
1507 		goto out;
1508 	}
1509 
1510 	*ppos += read;
1511 	ret = read;
1512 
1513 out:
1514 	return ret;
1515 }
1516 
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1519 {
1520 	int len;
1521 
1522 	if (trace_seq_used(s) <= s->seq.readpos)
1523 		return -EBUSY;
1524 
1525 	len = trace_seq_used(s) - s->seq.readpos;
1526 	if (cnt > len)
1527 		cnt = len;
1528 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1529 
1530 	s->seq.readpos += cnt;
1531 	return cnt;
1532 }
1533 
1534 unsigned long __read_mostly	tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1536 
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538 	defined(CONFIG_FSNOTIFY)
1539 
1540 static struct workqueue_struct *fsnotify_wq;
1541 
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1543 {
1544 	struct trace_array *tr = container_of(work, struct trace_array,
1545 					      fsnotify_work);
1546 	fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1547 		 tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1548 }
1549 
1550 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1551 {
1552 	struct trace_array *tr = container_of(iwork, struct trace_array,
1553 					      fsnotify_irqwork);
1554 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1555 }
1556 
1557 static void trace_create_maxlat_file(struct trace_array *tr,
1558 				     struct dentry *d_tracer)
1559 {
1560 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1561 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1562 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1563 					      d_tracer, &tr->max_latency,
1564 					      &tracing_max_lat_fops);
1565 }
1566 
1567 __init static int latency_fsnotify_init(void)
1568 {
1569 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1570 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1571 	if (!fsnotify_wq) {
1572 		pr_err("Unable to allocate tr_max_lat_wq\n");
1573 		return -ENOMEM;
1574 	}
1575 	return 0;
1576 }
1577 
1578 late_initcall_sync(latency_fsnotify_init);
1579 
1580 void latency_fsnotify(struct trace_array *tr)
1581 {
1582 	if (!fsnotify_wq)
1583 		return;
1584 	/*
1585 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1586 	 * possible that we are called from __schedule() or do_idle(), which
1587 	 * could cause a deadlock.
1588 	 */
1589 	irq_work_queue(&tr->fsnotify_irqwork);
1590 }
1591 
1592 /*
1593  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1594  *  defined(CONFIG_FSNOTIFY)
1595  */
1596 #else
1597 
1598 #define trace_create_maxlat_file(tr, d_tracer)				\
1599 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1600 			  &tr->max_latency, &tracing_max_lat_fops)
1601 
1602 #endif
1603 
1604 #ifdef CONFIG_TRACER_MAX_TRACE
1605 /*
1606  * Copy the new maximum trace into the separate maximum-trace
1607  * structure. (this way the maximum trace is permanently saved,
1608  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1609  */
1610 static void
1611 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1612 {
1613 	struct array_buffer *trace_buf = &tr->array_buffer;
1614 	struct array_buffer *max_buf = &tr->max_buffer;
1615 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1616 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1617 
1618 	max_buf->cpu = cpu;
1619 	max_buf->time_start = data->preempt_timestamp;
1620 
1621 	max_data->saved_latency = tr->max_latency;
1622 	max_data->critical_start = data->critical_start;
1623 	max_data->critical_end = data->critical_end;
1624 
1625 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1626 	max_data->pid = tsk->pid;
1627 	/*
1628 	 * If tsk == current, then use current_uid(), as that does not use
1629 	 * RCU. The irq tracer can be called out of RCU scope.
1630 	 */
1631 	if (tsk == current)
1632 		max_data->uid = current_uid();
1633 	else
1634 		max_data->uid = task_uid(tsk);
1635 
1636 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1637 	max_data->policy = tsk->policy;
1638 	max_data->rt_priority = tsk->rt_priority;
1639 
1640 	/* record this task's comm */
1641 	tracing_record_cmdline(tsk);
1642 	latency_fsnotify(tr);
1643 }
1644 
1645 /**
1646  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1647  * @tr: tracer
1648  * @tsk: the task with the latency
1649  * @cpu: The cpu that initiated the trace.
1650  * @cond_data: User data associated with a conditional snapshot
1651  *
1652  * Flip the buffers between the @tr and the max_tr and record information
1653  * about which task was the cause of this latency.
1654  */
1655 void
1656 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1657 	      void *cond_data)
1658 {
1659 	if (tr->stop_count)
1660 		return;
1661 
1662 	WARN_ON_ONCE(!irqs_disabled());
1663 
1664 	if (!tr->allocated_snapshot) {
1665 		/* Only the nop tracer should hit this when disabling */
1666 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1667 		return;
1668 	}
1669 
1670 	arch_spin_lock(&tr->max_lock);
1671 
1672 	/* Inherit the recordable setting from array_buffer */
1673 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1674 		ring_buffer_record_on(tr->max_buffer.buffer);
1675 	else
1676 		ring_buffer_record_off(tr->max_buffer.buffer);
1677 
1678 #ifdef CONFIG_TRACER_SNAPSHOT
1679 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1680 		goto out_unlock;
1681 #endif
1682 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1683 
1684 	__update_max_tr(tr, tsk, cpu);
1685 
1686  out_unlock:
1687 	arch_spin_unlock(&tr->max_lock);
1688 }
1689 
1690 /**
1691  * update_max_tr_single - only copy one trace over, and reset the rest
1692  * @tr: tracer
1693  * @tsk: task with the latency
1694  * @cpu: the cpu of the buffer to copy.
1695  *
1696  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1697  */
1698 void
1699 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1700 {
1701 	int ret;
1702 
1703 	if (tr->stop_count)
1704 		return;
1705 
1706 	WARN_ON_ONCE(!irqs_disabled());
1707 	if (!tr->allocated_snapshot) {
1708 		/* Only the nop tracer should hit this when disabling */
1709 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1710 		return;
1711 	}
1712 
1713 	arch_spin_lock(&tr->max_lock);
1714 
1715 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1716 
1717 	if (ret == -EBUSY) {
1718 		/*
1719 		 * We failed to swap the buffer due to a commit taking
1720 		 * place on this CPU. We fail to record, but we reset
1721 		 * the max trace buffer (no one writes directly to it)
1722 		 * and flag that it failed.
1723 		 */
1724 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1725 			"Failed to swap buffers due to commit in progress\n");
1726 	}
1727 
1728 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1729 
1730 	__update_max_tr(tr, tsk, cpu);
1731 	arch_spin_unlock(&tr->max_lock);
1732 }
1733 #endif /* CONFIG_TRACER_MAX_TRACE */
1734 
1735 static int wait_on_pipe(struct trace_iterator *iter, int full)
1736 {
1737 	/* Iterators are static, they should be filled or empty */
1738 	if (trace_buffer_iter(iter, iter->cpu_file))
1739 		return 0;
1740 
1741 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1742 				full);
1743 }
1744 
1745 #ifdef CONFIG_FTRACE_STARTUP_TEST
1746 static bool selftests_can_run;
1747 
1748 struct trace_selftests {
1749 	struct list_head		list;
1750 	struct tracer			*type;
1751 };
1752 
1753 static LIST_HEAD(postponed_selftests);
1754 
1755 static int save_selftest(struct tracer *type)
1756 {
1757 	struct trace_selftests *selftest;
1758 
1759 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1760 	if (!selftest)
1761 		return -ENOMEM;
1762 
1763 	selftest->type = type;
1764 	list_add(&selftest->list, &postponed_selftests);
1765 	return 0;
1766 }
1767 
1768 static int run_tracer_selftest(struct tracer *type)
1769 {
1770 	struct trace_array *tr = &global_trace;
1771 	struct tracer *saved_tracer = tr->current_trace;
1772 	int ret;
1773 
1774 	if (!type->selftest || tracing_selftest_disabled)
1775 		return 0;
1776 
1777 	/*
1778 	 * If a tracer registers early in boot up (before scheduling is
1779 	 * initialized and such), then do not run its selftests yet.
1780 	 * Instead, run it a little later in the boot process.
1781 	 */
1782 	if (!selftests_can_run)
1783 		return save_selftest(type);
1784 
1785 	/*
1786 	 * Run a selftest on this tracer.
1787 	 * Here we reset the trace buffer, and set the current
1788 	 * tracer to be this tracer. The tracer can then run some
1789 	 * internal tracing to verify that everything is in order.
1790 	 * If we fail, we do not register this tracer.
1791 	 */
1792 	tracing_reset_online_cpus(&tr->array_buffer);
1793 
1794 	tr->current_trace = type;
1795 
1796 #ifdef CONFIG_TRACER_MAX_TRACE
1797 	if (type->use_max_tr) {
1798 		/* If we expanded the buffers, make sure the max is expanded too */
1799 		if (ring_buffer_expanded)
1800 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1801 					   RING_BUFFER_ALL_CPUS);
1802 		tr->allocated_snapshot = true;
1803 	}
1804 #endif
1805 
1806 	/* the test is responsible for initializing and enabling */
1807 	pr_info("Testing tracer %s: ", type->name);
1808 	ret = type->selftest(type, tr);
1809 	/* the test is responsible for resetting too */
1810 	tr->current_trace = saved_tracer;
1811 	if (ret) {
1812 		printk(KERN_CONT "FAILED!\n");
1813 		/* Add the warning after printing 'FAILED' */
1814 		WARN_ON(1);
1815 		return -1;
1816 	}
1817 	/* Only reset on passing, to avoid touching corrupted buffers */
1818 	tracing_reset_online_cpus(&tr->array_buffer);
1819 
1820 #ifdef CONFIG_TRACER_MAX_TRACE
1821 	if (type->use_max_tr) {
1822 		tr->allocated_snapshot = false;
1823 
1824 		/* Shrink the max buffer again */
1825 		if (ring_buffer_expanded)
1826 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1827 					   RING_BUFFER_ALL_CPUS);
1828 	}
1829 #endif
1830 
1831 	printk(KERN_CONT "PASSED\n");
1832 	return 0;
1833 }
1834 
1835 static __init int init_trace_selftests(void)
1836 {
1837 	struct trace_selftests *p, *n;
1838 	struct tracer *t, **last;
1839 	int ret;
1840 
1841 	selftests_can_run = true;
1842 
1843 	mutex_lock(&trace_types_lock);
1844 
1845 	if (list_empty(&postponed_selftests))
1846 		goto out;
1847 
1848 	pr_info("Running postponed tracer tests:\n");
1849 
1850 	tracing_selftest_running = true;
1851 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1852 		/* This loop can take minutes when sanitizers are enabled, so
1853 		 * let's make sure we allow RCU processing.
1854 		 */
1855 		cond_resched();
1856 		ret = run_tracer_selftest(p->type);
1857 		/* If the test fails, then warn and remove from available_tracers */
1858 		if (ret < 0) {
1859 			WARN(1, "tracer: %s failed selftest, disabling\n",
1860 			     p->type->name);
1861 			last = &trace_types;
1862 			for (t = trace_types; t; t = t->next) {
1863 				if (t == p->type) {
1864 					*last = t->next;
1865 					break;
1866 				}
1867 				last = &t->next;
1868 			}
1869 		}
1870 		list_del(&p->list);
1871 		kfree(p);
1872 	}
1873 	tracing_selftest_running = false;
1874 
1875  out:
1876 	mutex_unlock(&trace_types_lock);
1877 
1878 	return 0;
1879 }
1880 core_initcall(init_trace_selftests);
1881 #else
1882 static inline int run_tracer_selftest(struct tracer *type)
1883 {
1884 	return 0;
1885 }
1886 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1887 
1888 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1889 
1890 static void __init apply_trace_boot_options(void);
1891 
1892 /**
1893  * register_tracer - register a tracer with the ftrace system.
1894  * @type: the plugin for the tracer
1895  *
1896  * Register a new plugin tracer.
1897  */
1898 int __init register_tracer(struct tracer *type)
1899 {
1900 	struct tracer *t;
1901 	int ret = 0;
1902 
1903 	if (!type->name) {
1904 		pr_info("Tracer must have a name\n");
1905 		return -1;
1906 	}
1907 
1908 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1909 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1910 		return -1;
1911 	}
1912 
1913 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1914 		pr_warn("Can not register tracer %s due to lockdown\n",
1915 			   type->name);
1916 		return -EPERM;
1917 	}
1918 
1919 	mutex_lock(&trace_types_lock);
1920 
1921 	tracing_selftest_running = true;
1922 
1923 	for (t = trace_types; t; t = t->next) {
1924 		if (strcmp(type->name, t->name) == 0) {
1925 			/* already found */
1926 			pr_info("Tracer %s already registered\n",
1927 				type->name);
1928 			ret = -1;
1929 			goto out;
1930 		}
1931 	}
1932 
1933 	if (!type->set_flag)
1934 		type->set_flag = &dummy_set_flag;
1935 	if (!type->flags) {
1936 		/* allocate a dummy tracer_flags */
1937 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1938 		if (!type->flags) {
1939 			ret = -ENOMEM;
1940 			goto out;
1941 		}
1942 		type->flags->val = 0;
1943 		type->flags->opts = dummy_tracer_opt;
1944 	} else
1945 		if (!type->flags->opts)
1946 			type->flags->opts = dummy_tracer_opt;
1947 
1948 	/* store the tracer for __set_tracer_option */
1949 	type->flags->trace = type;
1950 
1951 	ret = run_tracer_selftest(type);
1952 	if (ret < 0)
1953 		goto out;
1954 
1955 	type->next = trace_types;
1956 	trace_types = type;
1957 	add_tracer_options(&global_trace, type);
1958 
1959  out:
1960 	tracing_selftest_running = false;
1961 	mutex_unlock(&trace_types_lock);
1962 
1963 	if (ret || !default_bootup_tracer)
1964 		goto out_unlock;
1965 
1966 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1967 		goto out_unlock;
1968 
1969 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1970 	/* Do we want this tracer to start on bootup? */
1971 	tracing_set_tracer(&global_trace, type->name);
1972 	default_bootup_tracer = NULL;
1973 
1974 	apply_trace_boot_options();
1975 
1976 	/* Disable other selftests, since this boot-up tracer will break them. */
1977 	tracing_selftest_disabled = true;
1978 #ifdef CONFIG_FTRACE_STARTUP_TEST
1979 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1980 	       type->name);
1981 #endif
1982 
1983  out_unlock:
1984 	return ret;
1985 }
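/*
 * Example: the minimal shape of a tracer plugin that would be passed to
 * register_tracer(). This is only an illustrative sketch (the "example_*"
 * names are made up and not part of this file); real tracers live in
 * their own trace_*.c files and usually fill in more callbacks.
 */
#if 0
static int example_trace_init(struct trace_array *tr)
{
	/* Set up tracer state; return 0 on success. */
	return 0;
}

static void example_trace_reset(struct trace_array *tr)
{
	/* Undo whatever example_trace_init() set up. */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_trace_init,
	.reset	= example_trace_reset,
};

static __init int init_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);
#endif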
1986 
1987 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1988 {
1989 	struct trace_buffer *buffer = buf->buffer;
1990 
1991 	if (!buffer)
1992 		return;
1993 
1994 	ring_buffer_record_disable(buffer);
1995 
1996 	/* Make sure all commits have finished */
1997 	synchronize_rcu();
1998 	ring_buffer_reset_cpu(buffer, cpu);
1999 
2000 	ring_buffer_record_enable(buffer);
2001 }
2002 
2003 void tracing_reset_online_cpus(struct array_buffer *buf)
2004 {
2005 	struct trace_buffer *buffer = buf->buffer;
2006 	int cpu;
2007 
2008 	if (!buffer)
2009 		return;
2010 
2011 	ring_buffer_record_disable(buffer);
2012 
2013 	/* Make sure all commits have finished */
2014 	synchronize_rcu();
2015 
2016 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2017 
2018 	for_each_online_cpu(cpu)
2019 		ring_buffer_reset_cpu(buffer, cpu);
2020 
2021 	ring_buffer_record_enable(buffer);
2022 }
2023 
2024 /* Must have trace_types_lock held */
2025 void tracing_reset_all_online_cpus(void)
2026 {
2027 	struct trace_array *tr;
2028 
2029 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2030 		if (!tr->clear_trace)
2031 			continue;
2032 		tr->clear_trace = false;
2033 		tracing_reset_online_cpus(&tr->array_buffer);
2034 #ifdef CONFIG_TRACER_MAX_TRACE
2035 		tracing_reset_online_cpus(&tr->max_buffer);
2036 #endif
2037 	}
2038 }
2039 
2040 static int *tgid_map;
2041 
2042 #define SAVED_CMDLINES_DEFAULT 128
2043 #define NO_CMDLINE_MAP UINT_MAX
2044 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2045 struct saved_cmdlines_buffer {
2046 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2047 	unsigned *map_cmdline_to_pid;
2048 	unsigned cmdline_num;
2049 	int cmdline_idx;
2050 	char *saved_cmdlines;
2051 };
2052 static struct saved_cmdlines_buffer *savedcmd;
2053 
2054 /* temporarily disable recording */
2055 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2056 
2057 static inline char *get_saved_cmdlines(int idx)
2058 {
2059 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2060 }
2061 
2062 static inline void set_cmdline(int idx, const char *cmdline)
2063 {
2064 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2065 }
2066 
2067 static int allocate_cmdlines_buffer(unsigned int val,
2068 				    struct saved_cmdlines_buffer *s)
2069 {
2070 	s->map_cmdline_to_pid = kmalloc_array(val,
2071 					      sizeof(*s->map_cmdline_to_pid),
2072 					      GFP_KERNEL);
2073 	if (!s->map_cmdline_to_pid)
2074 		return -ENOMEM;
2075 
2076 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2077 	if (!s->saved_cmdlines) {
2078 		kfree(s->map_cmdline_to_pid);
2079 		return -ENOMEM;
2080 	}
2081 
2082 	s->cmdline_idx = 0;
2083 	s->cmdline_num = val;
2084 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2085 	       sizeof(s->map_pid_to_cmdline));
2086 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2087 	       val * sizeof(*s->map_cmdline_to_pid));
2088 
2089 	return 0;
2090 }
2091 
2092 static int trace_create_savedcmd(void)
2093 {
2094 	int ret;
2095 
2096 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2097 	if (!savedcmd)
2098 		return -ENOMEM;
2099 
2100 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2101 	if (ret < 0) {
2102 		kfree(savedcmd);
2103 		savedcmd = NULL;
2104 		return -ENOMEM;
2105 	}
2106 
2107 	return 0;
2108 }
2109 
2110 int is_tracing_stopped(void)
2111 {
2112 	return global_trace.stop_count;
2113 }
2114 
2115 /**
2116  * tracing_start - quick start of the tracer
2117  *
2118  * If tracing is enabled but was stopped by tracing_stop,
2119  * this will start the tracer back up.
2120  */
2121 void tracing_start(void)
2122 {
2123 	struct trace_buffer *buffer;
2124 	unsigned long flags;
2125 
2126 	if (tracing_disabled)
2127 		return;
2128 
2129 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2130 	if (--global_trace.stop_count) {
2131 		if (global_trace.stop_count < 0) {
2132 			/* Someone screwed up their debugging */
2133 			WARN_ON_ONCE(1);
2134 			global_trace.stop_count = 0;
2135 		}
2136 		goto out;
2137 	}
2138 
2139 	/* Prevent the buffers from switching */
2140 	arch_spin_lock(&global_trace.max_lock);
2141 
2142 	buffer = global_trace.array_buffer.buffer;
2143 	if (buffer)
2144 		ring_buffer_record_enable(buffer);
2145 
2146 #ifdef CONFIG_TRACER_MAX_TRACE
2147 	buffer = global_trace.max_buffer.buffer;
2148 	if (buffer)
2149 		ring_buffer_record_enable(buffer);
2150 #endif
2151 
2152 	arch_spin_unlock(&global_trace.max_lock);
2153 
2154  out:
2155 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2156 }
2157 
2158 static void tracing_start_tr(struct trace_array *tr)
2159 {
2160 	struct trace_buffer *buffer;
2161 	unsigned long flags;
2162 
2163 	if (tracing_disabled)
2164 		return;
2165 
2166 	/* If global, we need to also start the max tracer */
2167 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2168 		return tracing_start();
2169 
2170 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2171 
2172 	if (--tr->stop_count) {
2173 		if (tr->stop_count < 0) {
2174 			/* Someone screwed up their debugging */
2175 			WARN_ON_ONCE(1);
2176 			tr->stop_count = 0;
2177 		}
2178 		goto out;
2179 	}
2180 
2181 	buffer = tr->array_buffer.buffer;
2182 	if (buffer)
2183 		ring_buffer_record_enable(buffer);
2184 
2185  out:
2186 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2187 }
2188 
2189 /**
2190  * tracing_stop - quick stop of the tracer
2191  *
2192  * Light weight way to stop tracing. Use in conjunction with
2193  * tracing_start.
2194  */
2195 void tracing_stop(void)
2196 {
2197 	struct trace_buffer *buffer;
2198 	unsigned long flags;
2199 
2200 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2201 	if (global_trace.stop_count++)
2202 		goto out;
2203 
2204 	/* Prevent the buffers from switching */
2205 	arch_spin_lock(&global_trace.max_lock);
2206 
2207 	buffer = global_trace.array_buffer.buffer;
2208 	if (buffer)
2209 		ring_buffer_record_disable(buffer);
2210 
2211 #ifdef CONFIG_TRACER_MAX_TRACE
2212 	buffer = global_trace.max_buffer.buffer;
2213 	if (buffer)
2214 		ring_buffer_record_disable(buffer);
2215 #endif
2216 
2217 	arch_spin_unlock(&global_trace.max_lock);
2218 
2219  out:
2220 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2221 }
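/*
 * Example of how tracing_stop()/tracing_start() are meant to pair up when
 * debugging: freeze the ring buffer once a problem is detected so the
 * events leading up to it are preserved, then resume later. Illustrative
 * sketch only; the condition below is hypothetical.
 */
#if 0
static void example_freeze_on_anomaly(bool anomaly)
{
	if (!anomaly)
		return;

	tracing_stop();		/* nests via stop_count; must be balanced */
	pr_warn("anomaly hit, trace buffer frozen for inspection\n");
	/* ... inspect or dump the trace, then ... */
	tracing_start();
}
#endif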
2222 
2223 static void tracing_stop_tr(struct trace_array *tr)
2224 {
2225 	struct trace_buffer *buffer;
2226 	unsigned long flags;
2227 
2228 	/* If global, we need to also stop the max tracer */
2229 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2230 		return tracing_stop();
2231 
2232 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2233 	if (tr->stop_count++)
2234 		goto out;
2235 
2236 	buffer = tr->array_buffer.buffer;
2237 	if (buffer)
2238 		ring_buffer_record_disable(buffer);
2239 
2240  out:
2241 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2242 }
2243 
2244 static int trace_save_cmdline(struct task_struct *tsk)
2245 {
2246 	unsigned pid, idx;
2247 
2248 	/* treat recording of idle task as a success */
2249 	if (!tsk->pid)
2250 		return 1;
2251 
2252 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2253 		return 0;
2254 
2255 	/*
2256 	 * It's not the end of the world if we don't get
2257 	 * the lock, but we also don't want to spin
2258 	 * nor do we want to disable interrupts,
2259 	 * so if we miss here, then better luck next time.
2260 	 */
2261 	if (!arch_spin_trylock(&trace_cmdline_lock))
2262 		return 0;
2263 
2264 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2265 	if (idx == NO_CMDLINE_MAP) {
2266 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2267 
2268 		/*
2269 		 * Check whether the cmdline buffer at idx has a pid
2270 		 * mapped. We are going to overwrite that entry so we
2271 		 * need to clear the map_pid_to_cmdline. Otherwise we
2272 		 * would read the new comm for the old pid.
2273 		 */
2274 		pid = savedcmd->map_cmdline_to_pid[idx];
2275 		if (pid != NO_CMDLINE_MAP)
2276 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2277 
2278 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2279 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2280 
2281 		savedcmd->cmdline_idx = idx;
2282 	}
2283 
2284 	set_cmdline(idx, tsk->comm);
2285 
2286 	arch_spin_unlock(&trace_cmdline_lock);
2287 
2288 	return 1;
2289 }
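/*
 * Worked example of the mapping above (numbers are illustrative only):
 * with cmdline_num == 128 and cmdline_idx == 5, the first time pid 4711
 * is seen it gets idx = (5 + 1) % 128 = 6. If another pid previously
 * owned slot 6, its map_pid_to_cmdline[] entry is reset to NO_CMDLINE_MAP,
 * then map_cmdline_to_pid[6] = 4711, map_pid_to_cmdline[4711] = 6, and
 * the task's comm is copied into saved_cmdlines slot 6.
 */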
2290 
2291 static void __trace_find_cmdline(int pid, char comm[])
2292 {
2293 	unsigned map;
2294 
2295 	if (!pid) {
2296 		strcpy(comm, "<idle>");
2297 		return;
2298 	}
2299 
2300 	if (WARN_ON_ONCE(pid < 0)) {
2301 		strcpy(comm, "<XXX>");
2302 		return;
2303 	}
2304 
2305 	if (pid > PID_MAX_DEFAULT) {
2306 		strcpy(comm, "<...>");
2307 		return;
2308 	}
2309 
2310 	map = savedcmd->map_pid_to_cmdline[pid];
2311 	if (map != NO_CMDLINE_MAP)
2312 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2313 	else
2314 		strcpy(comm, "<...>");
2315 }
2316 
2317 void trace_find_cmdline(int pid, char comm[])
2318 {
2319 	preempt_disable();
2320 	arch_spin_lock(&trace_cmdline_lock);
2321 
2322 	__trace_find_cmdline(pid, comm);
2323 
2324 	arch_spin_unlock(&trace_cmdline_lock);
2325 	preempt_enable();
2326 }
2327 
2328 int trace_find_tgid(int pid)
2329 {
2330 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2331 		return 0;
2332 
2333 	return tgid_map[pid];
2334 }
2335 
2336 static int trace_save_tgid(struct task_struct *tsk)
2337 {
2338 	/* treat recording of idle task as a success */
2339 	if (!tsk->pid)
2340 		return 1;
2341 
2342 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2343 		return 0;
2344 
2345 	tgid_map[tsk->pid] = tsk->tgid;
2346 	return 1;
2347 }
2348 
2349 static bool tracing_record_taskinfo_skip(int flags)
2350 {
2351 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2352 		return true;
2353 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2354 		return true;
2355 	if (!__this_cpu_read(trace_taskinfo_save))
2356 		return true;
2357 	return false;
2358 }
2359 
2360 /**
2361  * tracing_record_taskinfo - record the task info of a task
2362  *
2363  * @task:  task to record
2364  * @flags: TRACE_RECORD_CMDLINE for recording comm
2365  *         TRACE_RECORD_TGID for recording tgid
2366  */
2367 void tracing_record_taskinfo(struct task_struct *task, int flags)
2368 {
2369 	bool done;
2370 
2371 	if (tracing_record_taskinfo_skip(flags))
2372 		return;
2373 
2374 	/*
2375 	 * Record as much task information as possible. If some fail, continue
2376 	 * to try to record the others.
2377 	 */
2378 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2379 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2380 
2381 	/* If recording any information failed, retry again soon. */
2382 	if (!done)
2383 		return;
2384 
2385 	__this_cpu_write(trace_taskinfo_save, false);
2386 }
2387 
2388 /**
2389  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2390  *
2391  * @prev: previous task during sched_switch
2392  * @next: next task during sched_switch
2393  * @flags: TRACE_RECORD_CMDLINE for recording comm
2394  *         TRACE_RECORD_TGID for recording tgid
2395  */
2396 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2397 					  struct task_struct *next, int flags)
2398 {
2399 	bool done;
2400 
2401 	if (tracing_record_taskinfo_skip(flags))
2402 		return;
2403 
2404 	/*
2405 	 * Record as much task information as possible. If some fail, continue
2406 	 * to try to record the others.
2407 	 */
2408 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2409 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2410 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2411 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2412 
2413 	/* If recording any information failed, retry again soon. */
2414 	if (!done)
2415 		return;
2416 
2417 	__this_cpu_write(trace_taskinfo_save, false);
2418 }
2419 
2420 /* Helpers to record a specific task information */
2421 void tracing_record_cmdline(struct task_struct *task)
2422 {
2423 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2424 }
2425 
2426 void tracing_record_tgid(struct task_struct *task)
2427 {
2428 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2429 }
2430 
2431 /*
2432  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2433  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2434  * simplifies those functions and keeps them in sync.
2435  */
2436 enum print_line_t trace_handle_return(struct trace_seq *s)
2437 {
2438 	return trace_seq_has_overflowed(s) ?
2439 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2440 }
2441 EXPORT_SYMBOL_GPL(trace_handle_return);
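/*
 * Example: the typical shape of an event output callback built around
 * trace_handle_return(). Illustrative sketch only; the "example_*" names
 * and the entry layout are made up here.
 */
#if 0
struct example_entry {
	struct trace_entry	ent;
	unsigned int		value;
};

static enum print_line_t example_output(struct trace_iterator *iter,
					int flags, struct trace_event *event)
{
	struct example_entry *field = (struct example_entry *)iter->ent;

	trace_seq_printf(&iter->seq, "example: value=%u\n", field->value);

	/* Collapse the trace_seq overflow check into the return value. */
	return trace_handle_return(&iter->seq);
}
#endif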
2442 
2443 void
2444 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2445 			     unsigned long flags, int pc)
2446 {
2447 	struct task_struct *tsk = current;
2448 
2449 	entry->preempt_count		= pc & 0xff;
2450 	entry->pid			= (tsk) ? tsk->pid : 0;
2451 	entry->type			= type;
2452 	entry->flags =
2453 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2454 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2455 #else
2456 		TRACE_FLAG_IRQS_NOSUPPORT |
2457 #endif
2458 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2459 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2460 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2461 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2462 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2463 }
2464 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2465 
2466 struct ring_buffer_event *
2467 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2468 			  int type,
2469 			  unsigned long len,
2470 			  unsigned long flags, int pc)
2471 {
2472 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2473 }
2474 
2475 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2476 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2477 static int trace_buffered_event_ref;
2478 
2479 /**
2480  * trace_buffered_event_enable - enable buffering events
2481  *
2482  * When events are being filtered, it is quicker to write the event
2483  * data into a temporary buffer if there is a good chance that the
2484  * event will not be committed. Discarding an event from the ring
2485  * buffer is not as fast as committing one, and is much slower than
2486  * copying the data and committing it in one shot.
2487  *
2488  * When an event is to be filtered, allocate per-CPU buffers to
2489  * write the event data into. If the event is filtered and discarded,
2490  * it is simply dropped; otherwise, the entire data is committed
2491  * in one shot.
2492  */
2493 void trace_buffered_event_enable(void)
2494 {
2495 	struct ring_buffer_event *event;
2496 	struct page *page;
2497 	int cpu;
2498 
2499 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2500 
2501 	if (trace_buffered_event_ref++)
2502 		return;
2503 
2504 	for_each_tracing_cpu(cpu) {
2505 		page = alloc_pages_node(cpu_to_node(cpu),
2506 					GFP_KERNEL | __GFP_NORETRY, 0);
2507 		if (!page)
2508 			goto failed;
2509 
2510 		event = page_address(page);
2511 		memset(event, 0, sizeof(*event));
2512 
2513 		per_cpu(trace_buffered_event, cpu) = event;
2514 
2515 		preempt_disable();
2516 		if (cpu == smp_processor_id() &&
2517 		    this_cpu_read(trace_buffered_event) !=
2518 		    per_cpu(trace_buffered_event, cpu))
2519 			WARN_ON_ONCE(1);
2520 		preempt_enable();
2521 	}
2522 
2523 	return;
2524  failed:
2525 	trace_buffered_event_disable();
2526 }
2527 
2528 static void enable_trace_buffered_event(void *data)
2529 {
2530 	/* Probably not needed, but do it anyway */
2531 	smp_rmb();
2532 	this_cpu_dec(trace_buffered_event_cnt);
2533 }
2534 
2535 static void disable_trace_buffered_event(void *data)
2536 {
2537 	this_cpu_inc(trace_buffered_event_cnt);
2538 }
2539 
2540 /**
2541  * trace_buffered_event_disable - disable buffering events
2542  *
2543  * When a filter is removed, it is faster to not use the buffered
2544  * events, and to commit directly into the ring buffer. Free up
2545  * the temp buffers when there are no more users. This requires
2546  * special synchronization with current events.
2547  */
2548 void trace_buffered_event_disable(void)
2549 {
2550 	int cpu;
2551 
2552 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2553 
2554 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2555 		return;
2556 
2557 	if (--trace_buffered_event_ref)
2558 		return;
2559 
2560 	preempt_disable();
2561 	/* For each CPU, set the buffer as used. */
2562 	smp_call_function_many(tracing_buffer_mask,
2563 			       disable_trace_buffered_event, NULL, 1);
2564 	preempt_enable();
2565 
2566 	/* Wait for all current users to finish */
2567 	synchronize_rcu();
2568 
2569 	for_each_tracing_cpu(cpu) {
2570 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2571 		per_cpu(trace_buffered_event, cpu) = NULL;
2572 	}
2573 	/*
2574 	 * Make sure trace_buffered_event is NULL before clearing
2575 	 * trace_buffered_event_cnt.
2576 	 */
2577 	smp_wmb();
2578 
2579 	preempt_disable();
2580 	/* Do the work on each cpu */
2581 	smp_call_function_many(tracing_buffer_mask,
2582 			       enable_trace_buffered_event, NULL, 1);
2583 	preempt_enable();
2584 }
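/*
 * Example of the calling convention implied by the WARN_ON_ONCE() checks
 * above: enable/disable must balance (trace_buffered_event_ref is a
 * reference count) and both must be called under event_mutex. Sketch only;
 * the surrounding filter code is hypothetical.
 */
#if 0
static void example_filter_toggle(bool install)
{
	mutex_lock(&event_mutex);
	if (install) {
		trace_buffered_event_enable();	/* start buffering events */
		/* ... install the filter ... */
	} else {
		/* ... remove the filter ... */
		trace_buffered_event_disable();	/* last user frees buffers */
	}
	mutex_unlock(&event_mutex);
}
#endif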
2585 
2586 static struct trace_buffer *temp_buffer;
2587 
2588 struct ring_buffer_event *
2589 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2590 			  struct trace_event_file *trace_file,
2591 			  int type, unsigned long len,
2592 			  unsigned long flags, int pc)
2593 {
2594 	struct ring_buffer_event *entry;
2595 	int val;
2596 
2597 	*current_rb = trace_file->tr->array_buffer.buffer;
2598 
2599 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2600 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2601 	    (entry = this_cpu_read(trace_buffered_event))) {
2602 		/* Try to use the per cpu buffer first */
2603 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2604 		if (val == 1) {
2605 			trace_event_setup(entry, type, flags, pc);
2606 			entry->array[0] = len;
2607 			return entry;
2608 		}
2609 		this_cpu_dec(trace_buffered_event_cnt);
2610 	}
2611 
2612 	entry = __trace_buffer_lock_reserve(*current_rb,
2613 					    type, len, flags, pc);
2614 	/*
2615 	 * If tracing is off, but we have triggers enabled
2616 	 * we still need to look at the event data. Use the temp_buffer
2617 	 * to store the trace event for the trigger to use. It's recursion
2618 	 * safe and will not be recorded anywhere.
2619 	 */
2620 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2621 		*current_rb = temp_buffer;
2622 		entry = __trace_buffer_lock_reserve(*current_rb,
2623 						    type, len, flags, pc);
2624 	}
2625 	return entry;
2626 }
2627 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2628 
2629 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2630 static DEFINE_MUTEX(tracepoint_printk_mutex);
2631 
2632 static void output_printk(struct trace_event_buffer *fbuffer)
2633 {
2634 	struct trace_event_call *event_call;
2635 	struct trace_event_file *file;
2636 	struct trace_event *event;
2637 	unsigned long flags;
2638 	struct trace_iterator *iter = tracepoint_print_iter;
2639 
2640 	/* We should never get here if iter is NULL */
2641 	if (WARN_ON_ONCE(!iter))
2642 		return;
2643 
2644 	event_call = fbuffer->trace_file->event_call;
2645 	if (!event_call || !event_call->event.funcs ||
2646 	    !event_call->event.funcs->trace)
2647 		return;
2648 
2649 	file = fbuffer->trace_file;
2650 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2651 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2652 	     !filter_match_preds(file->filter, fbuffer->entry)))
2653 		return;
2654 
2655 	event = &fbuffer->trace_file->event_call->event;
2656 
2657 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2658 	trace_seq_init(&iter->seq);
2659 	iter->ent = fbuffer->entry;
2660 	event_call->event.funcs->trace(iter, 0, event);
2661 	trace_seq_putc(&iter->seq, 0);
2662 	printk("%s", iter->seq.buffer);
2663 
2664 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2665 }
2666 
2667 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2668 			     void *buffer, size_t *lenp,
2669 			     loff_t *ppos)
2670 {
2671 	int save_tracepoint_printk;
2672 	int ret;
2673 
2674 	mutex_lock(&tracepoint_printk_mutex);
2675 	save_tracepoint_printk = tracepoint_printk;
2676 
2677 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2678 
2679 	/*
2680 	 * This will force exiting early, as tracepoint_printk
2681 	 * is always zero when tracepoint_print_iter is not allocated.
2682 	 */
2683 	if (!tracepoint_print_iter)
2684 		tracepoint_printk = 0;
2685 
2686 	if (save_tracepoint_printk == tracepoint_printk)
2687 		goto out;
2688 
2689 	if (tracepoint_printk)
2690 		static_key_enable(&tracepoint_printk_key.key);
2691 	else
2692 		static_key_disable(&tracepoint_printk_key.key);
2693 
2694  out:
2695 	mutex_unlock(&tracepoint_printk_mutex);
2696 
2697 	return ret;
2698 }
2699 
2700 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2701 {
2702 	if (static_key_false(&tracepoint_printk_key.key))
2703 		output_printk(fbuffer);
2704 
2705 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2706 				    fbuffer->event, fbuffer->entry,
2707 				    fbuffer->flags, fbuffer->pc, fbuffer->regs);
2708 }
2709 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2710 
2711 /*
2712  * Skip 3:
2713  *
2714  *   trace_buffer_unlock_commit_regs()
2715  *   trace_event_buffer_commit()
2716  *   trace_event_raw_event_xxx()
2717  */
2718 # define STACK_SKIP 3
2719 
2720 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2721 				     struct trace_buffer *buffer,
2722 				     struct ring_buffer_event *event,
2723 				     unsigned long flags, int pc,
2724 				     struct pt_regs *regs)
2725 {
2726 	__buffer_unlock_commit(buffer, event);
2727 
2728 	/*
2729 	 * If regs is not set, then skip the necessary functions.
2730 	 * Note, we can still get here via blktrace, wakeup tracer
2731 	 * and mmiotrace, but that's ok if they lose a function or
2732 	 * two. They are not that meaningful.
2733 	 */
2734 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2735 	ftrace_trace_userstack(buffer, flags, pc);
2736 }
2737 
2738 /*
2739  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2740  */
2741 void
2742 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2743 				   struct ring_buffer_event *event)
2744 {
2745 	__buffer_unlock_commit(buffer, event);
2746 }
2747 
2748 static void
2749 trace_process_export(struct trace_export *export,
2750 	       struct ring_buffer_event *event)
2751 {
2752 	struct trace_entry *entry;
2753 	unsigned int size = 0;
2754 
2755 	entry = ring_buffer_event_data(event);
2756 	size = ring_buffer_event_length(event);
2757 	export->write(export, entry, size);
2758 }
2759 
2760 static DEFINE_MUTEX(ftrace_export_lock);
2761 
2762 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2763 
2764 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2765 
2766 static inline void ftrace_exports_enable(void)
2767 {
2768 	static_branch_enable(&ftrace_exports_enabled);
2769 }
2770 
2771 static inline void ftrace_exports_disable(void)
2772 {
2773 	static_branch_disable(&ftrace_exports_enabled);
2774 }
2775 
2776 static void ftrace_exports(struct ring_buffer_event *event)
2777 {
2778 	struct trace_export *export;
2779 
2780 	preempt_disable_notrace();
2781 
2782 	export = rcu_dereference_raw_check(ftrace_exports_list);
2783 	while (export) {
2784 		trace_process_export(export, event);
2785 		export = rcu_dereference_raw_check(export->next);
2786 	}
2787 
2788 	preempt_enable_notrace();
2789 }
2790 
2791 static inline void
2792 add_trace_export(struct trace_export **list, struct trace_export *export)
2793 {
2794 	rcu_assign_pointer(export->next, *list);
2795 	/*
2796 	 * We are entering export into the list but another
2797 	 * CPU might be walking that list. We need to make sure
2798 	 * the export->next pointer is valid before another CPU sees
2799 	 * the export pointer added to the list.
2800 	 */
2801 	rcu_assign_pointer(*list, export);
2802 }
2803 
2804 static inline int
2805 rm_trace_export(struct trace_export **list, struct trace_export *export)
2806 {
2807 	struct trace_export **p;
2808 
2809 	for (p = list; *p != NULL; p = &(*p)->next)
2810 		if (*p == export)
2811 			break;
2812 
2813 	if (*p != export)
2814 		return -1;
2815 
2816 	rcu_assign_pointer(*p, (*p)->next);
2817 
2818 	return 0;
2819 }
2820 
2821 static inline void
2822 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2823 {
2824 	if (*list == NULL)
2825 		ftrace_exports_enable();
2826 
2827 	add_trace_export(list, export);
2828 }
2829 
2830 static inline int
2831 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2832 {
2833 	int ret;
2834 
2835 	ret = rm_trace_export(list, export);
2836 	if (*list == NULL)
2837 		ftrace_exports_disable();
2838 
2839 	return ret;
2840 }
2841 
2842 int register_ftrace_export(struct trace_export *export)
2843 {
2844 	if (WARN_ON_ONCE(!export->write))
2845 		return -1;
2846 
2847 	mutex_lock(&ftrace_export_lock);
2848 
2849 	add_ftrace_export(&ftrace_exports_list, export);
2850 
2851 	mutex_unlock(&ftrace_export_lock);
2852 
2853 	return 0;
2854 }
2855 EXPORT_SYMBOL_GPL(register_ftrace_export);
2856 
2857 int unregister_ftrace_export(struct trace_export *export)
2858 {
2859 	int ret;
2860 
2861 	mutex_lock(&ftrace_export_lock);
2862 
2863 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2864 
2865 	mutex_unlock(&ftrace_export_lock);
2866 
2867 	return ret;
2868 }
2869 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
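/*
 * Example: a minimal ftrace export. Illustrative sketch only; the
 * "example_*" names are made up, and a real user (such as the stm
 * hwtracing driver) forwards the data to its own transport.
 */
#if 0
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* @entry is the raw trace entry, @size its length in bytes. */
	/* ... copy the data out to wherever it needs to go ... */
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

/*
 * register_ftrace_export(&example_export) starts receiving every function
 * trace event; unregister_ftrace_export(&example_export) stops it.
 */
#endif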
2870 
2871 void
2872 trace_function(struct trace_array *tr,
2873 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2874 	       int pc)
2875 {
2876 	struct trace_event_call *call = &event_function;
2877 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2878 	struct ring_buffer_event *event;
2879 	struct ftrace_entry *entry;
2880 
2881 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2882 					    flags, pc);
2883 	if (!event)
2884 		return;
2885 	entry	= ring_buffer_event_data(event);
2886 	entry->ip			= ip;
2887 	entry->parent_ip		= parent_ip;
2888 
2889 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2890 		if (static_branch_unlikely(&ftrace_exports_enabled))
2891 			ftrace_exports(event);
2892 		__buffer_unlock_commit(buffer, event);
2893 	}
2894 }
2895 
2896 #ifdef CONFIG_STACKTRACE
2897 
2898 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2899 #define FTRACE_KSTACK_NESTING	4
2900 
2901 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2902 
2903 struct ftrace_stack {
2904 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2905 };
2906 
2907 
2908 struct ftrace_stacks {
2909 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2910 };
2911 
2912 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2913 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2914 
2915 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2916 				 unsigned long flags,
2917 				 int skip, int pc, struct pt_regs *regs)
2918 {
2919 	struct trace_event_call *call = &event_kernel_stack;
2920 	struct ring_buffer_event *event;
2921 	unsigned int size, nr_entries;
2922 	struct ftrace_stack *fstack;
2923 	struct stack_entry *entry;
2924 	int stackidx;
2925 
2926 	/*
2927 	 * Add one, for this function and the call to stack_trace_save().
2928 	 * If regs is set, then these functions will not be in the way.
2929 	 */
2930 #ifndef CONFIG_UNWINDER_ORC
2931 	if (!regs)
2932 		skip++;
2933 #endif
2934 
2935 	/*
2936 	 * Since events can happen in NMIs there's no safe way to
2937 	 * use a single per-CPU buffer. Instead, each nesting context
2938 	 * (normal, softirq, irq, NMI) reserves its own ftrace_stack; if
2939 	 * the nesting goes deeper than that, the stack trace is skipped.
2940 	 */
2941 	preempt_disable_notrace();
2942 
2943 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2944 
2945 	/* This should never happen. If it does, yell once and skip */
2946 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2947 		goto out;
2948 
2949 	/*
2950 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2951 	 * interrupt will either see the value pre increment or post
2952 	 * increment. If the interrupt happens pre increment it will have
2953 	 * restored the counter when it returns.  We just need a barrier to
2954 	 * keep gcc from moving things around.
2955 	 */
2956 	barrier();
2957 
2958 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2959 	size = ARRAY_SIZE(fstack->calls);
2960 
2961 	if (regs) {
2962 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2963 						   size, skip);
2964 	} else {
2965 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2966 	}
2967 
2968 	size = nr_entries * sizeof(unsigned long);
2969 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2970 					    sizeof(*entry) + size, flags, pc);
2971 	if (!event)
2972 		goto out;
2973 	entry = ring_buffer_event_data(event);
2974 
2975 	memcpy(&entry->caller, fstack->calls, size);
2976 	entry->size = nr_entries;
2977 
2978 	if (!call_filter_check_discard(call, entry, buffer, event))
2979 		__buffer_unlock_commit(buffer, event);
2980 
2981  out:
2982 	/* Again, don't let gcc optimize things here */
2983 	barrier();
2984 	__this_cpu_dec(ftrace_stack_reserve);
2985 	preempt_enable_notrace();
2986 
2987 }
2988 
2989 static inline void ftrace_trace_stack(struct trace_array *tr,
2990 				      struct trace_buffer *buffer,
2991 				      unsigned long flags,
2992 				      int skip, int pc, struct pt_regs *regs)
2993 {
2994 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2995 		return;
2996 
2997 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2998 }
2999 
3000 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3001 		   int pc)
3002 {
3003 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3004 
3005 	if (rcu_is_watching()) {
3006 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3007 		return;
3008 	}
3009 
3010 	/*
3011 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3012 	 * but if the above rcu_is_watching() failed, then the NMI
3013 	 * triggered someplace critical, and rcu_irq_enter() should
3014 	 * not be called from NMI.
3015 	 */
3016 	if (unlikely(in_nmi()))
3017 		return;
3018 
3019 	rcu_irq_enter_irqson();
3020 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3021 	rcu_irq_exit_irqson();
3022 }
3023 
3024 /**
3025  * trace_dump_stack - record a stack back trace in the trace buffer
3026  * @skip: Number of functions to skip (helper handlers)
3027  */
3028 void trace_dump_stack(int skip)
3029 {
3030 	unsigned long flags;
3031 
3032 	if (tracing_disabled || tracing_selftest_running)
3033 		return;
3034 
3035 	local_save_flags(flags);
3036 
3037 #ifndef CONFIG_UNWINDER_ORC
3038 	/* Skip 1 to skip this function. */
3039 	skip++;
3040 #endif
3041 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3042 			     flags, skip, preempt_count(), NULL);
3043 }
3044 EXPORT_SYMBOL_GPL(trace_dump_stack);
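/*
 * Example use of trace_dump_stack(): record a kernel stack trace into the
 * trace buffer from a suspicious code path. Illustrative sketch only; the
 * condition is hypothetical.
 */
#if 0
static void example_check(int refcount)
{
	if (unlikely(refcount < 0))
		trace_dump_stack(0);	/* 0: don't skip any extra frames */
}
#endif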
3045 
3046 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3047 static DEFINE_PER_CPU(int, user_stack_count);
3048 
3049 static void
3050 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3051 {
3052 	struct trace_event_call *call = &event_user_stack;
3053 	struct ring_buffer_event *event;
3054 	struct userstack_entry *entry;
3055 
3056 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3057 		return;
3058 
3059 	/*
3060 	 * NMIs cannot handle page faults, even with fixups.
3061 	 * Saving the user stack can (and often does) fault.
3062 	 */
3063 	if (unlikely(in_nmi()))
3064 		return;
3065 
3066 	/*
3067 	 * prevent recursion, since the user stack tracing may
3068 	 * trigger other kernel events.
3069 	 */
3070 	preempt_disable();
3071 	if (__this_cpu_read(user_stack_count))
3072 		goto out;
3073 
3074 	__this_cpu_inc(user_stack_count);
3075 
3076 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3077 					    sizeof(*entry), flags, pc);
3078 	if (!event)
3079 		goto out_drop_count;
3080 	entry	= ring_buffer_event_data(event);
3081 
3082 	entry->tgid		= current->tgid;
3083 	memset(&entry->caller, 0, sizeof(entry->caller));
3084 
3085 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3086 	if (!call_filter_check_discard(call, entry, buffer, event))
3087 		__buffer_unlock_commit(buffer, event);
3088 
3089  out_drop_count:
3090 	__this_cpu_dec(user_stack_count);
3091  out:
3092 	preempt_enable();
3093 }
3094 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3095 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3096 				   unsigned long flags, int pc)
3097 {
3098 }
3099 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3100 
3101 #endif /* CONFIG_STACKTRACE */
3102 
3103 /* created for use with alloc_percpu */
3104 struct trace_buffer_struct {
3105 	int nesting;
3106 	char buffer[4][TRACE_BUF_SIZE];
3107 };
3108 
3109 static struct trace_buffer_struct *trace_percpu_buffer;
3110 
3111 /*
3112  * This allows for lockless recording.  If we're nested too deeply, then
3113  * this returns NULL.
3114  */
3115 static char *get_trace_buf(void)
3116 {
3117 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3118 
3119 	if (!buffer || buffer->nesting >= 4)
3120 		return NULL;
3121 
3122 	buffer->nesting++;
3123 
3124 	/* Interrupts must see nesting incremented before we use the buffer */
3125 	barrier();
3126 	return &buffer->buffer[buffer->nesting][0];
3127 }
3128 
3129 static void put_trace_buf(void)
3130 {
3131 	/* Don't let the decrement of nesting leak before this */
3132 	barrier();
3133 	this_cpu_dec(trace_percpu_buffer->nesting);
3134 }
3135 
3136 static int alloc_percpu_trace_buffer(void)
3137 {
3138 	struct trace_buffer_struct *buffers;
3139 
3140 	buffers = alloc_percpu(struct trace_buffer_struct);
3141 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3142 		return -ENOMEM;
3143 
3144 	trace_percpu_buffer = buffers;
3145 	return 0;
3146 }
3147 
3148 static int buffers_allocated;
3149 
3150 void trace_printk_init_buffers(void)
3151 {
3152 	if (buffers_allocated)
3153 		return;
3154 
3155 	if (alloc_percpu_trace_buffer())
3156 		return;
3157 
3158 	/* trace_printk() is for debug use only. Don't use it in production. */
3159 
3160 	pr_warn("\n");
3161 	pr_warn("**********************************************************\n");
3162 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3163 	pr_warn("**                                                      **\n");
3164 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3165 	pr_warn("**                                                      **\n");
3166 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3167 	pr_warn("** unsafe for production use.                           **\n");
3168 	pr_warn("**                                                      **\n");
3169 	pr_warn("** If you see this message and you are not debugging    **\n");
3170 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3171 	pr_warn("**                                                      **\n");
3172 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3173 	pr_warn("**********************************************************\n");
3174 
3175 	/* Expand the buffers to their set size */
3176 	tracing_update_buffers();
3177 
3178 	buffers_allocated = 1;
3179 
3180 	/*
3181 	 * trace_printk_init_buffers() can be called by modules.
3182 	 * If that happens, then we need to start cmdline recording
3183 	 * directly here. If the global_trace.buffer is already
3184 	 * allocated here, then this was called by module code.
3185 	 */
3186 	if (global_trace.array_buffer.buffer)
3187 		tracing_start_cmdline_record();
3188 }
3189 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
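/*
 * Example of the debug-only usage that triggers the banner above: a
 * trace_printk() call anywhere in the kernel (or a module) allocates these
 * buffers and writes to the ring buffer instead of the console, showing up
 * in the tracefs "trace" file. Illustrative sketch only.
 */
#if 0
static void example_debug_point(int cpu, u64 ts)
{
	trace_printk("cpu %d hit the slow path at %llu ns\n", cpu, ts);
}
#endif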
3190 
3191 void trace_printk_start_comm(void)
3192 {
3193 	/* Start tracing comms if trace printk is set */
3194 	if (!buffers_allocated)
3195 		return;
3196 	tracing_start_cmdline_record();
3197 }
3198 
3199 static void trace_printk_start_stop_comm(int enabled)
3200 {
3201 	if (!buffers_allocated)
3202 		return;
3203 
3204 	if (enabled)
3205 		tracing_start_cmdline_record();
3206 	else
3207 		tracing_stop_cmdline_record();
3208 }
3209 
3210 /**
3211  * trace_vbprintk - write binary msg to tracing buffer
3212  * @ip:    The address of the caller
3213  * @fmt:   The string format to write to the buffer
3214  * @args:  Arguments for @fmt
3215  */
3216 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3217 {
3218 	struct trace_event_call *call = &event_bprint;
3219 	struct ring_buffer_event *event;
3220 	struct trace_buffer *buffer;
3221 	struct trace_array *tr = &global_trace;
3222 	struct bprint_entry *entry;
3223 	unsigned long flags;
3224 	char *tbuffer;
3225 	int len = 0, size, pc;
3226 
3227 	if (unlikely(tracing_selftest_running || tracing_disabled))
3228 		return 0;
3229 
3230 	/* Don't pollute graph traces with trace_vprintk internals */
3231 	pause_graph_tracing();
3232 
3233 	pc = preempt_count();
3234 	preempt_disable_notrace();
3235 
3236 	tbuffer = get_trace_buf();
3237 	if (!tbuffer) {
3238 		len = 0;
3239 		goto out_nobuffer;
3240 	}
3241 
3242 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3243 
3244 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3245 		goto out_put;
3246 
3247 	local_save_flags(flags);
3248 	size = sizeof(*entry) + sizeof(u32) * len;
3249 	buffer = tr->array_buffer.buffer;
3250 	ring_buffer_nest_start(buffer);
3251 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3252 					    flags, pc);
3253 	if (!event)
3254 		goto out;
3255 	entry = ring_buffer_event_data(event);
3256 	entry->ip			= ip;
3257 	entry->fmt			= fmt;
3258 
3259 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3260 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3261 		__buffer_unlock_commit(buffer, event);
3262 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3263 	}
3264 
3265 out:
3266 	ring_buffer_nest_end(buffer);
3267 out_put:
3268 	put_trace_buf();
3269 
3270 out_nobuffer:
3271 	preempt_enable_notrace();
3272 	unpause_graph_tracing();
3273 
3274 	return len;
3275 }
3276 EXPORT_SYMBOL_GPL(trace_vbprintk);
3277 
3278 __printf(3, 0)
3279 static int
3280 __trace_array_vprintk(struct trace_buffer *buffer,
3281 		      unsigned long ip, const char *fmt, va_list args)
3282 {
3283 	struct trace_event_call *call = &event_print;
3284 	struct ring_buffer_event *event;
3285 	int len = 0, size, pc;
3286 	struct print_entry *entry;
3287 	unsigned long flags;
3288 	char *tbuffer;
3289 
3290 	if (tracing_disabled || tracing_selftest_running)
3291 		return 0;
3292 
3293 	/* Don't pollute graph traces with trace_vprintk internals */
3294 	pause_graph_tracing();
3295 
3296 	pc = preempt_count();
3297 	preempt_disable_notrace();
3298 
3299 
3300 	tbuffer = get_trace_buf();
3301 	if (!tbuffer) {
3302 		len = 0;
3303 		goto out_nobuffer;
3304 	}
3305 
3306 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3307 
3308 	local_save_flags(flags);
3309 	size = sizeof(*entry) + len + 1;
3310 	ring_buffer_nest_start(buffer);
3311 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3312 					    flags, pc);
3313 	if (!event)
3314 		goto out;
3315 	entry = ring_buffer_event_data(event);
3316 	entry->ip = ip;
3317 
3318 	memcpy(&entry->buf, tbuffer, len + 1);
3319 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3320 		__buffer_unlock_commit(buffer, event);
3321 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3322 	}
3323 
3324 out:
3325 	ring_buffer_nest_end(buffer);
3326 	put_trace_buf();
3327 
3328 out_nobuffer:
3329 	preempt_enable_notrace();
3330 	unpause_graph_tracing();
3331 
3332 	return len;
3333 }
3334 
3335 __printf(3, 0)
3336 int trace_array_vprintk(struct trace_array *tr,
3337 			unsigned long ip, const char *fmt, va_list args)
3338 {
3339 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3340 }
3341 
3342 __printf(3, 0)
3343 int trace_array_printk(struct trace_array *tr,
3344 		       unsigned long ip, const char *fmt, ...)
3345 {
3346 	int ret;
3347 	va_list ap;
3348 
3349 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3350 		return 0;
3351 
3352 	if (!tr)
3353 		return -ENOENT;
3354 
3355 	va_start(ap, fmt);
3356 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3357 	va_end(ap);
3358 	return ret;
3359 }
3360 EXPORT_SYMBOL_GPL(trace_array_printk);
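/*
 * Example: writing into a named instance instead of the top-level buffer.
 * Illustrative sketch only, assuming trace_array_get_by_name() and
 * trace_array_put() are available in this kernel (as the sample trace
 * array module uses them); the instance name is made up.
 */
#if 0
static void example_instance_log(int value)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example-instance");
	if (!tr)
		return;

	trace_array_printk(tr, _THIS_IP_, "value is now %d\n", value);
	trace_array_put(tr);
}
#endif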
3361 
3362 __printf(3, 4)
3363 int trace_array_printk_buf(struct trace_buffer *buffer,
3364 			   unsigned long ip, const char *fmt, ...)
3365 {
3366 	int ret;
3367 	va_list ap;
3368 
3369 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3370 		return 0;
3371 
3372 	va_start(ap, fmt);
3373 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3374 	va_end(ap);
3375 	return ret;
3376 }
3377 
3378 __printf(2, 0)
3379 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3380 {
3381 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3382 }
3383 EXPORT_SYMBOL_GPL(trace_vprintk);
3384 
3385 static void trace_iterator_increment(struct trace_iterator *iter)
3386 {
3387 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3388 
3389 	iter->idx++;
3390 	if (buf_iter)
3391 		ring_buffer_iter_advance(buf_iter);
3392 }
3393 
3394 static struct trace_entry *
3395 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3396 		unsigned long *lost_events)
3397 {
3398 	struct ring_buffer_event *event;
3399 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3400 
3401 	if (buf_iter) {
3402 		event = ring_buffer_iter_peek(buf_iter, ts);
3403 		if (lost_events)
3404 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3405 				(unsigned long)-1 : 0;
3406 	} else {
3407 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3408 					 lost_events);
3409 	}
3410 
3411 	if (event) {
3412 		iter->ent_size = ring_buffer_event_length(event);
3413 		return ring_buffer_event_data(event);
3414 	}
3415 	iter->ent_size = 0;
3416 	return NULL;
3417 }
3418 
3419 static struct trace_entry *
3420 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3421 		  unsigned long *missing_events, u64 *ent_ts)
3422 {
3423 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3424 	struct trace_entry *ent, *next = NULL;
3425 	unsigned long lost_events = 0, next_lost = 0;
3426 	int cpu_file = iter->cpu_file;
3427 	u64 next_ts = 0, ts;
3428 	int next_cpu = -1;
3429 	int next_size = 0;
3430 	int cpu;
3431 
3432 	/*
3433 	 * If we are in a per_cpu trace file, don't bother iterating over
3434 	 * all CPUs; peek at that CPU directly.
3435 	 */
3436 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3437 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3438 			return NULL;
3439 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3440 		if (ent_cpu)
3441 			*ent_cpu = cpu_file;
3442 
3443 		return ent;
3444 	}
3445 
3446 	for_each_tracing_cpu(cpu) {
3447 
3448 		if (ring_buffer_empty_cpu(buffer, cpu))
3449 			continue;
3450 
3451 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3452 
3453 		/*
3454 		 * Pick the entry with the smallest timestamp:
3455 		 */
3456 		if (ent && (!next || ts < next_ts)) {
3457 			next = ent;
3458 			next_cpu = cpu;
3459 			next_ts = ts;
3460 			next_lost = lost_events;
3461 			next_size = iter->ent_size;
3462 		}
3463 	}
3464 
3465 	iter->ent_size = next_size;
3466 
3467 	if (ent_cpu)
3468 		*ent_cpu = next_cpu;
3469 
3470 	if (ent_ts)
3471 		*ent_ts = next_ts;
3472 
3473 	if (missing_events)
3474 		*missing_events = next_lost;
3475 
3476 	return next;
3477 }
3478 
3479 #define STATIC_TEMP_BUF_SIZE	128
3480 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3481 
3482 /* Find the next real entry, without updating the iterator itself */
3483 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3484 					  int *ent_cpu, u64 *ent_ts)
3485 {
3486 	/* __find_next_entry will reset ent_size */
3487 	int ent_size = iter->ent_size;
3488 	struct trace_entry *entry;
3489 
3490 	/*
3491 	 * If called from ftrace_dump(), then the iter->temp buffer
3492 	 * will be the static_temp_buf and not created from kmalloc.
3493 	 * If the entry size is greater than the buffer, we cannot
3494 	 * save it. Just return NULL in that case. This is only
3495 	 * used to add markers when two consecutive events' time
3496 	 * stamps have a large delta. See trace_print_lat_context().
3497 	 */
3498 	if (iter->temp == static_temp_buf &&
3499 	    STATIC_TEMP_BUF_SIZE < ent_size)
3500 		return NULL;
3501 
3502 	/*
3503 	 * The __find_next_entry() may call peek_next_entry(), which may
3504 	 * call ring_buffer_peek() that may make the contents of iter->ent
3505 	 * undefined. Need to copy iter->ent now.
3506 	 */
3507 	if (iter->ent && iter->ent != iter->temp) {
3508 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3509 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3510 			kfree(iter->temp);
3511 			iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3512 			if (!iter->temp)
3513 				return NULL;
3514 		}
3515 		memcpy(iter->temp, iter->ent, iter->ent_size);
3516 		iter->temp_size = iter->ent_size;
3517 		iter->ent = iter->temp;
3518 	}
3519 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3520 	/* Put back the original ent_size */
3521 	iter->ent_size = ent_size;
3522 
3523 	return entry;
3524 }
3525 
3526 /* Find the next real entry, and increment the iterator to the next entry */
3527 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3528 {
3529 	iter->ent = __find_next_entry(iter, &iter->cpu,
3530 				      &iter->lost_events, &iter->ts);
3531 
3532 	if (iter->ent)
3533 		trace_iterator_increment(iter);
3534 
3535 	return iter->ent ? iter : NULL;
3536 }
3537 
3538 static void trace_consume(struct trace_iterator *iter)
3539 {
3540 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3541 			    &iter->lost_events);
3542 }
3543 
3544 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3545 {
3546 	struct trace_iterator *iter = m->private;
3547 	int i = (int)*pos;
3548 	void *ent;
3549 
3550 	WARN_ON_ONCE(iter->leftover);
3551 
3552 	(*pos)++;
3553 
3554 	/* can't go backwards */
3555 	if (iter->idx > i)
3556 		return NULL;
3557 
3558 	if (iter->idx < 0)
3559 		ent = trace_find_next_entry_inc(iter);
3560 	else
3561 		ent = iter;
3562 
3563 	while (ent && iter->idx < i)
3564 		ent = trace_find_next_entry_inc(iter);
3565 
3566 	iter->pos = *pos;
3567 
3568 	return ent;
3569 }
3570 
3571 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3572 {
3573 	struct ring_buffer_event *event;
3574 	struct ring_buffer_iter *buf_iter;
3575 	unsigned long entries = 0;
3576 	u64 ts;
3577 
3578 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3579 
3580 	buf_iter = trace_buffer_iter(iter, cpu);
3581 	if (!buf_iter)
3582 		return;
3583 
3584 	ring_buffer_iter_reset(buf_iter);
3585 
3586 	/*
3587 	 * With the max latency tracers, it can happen that a reset
3588 	 * never took place on a CPU. This is evident when the
3589 	 * timestamp is before the start of the buffer.
3590 	 */
3591 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3592 		if (ts >= iter->array_buffer->time_start)
3593 			break;
3594 		entries++;
3595 		ring_buffer_iter_advance(buf_iter);
3596 	}
3597 
3598 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3599 }
3600 
3601 /*
3602  * The current tracer is copied to avoid taking a global lock
3603  * all around.
3604  */
3605 static void *s_start(struct seq_file *m, loff_t *pos)
3606 {
3607 	struct trace_iterator *iter = m->private;
3608 	struct trace_array *tr = iter->tr;
3609 	int cpu_file = iter->cpu_file;
3610 	void *p = NULL;
3611 	loff_t l = 0;
3612 	int cpu;
3613 
3614 	/*
3615 	 * Copy the tracer to avoid using a global lock all around.
3616 	 * iter->trace is a copy of current_trace, so the pointer to the
3617 	 * name may be used instead of a strcmp(), as iter->trace->name
3618 	 * will point to the same string as current_trace->name.
3619 	 */
3620 	mutex_lock(&trace_types_lock);
3621 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3622 		*iter->trace = *tr->current_trace;
3623 	mutex_unlock(&trace_types_lock);
3624 
3625 #ifdef CONFIG_TRACER_MAX_TRACE
3626 	if (iter->snapshot && iter->trace->use_max_tr)
3627 		return ERR_PTR(-EBUSY);
3628 #endif
3629 
3630 	if (!iter->snapshot)
3631 		atomic_inc(&trace_record_taskinfo_disabled);
3632 
3633 	if (*pos != iter->pos) {
3634 		iter->ent = NULL;
3635 		iter->cpu = 0;
3636 		iter->idx = -1;
3637 
3638 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3639 			for_each_tracing_cpu(cpu)
3640 				tracing_iter_reset(iter, cpu);
3641 		} else
3642 			tracing_iter_reset(iter, cpu_file);
3643 
3644 		iter->leftover = 0;
3645 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3646 			;
3647 
3648 	} else {
3649 		/*
3650 		 * If we overflowed the seq_file before, then we want
3651 		 * to just reuse the trace_seq buffer again.
3652 		 */
3653 		if (iter->leftover)
3654 			p = iter;
3655 		else {
3656 			l = *pos - 1;
3657 			p = s_next(m, p, &l);
3658 		}
3659 	}
3660 
3661 	trace_event_read_lock();
3662 	trace_access_lock(cpu_file);
3663 	return p;
3664 }
3665 
3666 static void s_stop(struct seq_file *m, void *p)
3667 {
3668 	struct trace_iterator *iter = m->private;
3669 
3670 #ifdef CONFIG_TRACER_MAX_TRACE
3671 	if (iter->snapshot && iter->trace->use_max_tr)
3672 		return;
3673 #endif
3674 
3675 	if (!iter->snapshot)
3676 		atomic_dec(&trace_record_taskinfo_disabled);
3677 
3678 	trace_access_unlock(iter->cpu_file);
3679 	trace_event_read_unlock();
3680 }
3681 
3682 static void
3683 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3684 		      unsigned long *entries, int cpu)
3685 {
3686 	unsigned long count;
3687 
3688 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3689 	/*
3690 	 * If this buffer has skipped entries, then we hold all
3691 	 * entries for the trace and we need to ignore the
3692 	 * ones before the time stamp.
3693 	 */
3694 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3695 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3696 		/* total is the same as the entries */
3697 		*total = count;
3698 	} else
3699 		*total = count +
3700 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3701 	*entries = count;
3702 }
3703 
3704 static void
3705 get_total_entries(struct array_buffer *buf,
3706 		  unsigned long *total, unsigned long *entries)
3707 {
3708 	unsigned long t, e;
3709 	int cpu;
3710 
3711 	*total = 0;
3712 	*entries = 0;
3713 
3714 	for_each_tracing_cpu(cpu) {
3715 		get_total_entries_cpu(buf, &t, &e, cpu);
3716 		*total += t;
3717 		*entries += e;
3718 	}
3719 }
3720 
3721 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3722 {
3723 	unsigned long total, entries;
3724 
3725 	if (!tr)
3726 		tr = &global_trace;
3727 
3728 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3729 
3730 	return entries;
3731 }
3732 
3733 unsigned long trace_total_entries(struct trace_array *tr)
3734 {
3735 	unsigned long total, entries;
3736 
3737 	if (!tr)
3738 		tr = &global_trace;
3739 
3740 	get_total_entries(&tr->array_buffer, &total, &entries);
3741 
3742 	return entries;
3743 }
3744 
3745 static void print_lat_help_header(struct seq_file *m)
3746 {
3747 	seq_puts(m, "#                  _------=> CPU#            \n"
3748 		    "#                 / _-----=> irqs-off        \n"
3749 		    "#                | / _----=> need-resched    \n"
3750 		    "#                || / _---=> hardirq/softirq \n"
3751 		    "#                ||| / _--=> preempt-depth   \n"
3752 		    "#                |||| /     delay            \n"
3753 		    "#  cmd     pid   ||||| time  |   caller      \n"
3754 		    "#     \\   /      |||||  \\    |   /         \n");
3755 }
3756 
3757 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3758 {
3759 	unsigned long total;
3760 	unsigned long entries;
3761 
3762 	get_total_entries(buf, &total, &entries);
3763 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3764 		   entries, total, num_online_cpus());
3765 	seq_puts(m, "#\n");
3766 }
3767 
3768 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3769 				   unsigned int flags)
3770 {
3771 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3772 
3773 	print_event_info(buf, m);
3774 
3775 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3776 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3777 }
3778 
3779 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3780 				       unsigned int flags)
3781 {
3782 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3783 	const char *space = "          ";
3784 	int prec = tgid ? 10 : 2;
3785 
3786 	print_event_info(buf, m);
3787 
3788 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3789 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3790 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3791 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3792 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3793 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3794 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3795 }
3796 
3797 void
3798 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3799 {
3800 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3801 	struct array_buffer *buf = iter->array_buffer;
3802 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3803 	struct tracer *type = iter->trace;
3804 	unsigned long entries;
3805 	unsigned long total;
3806 	const char *name = type->name;
3809 
3810 	get_total_entries(buf, &total, &entries);
3811 
3812 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3813 		   name, UTS_RELEASE);
3814 	seq_puts(m, "# -----------------------------------"
3815 		 "---------------------------------\n");
3816 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3817 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3818 		   nsecs_to_usecs(data->saved_latency),
3819 		   entries,
3820 		   total,
3821 		   buf->cpu,
3822 #if defined(CONFIG_PREEMPT_NONE)
3823 		   "server",
3824 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3825 		   "desktop",
3826 #elif defined(CONFIG_PREEMPT)
3827 		   "preempt",
3828 #elif defined(CONFIG_PREEMPT_RT)
3829 		   "preempt_rt",
3830 #else
3831 		   "unknown",
3832 #endif
3833 		   /* These are reserved for later use */
3834 		   0, 0, 0, 0);
3835 #ifdef CONFIG_SMP
3836 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3837 #else
3838 	seq_puts(m, ")\n");
3839 #endif
3840 	seq_puts(m, "#    -----------------\n");
3841 	seq_printf(m, "#    | task: %.16s-%d "
3842 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3843 		   data->comm, data->pid,
3844 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3845 		   data->policy, data->rt_priority);
3846 	seq_puts(m, "#    -----------------\n");
3847 
3848 	if (data->critical_start) {
3849 		seq_puts(m, "#  => started at: ");
3850 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3851 		trace_print_seq(m, &iter->seq);
3852 		seq_puts(m, "\n#  => ended at:   ");
3853 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3854 		trace_print_seq(m, &iter->seq);
3855 		seq_puts(m, "\n#\n");
3856 	}
3857 
3858 	seq_puts(m, "#\n");
3859 }
3860 
3861 static void test_cpu_buff_start(struct trace_iterator *iter)
3862 {
3863 	struct trace_seq *s = &iter->seq;
3864 	struct trace_array *tr = iter->tr;
3865 
3866 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3867 		return;
3868 
3869 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3870 		return;
3871 
3872 	if (cpumask_available(iter->started) &&
3873 	    cpumask_test_cpu(iter->cpu, iter->started))
3874 		return;
3875 
3876 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3877 		return;
3878 
3879 	if (cpumask_available(iter->started))
3880 		cpumask_set_cpu(iter->cpu, iter->started);
3881 
3882 	/* Don't print started cpu buffer for the first entry of the trace */
3883 	if (iter->idx > 1)
3884 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3885 				iter->cpu);
3886 }
3887 
3888 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3889 {
3890 	struct trace_array *tr = iter->tr;
3891 	struct trace_seq *s = &iter->seq;
3892 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3893 	struct trace_entry *entry;
3894 	struct trace_event *event;
3895 
3896 	entry = iter->ent;
3897 
3898 	test_cpu_buff_start(iter);
3899 
3900 	event = ftrace_find_event(entry->type);
3901 
3902 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3903 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3904 			trace_print_lat_context(iter);
3905 		else
3906 			trace_print_context(iter);
3907 	}
3908 
3909 	if (trace_seq_has_overflowed(s))
3910 		return TRACE_TYPE_PARTIAL_LINE;
3911 
3912 	if (event)
3913 		return event->funcs->trace(iter, sym_flags, event);
3914 
3915 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3916 
3917 	return trace_handle_return(s);
3918 }
3919 
3920 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3921 {
3922 	struct trace_array *tr = iter->tr;
3923 	struct trace_seq *s = &iter->seq;
3924 	struct trace_entry *entry;
3925 	struct trace_event *event;
3926 
3927 	entry = iter->ent;
3928 
3929 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3930 		trace_seq_printf(s, "%d %d %llu ",
3931 				 entry->pid, iter->cpu, iter->ts);
3932 
3933 	if (trace_seq_has_overflowed(s))
3934 		return TRACE_TYPE_PARTIAL_LINE;
3935 
3936 	event = ftrace_find_event(entry->type);
3937 	if (event)
3938 		return event->funcs->raw(iter, 0, event);
3939 
3940 	trace_seq_printf(s, "%d ?\n", entry->type);
3941 
3942 	return trace_handle_return(s);
3943 }
3944 
3945 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3946 {
3947 	struct trace_array *tr = iter->tr;
3948 	struct trace_seq *s = &iter->seq;
3949 	unsigned char newline = '\n';
3950 	struct trace_entry *entry;
3951 	struct trace_event *event;
3952 
3953 	entry = iter->ent;
3954 
3955 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3956 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3957 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3958 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3959 		if (trace_seq_has_overflowed(s))
3960 			return TRACE_TYPE_PARTIAL_LINE;
3961 	}
3962 
3963 	event = ftrace_find_event(entry->type);
3964 	if (event) {
3965 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3966 		if (ret != TRACE_TYPE_HANDLED)
3967 			return ret;
3968 	}
3969 
3970 	SEQ_PUT_FIELD(s, newline);
3971 
3972 	return trace_handle_return(s);
3973 }
3974 
3975 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3976 {
3977 	struct trace_array *tr = iter->tr;
3978 	struct trace_seq *s = &iter->seq;
3979 	struct trace_entry *entry;
3980 	struct trace_event *event;
3981 
3982 	entry = iter->ent;
3983 
3984 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3985 		SEQ_PUT_FIELD(s, entry->pid);
3986 		SEQ_PUT_FIELD(s, iter->cpu);
3987 		SEQ_PUT_FIELD(s, iter->ts);
3988 		if (trace_seq_has_overflowed(s))
3989 			return TRACE_TYPE_PARTIAL_LINE;
3990 	}
3991 
3992 	event = ftrace_find_event(entry->type);
3993 	return event ? event->funcs->binary(iter, 0, event) :
3994 		TRACE_TYPE_HANDLED;
3995 }
3996 
3997 int trace_empty(struct trace_iterator *iter)
3998 {
3999 	struct ring_buffer_iter *buf_iter;
4000 	int cpu;
4001 
4002 	/* If we are looking at one CPU buffer, only check that one */
4003 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4004 		cpu = iter->cpu_file;
4005 		buf_iter = trace_buffer_iter(iter, cpu);
4006 		if (buf_iter) {
4007 			if (!ring_buffer_iter_empty(buf_iter))
4008 				return 0;
4009 		} else {
4010 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4011 				return 0;
4012 		}
4013 		return 1;
4014 	}
4015 
4016 	for_each_tracing_cpu(cpu) {
4017 		buf_iter = trace_buffer_iter(iter, cpu);
4018 		if (buf_iter) {
4019 			if (!ring_buffer_iter_empty(buf_iter))
4020 				return 0;
4021 		} else {
4022 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4023 				return 0;
4024 		}
4025 	}
4026 
4027 	return 1;
4028 }
4029 
4030 /*  Called with trace_event_read_lock() held. */
4031 enum print_line_t print_trace_line(struct trace_iterator *iter)
4032 {
4033 	struct trace_array *tr = iter->tr;
4034 	unsigned long trace_flags = tr->trace_flags;
4035 	enum print_line_t ret;
4036 
4037 	if (iter->lost_events) {
4038 		if (iter->lost_events == (unsigned long)-1)
4039 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4040 					 iter->cpu);
4041 		else
4042 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4043 					 iter->cpu, iter->lost_events);
4044 		if (trace_seq_has_overflowed(&iter->seq))
4045 			return TRACE_TYPE_PARTIAL_LINE;
4046 	}
4047 
4048 	if (iter->trace && iter->trace->print_line) {
4049 		ret = iter->trace->print_line(iter);
4050 		if (ret != TRACE_TYPE_UNHANDLED)
4051 			return ret;
4052 	}
4053 
4054 	if (iter->ent->type == TRACE_BPUTS &&
4055 			trace_flags & TRACE_ITER_PRINTK &&
4056 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4057 		return trace_print_bputs_msg_only(iter);
4058 
4059 	if (iter->ent->type == TRACE_BPRINT &&
4060 			trace_flags & TRACE_ITER_PRINTK &&
4061 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4062 		return trace_print_bprintk_msg_only(iter);
4063 
4064 	if (iter->ent->type == TRACE_PRINT &&
4065 			trace_flags & TRACE_ITER_PRINTK &&
4066 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4067 		return trace_print_printk_msg_only(iter);
4068 
4069 	if (trace_flags & TRACE_ITER_BIN)
4070 		return print_bin_fmt(iter);
4071 
4072 	if (trace_flags & TRACE_ITER_HEX)
4073 		return print_hex_fmt(iter);
4074 
4075 	if (trace_flags & TRACE_ITER_RAW)
4076 		return print_raw_fmt(iter);
4077 
4078 	return print_trace_fmt(iter);
4079 }
4080 
4081 void trace_latency_header(struct seq_file *m)
4082 {
4083 	struct trace_iterator *iter = m->private;
4084 	struct trace_array *tr = iter->tr;
4085 
4086 	/* print nothing if the buffers are empty */
4087 	if (trace_empty(iter))
4088 		return;
4089 
4090 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4091 		print_trace_header(m, iter);
4092 
4093 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4094 		print_lat_help_header(m);
4095 }
4096 
4097 void trace_default_header(struct seq_file *m)
4098 {
4099 	struct trace_iterator *iter = m->private;
4100 	struct trace_array *tr = iter->tr;
4101 	unsigned long trace_flags = tr->trace_flags;
4102 
4103 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4104 		return;
4105 
4106 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4107 		/* print nothing if the buffers are empty */
4108 		if (trace_empty(iter))
4109 			return;
4110 		print_trace_header(m, iter);
4111 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4112 			print_lat_help_header(m);
4113 	} else {
4114 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4115 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4116 				print_func_help_header_irq(iter->array_buffer,
4117 							   m, trace_flags);
4118 			else
4119 				print_func_help_header(iter->array_buffer, m,
4120 						       trace_flags);
4121 		}
4122 	}
4123 }
4124 
4125 static void test_ftrace_alive(struct seq_file *m)
4126 {
4127 	if (!ftrace_is_dead())
4128 		return;
4129 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4130 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4131 }
4132 
4133 #ifdef CONFIG_TRACER_MAX_TRACE
4134 static void show_snapshot_main_help(struct seq_file *m)
4135 {
4136 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4137 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4138 		    "#                      Takes a snapshot of the main buffer.\n"
4139 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4140 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4141 		    "#                       is not a '0' or '1')\n");
4142 }
4143 
4144 static void show_snapshot_percpu_help(struct seq_file *m)
4145 {
4146 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4147 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4148 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4149 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4150 #else
4151 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4152 		    "#                     Must use main snapshot file to allocate.\n");
4153 #endif
4154 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4155 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4156 		    "#                       is not a '0' or '1')\n");
4157 }
4158 
4159 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4160 {
4161 	if (iter->tr->allocated_snapshot)
4162 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4163 	else
4164 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4165 
4166 	seq_puts(m, "# Snapshot commands:\n");
4167 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4168 		show_snapshot_main_help(m);
4169 	else
4170 		show_snapshot_percpu_help(m);
4171 }
4172 #else
4173 /* Should never be called */
4174 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4175 #endif
4176 
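/*
 * seq_file ->show() callback for the "trace" file: print the header
 * block when there is no current entry, flush any leftover output that
 * did not fit on the previous read, or format the next trace line.
 */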
4177 static int s_show(struct seq_file *m, void *v)
4178 {
4179 	struct trace_iterator *iter = v;
4180 	int ret;
4181 
4182 	if (iter->ent == NULL) {
4183 		if (iter->tr) {
4184 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4185 			seq_puts(m, "#\n");
4186 			test_ftrace_alive(m);
4187 		}
4188 		if (iter->snapshot && trace_empty(iter))
4189 			print_snapshot_help(m, iter);
4190 		else if (iter->trace && iter->trace->print_header)
4191 			iter->trace->print_header(m);
4192 		else
4193 			trace_default_header(m);
4194 
4195 	} else if (iter->leftover) {
4196 		/*
4197 		 * If we filled the seq_file buffer earlier, we
4198 		 * want to just show it now.
4199 		 */
4200 		ret = trace_print_seq(m, &iter->seq);
4201 
4202 		/* ret should this time be zero, but you never know */
4203 		iter->leftover = ret;
4204 
4205 	} else {
4206 		print_trace_line(iter);
4207 		ret = trace_print_seq(m, &iter->seq);
4208 		/*
4209 		 * If we overflow the seq_file buffer, then it will
4210 		 * ask us for this data again at start up.
4211 		 * Use that instead.
4212 		 *  ret is 0 if seq_file write succeeded.
4213 		 *        -1 otherwise.
4214 		 */
4215 		iter->leftover = ret;
4216 	}
4217 
4218 	return 0;
4219 }
4220 
4221 /*
4222  * Should be used after trace_array_get(); trace_types_lock
4223  * ensures that i_cdev was already initialized.
4224  */
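/*
 * i_cdev holds cpu + 1 (set up by trace_create_cpu_file()), so a zero
 * value means no specific CPU was selected.
 */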
4225 static inline int tracing_get_cpu(struct inode *inode)
4226 {
4227 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4228 		return (long)inode->i_cdev - 1;
4229 	return RING_BUFFER_ALL_CPUS;
4230 }
4231 
4232 static const struct seq_operations tracer_seq_ops = {
4233 	.start		= s_start,
4234 	.next		= s_next,
4235 	.stop		= s_stop,
4236 	.show		= s_show,
4237 };
4238 
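/*
 * Set up the iterator backing the "trace" seq_file: copy the current
 * tracer, select the main or snapshot (max) buffer, optionally pause
 * tracing, and prepare a ring buffer iterator for each requested CPU.
 */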
4239 static struct trace_iterator *
4240 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4241 {
4242 	struct trace_array *tr = inode->i_private;
4243 	struct trace_iterator *iter;
4244 	int cpu;
4245 
4246 	if (tracing_disabled)
4247 		return ERR_PTR(-ENODEV);
4248 
4249 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4250 	if (!iter)
4251 		return ERR_PTR(-ENOMEM);
4252 
4253 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4254 				    GFP_KERNEL);
4255 	if (!iter->buffer_iter)
4256 		goto release;
4257 
4258 	/*
4259 	 * trace_find_next_entry() may need to save off iter->ent.
4260 	 * It will place it into the iter->temp buffer. As most
4261 	 * events are less than 128 bytes, allocate a buffer of that size.
4262 	 * If one is greater, then trace_find_next_entry() will
4263 	 * allocate a new buffer to adjust for the bigger iter->ent.
4264 	 * It's not critical if it fails to get allocated here.
4265 	 */
4266 	iter->temp = kmalloc(128, GFP_KERNEL);
4267 	if (iter->temp)
4268 		iter->temp_size = 128;
4269 
4270 	/*
4271 	 * We make a copy of the current tracer to avoid concurrent
4272 	 * changes on it while we are reading.
4273 	 */
4274 	mutex_lock(&trace_types_lock);
4275 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4276 	if (!iter->trace)
4277 		goto fail;
4278 
4279 	*iter->trace = *tr->current_trace;
4280 
4281 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4282 		goto fail;
4283 
4284 	iter->tr = tr;
4285 
4286 #ifdef CONFIG_TRACER_MAX_TRACE
4287 	/* Currently only the top directory has a snapshot */
4288 	if (tr->current_trace->print_max || snapshot)
4289 		iter->array_buffer = &tr->max_buffer;
4290 	else
4291 #endif
4292 		iter->array_buffer = &tr->array_buffer;
4293 	iter->snapshot = snapshot;
4294 	iter->pos = -1;
4295 	iter->cpu_file = tracing_get_cpu(inode);
4296 	mutex_init(&iter->mutex);
4297 
4298 	/* Notify the tracer early; before we stop tracing. */
4299 	if (iter->trace->open)
4300 		iter->trace->open(iter);
4301 
4302 	/* Annotate start of buffers if we had overruns */
4303 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4304 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4305 
4306 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4307 	if (trace_clocks[tr->clock_id].in_ns)
4308 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4309 
4310 	/*
4311 	 * If pause-on-trace is enabled, then stop the trace while
4312 	 * dumping, unless this is the "snapshot" file
4313 	 */
4314 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4315 		tracing_stop_tr(tr);
4316 
4317 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4318 		for_each_tracing_cpu(cpu) {
4319 			iter->buffer_iter[cpu] =
4320 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4321 							 cpu, GFP_KERNEL);
4322 		}
4323 		ring_buffer_read_prepare_sync();
4324 		for_each_tracing_cpu(cpu) {
4325 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4326 			tracing_iter_reset(iter, cpu);
4327 		}
4328 	} else {
4329 		cpu = iter->cpu_file;
4330 		iter->buffer_iter[cpu] =
4331 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4332 						 cpu, GFP_KERNEL);
4333 		ring_buffer_read_prepare_sync();
4334 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4335 		tracing_iter_reset(iter, cpu);
4336 	}
4337 
4338 	mutex_unlock(&trace_types_lock);
4339 
4340 	return iter;
4341 
4342  fail:
4343 	mutex_unlock(&trace_types_lock);
4344 	kfree(iter->trace);
4345 	kfree(iter->temp);
4346 	kfree(iter->buffer_iter);
4347 release:
4348 	seq_release_private(inode, file);
4349 	return ERR_PTR(-ENOMEM);
4350 }
4351 
4352 int tracing_open_generic(struct inode *inode, struct file *filp)
4353 {
4354 	int ret;
4355 
4356 	ret = tracing_check_open_get_tr(NULL);
4357 	if (ret)
4358 		return ret;
4359 
4360 	filp->private_data = inode->i_private;
4361 	return 0;
4362 }
4363 
4364 bool tracing_is_disabled(void)
4365 {
4366 	return tracing_disabled ? true : false;
4367 }
4368 
4369 /*
4370  * Open and update trace_array ref count.
4371  * Must have the current trace_array passed to it.
4372  */
4373 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4374 {
4375 	struct trace_array *tr = inode->i_private;
4376 	int ret;
4377 
4378 	ret = tracing_check_open_get_tr(tr);
4379 	if (ret)
4380 		return ret;
4381 
4382 	filp->private_data = inode->i_private;
4383 
4384 	return 0;
4385 }
4386 
4387 static int tracing_release(struct inode *inode, struct file *file)
4388 {
4389 	struct trace_array *tr = inode->i_private;
4390 	struct seq_file *m = file->private_data;
4391 	struct trace_iterator *iter;
4392 	int cpu;
4393 
4394 	if (!(file->f_mode & FMODE_READ)) {
4395 		trace_array_put(tr);
4396 		return 0;
4397 	}
4398 
4399 	/* Writes do not use seq_file */
4400 	iter = m->private;
4401 	mutex_lock(&trace_types_lock);
4402 
4403 	for_each_tracing_cpu(cpu) {
4404 		if (iter->buffer_iter[cpu])
4405 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4406 	}
4407 
4408 	if (iter->trace && iter->trace->close)
4409 		iter->trace->close(iter);
4410 
4411 	if (!iter->snapshot && tr->stop_count)
4412 		/* reenable tracing if it was previously enabled */
4413 		tracing_start_tr(tr);
4414 
4415 	__trace_array_put(tr);
4416 
4417 	mutex_unlock(&trace_types_lock);
4418 
4419 	mutex_destroy(&iter->mutex);
4420 	free_cpumask_var(iter->started);
4421 	kfree(iter->temp);
4422 	kfree(iter->trace);
4423 	kfree(iter->buffer_iter);
4424 	seq_release_private(inode, file);
4425 
4426 	return 0;
4427 }
4428 
4429 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4430 {
4431 	struct trace_array *tr = inode->i_private;
4432 
4433 	trace_array_put(tr);
4434 	return 0;
4435 }
4436 
4437 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4438 {
4439 	struct trace_array *tr = inode->i_private;
4440 
4441 	trace_array_put(tr);
4442 
4443 	return single_release(inode, file);
4444 }
4445 
4446 static int tracing_open(struct inode *inode, struct file *file)
4447 {
4448 	struct trace_array *tr = inode->i_private;
4449 	struct trace_iterator *iter;
4450 	int ret;
4451 
4452 	ret = tracing_check_open_get_tr(tr);
4453 	if (ret)
4454 		return ret;
4455 
4456 	/* If this file was open for write, then erase contents */
4457 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4458 		int cpu = tracing_get_cpu(inode);
4459 		struct array_buffer *trace_buf = &tr->array_buffer;
4460 
4461 #ifdef CONFIG_TRACER_MAX_TRACE
4462 		if (tr->current_trace->print_max)
4463 			trace_buf = &tr->max_buffer;
4464 #endif
4465 
4466 		if (cpu == RING_BUFFER_ALL_CPUS)
4467 			tracing_reset_online_cpus(trace_buf);
4468 		else
4469 			tracing_reset_cpu(trace_buf, cpu);
4470 	}
4471 
4472 	if (file->f_mode & FMODE_READ) {
4473 		iter = __tracing_open(inode, file, false);
4474 		if (IS_ERR(iter))
4475 			ret = PTR_ERR(iter);
4476 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4477 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4478 	}
4479 
4480 	if (ret < 0)
4481 		trace_array_put(tr);
4482 
4483 	return ret;
4484 }
4485 
4486 /*
4487  * Some tracers are not suitable for instance buffers.
4488  * A tracer is always available for the global array (toplevel)
4489  * or if it explicitly states that it is.
4490  */
4491 static bool
4492 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4493 {
4494 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4495 }
4496 
4497 /* Find the next tracer that this trace array may use */
4498 static struct tracer *
4499 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4500 {
4501 	while (t && !trace_ok_for_array(t, tr))
4502 		t = t->next;
4503 
4504 	return t;
4505 }
4506 
4507 static void *
4508 t_next(struct seq_file *m, void *v, loff_t *pos)
4509 {
4510 	struct trace_array *tr = m->private;
4511 	struct tracer *t = v;
4512 
4513 	(*pos)++;
4514 
4515 	if (t)
4516 		t = get_tracer_for_array(tr, t->next);
4517 
4518 	return t;
4519 }
4520 
4521 static void *t_start(struct seq_file *m, loff_t *pos)
4522 {
4523 	struct trace_array *tr = m->private;
4524 	struct tracer *t;
4525 	loff_t l = 0;
4526 
4527 	mutex_lock(&trace_types_lock);
4528 
4529 	t = get_tracer_for_array(tr, trace_types);
4530 	for (; t && l < *pos; t = t_next(m, t, &l))
4531 			;
4532 		;
4533 	return t;
4534 }
4535 
4536 static void t_stop(struct seq_file *m, void *p)
4537 {
4538 	mutex_unlock(&trace_types_lock);
4539 }
4540 
4541 static int t_show(struct seq_file *m, void *v)
4542 {
4543 	struct tracer *t = v;
4544 
4545 	if (!t)
4546 		return 0;
4547 
4548 	seq_puts(m, t->name);
4549 	if (t->next)
4550 		seq_putc(m, ' ');
4551 	else
4552 		seq_putc(m, '\n');
4553 
4554 	return 0;
4555 }
4556 
4557 static const struct seq_operations show_traces_seq_ops = {
4558 	.start		= t_start,
4559 	.next		= t_next,
4560 	.stop		= t_stop,
4561 	.show		= t_show,
4562 };
4563 
4564 static int show_traces_open(struct inode *inode, struct file *file)
4565 {
4566 	struct trace_array *tr = inode->i_private;
4567 	struct seq_file *m;
4568 	int ret;
4569 
4570 	ret = tracing_check_open_get_tr(tr);
4571 	if (ret)
4572 		return ret;
4573 
4574 	ret = seq_open(file, &show_traces_seq_ops);
4575 	if (ret) {
4576 		trace_array_put(tr);
4577 		return ret;
4578 	}
4579 
4580 	m = file->private_data;
4581 	m->private = tr;
4582 
4583 	return 0;
4584 }
4585 
4586 static int show_traces_release(struct inode *inode, struct file *file)
4587 {
4588 	struct trace_array *tr = inode->i_private;
4589 
4590 	trace_array_put(tr);
4591 	return seq_release(inode, file);
4592 }
4593 
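/*
 * Writes to the "trace" file store nothing; opening with O_TRUNC (see
 * tracing_open()) is what clears the buffer, so the data is dropped.
 */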
4594 static ssize_t
4595 tracing_write_stub(struct file *filp, const char __user *ubuf,
4596 		   size_t count, loff_t *ppos)
4597 {
4598 	return count;
4599 }
4600 
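/*
 * Readers go through seq_file, so delegate to seq_lseek(); write-only
 * opens have no seq_file state and simply reset the position to zero.
 */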
4601 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4602 {
4603 	int ret;
4604 
4605 	if (file->f_mode & FMODE_READ)
4606 		ret = seq_lseek(file, offset, whence);
4607 	else
4608 		file->f_pos = ret = 0;
4609 
4610 	return ret;
4611 }
4612 
4613 static const struct file_operations tracing_fops = {
4614 	.open		= tracing_open,
4615 	.read		= seq_read,
4616 	.write		= tracing_write_stub,
4617 	.llseek		= tracing_lseek,
4618 	.release	= tracing_release,
4619 };
4620 
4621 static const struct file_operations show_traces_fops = {
4622 	.open		= show_traces_open,
4623 	.read		= seq_read,
4624 	.llseek		= seq_lseek,
4625 	.release	= show_traces_release,
4626 };
4627 
4628 static ssize_t
4629 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4630 		     size_t count, loff_t *ppos)
4631 {
4632 	struct trace_array *tr = file_inode(filp)->i_private;
4633 	char *mask_str;
4634 	int len;
4635 
4636 	len = snprintf(NULL, 0, "%*pb\n",
4637 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4638 	mask_str = kmalloc(len, GFP_KERNEL);
4639 	if (!mask_str)
4640 		return -ENOMEM;
4641 
4642 	len = snprintf(mask_str, len, "%*pb\n",
4643 		       cpumask_pr_args(tr->tracing_cpumask));
4644 	if (len >= count) {
4645 		count = -EINVAL;
4646 		goto out_err;
4647 	}
4648 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4649 
4650 out_err:
4651 	kfree(mask_str);
4652 
4653 	return count;
4654 }
4655 
4656 int tracing_set_cpumask(struct trace_array *tr,
4657 			cpumask_var_t tracing_cpumask_new)
4658 {
4659 	int cpu;
4660 
4661 	if (!tr)
4662 		return -EINVAL;
4663 
4664 	local_irq_disable();
4665 	arch_spin_lock(&tr->max_lock);
4666 	for_each_tracing_cpu(cpu) {
4667 		/*
4668 		 * Increase/decrease the disabled counter if we are
4669 		 * about to flip a bit in the cpumask:
4670 		 */
4671 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4672 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4673 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4674 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4675 		}
4676 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4677 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4678 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4679 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4680 		}
4681 	}
4682 	arch_spin_unlock(&tr->max_lock);
4683 	local_irq_enable();
4684 
4685 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4686 
4687 	return 0;
4688 }
4689 
4690 static ssize_t
4691 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4692 		      size_t count, loff_t *ppos)
4693 {
4694 	struct trace_array *tr = file_inode(filp)->i_private;
4695 	cpumask_var_t tracing_cpumask_new;
4696 	int err;
4697 
4698 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4699 		return -ENOMEM;
4700 
4701 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4702 	if (err)
4703 		goto err_free;
4704 
4705 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4706 	if (err)
4707 		goto err_free;
4708 
4709 	free_cpumask_var(tracing_cpumask_new);
4710 
4711 	return count;
4712 
4713 err_free:
4714 	free_cpumask_var(tracing_cpumask_new);
4715 
4716 	return err;
4717 }
4718 
4719 static const struct file_operations tracing_cpumask_fops = {
4720 	.open		= tracing_open_generic_tr,
4721 	.read		= tracing_cpumask_read,
4722 	.write		= tracing_cpumask_write,
4723 	.release	= tracing_release_generic_tr,
4724 	.llseek		= generic_file_llseek,
4725 };
4726 
4727 static int tracing_trace_options_show(struct seq_file *m, void *v)
4728 {
4729 	struct tracer_opt *trace_opts;
4730 	struct trace_array *tr = m->private;
4731 	u32 tracer_flags;
4732 	int i;
4733 
4734 	mutex_lock(&trace_types_lock);
4735 	tracer_flags = tr->current_trace->flags->val;
4736 	trace_opts = tr->current_trace->flags->opts;
4737 
4738 	for (i = 0; trace_options[i]; i++) {
4739 		if (tr->trace_flags & (1 << i))
4740 			seq_printf(m, "%s\n", trace_options[i]);
4741 		else
4742 			seq_printf(m, "no%s\n", trace_options[i]);
4743 	}
4744 
4745 	for (i = 0; trace_opts[i].name; i++) {
4746 		if (tracer_flags & trace_opts[i].bit)
4747 			seq_printf(m, "%s\n", trace_opts[i].name);
4748 		else
4749 			seq_printf(m, "no%s\n", trace_opts[i].name);
4750 	}
4751 	mutex_unlock(&trace_types_lock);
4752 
4753 	return 0;
4754 }
4755 
4756 static int __set_tracer_option(struct trace_array *tr,
4757 			       struct tracer_flags *tracer_flags,
4758 			       struct tracer_opt *opts, int neg)
4759 {
4760 	struct tracer *trace = tracer_flags->trace;
4761 	int ret;
4762 
4763 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4764 	if (ret)
4765 		return ret;
4766 
4767 	if (neg)
4768 		tracer_flags->val &= ~opts->bit;
4769 	else
4770 		tracer_flags->val |= opts->bit;
4771 	return 0;
4772 }
4773 
4774 /* Try to assign a tracer specific option */
4775 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4776 {
4777 	struct tracer *trace = tr->current_trace;
4778 	struct tracer_flags *tracer_flags = trace->flags;
4779 	struct tracer_opt *opts = NULL;
4780 	int i;
4781 
4782 	for (i = 0; tracer_flags->opts[i].name; i++) {
4783 		opts = &tracer_flags->opts[i];
4784 
4785 		if (strcmp(cmp, opts->name) == 0)
4786 			return __set_tracer_option(tr, trace->flags, opts, neg);
4787 	}
4788 
4789 	return -EINVAL;
4790 }
4791 
4792 /* Some tracers require overwrite to stay enabled */
4793 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4794 {
4795 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4796 		return -1;
4797 
4798 	return 0;
4799 }
4800 
4801 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4802 {
4803 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4804 	    (mask == TRACE_ITER_RECORD_CMD))
4805 		lockdep_assert_held(&event_mutex);
4806 
4807 	/* do nothing if flag is already set */
4808 	if (!!(tr->trace_flags & mask) == !!enabled)
4809 		return 0;
4810 
4811 	/* Give the tracer a chance to approve the change */
4812 	if (tr->current_trace->flag_changed)
4813 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4814 			return -EINVAL;
4815 
4816 	if (enabled)
4817 		tr->trace_flags |= mask;
4818 	else
4819 		tr->trace_flags &= ~mask;
4820 
4821 	if (mask == TRACE_ITER_RECORD_CMD)
4822 		trace_event_enable_cmd_record(enabled);
4823 
4824 	if (mask == TRACE_ITER_RECORD_TGID) {
4825 		if (!tgid_map)
4826 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4827 					   sizeof(*tgid_map),
4828 					   GFP_KERNEL);
4829 		if (!tgid_map) {
4830 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4831 			return -ENOMEM;
4832 		}
4833 
4834 		trace_event_enable_tgid_record(enabled);
4835 	}
4836 
4837 	if (mask == TRACE_ITER_EVENT_FORK)
4838 		trace_event_follow_fork(tr, enabled);
4839 
4840 	if (mask == TRACE_ITER_FUNC_FORK)
4841 		ftrace_pid_follow_fork(tr, enabled);
4842 
4843 	if (mask == TRACE_ITER_OVERWRITE) {
4844 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4845 #ifdef CONFIG_TRACER_MAX_TRACE
4846 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4847 #endif
4848 	}
4849 
4850 	if (mask == TRACE_ITER_PRINTK) {
4851 		trace_printk_start_stop_comm(enabled);
4852 		trace_printk_control(enabled);
4853 	}
4854 
4855 	return 0;
4856 }
4857 
4858 int trace_set_options(struct trace_array *tr, char *option)
4859 {
4860 	char *cmp;
4861 	int neg = 0;
4862 	int ret;
4863 	size_t orig_len = strlen(option);
4864 	int len;
4865 
4866 	cmp = strstrip(option);
4867 
4868 	len = str_has_prefix(cmp, "no");
4869 	if (len)
4870 		neg = 1;
4871 
4872 	cmp += len;
4873 
4874 	mutex_lock(&event_mutex);
4875 	mutex_lock(&trace_types_lock);
4876 
4877 	ret = match_string(trace_options, -1, cmp);
4878 	/* If no option could be set, test the specific tracer options */
4879 	if (ret < 0)
4880 		ret = set_tracer_option(tr, cmp, neg);
4881 	else
4882 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4883 
4884 	mutex_unlock(&trace_types_lock);
4885 	mutex_unlock(&event_mutex);
4886 
4887 	/*
4888 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4889 	 * turn it back into a space.
4890 	 */
4891 	if (orig_len > strlen(option))
4892 		option[strlen(option)] = ' ';
4893 
4894 	return ret;
4895 }
4896 
4897 static void __init apply_trace_boot_options(void)
4898 {
4899 	char *buf = trace_boot_options_buf;
4900 	char *option;
4901 
4902 	while (true) {
4903 		option = strsep(&buf, ",");
4904 
4905 		if (!option)
4906 			break;
4907 
4908 		if (*option)
4909 			trace_set_options(&global_trace, option);
4910 
4911 		/* Put back the comma to allow this to be called again */
4912 		if (buf)
4913 			*(buf - 1) = ',';
4914 	}
4915 }
4916 
4917 static ssize_t
4918 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4919 			size_t cnt, loff_t *ppos)
4920 {
4921 	struct seq_file *m = filp->private_data;
4922 	struct trace_array *tr = m->private;
4923 	char buf[64];
4924 	int ret;
4925 
4926 	if (cnt >= sizeof(buf))
4927 		return -EINVAL;
4928 
4929 	if (copy_from_user(buf, ubuf, cnt))
4930 		return -EFAULT;
4931 
4932 	buf[cnt] = 0;
4933 
4934 	ret = trace_set_options(tr, buf);
4935 	if (ret < 0)
4936 		return ret;
4937 
4938 	*ppos += cnt;
4939 
4940 	return cnt;
4941 }
4942 
4943 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4944 {
4945 	struct trace_array *tr = inode->i_private;
4946 	int ret;
4947 
4948 	ret = tracing_check_open_get_tr(tr);
4949 	if (ret)
4950 		return ret;
4951 
4952 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4953 	if (ret < 0)
4954 		trace_array_put(tr);
4955 
4956 	return ret;
4957 }
4958 
4959 static const struct file_operations tracing_iter_fops = {
4960 	.open		= tracing_trace_options_open,
4961 	.read		= seq_read,
4962 	.llseek		= seq_lseek,
4963 	.release	= tracing_single_release_tr,
4964 	.write		= tracing_trace_options_write,
4965 };
4966 
4967 static const char readme_msg[] =
4968 	"tracing mini-HOWTO:\n\n"
4969 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4970 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4971 	" Important files:\n"
4972 	"  trace\t\t\t- The static contents of the buffer\n"
4973 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4974 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4975 	"  current_tracer\t- function and latency tracers\n"
4976 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4977 	"  error_log\t- error log for failed commands (that support it)\n"
4978 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4979 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4980 	"  trace_clock\t\t- change the clock used to order events\n"
4981 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4982 	"      global:   Synced across CPUs but slows tracing down.\n"
4983 	"     counter:   Not a clock, but just an increment\n"
4984 	"      uptime:   Jiffy counter from time of boot\n"
4985 	"        perf:   Same clock that perf events use\n"
4986 #ifdef CONFIG_X86_64
4987 	"     x86-tsc:   TSC cycle counter\n"
4988 #endif
4989 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4990 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4991 	"    absolute:   Absolute (standalone) timestamp\n"
4992 	"\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
4993 	"\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
4994 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4995 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4996 	"\t\t\t  Remove sub-buffer with rmdir\n"
4997 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4998 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4999 	"\t\t\t  option name\n"
5000 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5001 #ifdef CONFIG_DYNAMIC_FTRACE
5002 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5003 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5004 	"\t\t\t  functions\n"
5005 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5006 	"\t     modules: Can select a group via module\n"
5007 	"\t      Format: :mod:<module-name>\n"
5008 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5009 	"\t    triggers: a command to perform when function is hit\n"
5010 	"\t      Format: <function>:<trigger>[:count]\n"
5011 	"\t     trigger: traceon, traceoff\n"
5012 	"\t\t      enable_event:<system>:<event>\n"
5013 	"\t\t      disable_event:<system>:<event>\n"
5014 #ifdef CONFIG_STACKTRACE
5015 	"\t\t      stacktrace\n"
5016 #endif
5017 #ifdef CONFIG_TRACER_SNAPSHOT
5018 	"\t\t      snapshot\n"
5019 #endif
5020 	"\t\t      dump\n"
5021 	"\t\t      cpudump\n"
5022 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5023 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5024 	"\t     The first one will disable tracing every time do_fault is hit\n"
5025 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5026 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5027 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5028 	"\t       the counter will not decrement. It only decrements when the\n"
5029 	"\t       trigger did work\n"
5030 	"\t     To remove trigger without count:\n"
5031 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5032 	"\t     To remove trigger with a count:\n"
5033 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5034 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5035 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5036 	"\t    modules: Can select a group via module command :mod:\n"
5037 	"\t    Does not accept triggers\n"
5038 #endif /* CONFIG_DYNAMIC_FTRACE */
5039 #ifdef CONFIG_FUNCTION_TRACER
5040 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5041 	"\t\t    (function)\n"
5042 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5043 	"\t\t    (function)\n"
5044 #endif
5045 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5046 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5047 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5048 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5049 #endif
5050 #ifdef CONFIG_TRACER_SNAPSHOT
5051 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5052 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5053 	"\t\t\t  information\n"
5054 #endif
5055 #ifdef CONFIG_STACK_TRACER
5056 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5057 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5058 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5059 	"\t\t\t  new trace)\n"
5060 #ifdef CONFIG_DYNAMIC_FTRACE
5061 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5062 	"\t\t\t  traces\n"
5063 #endif
5064 #endif /* CONFIG_STACK_TRACER */
5065 #ifdef CONFIG_DYNAMIC_EVENTS
5066 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5067 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5068 #endif
5069 #ifdef CONFIG_KPROBE_EVENTS
5070 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5071 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5072 #endif
5073 #ifdef CONFIG_UPROBE_EVENTS
5074 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5075 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5076 #endif
5077 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5078 	"\t  accepts: event-definitions (one definition per line)\n"
5079 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5080 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5081 #ifdef CONFIG_HIST_TRIGGERS
5082 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5083 #endif
5084 	"\t           -:[<group>/]<event>\n"
5085 #ifdef CONFIG_KPROBE_EVENTS
5086 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5087   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5088 #endif
5089 #ifdef CONFIG_UPROBE_EVENTS
5090   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5091 #endif
5092 	"\t     args: <name>=fetcharg[:type]\n"
5093 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5094 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5095 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5096 #else
5097 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5098 #endif
5099 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5100 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5101 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5102 	"\t           <type>\\[<array-size>\\]\n"
5103 #ifdef CONFIG_HIST_TRIGGERS
5104 	"\t    field: <stype> <name>;\n"
5105 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5106 	"\t           [unsigned] char/int/long\n"
5107 #endif
5108 #endif
5109 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5110 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5111 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5112 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5113 	"\t\t\t  events\n"
5114 	"      filter\t\t- If set, only events passing filter are traced\n"
5115 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5116 	"\t\t\t  <event>:\n"
5117 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5118 	"      filter\t\t- If set, only events passing filter are traced\n"
5119 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5120 	"\t    Format: <trigger>[:count][if <filter>]\n"
5121 	"\t   trigger: traceon, traceoff\n"
5122 	"\t            enable_event:<system>:<event>\n"
5123 	"\t            disable_event:<system>:<event>\n"
5124 #ifdef CONFIG_HIST_TRIGGERS
5125 	"\t            enable_hist:<system>:<event>\n"
5126 	"\t            disable_hist:<system>:<event>\n"
5127 #endif
5128 #ifdef CONFIG_STACKTRACE
5129 	"\t\t    stacktrace\n"
5130 #endif
5131 #ifdef CONFIG_TRACER_SNAPSHOT
5132 	"\t\t    snapshot\n"
5133 #endif
5134 #ifdef CONFIG_HIST_TRIGGERS
5135 	"\t\t    hist (see below)\n"
5136 #endif
5137 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5138 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5139 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5140 	"\t                  events/block/block_unplug/trigger\n"
5141 	"\t   The first disables tracing every time block_unplug is hit.\n"
5142 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5143 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5144 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5145 	"\t   Like function triggers, the counter is only decremented if it\n"
5146 	"\t    enabled or disabled tracing.\n"
5147 	"\t   To remove a trigger without a count:\n"
5148 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5149 	"\t   To remove a trigger with a count:\n"
5150 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5151 	"\t   Filters can be ignored when removing a trigger.\n"
5152 #ifdef CONFIG_HIST_TRIGGERS
5153 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5154 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5155 	"\t            [:values=<field1[,field2,...]>]\n"
5156 	"\t            [:sort=<field1[,field2,...]>]\n"
5157 	"\t            [:size=#entries]\n"
5158 	"\t            [:pause][:continue][:clear]\n"
5159 	"\t            [:name=histname1]\n"
5160 	"\t            [:<handler>.<action>]\n"
5161 	"\t            [if <filter>]\n\n"
5162 	"\t    When a matching event is hit, an entry is added to a hash\n"
5163 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5164 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5165 	"\t    correspond to fields in the event's format description.  Keys\n"
5166 	"\t    can be any field, or the special string 'stacktrace'.\n"
5167 	"\t    Compound keys consisting of up to two fields can be specified\n"
5168 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5169 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5170 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5171 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5172 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5173 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5174 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5175 	"\t    its histogram data will be shared with other triggers of the\n"
5176 	"\t    same name, and trigger hits will update this common data.\n\n"
5177 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5178 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5179 	"\t    triggers attached to an event, there will be a table for each\n"
5180 	"\t    trigger in the output.  The table displayed for a named\n"
5181 	"\t    trigger will be the same as any other instance having the\n"
5182 	"\t    same name.  The default format used to display a given field\n"
5183 	"\t    can be modified by appending any of the following modifiers\n"
5184 	"\t    to the field name, as applicable:\n\n"
5185 	"\t            .hex        display a number as a hex value\n"
5186 	"\t            .sym        display an address as a symbol\n"
5187 	"\t            .sym-offset display an address as a symbol and offset\n"
5188 	"\t            .execname   display a common_pid as a program name\n"
5189 	"\t            .syscall    display a syscall id as a syscall name\n"
5190 	"\t            .log2       display log2 value rather than raw number\n"
5191 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5192 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5193 	"\t    trigger or to start a hist trigger but not log any events\n"
5194 	"\t    until told to do so.  'continue' can be used to start or\n"
5195 	"\t    restart a paused hist trigger.\n\n"
5196 	"\t    The 'clear' parameter will clear the contents of a running\n"
5197 	"\t    hist trigger and leave its current paused/active state\n"
5198 	"\t    unchanged.\n\n"
5199 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5200 	"\t    have one event conditionally start and stop another event's\n"
5201 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5202 	"\t    the enable_event and disable_event triggers.\n\n"
5203 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5204 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5205 	"\t        <handler>.<action>\n\n"
5206 	"\t    The available handlers are:\n\n"
5207 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5208 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5209 	"\t        onchange(var)            - invoke action if var changes\n\n"
5210 	"\t    The available actions are:\n\n"
5211 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5212 	"\t        save(field,...)                      - save current event fields\n"
5213 #ifdef CONFIG_TRACER_SNAPSHOT
5214 	"\t        snapshot()                           - snapshot the trace buffer\n"
5215 #endif
5216 #endif
5217 ;
5218 
5219 static ssize_t
5220 tracing_readme_read(struct file *filp, char __user *ubuf,
5221 		       size_t cnt, loff_t *ppos)
5222 {
5223 	return simple_read_from_buffer(ubuf, cnt, ppos,
5224 					readme_msg, strlen(readme_msg));
5225 }
5226 
5227 static const struct file_operations tracing_readme_fops = {
5228 	.open		= tracing_open_generic,
5229 	.read		= tracing_readme_read,
5230 	.llseek		= generic_file_llseek,
5231 };
5232 
5233 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5234 {
5235 	int *ptr = v;
5236 
5237 	if (*pos || m->count)
5238 		ptr++;
5239 
5240 	(*pos)++;
5241 
5242 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5243 		if (trace_find_tgid(*ptr))
5244 			return ptr;
5245 	}
5246 
5247 	return NULL;
5248 }
5249 
5250 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5251 {
5252 	void *v;
5253 	loff_t l = 0;
5254 
5255 	if (!tgid_map)
5256 		return NULL;
5257 
5258 	v = &tgid_map[0];
5259 	while (l <= *pos) {
5260 		v = saved_tgids_next(m, v, &l);
5261 		if (!v)
5262 			return NULL;
5263 	}
5264 
5265 	return v;
5266 }
5267 
5268 static void saved_tgids_stop(struct seq_file *m, void *v)
5269 {
5270 }
5271 
5272 static int saved_tgids_show(struct seq_file *m, void *v)
5273 {
5274 	int pid = (int *)v - tgid_map;
5275 
5276 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5277 	return 0;
5278 }
5279 
5280 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5281 	.start		= saved_tgids_start,
5282 	.stop		= saved_tgids_stop,
5283 	.next		= saved_tgids_next,
5284 	.show		= saved_tgids_show,
5285 };
5286 
5287 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5288 {
5289 	int ret;
5290 
5291 	ret = tracing_check_open_get_tr(NULL);
5292 	if (ret)
5293 		return ret;
5294 
5295 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5296 }
5297 
5298 
5299 static const struct file_operations tracing_saved_tgids_fops = {
5300 	.open		= tracing_saved_tgids_open,
5301 	.read		= seq_read,
5302 	.llseek		= seq_lseek,
5303 	.release	= seq_release,
5304 };
5305 
5306 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5307 {
5308 	unsigned int *ptr = v;
5309 
5310 	if (*pos || m->count)
5311 		ptr++;
5312 
5313 	(*pos)++;
5314 
5315 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5316 	     ptr++) {
5317 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5318 			continue;
5319 
5320 		return ptr;
5321 	}
5322 
5323 	return NULL;
5324 }
5325 
5326 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5327 {
5328 	void *v;
5329 	loff_t l = 0;
5330 
5331 	preempt_disable();
5332 	arch_spin_lock(&trace_cmdline_lock);
5333 
5334 	v = &savedcmd->map_cmdline_to_pid[0];
5335 	while (l <= *pos) {
5336 		v = saved_cmdlines_next(m, v, &l);
5337 		if (!v)
5338 			return NULL;
5339 	}
5340 
5341 	return v;
5342 }
5343 
5344 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5345 {
5346 	arch_spin_unlock(&trace_cmdline_lock);
5347 	preempt_enable();
5348 }
5349 
5350 static int saved_cmdlines_show(struct seq_file *m, void *v)
5351 {
5352 	char buf[TASK_COMM_LEN];
5353 	unsigned int *pid = v;
5354 
5355 	__trace_find_cmdline(*pid, buf);
5356 	seq_printf(m, "%d %s\n", *pid, buf);
5357 	return 0;
5358 }
5359 
5360 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5361 	.start		= saved_cmdlines_start,
5362 	.next		= saved_cmdlines_next,
5363 	.stop		= saved_cmdlines_stop,
5364 	.show		= saved_cmdlines_show,
5365 };
5366 
5367 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5368 {
5369 	int ret;
5370 
5371 	ret = tracing_check_open_get_tr(NULL);
5372 	if (ret)
5373 		return ret;
5374 
5375 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5376 }
5377 
5378 static const struct file_operations tracing_saved_cmdlines_fops = {
5379 	.open		= tracing_saved_cmdlines_open,
5380 	.read		= seq_read,
5381 	.llseek		= seq_lseek,
5382 	.release	= seq_release,
5383 };
5384 
5385 static ssize_t
5386 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5387 				 size_t cnt, loff_t *ppos)
5388 {
5389 	char buf[64];
5390 	int r;
5391 
5392 	arch_spin_lock(&trace_cmdline_lock);
5393 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5394 	arch_spin_unlock(&trace_cmdline_lock);
5395 
5396 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5397 }
5398 
5399 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5400 {
5401 	kfree(s->saved_cmdlines);
5402 	kfree(s->map_cmdline_to_pid);
5403 	kfree(s);
5404 }
5405 
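/*
 * Allocate a new cmdlines buffer with @val entries, publish it under
 * trace_cmdline_lock, then free the old buffer outside the lock.
 */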
5406 static int tracing_resize_saved_cmdlines(unsigned int val)
5407 {
5408 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5409 
5410 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5411 	if (!s)
5412 		return -ENOMEM;
5413 
5414 	if (allocate_cmdlines_buffer(val, s) < 0) {
5415 		kfree(s);
5416 		return -ENOMEM;
5417 	}
5418 
5419 	arch_spin_lock(&trace_cmdline_lock);
5420 	savedcmd_temp = savedcmd;
5421 	savedcmd = s;
5422 	arch_spin_unlock(&trace_cmdline_lock);
5423 	free_saved_cmdlines_buffer(savedcmd_temp);
5424 
5425 	return 0;
5426 }
5427 
5428 static ssize_t
5429 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5430 				  size_t cnt, loff_t *ppos)
5431 {
5432 	unsigned long val;
5433 	int ret;
5434 
5435 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5436 	if (ret)
5437 		return ret;
5438 
5439 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5440 	if (!val || val > PID_MAX_DEFAULT)
5441 		return -EINVAL;
5442 
5443 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5444 	if (ret < 0)
5445 		return ret;
5446 
5447 	*ppos += cnt;
5448 
5449 	return cnt;
5450 }
5451 
5452 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5453 	.open		= tracing_open_generic,
5454 	.read		= tracing_saved_cmdlines_size_read,
5455 	.write		= tracing_saved_cmdlines_size_write,
5456 };
5457 
5458 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5459 static union trace_eval_map_item *
5460 update_eval_map(union trace_eval_map_item *ptr)
5461 {
5462 	if (!ptr->map.eval_string) {
5463 		if (ptr->tail.next) {
5464 			ptr = ptr->tail.next;
5465 			/* Set ptr to the next real item (skip head) */
5466 			ptr++;
5467 		} else
5468 			return NULL;
5469 	}
5470 	return ptr;
5471 }
5472 
5473 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5474 {
5475 	union trace_eval_map_item *ptr = v;
5476 
5477 	/*
5478 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5479 	 * This really should never happen.
5480 	 */
5481 	(*pos)++;
5482 	ptr = update_eval_map(ptr);
5483 	if (WARN_ON_ONCE(!ptr))
5484 		return NULL;
5485 
5486 	ptr++;
5487 	ptr = update_eval_map(ptr);
5488 
5489 	return ptr;
5490 }
5491 
5492 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5493 {
5494 	union trace_eval_map_item *v;
5495 	loff_t l = 0;
5496 
5497 	mutex_lock(&trace_eval_mutex);
5498 
5499 	v = trace_eval_maps;
5500 	if (v)
5501 		v++;
5502 
5503 	while (v && l < *pos) {
5504 		v = eval_map_next(m, v, &l);
5505 	}
5506 
5507 	return v;
5508 }
5509 
5510 static void eval_map_stop(struct seq_file *m, void *v)
5511 {
5512 	mutex_unlock(&trace_eval_mutex);
5513 }
5514 
5515 static int eval_map_show(struct seq_file *m, void *v)
5516 {
5517 	union trace_eval_map_item *ptr = v;
5518 
5519 	seq_printf(m, "%s %ld (%s)\n",
5520 		   ptr->map.eval_string, ptr->map.eval_value,
5521 		   ptr->map.system);
5522 
5523 	return 0;
5524 }
5525 
5526 static const struct seq_operations tracing_eval_map_seq_ops = {
5527 	.start		= eval_map_start,
5528 	.next		= eval_map_next,
5529 	.stop		= eval_map_stop,
5530 	.show		= eval_map_show,
5531 };
5532 
5533 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5534 {
5535 	int ret;
5536 
5537 	ret = tracing_check_open_get_tr(NULL);
5538 	if (ret)
5539 		return ret;
5540 
5541 	return seq_open(filp, &tracing_eval_map_seq_ops);
5542 }
5543 
5544 static const struct file_operations tracing_eval_map_fops = {
5545 	.open		= tracing_eval_map_open,
5546 	.read		= seq_read,
5547 	.llseek		= seq_lseek,
5548 	.release	= seq_release,
5549 };
5550 
5551 static inline union trace_eval_map_item *
5552 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5553 {
5554 	/* Return tail of array given the head */
5555 	return ptr + ptr->head.length + 1;
5556 }
5557 
5558 static void
5559 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5560 			   int len)
5561 {
5562 	struct trace_eval_map **stop;
5563 	struct trace_eval_map **map;
5564 	union trace_eval_map_item *map_array;
5565 	union trace_eval_map_item *ptr;
5566 
5567 	stop = start + len;
5568 
5569 	/*
5570 	 * The allocated array contains the maps plus a head and a tail item,
5571 	 * where the head holds the module and the length of the array, and
5572 	 * the tail holds a pointer to the next list.
5573 	 */
5574 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5575 	if (!map_array) {
5576 		pr_warn("Unable to allocate trace eval mapping\n");
5577 		return;
5578 	}
5579 
5580 	mutex_lock(&trace_eval_mutex);
5581 
5582 	if (!trace_eval_maps)
5583 		trace_eval_maps = map_array;
5584 	else {
5585 		ptr = trace_eval_maps;
5586 		for (;;) {
5587 			ptr = trace_eval_jmp_to_tail(ptr);
5588 			if (!ptr->tail.next)
5589 				break;
5590 			ptr = ptr->tail.next;
5591 
5592 		}
5593 		ptr->tail.next = map_array;
5594 	}
5595 	map_array->head.mod = mod;
5596 	map_array->head.length = len;
5597 	map_array++;
5598 
5599 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5600 		map_array->map = **map;
5601 		map_array++;
5602 	}
5603 	memset(map_array, 0, sizeof(*map_array));
5604 
5605 	mutex_unlock(&trace_eval_mutex);
5606 }
5607 
5608 static void trace_create_eval_file(struct dentry *d_tracer)
5609 {
5610 	trace_create_file("eval_map", 0444, d_tracer,
5611 			  NULL, &tracing_eval_map_fops);
5612 }
5613 
5614 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5615 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5616 static inline void trace_insert_eval_map_file(struct module *mod,
5617 			      struct trace_eval_map **start, int len) { }
5618 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5619 
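/*
 * Update the eval maps of the trace events for the given module and,
 * when CONFIG_TRACE_EVAL_MAP_FILE is enabled, also save the maps for
 * the eval_map file.
 */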
5620 static void trace_insert_eval_map(struct module *mod,
5621 				  struct trace_eval_map **start, int len)
5622 {
5623 	struct trace_eval_map **map;
5624 
5625 	if (len <= 0)
5626 		return;
5627 
5628 	map = start;
5629 
5630 	trace_event_eval_update(map, len);
5631 
5632 	trace_insert_eval_map_file(mod, start, len);
5633 }
5634 
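/*
 * Handler for reads of the current_tracer file: report the name of
 * the tracer that is currently active on this instance.
 */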
5635 static ssize_t
5636 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5637 		       size_t cnt, loff_t *ppos)
5638 {
5639 	struct trace_array *tr = filp->private_data;
5640 	char buf[MAX_TRACER_SIZE+2];
5641 	int r;
5642 
5643 	mutex_lock(&trace_types_lock);
5644 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5645 	mutex_unlock(&trace_types_lock);
5646 
5647 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5648 }
5649 
5650 int tracer_init(struct tracer *t, struct trace_array *tr)
5651 {
5652 	tracing_reset_online_cpus(&tr->array_buffer);
5653 	return t->init(tr);
5654 }
5655 
5656 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5657 {
5658 	int cpu;
5659 
5660 	for_each_tracing_cpu(cpu)
5661 		per_cpu_ptr(buf->data, cpu)->entries = val;
5662 }
5663 
5664 #ifdef CONFIG_TRACER_MAX_TRACE
5665 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5666 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5667 					struct array_buffer *size_buf, int cpu_id)
5668 {
5669 	int cpu, ret = 0;
5670 
5671 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5672 		for_each_tracing_cpu(cpu) {
5673 			ret = ring_buffer_resize(trace_buf->buffer,
5674 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5675 			if (ret < 0)
5676 				break;
5677 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5678 				per_cpu_ptr(size_buf->data, cpu)->entries;
5679 		}
5680 	} else {
5681 		ret = ring_buffer_resize(trace_buf->buffer,
5682 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5683 		if (ret == 0)
5684 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5685 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5686 	}
5687 
5688 	return ret;
5689 }
5690 #endif /* CONFIG_TRACER_MAX_TRACE */
5691 
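/*
 * Resize @tr's ring buffer (and, when applicable, its max/snapshot
 * buffer) to @size bytes for @cpu, or for every CPU when @cpu is
 * RING_BUFFER_ALL_CPUS. Called with trace_types_lock held.
 */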
5692 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5693 					unsigned long size, int cpu)
5694 {
5695 	int ret;
5696 
5697 	/*
5698 	 * If kernel or user changes the size of the ring buffer
5699 	 * we use the size that was given, and we can forget about
5700 	 * expanding it later.
5701 	 */
5702 	ring_buffer_expanded = true;
5703 
5704 	/* May be called before buffers are initialized */
5705 	if (!tr->array_buffer.buffer)
5706 		return 0;
5707 
5708 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5709 	if (ret < 0)
5710 		return ret;
5711 
5712 #ifdef CONFIG_TRACER_MAX_TRACE
5713 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5714 	    !tr->current_trace->use_max_tr)
5715 		goto out;
5716 
5717 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5718 	if (ret < 0) {
5719 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5720 						     &tr->array_buffer, cpu);
5721 		if (r < 0) {
5722 			/*
5723 			 * AARGH! We are left with a max buffer of a
5724 			 * different size!
5725 			 * The max buffer is our "snapshot" buffer.
5726 			 * When a tracer needs a snapshot (one of the
5727 			 * latency tracers), it swaps the max buffer
5728 			 * with the saved snapshot. We succeeded in
5729 			 * updating the size of the main buffer, but
5730 			 * failed to update the size of the max buffer.
5731 			 * Then, when we tried to reset the main buffer
5732 			 * to its original size, we failed there too.
5733 			 * This is very unlikely to happen, but if it
5734 			 * does, warn and kill all tracing.
5735 			 */
5736 			WARN_ON(1);
5737 			tracing_disabled = 1;
5738 		}
5739 		return ret;
5740 	}
5741 
5742 	if (cpu == RING_BUFFER_ALL_CPUS)
5743 		set_buffer_entries(&tr->max_buffer, size);
5744 	else
5745 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5746 
5747  out:
5748 #endif /* CONFIG_TRACER_MAX_TRACE */
5749 
5750 	if (cpu == RING_BUFFER_ALL_CPUS)
5751 		set_buffer_entries(&tr->array_buffer, size);
5752 	else
5753 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5754 
5755 	return ret;
5756 }
5757 
5758 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5759 				  unsigned long size, int cpu_id)
5760 {
5761 	int ret = size;
5762 
5763 	mutex_lock(&trace_types_lock);
5764 
5765 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5766 		/* make sure, this cpu is enabled in the mask */
5767 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5768 			ret = -EINVAL;
5769 			goto out;
5770 		}
5771 	}
5772 
5773 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5774 	if (ret < 0)
5775 		ret = -ENOMEM;
5776 
5777 out:
5778 	mutex_unlock(&trace_types_lock);
5779 
5780 	return ret;
5781 }
5782 
5783 
5784 /**
5785  * tracing_update_buffers - used by tracing facility to expand ring buffers
5786  *
5787  * To save on memory when the tracing is never used on a system with it
5788  * To save memory when tracing is never used on a system that has it
5789  * configured in, the ring buffers start at a minimum size. Once a
5790  * user starts to use the tracing facility, the buffers are expanded
5791  * to their default size.
5792  * This function is to be called when a tracer is about to be used.
5793  */
5794 int tracing_update_buffers(void)
5795 {
5796 	int ret = 0;
5797 
5798 	mutex_lock(&trace_types_lock);
5799 	if (!ring_buffer_expanded)
5800 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5801 						RING_BUFFER_ALL_CPUS);
5802 	mutex_unlock(&trace_types_lock);
5803 
5804 	return ret;
5805 }
5806 
5807 struct trace_option_dentry;
5808 
5809 static void
5810 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5811 
5812 /*
5813  * Used to clear out the tracer before deletion of an instance.
5814  * Must have trace_types_lock held.
5815  */
5816 static void tracing_set_nop(struct trace_array *tr)
5817 {
5818 	if (tr->current_trace == &nop_trace)
5819 		return;
5820 
5821 	tr->current_trace->enabled--;
5822 
5823 	if (tr->current_trace->reset)
5824 		tr->current_trace->reset(tr);
5825 
5826 	tr->current_trace = &nop_trace;
5827 }
5828 
5829 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5830 {
5831 	/* Only enable if the directory has been created already. */
5832 	if (!tr->dir)
5833 		return;
5834 
5835 	create_trace_option_files(tr, t);
5836 }
5837 
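/*
 * Switch @tr to the tracer registered under the name @buf: shut down
 * the previous tracer, allocate or free the snapshot buffer as the new
 * tracer requires, and run the new tracer's init callback.
 */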
5838 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5839 {
5840 	struct tracer *t;
5841 #ifdef CONFIG_TRACER_MAX_TRACE
5842 	bool had_max_tr;
5843 #endif
5844 	int ret = 0;
5845 
5846 	mutex_lock(&trace_types_lock);
5847 
5848 	if (!ring_buffer_expanded) {
5849 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5850 						RING_BUFFER_ALL_CPUS);
5851 		if (ret < 0)
5852 			goto out;
5853 		ret = 0;
5854 	}
5855 
5856 	for (t = trace_types; t; t = t->next) {
5857 		if (strcmp(t->name, buf) == 0)
5858 			break;
5859 	}
5860 	if (!t) {
5861 		ret = -EINVAL;
5862 		goto out;
5863 	}
5864 	if (t == tr->current_trace)
5865 		goto out;
5866 
5867 #ifdef CONFIG_TRACER_SNAPSHOT
5868 	if (t->use_max_tr) {
5869 		arch_spin_lock(&tr->max_lock);
5870 		if (tr->cond_snapshot)
5871 			ret = -EBUSY;
5872 		arch_spin_unlock(&tr->max_lock);
5873 		if (ret)
5874 			goto out;
5875 	}
5876 #endif
5877 	/* Some tracers won't work on kernel command line */
5878 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5879 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5880 			t->name);
5881 		goto out;
5882 	}
5883 
5884 	/* Some tracers are only allowed for the top level buffer */
5885 	if (!trace_ok_for_array(t, tr)) {
5886 		ret = -EINVAL;
5887 		goto out;
5888 	}
5889 
5890 	/* If trace pipe files are being read, we can't change the tracer */
5891 	if (tr->current_trace->ref) {
5892 		ret = -EBUSY;
5893 		goto out;
5894 	}
5895 
5896 	trace_branch_disable();
5897 
5898 	tr->current_trace->enabled--;
5899 
5900 	if (tr->current_trace->reset)
5901 		tr->current_trace->reset(tr);
5902 
5903 	/* Current trace needs to be nop_trace before synchronize_rcu */
5904 	tr->current_trace = &nop_trace;
5905 
5906 #ifdef CONFIG_TRACER_MAX_TRACE
5907 	had_max_tr = tr->allocated_snapshot;
5908 
5909 	if (had_max_tr && !t->use_max_tr) {
5910 		/*
5911 		 * We need to make sure that update_max_tr sees that
5912 		 * current_trace changed to nop_trace to keep it from
5913 		 * swapping the buffers after we resize it.
5914 		 * update_max_tr is called with interrupts disabled,
5915 		 * so a synchronize_rcu() is sufficient.
5916 		 */
5917 		synchronize_rcu();
5918 		free_snapshot(tr);
5919 	}
5920 #endif
5921 
5922 #ifdef CONFIG_TRACER_MAX_TRACE
5923 	if (t->use_max_tr && !had_max_tr) {
5924 		ret = tracing_alloc_snapshot_instance(tr);
5925 		if (ret < 0)
5926 			goto out;
5927 	}
5928 #endif
5929 
5930 	if (t->init) {
5931 		ret = tracer_init(t, tr);
5932 		if (ret)
5933 			goto out;
5934 	}
5935 
5936 	tr->current_trace = t;
5937 	tr->current_trace->enabled++;
5938 	trace_branch_enable(tr);
5939  out:
5940 	mutex_unlock(&trace_types_lock);
5941 
5942 	return ret;
5943 }
5944 
5945 static ssize_t
5946 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5947 			size_t cnt, loff_t *ppos)
5948 {
5949 	struct trace_array *tr = filp->private_data;
5950 	char buf[MAX_TRACER_SIZE+1];
5951 	int i;
5952 	size_t ret;
5953 	int err;
5954 
5955 	ret = cnt;
5956 
5957 	if (cnt > MAX_TRACER_SIZE)
5958 		cnt = MAX_TRACER_SIZE;
5959 
5960 	if (copy_from_user(buf, ubuf, cnt))
5961 		return -EFAULT;
5962 
5963 	buf[cnt] = 0;
5964 
5965 	/* strip trailing whitespace. */
5966 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5967 		buf[i] = 0;
5968 
5969 	err = tracing_set_tracer(tr, buf);
5970 	if (err)
5971 		return err;
5972 
5973 	*ppos += ret;
5974 
5975 	return ret;
5976 }
5977 
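/*
 * tracing_thresh and the max-latency value are stored internally in
 * nanoseconds but exposed to user space in microseconds; these two
 * helpers convert between the units on read and write.
 */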
5978 static ssize_t
5979 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5980 		   size_t cnt, loff_t *ppos)
5981 {
5982 	char buf[64];
5983 	int r;
5984 
5985 	r = snprintf(buf, sizeof(buf), "%ld\n",
5986 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5987 	if (r > sizeof(buf))
5988 		r = sizeof(buf);
5989 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5990 }
5991 
5992 static ssize_t
5993 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5994 		    size_t cnt, loff_t *ppos)
5995 {
5996 	unsigned long val;
5997 	int ret;
5998 
5999 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6000 	if (ret)
6001 		return ret;
6002 
6003 	*ptr = val * 1000;
6004 
6005 	return cnt;
6006 }
6007 
6008 static ssize_t
6009 tracing_thresh_read(struct file *filp, char __user *ubuf,
6010 		    size_t cnt, loff_t *ppos)
6011 {
6012 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6013 }
6014 
6015 static ssize_t
6016 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6017 		     size_t cnt, loff_t *ppos)
6018 {
6019 	struct trace_array *tr = filp->private_data;
6020 	int ret;
6021 
6022 	mutex_lock(&trace_types_lock);
6023 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6024 	if (ret < 0)
6025 		goto out;
6026 
6027 	if (tr->current_trace->update_thresh) {
6028 		ret = tr->current_trace->update_thresh(tr);
6029 		if (ret < 0)
6030 			goto out;
6031 	}
6032 
6033 	ret = cnt;
6034 out:
6035 	mutex_unlock(&trace_types_lock);
6036 
6037 	return ret;
6038 }
6039 
6040 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6041 
6042 static ssize_t
6043 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6044 		     size_t cnt, loff_t *ppos)
6045 {
6046 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6047 }
6048 
6049 static ssize_t
6050 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6051 		      size_t cnt, loff_t *ppos)
6052 {
6053 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6054 }
6055 
6056 #endif
6057 
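/*
 * Open handler for the trace_pipe file: allocate a trace_iterator that
 * consumes events from the ring buffer as they are read.
 */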
6058 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6059 {
6060 	struct trace_array *tr = inode->i_private;
6061 	struct trace_iterator *iter;
6062 	int ret;
6063 
6064 	ret = tracing_check_open_get_tr(tr);
6065 	if (ret)
6066 		return ret;
6067 
6068 	mutex_lock(&trace_types_lock);
6069 
6070 	/* create a buffer to store the information to pass to userspace */
6071 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6072 	if (!iter) {
6073 		ret = -ENOMEM;
6074 		__trace_array_put(tr);
6075 		goto out;
6076 	}
6077 
6078 	trace_seq_init(&iter->seq);
6079 	iter->trace = tr->current_trace;
6080 
6081 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6082 		ret = -ENOMEM;
6083 		goto fail;
6084 	}
6085 
6086 	/* trace pipe does not show start of buffer */
6087 	cpumask_setall(iter->started);
6088 
6089 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6090 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6091 
6092 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6093 	if (trace_clocks[tr->clock_id].in_ns)
6094 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6095 
6096 	iter->tr = tr;
6097 	iter->array_buffer = &tr->array_buffer;
6098 	iter->cpu_file = tracing_get_cpu(inode);
6099 	mutex_init(&iter->mutex);
6100 	filp->private_data = iter;
6101 
6102 	if (iter->trace->pipe_open)
6103 		iter->trace->pipe_open(iter);
6104 
6105 	nonseekable_open(inode, filp);
6106 
6107 	tr->current_trace->ref++;
6108 out:
6109 	mutex_unlock(&trace_types_lock);
6110 	return ret;
6111 
6112 fail:
6113 	kfree(iter);
6114 	__trace_array_put(tr);
6115 	mutex_unlock(&trace_types_lock);
6116 	return ret;
6117 }
6118 
6119 static int tracing_release_pipe(struct inode *inode, struct file *file)
6120 {
6121 	struct trace_iterator *iter = file->private_data;
6122 	struct trace_array *tr = inode->i_private;
6123 
6124 	mutex_lock(&trace_types_lock);
6125 
6126 	tr->current_trace->ref--;
6127 
6128 	if (iter->trace->pipe_close)
6129 		iter->trace->pipe_close(iter);
6130 
6131 	mutex_unlock(&trace_types_lock);
6132 
6133 	free_cpumask_var(iter->started);
6134 	mutex_destroy(&iter->mutex);
6135 	kfree(iter);
6136 
6137 	trace_array_put(tr);
6138 
6139 	return 0;
6140 }
6141 
6142 static __poll_t
6143 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6144 {
6145 	struct trace_array *tr = iter->tr;
6146 
6147 	/* Iterators are static, they should be filled or empty */
6148 	if (trace_buffer_iter(iter, iter->cpu_file))
6149 		return EPOLLIN | EPOLLRDNORM;
6150 
6151 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6152 		/*
6153 		 * Always select as readable when in blocking mode
6154 		 */
6155 		return EPOLLIN | EPOLLRDNORM;
6156 	else
6157 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6158 					     filp, poll_table);
6159 }
6160 
6161 static __poll_t
6162 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6163 {
6164 	struct trace_iterator *iter = filp->private_data;
6165 
6166 	return trace_poll(iter, filp, poll_table);
6167 }
6168 
6169 /* Must be called with iter->mutex held. */
6170 static int tracing_wait_pipe(struct file *filp)
6171 {
6172 	struct trace_iterator *iter = filp->private_data;
6173 	int ret;
6174 
6175 	while (trace_empty(iter)) {
6176 
6177 		if ((filp->f_flags & O_NONBLOCK)) {
6178 			return -EAGAIN;
6179 		}
6180 
6181 		/*
6182 		 * We block until we read something and tracing is disabled.
6183 		 * We still block if tracing is disabled, but we have never
6184 		 * read anything. This allows a user to cat this file, and
6185 		 * then enable tracing. But after we have read something,
6186 		 * we give an EOF when tracing is again disabled.
6187 		 *
6188 		 * iter->pos will be 0 if we haven't read anything.
6189 		 */
6190 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6191 			break;
6192 
6193 		mutex_unlock(&iter->mutex);
6194 
6195 		ret = wait_on_pipe(iter, 0);
6196 
6197 		mutex_lock(&iter->mutex);
6198 
6199 		if (ret)
6200 			return ret;
6201 	}
6202 
6203 	return 1;
6204 }
6205 
6206 /*
6207  * Consumer reader.
6208  */
6209 static ssize_t
6210 tracing_read_pipe(struct file *filp, char __user *ubuf,
6211 		  size_t cnt, loff_t *ppos)
6212 {
6213 	struct trace_iterator *iter = filp->private_data;
6214 	ssize_t sret;
6215 
6216 	/*
6217 	 * Avoid more than one consumer on a single file descriptor.
6218 	 * This is just a matter of trace coherency; the ring buffer itself
6219 	 * is protected.
6220 	 */
6221 	mutex_lock(&iter->mutex);
6222 
6223 	/* return any leftover data */
6224 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6225 	if (sret != -EBUSY)
6226 		goto out;
6227 
6228 	trace_seq_init(&iter->seq);
6229 
6230 	if (iter->trace->read) {
6231 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6232 		if (sret)
6233 			goto out;
6234 	}
6235 
6236 waitagain:
6237 	sret = tracing_wait_pipe(filp);
6238 	if (sret <= 0)
6239 		goto out;
6240 
6241 	/* stop when tracing is finished */
6242 	if (trace_empty(iter)) {
6243 		sret = 0;
6244 		goto out;
6245 	}
6246 
6247 	if (cnt >= PAGE_SIZE)
6248 		cnt = PAGE_SIZE - 1;
6249 
6250 	/* reset all but tr, trace, and overruns */
6251 	memset(&iter->seq, 0,
6252 	       sizeof(struct trace_iterator) -
6253 	       offsetof(struct trace_iterator, seq));
6254 	cpumask_clear(iter->started);
6255 	trace_seq_init(&iter->seq);
6256 	iter->pos = -1;
6257 
6258 	trace_event_read_lock();
6259 	trace_access_lock(iter->cpu_file);
6260 	while (trace_find_next_entry_inc(iter) != NULL) {
6261 		enum print_line_t ret;
6262 		int save_len = iter->seq.seq.len;
6263 
6264 		ret = print_trace_line(iter);
6265 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6266 			/* don't print partial lines */
6267 			iter->seq.seq.len = save_len;
6268 			break;
6269 		}
6270 		if (ret != TRACE_TYPE_NO_CONSUME)
6271 			trace_consume(iter);
6272 
6273 		if (trace_seq_used(&iter->seq) >= cnt)
6274 			break;
6275 
6276 		/*
6277 		 * Setting the full flag means we reached the trace_seq buffer
6278 		 * size and should have left via the partial-line condition above.
6279 		 * One of the trace_seq_* functions is not being used properly.
6280 		 */
6281 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6282 			  iter->ent->type);
6283 	}
6284 	trace_access_unlock(iter->cpu_file);
6285 	trace_event_read_unlock();
6286 
6287 	/* Now copy what we have to the user */
6288 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6289 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6290 		trace_seq_init(&iter->seq);
6291 
6292 	/*
6293 	 * If there was nothing to send to the user, in spite of consuming trace
6294 	 * entries, go back to wait for more entries.
6295 	 */
6296 	if (sret == -EBUSY)
6297 		goto waitagain;
6298 
6299 out:
6300 	mutex_unlock(&iter->mutex);
6301 
6302 	return sret;
6303 }
6304 
6305 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6306 				     unsigned int idx)
6307 {
6308 	__free_page(spd->pages[idx]);
6309 }
6310 
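/*
 * Format trace entries into iter->seq until the page-sized seq buffer
 * fills, @rem is used up, or no entries remain. Returns how many bytes
 * the caller still wants.
 */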
6311 static size_t
6312 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6313 {
6314 	size_t count;
6315 	int save_len;
6316 	int ret;
6317 
6318 	/* Seq buffer is page-sized, exactly what we need. */
6319 	for (;;) {
6320 		save_len = iter->seq.seq.len;
6321 		ret = print_trace_line(iter);
6322 
6323 		if (trace_seq_has_overflowed(&iter->seq)) {
6324 			iter->seq.seq.len = save_len;
6325 			break;
6326 		}
6327 
6328 		/*
6329 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6330 		 * should only be returned if iter->seq overflowed. But
6331 		 * check it anyway to be safe.
6332 		 */
6333 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6334 			iter->seq.seq.len = save_len;
6335 			break;
6336 		}
6337 
6338 		count = trace_seq_used(&iter->seq) - save_len;
6339 		if (rem < count) {
6340 			rem = 0;
6341 			iter->seq.seq.len = save_len;
6342 			break;
6343 		}
6344 
6345 		if (ret != TRACE_TYPE_NO_CONSUME)
6346 			trace_consume(iter);
6347 		rem -= count;
6348 		if (!trace_find_next_entry_inc(iter))	{
6349 			rem = 0;
6350 			iter->ent = NULL;
6351 			break;
6352 		}
6353 	}
6354 
6355 	return rem;
6356 }
6357 
6358 static ssize_t tracing_splice_read_pipe(struct file *filp,
6359 					loff_t *ppos,
6360 					struct pipe_inode_info *pipe,
6361 					size_t len,
6362 					unsigned int flags)
6363 {
6364 	struct page *pages_def[PIPE_DEF_BUFFERS];
6365 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6366 	struct trace_iterator *iter = filp->private_data;
6367 	struct splice_pipe_desc spd = {
6368 		.pages		= pages_def,
6369 		.partial	= partial_def,
6370 		.nr_pages	= 0, /* This gets updated below. */
6371 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6372 		.ops		= &default_pipe_buf_ops,
6373 		.spd_release	= tracing_spd_release_pipe,
6374 	};
6375 	ssize_t ret;
6376 	size_t rem;
6377 	unsigned int i;
6378 
6379 	if (splice_grow_spd(pipe, &spd))
6380 		return -ENOMEM;
6381 
6382 	mutex_lock(&iter->mutex);
6383 
6384 	if (iter->trace->splice_read) {
6385 		ret = iter->trace->splice_read(iter, filp,
6386 					       ppos, pipe, len, flags);
6387 		if (ret)
6388 			goto out_err;
6389 	}
6390 
6391 	ret = tracing_wait_pipe(filp);
6392 	if (ret <= 0)
6393 		goto out_err;
6394 
6395 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6396 		ret = -EFAULT;
6397 		goto out_err;
6398 	}
6399 
6400 	trace_event_read_lock();
6401 	trace_access_lock(iter->cpu_file);
6402 
6403 	/* Fill as many pages as possible. */
6404 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6405 		spd.pages[i] = alloc_page(GFP_KERNEL);
6406 		if (!spd.pages[i])
6407 			break;
6408 
6409 		rem = tracing_fill_pipe_page(rem, iter);
6410 
6411 		/* Copy the data into the page, so we can start over. */
6412 		ret = trace_seq_to_buffer(&iter->seq,
6413 					  page_address(spd.pages[i]),
6414 					  trace_seq_used(&iter->seq));
6415 		if (ret < 0) {
6416 			__free_page(spd.pages[i]);
6417 			break;
6418 		}
6419 		spd.partial[i].offset = 0;
6420 		spd.partial[i].len = trace_seq_used(&iter->seq);
6421 
6422 		trace_seq_init(&iter->seq);
6423 	}
6424 
6425 	trace_access_unlock(iter->cpu_file);
6426 	trace_event_read_unlock();
6427 	mutex_unlock(&iter->mutex);
6428 
6429 	spd.nr_pages = i;
6430 
6431 	if (i)
6432 		ret = splice_to_pipe(pipe, &spd);
6433 	else
6434 		ret = 0;
6435 out:
6436 	splice_shrink_spd(&spd);
6437 	return ret;
6438 
6439 out_err:
6440 	mutex_unlock(&iter->mutex);
6441 	goto out;
6442 }
6443 
6444 static ssize_t
6445 tracing_entries_read(struct file *filp, char __user *ubuf,
6446 		     size_t cnt, loff_t *ppos)
6447 {
6448 	struct inode *inode = file_inode(filp);
6449 	struct trace_array *tr = inode->i_private;
6450 	int cpu = tracing_get_cpu(inode);
6451 	char buf[64];
6452 	int r = 0;
6453 	ssize_t ret;
6454 
6455 	mutex_lock(&trace_types_lock);
6456 
6457 	if (cpu == RING_BUFFER_ALL_CPUS) {
6458 		int cpu, buf_size_same;
6459 		unsigned long size;
6460 
6461 		size = 0;
6462 		buf_size_same = 1;
6463 		/* check if all CPU buffer sizes are the same */
6464 		for_each_tracing_cpu(cpu) {
6465 			/* fill in the size from first enabled cpu */
6466 			if (size == 0)
6467 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6468 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6469 				buf_size_same = 0;
6470 				break;
6471 			}
6472 		}
6473 
6474 		if (buf_size_same) {
6475 			if (!ring_buffer_expanded)
6476 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6477 					    size >> 10,
6478 					    trace_buf_size >> 10);
6479 			else
6480 				r = sprintf(buf, "%lu\n", size >> 10);
6481 		} else
6482 			r = sprintf(buf, "X\n");
6483 	} else
6484 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6485 
6486 	mutex_unlock(&trace_types_lock);
6487 
6488 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6489 	return ret;
6490 }
6491 
6492 static ssize_t
6493 tracing_entries_write(struct file *filp, const char __user *ubuf,
6494 		      size_t cnt, loff_t *ppos)
6495 {
6496 	struct inode *inode = file_inode(filp);
6497 	struct trace_array *tr = inode->i_private;
6498 	unsigned long val;
6499 	int ret;
6500 
6501 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6502 	if (ret)
6503 		return ret;
6504 
6505 	/* must have at least 1 entry */
6506 	if (!val)
6507 		return -EINVAL;
6508 
6509 	/* value is in KB */
6510 	val <<= 10;
6511 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6512 	if (ret < 0)
6513 		return ret;
6514 
6515 	*ppos += cnt;
6516 
6517 	return cnt;
6518 }
6519 
6520 static ssize_t
6521 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6522 				size_t cnt, loff_t *ppos)
6523 {
6524 	struct trace_array *tr = filp->private_data;
6525 	char buf[64];
6526 	int r, cpu;
6527 	unsigned long size = 0, expanded_size = 0;
6528 
6529 	mutex_lock(&trace_types_lock);
6530 	for_each_tracing_cpu(cpu) {
6531 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6532 		if (!ring_buffer_expanded)
6533 			expanded_size += trace_buf_size >> 10;
6534 	}
6535 	if (ring_buffer_expanded)
6536 		r = sprintf(buf, "%lu\n", size);
6537 	else
6538 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6539 	mutex_unlock(&trace_types_lock);
6540 
6541 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6542 }
6543 
6544 static ssize_t
6545 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6546 			  size_t cnt, loff_t *ppos)
6547 {
6548 	/*
6549 	 * There is no need to read what the user has written; this function
6550 	 * exists only so that "echo" to this file does not return an error.
6551 	 */
6552 
6553 	*ppos += cnt;
6554 
6555 	return cnt;
6556 }
6557 
6558 static int
6559 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6560 {
6561 	struct trace_array *tr = inode->i_private;
6562 
6563 	/* disable tracing? */
6564 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6565 		tracer_tracing_off(tr);
6566 	/* resize the ring buffer to 0 */
6567 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6568 
6569 	trace_array_put(tr);
6570 
6571 	return 0;
6572 }
6573 
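/*
 * Handler for writes to the trace_marker file: copy the user string
 * into a TRACE_PRINT event in the ring buffer and fire any triggers
 * attached to the trace_marker event.
 */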
6574 static ssize_t
6575 tracing_mark_write(struct file *filp, const char __user *ubuf,
6576 					size_t cnt, loff_t *fpos)
6577 {
6578 	struct trace_array *tr = filp->private_data;
6579 	struct ring_buffer_event *event;
6580 	enum event_trigger_type tt = ETT_NONE;
6581 	struct trace_buffer *buffer;
6582 	struct print_entry *entry;
6583 	unsigned long irq_flags;
6584 	ssize_t written;
6585 	int size;
6586 	int len;
6587 
6588 /* Used in tracing_mark_raw_write() as well */
6589 #define FAULTED_STR "<faulted>"
6590 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6591 
6592 	if (tracing_disabled)
6593 		return -EINVAL;
6594 
6595 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6596 		return -EINVAL;
6597 
6598 	if (cnt > TRACE_BUF_SIZE)
6599 		cnt = TRACE_BUF_SIZE;
6600 
6601 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6602 
6603 	local_save_flags(irq_flags);
6604 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6605 
6606 	/* If less than "<faulted>", then make sure we can still add that */
6607 	if (cnt < FAULTED_SIZE)
6608 		size += FAULTED_SIZE - cnt;
6609 
6610 	buffer = tr->array_buffer.buffer;
6611 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6612 					    irq_flags, preempt_count());
6613 	if (unlikely(!event))
6614 		/* Ring buffer disabled, return as if not open for write */
6615 		return -EBADF;
6616 
6617 	entry = ring_buffer_event_data(event);
6618 	entry->ip = _THIS_IP_;
6619 
6620 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6621 	if (len) {
6622 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6623 		cnt = FAULTED_SIZE;
6624 		written = -EFAULT;
6625 	} else
6626 		written = cnt;
6627 	len = cnt;
6628 
6629 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6630 		/* do not add \n before testing triggers, but add \0 */
6631 		entry->buf[cnt] = '\0';
6632 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6633 	}
6634 
6635 	if (entry->buf[cnt - 1] != '\n') {
6636 		entry->buf[cnt] = '\n';
6637 		entry->buf[cnt + 1] = '\0';
6638 	} else
6639 		entry->buf[cnt] = '\0';
6640 
6641 	__buffer_unlock_commit(buffer, event);
6642 
6643 	if (tt)
6644 		event_triggers_post_call(tr->trace_marker_file, tt);
6645 
6646 	if (written > 0)
6647 		*fpos += written;
6648 
6649 	return written;
6650 }
6651 
6652 /* Limit it for now to 3K (including tag) */
6653 #define RAW_DATA_MAX_SIZE (1024*3)
6654 
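/*
 * Handler for writes to the raw trace marker file: copy the binary
 * payload (a tag id followed by raw data) into a TRACE_RAW_DATA event.
 */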
6655 static ssize_t
6656 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6657 					size_t cnt, loff_t *fpos)
6658 {
6659 	struct trace_array *tr = filp->private_data;
6660 	struct ring_buffer_event *event;
6661 	struct trace_buffer *buffer;
6662 	struct raw_data_entry *entry;
6663 	unsigned long irq_flags;
6664 	ssize_t written;
6665 	int size;
6666 	int len;
6667 
6668 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6669 
6670 	if (tracing_disabled)
6671 		return -EINVAL;
6672 
6673 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6674 		return -EINVAL;
6675 
6676 	/* The marker must at least have a tag id */
6677 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6678 		return -EINVAL;
6679 
6680 	if (cnt > TRACE_BUF_SIZE)
6681 		cnt = TRACE_BUF_SIZE;
6682 
6683 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6684 
6685 	local_save_flags(irq_flags);
6686 	size = sizeof(*entry) + cnt;
6687 	if (cnt < FAULT_SIZE_ID)
6688 		size += FAULT_SIZE_ID - cnt;
6689 
6690 	buffer = tr->array_buffer.buffer;
6691 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6692 					    irq_flags, preempt_count());
6693 	if (!event)
6694 		/* Ring buffer disabled, return as if not open for write */
6695 		return -EBADF;
6696 
6697 	entry = ring_buffer_event_data(event);
6698 
6699 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6700 	if (len) {
6701 		entry->id = -1;
6702 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6703 		written = -EFAULT;
6704 	} else
6705 		written = cnt;
6706 
6707 	__buffer_unlock_commit(buffer, event);
6708 
6709 	if (written > 0)
6710 		*fpos += written;
6711 
6712 	return written;
6713 }
6714 
6715 static int tracing_clock_show(struct seq_file *m, void *v)
6716 {
6717 	struct trace_array *tr = m->private;
6718 	int i;
6719 
6720 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6721 		seq_printf(m,
6722 			"%s%s%s%s", i ? " " : "",
6723 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6724 			i == tr->clock_id ? "]" : "");
6725 	seq_putc(m, '\n');
6726 
6727 	return 0;
6728 }
6729 
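/*
 * Select the trace clock named @clockstr for @tr and reset the buffers,
 * since timestamps taken with different clocks are not comparable.
 */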
6730 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6731 {
6732 	int i;
6733 
6734 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6735 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6736 			break;
6737 	}
6738 	if (i == ARRAY_SIZE(trace_clocks))
6739 		return -EINVAL;
6740 
6741 	mutex_lock(&trace_types_lock);
6742 
6743 	tr->clock_id = i;
6744 
6745 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6746 
6747 	/*
6748 	 * New clock may not be consistent with the previous clock.
6749 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6750 	 */
6751 	tracing_reset_online_cpus(&tr->array_buffer);
6752 
6753 #ifdef CONFIG_TRACER_MAX_TRACE
6754 	if (tr->max_buffer.buffer)
6755 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6756 	tracing_reset_online_cpus(&tr->max_buffer);
6757 #endif
6758 
6759 	mutex_unlock(&trace_types_lock);
6760 
6761 	return 0;
6762 }
6763 
6764 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6765 				   size_t cnt, loff_t *fpos)
6766 {
6767 	struct seq_file *m = filp->private_data;
6768 	struct trace_array *tr = m->private;
6769 	char buf[64];
6770 	const char *clockstr;
6771 	int ret;
6772 
6773 	if (cnt >= sizeof(buf))
6774 		return -EINVAL;
6775 
6776 	if (copy_from_user(buf, ubuf, cnt))
6777 		return -EFAULT;
6778 
6779 	buf[cnt] = 0;
6780 
6781 	clockstr = strstrip(buf);
6782 
6783 	ret = tracing_set_clock(tr, clockstr);
6784 	if (ret)
6785 		return ret;
6786 
6787 	*fpos += cnt;
6788 
6789 	return cnt;
6790 }
6791 
6792 static int tracing_clock_open(struct inode *inode, struct file *file)
6793 {
6794 	struct trace_array *tr = inode->i_private;
6795 	int ret;
6796 
6797 	ret = tracing_check_open_get_tr(tr);
6798 	if (ret)
6799 		return ret;
6800 
6801 	ret = single_open(file, tracing_clock_show, inode->i_private);
6802 	if (ret < 0)
6803 		trace_array_put(tr);
6804 
6805 	return ret;
6806 }
6807 
6808 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6809 {
6810 	struct trace_array *tr = m->private;
6811 
6812 	mutex_lock(&trace_types_lock);
6813 
6814 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6815 		seq_puts(m, "delta [absolute]\n");
6816 	else
6817 		seq_puts(m, "[delta] absolute\n");
6818 
6819 	mutex_unlock(&trace_types_lock);
6820 
6821 	return 0;
6822 }
6823 
6824 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6825 {
6826 	struct trace_array *tr = inode->i_private;
6827 	int ret;
6828 
6829 	ret = tracing_check_open_get_tr(tr);
6830 	if (ret)
6831 		return ret;
6832 
6833 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6834 	if (ret < 0)
6835 		trace_array_put(tr);
6836 
6837 	return ret;
6838 }
6839 
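/*
 * Reference-counted switch between delta and absolute timestamps for
 * @tr's ring buffer(s): the mode only changes on the first request for
 * absolute timestamps and on the last release of that request.
 */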
6840 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6841 {
6842 	int ret = 0;
6843 
6844 	mutex_lock(&trace_types_lock);
6845 
6846 	if (abs && tr->time_stamp_abs_ref++)
6847 		goto out;
6848 
6849 	if (!abs) {
6850 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6851 			ret = -EINVAL;
6852 			goto out;
6853 		}
6854 
6855 		if (--tr->time_stamp_abs_ref)
6856 			goto out;
6857 	}
6858 
6859 	ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6860 
6861 #ifdef CONFIG_TRACER_MAX_TRACE
6862 	if (tr->max_buffer.buffer)
6863 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6864 #endif
6865  out:
6866 	mutex_unlock(&trace_types_lock);
6867 
6868 	return ret;
6869 }
6870 
6871 struct ftrace_buffer_info {
6872 	struct trace_iterator	iter;
6873 	void			*spare;
6874 	unsigned int		spare_cpu;
6875 	unsigned int		read;
6876 };
6877 
6878 #ifdef CONFIG_TRACER_SNAPSHOT
6879 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6880 {
6881 	struct trace_array *tr = inode->i_private;
6882 	struct trace_iterator *iter;
6883 	struct seq_file *m;
6884 	int ret;
6885 
6886 	ret = tracing_check_open_get_tr(tr);
6887 	if (ret)
6888 		return ret;
6889 
6890 	if (file->f_mode & FMODE_READ) {
6891 		iter = __tracing_open(inode, file, true);
6892 		if (IS_ERR(iter))
6893 			ret = PTR_ERR(iter);
6894 	} else {
6895 		/* Writes still need the seq_file to hold the private data */
6896 		ret = -ENOMEM;
6897 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6898 		if (!m)
6899 			goto out;
6900 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6901 		if (!iter) {
6902 			kfree(m);
6903 			goto out;
6904 		}
6905 		ret = 0;
6906 
6907 		iter->tr = tr;
6908 		iter->array_buffer = &tr->max_buffer;
6909 		iter->cpu_file = tracing_get_cpu(inode);
6910 		m->private = iter;
6911 		file->private_data = m;
6912 	}
6913 out:
6914 	if (ret < 0)
6915 		trace_array_put(tr);
6916 
6917 	return ret;
6918 }
6919 
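/*
 * Handler for writes to the snapshot file:
 *   0     - free the snapshot buffer (only valid for all CPUs)
 *   1     - allocate the snapshot buffer if needed and take a snapshot
 *   other - clear the snapshot buffer contents
 */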
6920 static ssize_t
6921 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6922 		       loff_t *ppos)
6923 {
6924 	struct seq_file *m = filp->private_data;
6925 	struct trace_iterator *iter = m->private;
6926 	struct trace_array *tr = iter->tr;
6927 	unsigned long val;
6928 	int ret;
6929 
6930 	ret = tracing_update_buffers();
6931 	if (ret < 0)
6932 		return ret;
6933 
6934 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6935 	if (ret)
6936 		return ret;
6937 
6938 	mutex_lock(&trace_types_lock);
6939 
6940 	if (tr->current_trace->use_max_tr) {
6941 		ret = -EBUSY;
6942 		goto out;
6943 	}
6944 
6945 	arch_spin_lock(&tr->max_lock);
6946 	if (tr->cond_snapshot)
6947 		ret = -EBUSY;
6948 	arch_spin_unlock(&tr->max_lock);
6949 	if (ret)
6950 		goto out;
6951 
6952 	switch (val) {
6953 	case 0:
6954 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6955 			ret = -EINVAL;
6956 			break;
6957 		}
6958 		if (tr->allocated_snapshot)
6959 			free_snapshot(tr);
6960 		break;
6961 	case 1:
6962 /* Only allow per-cpu swap if the ring buffer supports it */
6963 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6964 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6965 			ret = -EINVAL;
6966 			break;
6967 		}
6968 #endif
6969 		if (tr->allocated_snapshot)
6970 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6971 					&tr->array_buffer, iter->cpu_file);
6972 		else
6973 			ret = tracing_alloc_snapshot_instance(tr);
6974 		if (ret < 0)
6975 			break;
6976 		local_irq_disable();
6977 		/* Now, we're going to swap */
6978 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6979 			update_max_tr(tr, current, smp_processor_id(), NULL);
6980 		else
6981 			update_max_tr_single(tr, current, iter->cpu_file);
6982 		local_irq_enable();
6983 		break;
6984 	default:
6985 		if (tr->allocated_snapshot) {
6986 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6987 				tracing_reset_online_cpus(&tr->max_buffer);
6988 			else
6989 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6990 		}
6991 		break;
6992 	}
6993 
6994 	if (ret >= 0) {
6995 		*ppos += cnt;
6996 		ret = cnt;
6997 	}
6998 out:
6999 	mutex_unlock(&trace_types_lock);
7000 	return ret;
7001 }
7002 
7003 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7004 {
7005 	struct seq_file *m = file->private_data;
7006 	int ret;
7007 
7008 	ret = tracing_release(inode, file);
7009 
7010 	if (file->f_mode & FMODE_READ)
7011 		return ret;
7012 
7013 	/* If write only, the seq_file is just a stub */
7014 	if (m)
7015 		kfree(m->private);
7016 	kfree(m);
7017 
7018 	return 0;
7019 }
7020 
7021 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7022 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7023 				    size_t count, loff_t *ppos);
7024 static int tracing_buffers_release(struct inode *inode, struct file *file);
7025 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7026 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7027 
7028 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7029 {
7030 	struct ftrace_buffer_info *info;
7031 	int ret;
7032 
7033 	/* The following checks for tracefs lockdown */
7034 	ret = tracing_buffers_open(inode, filp);
7035 	if (ret < 0)
7036 		return ret;
7037 
7038 	info = filp->private_data;
7039 
7040 	if (info->iter.trace->use_max_tr) {
7041 		tracing_buffers_release(inode, filp);
7042 		return -EBUSY;
7043 	}
7044 
7045 	info->iter.snapshot = true;
7046 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7047 
7048 	return ret;
7049 }
7050 
7051 #endif /* CONFIG_TRACER_SNAPSHOT */
7052 
7053 
7054 static const struct file_operations tracing_thresh_fops = {
7055 	.open		= tracing_open_generic,
7056 	.read		= tracing_thresh_read,
7057 	.write		= tracing_thresh_write,
7058 	.llseek		= generic_file_llseek,
7059 };
7060 
7061 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7062 static const struct file_operations tracing_max_lat_fops = {
7063 	.open		= tracing_open_generic,
7064 	.read		= tracing_max_lat_read,
7065 	.write		= tracing_max_lat_write,
7066 	.llseek		= generic_file_llseek,
7067 };
7068 #endif
7069 
7070 static const struct file_operations set_tracer_fops = {
7071 	.open		= tracing_open_generic,
7072 	.read		= tracing_set_trace_read,
7073 	.write		= tracing_set_trace_write,
7074 	.llseek		= generic_file_llseek,
7075 };
7076 
7077 static const struct file_operations tracing_pipe_fops = {
7078 	.open		= tracing_open_pipe,
7079 	.poll		= tracing_poll_pipe,
7080 	.read		= tracing_read_pipe,
7081 	.splice_read	= tracing_splice_read_pipe,
7082 	.release	= tracing_release_pipe,
7083 	.llseek		= no_llseek,
7084 };
7085 
7086 static const struct file_operations tracing_entries_fops = {
7087 	.open		= tracing_open_generic_tr,
7088 	.read		= tracing_entries_read,
7089 	.write		= tracing_entries_write,
7090 	.llseek		= generic_file_llseek,
7091 	.release	= tracing_release_generic_tr,
7092 };
7093 
7094 static const struct file_operations tracing_total_entries_fops = {
7095 	.open		= tracing_open_generic_tr,
7096 	.read		= tracing_total_entries_read,
7097 	.llseek		= generic_file_llseek,
7098 	.release	= tracing_release_generic_tr,
7099 };
7100 
7101 static const struct file_operations tracing_free_buffer_fops = {
7102 	.open		= tracing_open_generic_tr,
7103 	.write		= tracing_free_buffer_write,
7104 	.release	= tracing_free_buffer_release,
7105 };
7106 
7107 static const struct file_operations tracing_mark_fops = {
7108 	.open		= tracing_open_generic_tr,
7109 	.write		= tracing_mark_write,
7110 	.llseek		= generic_file_llseek,
7111 	.release	= tracing_release_generic_tr,
7112 };
7113 
7114 static const struct file_operations tracing_mark_raw_fops = {
7115 	.open		= tracing_open_generic_tr,
7116 	.write		= tracing_mark_raw_write,
7117 	.llseek		= generic_file_llseek,
7118 	.release	= tracing_release_generic_tr,
7119 };
7120 
7121 static const struct file_operations trace_clock_fops = {
7122 	.open		= tracing_clock_open,
7123 	.read		= seq_read,
7124 	.llseek		= seq_lseek,
7125 	.release	= tracing_single_release_tr,
7126 	.write		= tracing_clock_write,
7127 };
7128 
7129 static const struct file_operations trace_time_stamp_mode_fops = {
7130 	.open		= tracing_time_stamp_mode_open,
7131 	.read		= seq_read,
7132 	.llseek		= seq_lseek,
7133 	.release	= tracing_single_release_tr,
7134 };
7135 
7136 #ifdef CONFIG_TRACER_SNAPSHOT
7137 static const struct file_operations snapshot_fops = {
7138 	.open		= tracing_snapshot_open,
7139 	.read		= seq_read,
7140 	.write		= tracing_snapshot_write,
7141 	.llseek		= tracing_lseek,
7142 	.release	= tracing_snapshot_release,
7143 };
7144 
7145 static const struct file_operations snapshot_raw_fops = {
7146 	.open		= snapshot_raw_open,
7147 	.read		= tracing_buffers_read,
7148 	.release	= tracing_buffers_release,
7149 	.splice_read	= tracing_buffers_splice_read,
7150 	.llseek		= no_llseek,
7151 };
7152 
7153 #endif /* CONFIG_TRACER_SNAPSHOT */
7154 
7155 #define TRACING_LOG_ERRS_MAX	8
7156 #define TRACING_LOG_LOC_MAX	128
7157 
7158 #define CMD_PREFIX "  Command: "
7159 
7160 struct err_info {
7161 	const char	**errs;	/* ptr to loc-specific array of err strings */
7162 	u8		type;	/* index into errs -> specific err string */
7163 	u8		pos;	/* caret position in cmd (MAX_FILTER_STR_VAL = 256) */
7164 	u64		ts;
7165 };
7166 
7167 struct tracing_log_err {
7168 	struct list_head	list;
7169 	struct err_info		info;
7170 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7171 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7172 };
7173 
7174 static DEFINE_MUTEX(tracing_err_log_lock);
7175 
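/*
 * Return an error-log entry to fill in: allocate a new one until
 * TRACING_LOG_ERRS_MAX entries exist, then recycle the oldest entry.
 */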
7176 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7177 {
7178 	struct tracing_log_err *err;
7179 
7180 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7181 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7182 		if (!err)
7183 			err = ERR_PTR(-ENOMEM);
7184 		tr->n_err_log_entries++;
7185 
7186 		return err;
7187 	}
7188 
7189 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7190 	list_del(&err->list);
7191 
7192 	return err;
7193 }
7194 
7195 /**
7196  * err_pos - find the position of a string within a command for error careting
7197  * @cmd: The tracing command that caused the error
7198  * @str: The string to position the caret at within @cmd
7199  *
7200  * Finds the position of the first occurrence of @str within @cmd.  The
7201  * return value can be passed to tracing_log_err() for caret placement
7202  * within @cmd.
7203  *
7204  * Returns the index within @cmd of the first occurrence of @str or 0
7205  * if @str was not found.
7206  */
7207 unsigned int err_pos(char *cmd, const char *str)
7208 {
7209 	char *found;
7210 
7211 	if (WARN_ON(!strlen(cmd)))
7212 		return 0;
7213 
7214 	found = strstr(cmd, str);
7215 	if (found)
7216 		return found - cmd;
7217 
7218 	return 0;
7219 }
7220 
7221 /**
7222  * tracing_log_err - write an error to the tracing error log
7223  * @tr: The associated trace array for the error (NULL for top level array)
7224  * @loc: A string describing where the error occurred
7225  * @cmd: The tracing command that caused the error
7226  * @errs: The array of loc-specific static error strings
7227  * @type: The index into errs[], which produces the specific static err string
7228  * @pos: The position the caret should be placed in the cmd
7229  *
7230  * Writes an error into tracing/error_log of the form:
7231  *
7232  * <loc>: error: <text>
7233  *   Command: <cmd>
7234  *              ^
7235  *
7236  * tracing/error_log is a small log file containing the last
7237  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7238  * unless there has been a tracing error, and the error log can be
7239  * cleared and have its memory freed by writing the empty string to it
7240  * in truncation mode, i.e. echo > tracing/error_log.
7241  *
7242  * NOTE: the @errs array along with the @type param are used to
7243  * produce a static error string - this string is not copied and saved
7244  * when the error is logged - only a pointer to it is saved.  See
7245  * existing callers for examples of how static strings are typically
7246  * defined for use with tracing_log_err().
7247  */
7248 void tracing_log_err(struct trace_array *tr,
7249 		     const char *loc, const char *cmd,
7250 		     const char **errs, u8 type, u8 pos)
7251 {
7252 	struct tracing_log_err *err;
7253 
7254 	if (!tr)
7255 		tr = &global_trace;
7256 
7257 	mutex_lock(&tracing_err_log_lock);
7258 	err = get_tracing_log_err(tr);
7259 	if (PTR_ERR(err) == -ENOMEM) {
7260 		mutex_unlock(&tracing_err_log_lock);
7261 		return;
7262 	}
7263 
7264 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7265 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7266 
7267 	err->info.errs = errs;
7268 	err->info.type = type;
7269 	err->info.pos = pos;
7270 	err->info.ts = local_clock();
7271 
7272 	list_add_tail(&err->list, &tr->err_log);
7273 	mutex_unlock(&tracing_err_log_lock);
7274 }
7275 
7276 static void clear_tracing_err_log(struct trace_array *tr)
7277 {
7278 	struct tracing_log_err *err, *next;
7279 
7280 	mutex_lock(&tracing_err_log_lock);
7281 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7282 		list_del(&err->list);
7283 		kfree(err);
7284 	}
7285 
7286 	tr->n_err_log_entries = 0;
7287 	mutex_unlock(&tracing_err_log_lock);
7288 }
7289 
7290 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7291 {
7292 	struct trace_array *tr = m->private;
7293 
7294 	mutex_lock(&tracing_err_log_lock);
7295 
7296 	return seq_list_start(&tr->err_log, *pos);
7297 }
7298 
7299 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7300 {
7301 	struct trace_array *tr = m->private;
7302 
7303 	return seq_list_next(v, &tr->err_log, pos);
7304 }
7305 
7306 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7307 {
7308 	mutex_unlock(&tracing_err_log_lock);
7309 }
7310 
7311 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7312 {
7313 	u8 i;
7314 
7315 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7316 		seq_putc(m, ' ');
7317 	for (i = 0; i < pos; i++)
7318 		seq_putc(m, ' ');
7319 	seq_puts(m, "^\n");
7320 }
7321 
7322 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7323 {
7324 	struct tracing_log_err *err = v;
7325 
7326 	if (err) {
7327 		const char *err_text = err->info.errs[err->info.type];
7328 		u64 sec = err->info.ts;
7329 		u32 nsec;
7330 
7331 		nsec = do_div(sec, NSEC_PER_SEC);
7332 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7333 			   err->loc, err_text);
7334 		seq_printf(m, "%s", err->cmd);
7335 		tracing_err_log_show_pos(m, err->info.pos);
7336 	}
7337 
7338 	return 0;
7339 }
7340 
7341 static const struct seq_operations tracing_err_log_seq_ops = {
7342 	.start  = tracing_err_log_seq_start,
7343 	.next   = tracing_err_log_seq_next,
7344 	.stop   = tracing_err_log_seq_stop,
7345 	.show   = tracing_err_log_seq_show
7346 };
7347 
7348 static int tracing_err_log_open(struct inode *inode, struct file *file)
7349 {
7350 	struct trace_array *tr = inode->i_private;
7351 	int ret = 0;
7352 
7353 	ret = tracing_check_open_get_tr(tr);
7354 	if (ret)
7355 		return ret;
7356 
7357 	/* If this file was opened for write, then erase contents */
7358 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7359 		clear_tracing_err_log(tr);
7360 
7361 	if (file->f_mode & FMODE_READ) {
7362 		ret = seq_open(file, &tracing_err_log_seq_ops);
7363 		if (!ret) {
7364 			struct seq_file *m = file->private_data;
7365 			m->private = tr;
7366 		} else {
7367 			trace_array_put(tr);
7368 		}
7369 	}
7370 	return ret;
7371 }
7372 
7373 static ssize_t tracing_err_log_write(struct file *file,
7374 				     const char __user *buffer,
7375 				     size_t count, loff_t *ppos)
7376 {
7377 	return count;
7378 }
7379 
7380 static int tracing_err_log_release(struct inode *inode, struct file *file)
7381 {
7382 	struct trace_array *tr = inode->i_private;
7383 
7384 	trace_array_put(tr);
7385 
7386 	if (file->f_mode & FMODE_READ)
7387 		seq_release(inode, file);
7388 
7389 	return 0;
7390 }
7391 
7392 static const struct file_operations tracing_err_log_fops = {
7393 	.open           = tracing_err_log_open,
7394 	.write		= tracing_err_log_write,
7395 	.read           = seq_read,
7396 	.llseek         = seq_lseek,
7397 	.release        = tracing_err_log_release,
7398 };
7399 
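/*
 * Open handler for the per-CPU raw buffer files: set up an
 * ftrace_buffer_info iterator for reading ring-buffer pages directly.
 */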
7400 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7401 {
7402 	struct trace_array *tr = inode->i_private;
7403 	struct ftrace_buffer_info *info;
7404 	int ret;
7405 
7406 	ret = tracing_check_open_get_tr(tr);
7407 	if (ret)
7408 		return ret;
7409 
7410 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7411 	if (!info) {
7412 		trace_array_put(tr);
7413 		return -ENOMEM;
7414 	}
7415 
7416 	mutex_lock(&trace_types_lock);
7417 
7418 	info->iter.tr		= tr;
7419 	info->iter.cpu_file	= tracing_get_cpu(inode);
7420 	info->iter.trace	= tr->current_trace;
7421 	info->iter.array_buffer = &tr->array_buffer;
7422 	info->spare		= NULL;
7423 	/* Force reading ring buffer for first read */
7424 	info->read		= (unsigned int)-1;
7425 
7426 	filp->private_data = info;
7427 
7428 	tr->current_trace->ref++;
7429 
7430 	mutex_unlock(&trace_types_lock);
7431 
7432 	ret = nonseekable_open(inode, filp);
7433 	if (ret < 0)
7434 		trace_array_put(tr);
7435 
7436 	return ret;
7437 }
7438 
7439 static __poll_t
7440 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7441 {
7442 	struct ftrace_buffer_info *info = filp->private_data;
7443 	struct trace_iterator *iter = &info->iter;
7444 
7445 	return trace_poll(iter, filp, poll_table);
7446 }
7447 
7448 static ssize_t
7449 tracing_buffers_read(struct file *filp, char __user *ubuf,
7450 		     size_t count, loff_t *ppos)
7451 {
7452 	struct ftrace_buffer_info *info = filp->private_data;
7453 	struct trace_iterator *iter = &info->iter;
7454 	ssize_t ret = 0;
7455 	ssize_t size;
7456 
7457 	if (!count)
7458 		return 0;
7459 
7460 #ifdef CONFIG_TRACER_MAX_TRACE
7461 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7462 		return -EBUSY;
7463 #endif
7464 
7465 	if (!info->spare) {
7466 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7467 							  iter->cpu_file);
7468 		if (IS_ERR(info->spare)) {
7469 			ret = PTR_ERR(info->spare);
7470 			info->spare = NULL;
7471 		} else {
7472 			info->spare_cpu = iter->cpu_file;
7473 		}
7474 	}
7475 	if (!info->spare)
7476 		return ret;
7477 
7478 	/* Do we have previous read data to read? */
7479 	if (info->read < PAGE_SIZE)
7480 		goto read;
7481 
7482  again:
7483 	trace_access_lock(iter->cpu_file);
7484 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7485 				    &info->spare,
7486 				    count,
7487 				    iter->cpu_file, 0);
7488 	trace_access_unlock(iter->cpu_file);
7489 
7490 	if (ret < 0) {
7491 		if (trace_empty(iter)) {
7492 			if ((filp->f_flags & O_NONBLOCK))
7493 				return -EAGAIN;
7494 
7495 			ret = wait_on_pipe(iter, 0);
7496 			if (ret)
7497 				return ret;
7498 
7499 			goto again;
7500 		}
7501 		return 0;
7502 	}
7503 
7504 	info->read = 0;
7505  read:
7506 	size = PAGE_SIZE - info->read;
7507 	if (size > count)
7508 		size = count;
7509 
7510 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7511 	if (ret == size)
7512 		return -EFAULT;
7513 
7514 	size -= ret;
7515 
7516 	*ppos += size;
7517 	info->read += size;
7518 
7519 	return size;
7520 }
7521 
7522 static int tracing_buffers_release(struct inode *inode, struct file *file)
7523 {
7524 	struct ftrace_buffer_info *info = file->private_data;
7525 	struct trace_iterator *iter = &info->iter;
7526 
7527 	mutex_lock(&trace_types_lock);
7528 
7529 	iter->tr->current_trace->ref--;
7530 
7531 	__trace_array_put(iter->tr);
7532 
7533 	if (info->spare)
7534 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7535 					   info->spare_cpu, info->spare);
7536 	kfree(info);
7537 
7538 	mutex_unlock(&trace_types_lock);
7539 
7540 	return 0;
7541 }
7542 
7543 struct buffer_ref {
7544 	struct trace_buffer	*buffer;
7545 	void			*page;
7546 	int			cpu;
7547 	refcount_t		refcount;
7548 };
7549 
7550 static void buffer_ref_release(struct buffer_ref *ref)
7551 {
7552 	if (!refcount_dec_and_test(&ref->refcount))
7553 		return;
7554 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7555 	kfree(ref);
7556 }
7557 
7558 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7559 				    struct pipe_buffer *buf)
7560 {
7561 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7562 
7563 	buffer_ref_release(ref);
7564 	buf->private = 0;
7565 }
7566 
7567 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7568 				struct pipe_buffer *buf)
7569 {
7570 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7571 
7572 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7573 		return false;
7574 
7575 	refcount_inc(&ref->refcount);
7576 	return true;
7577 }
7578 
7579 /* Pipe buffer operations for a buffer. */
7580 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7581 	.release		= buffer_pipe_buf_release,
7582 	.get			= buffer_pipe_buf_get,
7583 };
7584 
7585 /*
7586  * Callback from splice_to_pipe() to release the pages left in the
7587  * spd in case we errored out while filling the pipe.
7588  */
7589 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7590 {
7591 	struct buffer_ref *ref =
7592 		(struct buffer_ref *)spd->partial[i].private;
7593 
7594 	buffer_ref_release(ref);
7595 	spd->partial[i].private = 0;
7596 }
7597 
7598 static ssize_t
7599 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7600 			    struct pipe_inode_info *pipe, size_t len,
7601 			    unsigned int flags)
7602 {
7603 	struct ftrace_buffer_info *info = file->private_data;
7604 	struct trace_iterator *iter = &info->iter;
7605 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7606 	struct page *pages_def[PIPE_DEF_BUFFERS];
7607 	struct splice_pipe_desc spd = {
7608 		.pages		= pages_def,
7609 		.partial	= partial_def,
7610 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7611 		.ops		= &buffer_pipe_buf_ops,
7612 		.spd_release	= buffer_spd_release,
7613 	};
7614 	struct buffer_ref *ref;
7615 	int entries, i;
7616 	ssize_t ret = 0;
7617 
7618 #ifdef CONFIG_TRACER_MAX_TRACE
7619 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7620 		return -EBUSY;
7621 #endif
7622 
7623 	if (*ppos & (PAGE_SIZE - 1))
7624 		return -EINVAL;
7625 
7626 	if (len & (PAGE_SIZE - 1)) {
7627 		if (len < PAGE_SIZE)
7628 			return -EINVAL;
7629 		len &= PAGE_MASK;
7630 	}
7631 
7632 	if (splice_grow_spd(pipe, &spd))
7633 		return -ENOMEM;
7634 
7635  again:
7636 	trace_access_lock(iter->cpu_file);
7637 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7638 
7639 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7640 		struct page *page;
7641 		int r;
7642 
7643 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7644 		if (!ref) {
7645 			ret = -ENOMEM;
7646 			break;
7647 		}
7648 
7649 		refcount_set(&ref->refcount, 1);
7650 		ref->buffer = iter->array_buffer->buffer;
7651 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7652 		if (IS_ERR(ref->page)) {
7653 			ret = PTR_ERR(ref->page);
7654 			ref->page = NULL;
7655 			kfree(ref);
7656 			break;
7657 		}
7658 		ref->cpu = iter->cpu_file;
7659 
7660 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7661 					  len, iter->cpu_file, 1);
7662 		if (r < 0) {
7663 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7664 						   ref->page);
7665 			kfree(ref);
7666 			break;
7667 		}
7668 
7669 		page = virt_to_page(ref->page);
7670 
7671 		spd.pages[i] = page;
7672 		spd.partial[i].len = PAGE_SIZE;
7673 		spd.partial[i].offset = 0;
7674 		spd.partial[i].private = (unsigned long)ref;
7675 		spd.nr_pages++;
7676 		*ppos += PAGE_SIZE;
7677 
7678 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7679 	}
7680 
7681 	trace_access_unlock(iter->cpu_file);
7682 	spd.nr_pages = i;
7683 
7684 	/* did we read anything? */
7685 	if (!spd.nr_pages) {
7686 		if (ret)
7687 			goto out;
7688 
7689 		ret = -EAGAIN;
7690 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7691 			goto out;
7692 
7693 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7694 		if (ret)
7695 			goto out;
7696 
7697 		goto again;
7698 	}
7699 
7700 	ret = splice_to_pipe(pipe, &spd);
7701 out:
7702 	splice_shrink_spd(&spd);
7703 
7704 	return ret;
7705 }
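
/*
 * A minimal sketch (illustrative only, not built with the kernel) of how
 * user space might consume trace_pipe_raw through the splice_read path
 * above: splice full pages from the per-cpu file into a pipe and then
 * into an output descriptor. The path, out_fd and error handling are
 * assumptions for the example.
 *
 *	int tfd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		       O_RDONLY);
 *	int pfd[2];
 *
 *	pipe(pfd);
 *	for (;;) {
 *		ssize_t n = splice(tfd, NULL, pfd[1], NULL,
 *				   4096, SPLICE_F_NONBLOCK);
 *		if (n <= 0)
 *			break;
 *		splice(pfd[0], NULL, out_fd, NULL, n, 0);
 *	}
 */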
7706 
7707 static const struct file_operations tracing_buffers_fops = {
7708 	.open		= tracing_buffers_open,
7709 	.read		= tracing_buffers_read,
7710 	.poll		= tracing_buffers_poll,
7711 	.release	= tracing_buffers_release,
7712 	.splice_read	= tracing_buffers_splice_read,
7713 	.llseek		= no_llseek,
7714 };
7715 
7716 static ssize_t
7717 tracing_stats_read(struct file *filp, char __user *ubuf,
7718 		   size_t count, loff_t *ppos)
7719 {
7720 	struct inode *inode = file_inode(filp);
7721 	struct trace_array *tr = inode->i_private;
7722 	struct array_buffer *trace_buf = &tr->array_buffer;
7723 	int cpu = tracing_get_cpu(inode);
7724 	struct trace_seq *s;
7725 	unsigned long cnt;
7726 	unsigned long long t;
7727 	unsigned long usec_rem;
7728 
7729 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7730 	if (!s)
7731 		return -ENOMEM;
7732 
7733 	trace_seq_init(s);
7734 
7735 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7736 	trace_seq_printf(s, "entries: %ld\n", cnt);
7737 
7738 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7739 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7740 
7741 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7742 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7743 
7744 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7745 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7746 
7747 	if (trace_clocks[tr->clock_id].in_ns) {
7748 		/* local or global for trace_clock */
7749 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7750 		usec_rem = do_div(t, USEC_PER_SEC);
7751 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7752 								t, usec_rem);
7753 
7754 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7755 		usec_rem = do_div(t, USEC_PER_SEC);
7756 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7757 	} else {
7758 		/* counter or tsc mode for trace_clock */
7759 		trace_seq_printf(s, "oldest event ts: %llu\n",
7760 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7761 
7762 		trace_seq_printf(s, "now ts: %llu\n",
7763 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7764 	}
7765 
7766 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7767 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7768 
7769 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7770 	trace_seq_printf(s, "read events: %ld\n", cnt);
7771 
7772 	count = simple_read_from_buffer(ubuf, count, ppos,
7773 					s->buffer, trace_seq_used(s));
7774 
7775 	kfree(s);
7776 
7777 	return count;
7778 }
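
/*
 * The per_cpu/cpuN/stats file produced above emits one "key: value"
 * pair per line; values below are placeholders, shown only to document
 * the layout generated by the code above:
 *
 *	entries: <n>
 *	overrun: <n>
 *	commit overrun: <n>
 *	bytes: <n>
 *	oldest event ts: <sec>.<usec>
 *	now ts: <sec>.<usec>
 *	dropped events: <n>
 *	read events: <n>
 */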
7779 
7780 static const struct file_operations tracing_stats_fops = {
7781 	.open		= tracing_open_generic_tr,
7782 	.read		= tracing_stats_read,
7783 	.llseek		= generic_file_llseek,
7784 	.release	= tracing_release_generic_tr,
7785 };
7786 
7787 #ifdef CONFIG_DYNAMIC_FTRACE
7788 
7789 static ssize_t
7790 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7791 		  size_t cnt, loff_t *ppos)
7792 {
7793 	ssize_t ret;
7794 	char *buf;
7795 	int r;
7796 
7797 	/* 256 should be plenty to hold the amount needed */
7798 	buf = kmalloc(256, GFP_KERNEL);
7799 	if (!buf)
7800 		return -ENOMEM;
7801 
7802 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7803 		      ftrace_update_tot_cnt,
7804 		      ftrace_number_of_pages,
7805 		      ftrace_number_of_groups);
7806 
7807 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7808 	kfree(buf);
7809 	return ret;
7810 }
7811 
7812 static const struct file_operations tracing_dyn_info_fops = {
7813 	.open		= tracing_open_generic,
7814 	.read		= tracing_read_dyn_info,
7815 	.llseek		= generic_file_llseek,
7816 };
7817 #endif /* CONFIG_DYNAMIC_FTRACE */
7818 
7819 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7820 static void
7821 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7822 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7823 		void *data)
7824 {
7825 	tracing_snapshot_instance(tr);
7826 }
7827 
7828 static void
7829 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7830 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7831 		      void *data)
7832 {
7833 	struct ftrace_func_mapper *mapper = data;
7834 	long *count = NULL;
7835 
7836 	if (mapper)
7837 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7838 
7839 	if (count) {
7840 
7841 		if (*count <= 0)
7842 			return;
7843 
7844 		(*count)--;
7845 	}
7846 
7847 	tracing_snapshot_instance(tr);
7848 }
7849 
7850 static int
7851 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7852 		      struct ftrace_probe_ops *ops, void *data)
7853 {
7854 	struct ftrace_func_mapper *mapper = data;
7855 	long *count = NULL;
7856 
7857 	seq_printf(m, "%ps:", (void *)ip);
7858 
7859 	seq_puts(m, "snapshot");
7860 
7861 	if (mapper)
7862 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7863 
7864 	if (count)
7865 		seq_printf(m, ":count=%ld\n", *count);
7866 	else
7867 		seq_puts(m, ":unlimited\n");
7868 
7869 	return 0;
7870 }
7871 
7872 static int
7873 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7874 		     unsigned long ip, void *init_data, void **data)
7875 {
7876 	struct ftrace_func_mapper *mapper = *data;
7877 
7878 	if (!mapper) {
7879 		mapper = allocate_ftrace_func_mapper();
7880 		if (!mapper)
7881 			return -ENOMEM;
7882 		*data = mapper;
7883 	}
7884 
7885 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7886 }
7887 
7888 static void
7889 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7890 		     unsigned long ip, void *data)
7891 {
7892 	struct ftrace_func_mapper *mapper = data;
7893 
7894 	if (!ip) {
7895 		if (!mapper)
7896 			return;
7897 		free_ftrace_func_mapper(mapper, NULL);
7898 		return;
7899 	}
7900 
7901 	ftrace_func_mapper_remove_ip(mapper, ip);
7902 }
7903 
7904 static struct ftrace_probe_ops snapshot_probe_ops = {
7905 	.func			= ftrace_snapshot,
7906 	.print			= ftrace_snapshot_print,
7907 };
7908 
7909 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7910 	.func			= ftrace_count_snapshot,
7911 	.print			= ftrace_snapshot_print,
7912 	.init			= ftrace_snapshot_init,
7913 	.free			= ftrace_snapshot_free,
7914 };
7915 
7916 static int
7917 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7918 			       char *glob, char *cmd, char *param, int enable)
7919 {
7920 	struct ftrace_probe_ops *ops;
7921 	void *count = (void *)-1;
7922 	char *number;
7923 	int ret;
7924 
7925 	if (!tr)
7926 		return -ENODEV;
7927 
7928 	/* hash funcs only work with set_ftrace_filter */
7929 	if (!enable)
7930 		return -EINVAL;
7931 
7932 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7933 
7934 	if (glob[0] == '!')
7935 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7936 
7937 	if (!param)
7938 		goto out_reg;
7939 
7940 	number = strsep(&param, ":");
7941 
7942 	if (!strlen(number))
7943 		goto out_reg;
7944 
7945 	/*
7946 	 * We use the callback data field (which is a pointer)
7947 	 * as our counter.
7948 	 */
7949 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7950 	if (ret)
7951 		return ret;
7952 
7953  out_reg:
7954 	ret = tracing_alloc_snapshot_instance(tr);
7955 	if (ret < 0)
7956 		goto out;
7957 
7958 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7959 
7960  out:
7961 	return ret < 0 ? ret : 0;
7962 }
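
/*
 * The callback above implements the "snapshot" command of
 * set_ftrace_filter. Usage sketch (paths assume the usual tracefs
 * mount, "schedule" is just an example function):
 *
 *	# take a snapshot every time schedule() is hit
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *
 *	# only snapshot on the first 5 hits
 *	echo 'schedule:snapshot:5' > set_ftrace_filter
 *
 *	# remove the probe again
 *	echo '!schedule:snapshot' > set_ftrace_filter
 */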
7963 
7964 static struct ftrace_func_command ftrace_snapshot_cmd = {
7965 	.name			= "snapshot",
7966 	.func			= ftrace_trace_snapshot_callback,
7967 };
7968 
7969 static __init int register_snapshot_cmd(void)
7970 {
7971 	return register_ftrace_command(&ftrace_snapshot_cmd);
7972 }
7973 #else
7974 static inline __init int register_snapshot_cmd(void) { return 0; }
7975 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7976 
7977 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7978 {
7979 	if (WARN_ON(!tr->dir))
7980 		return ERR_PTR(-ENODEV);
7981 
7982 	/* Top directory uses NULL as the parent */
7983 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7984 		return NULL;
7985 
7986 	/* All sub buffers have a descriptor */
7987 	return tr->dir;
7988 }
7989 
7990 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7991 {
7992 	struct dentry *d_tracer;
7993 
7994 	if (tr->percpu_dir)
7995 		return tr->percpu_dir;
7996 
7997 	d_tracer = tracing_get_dentry(tr);
7998 	if (IS_ERR(d_tracer))
7999 		return NULL;
8000 
8001 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8002 
8003 	MEM_FAIL(!tr->percpu_dir,
8004 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8005 
8006 	return tr->percpu_dir;
8007 }
8008 
8009 static struct dentry *
8010 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8011 		      void *data, long cpu, const struct file_operations *fops)
8012 {
8013 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8014 
8015 	if (ret) /* See tracing_get_cpu() */
8016 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8017 	return ret;
8018 }
8019 
8020 static void
8021 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8022 {
8023 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8024 	struct dentry *d_cpu;
8025 	char cpu_dir[30]; /* 30 characters should be more than enough */
8026 
8027 	if (!d_percpu)
8028 		return;
8029 
8030 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8031 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8032 	if (!d_cpu) {
8033 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8034 		return;
8035 	}
8036 
8037 	/* per cpu trace_pipe */
8038 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8039 				tr, cpu, &tracing_pipe_fops);
8040 
8041 	/* per cpu trace */
8042 	trace_create_cpu_file("trace", 0644, d_cpu,
8043 				tr, cpu, &tracing_fops);
8044 
8045 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8046 				tr, cpu, &tracing_buffers_fops);
8047 
8048 	trace_create_cpu_file("stats", 0444, d_cpu,
8049 				tr, cpu, &tracing_stats_fops);
8050 
8051 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8052 				tr, cpu, &tracing_entries_fops);
8053 
8054 #ifdef CONFIG_TRACER_SNAPSHOT
8055 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8056 				tr, cpu, &snapshot_fops);
8057 
8058 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8059 				tr, cpu, &snapshot_raw_fops);
8060 #endif
8061 }
8062 
8063 #ifdef CONFIG_FTRACE_SELFTEST
8064 /* Let selftest have access to static functions in this file */
8065 #include "trace_selftest.c"
8066 #endif
8067 
8068 static ssize_t
8069 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8070 			loff_t *ppos)
8071 {
8072 	struct trace_option_dentry *topt = filp->private_data;
8073 	char *buf;
8074 
8075 	if (topt->flags->val & topt->opt->bit)
8076 		buf = "1\n";
8077 	else
8078 		buf = "0\n";
8079 
8080 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8081 }
8082 
8083 static ssize_t
8084 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8085 			 loff_t *ppos)
8086 {
8087 	struct trace_option_dentry *topt = filp->private_data;
8088 	unsigned long val;
8089 	int ret;
8090 
8091 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8092 	if (ret)
8093 		return ret;
8094 
8095 	if (val != 0 && val != 1)
8096 		return -EINVAL;
8097 
8098 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8099 		mutex_lock(&trace_types_lock);
8100 		ret = __set_tracer_option(topt->tr, topt->flags,
8101 					  topt->opt, !val);
8102 		mutex_unlock(&trace_types_lock);
8103 		if (ret)
8104 			return ret;
8105 	}
8106 
8107 	*ppos += cnt;
8108 
8109 	return cnt;
8110 }
8111 
8112 
8113 static const struct file_operations trace_options_fops = {
8114 	.open = tracing_open_generic,
8115 	.read = trace_options_read,
8116 	.write = trace_options_write,
8117 	.llseek	= generic_file_llseek,
8118 };
8119 
8120 /*
8121  * In order to pass in both the trace_array descriptor as well as the index
8122  * to the flag that the trace option file represents, the trace_array
8123  * has a character array of trace_flags_index[], which holds the index
8124  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8125  * The address of this character array is passed to the flag option file
8126  * read/write callbacks.
8127  *
8128  * In order to extract both the index and the trace_array descriptor,
8129  * get_tr_index() uses the following algorithm.
8130  *
8131  *   idx = *ptr;
8132  *
8133  * Because each array element holds its own index (index[1] == 1),
8134  * dereferencing the pointer yields the bit index of the flag.
8135  *
8136  * Then, to get the trace_array descriptor, subtract that index
8137  * from the pointer to reach the start of the array:
8138  *
8139  *   ptr - idx == &index[0]
8140  *
8141  * Then a simple container_of() from that pointer gets us to the
8142  * trace_array descriptor.
8143  */
8144 static void get_tr_index(void *data, struct trace_array **ptr,
8145 			 unsigned int *pindex)
8146 {
8147 	*pindex = *(unsigned char *)data;
8148 
8149 	*ptr = container_of(data - *pindex, struct trace_array,
8150 			    trace_flags_index);
8151 }
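
/*
 * Worked example (hypothetical values, for illustration only): if
 * data == &tr->trace_flags_index[5], then index[5] == 5, so *pindex
 * becomes 5 and data - 5 == &tr->trace_flags_index[0]; container_of()
 * on that address recovers the enclosing trace_array.
 */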
8152 
8153 static ssize_t
8154 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8155 			loff_t *ppos)
8156 {
8157 	void *tr_index = filp->private_data;
8158 	struct trace_array *tr;
8159 	unsigned int index;
8160 	char *buf;
8161 
8162 	get_tr_index(tr_index, &tr, &index);
8163 
8164 	if (tr->trace_flags & (1 << index))
8165 		buf = "1\n";
8166 	else
8167 		buf = "0\n";
8168 
8169 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8170 }
8171 
8172 static ssize_t
8173 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8174 			 loff_t *ppos)
8175 {
8176 	void *tr_index = filp->private_data;
8177 	struct trace_array *tr;
8178 	unsigned int index;
8179 	unsigned long val;
8180 	int ret;
8181 
8182 	get_tr_index(tr_index, &tr, &index);
8183 
8184 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8185 	if (ret)
8186 		return ret;
8187 
8188 	if (val != 0 && val != 1)
8189 		return -EINVAL;
8190 
8191 	mutex_lock(&event_mutex);
8192 	mutex_lock(&trace_types_lock);
8193 	ret = set_tracer_flag(tr, 1 << index, val);
8194 	mutex_unlock(&trace_types_lock);
8195 	mutex_unlock(&event_mutex);
8196 
8197 	if (ret < 0)
8198 		return ret;
8199 
8200 	*ppos += cnt;
8201 
8202 	return cnt;
8203 }
8204 
8205 static const struct file_operations trace_options_core_fops = {
8206 	.open = tracing_open_generic,
8207 	.read = trace_options_core_read,
8208 	.write = trace_options_core_write,
8209 	.llseek = generic_file_llseek,
8210 };
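
/*
 * These file_operations back the core option files under
 * <tracefs>/options/. Usage sketch (the file names come from the
 * trace_options[] table; "sym-offset" is just an example):
 *
 *	cat options/sym-offset      # prints "0" or "1"
 *	echo 1 > options/sym-offset # sets the flag via set_tracer_flag()
 */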
8211 
8212 struct dentry *trace_create_file(const char *name,
8213 				 umode_t mode,
8214 				 struct dentry *parent,
8215 				 void *data,
8216 				 const struct file_operations *fops)
8217 {
8218 	struct dentry *ret;
8219 
8220 	ret = tracefs_create_file(name, mode, parent, data, fops);
8221 	if (!ret)
8222 		pr_warn("Could not create tracefs '%s' entry\n", name);
8223 
8224 	return ret;
8225 }
8226 
8227 
8228 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8229 {
8230 	struct dentry *d_tracer;
8231 
8232 	if (tr->options)
8233 		return tr->options;
8234 
8235 	d_tracer = tracing_get_dentry(tr);
8236 	if (IS_ERR(d_tracer))
8237 		return NULL;
8238 
8239 	tr->options = tracefs_create_dir("options", d_tracer);
8240 	if (!tr->options) {
8241 		pr_warn("Could not create tracefs directory 'options'\n");
8242 		return NULL;
8243 	}
8244 
8245 	return tr->options;
8246 }
8247 
8248 static void
8249 create_trace_option_file(struct trace_array *tr,
8250 			 struct trace_option_dentry *topt,
8251 			 struct tracer_flags *flags,
8252 			 struct tracer_opt *opt)
8253 {
8254 	struct dentry *t_options;
8255 
8256 	t_options = trace_options_init_dentry(tr);
8257 	if (!t_options)
8258 		return;
8259 
8260 	topt->flags = flags;
8261 	topt->opt = opt;
8262 	topt->tr = tr;
8263 
8264 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8265 				    &trace_options_fops);
8266 
8267 }
8268 
8269 static void
8270 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8271 {
8272 	struct trace_option_dentry *topts;
8273 	struct trace_options *tr_topts;
8274 	struct tracer_flags *flags;
8275 	struct tracer_opt *opts;
8276 	int cnt;
8277 	int i;
8278 
8279 	if (!tracer)
8280 		return;
8281 
8282 	flags = tracer->flags;
8283 
8284 	if (!flags || !flags->opts)
8285 		return;
8286 
8287 	/*
8288 	 * If this is an instance, only create flags for tracers
8289 	 * the instance may have.
8290 	 */
8291 	if (!trace_ok_for_array(tracer, tr))
8292 		return;
8293 
8294 	for (i = 0; i < tr->nr_topts; i++) {
8295 		/* Make sure there are no duplicate flags. */
8296 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8297 			return;
8298 	}
8299 
8300 	opts = flags->opts;
8301 
8302 	for (cnt = 0; opts[cnt].name; cnt++)
8303 		;
8304 
8305 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8306 	if (!topts)
8307 		return;
8308 
8309 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8310 			    GFP_KERNEL);
8311 	if (!tr_topts) {
8312 		kfree(topts);
8313 		return;
8314 	}
8315 
8316 	tr->topts = tr_topts;
8317 	tr->topts[tr->nr_topts].tracer = tracer;
8318 	tr->topts[tr->nr_topts].topts = topts;
8319 	tr->nr_topts++;
8320 
8321 	for (cnt = 0; opts[cnt].name; cnt++) {
8322 		create_trace_option_file(tr, &topts[cnt], flags,
8323 					 &opts[cnt]);
8324 		MEM_FAIL(topts[cnt].entry == NULL,
8325 			  "Failed to create trace option: %s",
8326 			  opts[cnt].name);
8327 	}
8328 }
8329 
8330 static struct dentry *
8331 create_trace_option_core_file(struct trace_array *tr,
8332 			      const char *option, long index)
8333 {
8334 	struct dentry *t_options;
8335 
8336 	t_options = trace_options_init_dentry(tr);
8337 	if (!t_options)
8338 		return NULL;
8339 
8340 	return trace_create_file(option, 0644, t_options,
8341 				 (void *)&tr->trace_flags_index[index],
8342 				 &trace_options_core_fops);
8343 }
8344 
8345 static void create_trace_options_dir(struct trace_array *tr)
8346 {
8347 	struct dentry *t_options;
8348 	bool top_level = tr == &global_trace;
8349 	int i;
8350 
8351 	t_options = trace_options_init_dentry(tr);
8352 	if (!t_options)
8353 		return;
8354 
8355 	for (i = 0; trace_options[i]; i++) {
8356 		if (top_level ||
8357 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8358 			create_trace_option_core_file(tr, trace_options[i], i);
8359 	}
8360 }
8361 
8362 static ssize_t
8363 rb_simple_read(struct file *filp, char __user *ubuf,
8364 	       size_t cnt, loff_t *ppos)
8365 {
8366 	struct trace_array *tr = filp->private_data;
8367 	char buf[64];
8368 	int r;
8369 
8370 	r = tracer_tracing_is_on(tr);
8371 	r = sprintf(buf, "%d\n", r);
8372 
8373 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8374 }
8375 
8376 static ssize_t
8377 rb_simple_write(struct file *filp, const char __user *ubuf,
8378 		size_t cnt, loff_t *ppos)
8379 {
8380 	struct trace_array *tr = filp->private_data;
8381 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8382 	unsigned long val;
8383 	int ret;
8384 
8385 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8386 	if (ret)
8387 		return ret;
8388 
8389 	if (buffer) {
8390 		mutex_lock(&trace_types_lock);
8391 		if (!!val == tracer_tracing_is_on(tr)) {
8392 			val = 0; /* do nothing */
8393 		} else if (val) {
8394 			tracer_tracing_on(tr);
8395 			if (tr->current_trace->start)
8396 				tr->current_trace->start(tr);
8397 		} else {
8398 			tracer_tracing_off(tr);
8399 			if (tr->current_trace->stop)
8400 				tr->current_trace->stop(tr);
8401 		}
8402 		mutex_unlock(&trace_types_lock);
8403 	}
8404 
8405 	(*ppos)++;
8406 
8407 	return cnt;
8408 }
8409 
8410 static const struct file_operations rb_simple_fops = {
8411 	.open		= tracing_open_generic_tr,
8412 	.read		= rb_simple_read,
8413 	.write		= rb_simple_write,
8414 	.release	= tracing_release_generic_tr,
8415 	.llseek		= default_llseek,
8416 };
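
/*
 * rb_simple_fops backs the per-instance "tracing_on" file. A quick
 * usage sketch from user space (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on   # stop writing to the ring buffer
 *	echo 1 > /sys/kernel/tracing/tracing_on   # resume tracing
 *	cat /sys/kernel/tracing/tracing_on        # read back the current state
 */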
8417 
8418 static ssize_t
8419 buffer_percent_read(struct file *filp, char __user *ubuf,
8420 		    size_t cnt, loff_t *ppos)
8421 {
8422 	struct trace_array *tr = filp->private_data;
8423 	char buf[64];
8424 	int r;
8425 
8426 	r = tr->buffer_percent;
8427 	r = sprintf(buf, "%d\n", r);
8428 
8429 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8430 }
8431 
8432 static ssize_t
8433 buffer_percent_write(struct file *filp, const char __user *ubuf,
8434 		     size_t cnt, loff_t *ppos)
8435 {
8436 	struct trace_array *tr = filp->private_data;
8437 	unsigned long val;
8438 	int ret;
8439 
8440 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8441 	if (ret)
8442 		return ret;
8443 
8444 	if (val > 100)
8445 		return -EINVAL;
8446 
8447 	if (!val)
8448 		val = 1;
8449 
8450 	tr->buffer_percent = val;
8451 
8452 	(*ppos)++;
8453 
8454 	return cnt;
8455 }
8456 
8457 static const struct file_operations buffer_percent_fops = {
8458 	.open		= tracing_open_generic_tr,
8459 	.read		= buffer_percent_read,
8460 	.write		= buffer_percent_write,
8461 	.release	= tracing_release_generic_tr,
8462 	.llseek		= default_llseek,
8463 };
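
/*
 * buffer_percent controls how full the ring buffer must be before
 * blocked readers (poll/splice on trace_pipe_raw, see wait_on_pipe()
 * above) are woken up. Usage sketch:
 *
 *	echo 50 > buffer_percent    # wake readers when the buffer is half full
 *	echo 100 > buffer_percent   # wake readers only when the buffer is full
 */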
8464 
8465 static struct dentry *trace_instance_dir;
8466 
8467 static void
8468 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8469 
8470 static int
8471 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8472 {
8473 	enum ring_buffer_flags rb_flags;
8474 
8475 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8476 
8477 	buf->tr = tr;
8478 
8479 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8480 	if (!buf->buffer)
8481 		return -ENOMEM;
8482 
8483 	buf->data = alloc_percpu(struct trace_array_cpu);
8484 	if (!buf->data) {
8485 		ring_buffer_free(buf->buffer);
8486 		buf->buffer = NULL;
8487 		return -ENOMEM;
8488 	}
8489 
8490 	/* Allocate the first page for all buffers */
8491 	set_buffer_entries(&tr->array_buffer,
8492 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8493 
8494 	return 0;
8495 }
8496 
8497 static int allocate_trace_buffers(struct trace_array *tr, int size)
8498 {
8499 	int ret;
8500 
8501 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8502 	if (ret)
8503 		return ret;
8504 
8505 #ifdef CONFIG_TRACER_MAX_TRACE
8506 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8507 				    allocate_snapshot ? size : 1);
8508 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8509 		ring_buffer_free(tr->array_buffer.buffer);
8510 		tr->array_buffer.buffer = NULL;
8511 		free_percpu(tr->array_buffer.data);
8512 		tr->array_buffer.data = NULL;
8513 		return -ENOMEM;
8514 	}
8515 	tr->allocated_snapshot = allocate_snapshot;
8516 
8517 	/*
8518 	 * Only the top level trace array gets its snapshot allocated
8519 	 * from the kernel command line.
8520 	 */
8521 	allocate_snapshot = false;
8522 #endif
8523 
8524 	return 0;
8525 }
8526 
8527 static void free_trace_buffer(struct array_buffer *buf)
8528 {
8529 	if (buf->buffer) {
8530 		ring_buffer_free(buf->buffer);
8531 		buf->buffer = NULL;
8532 		free_percpu(buf->data);
8533 		buf->data = NULL;
8534 	}
8535 }
8536 
8537 static void free_trace_buffers(struct trace_array *tr)
8538 {
8539 	if (!tr)
8540 		return;
8541 
8542 	free_trace_buffer(&tr->array_buffer);
8543 
8544 #ifdef CONFIG_TRACER_MAX_TRACE
8545 	free_trace_buffer(&tr->max_buffer);
8546 #endif
8547 }
8548 
8549 static void init_trace_flags_index(struct trace_array *tr)
8550 {
8551 	int i;
8552 
8553 	/* Used by the trace options files */
8554 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8555 		tr->trace_flags_index[i] = i;
8556 }
8557 
8558 static void __update_tracer_options(struct trace_array *tr)
8559 {
8560 	struct tracer *t;
8561 
8562 	for (t = trace_types; t; t = t->next)
8563 		add_tracer_options(tr, t);
8564 }
8565 
8566 static void update_tracer_options(struct trace_array *tr)
8567 {
8568 	mutex_lock(&trace_types_lock);
8569 	__update_tracer_options(tr);
8570 	mutex_unlock(&trace_types_lock);
8571 }
8572 
8573 /* Must have trace_types_lock held */
8574 struct trace_array *trace_array_find(const char *instance)
8575 {
8576 	struct trace_array *tr, *found = NULL;
8577 
8578 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8579 		if (tr->name && strcmp(tr->name, instance) == 0) {
8580 			found = tr;
8581 			break;
8582 		}
8583 	}
8584 
8585 	return found;
8586 }
8587 
8588 struct trace_array *trace_array_find_get(const char *instance)
8589 {
8590 	struct trace_array *tr;
8591 
8592 	mutex_lock(&trace_types_lock);
8593 	tr = trace_array_find(instance);
8594 	if (tr)
8595 		tr->ref++;
8596 	mutex_unlock(&trace_types_lock);
8597 
8598 	return tr;
8599 }
8600 
8601 static struct trace_array *trace_array_create(const char *name)
8602 {
8603 	struct trace_array *tr;
8604 	int ret;
8605 
8606 	ret = -ENOMEM;
8607 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8608 	if (!tr)
8609 		return ERR_PTR(ret);
8610 
8611 	tr->name = kstrdup(name, GFP_KERNEL);
8612 	if (!tr->name)
8613 		goto out_free_tr;
8614 
8615 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8616 		goto out_free_tr;
8617 
8618 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8619 
8620 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8621 
8622 	raw_spin_lock_init(&tr->start_lock);
8623 
8624 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8625 
8626 	tr->current_trace = &nop_trace;
8627 
8628 	INIT_LIST_HEAD(&tr->systems);
8629 	INIT_LIST_HEAD(&tr->events);
8630 	INIT_LIST_HEAD(&tr->hist_vars);
8631 	INIT_LIST_HEAD(&tr->err_log);
8632 
8633 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8634 		goto out_free_tr;
8635 
8636 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8637 	if (!tr->dir)
8638 		goto out_free_tr;
8639 
8640 	ret = event_trace_add_tracer(tr->dir, tr);
8641 	if (ret) {
8642 		tracefs_remove(tr->dir);
8643 		goto out_free_tr;
8644 	}
8645 
8646 	ftrace_init_trace_array(tr);
8647 
8648 	init_tracer_tracefs(tr, tr->dir);
8649 	init_trace_flags_index(tr);
8650 	__update_tracer_options(tr);
8651 
8652 	list_add(&tr->list, &ftrace_trace_arrays);
8653 
8654 	tr->ref++;
8655 
8656 
8657 	return tr;
8658 
8659  out_free_tr:
8660 	free_trace_buffers(tr);
8661 	free_cpumask_var(tr->tracing_cpumask);
8662 	kfree(tr->name);
8663 	kfree(tr);
8664 
8665 	return ERR_PTR(ret);
8666 }
8667 
8668 static int instance_mkdir(const char *name)
8669 {
8670 	struct trace_array *tr;
8671 	int ret;
8672 
8673 	mutex_lock(&event_mutex);
8674 	mutex_lock(&trace_types_lock);
8675 
8676 	ret = -EEXIST;
8677 	if (trace_array_find(name))
8678 		goto out_unlock;
8679 
8680 	tr = trace_array_create(name);
8681 
8682 	ret = PTR_ERR_OR_ZERO(tr);
8683 
8684 out_unlock:
8685 	mutex_unlock(&trace_types_lock);
8686 	mutex_unlock(&event_mutex);
8687 	return ret;
8688 }
8689 
8690 /**
8691  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8692  * @name: The name of the trace array to be looked up/created.
8693  *
8694  * Returns a pointer to the trace array with the given name, or
8695  * NULL if it cannot be created.
8696  *
8697  * NOTE: This function increments the reference counter associated with the
8698  * trace array returned. This makes sure it cannot be freed while in use.
8699  * Use trace_array_put() once the trace array is no longer needed.
8700  * If the trace_array is to be freed, trace_array_destroy() needs to
8701  * be called after the trace_array_put(), or simply let user space delete
8702  * it from the tracefs instances directory. But until the
8703  * trace_array_put() is called, user space cannot delete it.
8704  *
8705  */
8706 struct trace_array *trace_array_get_by_name(const char *name)
8707 {
8708 	struct trace_array *tr;
8709 
8710 	mutex_lock(&event_mutex);
8711 	mutex_lock(&trace_types_lock);
8712 
8713 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8714 		if (tr->name && strcmp(tr->name, name) == 0)
8715 			goto out_unlock;
8716 	}
8717 
8718 	tr = trace_array_create(name);
8719 
8720 	if (IS_ERR(tr))
8721 		tr = NULL;
8722 out_unlock:
8723 	if (tr)
8724 		tr->ref++;
8725 
8726 	mutex_unlock(&trace_types_lock);
8727 	mutex_unlock(&event_mutex);
8728 	return tr;
8729 }
8730 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
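
/*
 * Sketch of in-kernel usage (the instance name is hypothetical): a
 * module can create or look up its own instance and must drop the
 * reference when done. trace_array_destroy() is only needed when the
 * instance itself should go away, and must follow the final put.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 */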
8731 
8732 static int __remove_instance(struct trace_array *tr)
8733 {
8734 	int i;
8735 
8736 	/* Reference counter for a newly created trace array = 1. */
8737 	if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8738 		return -EBUSY;
8739 
8740 	list_del(&tr->list);
8741 
8742 	/* Disable all the flags that were enabled coming in */
8743 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8744 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8745 			set_tracer_flag(tr, 1 << i, 0);
8746 	}
8747 
8748 	tracing_set_nop(tr);
8749 	clear_ftrace_function_probes(tr);
8750 	event_trace_del_tracer(tr);
8751 	ftrace_clear_pids(tr);
8752 	ftrace_destroy_function_files(tr);
8753 	tracefs_remove(tr->dir);
8754 	free_trace_buffers(tr);
8755 
8756 	for (i = 0; i < tr->nr_topts; i++) {
8757 		kfree(tr->topts[i].topts);
8758 	}
8759 	kfree(tr->topts);
8760 
8761 	free_cpumask_var(tr->tracing_cpumask);
8762 	kfree(tr->name);
8763 	kfree(tr);
8764 	tr = NULL;
8765 
8766 	return 0;
8767 }
8768 
8769 int trace_array_destroy(struct trace_array *this_tr)
8770 {
8771 	struct trace_array *tr;
8772 	int ret;
8773 
8774 	if (!this_tr)
8775 		return -EINVAL;
8776 
8777 	mutex_lock(&event_mutex);
8778 	mutex_lock(&trace_types_lock);
8779 
8780 	ret = -ENODEV;
8781 
8782 	/* Make sure the trace array exists before destroying it. */
8783 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8784 		if (tr == this_tr) {
8785 			ret = __remove_instance(tr);
8786 			break;
8787 		}
8788 	}
8789 
8790 	mutex_unlock(&trace_types_lock);
8791 	mutex_unlock(&event_mutex);
8792 
8793 	return ret;
8794 }
8795 EXPORT_SYMBOL_GPL(trace_array_destroy);
8796 
8797 static int instance_rmdir(const char *name)
8798 {
8799 	struct trace_array *tr;
8800 	int ret;
8801 
8802 	mutex_lock(&event_mutex);
8803 	mutex_lock(&trace_types_lock);
8804 
8805 	ret = -ENODEV;
8806 	tr = trace_array_find(name);
8807 	if (tr)
8808 		ret = __remove_instance(tr);
8809 
8810 	mutex_unlock(&trace_types_lock);
8811 	mutex_unlock(&event_mutex);
8812 
8813 	return ret;
8814 }
8815 
8816 static __init void create_trace_instances(struct dentry *d_tracer)
8817 {
8818 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8819 							 instance_mkdir,
8820 							 instance_rmdir);
8821 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8822 		return;
8823 }
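
/*
 * The instances directory created above lets user space manage trace
 * arrays without any new kernel code. Usage sketch (assuming tracefs
 * is mounted at /sys/kernel/tracing):
 *
 *	mkdir /sys/kernel/tracing/instances/foo    # calls instance_mkdir()
 *	rmdir /sys/kernel/tracing/instances/foo    # calls instance_rmdir()
 */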
8824 
8825 static void
8826 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8827 {
8828 	struct trace_event_file *file;
8829 	int cpu;
8830 
8831 	trace_create_file("available_tracers", 0444, d_tracer,
8832 			tr, &show_traces_fops);
8833 
8834 	trace_create_file("current_tracer", 0644, d_tracer,
8835 			tr, &set_tracer_fops);
8836 
8837 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8838 			  tr, &tracing_cpumask_fops);
8839 
8840 	trace_create_file("trace_options", 0644, d_tracer,
8841 			  tr, &tracing_iter_fops);
8842 
8843 	trace_create_file("trace", 0644, d_tracer,
8844 			  tr, &tracing_fops);
8845 
8846 	trace_create_file("trace_pipe", 0444, d_tracer,
8847 			  tr, &tracing_pipe_fops);
8848 
8849 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8850 			  tr, &tracing_entries_fops);
8851 
8852 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8853 			  tr, &tracing_total_entries_fops);
8854 
8855 	trace_create_file("free_buffer", 0200, d_tracer,
8856 			  tr, &tracing_free_buffer_fops);
8857 
8858 	trace_create_file("trace_marker", 0220, d_tracer,
8859 			  tr, &tracing_mark_fops);
8860 
8861 	file = __find_event_file(tr, "ftrace", "print");
8862 	if (file && file->dir)
8863 		trace_create_file("trigger", 0644, file->dir, file,
8864 				  &event_trigger_fops);
8865 	tr->trace_marker_file = file;
8866 
8867 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8868 			  tr, &tracing_mark_raw_fops);
8869 
8870 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8871 			  &trace_clock_fops);
8872 
8873 	trace_create_file("tracing_on", 0644, d_tracer,
8874 			  tr, &rb_simple_fops);
8875 
8876 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8877 			  &trace_time_stamp_mode_fops);
8878 
8879 	tr->buffer_percent = 50;
8880 
8881 	trace_create_file("buffer_percent", 0444, d_tracer,
8882 			tr, &buffer_percent_fops);
8883 
8884 	create_trace_options_dir(tr);
8885 
8886 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8887 	trace_create_maxlat_file(tr, d_tracer);
8888 #endif
8889 
8890 	if (ftrace_create_function_files(tr, d_tracer))
8891 		MEM_FAIL(1, "Could not allocate function filter files");
8892 
8893 #ifdef CONFIG_TRACER_SNAPSHOT
8894 	trace_create_file("snapshot", 0644, d_tracer,
8895 			  tr, &snapshot_fops);
8896 #endif
8897 
8898 	trace_create_file("error_log", 0644, d_tracer,
8899 			  tr, &tracing_err_log_fops);
8900 
8901 	for_each_tracing_cpu(cpu)
8902 		tracing_init_tracefs_percpu(tr, cpu);
8903 
8904 	ftrace_init_tracefs(tr, d_tracer);
8905 }
8906 
8907 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8908 {
8909 	struct vfsmount *mnt;
8910 	struct file_system_type *type;
8911 
8912 	/*
8913 	 * To maintain backward compatibility for tools that mount
8914 	 * debugfs to get to the tracing facility, tracefs is automatically
8915 	 * mounted to the debugfs/tracing directory.
8916 	 */
8917 	type = get_fs_type("tracefs");
8918 	if (!type)
8919 		return NULL;
8920 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8921 	put_filesystem(type);
8922 	if (IS_ERR(mnt))
8923 		return NULL;
8924 	mntget(mnt);
8925 
8926 	return mnt;
8927 }
8928 
8929 /**
8930  * tracing_init_dentry - initialize top level trace array
8931  *
8932  * This is called when creating files or directories in the tracing
8933  * directory. It is called via fs_initcall() by the boot-up code and
8934  * is expected to return the dentry of the top level tracing directory.
8935  */
8936 struct dentry *tracing_init_dentry(void)
8937 {
8938 	struct trace_array *tr = &global_trace;
8939 
8940 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
8941 		pr_warn("Tracing disabled due to lockdown\n");
8942 		return ERR_PTR(-EPERM);
8943 	}
8944 
8945 	/* The top level trace array uses NULL as parent */
8946 	if (tr->dir)
8947 		return NULL;
8948 
8949 	if (WARN_ON(!tracefs_initialized()) ||
8950 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8951 		 WARN_ON(!debugfs_initialized())))
8952 		return ERR_PTR(-ENODEV);
8953 
8954 	/*
8955 	 * As there may still be users that expect the tracing
8956 	 * files to exist in debugfs/tracing, we must automount
8957 	 * the tracefs file system there, so older tools still
8958 	 * work with the newer kerenl.
8959 	 */
8960 	tr->dir = debugfs_create_automount("tracing", NULL,
8961 					   trace_automount, NULL);
8962 
8963 	return NULL;
8964 }
8965 
8966 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8967 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8968 
8969 static void __init trace_eval_init(void)
8970 {
8971 	int len;
8972 
8973 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8974 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8975 }
8976 
8977 #ifdef CONFIG_MODULES
8978 static void trace_module_add_evals(struct module *mod)
8979 {
8980 	if (!mod->num_trace_evals)
8981 		return;
8982 
8983 	/*
8984 	 * Modules with bad taint do not have events created, do
8985 	 * not bother with enums either.
8986 	 */
8987 	if (trace_module_has_bad_taint(mod))
8988 		return;
8989 
8990 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8991 }
8992 
8993 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8994 static void trace_module_remove_evals(struct module *mod)
8995 {
8996 	union trace_eval_map_item *map;
8997 	union trace_eval_map_item **last = &trace_eval_maps;
8998 
8999 	if (!mod->num_trace_evals)
9000 		return;
9001 
9002 	mutex_lock(&trace_eval_mutex);
9003 
9004 	map = trace_eval_maps;
9005 
9006 	while (map) {
9007 		if (map->head.mod == mod)
9008 			break;
9009 		map = trace_eval_jmp_to_tail(map);
9010 		last = &map->tail.next;
9011 		map = map->tail.next;
9012 	}
9013 	if (!map)
9014 		goto out;
9015 
9016 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9017 	kfree(map);
9018  out:
9019 	mutex_unlock(&trace_eval_mutex);
9020 }
9021 #else
9022 static inline void trace_module_remove_evals(struct module *mod) { }
9023 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9024 
9025 static int trace_module_notify(struct notifier_block *self,
9026 			       unsigned long val, void *data)
9027 {
9028 	struct module *mod = data;
9029 
9030 	switch (val) {
9031 	case MODULE_STATE_COMING:
9032 		trace_module_add_evals(mod);
9033 		break;
9034 	case MODULE_STATE_GOING:
9035 		trace_module_remove_evals(mod);
9036 		break;
9037 	}
9038 
9039 	return 0;
9040 }
9041 
9042 static struct notifier_block trace_module_nb = {
9043 	.notifier_call = trace_module_notify,
9044 	.priority = 0,
9045 };
9046 #endif /* CONFIG_MODULES */
9047 
9048 static __init int tracer_init_tracefs(void)
9049 {
9050 	struct dentry *d_tracer;
9051 
9052 	trace_access_lock_init();
9053 
9054 	d_tracer = tracing_init_dentry();
9055 	if (IS_ERR(d_tracer))
9056 		return 0;
9057 
9058 	event_trace_init();
9059 
9060 	init_tracer_tracefs(&global_trace, d_tracer);
9061 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9062 
9063 	trace_create_file("tracing_thresh", 0644, d_tracer,
9064 			&global_trace, &tracing_thresh_fops);
9065 
9066 	trace_create_file("README", 0444, d_tracer,
9067 			NULL, &tracing_readme_fops);
9068 
9069 	trace_create_file("saved_cmdlines", 0444, d_tracer,
9070 			NULL, &tracing_saved_cmdlines_fops);
9071 
9072 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9073 			  NULL, &tracing_saved_cmdlines_size_fops);
9074 
9075 	trace_create_file("saved_tgids", 0444, d_tracer,
9076 			NULL, &tracing_saved_tgids_fops);
9077 
9078 	trace_eval_init();
9079 
9080 	trace_create_eval_file(d_tracer);
9081 
9082 #ifdef CONFIG_MODULES
9083 	register_module_notifier(&trace_module_nb);
9084 #endif
9085 
9086 #ifdef CONFIG_DYNAMIC_FTRACE
9087 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9088 			NULL, &tracing_dyn_info_fops);
9089 #endif
9090 
9091 	create_trace_instances(d_tracer);
9092 
9093 	update_tracer_options(&global_trace);
9094 
9095 	return 0;
9096 }
9097 
9098 static int trace_panic_handler(struct notifier_block *this,
9099 			       unsigned long event, void *unused)
9100 {
9101 	if (ftrace_dump_on_oops)
9102 		ftrace_dump(ftrace_dump_on_oops);
9103 	return NOTIFY_OK;
9104 }
9105 
9106 static struct notifier_block trace_panic_notifier = {
9107 	.notifier_call  = trace_panic_handler,
9108 	.next           = NULL,
9109 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9110 };
9111 
9112 static int trace_die_handler(struct notifier_block *self,
9113 			     unsigned long val,
9114 			     void *data)
9115 {
9116 	switch (val) {
9117 	case DIE_OOPS:
9118 		if (ftrace_dump_on_oops)
9119 			ftrace_dump(ftrace_dump_on_oops);
9120 		break;
9121 	default:
9122 		break;
9123 	}
9124 	return NOTIFY_OK;
9125 }
9126 
9127 static struct notifier_block trace_die_notifier = {
9128 	.notifier_call = trace_die_handler,
9129 	.priority = 200
9130 };
9131 
9132 /*
9133  * printk is set to a max of 1024; we really don't need it that big.
9134  * Nothing should be printing 1000 characters anyway.
9135  */
9136 #define TRACE_MAX_PRINT		1000
9137 
9138 /*
9139  * Define here KERN_TRACE so that we have one place to modify
9140  * it if we decide to change what log level the ftrace dump
9141  * should be at.
9142  */
9143 #define KERN_TRACE		KERN_EMERG
9144 
9145 void
9146 trace_printk_seq(struct trace_seq *s)
9147 {
9148 	/* Probably should print a warning here. */
9149 	if (s->seq.len >= TRACE_MAX_PRINT)
9150 		s->seq.len = TRACE_MAX_PRINT;
9151 
9152 	/*
9153 	 * More paranoid code. Although the buffer size is set to
9154 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9155 	 * an extra layer of protection.
9156 	 */
9157 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9158 		s->seq.len = s->seq.size - 1;
9159 
9160 	/* Should already be zero terminated, but we are paranoid. */
9161 	s->buffer[s->seq.len] = 0;
9162 
9163 	printk(KERN_TRACE "%s", s->buffer);
9164 
9165 	trace_seq_init(s);
9166 }
9167 
9168 void trace_init_global_iter(struct trace_iterator *iter)
9169 {
9170 	iter->tr = &global_trace;
9171 	iter->trace = iter->tr->current_trace;
9172 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9173 	iter->array_buffer = &global_trace.array_buffer;
9174 
9175 	if (iter->trace && iter->trace->open)
9176 		iter->trace->open(iter);
9177 
9178 	/* Annotate start of buffers if we had overruns */
9179 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9180 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9181 
9182 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9183 	if (trace_clocks[iter->tr->clock_id].in_ns)
9184 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9185 }
9186 
9187 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9188 {
9189 	/* use static because iter can be a bit big for the stack */
9190 	static struct trace_iterator iter;
9191 	static atomic_t dump_running;
9192 	struct trace_array *tr = &global_trace;
9193 	unsigned int old_userobj;
9194 	unsigned long flags;
9195 	int cnt = 0, cpu;
9196 
9197 	/* Only allow one dump user at a time. */
9198 	if (atomic_inc_return(&dump_running) != 1) {
9199 		atomic_dec(&dump_running);
9200 		return;
9201 	}
9202 
9203 	/*
9204 	 * Always turn off tracing when we dump.
9205 	 * We don't need to show trace output of what happens
9206 	 * between multiple crashes.
9207 	 *
9208 	 * If the user does a sysrq-z, then they can re-enable
9209 	 * tracing with echo 1 > tracing_on.
9210 	 */
9211 	tracing_off();
9212 
9213 	local_irq_save(flags);
9214 	printk_nmi_direct_enter();
9215 
9216 	/* Simulate the iterator */
9217 	trace_init_global_iter(&iter);
9218 	/* Can not use kmalloc for iter.temp */
9219 	iter.temp = static_temp_buf;
9220 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9221 
9222 	for_each_tracing_cpu(cpu) {
9223 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9224 	}
9225 
9226 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9227 
9228 	/* don't look at user memory in panic mode */
9229 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9230 
9231 	switch (oops_dump_mode) {
9232 	case DUMP_ALL:
9233 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9234 		break;
9235 	case DUMP_ORIG:
9236 		iter.cpu_file = raw_smp_processor_id();
9237 		break;
9238 	case DUMP_NONE:
9239 		goto out_enable;
9240 	default:
9241 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9242 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9243 	}
9244 
9245 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9246 
9247 	/* Did function tracer already get disabled? */
9248 	if (ftrace_is_dead()) {
9249 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9250 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9251 	}
9252 
9253 	/*
9254 	 * We need to stop all tracing on all CPUs to read
9255 	 * the next buffer. This is a bit expensive, but is
9256 	 * not done often. We read all that we can,
9257 	 * and then release the locks again.
9258 	 */
9259 
9260 	while (!trace_empty(&iter)) {
9261 
9262 		if (!cnt)
9263 			printk(KERN_TRACE "---------------------------------\n");
9264 
9265 		cnt++;
9266 
9267 		trace_iterator_reset(&iter);
9268 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9269 
9270 		if (trace_find_next_entry_inc(&iter) != NULL) {
9271 			int ret;
9272 
9273 			ret = print_trace_line(&iter);
9274 			if (ret != TRACE_TYPE_NO_CONSUME)
9275 				trace_consume(&iter);
9276 		}
9277 		touch_nmi_watchdog();
9278 
9279 		trace_printk_seq(&iter.seq);
9280 	}
9281 
9282 	if (!cnt)
9283 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9284 	else
9285 		printk(KERN_TRACE "---------------------------------\n");
9286 
9287  out_enable:
9288 	tr->trace_flags |= old_userobj;
9289 
9290 	for_each_tracing_cpu(cpu) {
9291 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9292 	}
9293 	atomic_dec(&dump_running);
9294 	printk_nmi_direct_exit();
9295 	local_irq_restore(flags);
9296 }
9297 EXPORT_SYMBOL_GPL(ftrace_dump);
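
/*
 * Besides the oops/panic notifiers below, kernel code may call
 * ftrace_dump() directly when debugging a hard-to-hit condition.
 * Sketch (the condition is a stand-in; placement is up to the caller):
 *
 *	if (WARN_ON_ONCE(broken_invariant))
 *		ftrace_dump(DUMP_ALL);
 *
 * DUMP_ORIG restricts the dump to the CPU that triggered it.
 */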
9298 
9299 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9300 {
9301 	char **argv;
9302 	int argc, ret;
9303 
9304 	argc = 0;
9305 	ret = 0;
9306 	argv = argv_split(GFP_KERNEL, buf, &argc);
9307 	if (!argv)
9308 		return -ENOMEM;
9309 
9310 	if (argc)
9311 		ret = createfn(argc, argv);
9312 
9313 	argv_free(argv);
9314 
9315 	return ret;
9316 }
9317 
9318 #define WRITE_BUFSIZE  4096
9319 
9320 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9321 				size_t count, loff_t *ppos,
9322 				int (*createfn)(int, char **))
9323 {
9324 	char *kbuf, *buf, *tmp;
9325 	int ret = 0;
9326 	size_t done = 0;
9327 	size_t size;
9328 
9329 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9330 	if (!kbuf)
9331 		return -ENOMEM;
9332 
9333 	while (done < count) {
9334 		size = count - done;
9335 
9336 		if (size >= WRITE_BUFSIZE)
9337 			size = WRITE_BUFSIZE - 1;
9338 
9339 		if (copy_from_user(kbuf, buffer + done, size)) {
9340 			ret = -EFAULT;
9341 			goto out;
9342 		}
9343 		kbuf[size] = '\0';
9344 		buf = kbuf;
9345 		do {
9346 			tmp = strchr(buf, '\n');
9347 			if (tmp) {
9348 				*tmp = '\0';
9349 				size = tmp - buf + 1;
9350 			} else {
9351 				size = strlen(buf);
9352 				if (done + size < count) {
9353 					if (buf != kbuf)
9354 						break;
9355 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9356 					pr_warn("Line length is too long: Should be less than %d\n",
9357 						WRITE_BUFSIZE - 2);
9358 					ret = -EINVAL;
9359 					goto out;
9360 				}
9361 			}
9362 			done += size;
9363 
9364 			/* Remove comments */
9365 			tmp = strchr(buf, '#');
9366 
9367 			if (tmp)
9368 				*tmp = '\0';
9369 
9370 			ret = trace_run_command(buf, createfn);
9371 			if (ret)
9372 				goto out;
9373 			buf += size;
9374 
9375 		} while (done < count);
9376 	}
9377 	ret = done;
9378 
9379 out:
9380 	kfree(kbuf);
9381 
9382 	return ret;
9383 }
9384 
9385 __init static int tracer_alloc_buffers(void)
9386 {
9387 	int ring_buf_size;
9388 	int ret = -ENOMEM;
9389 
9390 
9391 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9392 		pr_warn("Tracing disabled due to lockdown\n");
9393 		return -EPERM;
9394 	}
9395 
9396 	/*
9397 	 * Make sure we don't accidentally add more trace options
9398 	 * than we have bits for.
9399 	 */
9400 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9401 
9402 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9403 		goto out;
9404 
9405 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9406 		goto out_free_buffer_mask;
9407 
9408 	/* Only allocate trace_printk buffers if a trace_printk exists */
9409 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9410 		/* Must be called before global_trace.buffer is allocated */
9411 		trace_printk_init_buffers();
9412 
9413 	/* To save memory, keep the ring buffer size to its minimum */
9414 	if (ring_buffer_expanded)
9415 		ring_buf_size = trace_buf_size;
9416 	else
9417 		ring_buf_size = 1;
9418 
9419 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9420 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9421 
9422 	raw_spin_lock_init(&global_trace.start_lock);
9423 
9424 	/*
9425 	 * The prepare callback allocates some memory for the ring buffer. We
9426 	 * don't free the buffer if the CPU goes down. If we were to free
9427 	 * the buffer, then the user would lose any trace that was in the
9428 	 * buffer. The memory will be removed once the "instance" is removed.
9429 	 */
9430 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9431 				      "trace/RB:preapre", trace_rb_cpu_prepare,
9432 				      NULL);
9433 	if (ret < 0)
9434 		goto out_free_cpumask;
9435 	/* Used for event triggers */
9436 	ret = -ENOMEM;
9437 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9438 	if (!temp_buffer)
9439 		goto out_rm_hp_state;
9440 
9441 	if (trace_create_savedcmd() < 0)
9442 		goto out_free_temp_buffer;
9443 
9444 	/* TODO: make the number of buffers hot pluggable with CPUs */
9445 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9446 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9447 		goto out_free_savedcmd;
9448 	}
9449 
9450 	if (global_trace.buffer_disabled)
9451 		tracing_off();
9452 
9453 	if (trace_boot_clock) {
9454 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9455 		if (ret < 0)
9456 			pr_warn("Trace clock %s not defined, going back to default\n",
9457 				trace_boot_clock);
9458 	}
9459 
9460 	/*
9461 	 * register_tracer() might reference current_trace, so it
9462 	 * needs to be set before we register anything. This is
9463 	 * just a bootstrap of current_trace anyway.
9464 	 */
9465 	global_trace.current_trace = &nop_trace;
9466 
9467 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9468 
9469 	ftrace_init_global_array_ops(&global_trace);
9470 
9471 	init_trace_flags_index(&global_trace);
9472 
9473 	register_tracer(&nop_trace);
9474 
9475 	/* Function tracing may start here (via kernel command line) */
9476 	init_function_trace();
9477 
9478 	/* All seems OK, enable tracing */
9479 	tracing_disabled = 0;
9480 
9481 	atomic_notifier_chain_register(&panic_notifier_list,
9482 				       &trace_panic_notifier);
9483 
9484 	register_die_notifier(&trace_die_notifier);
9485 
9486 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9487 
9488 	INIT_LIST_HEAD(&global_trace.systems);
9489 	INIT_LIST_HEAD(&global_trace.events);
9490 	INIT_LIST_HEAD(&global_trace.hist_vars);
9491 	INIT_LIST_HEAD(&global_trace.err_log);
9492 	list_add(&global_trace.list, &ftrace_trace_arrays);
9493 
9494 	apply_trace_boot_options();
9495 
9496 	register_snapshot_cmd();
9497 
9498 	return 0;
9499 
9500 out_free_savedcmd:
9501 	free_saved_cmdlines_buffer(savedcmd);
9502 out_free_temp_buffer:
9503 	ring_buffer_free(temp_buffer);
9504 out_rm_hp_state:
9505 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9506 out_free_cpumask:
9507 	free_cpumask_var(global_trace.tracing_cpumask);
9508 out_free_buffer_mask:
9509 	free_cpumask_var(tracing_buffer_mask);
9510 out:
9511 	return ret;
9512 }
9513 
9514 void __init early_trace_init(void)
9515 {
9516 	if (tracepoint_printk) {
9517 		tracepoint_print_iter =
9518 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9519 		if (MEM_FAIL(!tracepoint_print_iter,
9520 			     "Failed to allocate trace iterator\n"))
9521 			tracepoint_printk = 0;
9522 		else
9523 			static_key_enable(&tracepoint_printk_key.key);
9524 	}
9525 	tracer_alloc_buffers();
9526 }
9527 
9528 void __init trace_init(void)
9529 {
9530 	trace_event_init();
9531 }
9532 
9533 __init static int clear_boot_tracer(void)
9534 {
9535 	/*
9536 	 * The default bootup tracer name points into a buffer in an init
9537 	 * section that is freed after boot. This function is called at
9538 	 * late init time: if the boot tracer was never registered, clear
9539 	 * the pointer so that a later registration does not access the
9540 	 * buffer that is about to be freed.
9541 	 */
9542 	if (!default_bootup_tracer)
9543 		return 0;
9544 
9545 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9546 	       default_bootup_tracer);
9547 	default_bootup_tracer = NULL;
9548 
9549 	return 0;
9550 }
9551 
9552 fs_initcall(tracer_init_tracefs);
9553 late_initcall_sync(clear_boot_tracer);
9554 
9555 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9556 __init static int tracing_set_default_clock(void)
9557 {
9558 	/* sched_clock_stable() is determined in late_initcall */
9559 	if (!trace_boot_clock && !sched_clock_stable()) {
9560 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9561 			pr_warn("Can not set tracing clock due to lockdown\n");
9562 			return -EPERM;
9563 		}
9564 
9565 		printk(KERN_WARNING
9566 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9567 		       "If you want to keep using the local clock, then add:\n"
9568 		       "  \"trace_clock=local\"\n"
9569 		       "on the kernel command line\n");
9570 		tracing_set_clock(&global_trace, "global");
9571 	}
9572 
9573 	return 0;
9574 }
9575 late_initcall_sync(tracing_set_default_clock);
9576 #endif
9577