xref: /linux/kernel/trace/trace.c (revision 666ed8bfd1de3b091cf32ca03b651757dd86cfff)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79 
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82 	{ }
83 };
84 
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88 	return 0;
89 }
90 
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97 
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105 
106 cpumask_var_t __read_mostly	tracing_buffer_mask;
107 
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 to dump the buffers of all CPUs.
121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123 
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125 
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128 
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132 	struct module			*mod;
133 	unsigned long			length;
134 };
135 
136 union trace_eval_map_item;
137 
138 struct trace_eval_map_tail {
139 	/*
140 	 * "end" points to NULL so that it can be distinguished
141 	 * from a "mod" or "eval_string" entry
142 	 */
143 	union trace_eval_map_item	*next;
144 	const char			*end;	/* points to NULL */
145 };
146 
147 static DEFINE_MUTEX(trace_eval_mutex);
148 
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157 	struct trace_eval_map		map;
158 	struct trace_eval_map_head	head;
159 	struct trace_eval_map_tail	tail;
160 };
161 
162 static union trace_eval_map_item *trace_eval_maps;
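
/*
 * Illustrative sketch of the saved layout described above (not actual
 * code); assume a module "foo" registered three eval maps:
 *
 *	trace_eval_maps[0].head   = { .mod = foo, .length = 3 }
 *	trace_eval_maps[1..3].map = the three struct trace_eval_map entries
 *	trace_eval_maps[4].tail   = { .next = <next saved array or NULL>,
 *				      .end  = NULL }
 */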
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164 
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167 				   unsigned long flags, int pc);
168 
169 #define MAX_TRACER_SIZE		100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172 
173 static bool allocate_snapshot;
174 
175 static int __init set_cmdline_ftrace(char *str)
176 {
177 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178 	default_bootup_tracer = bootup_tracer_buf;
179 	/* We are using ftrace early, expand it */
180 	ring_buffer_expanded = true;
181 	return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184 
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187 	if (*str++ != '=' || !*str) {
188 		ftrace_dump_on_oops = DUMP_ALL;
189 		return 1;
190 	}
191 
192 	if (!strcmp("orig_cpu", str)) {
193 		ftrace_dump_on_oops = DUMP_ORIG;
194 		return 1;
195 	}
196 
197 	return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200 
201 static int __init stop_trace_on_warning(char *str)
202 {
203 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204 		__disable_trace_on_warning = 1;
205 	return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208 
209 static int __init boot_alloc_snapshot(char *str)
210 {
211 	allocate_snapshot = true;
212 	/* We also need the main ring buffer expanded */
213 	ring_buffer_expanded = true;
214 	return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217 
218 
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220 
221 static int __init set_trace_boot_options(char *str)
222 {
223 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224 	return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227 
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230 
231 static int __init set_trace_boot_clock(char *str)
232 {
233 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234 	trace_boot_clock = trace_boot_clock_buf;
235 	return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238 
239 static int __init set_tracepoint_printk(char *str)
240 {
241 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 		tracepoint_printk = 1;
243 	return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246 
247 unsigned long long ns2usecs(u64 nsec)
248 {
249 	nsec += 500;
250 	do_div(nsec, 1000);
251 	return nsec;
252 }
253 
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS						\
256 	(FUNCTION_DEFAULT_FLAGS |					\
257 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
258 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
259 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
260 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261 
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
264 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265 
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269 
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275 	.trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277 
278 LIST_HEAD(ftrace_trace_arrays);
279 
280 int trace_array_get(struct trace_array *this_tr)
281 {
282 	struct trace_array *tr;
283 	int ret = -ENODEV;
284 
285 	mutex_lock(&trace_types_lock);
286 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287 		if (tr == this_tr) {
288 			tr->ref++;
289 			ret = 0;
290 			break;
291 		}
292 	}
293 	mutex_unlock(&trace_types_lock);
294 
295 	return ret;
296 }
297 
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300 	WARN_ON(!this_tr->ref);
301 	this_tr->ref--;
302 }
303 
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314 	if (!this_tr)
315 		return;
316 
317 	mutex_lock(&trace_types_lock);
318 	__trace_array_put(this_tr);
319 	mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322 
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325 	int ret;
326 
327 	ret = security_locked_down(LOCKDOWN_TRACEFS);
328 	if (ret)
329 		return ret;
330 
331 	if (tracing_disabled)
332 		return -ENODEV;
333 
334 	if (tr && trace_array_get(tr) < 0)
335 		return -ENODEV;
336 
337 	return 0;
338 }
339 
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341 			      struct trace_buffer *buffer,
342 			      struct ring_buffer_event *event)
343 {
344 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345 	    !filter_match_preds(call->filter, rec)) {
346 		__trace_event_discard_commit(buffer, event);
347 		return 1;
348 	}
349 
350 	return 0;
351 }
352 
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355 	vfree(pid_list->pids);
356 	kfree(pid_list);
357 }
358 
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369 	/*
370 	 * If pid_max changed after filtered_pids was created, we
371 	 * by default ignore all pids greater than the previous pid_max.
372 	 */
373 	if (search_pid >= filtered_pids->pid_max)
374 		return false;
375 
376 	return test_bit(search_pid, filtered_pids->pids);
377 }
378 
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390 		       struct trace_pid_list *filtered_no_pids,
391 		       struct task_struct *task)
392 {
393 	/*
394 	 * If filtered_no_pids is not empty, and the task's pid is listed
395 	 * in filtered_no_pids, then return true.
396 	 * Otherwise, if filtered_pids is empty, that means we can
397 	 * trace all tasks. If it has content, then only trace pids
398 	 * within filtered_pids.
399 	 */
400 
401 	return (filtered_pids &&
402 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
403 		(filtered_no_pids &&
404 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406 
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420 				  struct task_struct *self,
421 				  struct task_struct *task)
422 {
423 	if (!pid_list)
424 		return;
425 
426 	/* For forks, we only add if the forking task is listed */
427 	if (self) {
428 		if (!trace_find_filtered_pid(pid_list, self->pid))
429 			return;
430 	}
431 
432 	/* Sorry, but we don't support pid_max changing after setting */
433 	if (task->pid >= pid_list->pid_max)
434 		return;
435 
436 	/* "self" is set for forks, and NULL for exits */
437 	if (self)
438 		set_bit(task->pid, pid_list->pids);
439 	else
440 		clear_bit(task->pid, pid_list->pids);
441 }
442 
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 of the actual pid, so zero can be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457 	unsigned long pid = (unsigned long)v;
458 
459 	(*pos)++;
460 
461 	/* pid already is +1 of the actual previous bit */
462 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463 
464 	/* Return pid + 1 to allow zero to be represented */
465 	if (pid < pid_list->pid_max)
466 		return (void *)(pid + 1);
467 
468 	return NULL;
469 }
470 
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484 	unsigned long pid;
485 	loff_t l = 0;
486 
487 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488 	if (pid >= pid_list->pid_max)
489 		return NULL;
490 
491 	/* Return pid + 1 so that zero can be the exit value */
492 	for (pid++; pid && l < *pos;
493 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494 		;
495 	return (void *)pid;
496 }
497 
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508 	unsigned long pid = (unsigned long)v - 1;
509 
510 	seq_printf(m, "%lu\n", pid);
511 	return 0;
512 }
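
/*
 * A minimal sketch (illustrative only) of how the three helpers above
 * can be wired into seq_file operations. "my_pid_list" is a hypothetical
 * pid list held by the caller; locking and file plumbing are omitted:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */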
513 
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE		127
516 
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518 		    struct trace_pid_list **new_pid_list,
519 		    const char __user *ubuf, size_t cnt)
520 {
521 	struct trace_pid_list *pid_list;
522 	struct trace_parser parser;
523 	unsigned long val;
524 	int nr_pids = 0;
525 	ssize_t read = 0;
526 	ssize_t ret = 0;
527 	loff_t pos;
528 	pid_t pid;
529 
530 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531 		return -ENOMEM;
532 
533 	/*
534 	 * Always recreate a new array. The write is an all or nothing
535 	 * operation: a new array is always created when the user adds
536 	 * pids, and if the operation fails, the current list is
537 	 * not modified.
538 	 */
539 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540 	if (!pid_list) {
541 		trace_parser_put(&parser);
542 		return -ENOMEM;
543 	}
544 
545 	pid_list->pid_max = READ_ONCE(pid_max);
546 
547 	/* Only truncating will shrink pid_max */
548 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549 		pid_list->pid_max = filtered_pids->pid_max;
550 
551 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552 	if (!pid_list->pids) {
553 		trace_parser_put(&parser);
554 		kfree(pid_list);
555 		return -ENOMEM;
556 	}
557 
558 	if (filtered_pids) {
559 		/* copy the current bits to the new max */
560 		for_each_set_bit(pid, filtered_pids->pids,
561 				 filtered_pids->pid_max) {
562 			set_bit(pid, pid_list->pids);
563 			nr_pids++;
564 		}
565 	}
566 
567 	while (cnt > 0) {
568 
569 		pos = 0;
570 
571 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
572 		if (ret < 0 || !trace_parser_loaded(&parser))
573 			break;
574 
575 		read += ret;
576 		ubuf += ret;
577 		cnt -= ret;
578 
579 		ret = -EINVAL;
580 		if (kstrtoul(parser.buffer, 0, &val))
581 			break;
582 		if (val >= pid_list->pid_max)
583 			break;
584 
585 		pid = (pid_t)val;
586 
587 		set_bit(pid, pid_list->pids);
588 		nr_pids++;
589 
590 		trace_parser_clear(&parser);
591 		ret = 0;
592 	}
593 	trace_parser_put(&parser);
594 
595 	if (ret < 0) {
596 		trace_free_pid_list(pid_list);
597 		return ret;
598 	}
599 
600 	if (!nr_pids) {
601 		/* Cleared the list of pids */
602 		trace_free_pid_list(pid_list);
603 		read = ret;
604 		pid_list = NULL;
605 	}
606 
607 	*new_pid_list = pid_list;
608 
609 	return read;
610 }
611 
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614 	u64 ts;
615 
616 	/* Early boot up does not have a buffer yet */
617 	if (!buf->buffer)
618 		return trace_clock_local();
619 
620 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
621 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622 
623 	return ts;
624 }
625 
626 u64 ftrace_now(int cpu)
627 {
628 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630 
631 /**
632  * tracing_is_enabled - Show if global_trace has been disabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled", which is meant for fast paths such as
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642 	/*
643 	 * For quick access (irqsoff uses this in fast path), just
644 	 * return the mirror variable of the state of the ring buffer.
645 	 * It's a little racy, but we don't really care.
646 	 */
647 	smp_rmb();
648 	return !global_trace.buffer_disabled;
649 }
650 
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low value of 16384.
657  * If a dump on oops happens, it will be much appreciated
658  * not to have to wait for all that output. Anyway, this is
659  * configurable at both boot time and run time.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
662 
663 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664 
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer		*trace_types __read_mostly;
667 
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672 
673 /*
674  * Serialize access to the ring buffer.
675  *
676  * The ring buffer serializes readers, but that is only low level protection.
677  * The validity of the events (returned by ring_buffer_peek() etc.)
678  * is not protected by the ring buffer.
679  *
680  * The content of events may become garbage if we allow other processes to
681  * consume these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not a reader page) in the ring buffer, and this page will be
684  *      rewritten by the event producer.
685  *   B) the page of the consumed events may become a page used for
686  *      splice_read, and this page will be returned to the system.
687  *
688  * These primitives allow multiple processes to access different per-cpu
689  * ring buffers concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multiple read-only accesses are also serialized.
693  */
694 
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	if (cpu == RING_BUFFER_ALL_CPUS) {
702 		/* gain it for accessing the whole ring buffer. */
703 		down_write(&all_cpu_access_lock);
704 	} else {
705 		/* gain it for accessing a cpu ring buffer. */
706 
707 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708 		down_read(&all_cpu_access_lock);
709 
710 		/* Secondly block other access to this @cpu ring buffer. */
711 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
712 	}
713 }
714 
715 static inline void trace_access_unlock(int cpu)
716 {
717 	if (cpu == RING_BUFFER_ALL_CPUS) {
718 		up_write(&all_cpu_access_lock);
719 	} else {
720 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721 		up_read(&all_cpu_access_lock);
722 	}
723 }
724 
725 static inline void trace_access_lock_init(void)
726 {
727 	int cpu;
728 
729 	for_each_possible_cpu(cpu)
730 		mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732 
733 #else
734 
735 static DEFINE_MUTEX(access_lock);
736 
737 static inline void trace_access_lock(int cpu)
738 {
739 	(void)cpu;
740 	mutex_lock(&access_lock);
741 }
742 
743 static inline void trace_access_unlock(int cpu)
744 {
745 	(void)cpu;
746 	mutex_unlock(&access_lock);
747 }
748 
749 static inline void trace_access_lock_init(void)
750 {
751 }
752 
753 #endif
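
/*
 * A minimal sketch (illustrative only) of how a reader path is expected
 * to use the primitives above; error handling is omitted:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead of a cpu number takes the write
 * side of all_cpu_access_lock and thus excludes all per-cpu readers.
 */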
754 
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757 				 unsigned long flags,
758 				 int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760 				      struct trace_buffer *buffer,
761 				      unsigned long flags,
762 				      int skip, int pc, struct pt_regs *regs);
763 
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766 					unsigned long flags,
767 					int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771 				      struct trace_buffer *buffer,
772 				      unsigned long flags,
773 				      int skip, int pc, struct pt_regs *regs)
774 {
775 }
776 
777 #endif
778 
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781 		  int type, unsigned long flags, int pc)
782 {
783 	struct trace_entry *ent = ring_buffer_event_data(event);
784 
785 	tracing_generic_entry_update(ent, type, flags, pc);
786 }
787 
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790 			  int type,
791 			  unsigned long len,
792 			  unsigned long flags, int pc)
793 {
794 	struct ring_buffer_event *event;
795 
796 	event = ring_buffer_lock_reserve(buffer, len);
797 	if (event != NULL)
798 		trace_event_setup(event, type, flags, pc);
799 
800 	return event;
801 }
802 
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805 	if (tr->array_buffer.buffer)
806 		ring_buffer_record_on(tr->array_buffer.buffer);
807 	/*
808 	 * This flag is looked at when buffers haven't been allocated
809 	 * yet, or by some tracers (like irqsoff), that just want to
810 	 * know if the ring buffer has been disabled, but it can handle
811 	 * races of where it gets disabled but we still do a record.
812 	 * As the check is in the fast path of the tracers, it is more
813 	 * important to be fast than accurate.
814 	 */
815 	tr->buffer_disabled = 0;
816 	/* Make the flag seen by readers */
817 	smp_wmb();
818 }
819 
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828 	tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831 
832 
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836 	__this_cpu_write(trace_taskinfo_save, true);
837 
838 	/* If this is the temp buffer, we need to commit fully */
839 	if (this_cpu_read(trace_buffered_event) == event) {
840 		/* Length is in event->array[0] */
841 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
842 		/* Release the temp buffer */
843 		this_cpu_dec(trace_buffered_event_cnt);
844 	} else
845 		ring_buffer_unlock_commit(buffer, event);
846 }
847 
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:	   The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856 	struct ring_buffer_event *event;
857 	struct trace_buffer *buffer;
858 	struct print_entry *entry;
859 	unsigned long irq_flags;
860 	int alloc;
861 	int pc;
862 
863 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864 		return 0;
865 
866 	pc = preempt_count();
867 
868 	if (unlikely(tracing_selftest_running || tracing_disabled))
869 		return 0;
870 
871 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
872 
873 	local_save_flags(irq_flags);
874 	buffer = global_trace.array_buffer.buffer;
875 	ring_buffer_nest_start(buffer);
876 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
877 					    irq_flags, pc);
878 	if (!event) {
879 		size = 0;
880 		goto out;
881 	}
882 
883 	entry = ring_buffer_event_data(event);
884 	entry->ip = ip;
885 
886 	memcpy(&entry->buf, str, size);
887 
888 	/* Add a newline if necessary */
889 	if (entry->buf[size - 1] != '\n') {
890 		entry->buf[size] = '\n';
891 		entry->buf[size + 1] = '\0';
892 	} else
893 		entry->buf[size] = '\0';
894 
895 	__buffer_unlock_commit(buffer, event);
896 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898 	ring_buffer_nest_end(buffer);
899 	return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
902 
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:	   The address of the caller
906  * @str:   The constant string to write to the buffer
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910 	struct ring_buffer_event *event;
911 	struct trace_buffer *buffer;
912 	struct bputs_entry *entry;
913 	unsigned long irq_flags;
914 	int size = sizeof(struct bputs_entry);
915 	int ret = 0;
916 	int pc;
917 
918 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919 		return 0;
920 
921 	pc = preempt_count();
922 
923 	if (unlikely(tracing_selftest_running || tracing_disabled))
924 		return 0;
925 
926 	local_save_flags(irq_flags);
927 	buffer = global_trace.array_buffer.buffer;
928 
929 	ring_buffer_nest_start(buffer);
930 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931 					    irq_flags, pc);
932 	if (!event)
933 		goto out;
934 
935 	entry = ring_buffer_event_data(event);
936 	entry->ip			= ip;
937 	entry->str			= str;
938 
939 	__buffer_unlock_commit(buffer, event);
940 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941 
942 	ret = 1;
943  out:
944 	ring_buffer_nest_end(buffer);
945 	return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948 
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
951 {
952 	struct tracer *tracer = tr->current_trace;
953 	unsigned long flags;
954 
955 	if (in_nmi()) {
956 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
957 		internal_trace_puts("*** snapshot is being ignored        ***\n");
958 		return;
959 	}
960 
961 	if (!tr->allocated_snapshot) {
962 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
963 		internal_trace_puts("*** stopping trace here!   ***\n");
964 		tracing_off();
965 		return;
966 	}
967 
968 	/* Note, snapshot can not be used when the tracer uses it */
969 	if (tracer->use_max_tr) {
970 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
971 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
972 		return;
973 	}
974 
975 	local_irq_save(flags);
976 	update_max_tr(tr, current, smp_processor_id(), cond_data);
977 	local_irq_restore(flags);
978 }
979 
980 void tracing_snapshot_instance(struct trace_array *tr)
981 {
982 	tracing_snapshot_instance_cond(tr, NULL);
983 }
984 
985 /**
986  * tracing_snapshot - take a snapshot of the current buffer.
987  *
988  * This causes a swap between the snapshot buffer and the current live
989  * tracing buffer. You can use this to take snapshots of the live
990  * trace when some condition is triggered, but continue to trace.
991  *
992  * Note, make sure to allocate the snapshot either with
993  * tracing_snapshot_alloc(), or manually with:
994  *   echo 1 > /sys/kernel/debug/tracing/snapshot
995  *
996  * If the snapshot buffer is not allocated, this will stop tracing,
997  * basically making a permanent snapshot.
998  */
999 void tracing_snapshot(void)
1000 {
1001 	struct trace_array *tr = &global_trace;
1002 
1003 	tracing_snapshot_instance(tr);
1004 }
1005 EXPORT_SYMBOL_GPL(tracing_snapshot);
1006 
1007 /**
1008  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1009  * @tr:		The tracing instance to snapshot
1010  * @cond_data:	The data to be tested conditionally, and possibly saved
1011  *
1012  * This is the same as tracing_snapshot() except that the snapshot is
1013  * conditional - the snapshot will only happen if the
1014  * cond_snapshot.update() implementation receiving the cond_data
1015  * returns true, which means that the trace array's cond_snapshot
1016  * update() operation used the cond_data to determine whether the
1017  * snapshot should be taken, and if it was, presumably saved it along
1018  * with the snapshot.
1019  */
1020 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1021 {
1022 	tracing_snapshot_instance_cond(tr, cond_data);
1023 }
1024 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1025 
1026 /**
1027  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1028  * @tr:		The tracing instance
1029  *
1030  * When the user enables a conditional snapshot using
1031  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1032  * with the snapshot.  This accessor is used to retrieve it.
1033  *
1034  * Should not be called from cond_snapshot.update(), since it takes
1035  * the tr->max_lock lock, which the code calling
1036  * cond_snapshot.update() has already taken.
1037  *
1038  * Returns the cond_data associated with the trace array's snapshot.
1039  */
1040 void *tracing_cond_snapshot_data(struct trace_array *tr)
1041 {
1042 	void *cond_data = NULL;
1043 
1044 	arch_spin_lock(&tr->max_lock);
1045 
1046 	if (tr->cond_snapshot)
1047 		cond_data = tr->cond_snapshot->cond_data;
1048 
1049 	arch_spin_unlock(&tr->max_lock);
1050 
1051 	return cond_data;
1052 }
1053 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1054 
1055 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1056 					struct array_buffer *size_buf, int cpu_id);
1057 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1058 
1059 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1060 {
1061 	int ret;
1062 
1063 	if (!tr->allocated_snapshot) {
1064 
1065 		/* allocate spare buffer */
1066 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1067 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1068 		if (ret < 0)
1069 			return ret;
1070 
1071 		tr->allocated_snapshot = true;
1072 	}
1073 
1074 	return 0;
1075 }
1076 
1077 static void free_snapshot(struct trace_array *tr)
1078 {
1079 	/*
1080 	 * We don't free the ring buffer; instead, we resize it because
1081 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1082 	 * we want to preserve it.
1083 	 */
1084 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1085 	set_buffer_entries(&tr->max_buffer, 1);
1086 	tracing_reset_online_cpus(&tr->max_buffer);
1087 	tr->allocated_snapshot = false;
1088 }
1089 
1090 /**
1091  * tracing_alloc_snapshot - allocate snapshot buffer.
1092  *
1093  * This only allocates the snapshot buffer if it isn't already
1094  * allocated - it doesn't also take a snapshot.
1095  *
1096  * This is meant to be used in cases where the snapshot buffer needs
1097  * to be set up for events that can't sleep but need to be able to
1098  * trigger a snapshot.
1099  */
1100 int tracing_alloc_snapshot(void)
1101 {
1102 	struct trace_array *tr = &global_trace;
1103 	int ret;
1104 
1105 	ret = tracing_alloc_snapshot_instance(tr);
1106 	WARN_ON(ret < 0);
1107 
1108 	return ret;
1109 }
1110 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1111 
1112 /**
1113  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1114  *
1115  * This is similar to tracing_snapshot(), but it will allocate the
1116  * snapshot buffer if it isn't already allocated. Use this only
1117  * where it is safe to sleep, as the allocation may sleep.
1118  *
1119  * This causes a swap between the snapshot buffer and the current live
1120  * tracing buffer. You can use this to take snapshots of the live
1121  * trace when some condition is triggered, but continue to trace.
1122  */
1123 void tracing_snapshot_alloc(void)
1124 {
1125 	int ret;
1126 
1127 	ret = tracing_alloc_snapshot();
1128 	if (ret < 0)
1129 		return;
1130 
1131 	tracing_snapshot();
1132 }
1133 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
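
/*
 * A minimal usage sketch (illustrative only): allocate the snapshot
 * buffer once from a context that may sleep, then trigger snapshots
 * later from contexts that cannot. hit_interesting_condition() is a
 * hypothetical predicate supplied by the caller:
 *
 *	// during setup (may sleep)
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 *	// later, e.g. from a tracepoint callback or interrupt handler
 *	if (unlikely(hit_interesting_condition()))
 *		tracing_snapshot();
 */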
1134 
1135 /**
1136  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1137  * @tr:		The tracing instance
1138  * @cond_data:	User data to associate with the snapshot
1139  * @update:	Implementation of the cond_snapshot update function
1140  *
1141  * Check whether the conditional snapshot for the given instance has
1142  * already been enabled, or if the current tracer is already using a
1143  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1144  * save the cond_data and update function inside.
1145  *
1146  * Returns 0 if successful, error otherwise.
1147  */
1148 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1149 				 cond_update_fn_t update)
1150 {
1151 	struct cond_snapshot *cond_snapshot;
1152 	int ret = 0;
1153 
1154 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1155 	if (!cond_snapshot)
1156 		return -ENOMEM;
1157 
1158 	cond_snapshot->cond_data = cond_data;
1159 	cond_snapshot->update = update;
1160 
1161 	mutex_lock(&trace_types_lock);
1162 
1163 	ret = tracing_alloc_snapshot_instance(tr);
1164 	if (ret)
1165 		goto fail_unlock;
1166 
1167 	if (tr->current_trace->use_max_tr) {
1168 		ret = -EBUSY;
1169 		goto fail_unlock;
1170 	}
1171 
1172 	/*
1173 	 * The cond_snapshot can only change to NULL without the
1174 	 * trace_types_lock. We don't care if we race with it going
1175 	 * to NULL, but we want to make sure that it's not set to
1176 	 * something other than NULL when we get here, which we can
1177 	 * do safely with only holding the trace_types_lock and not
1178 	 * having to take the max_lock.
1179 	 */
1180 	if (tr->cond_snapshot) {
1181 		ret = -EBUSY;
1182 		goto fail_unlock;
1183 	}
1184 
1185 	arch_spin_lock(&tr->max_lock);
1186 	tr->cond_snapshot = cond_snapshot;
1187 	arch_spin_unlock(&tr->max_lock);
1188 
1189 	mutex_unlock(&trace_types_lock);
1190 
1191 	return ret;
1192 
1193  fail_unlock:
1194 	mutex_unlock(&trace_types_lock);
1195 	kfree(cond_snapshot);
1196 	return ret;
1197 }
1198 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
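
/*
 * A minimal sketch (illustrative only) of a conditional snapshot user.
 * The update callback decides, on each tracing_snapshot_cond() call,
 * whether the snapshot should actually be taken. "struct my_cond" and
 * its fields are hypothetical caller-owned data:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_cond *c = cond_data;
 *
 *		return c->value > c->threshold;
 *	}
 *
 *	ret = tracing_snapshot_cond_enable(tr, &my_cond, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_cond);
 */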
1199 
1200 /**
1201  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1202  * @tr:		The tracing instance
1203  *
1204  * Check whether the conditional snapshot for the given instance is
1205  * enabled; if so, free the cond_snapshot associated with it,
1206  * otherwise return -EINVAL.
1207  *
1208  * Returns 0 if successful, error otherwise.
1209  */
1210 int tracing_snapshot_cond_disable(struct trace_array *tr)
1211 {
1212 	int ret = 0;
1213 
1214 	arch_spin_lock(&tr->max_lock);
1215 
1216 	if (!tr->cond_snapshot)
1217 		ret = -EINVAL;
1218 	else {
1219 		kfree(tr->cond_snapshot);
1220 		tr->cond_snapshot = NULL;
1221 	}
1222 
1223 	arch_spin_unlock(&tr->max_lock);
1224 
1225 	return ret;
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1228 #else
1229 void tracing_snapshot(void)
1230 {
1231 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1232 }
1233 EXPORT_SYMBOL_GPL(tracing_snapshot);
1234 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1235 {
1236 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1237 }
1238 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1239 int tracing_alloc_snapshot(void)
1240 {
1241 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1242 	return -ENODEV;
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1245 void tracing_snapshot_alloc(void)
1246 {
1247 	/* Give warning */
1248 	tracing_snapshot();
1249 }
1250 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1251 void *tracing_cond_snapshot_data(struct trace_array *tr)
1252 {
1253 	return NULL;
1254 }
1255 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1256 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1257 {
1258 	return -ENODEV;
1259 }
1260 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1261 int tracing_snapshot_cond_disable(struct trace_array *tr)
1262 {
1263 	return false;
1264 }
1265 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1266 #endif /* CONFIG_TRACER_SNAPSHOT */
1267 
1268 void tracer_tracing_off(struct trace_array *tr)
1269 {
1270 	if (tr->array_buffer.buffer)
1271 		ring_buffer_record_off(tr->array_buffer.buffer);
1272 	/*
1273 	 * This flag is looked at when buffers haven't been allocated
1274 	 * yet, or by some tracers (like irqsoff), that just want to
1275 	 * know if the ring buffer has been disabled, but it can handle
1276 	 * races of where it gets disabled but we still do a record.
1277 	 * As the check is in the fast path of the tracers, it is more
1278 	 * important to be fast than accurate.
1279 	 */
1280 	tr->buffer_disabled = 1;
1281 	/* Make the flag seen by readers */
1282 	smp_wmb();
1283 }
1284 
1285 /**
1286  * tracing_off - turn off tracing buffers
1287  *
1288  * This function stops the tracing buffers from recording data.
1289  * It does not disable any overhead the tracers themselves may
1290  * be causing. This function simply causes all recording to
1291  * the ring buffers to fail.
1292  */
1293 void tracing_off(void)
1294 {
1295 	tracer_tracing_off(&global_trace);
1296 }
1297 EXPORT_SYMBOL_GPL(tracing_off);
1298 
1299 void disable_trace_on_warning(void)
1300 {
1301 	if (__disable_trace_on_warning)
1302 		tracing_off();
1303 }
1304 
1305 /**
1306  * tracer_tracing_is_on - show the real state of the ring buffer
1307  * @tr : the trace array whose ring buffer state to check
1308  *
1309  * Shows the real state of the ring buffer, whether it is enabled or not.
1310  */
1311 bool tracer_tracing_is_on(struct trace_array *tr)
1312 {
1313 	if (tr->array_buffer.buffer)
1314 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1315 	return !tr->buffer_disabled;
1316 }
1317 
1318 /**
1319  * tracing_is_on - show state of ring buffers enabled
1320  */
1321 int tracing_is_on(void)
1322 {
1323 	return tracer_tracing_is_on(&global_trace);
1324 }
1325 EXPORT_SYMBOL_GPL(tracing_is_on);
1326 
1327 static int __init set_buf_size(char *str)
1328 {
1329 	unsigned long buf_size;
1330 
1331 	if (!str)
1332 		return 0;
1333 	buf_size = memparse(str, &str);
1334 	/* nr_entries can not be zero */
1335 	if (buf_size == 0)
1336 		return 0;
1337 	trace_buf_size = buf_size;
1338 	return 1;
1339 }
1340 __setup("trace_buf_size=", set_buf_size);
1341 
1342 static int __init set_tracing_thresh(char *str)
1343 {
1344 	unsigned long threshold;
1345 	int ret;
1346 
1347 	if (!str)
1348 		return 0;
1349 	ret = kstrtoul(str, 0, &threshold);
1350 	if (ret < 0)
1351 		return 0;
1352 	tracing_thresh = threshold * 1000;
1353 	return 1;
1354 }
1355 __setup("tracing_thresh=", set_tracing_thresh);
1356 
1357 unsigned long nsecs_to_usecs(unsigned long nsecs)
1358 {
1359 	return nsecs / 1000;
1360 }
1361 
1362 /*
1363  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1364  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1365  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1366  * of strings in the order that the evals (enum) were defined.
1367  */
1368 #undef C
1369 #define C(a, b) b
1370 
1371 /* These must match the bit positions in trace_iterator_flags */
1372 static const char *trace_options[] = {
1373 	TRACE_FLAGS
1374 	NULL
1375 };
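
/*
 * For illustration, a hypothetical TRACE_FLAGS fragment such as
 *
 *	#define TRACE_FLAGS C(PRINT_PARENT, "print-parent"), \
 *			    C(SYM_OFFSET,   "sym-offset"),
 *
 * expands here, with "C(a, b) b", to
 *
 *	{ "print-parent", "sym-offset", NULL }
 *
 * while trace.h redefines C() to build the matching TRACE_ITER_* enum
 * bits, keeping the strings and bit positions in sync by construction.
 */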
1376 
1377 static struct {
1378 	u64 (*func)(void);
1379 	const char *name;
1380 	int in_ns;		/* is this clock in nanoseconds? */
1381 } trace_clocks[] = {
1382 	{ trace_clock_local,		"local",	1 },
1383 	{ trace_clock_global,		"global",	1 },
1384 	{ trace_clock_counter,		"counter",	0 },
1385 	{ trace_clock_jiffies,		"uptime",	0 },
1386 	{ trace_clock,			"perf",		1 },
1387 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1388 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1389 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1390 	ARCH_TRACE_CLOCKS
1391 };
1392 
1393 bool trace_clock_in_ns(struct trace_array *tr)
1394 {
1395 	if (trace_clocks[tr->clock_id].in_ns)
1396 		return true;
1397 
1398 	return false;
1399 }
1400 
1401 /*
1402  * trace_parser_get_init - gets the buffer for trace parser
1403  */
1404 int trace_parser_get_init(struct trace_parser *parser, int size)
1405 {
1406 	memset(parser, 0, sizeof(*parser));
1407 
1408 	parser->buffer = kmalloc(size, GFP_KERNEL);
1409 	if (!parser->buffer)
1410 		return 1;
1411 
1412 	parser->size = size;
1413 	return 0;
1414 }
1415 
1416 /*
1417  * trace_parser_put - frees the buffer for trace parser
1418  */
1419 void trace_parser_put(struct trace_parser *parser)
1420 {
1421 	kfree(parser->buffer);
1422 	parser->buffer = NULL;
1423 }
1424 
1425 /*
1426  * trace_get_user - reads the user input string separated by space
1427  * (matched by isspace(ch))
1428  *
1429  * For each string found the 'struct trace_parser' is updated,
1430  * and the function returns.
1431  *
1432  * Returns number of bytes read.
1433  *
1434  * See kernel/trace/trace.h for 'struct trace_parser' details.
1435  */
1436 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1437 	size_t cnt, loff_t *ppos)
1438 {
1439 	char ch;
1440 	size_t read = 0;
1441 	ssize_t ret;
1442 
1443 	if (!*ppos)
1444 		trace_parser_clear(parser);
1445 
1446 	ret = get_user(ch, ubuf++);
1447 	if (ret)
1448 		goto out;
1449 
1450 	read++;
1451 	cnt--;
1452 
1453 	/*
1454 	 * The parser is not finished with the last write,
1455 	 * continue reading the user input without skipping spaces.
1456 	 */
1457 	if (!parser->cont) {
1458 		/* skip white space */
1459 		while (cnt && isspace(ch)) {
1460 			ret = get_user(ch, ubuf++);
1461 			if (ret)
1462 				goto out;
1463 			read++;
1464 			cnt--;
1465 		}
1466 
1467 		parser->idx = 0;
1468 
1469 		/* only spaces were written */
1470 		if (isspace(ch) || !ch) {
1471 			*ppos += read;
1472 			ret = read;
1473 			goto out;
1474 		}
1475 	}
1476 
1477 	/* read the non-space input */
1478 	while (cnt && !isspace(ch) && ch) {
1479 		if (parser->idx < parser->size - 1)
1480 			parser->buffer[parser->idx++] = ch;
1481 		else {
1482 			ret = -EINVAL;
1483 			goto out;
1484 		}
1485 		ret = get_user(ch, ubuf++);
1486 		if (ret)
1487 			goto out;
1488 		read++;
1489 		cnt--;
1490 	}
1491 
1492 	/* We either got finished input or we have to wait for another call. */
1493 	if (isspace(ch) || !ch) {
1494 		parser->buffer[parser->idx] = 0;
1495 		parser->cont = false;
1496 	} else if (parser->idx < parser->size - 1) {
1497 		parser->cont = true;
1498 		parser->buffer[parser->idx++] = ch;
1499 		/* Make sure the parsed string always terminates with '\0'. */
1500 		parser->buffer[parser->idx] = 0;
1501 	} else {
1502 		ret = -EINVAL;
1503 		goto out;
1504 	}
1505 
1506 	*ppos += read;
1507 	ret = read;
1508 
1509 out:
1510 	return ret;
1511 }
1512 
1513 /* TODO add a seq_buf_to_buffer() */
1514 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1515 {
1516 	int len;
1517 
1518 	if (trace_seq_used(s) <= s->seq.readpos)
1519 		return -EBUSY;
1520 
1521 	len = trace_seq_used(s) - s->seq.readpos;
1522 	if (cnt > len)
1523 		cnt = len;
1524 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1525 
1526 	s->seq.readpos += cnt;
1527 	return cnt;
1528 }
1529 
1530 unsigned long __read_mostly	tracing_thresh;
1531 static const struct file_operations tracing_max_lat_fops;
1532 
1533 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1534 	defined(CONFIG_FSNOTIFY)
1535 
1536 static struct workqueue_struct *fsnotify_wq;
1537 
1538 static void latency_fsnotify_workfn(struct work_struct *work)
1539 {
1540 	struct trace_array *tr = container_of(work, struct trace_array,
1541 					      fsnotify_work);
1542 	fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1543 		 tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1544 }
1545 
1546 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1547 {
1548 	struct trace_array *tr = container_of(iwork, struct trace_array,
1549 					      fsnotify_irqwork);
1550 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1551 }
1552 
1553 static void trace_create_maxlat_file(struct trace_array *tr,
1554 				     struct dentry *d_tracer)
1555 {
1556 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1557 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1558 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1559 					      d_tracer, &tr->max_latency,
1560 					      &tracing_max_lat_fops);
1561 }
1562 
1563 __init static int latency_fsnotify_init(void)
1564 {
1565 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1566 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1567 	if (!fsnotify_wq) {
1568 		pr_err("Unable to allocate tr_max_lat_wq\n");
1569 		return -ENOMEM;
1570 	}
1571 	return 0;
1572 }
1573 
1574 late_initcall_sync(latency_fsnotify_init);
1575 
1576 void latency_fsnotify(struct trace_array *tr)
1577 {
1578 	if (!fsnotify_wq)
1579 		return;
1580 	/*
1581 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1582 	 * possible that we are called from __schedule() or do_idle(), which
1583 	 * could cause a deadlock.
1584 	 */
1585 	irq_work_queue(&tr->fsnotify_irqwork);
1586 }
1587 
1588 /*
1589  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1590  *  defined(CONFIG_FSNOTIFY)
1591  */
1592 #else
1593 
1594 #define trace_create_maxlat_file(tr, d_tracer)				\
1595 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1596 			  &tr->max_latency, &tracing_max_lat_fops)
1597 
1598 #endif
1599 
1600 #ifdef CONFIG_TRACER_MAX_TRACE
1601 /*
1602  * Copy the new maximum trace into the separate maximum-trace
1603  * structure. (this way the maximum trace is permanently saved,
1604  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1605  */
1606 static void
1607 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1608 {
1609 	struct array_buffer *trace_buf = &tr->array_buffer;
1610 	struct array_buffer *max_buf = &tr->max_buffer;
1611 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1612 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1613 
1614 	max_buf->cpu = cpu;
1615 	max_buf->time_start = data->preempt_timestamp;
1616 
1617 	max_data->saved_latency = tr->max_latency;
1618 	max_data->critical_start = data->critical_start;
1619 	max_data->critical_end = data->critical_end;
1620 
1621 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1622 	max_data->pid = tsk->pid;
1623 	/*
1624 	 * If tsk == current, then use current_uid(), as that does not use
1625 	 * RCU. The irq tracer can be called out of RCU scope.
1626 	 */
1627 	if (tsk == current)
1628 		max_data->uid = current_uid();
1629 	else
1630 		max_data->uid = task_uid(tsk);
1631 
1632 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1633 	max_data->policy = tsk->policy;
1634 	max_data->rt_priority = tsk->rt_priority;
1635 
1636 	/* record this tasks comm */
1637 	tracing_record_cmdline(tsk);
1638 	latency_fsnotify(tr);
1639 }
1640 
1641 /**
1642  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1643  * @tr: tracer
1644  * @tsk: the task with the latency
1645  * @cpu: The cpu that initiated the trace.
1646  * @cond_data: User data associated with a conditional snapshot
1647  *
1648  * Flip the buffers between the @tr and the max_tr and record information
1649  * about which task was the cause of this latency.
1650  */
1651 void
1652 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1653 	      void *cond_data)
1654 {
1655 	if (tr->stop_count)
1656 		return;
1657 
1658 	WARN_ON_ONCE(!irqs_disabled());
1659 
1660 	if (!tr->allocated_snapshot) {
1661 		/* Only the nop tracer should hit this when disabling */
1662 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1663 		return;
1664 	}
1665 
1666 	arch_spin_lock(&tr->max_lock);
1667 
1668 	/* Inherit the recordable setting from array_buffer */
1669 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1670 		ring_buffer_record_on(tr->max_buffer.buffer);
1671 	else
1672 		ring_buffer_record_off(tr->max_buffer.buffer);
1673 
1674 #ifdef CONFIG_TRACER_SNAPSHOT
1675 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1676 		goto out_unlock;
1677 #endif
1678 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1679 
1680 	__update_max_tr(tr, tsk, cpu);
1681 
1682  out_unlock:
1683 	arch_spin_unlock(&tr->max_lock);
1684 }
1685 
1686 /**
1687  * update_max_tr_single - only copy one trace over, and reset the rest
1688  * @tr: tracer
1689  * @tsk: task with the latency
1690  * @cpu: the cpu of the buffer to copy.
1691  *
1692  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1693  */
1694 void
1695 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1696 {
1697 	int ret;
1698 
1699 	if (tr->stop_count)
1700 		return;
1701 
1702 	WARN_ON_ONCE(!irqs_disabled());
1703 	if (!tr->allocated_snapshot) {
1704 		/* Only the nop tracer should hit this when disabling */
1705 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1706 		return;
1707 	}
1708 
1709 	arch_spin_lock(&tr->max_lock);
1710 
1711 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1712 
1713 	if (ret == -EBUSY) {
1714 		/*
1715 		 * We failed to swap the buffer due to a commit taking
1716 		 * place on this CPU. We fail to record, but we reset
1717 		 * the max trace buffer (no one writes directly to it)
1718 		 * and flag that it failed.
1719 		 */
1720 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1721 			"Failed to swap buffers due to commit in progress\n");
1722 	}
1723 
1724 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1725 
1726 	__update_max_tr(tr, tsk, cpu);
1727 	arch_spin_unlock(&tr->max_lock);
1728 }
1729 #endif /* CONFIG_TRACER_MAX_TRACE */
1730 
1731 static int wait_on_pipe(struct trace_iterator *iter, int full)
1732 {
1733 	/* Iterators are static, they should be filled or empty */
1734 	if (trace_buffer_iter(iter, iter->cpu_file))
1735 		return 0;
1736 
1737 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1738 				full);
1739 }
1740 
1741 #ifdef CONFIG_FTRACE_STARTUP_TEST
1742 static bool selftests_can_run;
1743 
1744 struct trace_selftests {
1745 	struct list_head		list;
1746 	struct tracer			*type;
1747 };
1748 
1749 static LIST_HEAD(postponed_selftests);
1750 
1751 static int save_selftest(struct tracer *type)
1752 {
1753 	struct trace_selftests *selftest;
1754 
1755 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1756 	if (!selftest)
1757 		return -ENOMEM;
1758 
1759 	selftest->type = type;
1760 	list_add(&selftest->list, &postponed_selftests);
1761 	return 0;
1762 }
1763 
1764 static int run_tracer_selftest(struct tracer *type)
1765 {
1766 	struct trace_array *tr = &global_trace;
1767 	struct tracer *saved_tracer = tr->current_trace;
1768 	int ret;
1769 
1770 	if (!type->selftest || tracing_selftest_disabled)
1771 		return 0;
1772 
1773 	/*
1774 	 * If a tracer registers early in boot up (before scheduling is
1775 	 * initialized and such), then do not run its selftests yet.
1776 	 * Instead, run them a little later in the boot process.
1777 	 */
1778 	if (!selftests_can_run)
1779 		return save_selftest(type);
1780 
1781 	/*
1782 	 * Run a selftest on this tracer.
1783 	 * Here we reset the trace buffer, and set the current
1784 	 * tracer to be this tracer. The tracer can then run some
1785 	 * internal tracing to verify that everything is in order.
1786 	 * If we fail, we do not register this tracer.
1787 	 */
1788 	tracing_reset_online_cpus(&tr->array_buffer);
1789 
1790 	tr->current_trace = type;
1791 
1792 #ifdef CONFIG_TRACER_MAX_TRACE
1793 	if (type->use_max_tr) {
1794 		/* If we expanded the buffers, make sure the max is expanded too */
1795 		if (ring_buffer_expanded)
1796 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1797 					   RING_BUFFER_ALL_CPUS);
1798 		tr->allocated_snapshot = true;
1799 	}
1800 #endif
1801 
1802 	/* the test is responsible for initializing and enabling */
1803 	pr_info("Testing tracer %s: ", type->name);
1804 	ret = type->selftest(type, tr);
1805 	/* the test is responsible for resetting too */
1806 	tr->current_trace = saved_tracer;
1807 	if (ret) {
1808 		printk(KERN_CONT "FAILED!\n");
1809 		/* Add the warning after printing 'FAILED' */
1810 		WARN_ON(1);
1811 		return -1;
1812 	}
1813 	/* Only reset on passing, to avoid touching corrupted buffers */
1814 	tracing_reset_online_cpus(&tr->array_buffer);
1815 
1816 #ifdef CONFIG_TRACER_MAX_TRACE
1817 	if (type->use_max_tr) {
1818 		tr->allocated_snapshot = false;
1819 
1820 		/* Shrink the max buffer again */
1821 		if (ring_buffer_expanded)
1822 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1823 					   RING_BUFFER_ALL_CPUS);
1824 	}
1825 #endif
1826 
1827 	printk(KERN_CONT "PASSED\n");
1828 	return 0;
1829 }
1830 
1831 static __init int init_trace_selftests(void)
1832 {
1833 	struct trace_selftests *p, *n;
1834 	struct tracer *t, **last;
1835 	int ret;
1836 
1837 	selftests_can_run = true;
1838 
1839 	mutex_lock(&trace_types_lock);
1840 
1841 	if (list_empty(&postponed_selftests))
1842 		goto out;
1843 
1844 	pr_info("Running postponed tracer tests:\n");
1845 
1846 	tracing_selftest_running = true;
1847 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1848 		/* This loop can take minutes when sanitizers are enabled, so
1849 		 * let's make sure we allow RCU processing.
1850 		 */
1851 		cond_resched();
1852 		ret = run_tracer_selftest(p->type);
1853 		/* If the test fails, then warn and remove from available_tracers */
1854 		if (ret < 0) {
1855 			WARN(1, "tracer: %s failed selftest, disabling\n",
1856 			     p->type->name);
1857 			last = &trace_types;
1858 			for (t = trace_types; t; t = t->next) {
1859 				if (t == p->type) {
1860 					*last = t->next;
1861 					break;
1862 				}
1863 				last = &t->next;
1864 			}
1865 		}
1866 		list_del(&p->list);
1867 		kfree(p);
1868 	}
1869 	tracing_selftest_running = false;
1870 
1871  out:
1872 	mutex_unlock(&trace_types_lock);
1873 
1874 	return 0;
1875 }
1876 core_initcall(init_trace_selftests);
1877 #else
1878 static inline int run_tracer_selftest(struct tracer *type)
1879 {
1880 	return 0;
1881 }
1882 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1883 
1884 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1885 
1886 static void __init apply_trace_boot_options(void);
1887 
1888 /**
1889  * register_tracer - register a tracer with the ftrace system.
1890  * @type: the plugin for the tracer
1891  *
1892  * Register a new plugin tracer.
1893  */
1894 int __init register_tracer(struct tracer *type)
1895 {
1896 	struct tracer *t;
1897 	int ret = 0;
1898 
1899 	if (!type->name) {
1900 		pr_info("Tracer must have a name\n");
1901 		return -1;
1902 	}
1903 
1904 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1905 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1906 		return -1;
1907 	}
1908 
1909 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1910 		pr_warn("Can not register tracer %s due to lockdown\n",
1911 			   type->name);
1912 		return -EPERM;
1913 	}
1914 
1915 	mutex_lock(&trace_types_lock);
1916 
1917 	tracing_selftest_running = true;
1918 
1919 	for (t = trace_types; t; t = t->next) {
1920 		if (strcmp(type->name, t->name) == 0) {
1921 			/* already found */
1922 			pr_info("Tracer %s already registered\n",
1923 				type->name);
1924 			ret = -1;
1925 			goto out;
1926 		}
1927 	}
1928 
1929 	if (!type->set_flag)
1930 		type->set_flag = &dummy_set_flag;
1931 	if (!type->flags) {
1932 		/* allocate a dummy tracer_flags */
1933 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1934 		if (!type->flags) {
1935 			ret = -ENOMEM;
1936 			goto out;
1937 		}
1938 		type->flags->val = 0;
1939 		type->flags->opts = dummy_tracer_opt;
1940 	} else
1941 		if (!type->flags->opts)
1942 			type->flags->opts = dummy_tracer_opt;
1943 
1944 	/* store the tracer for __set_tracer_option */
1945 	type->flags->trace = type;
1946 
1947 	ret = run_tracer_selftest(type);
1948 	if (ret < 0)
1949 		goto out;
1950 
1951 	type->next = trace_types;
1952 	trace_types = type;
1953 	add_tracer_options(&global_trace, type);
1954 
1955  out:
1956 	tracing_selftest_running = false;
1957 	mutex_unlock(&trace_types_lock);
1958 
1959 	if (ret || !default_bootup_tracer)
1960 		goto out_unlock;
1961 
1962 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1963 		goto out_unlock;
1964 
1965 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1966 	/* Do we want this tracer to start on bootup? */
1967 	tracing_set_tracer(&global_trace, type->name);
1968 	default_bootup_tracer = NULL;
1969 
1970 	apply_trace_boot_options();
1971 
1972 	/* disable other selftests, since running this tracer will break them. */
1973 	tracing_selftest_disabled = true;
1974 #ifdef CONFIG_FTRACE_STARTUP_TEST
1975 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1976 	       type->name);
1977 #endif
1978 
1979  out_unlock:
1980 	return ret;
1981 }
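
/*
 * Example (illustrative sketch, not part of this file): an in-tree tracer
 * registers itself from its own init code, since register_tracer() is
 * __init and takes trace_types_lock internally. The "example_*" names
 * below are hypothetical; a real tracer would normally also wire up
 * .selftest and any option flags it supports.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 *
 * On success the tracer shows up in available_tracers and can be selected
 * through current_tracer in tracefs.
 */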
1982 
1983 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1984 {
1985 	struct trace_buffer *buffer = buf->buffer;
1986 
1987 	if (!buffer)
1988 		return;
1989 
1990 	ring_buffer_record_disable(buffer);
1991 
1992 	/* Make sure all commits have finished */
1993 	synchronize_rcu();
1994 	ring_buffer_reset_cpu(buffer, cpu);
1995 
1996 	ring_buffer_record_enable(buffer);
1997 }
1998 
1999 void tracing_reset_online_cpus(struct array_buffer *buf)
2000 {
2001 	struct trace_buffer *buffer = buf->buffer;
2002 	int cpu;
2003 
2004 	if (!buffer)
2005 		return;
2006 
2007 	ring_buffer_record_disable(buffer);
2008 
2009 	/* Make sure all commits have finished */
2010 	synchronize_rcu();
2011 
2012 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2013 
2014 	for_each_online_cpu(cpu)
2015 		ring_buffer_reset_cpu(buffer, cpu);
2016 
2017 	ring_buffer_record_enable(buffer);
2018 }
2019 
2020 /* Must have trace_types_lock held */
2021 void tracing_reset_all_online_cpus(void)
2022 {
2023 	struct trace_array *tr;
2024 
2025 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2026 		if (!tr->clear_trace)
2027 			continue;
2028 		tr->clear_trace = false;
2029 		tracing_reset_online_cpus(&tr->array_buffer);
2030 #ifdef CONFIG_TRACER_MAX_TRACE
2031 		tracing_reset_online_cpus(&tr->max_buffer);
2032 #endif
2033 	}
2034 }
2035 
2036 static int *tgid_map;
2037 
2038 #define SAVED_CMDLINES_DEFAULT 128
2039 #define NO_CMDLINE_MAP UINT_MAX
2040 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2041 struct saved_cmdlines_buffer {
2042 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2043 	unsigned *map_cmdline_to_pid;
2044 	unsigned cmdline_num;
2045 	int cmdline_idx;
2046 	char *saved_cmdlines;
2047 };
2048 static struct saved_cmdlines_buffer *savedcmd;
2049 
2050 /* temporarily disable recording */
2051 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2052 
2053 static inline char *get_saved_cmdlines(int idx)
2054 {
2055 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2056 }
2057 
2058 static inline void set_cmdline(int idx, const char *cmdline)
2059 {
2060 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2061 }
2062 
2063 static int allocate_cmdlines_buffer(unsigned int val,
2064 				    struct saved_cmdlines_buffer *s)
2065 {
2066 	s->map_cmdline_to_pid = kmalloc_array(val,
2067 					      sizeof(*s->map_cmdline_to_pid),
2068 					      GFP_KERNEL);
2069 	if (!s->map_cmdline_to_pid)
2070 		return -ENOMEM;
2071 
2072 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2073 	if (!s->saved_cmdlines) {
2074 		kfree(s->map_cmdline_to_pid);
2075 		return -ENOMEM;
2076 	}
2077 
2078 	s->cmdline_idx = 0;
2079 	s->cmdline_num = val;
2080 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2081 	       sizeof(s->map_pid_to_cmdline));
2082 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2083 	       val * sizeof(*s->map_cmdline_to_pid));
2084 
2085 	return 0;
2086 }
2087 
2088 static int trace_create_savedcmd(void)
2089 {
2090 	int ret;
2091 
2092 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2093 	if (!savedcmd)
2094 		return -ENOMEM;
2095 
2096 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2097 	if (ret < 0) {
2098 		kfree(savedcmd);
2099 		savedcmd = NULL;
2100 		return -ENOMEM;
2101 	}
2102 
2103 	return 0;
2104 }
2105 
2106 int is_tracing_stopped(void)
2107 {
2108 	return global_trace.stop_count;
2109 }
2110 
2111 /**
2112  * tracing_start - quick start of the tracer
2113  *
2114  * If tracing is enabled but was stopped by tracing_stop,
2115  * this will start the tracer back up.
2116  */
2117 void tracing_start(void)
2118 {
2119 	struct trace_buffer *buffer;
2120 	unsigned long flags;
2121 
2122 	if (tracing_disabled)
2123 		return;
2124 
2125 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2126 	if (--global_trace.stop_count) {
2127 		if (global_trace.stop_count < 0) {
2128 			/* Someone screwed up their debugging */
2129 			WARN_ON_ONCE(1);
2130 			global_trace.stop_count = 0;
2131 		}
2132 		goto out;
2133 	}
2134 
2135 	/* Prevent the buffers from switching */
2136 	arch_spin_lock(&global_trace.max_lock);
2137 
2138 	buffer = global_trace.array_buffer.buffer;
2139 	if (buffer)
2140 		ring_buffer_record_enable(buffer);
2141 
2142 #ifdef CONFIG_TRACER_MAX_TRACE
2143 	buffer = global_trace.max_buffer.buffer;
2144 	if (buffer)
2145 		ring_buffer_record_enable(buffer);
2146 #endif
2147 
2148 	arch_spin_unlock(&global_trace.max_lock);
2149 
2150  out:
2151 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2152 }
2153 
2154 static void tracing_start_tr(struct trace_array *tr)
2155 {
2156 	struct trace_buffer *buffer;
2157 	unsigned long flags;
2158 
2159 	if (tracing_disabled)
2160 		return;
2161 
2162 	/* If global, we need to also start the max tracer */
2163 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2164 		return tracing_start();
2165 
2166 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2167 
2168 	if (--tr->stop_count) {
2169 		if (tr->stop_count < 0) {
2170 			/* Someone screwed up their debugging */
2171 			WARN_ON_ONCE(1);
2172 			tr->stop_count = 0;
2173 		}
2174 		goto out;
2175 	}
2176 
2177 	buffer = tr->array_buffer.buffer;
2178 	if (buffer)
2179 		ring_buffer_record_enable(buffer);
2180 
2181  out:
2182 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2183 }
2184 
2185 /**
2186  * tracing_stop - quick stop of the tracer
2187  *
2188  * Light weight way to stop tracing. Use in conjunction with
2189  * tracing_start.
2190  */
2191 void tracing_stop(void)
2192 {
2193 	struct trace_buffer *buffer;
2194 	unsigned long flags;
2195 
2196 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2197 	if (global_trace.stop_count++)
2198 		goto out;
2199 
2200 	/* Prevent the buffers from switching */
2201 	arch_spin_lock(&global_trace.max_lock);
2202 
2203 	buffer = global_trace.array_buffer.buffer;
2204 	if (buffer)
2205 		ring_buffer_record_disable(buffer);
2206 
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208 	buffer = global_trace.max_buffer.buffer;
2209 	if (buffer)
2210 		ring_buffer_record_disable(buffer);
2211 #endif
2212 
2213 	arch_spin_unlock(&global_trace.max_lock);
2214 
2215  out:
2216 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2217 }
2218 
2219 static void tracing_stop_tr(struct trace_array *tr)
2220 {
2221 	struct trace_buffer *buffer;
2222 	unsigned long flags;
2223 
2224 	/* If global, we need to also stop the max tracer */
2225 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2226 		return tracing_stop();
2227 
2228 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2229 	if (tr->stop_count++)
2230 		goto out;
2231 
2232 	buffer = tr->array_buffer.buffer;
2233 	if (buffer)
2234 		ring_buffer_record_disable(buffer);
2235 
2236  out:
2237 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2238 }
2239 
2240 static int trace_save_cmdline(struct task_struct *tsk)
2241 {
2242 	unsigned pid, idx;
2243 
2244 	/* treat recording of idle task as a success */
2245 	if (!tsk->pid)
2246 		return 1;
2247 
2248 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2249 		return 0;
2250 
2251 	/*
2252 	 * It's not the end of the world if we don't get
2253 	 * the lock, but we also don't want to spin
2254 	 * nor do we want to disable interrupts,
2255 	 * so if we miss here, then better luck next time.
2256 	 */
2257 	if (!arch_spin_trylock(&trace_cmdline_lock))
2258 		return 0;
2259 
2260 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2261 	if (idx == NO_CMDLINE_MAP) {
2262 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2263 
2264 		/*
2265 		 * Check whether the cmdline buffer at idx has a pid
2266 		 * mapped. We are going to overwrite that entry so we
2267 		 * need to clear the map_pid_to_cmdline. Otherwise we
2268 		 * would read the new comm for the old pid.
2269 		 */
2270 		pid = savedcmd->map_cmdline_to_pid[idx];
2271 		if (pid != NO_CMDLINE_MAP)
2272 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2273 
2274 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2275 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2276 
2277 		savedcmd->cmdline_idx = idx;
2278 	}
2279 
2280 	set_cmdline(idx, tsk->comm);
2281 
2282 	arch_spin_unlock(&trace_cmdline_lock);
2283 
2284 	return 1;
2285 }
2286 
2287 static void __trace_find_cmdline(int pid, char comm[])
2288 {
2289 	unsigned map;
2290 
2291 	if (!pid) {
2292 		strcpy(comm, "<idle>");
2293 		return;
2294 	}
2295 
2296 	if (WARN_ON_ONCE(pid < 0)) {
2297 		strcpy(comm, "<XXX>");
2298 		return;
2299 	}
2300 
2301 	if (pid > PID_MAX_DEFAULT) {
2302 		strcpy(comm, "<...>");
2303 		return;
2304 	}
2305 
2306 	map = savedcmd->map_pid_to_cmdline[pid];
2307 	if (map != NO_CMDLINE_MAP)
2308 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2309 	else
2310 		strcpy(comm, "<...>");
2311 }
2312 
2313 void trace_find_cmdline(int pid, char comm[])
2314 {
2315 	preempt_disable();
2316 	arch_spin_lock(&trace_cmdline_lock);
2317 
2318 	__trace_find_cmdline(pid, comm);
2319 
2320 	arch_spin_unlock(&trace_cmdline_lock);
2321 	preempt_enable();
2322 }
2323 
2324 int trace_find_tgid(int pid)
2325 {
2326 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2327 		return 0;
2328 
2329 	return tgid_map[pid];
2330 }
2331 
2332 static int trace_save_tgid(struct task_struct *tsk)
2333 {
2334 	/* treat recording of idle task as a success */
2335 	if (!tsk->pid)
2336 		return 1;
2337 
2338 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2339 		return 0;
2340 
2341 	tgid_map[tsk->pid] = tsk->tgid;
2342 	return 1;
2343 }
2344 
2345 static bool tracing_record_taskinfo_skip(int flags)
2346 {
2347 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2348 		return true;
2349 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2350 		return true;
2351 	if (!__this_cpu_read(trace_taskinfo_save))
2352 		return true;
2353 	return false;
2354 }
2355 
2356 /**
2357  * tracing_record_taskinfo - record the task info of a task
2358  *
2359  * @task:  task to record
2360  * @flags: TRACE_RECORD_CMDLINE for recording comm
2361  *         TRACE_RECORD_TGID for recording tgid
2362  */
2363 void tracing_record_taskinfo(struct task_struct *task, int flags)
2364 {
2365 	bool done;
2366 
2367 	if (tracing_record_taskinfo_skip(flags))
2368 		return;
2369 
2370 	/*
2371 	 * Record as much task information as possible. If some fail, continue
2372 	 * to try to record the others.
2373 	 */
2374 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2375 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2376 
2377 	/* If recording any information failed, retry again soon. */
2378 	if (!done)
2379 		return;
2380 
2381 	__this_cpu_write(trace_taskinfo_save, false);
2382 }
2383 
2384 /**
2385  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2386  *
2387  * @prev: previous task during sched_switch
2388  * @next: next task during sched_switch
2389  * @flags: TRACE_RECORD_CMDLINE for recording comm
2390  *         TRACE_RECORD_TGID for recording tgid
2391  */
2392 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2393 					  struct task_struct *next, int flags)
2394 {
2395 	bool done;
2396 
2397 	if (tracing_record_taskinfo_skip(flags))
2398 		return;
2399 
2400 	/*
2401 	 * Record as much task information as possible. If some fail, continue
2402 	 * to try to record the others.
2403 	 */
2404 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2405 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2406 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2407 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2408 
2409 	/* If recording any information failed, retry again soon. */
2410 	if (!done)
2411 		return;
2412 
2413 	__this_cpu_write(trace_taskinfo_save, false);
2414 }
2415 
2416 /* Helpers to record a specific task information */
2417 void tracing_record_cmdline(struct task_struct *task)
2418 {
2419 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2420 }
2421 
2422 void tracing_record_tgid(struct task_struct *task)
2423 {
2424 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2425 }
2426 
2427 /*
2428  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2429  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2430  * simplifies those functions and keeps them in sync.
2431  */
2432 enum print_line_t trace_handle_return(struct trace_seq *s)
2433 {
2434 	return trace_seq_has_overflowed(s) ?
2435 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2436 }
2437 EXPORT_SYMBOL_GPL(trace_handle_return);
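
/*
 * A minimal sketch of the pattern this helper is meant for (the
 * "example_output" name is hypothetical): a struct trace_event output
 * callback writes into iter->seq and then lets trace_handle_return()
 * report whether the trace_seq overflowed.
 *
 *	static enum print_line_t example_output(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "example event on cpu %d\n", iter->cpu);
 *
 *		return trace_handle_return(s);
 *	}
 */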
2438 
2439 void
2440 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2441 			     unsigned long flags, int pc)
2442 {
2443 	struct task_struct *tsk = current;
2444 
2445 	entry->preempt_count		= pc & 0xff;
2446 	entry->pid			= (tsk) ? tsk->pid : 0;
2447 	entry->type			= type;
2448 	entry->flags =
2449 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2450 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2451 #else
2452 		TRACE_FLAG_IRQS_NOSUPPORT |
2453 #endif
2454 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2455 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2456 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2457 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2458 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2459 }
2460 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2461 
2462 struct ring_buffer_event *
2463 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2464 			  int type,
2465 			  unsigned long len,
2466 			  unsigned long flags, int pc)
2467 {
2468 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2469 }
2470 
2471 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2472 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2473 static int trace_buffered_event_ref;
2474 
2475 /**
2476  * trace_buffered_event_enable - enable buffering events
2477  *
2478  * When events are being filtered, it is quicker to use a temporary
2479  * buffer to write the event data into if there's a likely chance
2480  * that it will not be committed. The discard of the ring buffer
2481  * that it will not be committed. Discarding a reserved ring buffer
2482  * event is not as fast as committing one, and is much slower than
2483  * copying the data and then committing it.
2484  * When an event is to be filtered, allocate per cpu buffers to
2485  * write the event data into, and if the event is filtered and discarded
2486  * it is simply dropped, otherwise, the entire data is to be committed
2487  * in one shot.
2488  */
2489 void trace_buffered_event_enable(void)
2490 {
2491 	struct ring_buffer_event *event;
2492 	struct page *page;
2493 	int cpu;
2494 
2495 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2496 
2497 	if (trace_buffered_event_ref++)
2498 		return;
2499 
2500 	for_each_tracing_cpu(cpu) {
2501 		page = alloc_pages_node(cpu_to_node(cpu),
2502 					GFP_KERNEL | __GFP_NORETRY, 0);
2503 		if (!page)
2504 			goto failed;
2505 
2506 		event = page_address(page);
2507 		memset(event, 0, sizeof(*event));
2508 
2509 		per_cpu(trace_buffered_event, cpu) = event;
2510 
2511 		preempt_disable();
2512 		if (cpu == smp_processor_id() &&
2513 		    this_cpu_read(trace_buffered_event) !=
2514 		    per_cpu(trace_buffered_event, cpu))
2515 			WARN_ON_ONCE(1);
2516 		preempt_enable();
2517 	}
2518 
2519 	return;
2520  failed:
2521 	trace_buffered_event_disable();
2522 }
2523 
2524 static void enable_trace_buffered_event(void *data)
2525 {
2526 	/* Probably not needed, but do it anyway */
2527 	smp_rmb();
2528 	this_cpu_dec(trace_buffered_event_cnt);
2529 }
2530 
2531 static void disable_trace_buffered_event(void *data)
2532 {
2533 	this_cpu_inc(trace_buffered_event_cnt);
2534 }
2535 
2536 /**
2537  * trace_buffered_event_disable - disable buffering events
2538  *
2539  * When a filter is removed, it is faster to not use the buffered
2540  * events, and to commit directly into the ring buffer. Free up
2541  * the temp buffers when there are no more users. This requires
2542  * special synchronization with current events.
2543  */
2544 void trace_buffered_event_disable(void)
2545 {
2546 	int cpu;
2547 
2548 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2549 
2550 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2551 		return;
2552 
2553 	if (--trace_buffered_event_ref)
2554 		return;
2555 
2556 	preempt_disable();
2557 	/* For each CPU, set the buffer as used. */
2558 	smp_call_function_many(tracing_buffer_mask,
2559 			       disable_trace_buffered_event, NULL, 1);
2560 	preempt_enable();
2561 
2562 	/* Wait for all current users to finish */
2563 	synchronize_rcu();
2564 
2565 	for_each_tracing_cpu(cpu) {
2566 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2567 		per_cpu(trace_buffered_event, cpu) = NULL;
2568 	}
2569 	/*
2570 	 * Make sure trace_buffered_event is NULL before clearing
2571 	 * trace_buffered_event_cnt.
2572 	 */
2573 	smp_wmb();
2574 
2575 	preempt_disable();
2576 	/* Do the work on each cpu */
2577 	smp_call_function_many(tracing_buffer_mask,
2578 			       enable_trace_buffered_event, NULL, 1);
2579 	preempt_enable();
2580 }
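
/*
 * Sketch of the expected calling pattern (the real callers live in the
 * event filter code): both functions must run under event_mutex and are
 * reference counted, so every enable must eventually be paired with a
 * disable.
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *
 * and once the last filter that needed the buffers is gone:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */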
2581 
2582 static struct trace_buffer *temp_buffer;
2583 
2584 struct ring_buffer_event *
2585 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2586 			  struct trace_event_file *trace_file,
2587 			  int type, unsigned long len,
2588 			  unsigned long flags, int pc)
2589 {
2590 	struct ring_buffer_event *entry;
2591 	int val;
2592 
2593 	*current_rb = trace_file->tr->array_buffer.buffer;
2594 
2595 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2596 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2597 	    (entry = this_cpu_read(trace_buffered_event))) {
2598 		/* Try to use the per cpu buffer first */
2599 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2600 		if (val == 1) {
2601 			trace_event_setup(entry, type, flags, pc);
2602 			entry->array[0] = len;
2603 			return entry;
2604 		}
2605 		this_cpu_dec(trace_buffered_event_cnt);
2606 	}
2607 
2608 	entry = __trace_buffer_lock_reserve(*current_rb,
2609 					    type, len, flags, pc);
2610 	/*
2611 	 * If tracing is off, but we have triggers enabled
2612 	 * we still need to look at the event data. Use the temp_buffer
2613 	 * to store the trace event for the trigger to use. It's recursion
2614 	 * safe and will not be recorded anywhere.
2615 	 */
2616 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2617 		*current_rb = temp_buffer;
2618 		entry = __trace_buffer_lock_reserve(*current_rb,
2619 						    type, len, flags, pc);
2620 	}
2621 	return entry;
2622 }
2623 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2624 
2625 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2626 static DEFINE_MUTEX(tracepoint_printk_mutex);
2627 
2628 static void output_printk(struct trace_event_buffer *fbuffer)
2629 {
2630 	struct trace_event_call *event_call;
2631 	struct trace_event_file *file;
2632 	struct trace_event *event;
2633 	unsigned long flags;
2634 	struct trace_iterator *iter = tracepoint_print_iter;
2635 
2636 	/* We should never get here if iter is NULL */
2637 	if (WARN_ON_ONCE(!iter))
2638 		return;
2639 
2640 	event_call = fbuffer->trace_file->event_call;
2641 	if (!event_call || !event_call->event.funcs ||
2642 	    !event_call->event.funcs->trace)
2643 		return;
2644 
2645 	file = fbuffer->trace_file;
2646 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2647 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2648 	     !filter_match_preds(file->filter, fbuffer->entry)))
2649 		return;
2650 
2651 	event = &fbuffer->trace_file->event_call->event;
2652 
2653 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2654 	trace_seq_init(&iter->seq);
2655 	iter->ent = fbuffer->entry;
2656 	event_call->event.funcs->trace(iter, 0, event);
2657 	trace_seq_putc(&iter->seq, 0);
2658 	printk("%s", iter->seq.buffer);
2659 
2660 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2661 }
2662 
2663 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2664 			     void __user *buffer, size_t *lenp,
2665 			     loff_t *ppos)
2666 {
2667 	int save_tracepoint_printk;
2668 	int ret;
2669 
2670 	mutex_lock(&tracepoint_printk_mutex);
2671 	save_tracepoint_printk = tracepoint_printk;
2672 
2673 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2674 
2675 	/*
2676 	 * This will force exiting early, as tracepoint_printk
2677 	 * is always zero when tracepoint_print_iter is not allocated
2678 	 */
2679 	if (!tracepoint_print_iter)
2680 		tracepoint_printk = 0;
2681 
2682 	if (save_tracepoint_printk == tracepoint_printk)
2683 		goto out;
2684 
2685 	if (tracepoint_printk)
2686 		static_key_enable(&tracepoint_printk_key.key);
2687 	else
2688 		static_key_disable(&tracepoint_printk_key.key);
2689 
2690  out:
2691 	mutex_unlock(&tracepoint_printk_mutex);
2692 
2693 	return ret;
2694 }
2695 
2696 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2697 {
2698 	if (static_key_false(&tracepoint_printk_key.key))
2699 		output_printk(fbuffer);
2700 
2701 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2702 				    fbuffer->event, fbuffer->entry,
2703 				    fbuffer->flags, fbuffer->pc, fbuffer->regs);
2704 }
2705 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2706 
2707 /*
2708  * Skip 3:
2709  *
2710  *   trace_buffer_unlock_commit_regs()
2711  *   trace_event_buffer_commit()
2712  *   trace_event_raw_event_xxx()
2713  */
2714 # define STACK_SKIP 3
2715 
2716 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2717 				     struct trace_buffer *buffer,
2718 				     struct ring_buffer_event *event,
2719 				     unsigned long flags, int pc,
2720 				     struct pt_regs *regs)
2721 {
2722 	__buffer_unlock_commit(buffer, event);
2723 
2724 	/*
2725 	 * If regs is not set, then skip the necessary functions.
2726 	 * Note, we can still get here via blktrace, wakeup tracer
2727 	 * and mmiotrace, but that's ok if they lose a function or
2728 	 * two. They are not that meaningful.
2729 	 */
2730 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2731 	ftrace_trace_userstack(buffer, flags, pc);
2732 }
2733 
2734 /*
2735  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2736  */
2737 void
2738 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2739 				   struct ring_buffer_event *event)
2740 {
2741 	__buffer_unlock_commit(buffer, event);
2742 }
2743 
2744 static void
2745 trace_process_export(struct trace_export *export,
2746 	       struct ring_buffer_event *event)
2747 {
2748 	struct trace_entry *entry;
2749 	unsigned int size = 0;
2750 
2751 	entry = ring_buffer_event_data(event);
2752 	size = ring_buffer_event_length(event);
2753 	export->write(export, entry, size);
2754 }
2755 
2756 static DEFINE_MUTEX(ftrace_export_lock);
2757 
2758 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2759 
2760 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2761 
2762 static inline void ftrace_exports_enable(void)
2763 {
2764 	static_branch_enable(&ftrace_exports_enabled);
2765 }
2766 
2767 static inline void ftrace_exports_disable(void)
2768 {
2769 	static_branch_disable(&ftrace_exports_enabled);
2770 }
2771 
2772 static void ftrace_exports(struct ring_buffer_event *event)
2773 {
2774 	struct trace_export *export;
2775 
2776 	preempt_disable_notrace();
2777 
2778 	export = rcu_dereference_raw_check(ftrace_exports_list);
2779 	while (export) {
2780 		trace_process_export(export, event);
2781 		export = rcu_dereference_raw_check(export->next);
2782 	}
2783 
2784 	preempt_enable_notrace();
2785 }
2786 
2787 static inline void
2788 add_trace_export(struct trace_export **list, struct trace_export *export)
2789 {
2790 	rcu_assign_pointer(export->next, *list);
2791 	/*
2792 	 * We are adding the export to the list, but another
2793 	 * CPU might be walking that list. We need to make sure
2794 	 * the export->next pointer is valid before another CPU sees
2795 	 * the export pointer inserted into the list.
2796 	 */
2797 	rcu_assign_pointer(*list, export);
2798 }
2799 
2800 static inline int
2801 rm_trace_export(struct trace_export **list, struct trace_export *export)
2802 {
2803 	struct trace_export **p;
2804 
2805 	for (p = list; *p != NULL; p = &(*p)->next)
2806 		if (*p == export)
2807 			break;
2808 
2809 	if (*p != export)
2810 		return -1;
2811 
2812 	rcu_assign_pointer(*p, (*p)->next);
2813 
2814 	return 0;
2815 }
2816 
2817 static inline void
2818 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2819 {
2820 	if (*list == NULL)
2821 		ftrace_exports_enable();
2822 
2823 	add_trace_export(list, export);
2824 }
2825 
2826 static inline int
2827 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2828 {
2829 	int ret;
2830 
2831 	ret = rm_trace_export(list, export);
2832 	if (*list == NULL)
2833 		ftrace_exports_disable();
2834 
2835 	return ret;
2836 }
2837 
2838 int register_ftrace_export(struct trace_export *export)
2839 {
2840 	if (WARN_ON_ONCE(!export->write))
2841 		return -1;
2842 
2843 	mutex_lock(&ftrace_export_lock);
2844 
2845 	add_ftrace_export(&ftrace_exports_list, export);
2846 
2847 	mutex_unlock(&ftrace_export_lock);
2848 
2849 	return 0;
2850 }
2851 EXPORT_SYMBOL_GPL(register_ftrace_export);
2852 
2853 int unregister_ftrace_export(struct trace_export *export)
2854 {
2855 	int ret;
2856 
2857 	mutex_lock(&ftrace_export_lock);
2858 
2859 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2860 
2861 	mutex_unlock(&ftrace_export_lock);
2862 
2863 	return ret;
2864 }
2865 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
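
/*
 * Illustrative trace_export user (hypothetical names; the write()
 * prototype mirrors the call made from trace_process_export() above,
 * see include/linux/trace.h for the authoritative definition). A real
 * exporter would copy the raw trace entry bytes to wherever it wants
 * them; an empty callback is still a valid registration.
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry, unsigned int size)
 *	{
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	static int __init example_export_init(void)
 *	{
 *		return register_ftrace_export(&example_export);
 *	}
 *
 *	static void __exit example_export_exit(void)
 *	{
 *		unregister_ftrace_export(&example_export);
 *	}
 */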
2866 
2867 void
2868 trace_function(struct trace_array *tr,
2869 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2870 	       int pc)
2871 {
2872 	struct trace_event_call *call = &event_function;
2873 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2874 	struct ring_buffer_event *event;
2875 	struct ftrace_entry *entry;
2876 
2877 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2878 					    flags, pc);
2879 	if (!event)
2880 		return;
2881 	entry	= ring_buffer_event_data(event);
2882 	entry->ip			= ip;
2883 	entry->parent_ip		= parent_ip;
2884 
2885 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2886 		if (static_branch_unlikely(&ftrace_exports_enabled))
2887 			ftrace_exports(event);
2888 		__buffer_unlock_commit(buffer, event);
2889 	}
2890 }
2891 
2892 #ifdef CONFIG_STACKTRACE
2893 
2894 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2895 #define FTRACE_KSTACK_NESTING	4
2896 
2897 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2898 
2899 struct ftrace_stack {
2900 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2901 };
2902 
2903 
2904 struct ftrace_stacks {
2905 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2906 };
2907 
2908 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2909 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2910 
2911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2912 				 unsigned long flags,
2913 				 int skip, int pc, struct pt_regs *regs)
2914 {
2915 	struct trace_event_call *call = &event_kernel_stack;
2916 	struct ring_buffer_event *event;
2917 	unsigned int size, nr_entries;
2918 	struct ftrace_stack *fstack;
2919 	struct stack_entry *entry;
2920 	int stackidx;
2921 
2922 	/*
2923 	 * Add one, for this function and the call to stack_trace_save().
2924 	 * If regs is set, then these functions will not be in the way.
2925 	 */
2926 #ifndef CONFIG_UNWINDER_ORC
2927 	if (!regs)
2928 		skip++;
2929 #endif
2930 
2931 	/*
2932 	 * Since events can happen in NMIs there's no safe way to
2933 	 * use a single per cpu ftrace_stack. Instead, reserve one of the
2934 	 * FTRACE_KSTACK_NESTING slots; an interrupt or NMI that comes in
2935 	 * while we hold it will simply use the next slot.
2936 	 */
2937 	preempt_disable_notrace();
2938 
2939 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2940 
2941 	/* This should never happen. If it does, yell once and skip */
2942 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2943 		goto out;
2944 
2945 	/*
2946 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2947 	 * interrupt will either see the value pre increment or post
2948 	 * increment. If the interrupt happens pre increment it will have
2949 	 * restored the counter when it returns.  We just need a barrier to
2950 	 * keep gcc from moving things around.
2951 	 */
2952 	barrier();
2953 
2954 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2955 	size = ARRAY_SIZE(fstack->calls);
2956 
2957 	if (regs) {
2958 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2959 						   size, skip);
2960 	} else {
2961 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2962 	}
2963 
2964 	size = nr_entries * sizeof(unsigned long);
2965 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2966 					    sizeof(*entry) + size, flags, pc);
2967 	if (!event)
2968 		goto out;
2969 	entry = ring_buffer_event_data(event);
2970 
2971 	memcpy(&entry->caller, fstack->calls, size);
2972 	entry->size = nr_entries;
2973 
2974 	if (!call_filter_check_discard(call, entry, buffer, event))
2975 		__buffer_unlock_commit(buffer, event);
2976 
2977  out:
2978 	/* Again, don't let gcc optimize things here */
2979 	barrier();
2980 	__this_cpu_dec(ftrace_stack_reserve);
2981 	preempt_enable_notrace();
2982 
2983 }
2984 
2985 static inline void ftrace_trace_stack(struct trace_array *tr,
2986 				      struct trace_buffer *buffer,
2987 				      unsigned long flags,
2988 				      int skip, int pc, struct pt_regs *regs)
2989 {
2990 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2991 		return;
2992 
2993 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2994 }
2995 
2996 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2997 		   int pc)
2998 {
2999 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3000 
3001 	if (rcu_is_watching()) {
3002 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3003 		return;
3004 	}
3005 
3006 	/*
3007 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3008 	 * but if the above rcu_is_watching() failed, then the NMI
3009 	 * triggered someplace critical, and rcu_irq_enter() should
3010 	 * not be called from NMI.
3011 	 */
3012 	if (unlikely(in_nmi()))
3013 		return;
3014 
3015 	rcu_irq_enter_irqson();
3016 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3017 	rcu_irq_exit_irqson();
3018 }
3019 
3020 /**
3021  * trace_dump_stack - record a stack back trace in the trace buffer
3022  * @skip: Number of functions to skip (helper handlers)
3023  */
3024 void trace_dump_stack(int skip)
3025 {
3026 	unsigned long flags;
3027 
3028 	if (tracing_disabled || tracing_selftest_running)
3029 		return;
3030 
3031 	local_save_flags(flags);
3032 
3033 #ifndef CONFIG_UNWINDER_ORC
3034 	/* Skip 1 to skip this function. */
3035 	skip++;
3036 #endif
3037 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3038 			     flags, skip, preempt_count(), NULL);
3039 }
3040 EXPORT_SYMBOL_GPL(trace_dump_stack);
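
/*
 * Typical (hypothetical) use from a debug path: record the current kernel
 * stack into the trace buffer when something unexpected happens. @skip
 * trims helper frames; 0 starts at the caller of trace_dump_stack().
 *
 *	if (unexpected_condition)
 *		trace_dump_stack(0);
 */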
3041 
3042 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3043 static DEFINE_PER_CPU(int, user_stack_count);
3044 
3045 static void
3046 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3047 {
3048 	struct trace_event_call *call = &event_user_stack;
3049 	struct ring_buffer_event *event;
3050 	struct userstack_entry *entry;
3051 
3052 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3053 		return;
3054 
3055 	/*
3056 	 * NMIs can not handle page faults, even with fix ups.
3057 	 * Saving the user stack can (and often does) fault.
3058 	 */
3059 	if (unlikely(in_nmi()))
3060 		return;
3061 
3062 	/*
3063 	 * prevent recursion, since the user stack tracing may
3064 	 * trigger other kernel events.
3065 	 */
3066 	preempt_disable();
3067 	if (__this_cpu_read(user_stack_count))
3068 		goto out;
3069 
3070 	__this_cpu_inc(user_stack_count);
3071 
3072 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3073 					    sizeof(*entry), flags, pc);
3074 	if (!event)
3075 		goto out_drop_count;
3076 	entry	= ring_buffer_event_data(event);
3077 
3078 	entry->tgid		= current->tgid;
3079 	memset(&entry->caller, 0, sizeof(entry->caller));
3080 
3081 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3082 	if (!call_filter_check_discard(call, entry, buffer, event))
3083 		__buffer_unlock_commit(buffer, event);
3084 
3085  out_drop_count:
3086 	__this_cpu_dec(user_stack_count);
3087  out:
3088 	preempt_enable();
3089 }
3090 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3091 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3092 				   unsigned long flags, int pc)
3093 {
3094 }
3095 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3096 
3097 #endif /* CONFIG_STACKTRACE */
3098 
3099 /* created for use with alloc_percpu */
3100 struct trace_buffer_struct {
3101 	int nesting;
3102 	char buffer[4][TRACE_BUF_SIZE];
3103 };
3104 
3105 static struct trace_buffer_struct *trace_percpu_buffer;
3106 
3107 /*
3108  * This allows for lockless recording.  If we're nested too deeply, then
3109  * this returns NULL.
3110  */
3111 static char *get_trace_buf(void)
3112 {
3113 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3114 
3115 	if (!buffer || buffer->nesting >= 4)
3116 		return NULL;
3117 
3118 	buffer->nesting++;
3119 
3120 	/* Interrupts must see nesting incremented before we use the buffer */
3121 	barrier();
3122 	return &buffer->buffer[buffer->nesting][0];
3123 }
3124 
3125 static void put_trace_buf(void)
3126 {
3127 	/* Don't let the decrement of nesting leak before this */
3128 	barrier();
3129 	this_cpu_dec(trace_percpu_buffer->nesting);
3130 }
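
/*
 * Condensed sketch of how the trace_vprintk()/trace_vbprintk() paths below
 * use these helpers (buf, fmt and args are assumed to be in scope):
 * preemption is disabled so the per-cpu buffer and nesting count stay on
 * one CPU, and every successful get is balanced by a put.
 *
 *	preempt_disable_notrace();
 *
 *	buf = get_trace_buf();
 *	if (buf) {
 *		vscnprintf(buf, TRACE_BUF_SIZE, fmt, args);
 *		put_trace_buf();
 *	}
 *
 *	preempt_enable_notrace();
 */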
3131 
3132 static int alloc_percpu_trace_buffer(void)
3133 {
3134 	struct trace_buffer_struct *buffers;
3135 
3136 	buffers = alloc_percpu(struct trace_buffer_struct);
3137 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3138 		return -ENOMEM;
3139 
3140 	trace_percpu_buffer = buffers;
3141 	return 0;
3142 }
3143 
3144 static int buffers_allocated;
3145 
3146 void trace_printk_init_buffers(void)
3147 {
3148 	if (buffers_allocated)
3149 		return;
3150 
3151 	if (alloc_percpu_trace_buffer())
3152 		return;
3153 
3154 	/* trace_printk() is for debug use only. Don't use it in production. */
3155 
3156 	pr_warn("\n");
3157 	pr_warn("**********************************************************\n");
3158 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3159 	pr_warn("**                                                      **\n");
3160 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3161 	pr_warn("**                                                      **\n");
3162 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3163 	pr_warn("** unsafe for production use.                           **\n");
3164 	pr_warn("**                                                      **\n");
3165 	pr_warn("** If you see this message and you are not debugging    **\n");
3166 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3167 	pr_warn("**                                                      **\n");
3168 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3169 	pr_warn("**********************************************************\n");
3170 
3171 	/* Expand the buffers to set size */
3172 	tracing_update_buffers();
3173 
3174 	buffers_allocated = 1;
3175 
3176 	/*
3177 	 * trace_printk_init_buffers() can be called by modules.
3178 	 * If that happens, then we need to start cmdline recording
3179 	 * directly here. If the global_trace.buffer is already
3180 	 * directly here. If the global_trace.array_buffer.buffer is already
3181 	 */
3182 	if (global_trace.array_buffer.buffer)
3183 		tracing_start_cmdline_record();
3184 }
3185 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
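
/*
 * The banner above is triggered simply by having a trace_printk() call
 * compiled into the kernel or into a loaded module. A minimal,
 * hypothetical example in a driver:
 *
 *	static irqreturn_t example_irq_handler(int irq, void *data)
 *	{
 *		trace_printk("irq %d fired\n", irq);
 *		return IRQ_HANDLED;
 *	}
 *
 * The output lands in the trace buffer (tracefs "trace"), not in dmesg.
 */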
3186 
3187 void trace_printk_start_comm(void)
3188 {
3189 	/* Start tracing comms if trace printk is set */
3190 	if (!buffers_allocated)
3191 		return;
3192 	tracing_start_cmdline_record();
3193 }
3194 
3195 static void trace_printk_start_stop_comm(int enabled)
3196 {
3197 	if (!buffers_allocated)
3198 		return;
3199 
3200 	if (enabled)
3201 		tracing_start_cmdline_record();
3202 	else
3203 		tracing_stop_cmdline_record();
3204 }
3205 
3206 /**
3207  * trace_vbprintk - write binary msg to tracing buffer
3208  * @ip:    The address of the caller
3209  * @fmt:   The string format to write to the buffer
3210  * @args:  Arguments for @fmt
3211  */
3212 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3213 {
3214 	struct trace_event_call *call = &event_bprint;
3215 	struct ring_buffer_event *event;
3216 	struct trace_buffer *buffer;
3217 	struct trace_array *tr = &global_trace;
3218 	struct bprint_entry *entry;
3219 	unsigned long flags;
3220 	char *tbuffer;
3221 	int len = 0, size, pc;
3222 
3223 	if (unlikely(tracing_selftest_running || tracing_disabled))
3224 		return 0;
3225 
3226 	/* Don't pollute graph traces with trace_vprintk internals */
3227 	pause_graph_tracing();
3228 
3229 	pc = preempt_count();
3230 	preempt_disable_notrace();
3231 
3232 	tbuffer = get_trace_buf();
3233 	if (!tbuffer) {
3234 		len = 0;
3235 		goto out_nobuffer;
3236 	}
3237 
3238 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3239 
3240 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3241 		goto out_put;
3242 
3243 	local_save_flags(flags);
3244 	size = sizeof(*entry) + sizeof(u32) * len;
3245 	buffer = tr->array_buffer.buffer;
3246 	ring_buffer_nest_start(buffer);
3247 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3248 					    flags, pc);
3249 	if (!event)
3250 		goto out;
3251 	entry = ring_buffer_event_data(event);
3252 	entry->ip			= ip;
3253 	entry->fmt			= fmt;
3254 
3255 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3256 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3257 		__buffer_unlock_commit(buffer, event);
3258 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3259 	}
3260 
3261 out:
3262 	ring_buffer_nest_end(buffer);
3263 out_put:
3264 	put_trace_buf();
3265 
3266 out_nobuffer:
3267 	preempt_enable_notrace();
3268 	unpause_graph_tracing();
3269 
3270 	return len;
3271 }
3272 EXPORT_SYMBOL_GPL(trace_vbprintk);
3273 
3274 __printf(3, 0)
3275 static int
3276 __trace_array_vprintk(struct trace_buffer *buffer,
3277 		      unsigned long ip, const char *fmt, va_list args)
3278 {
3279 	struct trace_event_call *call = &event_print;
3280 	struct ring_buffer_event *event;
3281 	int len = 0, size, pc;
3282 	struct print_entry *entry;
3283 	unsigned long flags;
3284 	char *tbuffer;
3285 
3286 	if (tracing_disabled || tracing_selftest_running)
3287 		return 0;
3288 
3289 	/* Don't pollute graph traces with trace_vprintk internals */
3290 	pause_graph_tracing();
3291 
3292 	pc = preempt_count();
3293 	preempt_disable_notrace();
3294 
3295 
3296 	tbuffer = get_trace_buf();
3297 	if (!tbuffer) {
3298 		len = 0;
3299 		goto out_nobuffer;
3300 	}
3301 
3302 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3303 
3304 	local_save_flags(flags);
3305 	size = sizeof(*entry) + len + 1;
3306 	ring_buffer_nest_start(buffer);
3307 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3308 					    flags, pc);
3309 	if (!event)
3310 		goto out;
3311 	entry = ring_buffer_event_data(event);
3312 	entry->ip = ip;
3313 
3314 	memcpy(&entry->buf, tbuffer, len + 1);
3315 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3316 		__buffer_unlock_commit(buffer, event);
3317 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3318 	}
3319 
3320 out:
3321 	ring_buffer_nest_end(buffer);
3322 	put_trace_buf();
3323 
3324 out_nobuffer:
3325 	preempt_enable_notrace();
3326 	unpause_graph_tracing();
3327 
3328 	return len;
3329 }
3330 
3331 __printf(3, 0)
3332 int trace_array_vprintk(struct trace_array *tr,
3333 			unsigned long ip, const char *fmt, va_list args)
3334 {
3335 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3336 }
3337 
3338 __printf(3, 0)
3339 int trace_array_printk(struct trace_array *tr,
3340 		       unsigned long ip, const char *fmt, ...)
3341 {
3342 	int ret;
3343 	va_list ap;
3344 
3345 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3346 		return 0;
3347 
3348 	if (!tr)
3349 		return -ENOENT;
3350 
3351 	va_start(ap, fmt);
3352 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3353 	va_end(ap);
3354 	return ret;
3355 }
3356 EXPORT_SYMBOL_GPL(trace_array_printk);
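
/*
 * Sketch of a caller that logs into its own instance instead of the global
 * buffer (hypothetical names; assumes the instance API exported from this
 * file, e.g. trace_array_get_by_name(), is available in this kernel):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("example_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	trace_array_printk(tr, _THIS_IP_, "hello from example code\n");
 *
 * The reference should later be dropped with trace_array_put() when the
 * caller is done with the instance.
 */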
3357 
3358 __printf(3, 4)
3359 int trace_array_printk_buf(struct trace_buffer *buffer,
3360 			   unsigned long ip, const char *fmt, ...)
3361 {
3362 	int ret;
3363 	va_list ap;
3364 
3365 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3366 		return 0;
3367 
3368 	va_start(ap, fmt);
3369 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3370 	va_end(ap);
3371 	return ret;
3372 }
3373 
3374 __printf(2, 0)
3375 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3376 {
3377 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3378 }
3379 EXPORT_SYMBOL_GPL(trace_vprintk);
3380 
3381 static void trace_iterator_increment(struct trace_iterator *iter)
3382 {
3383 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3384 
3385 	iter->idx++;
3386 	if (buf_iter)
3387 		ring_buffer_iter_advance(buf_iter);
3388 }
3389 
3390 static struct trace_entry *
3391 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3392 		unsigned long *lost_events)
3393 {
3394 	struct ring_buffer_event *event;
3395 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3396 
3397 	if (buf_iter) {
3398 		event = ring_buffer_iter_peek(buf_iter, ts);
3399 		if (lost_events)
3400 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3401 				(unsigned long)-1 : 0;
3402 	} else {
3403 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3404 					 lost_events);
3405 	}
3406 
3407 	if (event) {
3408 		iter->ent_size = ring_buffer_event_length(event);
3409 		return ring_buffer_event_data(event);
3410 	}
3411 	iter->ent_size = 0;
3412 	return NULL;
3413 }
3414 
3415 static struct trace_entry *
3416 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3417 		  unsigned long *missing_events, u64 *ent_ts)
3418 {
3419 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3420 	struct trace_entry *ent, *next = NULL;
3421 	unsigned long lost_events = 0, next_lost = 0;
3422 	int cpu_file = iter->cpu_file;
3423 	u64 next_ts = 0, ts;
3424 	int next_cpu = -1;
3425 	int next_size = 0;
3426 	int cpu;
3427 
3428 	/*
3429 	 * If we are in a per_cpu trace file, don't bother iterating over
3430 	 * all CPUs; just peek at that CPU directly.
3431 	 */
3432 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3433 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3434 			return NULL;
3435 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3436 		if (ent_cpu)
3437 			*ent_cpu = cpu_file;
3438 
3439 		return ent;
3440 	}
3441 
3442 	for_each_tracing_cpu(cpu) {
3443 
3444 		if (ring_buffer_empty_cpu(buffer, cpu))
3445 			continue;
3446 
3447 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3448 
3449 		/*
3450 		 * Pick the entry with the smallest timestamp:
3451 		 */
3452 		if (ent && (!next || ts < next_ts)) {
3453 			next = ent;
3454 			next_cpu = cpu;
3455 			next_ts = ts;
3456 			next_lost = lost_events;
3457 			next_size = iter->ent_size;
3458 		}
3459 	}
3460 
3461 	iter->ent_size = next_size;
3462 
3463 	if (ent_cpu)
3464 		*ent_cpu = next_cpu;
3465 
3466 	if (ent_ts)
3467 		*ent_ts = next_ts;
3468 
3469 	if (missing_events)
3470 		*missing_events = next_lost;
3471 
3472 	return next;
3473 }
3474 
3475 #define STATIC_TEMP_BUF_SIZE	128
3476 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3477 
3478 /* Find the next real entry, without updating the iterator itself */
3479 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3480 					  int *ent_cpu, u64 *ent_ts)
3481 {
3482 	/* __find_next_entry will reset ent_size */
3483 	int ent_size = iter->ent_size;
3484 	struct trace_entry *entry;
3485 
3486 	/*
3487 	 * If called from ftrace_dump(), then the iter->temp buffer
3488 	 * will be the static_temp_buf and not created from kmalloc.
3489 	 * If the entry size is greater than the buffer, we can
3490 	 * not save it. Just return NULL in that case. This is only
3491 	 * used to add markers when two consecutive events' time
3492 	 * stamps have a large delta. See trace_print_lat_context()
3493 	 */
3494 	if (iter->temp == static_temp_buf &&
3495 	    STATIC_TEMP_BUF_SIZE < ent_size)
3496 		return NULL;
3497 
3498 	/*
3499 	 * The __find_next_entry() may call peek_next_entry(), which may
3500 	 * call ring_buffer_peek() that may make the contents of iter->ent
3501 	 * undefined. Need to copy iter->ent now.
3502 	 */
3503 	if (iter->ent && iter->ent != iter->temp) {
3504 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3505 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3506 			kfree(iter->temp);
3507 			iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3508 			if (!iter->temp)
3509 				return NULL;
3510 		}
3511 		memcpy(iter->temp, iter->ent, iter->ent_size);
3512 		iter->temp_size = iter->ent_size;
3513 		iter->ent = iter->temp;
3514 	}
3515 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3516 	/* Put back the original ent_size */
3517 	iter->ent_size = ent_size;
3518 
3519 	return entry;
3520 }
3521 
3522 /* Find the next real entry, and increment the iterator to the next entry */
3523 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3524 {
3525 	iter->ent = __find_next_entry(iter, &iter->cpu,
3526 				      &iter->lost_events, &iter->ts);
3527 
3528 	if (iter->ent)
3529 		trace_iterator_increment(iter);
3530 
3531 	return iter->ent ? iter : NULL;
3532 }
3533 
3534 static void trace_consume(struct trace_iterator *iter)
3535 {
3536 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3537 			    &iter->lost_events);
3538 }
3539 
3540 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3541 {
3542 	struct trace_iterator *iter = m->private;
3543 	int i = (int)*pos;
3544 	void *ent;
3545 
3546 	WARN_ON_ONCE(iter->leftover);
3547 
3548 	(*pos)++;
3549 
3550 	/* can't go backwards */
3551 	if (iter->idx > i)
3552 		return NULL;
3553 
3554 	if (iter->idx < 0)
3555 		ent = trace_find_next_entry_inc(iter);
3556 	else
3557 		ent = iter;
3558 
3559 	while (ent && iter->idx < i)
3560 		ent = trace_find_next_entry_inc(iter);
3561 
3562 	iter->pos = *pos;
3563 
3564 	return ent;
3565 }
3566 
3567 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3568 {
3569 	struct ring_buffer_event *event;
3570 	struct ring_buffer_iter *buf_iter;
3571 	unsigned long entries = 0;
3572 	u64 ts;
3573 
3574 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3575 
3576 	buf_iter = trace_buffer_iter(iter, cpu);
3577 	if (!buf_iter)
3578 		return;
3579 
3580 	ring_buffer_iter_reset(buf_iter);
3581 
3582 	/*
3583 	 * With the max latency tracers, we could have the case that
3584 	 * a reset never took place on a cpu. This is evident from
3585 	 * the timestamp being before the start of the buffer.
3586 	 */
3587 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3588 		if (ts >= iter->array_buffer->time_start)
3589 			break;
3590 		entries++;
3591 		ring_buffer_iter_advance(buf_iter);
3592 	}
3593 
3594 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3595 }
3596 
3597 /*
3598  * The current tracer is copied to avoid taking a global lock
3599  * all around.
3600  */
3601 static void *s_start(struct seq_file *m, loff_t *pos)
3602 {
3603 	struct trace_iterator *iter = m->private;
3604 	struct trace_array *tr = iter->tr;
3605 	int cpu_file = iter->cpu_file;
3606 	void *p = NULL;
3607 	loff_t l = 0;
3608 	int cpu;
3609 
3610 	/*
3611 	 * Copy the tracer to avoid using a global lock all around.
3612 	 * iter->trace is a copy of current_trace; the name pointer can
3613 	 * be compared instead of doing a strcmp(), as iter->trace->name
3614 	 * will point to the same string as current_trace->name.
3615 	 */
3616 	mutex_lock(&trace_types_lock);
3617 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3618 		*iter->trace = *tr->current_trace;
3619 	mutex_unlock(&trace_types_lock);
3620 
3621 #ifdef CONFIG_TRACER_MAX_TRACE
3622 	if (iter->snapshot && iter->trace->use_max_tr)
3623 		return ERR_PTR(-EBUSY);
3624 #endif
3625 
3626 	if (!iter->snapshot)
3627 		atomic_inc(&trace_record_taskinfo_disabled);
3628 
3629 	if (*pos != iter->pos) {
3630 		iter->ent = NULL;
3631 		iter->cpu = 0;
3632 		iter->idx = -1;
3633 
3634 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3635 			for_each_tracing_cpu(cpu)
3636 				tracing_iter_reset(iter, cpu);
3637 		} else
3638 			tracing_iter_reset(iter, cpu_file);
3639 
3640 		iter->leftover = 0;
3641 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3642 			;
3643 
3644 	} else {
3645 		/*
3646 		 * If we overflowed the seq_file before, then we want
3647 		 * to just reuse the trace_seq buffer again.
3648 		 */
3649 		if (iter->leftover)
3650 			p = iter;
3651 		else {
3652 			l = *pos - 1;
3653 			p = s_next(m, p, &l);
3654 		}
3655 	}
3656 
3657 	trace_event_read_lock();
3658 	trace_access_lock(cpu_file);
3659 	return p;
3660 }
3661 
3662 static void s_stop(struct seq_file *m, void *p)
3663 {
3664 	struct trace_iterator *iter = m->private;
3665 
3666 #ifdef CONFIG_TRACER_MAX_TRACE
3667 	if (iter->snapshot && iter->trace->use_max_tr)
3668 		return;
3669 #endif
3670 
3671 	if (!iter->snapshot)
3672 		atomic_dec(&trace_record_taskinfo_disabled);
3673 
3674 	trace_access_unlock(iter->cpu_file);
3675 	trace_event_read_unlock();
3676 }
3677 
3678 static void
3679 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3680 		      unsigned long *entries, int cpu)
3681 {
3682 	unsigned long count;
3683 
3684 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3685 	/*
3686 	 * If this buffer has skipped entries, then we hold all
3687 	 * entries for the trace and we need to ignore the
3688 	 * ones before the time stamp.
3689 	 */
3690 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3691 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3692 		/* total is the same as the entries */
3693 		*total = count;
3694 	} else
3695 		*total = count +
3696 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3697 	*entries = count;
3698 }
3699 
3700 static void
3701 get_total_entries(struct array_buffer *buf,
3702 		  unsigned long *total, unsigned long *entries)
3703 {
3704 	unsigned long t, e;
3705 	int cpu;
3706 
3707 	*total = 0;
3708 	*entries = 0;
3709 
3710 	for_each_tracing_cpu(cpu) {
3711 		get_total_entries_cpu(buf, &t, &e, cpu);
3712 		*total += t;
3713 		*entries += e;
3714 	}
3715 }
3716 
3717 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3718 {
3719 	unsigned long total, entries;
3720 
3721 	if (!tr)
3722 		tr = &global_trace;
3723 
3724 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3725 
3726 	return entries;
3727 }
3728 
3729 unsigned long trace_total_entries(struct trace_array *tr)
3730 {
3731 	unsigned long total, entries;
3732 
3733 	if (!tr)
3734 		tr = &global_trace;
3735 
3736 	get_total_entries(&tr->array_buffer, &total, &entries);
3737 
3738 	return entries;
3739 }
3740 
3741 static void print_lat_help_header(struct seq_file *m)
3742 {
3743 	seq_puts(m, "#                  _------=> CPU#            \n"
3744 		    "#                 / _-----=> irqs-off        \n"
3745 		    "#                | / _----=> need-resched    \n"
3746 		    "#                || / _---=> hardirq/softirq \n"
3747 		    "#                ||| / _--=> preempt-depth   \n"
3748 		    "#                |||| /     delay            \n"
3749 		    "#  cmd     pid   ||||| time  |   caller      \n"
3750 		    "#     \\   /      |||||  \\    |   /         \n");
3751 }
3752 
3753 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3754 {
3755 	unsigned long total;
3756 	unsigned long entries;
3757 
3758 	get_total_entries(buf, &total, &entries);
3759 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3760 		   entries, total, num_online_cpus());
3761 	seq_puts(m, "#\n");
3762 }
3763 
3764 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3765 				   unsigned int flags)
3766 {
3767 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3768 
3769 	print_event_info(buf, m);
3770 
3771 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3772 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3773 }
3774 
3775 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3776 				       unsigned int flags)
3777 {
3778 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3779 	const char *space = "          ";
3780 	int prec = tgid ? 10 : 2;
3781 
3782 	print_event_info(buf, m);
3783 
3784 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3785 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3786 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3787 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3788 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3789 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3790 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3791 }
3792 
3793 void
3794 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3795 {
3796 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3797 	struct array_buffer *buf = iter->array_buffer;
3798 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3799 	struct tracer *type = iter->trace;
3800 	unsigned long entries;
3801 	unsigned long total;
3802 	const char *name = "preemption";
3803 
3804 	name = type->name;
3805 
3806 	get_total_entries(buf, &total, &entries);
3807 
3808 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3809 		   name, UTS_RELEASE);
3810 	seq_puts(m, "# -----------------------------------"
3811 		 "---------------------------------\n");
3812 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3813 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3814 		   nsecs_to_usecs(data->saved_latency),
3815 		   entries,
3816 		   total,
3817 		   buf->cpu,
3818 #if defined(CONFIG_PREEMPT_NONE)
3819 		   "server",
3820 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3821 		   "desktop",
3822 #elif defined(CONFIG_PREEMPT)
3823 		   "preempt",
3824 #elif defined(CONFIG_PREEMPT_RT)
3825 		   "preempt_rt",
3826 #else
3827 		   "unknown",
3828 #endif
3829 		   /* These are reserved for later use */
3830 		   0, 0, 0, 0);
3831 #ifdef CONFIG_SMP
3832 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3833 #else
3834 	seq_puts(m, ")\n");
3835 #endif
3836 	seq_puts(m, "#    -----------------\n");
3837 	seq_printf(m, "#    | task: %.16s-%d "
3838 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3839 		   data->comm, data->pid,
3840 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3841 		   data->policy, data->rt_priority);
3842 	seq_puts(m, "#    -----------------\n");
3843 
3844 	if (data->critical_start) {
3845 		seq_puts(m, "#  => started at: ");
3846 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3847 		trace_print_seq(m, &iter->seq);
3848 		seq_puts(m, "\n#  => ended at:   ");
3849 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3850 		trace_print_seq(m, &iter->seq);
3851 		seq_puts(m, "\n#\n");
3852 	}
3853 
3854 	seq_puts(m, "#\n");
3855 }
3856 
3857 static void test_cpu_buff_start(struct trace_iterator *iter)
3858 {
3859 	struct trace_seq *s = &iter->seq;
3860 	struct trace_array *tr = iter->tr;
3861 
3862 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3863 		return;
3864 
3865 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3866 		return;
3867 
3868 	if (cpumask_available(iter->started) &&
3869 	    cpumask_test_cpu(iter->cpu, iter->started))
3870 		return;
3871 
3872 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3873 		return;
3874 
3875 	if (cpumask_available(iter->started))
3876 		cpumask_set_cpu(iter->cpu, iter->started);
3877 
3878 	/* Don't print started cpu buffer for the first entry of the trace */
3879 	if (iter->idx > 1)
3880 		trace_seq_printf(s, "##### CPU %u buffer started #####\n",
3881 				iter->cpu);
3882 }
3883 
3884 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3885 {
3886 	struct trace_array *tr = iter->tr;
3887 	struct trace_seq *s = &iter->seq;
3888 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3889 	struct trace_entry *entry;
3890 	struct trace_event *event;
3891 
3892 	entry = iter->ent;
3893 
3894 	test_cpu_buff_start(iter);
3895 
3896 	event = ftrace_find_event(entry->type);
3897 
3898 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3899 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3900 			trace_print_lat_context(iter);
3901 		else
3902 			trace_print_context(iter);
3903 	}
3904 
3905 	if (trace_seq_has_overflowed(s))
3906 		return TRACE_TYPE_PARTIAL_LINE;
3907 
3908 	if (event)
3909 		return event->funcs->trace(iter, sym_flags, event);
3910 
3911 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3912 
3913 	return trace_handle_return(s);
3914 }
3915 
3916 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3917 {
3918 	struct trace_array *tr = iter->tr;
3919 	struct trace_seq *s = &iter->seq;
3920 	struct trace_entry *entry;
3921 	struct trace_event *event;
3922 
3923 	entry = iter->ent;
3924 
3925 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3926 		trace_seq_printf(s, "%d %d %llu ",
3927 				 entry->pid, iter->cpu, iter->ts);
3928 
3929 	if (trace_seq_has_overflowed(s))
3930 		return TRACE_TYPE_PARTIAL_LINE;
3931 
3932 	event = ftrace_find_event(entry->type);
3933 	if (event)
3934 		return event->funcs->raw(iter, 0, event);
3935 
3936 	trace_seq_printf(s, "%d ?\n", entry->type);
3937 
3938 	return trace_handle_return(s);
3939 }
3940 
3941 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3942 {
3943 	struct trace_array *tr = iter->tr;
3944 	struct trace_seq *s = &iter->seq;
3945 	unsigned char newline = '\n';
3946 	struct trace_entry *entry;
3947 	struct trace_event *event;
3948 
3949 	entry = iter->ent;
3950 
3951 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3952 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3953 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3954 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3955 		if (trace_seq_has_overflowed(s))
3956 			return TRACE_TYPE_PARTIAL_LINE;
3957 	}
3958 
3959 	event = ftrace_find_event(entry->type);
3960 	if (event) {
3961 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3962 		if (ret != TRACE_TYPE_HANDLED)
3963 			return ret;
3964 	}
3965 
3966 	SEQ_PUT_FIELD(s, newline);
3967 
3968 	return trace_handle_return(s);
3969 }
3970 
3971 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3972 {
3973 	struct trace_array *tr = iter->tr;
3974 	struct trace_seq *s = &iter->seq;
3975 	struct trace_entry *entry;
3976 	struct trace_event *event;
3977 
3978 	entry = iter->ent;
3979 
3980 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3981 		SEQ_PUT_FIELD(s, entry->pid);
3982 		SEQ_PUT_FIELD(s, iter->cpu);
3983 		SEQ_PUT_FIELD(s, iter->ts);
3984 		if (trace_seq_has_overflowed(s))
3985 			return TRACE_TYPE_PARTIAL_LINE;
3986 	}
3987 
3988 	event = ftrace_find_event(entry->type);
3989 	return event ? event->funcs->binary(iter, 0, event) :
3990 		TRACE_TYPE_HANDLED;
3991 }
3992 
3993 int trace_empty(struct trace_iterator *iter)
3994 {
3995 	struct ring_buffer_iter *buf_iter;
3996 	int cpu;
3997 
3998 	/* If we are looking at one CPU buffer, only check that one */
3999 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4000 		cpu = iter->cpu_file;
4001 		buf_iter = trace_buffer_iter(iter, cpu);
4002 		if (buf_iter) {
4003 			if (!ring_buffer_iter_empty(buf_iter))
4004 				return 0;
4005 		} else {
4006 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4007 				return 0;
4008 		}
4009 		return 1;
4010 	}
4011 
4012 	for_each_tracing_cpu(cpu) {
4013 		buf_iter = trace_buffer_iter(iter, cpu);
4014 		if (buf_iter) {
4015 			if (!ring_buffer_iter_empty(buf_iter))
4016 				return 0;
4017 		} else {
4018 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4019 				return 0;
4020 		}
4021 	}
4022 
4023 	return 1;
4024 }
4025 
4026 /*  Called with trace_event_read_lock() held. */
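/*
 * Dispatch order: report lost events first, give the tracer's own
 * print_line() callback a chance, then fall back to the msg-only
 * printk formats and finally the binary/hex/raw/default formats
 * selected by the trace_flags.
 */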
4027 enum print_line_t print_trace_line(struct trace_iterator *iter)
4028 {
4029 	struct trace_array *tr = iter->tr;
4030 	unsigned long trace_flags = tr->trace_flags;
4031 	enum print_line_t ret;
4032 
4033 	if (iter->lost_events) {
4034 		if (iter->lost_events == (unsigned long)-1)
4035 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4036 					 iter->cpu);
4037 		else
4038 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4039 					 iter->cpu, iter->lost_events);
4040 		if (trace_seq_has_overflowed(&iter->seq))
4041 			return TRACE_TYPE_PARTIAL_LINE;
4042 	}
4043 
4044 	if (iter->trace && iter->trace->print_line) {
4045 		ret = iter->trace->print_line(iter);
4046 		if (ret != TRACE_TYPE_UNHANDLED)
4047 			return ret;
4048 	}
4049 
4050 	if (iter->ent->type == TRACE_BPUTS &&
4051 			trace_flags & TRACE_ITER_PRINTK &&
4052 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4053 		return trace_print_bputs_msg_only(iter);
4054 
4055 	if (iter->ent->type == TRACE_BPRINT &&
4056 			trace_flags & TRACE_ITER_PRINTK &&
4057 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4058 		return trace_print_bprintk_msg_only(iter);
4059 
4060 	if (iter->ent->type == TRACE_PRINT &&
4061 			trace_flags & TRACE_ITER_PRINTK &&
4062 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4063 		return trace_print_printk_msg_only(iter);
4064 
4065 	if (trace_flags & TRACE_ITER_BIN)
4066 		return print_bin_fmt(iter);
4067 
4068 	if (trace_flags & TRACE_ITER_HEX)
4069 		return print_hex_fmt(iter);
4070 
4071 	if (trace_flags & TRACE_ITER_RAW)
4072 		return print_raw_fmt(iter);
4073 
4074 	return print_trace_fmt(iter);
4075 }
4076 
4077 void trace_latency_header(struct seq_file *m)
4078 {
4079 	struct trace_iterator *iter = m->private;
4080 	struct trace_array *tr = iter->tr;
4081 
4082 	/* print nothing if the buffers are empty */
4083 	if (trace_empty(iter))
4084 		return;
4085 
4086 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4087 		print_trace_header(m, iter);
4088 
4089 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4090 		print_lat_help_header(m);
4091 }
4092 
4093 void trace_default_header(struct seq_file *m)
4094 {
4095 	struct trace_iterator *iter = m->private;
4096 	struct trace_array *tr = iter->tr;
4097 	unsigned long trace_flags = tr->trace_flags;
4098 
4099 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4100 		return;
4101 
4102 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4103 		/* print nothing if the buffers are empty */
4104 		if (trace_empty(iter))
4105 			return;
4106 		print_trace_header(m, iter);
4107 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4108 			print_lat_help_header(m);
4109 	} else {
4110 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4111 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4112 				print_func_help_header_irq(iter->array_buffer,
4113 							   m, trace_flags);
4114 			else
4115 				print_func_help_header(iter->array_buffer, m,
4116 						       trace_flags);
4117 		}
4118 	}
4119 }
4120 
4121 static void test_ftrace_alive(struct seq_file *m)
4122 {
4123 	if (!ftrace_is_dead())
4124 		return;
4125 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4126 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4127 }
4128 
4129 #ifdef CONFIG_TRACER_MAX_TRACE
4130 static void show_snapshot_main_help(struct seq_file *m)
4131 {
4132 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4133 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4134 		    "#                      Takes a snapshot of the main buffer.\n"
4135 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4136 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4137 		    "#                       is not a '0' or '1')\n");
4138 }
4139 
4140 static void show_snapshot_percpu_help(struct seq_file *m)
4141 {
4142 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4143 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4144 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4145 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4146 #else
4147 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4148 		    "#                     Must use main snapshot file to allocate.\n");
4149 #endif
4150 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4151 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4152 		    "#                       is not a '0' or '1')\n");
4153 }
4154 
4155 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4156 {
4157 	if (iter->tr->allocated_snapshot)
4158 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4159 	else
4160 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4161 
4162 	seq_puts(m, "# Snapshot commands:\n");
4163 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4164 		show_snapshot_main_help(m);
4165 	else
4166 		show_snapshot_percpu_help(m);
4167 }
4168 #else
4169 /* Should never be called */
4170 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4171 #endif
4172 
4173 static int s_show(struct seq_file *m, void *v)
4174 {
4175 	struct trace_iterator *iter = v;
4176 	int ret;
4177 
4178 	if (iter->ent == NULL) {
4179 		if (iter->tr) {
4180 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4181 			seq_puts(m, "#\n");
4182 			test_ftrace_alive(m);
4183 		}
4184 		if (iter->snapshot && trace_empty(iter))
4185 			print_snapshot_help(m, iter);
4186 		else if (iter->trace && iter->trace->print_header)
4187 			iter->trace->print_header(m);
4188 		else
4189 			trace_default_header(m);
4190 
4191 	} else if (iter->leftover) {
4192 		/*
4193 		 * If we filled the seq_file buffer earlier, we
4194 		 * want to just show it now.
4195 		 */
4196 		ret = trace_print_seq(m, &iter->seq);
4197 
4198 		/* ret should this time be zero, but you never know */
4199 		iter->leftover = ret;
4200 
4201 	} else {
4202 		print_trace_line(iter);
4203 		ret = trace_print_seq(m, &iter->seq);
4204 		/*
4205 		 * If we overflow the seq_file buffer, then it will
4206 		 * ask us for this data again at start up.
4207 		 * Use that instead.
4208 		 *  ret is 0 if seq_file write succeeded.
4209 		 *        -1 otherwise.
4210 		 */
4211 		iter->leftover = ret;
4212 	}
4213 
4214 	return 0;
4215 }
4216 
4217 /*
4218  * Should be used after trace_array_get(); trace_types_lock
4219  * ensures that i_cdev was already initialized.
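 * trace_create_cpu_file() stores cpu + 1 in i_cdev, so an unset (NULL)
 * i_cdev maps to RING_BUFFER_ALL_CPUS.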
4220  */
4221 static inline int tracing_get_cpu(struct inode *inode)
4222 {
4223 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4224 		return (long)inode->i_cdev - 1;
4225 	return RING_BUFFER_ALL_CPUS;
4226 }
4227 
4228 static const struct seq_operations tracer_seq_ops = {
4229 	.start		= s_start,
4230 	.next		= s_next,
4231 	.stop		= s_stop,
4232 	.show		= s_show,
4233 };
4234 
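/*
 * Set up the iterator backing the "trace" seq_file: copy the current
 * tracer, pick the snapshot (max) buffer when requested, optionally
 * stop tracing (pause-on-trace) and prepare a ring buffer iterator for
 * each CPU that will be read.
 */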
4235 static struct trace_iterator *
4236 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4237 {
4238 	struct trace_array *tr = inode->i_private;
4239 	struct trace_iterator *iter;
4240 	int cpu;
4241 
4242 	if (tracing_disabled)
4243 		return ERR_PTR(-ENODEV);
4244 
4245 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4246 	if (!iter)
4247 		return ERR_PTR(-ENOMEM);
4248 
4249 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4250 				    GFP_KERNEL);
4251 	if (!iter->buffer_iter)
4252 		goto release;
4253 
4254 	/*
4255 	 * trace_find_next_entry() may need to save off iter->ent.
4256 	 * It will place it into the iter->temp buffer. As most
4257 	 * events are less than 128, allocate a buffer of that size.
4258 	 * If one is greater, then trace_find_next_entry() will
4259 	 * allocate a new buffer to adjust for the bigger iter->ent.
4260 	 * It's not critical if it fails to get allocated here.
4261 	 */
4262 	iter->temp = kmalloc(128, GFP_KERNEL);
4263 	if (iter->temp)
4264 		iter->temp_size = 128;
4265 
4266 	/*
4267 	 * We make a copy of the current tracer to avoid concurrent
4268 	 * changes on it while we are reading.
4269 	 */
4270 	mutex_lock(&trace_types_lock);
4271 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4272 	if (!iter->trace)
4273 		goto fail;
4274 
4275 	*iter->trace = *tr->current_trace;
4276 
4277 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4278 		goto fail;
4279 
4280 	iter->tr = tr;
4281 
4282 #ifdef CONFIG_TRACER_MAX_TRACE
4283 	/* Currently only the top directory has a snapshot */
4284 	if (tr->current_trace->print_max || snapshot)
4285 		iter->array_buffer = &tr->max_buffer;
4286 	else
4287 #endif
4288 		iter->array_buffer = &tr->array_buffer;
4289 	iter->snapshot = snapshot;
4290 	iter->pos = -1;
4291 	iter->cpu_file = tracing_get_cpu(inode);
4292 	mutex_init(&iter->mutex);
4293 
4294 	/* Notify the tracer early; before we stop tracing. */
4295 	if (iter->trace->open)
4296 		iter->trace->open(iter);
4297 
4298 	/* Annotate start of buffers if we had overruns */
4299 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4300 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4301 
4302 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4303 	if (trace_clocks[tr->clock_id].in_ns)
4304 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4305 
4306 	/*
4307 	 * If pause-on-trace is enabled, then stop the trace while
4308 	 * dumping, unless this is the "snapshot" file
4309 	 */
4310 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4311 		tracing_stop_tr(tr);
4312 
4313 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4314 		for_each_tracing_cpu(cpu) {
4315 			iter->buffer_iter[cpu] =
4316 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4317 							 cpu, GFP_KERNEL);
4318 		}
4319 		ring_buffer_read_prepare_sync();
4320 		for_each_tracing_cpu(cpu) {
4321 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4322 			tracing_iter_reset(iter, cpu);
4323 		}
4324 	} else {
4325 		cpu = iter->cpu_file;
4326 		iter->buffer_iter[cpu] =
4327 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4328 						 cpu, GFP_KERNEL);
4329 		ring_buffer_read_prepare_sync();
4330 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4331 		tracing_iter_reset(iter, cpu);
4332 	}
4333 
4334 	mutex_unlock(&trace_types_lock);
4335 
4336 	return iter;
4337 
4338  fail:
4339 	mutex_unlock(&trace_types_lock);
4340 	kfree(iter->trace);
4341 	kfree(iter->temp);
4342 	kfree(iter->buffer_iter);
4343 release:
4344 	seq_release_private(inode, file);
4345 	return ERR_PTR(-ENOMEM);
4346 }
4347 
4348 int tracing_open_generic(struct inode *inode, struct file *filp)
4349 {
4350 	int ret;
4351 
4352 	ret = tracing_check_open_get_tr(NULL);
4353 	if (ret)
4354 		return ret;
4355 
4356 	filp->private_data = inode->i_private;
4357 	return 0;
4358 }
4359 
4360 bool tracing_is_disabled(void)
4361 {
4362 	return tracing_disabled ? true : false;
4363 }
4364 
4365 /*
4366  * Open and update trace_array ref count.
4367  * Must have the current trace_array passed to it.
4368  */
4369 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4370 {
4371 	struct trace_array *tr = inode->i_private;
4372 	int ret;
4373 
4374 	ret = tracing_check_open_get_tr(tr);
4375 	if (ret)
4376 		return ret;
4377 
4378 	filp->private_data = inode->i_private;
4379 
4380 	return 0;
4381 }
4382 
4383 static int tracing_release(struct inode *inode, struct file *file)
4384 {
4385 	struct trace_array *tr = inode->i_private;
4386 	struct seq_file *m = file->private_data;
4387 	struct trace_iterator *iter;
4388 	int cpu;
4389 
4390 	if (!(file->f_mode & FMODE_READ)) {
4391 		trace_array_put(tr);
4392 		return 0;
4393 	}
4394 
4395 	/* Writes do not use seq_file */
4396 	iter = m->private;
4397 	mutex_lock(&trace_types_lock);
4398 
4399 	for_each_tracing_cpu(cpu) {
4400 		if (iter->buffer_iter[cpu])
4401 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4402 	}
4403 
4404 	if (iter->trace && iter->trace->close)
4405 		iter->trace->close(iter);
4406 
4407 	if (!iter->snapshot && tr->stop_count)
4408 		/* reenable tracing if it was previously enabled */
4409 		tracing_start_tr(tr);
4410 
4411 	__trace_array_put(tr);
4412 
4413 	mutex_unlock(&trace_types_lock);
4414 
4415 	mutex_destroy(&iter->mutex);
4416 	free_cpumask_var(iter->started);
4417 	kfree(iter->temp);
4418 	kfree(iter->trace);
4419 	kfree(iter->buffer_iter);
4420 	seq_release_private(inode, file);
4421 
4422 	return 0;
4423 }
4424 
4425 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4426 {
4427 	struct trace_array *tr = inode->i_private;
4428 
4429 	trace_array_put(tr);
4430 	return 0;
4431 }
4432 
4433 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4434 {
4435 	struct trace_array *tr = inode->i_private;
4436 
4437 	trace_array_put(tr);
4438 
4439 	return single_release(inode, file);
4440 }
4441 
4442 static int tracing_open(struct inode *inode, struct file *file)
4443 {
4444 	struct trace_array *tr = inode->i_private;
4445 	struct trace_iterator *iter;
4446 	int ret;
4447 
4448 	ret = tracing_check_open_get_tr(tr);
4449 	if (ret)
4450 		return ret;
4451 
4452 	/* If this file was open for write, then erase contents */
4453 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4454 		int cpu = tracing_get_cpu(inode);
4455 		struct array_buffer *trace_buf = &tr->array_buffer;
4456 
4457 #ifdef CONFIG_TRACER_MAX_TRACE
4458 		if (tr->current_trace->print_max)
4459 			trace_buf = &tr->max_buffer;
4460 #endif
4461 
4462 		if (cpu == RING_BUFFER_ALL_CPUS)
4463 			tracing_reset_online_cpus(trace_buf);
4464 		else
4465 			tracing_reset_cpu(trace_buf, cpu);
4466 	}
4467 
4468 	if (file->f_mode & FMODE_READ) {
4469 		iter = __tracing_open(inode, file, false);
4470 		if (IS_ERR(iter))
4471 			ret = PTR_ERR(iter);
4472 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4473 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4474 	}
4475 
4476 	if (ret < 0)
4477 		trace_array_put(tr);
4478 
4479 	return ret;
4480 }
4481 
4482 /*
4483  * Some tracers are not suitable for instance buffers.
4484  * A tracer is always available for the global array (toplevel)
4485  * or if it explicitly states that it is.
4486  */
4487 static bool
4488 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4489 {
4490 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4491 }
4492 
4493 /* Find the next tracer that this trace array may use */
4494 static struct tracer *
4495 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4496 {
4497 	while (t && !trace_ok_for_array(t, tr))
4498 		t = t->next;
4499 
4500 	return t;
4501 }
4502 
4503 static void *
4504 t_next(struct seq_file *m, void *v, loff_t *pos)
4505 {
4506 	struct trace_array *tr = m->private;
4507 	struct tracer *t = v;
4508 
4509 	(*pos)++;
4510 
4511 	if (t)
4512 		t = get_tracer_for_array(tr, t->next);
4513 
4514 	return t;
4515 }
4516 
4517 static void *t_start(struct seq_file *m, loff_t *pos)
4518 {
4519 	struct trace_array *tr = m->private;
4520 	struct tracer *t;
4521 	loff_t l = 0;
4522 
4523 	mutex_lock(&trace_types_lock);
4524 
4525 	t = get_tracer_for_array(tr, trace_types);
4526 	for (; t && l < *pos; t = t_next(m, t, &l))
4527 		;
4528 
4529 	return t;
4530 }
4531 
4532 static void t_stop(struct seq_file *m, void *p)
4533 {
4534 	mutex_unlock(&trace_types_lock);
4535 }
4536 
4537 static int t_show(struct seq_file *m, void *v)
4538 {
4539 	struct tracer *t = v;
4540 
4541 	if (!t)
4542 		return 0;
4543 
4544 	seq_puts(m, t->name);
4545 	if (t->next)
4546 		seq_putc(m, ' ');
4547 	else
4548 		seq_putc(m, '\n');
4549 
4550 	return 0;
4551 }
4552 
4553 static const struct seq_operations show_traces_seq_ops = {
4554 	.start		= t_start,
4555 	.next		= t_next,
4556 	.stop		= t_stop,
4557 	.show		= t_show,
4558 };
4559 
4560 static int show_traces_open(struct inode *inode, struct file *file)
4561 {
4562 	struct trace_array *tr = inode->i_private;
4563 	struct seq_file *m;
4564 	int ret;
4565 
4566 	ret = tracing_check_open_get_tr(tr);
4567 	if (ret)
4568 		return ret;
4569 
4570 	ret = seq_open(file, &show_traces_seq_ops);
4571 	if (ret) {
4572 		trace_array_put(tr);
4573 		return ret;
4574 	}
4575 
4576 	m = file->private_data;
4577 	m->private = tr;
4578 
4579 	return 0;
4580 }
4581 
4582 static int show_traces_release(struct inode *inode, struct file *file)
4583 {
4584 	struct trace_array *tr = inode->i_private;
4585 
4586 	trace_array_put(tr);
4587 	return seq_release(inode, file);
4588 }
4589 
4590 static ssize_t
4591 tracing_write_stub(struct file *filp, const char __user *ubuf,
4592 		   size_t count, loff_t *ppos)
4593 {
4594 	return count;
4595 }
4596 
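/*
 * llseek for tracing files: defer to seq_lseek() when the file was
 * opened for reading; write-only opens do not use a seq_file, so just
 * reset the position to zero.
 */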
4597 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4598 {
4599 	int ret;
4600 
4601 	if (file->f_mode & FMODE_READ)
4602 		ret = seq_lseek(file, offset, whence);
4603 	else
4604 		file->f_pos = ret = 0;
4605 
4606 	return ret;
4607 }
4608 
4609 static const struct file_operations tracing_fops = {
4610 	.open		= tracing_open,
4611 	.read		= seq_read,
4612 	.write		= tracing_write_stub,
4613 	.llseek		= tracing_lseek,
4614 	.release	= tracing_release,
4615 };
4616 
4617 static const struct file_operations show_traces_fops = {
4618 	.open		= show_traces_open,
4619 	.read		= seq_read,
4620 	.llseek		= seq_lseek,
4621 	.release	= show_traces_release,
4622 };
4623 
4624 static ssize_t
4625 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4626 		     size_t count, loff_t *ppos)
4627 {
4628 	struct trace_array *tr = file_inode(filp)->i_private;
4629 	char *mask_str;
4630 	int len;
4631 
4632 	len = snprintf(NULL, 0, "%*pb\n",
4633 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4634 	mask_str = kmalloc(len, GFP_KERNEL);
4635 	if (!mask_str)
4636 		return -ENOMEM;
4637 
4638 	len = snprintf(mask_str, len, "%*pb\n",
4639 		       cpumask_pr_args(tr->tracing_cpumask));
4640 	if (len >= count) {
4641 		count = -EINVAL;
4642 		goto out_err;
4643 	}
4644 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4645 
4646 out_err:
4647 	kfree(mask_str);
4648 
4649 	return count;
4650 }
4651 
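/*
 * Update which CPUs @tr records on: CPUs cleared in the new mask get
 * their per-CPU recording disabled (and the disabled counter bumped),
 * newly set CPUs are re-enabled, and the new mask is then copied into
 * the trace_array.
 */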
4652 int tracing_set_cpumask(struct trace_array *tr,
4653 			cpumask_var_t tracing_cpumask_new)
4654 {
4655 	int cpu;
4656 
4657 	if (!tr)
4658 		return -EINVAL;
4659 
4660 	local_irq_disable();
4661 	arch_spin_lock(&tr->max_lock);
4662 	for_each_tracing_cpu(cpu) {
4663 		/*
4664 		 * Increase/decrease the disabled counter if we are
4665 		 * about to flip a bit in the cpumask:
4666 		 */
4667 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4668 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4669 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4670 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4671 		}
4672 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4673 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4674 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4675 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4676 		}
4677 	}
4678 	arch_spin_unlock(&tr->max_lock);
4679 	local_irq_enable();
4680 
4681 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4682 
4683 	return 0;
4684 }
4685 
4686 static ssize_t
4687 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4688 		      size_t count, loff_t *ppos)
4689 {
4690 	struct trace_array *tr = file_inode(filp)->i_private;
4691 	cpumask_var_t tracing_cpumask_new;
4692 	int err;
4693 
4694 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4695 		return -ENOMEM;
4696 
4697 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4698 	if (err)
4699 		goto err_free;
4700 
4701 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4702 	if (err)
4703 		goto err_free;
4704 
4705 	free_cpumask_var(tracing_cpumask_new);
4706 
4707 	return count;
4708 
4709 err_free:
4710 	free_cpumask_var(tracing_cpumask_new);
4711 
4712 	return err;
4713 }
4714 
4715 static const struct file_operations tracing_cpumask_fops = {
4716 	.open		= tracing_open_generic_tr,
4717 	.read		= tracing_cpumask_read,
4718 	.write		= tracing_cpumask_write,
4719 	.release	= tracing_release_generic_tr,
4720 	.llseek		= generic_file_llseek,
4721 };
4722 
4723 static int tracing_trace_options_show(struct seq_file *m, void *v)
4724 {
4725 	struct tracer_opt *trace_opts;
4726 	struct trace_array *tr = m->private;
4727 	u32 tracer_flags;
4728 	int i;
4729 
4730 	mutex_lock(&trace_types_lock);
4731 	tracer_flags = tr->current_trace->flags->val;
4732 	trace_opts = tr->current_trace->flags->opts;
4733 
4734 	for (i = 0; trace_options[i]; i++) {
4735 		if (tr->trace_flags & (1 << i))
4736 			seq_printf(m, "%s\n", trace_options[i]);
4737 		else
4738 			seq_printf(m, "no%s\n", trace_options[i]);
4739 	}
4740 
4741 	for (i = 0; trace_opts[i].name; i++) {
4742 		if (tracer_flags & trace_opts[i].bit)
4743 			seq_printf(m, "%s\n", trace_opts[i].name);
4744 		else
4745 			seq_printf(m, "no%s\n", trace_opts[i].name);
4746 	}
4747 	mutex_unlock(&trace_types_lock);
4748 
4749 	return 0;
4750 }
4751 
4752 static int __set_tracer_option(struct trace_array *tr,
4753 			       struct tracer_flags *tracer_flags,
4754 			       struct tracer_opt *opts, int neg)
4755 {
4756 	struct tracer *trace = tracer_flags->trace;
4757 	int ret;
4758 
4759 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4760 	if (ret)
4761 		return ret;
4762 
4763 	if (neg)
4764 		tracer_flags->val &= ~opts->bit;
4765 	else
4766 		tracer_flags->val |= opts->bit;
4767 	return 0;
4768 }
4769 
4770 /* Try to assign a tracer specific option */
4771 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4772 {
4773 	struct tracer *trace = tr->current_trace;
4774 	struct tracer_flags *tracer_flags = trace->flags;
4775 	struct tracer_opt *opts = NULL;
4776 	int i;
4777 
4778 	for (i = 0; tracer_flags->opts[i].name; i++) {
4779 		opts = &tracer_flags->opts[i];
4780 
4781 		if (strcmp(cmp, opts->name) == 0)
4782 			return __set_tracer_option(tr, trace->flags, opts, neg);
4783 	}
4784 
4785 	return -EINVAL;
4786 }
4787 
4788 /* Some tracers require overwrite to stay enabled */
4789 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4790 {
4791 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4792 		return -1;
4793 
4794 	return 0;
4795 }
4796 
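/*
 * Set or clear a single TRACE_ITER_* flag on @tr. The current tracer
 * may veto the change via ->flag_changed(); side effects such as
 * cmdline/TGID recording, fork following, ring buffer overwrite mode
 * and trace_printk are propagated here.
 */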
4797 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4798 {
4799 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4800 	    (mask == TRACE_ITER_RECORD_CMD))
4801 		lockdep_assert_held(&event_mutex);
4802 
4803 	/* do nothing if flag is already set */
4804 	if (!!(tr->trace_flags & mask) == !!enabled)
4805 		return 0;
4806 
4807 	/* Give the tracer a chance to approve the change */
4808 	if (tr->current_trace->flag_changed)
4809 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4810 			return -EINVAL;
4811 
4812 	if (enabled)
4813 		tr->trace_flags |= mask;
4814 	else
4815 		tr->trace_flags &= ~mask;
4816 
4817 	if (mask == TRACE_ITER_RECORD_CMD)
4818 		trace_event_enable_cmd_record(enabled);
4819 
4820 	if (mask == TRACE_ITER_RECORD_TGID) {
4821 		if (!tgid_map)
4822 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4823 					   sizeof(*tgid_map),
4824 					   GFP_KERNEL);
4825 		if (!tgid_map) {
4826 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4827 			return -ENOMEM;
4828 		}
4829 
4830 		trace_event_enable_tgid_record(enabled);
4831 	}
4832 
4833 	if (mask == TRACE_ITER_EVENT_FORK)
4834 		trace_event_follow_fork(tr, enabled);
4835 
4836 	if (mask == TRACE_ITER_FUNC_FORK)
4837 		ftrace_pid_follow_fork(tr, enabled);
4838 
4839 	if (mask == TRACE_ITER_OVERWRITE) {
4840 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4841 #ifdef CONFIG_TRACER_MAX_TRACE
4842 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4843 #endif
4844 	}
4845 
4846 	if (mask == TRACE_ITER_PRINTK) {
4847 		trace_printk_start_stop_comm(enabled);
4848 		trace_printk_control(enabled);
4849 	}
4850 
4851 	return 0;
4852 }
4853 
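/*
 * Apply a single option token to @tr: strip surrounding whitespace,
 * honour a "no" prefix to clear the option, try the core trace_options
 * list first and fall back to the current tracer's private options.
 * The trailing '\0' written by strstrip() is turned back into a space
 * so the caller's buffer is left intact.
 */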
4854 int trace_set_options(struct trace_array *tr, char *option)
4855 {
4856 	char *cmp;
4857 	int neg = 0;
4858 	int ret;
4859 	size_t orig_len = strlen(option);
4860 	int len;
4861 
4862 	cmp = strstrip(option);
4863 
4864 	len = str_has_prefix(cmp, "no");
4865 	if (len)
4866 		neg = 1;
4867 
4868 	cmp += len;
4869 
4870 	mutex_lock(&event_mutex);
4871 	mutex_lock(&trace_types_lock);
4872 
4873 	ret = match_string(trace_options, -1, cmp);
4874 	/* If no option could be set, test the specific tracer options */
4875 	if (ret < 0)
4876 		ret = set_tracer_option(tr, cmp, neg);
4877 	else
4878 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4879 
4880 	mutex_unlock(&trace_types_lock);
4881 	mutex_unlock(&event_mutex);
4882 
4883 	/*
4884 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4885 	 * turn it back into a space.
4886 	 */
4887 	if (orig_len > strlen(option))
4888 		option[strlen(option)] = ' ';
4889 
4890 	return ret;
4891 }
4892 
4893 static void __init apply_trace_boot_options(void)
4894 {
4895 	char *buf = trace_boot_options_buf;
4896 	char *option;
4897 
4898 	while (true) {
4899 		option = strsep(&buf, ",");
4900 
4901 		if (!option)
4902 			break;
4903 
4904 		if (*option)
4905 			trace_set_options(&global_trace, option);
4906 
4907 		/* Put back the comma to allow this to be called again */
4908 		if (buf)
4909 			*(buf - 1) = ',';
4910 	}
4911 }
4912 
4913 static ssize_t
4914 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4915 			size_t cnt, loff_t *ppos)
4916 {
4917 	struct seq_file *m = filp->private_data;
4918 	struct trace_array *tr = m->private;
4919 	char buf[64];
4920 	int ret;
4921 
4922 	if (cnt >= sizeof(buf))
4923 		return -EINVAL;
4924 
4925 	if (copy_from_user(buf, ubuf, cnt))
4926 		return -EFAULT;
4927 
4928 	buf[cnt] = 0;
4929 
4930 	ret = trace_set_options(tr, buf);
4931 	if (ret < 0)
4932 		return ret;
4933 
4934 	*ppos += cnt;
4935 
4936 	return cnt;
4937 }
4938 
4939 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4940 {
4941 	struct trace_array *tr = inode->i_private;
4942 	int ret;
4943 
4944 	ret = tracing_check_open_get_tr(tr);
4945 	if (ret)
4946 		return ret;
4947 
4948 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4949 	if (ret < 0)
4950 		trace_array_put(tr);
4951 
4952 	return ret;
4953 }
4954 
4955 static const struct file_operations tracing_iter_fops = {
4956 	.open		= tracing_trace_options_open,
4957 	.read		= seq_read,
4958 	.llseek		= seq_lseek,
4959 	.release	= tracing_single_release_tr,
4960 	.write		= tracing_trace_options_write,
4961 };
4962 
4963 static const char readme_msg[] =
4964 	"tracing mini-HOWTO:\n\n"
4965 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4966 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4967 	" Important files:\n"
4968 	"  trace\t\t\t- The static contents of the buffer\n"
4969 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4970 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4971 	"  current_tracer\t- function and latency tracers\n"
4972 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4973 	"  error_log\t- error log for failed commands (that support it)\n"
4974 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4975 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4976 	"  trace_clock\t\t- change the clock used to order events\n"
4977 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4978 	"      global:   Synced across CPUs but slows tracing down.\n"
4979 	"     counter:   Not a clock, but just an increment\n"
4980 	"      uptime:   Jiffy counter from time of boot\n"
4981 	"        perf:   Same clock that perf events use\n"
4982 #ifdef CONFIG_X86_64
4983 	"     x86-tsc:   TSC cycle counter\n"
4984 #endif
4985 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4986 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4987 	"    absolute:   Absolute (standalone) timestamp\n"
4988 	"\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4989 	"\n  trace_marker_raw\t\t- Writes into this file are inserted as binary data into the kernel buffer\n"
4990 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4991 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4992 	"\t\t\t  Remove sub-buffer with rmdir\n"
4993 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4994 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4995 	"\t\t\t  option name\n"
4996 	"  saved_cmdlines_size\t- echo the number of entries to keep in the comm-pid list\n"
4997 #ifdef CONFIG_DYNAMIC_FTRACE
4998 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4999 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5000 	"\t\t\t  functions\n"
5001 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5002 	"\t     modules: Can select a group via module\n"
5003 	"\t      Format: :mod:<module-name>\n"
5004 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5005 	"\t    triggers: a command to perform when function is hit\n"
5006 	"\t      Format: <function>:<trigger>[:count]\n"
5007 	"\t     trigger: traceon, traceoff\n"
5008 	"\t\t      enable_event:<system>:<event>\n"
5009 	"\t\t      disable_event:<system>:<event>\n"
5010 #ifdef CONFIG_STACKTRACE
5011 	"\t\t      stacktrace\n"
5012 #endif
5013 #ifdef CONFIG_TRACER_SNAPSHOT
5014 	"\t\t      snapshot\n"
5015 #endif
5016 	"\t\t      dump\n"
5017 	"\t\t      cpudump\n"
5018 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5019 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5020 	"\t     The first one will disable tracing every time do_fault is hit\n"
5021 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5022 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5023 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5024 	"\t       the counter will not decrement. It only decrements when the\n"
5025 	"\t       trigger did work\n"
5026 	"\t     To remove trigger without count:\n"
5027 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5028 	"\t     To remove trigger with a count:\n"
5029 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5030 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5031 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5032 	"\t    modules: Can select a group via module command :mod:\n"
5033 	"\t    Does not accept triggers\n"
5034 #endif /* CONFIG_DYNAMIC_FTRACE */
5035 #ifdef CONFIG_FUNCTION_TRACER
5036 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5037 	"\t\t    (function)\n"
5038 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5039 	"\t\t    (function)\n"
5040 #endif
5041 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5042 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5043 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5044 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5045 #endif
5046 #ifdef CONFIG_TRACER_SNAPSHOT
5047 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5048 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5049 	"\t\t\t  information\n"
5050 #endif
5051 #ifdef CONFIG_STACK_TRACER
5052 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5053 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5054 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5055 	"\t\t\t  new trace)\n"
5056 #ifdef CONFIG_DYNAMIC_FTRACE
5057 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5058 	"\t\t\t  traces\n"
5059 #endif
5060 #endif /* CONFIG_STACK_TRACER */
5061 #ifdef CONFIG_DYNAMIC_EVENTS
5062 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5063 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5064 #endif
5065 #ifdef CONFIG_KPROBE_EVENTS
5066 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5067 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5068 #endif
5069 #ifdef CONFIG_UPROBE_EVENTS
5070 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5071 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5072 #endif
5073 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5074 	"\t  accepts: event-definitions (one definition per line)\n"
5075 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5076 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5077 #ifdef CONFIG_HIST_TRIGGERS
5078 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5079 #endif
5080 	"\t           -:[<group>/]<event>\n"
5081 #ifdef CONFIG_KPROBE_EVENTS
5082 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5083 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5084 #endif
5085 #ifdef CONFIG_UPROBE_EVENTS
5086 	"\t    place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5087 #endif
5088 	"\t     args: <name>=fetcharg[:type]\n"
5089 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5090 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5091 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5092 #else
5093 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5094 #endif
5095 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5096 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5097 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5098 	"\t           <type>\\[<array-size>\\]\n"
5099 #ifdef CONFIG_HIST_TRIGGERS
5100 	"\t    field: <stype> <name>;\n"
5101 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5102 	"\t           [unsigned] char/int/long\n"
5103 #endif
5104 #endif
5105 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5106 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5107 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5108 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5109 	"\t\t\t  events\n"
5110 	"      filter\t\t- If set, only events passing filter are traced\n"
5111 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5112 	"\t\t\t  <event>:\n"
5113 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5114 	"      filter\t\t- If set, only events passing filter are traced\n"
5115 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5116 	"\t    Format: <trigger>[:count][if <filter>]\n"
5117 	"\t   trigger: traceon, traceoff\n"
5118 	"\t            enable_event:<system>:<event>\n"
5119 	"\t            disable_event:<system>:<event>\n"
5120 #ifdef CONFIG_HIST_TRIGGERS
5121 	"\t            enable_hist:<system>:<event>\n"
5122 	"\t            disable_hist:<system>:<event>\n"
5123 #endif
5124 #ifdef CONFIG_STACKTRACE
5125 	"\t\t    stacktrace\n"
5126 #endif
5127 #ifdef CONFIG_TRACER_SNAPSHOT
5128 	"\t\t    snapshot\n"
5129 #endif
5130 #ifdef CONFIG_HIST_TRIGGERS
5131 	"\t\t    hist (see below)\n"
5132 #endif
5133 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5134 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5135 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5136 	"\t                  events/block/block_unplug/trigger\n"
5137 	"\t   The first disables tracing every time block_unplug is hit.\n"
5138 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5139 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5140 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5141 	"\t   Like function triggers, the counter is only decremented if it\n"
5142 	"\t    enabled or disabled tracing.\n"
5143 	"\t   To remove a trigger without a count:\n"
5144 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5145 	"\t   To remove a trigger with a count:\n"
5146 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5147 	"\t   Filters can be ignored when removing a trigger.\n"
5148 #ifdef CONFIG_HIST_TRIGGERS
5149 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5150 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5151 	"\t            [:values=<field1[,field2,...]>]\n"
5152 	"\t            [:sort=<field1[,field2,...]>]\n"
5153 	"\t            [:size=#entries]\n"
5154 	"\t            [:pause][:continue][:clear]\n"
5155 	"\t            [:name=histname1]\n"
5156 	"\t            [:<handler>.<action>]\n"
5157 	"\t            [if <filter>]\n\n"
5158 	"\t    When a matching event is hit, an entry is added to a hash\n"
5159 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5160 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5161 	"\t    correspond to fields in the event's format description.  Keys\n"
5162 	"\t    can be any field, or the special string 'stacktrace'.\n"
5163 	"\t    Compound keys consisting of up to two fields can be specified\n"
5164 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5165 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5166 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5167 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5168 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5169 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5170 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5171 	"\t    its histogram data will be shared with other triggers of the\n"
5172 	"\t    same name, and trigger hits will update this common data.\n\n"
5173 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5174 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5175 	"\t    triggers attached to an event, there will be a table for each\n"
5176 	"\t    trigger in the output.  The table displayed for a named\n"
5177 	"\t    trigger will be the same as any other instance having the\n"
5178 	"\t    same name.  The default format used to display a given field\n"
5179 	"\t    can be modified by appending any of the following modifiers\n"
5180 	"\t    to the field name, as applicable:\n\n"
5181 	"\t            .hex        display a number as a hex value\n"
5182 	"\t            .sym        display an address as a symbol\n"
5183 	"\t            .sym-offset display an address as a symbol and offset\n"
5184 	"\t            .execname   display a common_pid as a program name\n"
5185 	"\t            .syscall    display a syscall id as a syscall name\n"
5186 	"\t            .log2       display log2 value rather than raw number\n"
5187 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5188 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5189 	"\t    trigger or to start a hist trigger but not log any events\n"
5190 	"\t    until told to do so.  'continue' can be used to start or\n"
5191 	"\t    restart a paused hist trigger.\n\n"
5192 	"\t    The 'clear' parameter will clear the contents of a running\n"
5193 	"\t    hist trigger and leave its current paused/active state\n"
5194 	"\t    unchanged.\n\n"
5195 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5196 	"\t    have one event conditionally start and stop another event's\n"
5197 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5198 	"\t    the enable_event and disable_event triggers.\n\n"
5199 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5200 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5201 	"\t        <handler>.<action>\n\n"
5202 	"\t    The available handlers are:\n\n"
5203 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5204 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5205 	"\t        onchange(var)            - invoke action if var changes\n\n"
5206 	"\t    The available actions are:\n\n"
5207 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5208 	"\t        save(field,...)                      - save current event fields\n"
5209 #ifdef CONFIG_TRACER_SNAPSHOT
5210 	"\t        snapshot()                           - snapshot the trace buffer\n"
5211 #endif
5212 #endif
5213 ;
5214 
5215 static ssize_t
5216 tracing_readme_read(struct file *filp, char __user *ubuf,
5217 		       size_t cnt, loff_t *ppos)
5218 {
5219 	return simple_read_from_buffer(ubuf, cnt, ppos,
5220 					readme_msg, strlen(readme_msg));
5221 }
5222 
5223 static const struct file_operations tracing_readme_fops = {
5224 	.open		= tracing_open_generic,
5225 	.read		= tracing_readme_read,
5226 	.llseek		= generic_file_llseek,
5227 };
5228 
5229 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5230 {
5231 	int *ptr = v;
5232 
5233 	if (*pos || m->count)
5234 		ptr++;
5235 
5236 	(*pos)++;
5237 
5238 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5239 		if (trace_find_tgid(*ptr))
5240 			return ptr;
5241 	}
5242 
5243 	return NULL;
5244 }
5245 
5246 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5247 {
5248 	void *v;
5249 	loff_t l = 0;
5250 
5251 	if (!tgid_map)
5252 		return NULL;
5253 
5254 	v = &tgid_map[0];
5255 	while (l <= *pos) {
5256 		v = saved_tgids_next(m, v, &l);
5257 		if (!v)
5258 			return NULL;
5259 	}
5260 
5261 	return v;
5262 }
5263 
5264 static void saved_tgids_stop(struct seq_file *m, void *v)
5265 {
5266 }
5267 
5268 static int saved_tgids_show(struct seq_file *m, void *v)
5269 {
5270 	int pid = (int *)v - tgid_map;
5271 
5272 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5273 	return 0;
5274 }
5275 
5276 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5277 	.start		= saved_tgids_start,
5278 	.stop		= saved_tgids_stop,
5279 	.next		= saved_tgids_next,
5280 	.show		= saved_tgids_show,
5281 };
5282 
5283 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5284 {
5285 	int ret;
5286 
5287 	ret = tracing_check_open_get_tr(NULL);
5288 	if (ret)
5289 		return ret;
5290 
5291 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5292 }
5293 
5294 
5295 static const struct file_operations tracing_saved_tgids_fops = {
5296 	.open		= tracing_saved_tgids_open,
5297 	.read		= seq_read,
5298 	.llseek		= seq_lseek,
5299 	.release	= seq_release,
5300 };
5301 
5302 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5303 {
5304 	unsigned int *ptr = v;
5305 
5306 	if (*pos || m->count)
5307 		ptr++;
5308 
5309 	(*pos)++;
5310 
5311 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5312 	     ptr++) {
5313 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5314 			continue;
5315 
5316 		return ptr;
5317 	}
5318 
5319 	return NULL;
5320 }
5321 
5322 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5323 {
5324 	void *v;
5325 	loff_t l = 0;
5326 
5327 	preempt_disable();
5328 	arch_spin_lock(&trace_cmdline_lock);
5329 
5330 	v = &savedcmd->map_cmdline_to_pid[0];
5331 	while (l <= *pos) {
5332 		v = saved_cmdlines_next(m, v, &l);
5333 		if (!v)
5334 			return NULL;
5335 	}
5336 
5337 	return v;
5338 }
5339 
5340 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5341 {
5342 	arch_spin_unlock(&trace_cmdline_lock);
5343 	preempt_enable();
5344 }
5345 
5346 static int saved_cmdlines_show(struct seq_file *m, void *v)
5347 {
5348 	char buf[TASK_COMM_LEN];
5349 	unsigned int *pid = v;
5350 
5351 	__trace_find_cmdline(*pid, buf);
5352 	seq_printf(m, "%d %s\n", *pid, buf);
5353 	return 0;
5354 }
5355 
5356 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5357 	.start		= saved_cmdlines_start,
5358 	.next		= saved_cmdlines_next,
5359 	.stop		= saved_cmdlines_stop,
5360 	.show		= saved_cmdlines_show,
5361 };
5362 
5363 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5364 {
5365 	int ret;
5366 
5367 	ret = tracing_check_open_get_tr(NULL);
5368 	if (ret)
5369 		return ret;
5370 
5371 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5372 }
5373 
5374 static const struct file_operations tracing_saved_cmdlines_fops = {
5375 	.open		= tracing_saved_cmdlines_open,
5376 	.read		= seq_read,
5377 	.llseek		= seq_lseek,
5378 	.release	= seq_release,
5379 };
5380 
5381 static ssize_t
5382 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5383 				 size_t cnt, loff_t *ppos)
5384 {
5385 	char buf[64];
5386 	int r;
5387 
5388 	arch_spin_lock(&trace_cmdline_lock);
5389 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5390 	arch_spin_unlock(&trace_cmdline_lock);
5391 
5392 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5393 }
5394 
5395 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5396 {
5397 	kfree(s->saved_cmdlines);
5398 	kfree(s->map_cmdline_to_pid);
5399 	kfree(s);
5400 }
5401 
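/*
 * Allocate a replacement saved_cmdlines buffer with room for @val
 * entries, swap it in under trace_cmdline_lock and free the old one.
 */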
5402 static int tracing_resize_saved_cmdlines(unsigned int val)
5403 {
5404 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5405 
5406 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5407 	if (!s)
5408 		return -ENOMEM;
5409 
5410 	if (allocate_cmdlines_buffer(val, s) < 0) {
5411 		kfree(s);
5412 		return -ENOMEM;
5413 	}
5414 
5415 	arch_spin_lock(&trace_cmdline_lock);
5416 	savedcmd_temp = savedcmd;
5417 	savedcmd = s;
5418 	arch_spin_unlock(&trace_cmdline_lock);
5419 	free_saved_cmdlines_buffer(savedcmd_temp);
5420 
5421 	return 0;
5422 }
5423 
5424 static ssize_t
5425 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5426 				  size_t cnt, loff_t *ppos)
5427 {
5428 	unsigned long val;
5429 	int ret;
5430 
5431 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5432 	if (ret)
5433 		return ret;
5434 
5435 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5436 	if (!val || val > PID_MAX_DEFAULT)
5437 		return -EINVAL;
5438 
5439 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5440 	if (ret < 0)
5441 		return ret;
5442 
5443 	*ppos += cnt;
5444 
5445 	return cnt;
5446 }
5447 
5448 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5449 	.open		= tracing_open_generic,
5450 	.read		= tracing_saved_cmdlines_size_read,
5451 	.write		= tracing_saved_cmdlines_size_write,
5452 };
5453 
5454 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5455 static union trace_eval_map_item *
5456 update_eval_map(union trace_eval_map_item *ptr)
5457 {
5458 	if (!ptr->map.eval_string) {
5459 		if (ptr->tail.next) {
5460 			ptr = ptr->tail.next;
5461 			/* Set ptr to the next real item (skip head) */
5462 			ptr++;
5463 		} else
5464 			return NULL;
5465 	}
5466 	return ptr;
5467 }
5468 
5469 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5470 {
5471 	union trace_eval_map_item *ptr = v;
5472 
5473 	/*
5474 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5475 	 * This really should never happen.
5476 	 */
5477 	(*pos)++;
5478 	ptr = update_eval_map(ptr);
5479 	if (WARN_ON_ONCE(!ptr))
5480 		return NULL;
5481 
5482 	ptr++;
5483 	ptr = update_eval_map(ptr);
5484 
5485 	return ptr;
5486 }
5487 
5488 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5489 {
5490 	union trace_eval_map_item *v;
5491 	loff_t l = 0;
5492 
5493 	mutex_lock(&trace_eval_mutex);
5494 
5495 	v = trace_eval_maps;
5496 	if (v)
5497 		v++;
5498 
5499 	while (v && l < *pos) {
5500 		v = eval_map_next(m, v, &l);
5501 	}
5502 
5503 	return v;
5504 }
5505 
5506 static void eval_map_stop(struct seq_file *m, void *v)
5507 {
5508 	mutex_unlock(&trace_eval_mutex);
5509 }
5510 
5511 static int eval_map_show(struct seq_file *m, void *v)
5512 {
5513 	union trace_eval_map_item *ptr = v;
5514 
5515 	seq_printf(m, "%s %ld (%s)\n",
5516 		   ptr->map.eval_string, ptr->map.eval_value,
5517 		   ptr->map.system);
5518 
5519 	return 0;
5520 }
5521 
5522 static const struct seq_operations tracing_eval_map_seq_ops = {
5523 	.start		= eval_map_start,
5524 	.next		= eval_map_next,
5525 	.stop		= eval_map_stop,
5526 	.show		= eval_map_show,
5527 };
5528 
5529 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5530 {
5531 	int ret;
5532 
5533 	ret = tracing_check_open_get_tr(NULL);
5534 	if (ret)
5535 		return ret;
5536 
5537 	return seq_open(filp, &tracing_eval_map_seq_ops);
5538 }
5539 
5540 static const struct file_operations tracing_eval_map_fops = {
5541 	.open		= tracing_eval_map_open,
5542 	.read		= seq_read,
5543 	.llseek		= seq_lseek,
5544 	.release	= seq_release,
5545 };
5546 
5547 static inline union trace_eval_map_item *
5548 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5549 {
5550 	/* Return tail of array given the head */
5551 	return ptr + ptr->head.length + 1;
5552 }
5553 
5554 static void
5555 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5556 			   int len)
5557 {
5558 	struct trace_eval_map **stop;
5559 	struct trace_eval_map **map;
5560 	union trace_eval_map_item *map_array;
5561 	union trace_eval_map_item *ptr;
5562 
5563 	stop = start + len;
5564 
5565 	/*
5566 	 * Each trace_eval_maps chunk contains the maps plus a head and a
5567 	 * tail item: the head holds the module and the length of the array,
5568 	 * and the tail holds a pointer to the next chunk in the list.
5569 	 */
5570 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5571 	if (!map_array) {
5572 		pr_warn("Unable to allocate trace eval mapping\n");
5573 		return;
5574 	}
5575 
5576 	mutex_lock(&trace_eval_mutex);
5577 
5578 	if (!trace_eval_maps)
5579 		trace_eval_maps = map_array;
5580 	else {
5581 		ptr = trace_eval_maps;
5582 		for (;;) {
5583 			ptr = trace_eval_jmp_to_tail(ptr);
5584 			if (!ptr->tail.next)
5585 				break;
5586 			ptr = ptr->tail.next;
5588 		}
5589 		ptr->tail.next = map_array;
5590 	}
5591 	map_array->head.mod = mod;
5592 	map_array->head.length = len;
5593 	map_array++;
5594 
5595 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5596 		map_array->map = **map;
5597 		map_array++;
5598 	}
5599 	memset(map_array, 0, sizeof(*map_array));
5600 
5601 	mutex_unlock(&trace_eval_mutex);
5602 }
5603 
5604 static void trace_create_eval_file(struct dentry *d_tracer)
5605 {
5606 	trace_create_file("eval_map", 0444, d_tracer,
5607 			  NULL, &tracing_eval_map_fops);
5608 }
5609 
5610 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5611 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5612 static inline void trace_insert_eval_map_file(struct module *mod,
5613 			      struct trace_eval_map **start, int len) { }
5614 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5615 
5616 static void trace_insert_eval_map(struct module *mod,
5617 				  struct trace_eval_map **start, int len)
5618 {
5619 	struct trace_eval_map **map;
5620 
5621 	if (len <= 0)
5622 		return;
5623 
5624 	map = start;
5625 
5626 	trace_event_eval_update(map, len);
5627 
5628 	trace_insert_eval_map_file(mod, start, len);
5629 }
5630 
5631 static ssize_t
5632 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5633 		       size_t cnt, loff_t *ppos)
5634 {
5635 	struct trace_array *tr = filp->private_data;
5636 	char buf[MAX_TRACER_SIZE+2];
5637 	int r;
5638 
5639 	mutex_lock(&trace_types_lock);
5640 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5641 	mutex_unlock(&trace_types_lock);
5642 
5643 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5644 }
5645 
5646 int tracer_init(struct tracer *t, struct trace_array *tr)
5647 {
5648 	tracing_reset_online_cpus(&tr->array_buffer);
5649 	return t->init(tr);
5650 }
5651 
5652 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5653 {
5654 	int cpu;
5655 
5656 	for_each_tracing_cpu(cpu)
5657 		per_cpu_ptr(buf->data, cpu)->entries = val;
5658 }
5659 
5660 #ifdef CONFIG_TRACER_MAX_TRACE
5661 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5662 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5663 					struct array_buffer *size_buf, int cpu_id)
5664 {
5665 	int cpu, ret = 0;
5666 
5667 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5668 		for_each_tracing_cpu(cpu) {
5669 			ret = ring_buffer_resize(trace_buf->buffer,
5670 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5671 			if (ret < 0)
5672 				break;
5673 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5674 				per_cpu_ptr(size_buf->data, cpu)->entries;
5675 		}
5676 	} else {
5677 		ret = ring_buffer_resize(trace_buf->buffer,
5678 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5679 		if (ret == 0)
5680 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5681 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5682 	}
5683 
5684 	return ret;
5685 }
5686 #endif /* CONFIG_TRACER_MAX_TRACE */
5687 
5688 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5689 					unsigned long size, int cpu)
5690 {
5691 	int ret;
5692 
5693 	/*
5694 	 * If the kernel or the user changes the size of the ring buffer,
5695 	 * we use the size that was given and no longer need to expand it
5696 	 * to the default size later.
5697 	 */
5698 	ring_buffer_expanded = true;
5699 
5700 	/* May be called before buffers are initialized */
5701 	if (!tr->array_buffer.buffer)
5702 		return 0;
5703 
5704 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5705 	if (ret < 0)
5706 		return ret;
5707 
5708 #ifdef CONFIG_TRACER_MAX_TRACE
5709 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5710 	    !tr->current_trace->use_max_tr)
5711 		goto out;
5712 
5713 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5714 	if (ret < 0) {
5715 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5716 						     &tr->array_buffer, cpu);
5717 		if (r < 0) {
5718 			/*
5719 			 * AARGH! We are left with a different-sized
5720 			 * max buffer!!!!
5721 			 * The max buffer is our "snapshot" buffer.
5722 			 * When a tracer needs a snapshot (one of the
5723 			 * latency tracers), it swaps the max buffer
5724 			 * with the saved snapshot. We succeeded in
5725 			 * updating the size of the main buffer, but failed
5726 			 * to update the size of the max buffer. And when we
5727 			 * tried to reset the main buffer to its original
5728 			 * size, we failed there too. This is very unlikely
5729 			 * to happen, but if it does, warn and kill all
5730 			 * tracing.
5731 			 */
5732 			WARN_ON(1);
5733 			tracing_disabled = 1;
5734 		}
5735 		return ret;
5736 	}
5737 
5738 	if (cpu == RING_BUFFER_ALL_CPUS)
5739 		set_buffer_entries(&tr->max_buffer, size);
5740 	else
5741 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5742 
5743  out:
5744 #endif /* CONFIG_TRACER_MAX_TRACE */
5745 
5746 	if (cpu == RING_BUFFER_ALL_CPUS)
5747 		set_buffer_entries(&tr->array_buffer, size);
5748 	else
5749 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5750 
5751 	return ret;
5752 }
5753 
5754 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5755 				  unsigned long size, int cpu_id)
5756 {
5757 	int ret = size;
5758 
5759 	mutex_lock(&trace_types_lock);
5760 
5761 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5762 		/* make sure this CPU is enabled in the mask */
5763 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5764 			ret = -EINVAL;
5765 			goto out;
5766 		}
5767 	}
5768 
5769 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5770 	if (ret < 0)
5771 		ret = -ENOMEM;
5772 
5773 out:
5774 	mutex_unlock(&trace_types_lock);
5775 
5776 	return ret;
5777 }
5778 
5779 
5780 /**
5781  * tracing_update_buffers - used by tracing facility to expand ring buffers
5782  *
5783  * To save memory when tracing is never used, the ring buffers start out
5784  * at a minimum size on systems that have tracing configured in. Once a
5785  * user starts to use the tracing facility, the buffers need to grow to
5786  * their default size.
5787  *
5788  * This function is to be called when a tracer is about to be used.
5789  */
5790 int tracing_update_buffers(void)
5791 {
5792 	int ret = 0;
5793 
5794 	mutex_lock(&trace_types_lock);
5795 	if (!ring_buffer_expanded)
5796 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5797 						RING_BUFFER_ALL_CPUS);
5798 	mutex_unlock(&trace_types_lock);
5799 
5800 	return ret;
5801 }
5802 
5803 struct trace_option_dentry;
5804 
5805 static void
5806 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5807 
5808 /*
5809  * Used to clear out the tracer before deletion of an instance.
5810  * Must have trace_types_lock held.
5811  */
5812 static void tracing_set_nop(struct trace_array *tr)
5813 {
5814 	if (tr->current_trace == &nop_trace)
5815 		return;
5816 
5817 	tr->current_trace->enabled--;
5818 
5819 	if (tr->current_trace->reset)
5820 		tr->current_trace->reset(tr);
5821 
5822 	tr->current_trace = &nop_trace;
5823 }
5824 
5825 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5826 {
5827 	/* Only enable if the directory has been created already. */
5828 	if (!tr->dir)
5829 		return;
5830 
5831 	create_trace_option_files(tr, t);
5832 }
5833 
5834 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5835 {
5836 	struct tracer *t;
5837 #ifdef CONFIG_TRACER_MAX_TRACE
5838 	bool had_max_tr;
5839 #endif
5840 	int ret = 0;
5841 
5842 	mutex_lock(&trace_types_lock);
5843 
5844 	if (!ring_buffer_expanded) {
5845 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5846 						RING_BUFFER_ALL_CPUS);
5847 		if (ret < 0)
5848 			goto out;
5849 		ret = 0;
5850 	}
5851 
5852 	for (t = trace_types; t; t = t->next) {
5853 		if (strcmp(t->name, buf) == 0)
5854 			break;
5855 	}
5856 	if (!t) {
5857 		ret = -EINVAL;
5858 		goto out;
5859 	}
5860 	if (t == tr->current_trace)
5861 		goto out;
5862 
5863 #ifdef CONFIG_TRACER_SNAPSHOT
5864 	if (t->use_max_tr) {
5865 		arch_spin_lock(&tr->max_lock);
5866 		if (tr->cond_snapshot)
5867 			ret = -EBUSY;
5868 		arch_spin_unlock(&tr->max_lock);
5869 		if (ret)
5870 			goto out;
5871 	}
5872 #endif
5873 	/* Some tracers won't work when started from the kernel command line */
5874 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5875 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5876 			t->name);
5877 		goto out;
5878 	}
5879 
5880 	/* Some tracers are only allowed for the top level buffer */
5881 	if (!trace_ok_for_array(t, tr)) {
5882 		ret = -EINVAL;
5883 		goto out;
5884 	}
5885 
5886 	/* If trace pipe files are being read, we can't change the tracer */
5887 	if (tr->current_trace->ref) {
5888 		ret = -EBUSY;
5889 		goto out;
5890 	}
5891 
5892 	trace_branch_disable();
5893 
5894 	tr->current_trace->enabled--;
5895 
5896 	if (tr->current_trace->reset)
5897 		tr->current_trace->reset(tr);
5898 
5899 	/* Current trace needs to be nop_trace before synchronize_rcu */
5900 	tr->current_trace = &nop_trace;
5901 
5902 #ifdef CONFIG_TRACER_MAX_TRACE
5903 	had_max_tr = tr->allocated_snapshot;
5904 
5905 	if (had_max_tr && !t->use_max_tr) {
5906 		/*
5907 		 * We need to make sure that update_max_tr() sees that
5908 		 * current_trace changed to nop_trace, to keep it from
5909 		 * swapping the buffers after we resize them.
5910 		 * update_max_tr() is called with interrupts disabled,
5911 		 * so a synchronize_rcu() is sufficient.
5912 		 */
5913 		synchronize_rcu();
5914 		free_snapshot(tr);
5915 	}
5916 #endif
5917 
5918 #ifdef CONFIG_TRACER_MAX_TRACE
5919 	if (t->use_max_tr && !had_max_tr) {
5920 		ret = tracing_alloc_snapshot_instance(tr);
5921 		if (ret < 0)
5922 			goto out;
5923 	}
5924 #endif
5925 
5926 	if (t->init) {
5927 		ret = tracer_init(t, tr);
5928 		if (ret)
5929 			goto out;
5930 	}
5931 
5932 	tr->current_trace = t;
5933 	tr->current_trace->enabled++;
5934 	trace_branch_enable(tr);
5935  out:
5936 	mutex_unlock(&trace_types_lock);
5937 
5938 	return ret;
5939 }
5940 
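/*
 * Illustrative use of the current_tracer file, whose writes end up in
 * tracing_set_tracer() via tracing_set_trace_write() below (the set of
 * available tracers depends on the kernel configuration):
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	function_graph function nop
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# cat /sys/kernel/tracing/current_tracer
 *	function
 */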
5941 static ssize_t
5942 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5943 			size_t cnt, loff_t *ppos)
5944 {
5945 	struct trace_array *tr = filp->private_data;
5946 	char buf[MAX_TRACER_SIZE+1];
5947 	int i;
5948 	size_t ret;
5949 	int err;
5950 
5951 	ret = cnt;
5952 
5953 	if (cnt > MAX_TRACER_SIZE)
5954 		cnt = MAX_TRACER_SIZE;
5955 
5956 	if (copy_from_user(buf, ubuf, cnt))
5957 		return -EFAULT;
5958 
5959 	buf[cnt] = 0;
5960 
5961 	/* strip trailing whitespace */
5962 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5963 		buf[i] = 0;
5964 
5965 	err = tracing_set_tracer(tr, buf);
5966 	if (err)
5967 		return err;
5968 
5969 	*ppos += ret;
5970 
5971 	return ret;
5972 }
5973 
5974 static ssize_t
5975 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5976 		   size_t cnt, loff_t *ppos)
5977 {
5978 	char buf[64];
5979 	int r;
5980 
5981 	r = snprintf(buf, sizeof(buf), "%ld\n",
5982 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5983 	if (r > sizeof(buf))
5984 		r = sizeof(buf);
5985 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5986 }
5987 
5988 static ssize_t
5989 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5990 		    size_t cnt, loff_t *ppos)
5991 {
5992 	unsigned long val;
5993 	int ret;
5994 
5995 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5996 	if (ret)
5997 		return ret;
5998 
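	/* The value written is in microseconds; store it in nanoseconds. */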
5999 	*ptr = val * 1000;
6000 
6001 	return cnt;
6002 }
6003 
6004 static ssize_t
6005 tracing_thresh_read(struct file *filp, char __user *ubuf,
6006 		    size_t cnt, loff_t *ppos)
6007 {
6008 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6009 }
6010 
6011 static ssize_t
6012 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6013 		     size_t cnt, loff_t *ppos)
6014 {
6015 	struct trace_array *tr = filp->private_data;
6016 	int ret;
6017 
6018 	mutex_lock(&trace_types_lock);
6019 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6020 	if (ret < 0)
6021 		goto out;
6022 
6023 	if (tr->current_trace->update_thresh) {
6024 		ret = tr->current_trace->update_thresh(tr);
6025 		if (ret < 0)
6026 			goto out;
6027 	}
6028 
6029 	ret = cnt;
6030 out:
6031 	mutex_unlock(&trace_types_lock);
6032 
6033 	return ret;
6034 }
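/*
 * Illustrative use of the tracing_thresh file (the value is in
 * microseconds; 0 disables the threshold):
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh
 */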
6035 
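/*
 * The helpers below back the tracing_max_latency file. The value is read
 * and written in microseconds, and writing 0 resets the recorded maximum,
 * e.g. (illustrative):
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_max_latency
 */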
6036 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6037 
6038 static ssize_t
6039 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6040 		     size_t cnt, loff_t *ppos)
6041 {
6042 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6043 }
6044 
6045 static ssize_t
6046 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6047 		      size_t cnt, loff_t *ppos)
6048 {
6049 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6050 }
6051 
6052 #endif
6053 
6054 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6055 {
6056 	struct trace_array *tr = inode->i_private;
6057 	struct trace_iterator *iter;
6058 	int ret;
6059 
6060 	ret = tracing_check_open_get_tr(tr);
6061 	if (ret)
6062 		return ret;
6063 
6064 	mutex_lock(&trace_types_lock);
6065 
6066 	/* create a buffer to store the information to pass to userspace */
6067 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6068 	if (!iter) {
6069 		ret = -ENOMEM;
6070 		__trace_array_put(tr);
6071 		goto out;
6072 	}
6073 
6074 	trace_seq_init(&iter->seq);
6075 	iter->trace = tr->current_trace;
6076 
6077 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6078 		ret = -ENOMEM;
6079 		goto fail;
6080 	}
6081 
6082 	/* trace pipe does not show start of buffer */
6083 	cpumask_setall(iter->started);
6084 
6085 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6086 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6087 
6088 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6089 	if (trace_clocks[tr->clock_id].in_ns)
6090 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6091 
6092 	iter->tr = tr;
6093 	iter->array_buffer = &tr->array_buffer;
6094 	iter->cpu_file = tracing_get_cpu(inode);
6095 	mutex_init(&iter->mutex);
6096 	filp->private_data = iter;
6097 
6098 	if (iter->trace->pipe_open)
6099 		iter->trace->pipe_open(iter);
6100 
6101 	nonseekable_open(inode, filp);
6102 
6103 	tr->current_trace->ref++;
6104 out:
6105 	mutex_unlock(&trace_types_lock);
6106 	return ret;
6107 
6108 fail:
6109 	kfree(iter);
6110 	__trace_array_put(tr);
6111 	mutex_unlock(&trace_types_lock);
6112 	return ret;
6113 }
6114 
6115 static int tracing_release_pipe(struct inode *inode, struct file *file)
6116 {
6117 	struct trace_iterator *iter = file->private_data;
6118 	struct trace_array *tr = inode->i_private;
6119 
6120 	mutex_lock(&trace_types_lock);
6121 
6122 	tr->current_trace->ref--;
6123 
6124 	if (iter->trace->pipe_close)
6125 		iter->trace->pipe_close(iter);
6126 
6127 	mutex_unlock(&trace_types_lock);
6128 
6129 	free_cpumask_var(iter->started);
6130 	mutex_destroy(&iter->mutex);
6131 	kfree(iter);
6132 
6133 	trace_array_put(tr);
6134 
6135 	return 0;
6136 }
6137 
6138 static __poll_t
6139 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6140 {
6141 	struct trace_array *tr = iter->tr;
6142 
6143 	/* Iterators are static, they should be filled or empty */
6144 	if (trace_buffer_iter(iter, iter->cpu_file))
6145 		return EPOLLIN | EPOLLRDNORM;
6146 
6147 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6148 		/*
6149 		 * Always select as readable when in blocking mode
6150 		 */
6151 		return EPOLLIN | EPOLLRDNORM;
6152 	else
6153 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6154 					     filp, poll_table);
6155 }
6156 
6157 static __poll_t
6158 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6159 {
6160 	struct trace_iterator *iter = filp->private_data;
6161 
6162 	return trace_poll(iter, filp, poll_table);
6163 }
6164 
6165 /* Must be called with iter->mutex held. */
6166 static int tracing_wait_pipe(struct file *filp)
6167 {
6168 	struct trace_iterator *iter = filp->private_data;
6169 	int ret;
6170 
6171 	while (trace_empty(iter)) {
6172 
6173 		if ((filp->f_flags & O_NONBLOCK)) {
6174 			return -EAGAIN;
6175 		}
6176 
6177 		/*
6178 		 * We block until we read something and tracing is disabled.
6179 		 * If tracing is disabled but we have not read anything yet,
6180 		 * we keep blocking; this allows a user to cat this file and
6181 		 * then enable tracing. But once we have read something, we
6182 		 * return EOF when tracing is disabled again.
6183 		 *
6184 		 * iter->pos will be 0 if we haven't read anything.
6185 		 */
6186 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6187 			break;
6188 
6189 		mutex_unlock(&iter->mutex);
6190 
6191 		ret = wait_on_pipe(iter, 0);
6192 
6193 		mutex_lock(&iter->mutex);
6194 
6195 		if (ret)
6196 			return ret;
6197 	}
6198 
6199 	return 1;
6200 }
6201 
6202 /*
6203  * Consumer reader.
6204  */
6205 static ssize_t
6206 tracing_read_pipe(struct file *filp, char __user *ubuf,
6207 		  size_t cnt, loff_t *ppos)
6208 {
6209 	struct trace_iterator *iter = filp->private_data;
6210 	ssize_t sret;
6211 
6212 	/*
6213 	 * Avoid more than one consumer on a single file descriptor.
6214 	 * This is just a matter of trace coherency; the ring buffer itself
6215 	 * is protected.
6216 	 */
6217 	mutex_lock(&iter->mutex);
6218 
6219 	/* return any leftover data */
6220 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6221 	if (sret != -EBUSY)
6222 		goto out;
6223 
6224 	trace_seq_init(&iter->seq);
6225 
6226 	if (iter->trace->read) {
6227 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6228 		if (sret)
6229 			goto out;
6230 	}
6231 
6232 waitagain:
6233 	sret = tracing_wait_pipe(filp);
6234 	if (sret <= 0)
6235 		goto out;
6236 
6237 	/* stop when tracing is finished */
6238 	if (trace_empty(iter)) {
6239 		sret = 0;
6240 		goto out;
6241 	}
6242 
6243 	if (cnt >= PAGE_SIZE)
6244 		cnt = PAGE_SIZE - 1;
6245 
6246 	/* reset all but tr, trace, and overruns */
6247 	memset(&iter->seq, 0,
6248 	       sizeof(struct trace_iterator) -
6249 	       offsetof(struct trace_iterator, seq));
6250 	cpumask_clear(iter->started);
6251 	trace_seq_init(&iter->seq);
6252 	iter->pos = -1;
6253 
6254 	trace_event_read_lock();
6255 	trace_access_lock(iter->cpu_file);
6256 	while (trace_find_next_entry_inc(iter) != NULL) {
6257 		enum print_line_t ret;
6258 		int save_len = iter->seq.seq.len;
6259 
6260 		ret = print_trace_line(iter);
6261 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6262 			/* don't print partial lines */
6263 			iter->seq.seq.len = save_len;
6264 			break;
6265 		}
6266 		if (ret != TRACE_TYPE_NO_CONSUME)
6267 			trace_consume(iter);
6268 
6269 		if (trace_seq_used(&iter->seq) >= cnt)
6270 			break;
6271 
6272 		/*
6273 		 * Setting the full flag means we reached the trace_seq buffer
6274 		 * size and should have left via the partial-line condition
6275 		 * above. One of the trace_seq_* functions is not used properly.
6276 		 */
6277 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6278 			  iter->ent->type);
6279 	}
6280 	trace_access_unlock(iter->cpu_file);
6281 	trace_event_read_unlock();
6282 
6283 	/* Now copy what we have to the user */
6284 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6285 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6286 		trace_seq_init(&iter->seq);
6287 
6288 	/*
6289 	 * If there was nothing to send to user, in spite of consuming trace
6290 	 * entries, go back to wait for more entries.
6291 	 */
6292 	if (sret == -EBUSY)
6293 		goto waitagain;
6294 
6295 out:
6296 	mutex_unlock(&iter->mutex);
6297 
6298 	return sret;
6299 }
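/*
 * Illustrative use of the consuming reader above: unlike the "trace"
 * file, reading trace_pipe removes events from the ring buffer and
 * blocks when the buffer is empty (unless opened O_NONBLOCK):
 *
 *	# cat /sys/kernel/tracing/trace_pipe
 */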
6300 
6301 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6302 				     unsigned int idx)
6303 {
6304 	__free_page(spd->pages[idx]);
6305 }
6306 
6307 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6308 	.confirm		= generic_pipe_buf_confirm,
6309 	.release		= generic_pipe_buf_release,
6310 	.steal			= generic_pipe_buf_steal,
6311 	.get			= generic_pipe_buf_get,
6312 };
6313 
6314 static size_t
6315 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6316 {
6317 	size_t count;
6318 	int save_len;
6319 	int ret;
6320 
6321 	/* Seq buffer is page-sized, exactly what we need. */
6322 	for (;;) {
6323 		save_len = iter->seq.seq.len;
6324 		ret = print_trace_line(iter);
6325 
6326 		if (trace_seq_has_overflowed(&iter->seq)) {
6327 			iter->seq.seq.len = save_len;
6328 			break;
6329 		}
6330 
6331 		/*
6332 		 * This should not be hit, because a partial line should
6333 		 * only be returned if the iter->seq overflowed. But check
6334 		 * it anyway to be safe.
6335 		 */
6336 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6337 			iter->seq.seq.len = save_len;
6338 			break;
6339 		}
6340 
6341 		count = trace_seq_used(&iter->seq) - save_len;
6342 		if (rem < count) {
6343 			rem = 0;
6344 			iter->seq.seq.len = save_len;
6345 			break;
6346 		}
6347 
6348 		if (ret != TRACE_TYPE_NO_CONSUME)
6349 			trace_consume(iter);
6350 		rem -= count;
6351 		if (!trace_find_next_entry_inc(iter))	{
6352 			rem = 0;
6353 			iter->ent = NULL;
6354 			break;
6355 		}
6356 	}
6357 
6358 	return rem;
6359 }
6360 
6361 static ssize_t tracing_splice_read_pipe(struct file *filp,
6362 					loff_t *ppos,
6363 					struct pipe_inode_info *pipe,
6364 					size_t len,
6365 					unsigned int flags)
6366 {
6367 	struct page *pages_def[PIPE_DEF_BUFFERS];
6368 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6369 	struct trace_iterator *iter = filp->private_data;
6370 	struct splice_pipe_desc spd = {
6371 		.pages		= pages_def,
6372 		.partial	= partial_def,
6373 		.nr_pages	= 0, /* This gets updated below. */
6374 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6375 		.ops		= &tracing_pipe_buf_ops,
6376 		.spd_release	= tracing_spd_release_pipe,
6377 	};
6378 	ssize_t ret;
6379 	size_t rem;
6380 	unsigned int i;
6381 
6382 	if (splice_grow_spd(pipe, &spd))
6383 		return -ENOMEM;
6384 
6385 	mutex_lock(&iter->mutex);
6386 
6387 	if (iter->trace->splice_read) {
6388 		ret = iter->trace->splice_read(iter, filp,
6389 					       ppos, pipe, len, flags);
6390 		if (ret)
6391 			goto out_err;
6392 	}
6393 
6394 	ret = tracing_wait_pipe(filp);
6395 	if (ret <= 0)
6396 		goto out_err;
6397 
6398 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6399 		ret = -EFAULT;
6400 		goto out_err;
6401 	}
6402 
6403 	trace_event_read_lock();
6404 	trace_access_lock(iter->cpu_file);
6405 
6406 	/* Fill as many pages as possible. */
6407 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6408 		spd.pages[i] = alloc_page(GFP_KERNEL);
6409 		if (!spd.pages[i])
6410 			break;
6411 
6412 		rem = tracing_fill_pipe_page(rem, iter);
6413 
6414 		/* Copy the data into the page, so we can start over. */
6415 		ret = trace_seq_to_buffer(&iter->seq,
6416 					  page_address(spd.pages[i]),
6417 					  trace_seq_used(&iter->seq));
6418 		if (ret < 0) {
6419 			__free_page(spd.pages[i]);
6420 			break;
6421 		}
6422 		spd.partial[i].offset = 0;
6423 		spd.partial[i].len = trace_seq_used(&iter->seq);
6424 
6425 		trace_seq_init(&iter->seq);
6426 	}
6427 
6428 	trace_access_unlock(iter->cpu_file);
6429 	trace_event_read_unlock();
6430 	mutex_unlock(&iter->mutex);
6431 
6432 	spd.nr_pages = i;
6433 
6434 	if (i)
6435 		ret = splice_to_pipe(pipe, &spd);
6436 	else
6437 		ret = 0;
6438 out:
6439 	splice_shrink_spd(&spd);
6440 	return ret;
6441 
6442 out_err:
6443 	mutex_unlock(&iter->mutex);
6444 	goto out;
6445 }
6446 
6447 static ssize_t
6448 tracing_entries_read(struct file *filp, char __user *ubuf,
6449 		     size_t cnt, loff_t *ppos)
6450 {
6451 	struct inode *inode = file_inode(filp);
6452 	struct trace_array *tr = inode->i_private;
6453 	int cpu = tracing_get_cpu(inode);
6454 	char buf[64];
6455 	int r = 0;
6456 	ssize_t ret;
6457 
6458 	mutex_lock(&trace_types_lock);
6459 
6460 	if (cpu == RING_BUFFER_ALL_CPUS) {
6461 		int cpu, buf_size_same;
6462 		unsigned long size;
6463 
6464 		size = 0;
6465 		buf_size_same = 1;
6466 		/* check if all CPU buffer sizes are the same */
6467 		for_each_tracing_cpu(cpu) {
6468 			/* fill in the size from the first enabled CPU */
6469 			if (size == 0)
6470 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6471 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6472 				buf_size_same = 0;
6473 				break;
6474 			}
6475 		}
6476 
6477 		if (buf_size_same) {
6478 			if (!ring_buffer_expanded)
6479 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6480 					    size >> 10,
6481 					    trace_buf_size >> 10);
6482 			else
6483 				r = sprintf(buf, "%lu\n", size >> 10);
6484 		} else
6485 			r = sprintf(buf, "X\n");
6486 	} else
6487 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6488 
6489 	mutex_unlock(&trace_types_lock);
6490 
6491 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6492 	return ret;
6493 }
6494 
6495 static ssize_t
6496 tracing_entries_write(struct file *filp, const char __user *ubuf,
6497 		      size_t cnt, loff_t *ppos)
6498 {
6499 	struct inode *inode = file_inode(filp);
6500 	struct trace_array *tr = inode->i_private;
6501 	unsigned long val;
6502 	int ret;
6503 
6504 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6505 	if (ret)
6506 		return ret;
6507 
6508 	/* must have at least 1 entry */
6509 	if (!val)
6510 		return -EINVAL;
6511 
6512 	/* value is in KB */
6513 	val <<= 10;
6514 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6515 	if (ret < 0)
6516 		return ret;
6517 
6518 	*ppos += cnt;
6519 
6520 	return cnt;
6521 }
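/*
 * Illustrative use of buffer_size_kb, which is handled by the two
 * functions above (the size is per CPU, in kilobytes; the per_cpu
 * variants of the file resize a single CPU buffer):
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# cat /sys/kernel/tracing/buffer_size_kb
 *	4096
 */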
6522 
6523 static ssize_t
6524 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6525 				size_t cnt, loff_t *ppos)
6526 {
6527 	struct trace_array *tr = filp->private_data;
6528 	char buf[64];
6529 	int r, cpu;
6530 	unsigned long size = 0, expanded_size = 0;
6531 
6532 	mutex_lock(&trace_types_lock);
6533 	for_each_tracing_cpu(cpu) {
6534 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6535 		if (!ring_buffer_expanded)
6536 			expanded_size += trace_buf_size >> 10;
6537 	}
6538 	if (ring_buffer_expanded)
6539 		r = sprintf(buf, "%lu\n", size);
6540 	else
6541 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6542 	mutex_unlock(&trace_types_lock);
6543 
6544 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6545 }
6546 
6547 static ssize_t
6548 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6549 			  size_t cnt, loff_t *ppos)
6550 {
6551 	/*
6552 	 * There is no need to read what the user has written; this function
6553 	 * only exists so that "echo" into the file does not return an error.
6554 	 */
6555 
6556 	*ppos += cnt;
6557 
6558 	return cnt;
6559 }
6560 
6561 static int
6562 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6563 {
6564 	struct trace_array *tr = inode->i_private;
6565 
6566 	/* disable tracing ? */
6567 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6568 		tracer_tracing_off(tr);
6569 	/* resize the ring buffer to 0 */
6570 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6571 
6572 	trace_array_put(tr);
6573 
6574 	return 0;
6575 }
6576 
6577 static ssize_t
6578 tracing_mark_write(struct file *filp, const char __user *ubuf,
6579 					size_t cnt, loff_t *fpos)
6580 {
6581 	struct trace_array *tr = filp->private_data;
6582 	struct ring_buffer_event *event;
6583 	enum event_trigger_type tt = ETT_NONE;
6584 	struct trace_buffer *buffer;
6585 	struct print_entry *entry;
6586 	unsigned long irq_flags;
6587 	ssize_t written;
6588 	int size;
6589 	int len;
6590 
6591 /* Used in tracing_mark_raw_write() as well */
6592 #define FAULTED_STR "<faulted>"
6593 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6594 
6595 	if (tracing_disabled)
6596 		return -EINVAL;
6597 
6598 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6599 		return -EINVAL;
6600 
6601 	if (cnt > TRACE_BUF_SIZE)
6602 		cnt = TRACE_BUF_SIZE;
6603 
6604 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6605 
6606 	local_save_flags(irq_flags);
6607 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6608 
6609 	/* If less than "<faulted>", then make sure we can still add that */
6610 	if (cnt < FAULTED_SIZE)
6611 		size += FAULTED_SIZE - cnt;
6612 
6613 	buffer = tr->array_buffer.buffer;
6614 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6615 					    irq_flags, preempt_count());
6616 	if (unlikely(!event))
6617 		/* Ring buffer disabled, return as if not open for write */
6618 		return -EBADF;
6619 
6620 	entry = ring_buffer_event_data(event);
6621 	entry->ip = _THIS_IP_;
6622 
6623 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6624 	if (len) {
6625 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6626 		cnt = FAULTED_SIZE;
6627 		written = -EFAULT;
6628 	} else
6629 		written = cnt;
6630 	len = cnt;
6631 
6632 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6633 		/* do not add \n before testing triggers, but add \0 */
6634 		entry->buf[cnt] = '\0';
6635 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6636 	}
6637 
6638 	if (entry->buf[cnt - 1] != '\n') {
6639 		entry->buf[cnt] = '\n';
6640 		entry->buf[cnt + 1] = '\0';
6641 	} else
6642 		entry->buf[cnt] = '\0';
6643 
6644 	__buffer_unlock_commit(buffer, event);
6645 
6646 	if (tt)
6647 		event_triggers_post_call(tr->trace_marker_file, tt);
6648 
6649 	if (written > 0)
6650 		*fpos += written;
6651 
6652 	return written;
6653 }
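/*
 * Illustrative use of trace_marker: any string written here shows up in
 * the trace output as a print event, which is handy for correlating
 * user-space activity with kernel events:
 *
 *	# echo "frame start" > /sys/kernel/tracing/trace_marker
 */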
6654 
6655 /* Limit it for now to 3K (including tag) */
6656 #define RAW_DATA_MAX_SIZE (1024*3)
6657 
6658 static ssize_t
6659 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6660 					size_t cnt, loff_t *fpos)
6661 {
6662 	struct trace_array *tr = filp->private_data;
6663 	struct ring_buffer_event *event;
6664 	struct trace_buffer *buffer;
6665 	struct raw_data_entry *entry;
6666 	unsigned long irq_flags;
6667 	ssize_t written;
6668 	int size;
6669 	int len;
6670 
6671 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6672 
6673 	if (tracing_disabled)
6674 		return -EINVAL;
6675 
6676 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6677 		return -EINVAL;
6678 
6679 	/* The marker must at least have a tag id */
6680 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6681 		return -EINVAL;
6682 
6683 	if (cnt > TRACE_BUF_SIZE)
6684 		cnt = TRACE_BUF_SIZE;
6685 
6686 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6687 
6688 	local_save_flags(irq_flags);
6689 	size = sizeof(*entry) + cnt;
6690 	if (cnt < FAULT_SIZE_ID)
6691 		size += FAULT_SIZE_ID - cnt;
6692 
6693 	buffer = tr->array_buffer.buffer;
6694 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6695 					    irq_flags, preempt_count());
6696 	if (!event)
6697 		/* Ring buffer disabled, return as if not open for write */
6698 		return -EBADF;
6699 
6700 	entry = ring_buffer_event_data(event);
6701 
6702 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6703 	if (len) {
6704 		entry->id = -1;
6705 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6706 		written = -EFAULT;
6707 	} else
6708 		written = cnt;
6709 
6710 	__buffer_unlock_commit(buffer, event);
6711 
6712 	if (written > 0)
6713 		*fpos += written;
6714 
6715 	return written;
6716 }
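/*
 * trace_marker_raw expects binary data that starts with a 4-byte id,
 * e.g. from user space (the struct and values here are illustrative):
 *
 *	struct { int id; char payload[8]; } raw = { 42, "payload" };
 *	write(fd, &raw, sizeof(raw));
 */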
6717 
6718 static int tracing_clock_show(struct seq_file *m, void *v)
6719 {
6720 	struct trace_array *tr = m->private;
6721 	int i;
6722 
6723 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6724 		seq_printf(m,
6725 			"%s%s%s%s", i ? " " : "",
6726 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6727 			i == tr->clock_id ? "]" : "");
6728 	seq_putc(m, '\n');
6729 
6730 	return 0;
6731 }
6732 
6733 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6734 {
6735 	int i;
6736 
6737 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6738 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6739 			break;
6740 	}
6741 	if (i == ARRAY_SIZE(trace_clocks))
6742 		return -EINVAL;
6743 
6744 	mutex_lock(&trace_types_lock);
6745 
6746 	tr->clock_id = i;
6747 
6748 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6749 
6750 	/*
6751 	 * New clock may not be consistent with the previous clock.
6752 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6753 	 */
6754 	tracing_reset_online_cpus(&tr->array_buffer);
6755 
6756 #ifdef CONFIG_TRACER_MAX_TRACE
6757 	if (tr->max_buffer.buffer)
6758 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6759 	tracing_reset_online_cpus(&tr->max_buffer);
6760 #endif
6761 
6762 	mutex_unlock(&trace_types_lock);
6763 
6764 	return 0;
6765 }
6766 
6767 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6768 				   size_t cnt, loff_t *fpos)
6769 {
6770 	struct seq_file *m = filp->private_data;
6771 	struct trace_array *tr = m->private;
6772 	char buf[64];
6773 	const char *clockstr;
6774 	int ret;
6775 
6776 	if (cnt >= sizeof(buf))
6777 		return -EINVAL;
6778 
6779 	if (copy_from_user(buf, ubuf, cnt))
6780 		return -EFAULT;
6781 
6782 	buf[cnt] = 0;
6783 
6784 	clockstr = strstrip(buf);
6785 
6786 	ret = tracing_set_clock(tr, clockstr);
6787 	if (ret)
6788 		return ret;
6789 
6790 	*fpos += cnt;
6791 
6792 	return cnt;
6793 }
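/*
 * Illustrative use of the trace_clock file: the currently selected clock
 * is shown in brackets, and the available clocks can vary by
 * architecture:
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot
 *	# echo mono > /sys/kernel/tracing/trace_clock
 */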
6794 
6795 static int tracing_clock_open(struct inode *inode, struct file *file)
6796 {
6797 	struct trace_array *tr = inode->i_private;
6798 	int ret;
6799 
6800 	ret = tracing_check_open_get_tr(tr);
6801 	if (ret)
6802 		return ret;
6803 
6804 	ret = single_open(file, tracing_clock_show, inode->i_private);
6805 	if (ret < 0)
6806 		trace_array_put(tr);
6807 
6808 	return ret;
6809 }
6810 
6811 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6812 {
6813 	struct trace_array *tr = m->private;
6814 
6815 	mutex_lock(&trace_types_lock);
6816 
6817 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6818 		seq_puts(m, "delta [absolute]\n");
6819 	else
6820 		seq_puts(m, "[delta] absolute\n");
6821 
6822 	mutex_unlock(&trace_types_lock);
6823 
6824 	return 0;
6825 }
6826 
6827 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6828 {
6829 	struct trace_array *tr = inode->i_private;
6830 	int ret;
6831 
6832 	ret = tracing_check_open_get_tr(tr);
6833 	if (ret)
6834 		return ret;
6835 
6836 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6837 	if (ret < 0)
6838 		trace_array_put(tr);
6839 
6840 	return ret;
6841 }
6842 
6843 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6844 {
6845 	int ret = 0;
6846 
6847 	mutex_lock(&trace_types_lock);
6848 
6849 	if (abs && tr->time_stamp_abs_ref++)
6850 		goto out;
6851 
6852 	if (!abs) {
6853 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6854 			ret = -EINVAL;
6855 			goto out;
6856 		}
6857 
6858 		if (--tr->time_stamp_abs_ref)
6859 			goto out;
6860 	}
6861 
6862 	ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6863 
6864 #ifdef CONFIG_TRACER_MAX_TRACE
6865 	if (tr->max_buffer.buffer)
6866 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6867 #endif
6868  out:
6869 	mutex_unlock(&trace_types_lock);
6870 
6871 	return ret;
6872 }
6873 
6874 struct ftrace_buffer_info {
6875 	struct trace_iterator	iter;
6876 	void			*spare;
6877 	unsigned int		spare_cpu;
6878 	unsigned int		read;
6879 };
6880 
6881 #ifdef CONFIG_TRACER_SNAPSHOT
6882 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6883 {
6884 	struct trace_array *tr = inode->i_private;
6885 	struct trace_iterator *iter;
6886 	struct seq_file *m;
6887 	int ret;
6888 
6889 	ret = tracing_check_open_get_tr(tr);
6890 	if (ret)
6891 		return ret;
6892 
6893 	if (file->f_mode & FMODE_READ) {
6894 		iter = __tracing_open(inode, file, true);
6895 		if (IS_ERR(iter))
6896 			ret = PTR_ERR(iter);
6897 	} else {
6898 		/* Writes still need the seq_file to hold the private data */
6899 		ret = -ENOMEM;
6900 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6901 		if (!m)
6902 			goto out;
6903 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6904 		if (!iter) {
6905 			kfree(m);
6906 			goto out;
6907 		}
6908 		ret = 0;
6909 
6910 		iter->tr = tr;
6911 		iter->array_buffer = &tr->max_buffer;
6912 		iter->cpu_file = tracing_get_cpu(inode);
6913 		m->private = iter;
6914 		file->private_data = m;
6915 	}
6916 out:
6917 	if (ret < 0)
6918 		trace_array_put(tr);
6919 
6920 	return ret;
6921 }
6922 
6923 static ssize_t
6924 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6925 		       loff_t *ppos)
6926 {
6927 	struct seq_file *m = filp->private_data;
6928 	struct trace_iterator *iter = m->private;
6929 	struct trace_array *tr = iter->tr;
6930 	unsigned long val;
6931 	int ret;
6932 
6933 	ret = tracing_update_buffers();
6934 	if (ret < 0)
6935 		return ret;
6936 
6937 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6938 	if (ret)
6939 		return ret;
6940 
6941 	mutex_lock(&trace_types_lock);
6942 
6943 	if (tr->current_trace->use_max_tr) {
6944 		ret = -EBUSY;
6945 		goto out;
6946 	}
6947 
6948 	arch_spin_lock(&tr->max_lock);
6949 	if (tr->cond_snapshot)
6950 		ret = -EBUSY;
6951 	arch_spin_unlock(&tr->max_lock);
6952 	if (ret)
6953 		goto out;
6954 
6955 	switch (val) {
6956 	case 0:
6957 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6958 			ret = -EINVAL;
6959 			break;
6960 		}
6961 		if (tr->allocated_snapshot)
6962 			free_snapshot(tr);
6963 		break;
6964 	case 1:
6965 /* Only allow per-cpu swap if the ring buffer supports it */
6966 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6967 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6968 			ret = -EINVAL;
6969 			break;
6970 		}
6971 #endif
6972 		if (tr->allocated_snapshot)
6973 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6974 					&tr->array_buffer, iter->cpu_file);
6975 		else
6976 			ret = tracing_alloc_snapshot_instance(tr);
6977 		if (ret < 0)
6978 			break;
6979 		local_irq_disable();
6980 		/* Now, we're going to swap */
6981 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6982 			update_max_tr(tr, current, smp_processor_id(), NULL);
6983 		else
6984 			update_max_tr_single(tr, current, iter->cpu_file);
6985 		local_irq_enable();
6986 		break;
6987 	default:
6988 		if (tr->allocated_snapshot) {
6989 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6990 				tracing_reset_online_cpus(&tr->max_buffer);
6991 			else
6992 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6993 		}
6994 		break;
6995 	}
6996 
6997 	if (ret >= 0) {
6998 		*ppos += cnt;
6999 		ret = cnt;
7000 	}
7001 out:
7002 	mutex_unlock(&trace_types_lock);
7003 	return ret;
7004 }
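/*
 * Illustrative use of the snapshot file, matching the switch above:
 * writing "0" frees the snapshot buffer, "1" allocates it (if needed)
 * and swaps it with the live buffer, and any other number just clears
 * the snapshot:
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot
 *	# cat /sys/kernel/tracing/snapshot
 */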
7005 
7006 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7007 {
7008 	struct seq_file *m = file->private_data;
7009 	int ret;
7010 
7011 	ret = tracing_release(inode, file);
7012 
7013 	if (file->f_mode & FMODE_READ)
7014 		return ret;
7015 
7016 	/* If write only, the seq_file is just a stub */
7017 	if (m)
7018 		kfree(m->private);
7019 	kfree(m);
7020 
7021 	return 0;
7022 }
7023 
7024 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7025 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7026 				    size_t count, loff_t *ppos);
7027 static int tracing_buffers_release(struct inode *inode, struct file *file);
7028 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7029 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7030 
7031 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7032 {
7033 	struct ftrace_buffer_info *info;
7034 	int ret;
7035 
7036 	/* The following checks for tracefs lockdown */
7037 	ret = tracing_buffers_open(inode, filp);
7038 	if (ret < 0)
7039 		return ret;
7040 
7041 	info = filp->private_data;
7042 
7043 	if (info->iter.trace->use_max_tr) {
7044 		tracing_buffers_release(inode, filp);
7045 		return -EBUSY;
7046 	}
7047 
7048 	info->iter.snapshot = true;
7049 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7050 
7051 	return ret;
7052 }
7053 
7054 #endif /* CONFIG_TRACER_SNAPSHOT */
7055 
7056 
7057 static const struct file_operations tracing_thresh_fops = {
7058 	.open		= tracing_open_generic,
7059 	.read		= tracing_thresh_read,
7060 	.write		= tracing_thresh_write,
7061 	.llseek		= generic_file_llseek,
7062 };
7063 
7064 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7065 static const struct file_operations tracing_max_lat_fops = {
7066 	.open		= tracing_open_generic,
7067 	.read		= tracing_max_lat_read,
7068 	.write		= tracing_max_lat_write,
7069 	.llseek		= generic_file_llseek,
7070 };
7071 #endif
7072 
7073 static const struct file_operations set_tracer_fops = {
7074 	.open		= tracing_open_generic,
7075 	.read		= tracing_set_trace_read,
7076 	.write		= tracing_set_trace_write,
7077 	.llseek		= generic_file_llseek,
7078 };
7079 
7080 static const struct file_operations tracing_pipe_fops = {
7081 	.open		= tracing_open_pipe,
7082 	.poll		= tracing_poll_pipe,
7083 	.read		= tracing_read_pipe,
7084 	.splice_read	= tracing_splice_read_pipe,
7085 	.release	= tracing_release_pipe,
7086 	.llseek		= no_llseek,
7087 };
7088 
7089 static const struct file_operations tracing_entries_fops = {
7090 	.open		= tracing_open_generic_tr,
7091 	.read		= tracing_entries_read,
7092 	.write		= tracing_entries_write,
7093 	.llseek		= generic_file_llseek,
7094 	.release	= tracing_release_generic_tr,
7095 };
7096 
7097 static const struct file_operations tracing_total_entries_fops = {
7098 	.open		= tracing_open_generic_tr,
7099 	.read		= tracing_total_entries_read,
7100 	.llseek		= generic_file_llseek,
7101 	.release	= tracing_release_generic_tr,
7102 };
7103 
7104 static const struct file_operations tracing_free_buffer_fops = {
7105 	.open		= tracing_open_generic_tr,
7106 	.write		= tracing_free_buffer_write,
7107 	.release	= tracing_free_buffer_release,
7108 };
7109 
7110 static const struct file_operations tracing_mark_fops = {
7111 	.open		= tracing_open_generic_tr,
7112 	.write		= tracing_mark_write,
7113 	.llseek		= generic_file_llseek,
7114 	.release	= tracing_release_generic_tr,
7115 };
7116 
7117 static const struct file_operations tracing_mark_raw_fops = {
7118 	.open		= tracing_open_generic_tr,
7119 	.write		= tracing_mark_raw_write,
7120 	.llseek		= generic_file_llseek,
7121 	.release	= tracing_release_generic_tr,
7122 };
7123 
7124 static const struct file_operations trace_clock_fops = {
7125 	.open		= tracing_clock_open,
7126 	.read		= seq_read,
7127 	.llseek		= seq_lseek,
7128 	.release	= tracing_single_release_tr,
7129 	.write		= tracing_clock_write,
7130 };
7131 
7132 static const struct file_operations trace_time_stamp_mode_fops = {
7133 	.open		= tracing_time_stamp_mode_open,
7134 	.read		= seq_read,
7135 	.llseek		= seq_lseek,
7136 	.release	= tracing_single_release_tr,
7137 };
7138 
7139 #ifdef CONFIG_TRACER_SNAPSHOT
7140 static const struct file_operations snapshot_fops = {
7141 	.open		= tracing_snapshot_open,
7142 	.read		= seq_read,
7143 	.write		= tracing_snapshot_write,
7144 	.llseek		= tracing_lseek,
7145 	.release	= tracing_snapshot_release,
7146 };
7147 
7148 static const struct file_operations snapshot_raw_fops = {
7149 	.open		= snapshot_raw_open,
7150 	.read		= tracing_buffers_read,
7151 	.release	= tracing_buffers_release,
7152 	.splice_read	= tracing_buffers_splice_read,
7153 	.llseek		= no_llseek,
7154 };
7155 
7156 #endif /* CONFIG_TRACER_SNAPSHOT */
7157 
7158 #define TRACING_LOG_ERRS_MAX	8
7159 #define TRACING_LOG_LOC_MAX	128
7160 
7161 #define CMD_PREFIX "  Command: "
7162 
7163 struct err_info {
7164 	const char	**errs;	/* ptr to loc-specific array of err strings */
7165 	u8		type;	/* index into errs -> specific err string */
7166 	u8		pos;	/* caret position; fits in u8 since MAX_FILTER_STR_VAL = 256 */
7167 	u64		ts;
7168 };
7169 
7170 struct tracing_log_err {
7171 	struct list_head	list;
7172 	struct err_info		info;
7173 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7174 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7175 };
7176 
7177 static DEFINE_MUTEX(tracing_err_log_lock);
7178 
7179 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7180 {
7181 	struct tracing_log_err *err;
7182 
7183 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7184 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7185 		if (!err)
7186 			err = ERR_PTR(-ENOMEM);
7187 		tr->n_err_log_entries++;
7188 
7189 		return err;
7190 	}
7191 
7192 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7193 	list_del(&err->list);
7194 
7195 	return err;
7196 }
7197 
7198 /**
7199  * err_pos - find the position of a string within a command for error careting
7200  * @cmd: The tracing command that caused the error
7201  * @str: The string to position the caret at within @cmd
7202  *
7203  * Finds the position of the first occurrence of @str within @cmd.  The
7204  * return value can be passed to tracing_log_err() for caret placement
7205  * within @cmd.
7206  *
7207  * Returns the index within @cmd of the first occurrence of @str or 0
7208  * if @str was not found.
7209  */
7210 unsigned int err_pos(char *cmd, const char *str)
7211 {
7212 	char *found;
7213 
7214 	if (WARN_ON(!strlen(cmd)))
7215 		return 0;
7216 
7217 	found = strstr(cmd, str);
7218 	if (found)
7219 		return found - cmd;
7220 
7221 	return 0;
7222 }
7223 
7224 /**
7225  * tracing_log_err - write an error to the tracing error log
7226  * @tr: The associated trace array for the error (NULL for top level array)
7227  * @loc: A string describing where the error occurred
7228  * @cmd: The tracing command that caused the error
7229  * @errs: The array of loc-specific static error strings
7230  * @type: The index into errs[], which produces the specific static err string
7231  * @pos: The position the caret should be placed in the cmd
7232  *
7233  * Writes an error into tracing/error_log of the form:
7234  *
7235  * <loc>: error: <text>
7236  *   Command: <cmd>
7237  *              ^
7238  *
7239  * tracing/error_log is a small log file containing the last
7240  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7241  * unless there has been a tracing error, and the error log can be
7242  * cleared and have its memory freed by writing the empty string to it
7243  * in truncation mode, i.e. echo > tracing/error_log.
7244  *
7245  * NOTE: the @errs array along with the @type param are used to
7246  * produce a static error string - this string is not copied and saved
7247  * when the error is logged - only a pointer to it is saved.  See
7248  * existing callers for examples of how static strings are typically
7249  * defined for use with tracing_log_err().
7250  */
7251 void tracing_log_err(struct trace_array *tr,
7252 		     const char *loc, const char *cmd,
7253 		     const char **errs, u8 type, u8 pos)
7254 {
7255 	struct tracing_log_err *err;
7256 
7257 	if (!tr)
7258 		tr = &global_trace;
7259 
7260 	mutex_lock(&tracing_err_log_lock);
7261 	err = get_tracing_log_err(tr);
7262 	if (PTR_ERR(err) == -ENOMEM) {
7263 		mutex_unlock(&tracing_err_log_lock);
7264 		return;
7265 	}
7266 
7267 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7268 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7269 
7270 	err->info.errs = errs;
7271 	err->info.type = type;
7272 	err->info.pos = pos;
7273 	err->info.ts = local_clock();
7274 
7275 	list_add_tail(&err->list, &tr->err_log);
7276 	mutex_unlock(&tracing_err_log_lock);
7277 }
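/*
 * Example of what the resulting error_log output looks like (the
 * timestamp, location and command are illustrative):
 *
 *	# cat /sys/kernel/tracing/error_log
 *	[  117.671234] hist:sched:sched_wakeup: error: Couldn't find field
 *	  Command: hist:keys=pid:ts0=common_timestamp.usecs1
 *	                                                    ^
 */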
7278 
7279 static void clear_tracing_err_log(struct trace_array *tr)
7280 {
7281 	struct tracing_log_err *err, *next;
7282 
7283 	mutex_lock(&tracing_err_log_lock);
7284 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7285 		list_del(&err->list);
7286 		kfree(err);
7287 	}
7288 
7289 	tr->n_err_log_entries = 0;
7290 	mutex_unlock(&tracing_err_log_lock);
7291 }
7292 
7293 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7294 {
7295 	struct trace_array *tr = m->private;
7296 
7297 	mutex_lock(&tracing_err_log_lock);
7298 
7299 	return seq_list_start(&tr->err_log, *pos);
7300 }
7301 
7302 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7303 {
7304 	struct trace_array *tr = m->private;
7305 
7306 	return seq_list_next(v, &tr->err_log, pos);
7307 }
7308 
7309 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7310 {
7311 	mutex_unlock(&tracing_err_log_lock);
7312 }
7313 
7314 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7315 {
7316 	u8 i;
7317 
7318 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7319 		seq_putc(m, ' ');
7320 	for (i = 0; i < pos; i++)
7321 		seq_putc(m, ' ');
7322 	seq_puts(m, "^\n");
7323 }
7324 
7325 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7326 {
7327 	struct tracing_log_err *err = v;
7328 
7329 	if (err) {
7330 		const char *err_text = err->info.errs[err->info.type];
7331 		u64 sec = err->info.ts;
7332 		u32 nsec;
7333 
7334 		nsec = do_div(sec, NSEC_PER_SEC);
7335 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7336 			   err->loc, err_text);
7337 		seq_printf(m, "%s", err->cmd);
7338 		tracing_err_log_show_pos(m, err->info.pos);
7339 	}
7340 
7341 	return 0;
7342 }
7343 
7344 static const struct seq_operations tracing_err_log_seq_ops = {
7345 	.start  = tracing_err_log_seq_start,
7346 	.next   = tracing_err_log_seq_next,
7347 	.stop   = tracing_err_log_seq_stop,
7348 	.show   = tracing_err_log_seq_show
7349 };
7350 
7351 static int tracing_err_log_open(struct inode *inode, struct file *file)
7352 {
7353 	struct trace_array *tr = inode->i_private;
7354 	int ret = 0;
7355 
7356 	ret = tracing_check_open_get_tr(tr);
7357 	if (ret)
7358 		return ret;
7359 
7360 	/* If this file was opened for write, then erase contents */
7361 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7362 		clear_tracing_err_log(tr);
7363 
7364 	if (file->f_mode & FMODE_READ) {
7365 		ret = seq_open(file, &tracing_err_log_seq_ops);
7366 		if (!ret) {
7367 			struct seq_file *m = file->private_data;
7368 			m->private = tr;
7369 		} else {
7370 			trace_array_put(tr);
7371 		}
7372 	}
7373 	return ret;
7374 }
7375 
7376 static ssize_t tracing_err_log_write(struct file *file,
7377 				     const char __user *buffer,
7378 				     size_t count, loff_t *ppos)
7379 {
7380 	return count;
7381 }
7382 
7383 static int tracing_err_log_release(struct inode *inode, struct file *file)
7384 {
7385 	struct trace_array *tr = inode->i_private;
7386 
7387 	trace_array_put(tr);
7388 
7389 	if (file->f_mode & FMODE_READ)
7390 		seq_release(inode, file);
7391 
7392 	return 0;
7393 }
7394 
7395 static const struct file_operations tracing_err_log_fops = {
7396 	.open           = tracing_err_log_open,
7397 	.write		= tracing_err_log_write,
7398 	.read           = seq_read,
7399 	.llseek         = seq_lseek,
7400 	.release        = tracing_err_log_release,
7401 };
7402 
7403 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7404 {
7405 	struct trace_array *tr = inode->i_private;
7406 	struct ftrace_buffer_info *info;
7407 	int ret;
7408 
7409 	ret = tracing_check_open_get_tr(tr);
7410 	if (ret)
7411 		return ret;
7412 
7413 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7414 	if (!info) {
7415 		trace_array_put(tr);
7416 		return -ENOMEM;
7417 	}
7418 
7419 	mutex_lock(&trace_types_lock);
7420 
7421 	info->iter.tr		= tr;
7422 	info->iter.cpu_file	= tracing_get_cpu(inode);
7423 	info->iter.trace	= tr->current_trace;
7424 	info->iter.array_buffer = &tr->array_buffer;
7425 	info->spare		= NULL;
7426 	/* Force reading ring buffer for first read */
7427 	info->read		= (unsigned int)-1;
7428 
7429 	filp->private_data = info;
7430 
7431 	tr->current_trace->ref++;
7432 
7433 	mutex_unlock(&trace_types_lock);
7434 
7435 	ret = nonseekable_open(inode, filp);
7436 	if (ret < 0)
7437 		trace_array_put(tr);
7438 
7439 	return ret;
7440 }
7441 
7442 static __poll_t
7443 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7444 {
7445 	struct ftrace_buffer_info *info = filp->private_data;
7446 	struct trace_iterator *iter = &info->iter;
7447 
7448 	return trace_poll(iter, filp, poll_table);
7449 }
7450 
7451 static ssize_t
7452 tracing_buffers_read(struct file *filp, char __user *ubuf,
7453 		     size_t count, loff_t *ppos)
7454 {
7455 	struct ftrace_buffer_info *info = filp->private_data;
7456 	struct trace_iterator *iter = &info->iter;
7457 	ssize_t ret = 0;
7458 	ssize_t size;
7459 
7460 	if (!count)
7461 		return 0;
7462 
7463 #ifdef CONFIG_TRACER_MAX_TRACE
7464 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7465 		return -EBUSY;
7466 #endif
7467 
7468 	if (!info->spare) {
7469 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7470 							  iter->cpu_file);
7471 		if (IS_ERR(info->spare)) {
7472 			ret = PTR_ERR(info->spare);
7473 			info->spare = NULL;
7474 		} else {
7475 			info->spare_cpu = iter->cpu_file;
7476 		}
7477 	}
7478 	if (!info->spare)
7479 		return ret;
7480 
7481 	/* Do we have previous read data to read? */
7482 	if (info->read < PAGE_SIZE)
7483 		goto read;
7484 
7485  again:
7486 	trace_access_lock(iter->cpu_file);
7487 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7488 				    &info->spare,
7489 				    count,
7490 				    iter->cpu_file, 0);
7491 	trace_access_unlock(iter->cpu_file);
7492 
7493 	if (ret < 0) {
7494 		if (trace_empty(iter)) {
7495 			if ((filp->f_flags & O_NONBLOCK))
7496 				return -EAGAIN;
7497 
7498 			ret = wait_on_pipe(iter, 0);
7499 			if (ret)
7500 				return ret;
7501 
7502 			goto again;
7503 		}
7504 		return 0;
7505 	}
7506 
7507 	info->read = 0;
7508  read:
7509 	size = PAGE_SIZE - info->read;
7510 	if (size > count)
7511 		size = count;
7512 
7513 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7514 	if (ret == size)
7515 		return -EFAULT;
7516 
7517 	size -= ret;
7518 
7519 	*ppos += size;
7520 	info->read += size;
7521 
7522 	return size;
7523 }
7524 
7525 static int tracing_buffers_release(struct inode *inode, struct file *file)
7526 {
7527 	struct ftrace_buffer_info *info = file->private_data;
7528 	struct trace_iterator *iter = &info->iter;
7529 
7530 	mutex_lock(&trace_types_lock);
7531 
7532 	iter->tr->current_trace->ref--;
7533 
7534 	__trace_array_put(iter->tr);
7535 
7536 	if (info->spare)
7537 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7538 					   info->spare_cpu, info->spare);
7539 	kfree(info);
7540 
7541 	mutex_unlock(&trace_types_lock);
7542 
7543 	return 0;
7544 }
7545 
7546 struct buffer_ref {
7547 	struct trace_buffer	*buffer;
7548 	void			*page;
7549 	int			cpu;
7550 	refcount_t		refcount;
7551 };
7552 
7553 static void buffer_ref_release(struct buffer_ref *ref)
7554 {
7555 	if (!refcount_dec_and_test(&ref->refcount))
7556 		return;
7557 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7558 	kfree(ref);
7559 }
7560 
7561 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7562 				    struct pipe_buffer *buf)
7563 {
7564 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7565 
7566 	buffer_ref_release(ref);
7567 	buf->private = 0;
7568 }
7569 
7570 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7571 				struct pipe_buffer *buf)
7572 {
7573 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7574 
7575 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7576 		return false;
7577 
7578 	refcount_inc(&ref->refcount);
7579 	return true;
7580 }
7581 
7582 /* Pipe buffer operations for a buffer. */
7583 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7584 	.confirm		= generic_pipe_buf_confirm,
7585 	.release		= buffer_pipe_buf_release,
7586 	.steal			= generic_pipe_buf_nosteal,
7587 	.get			= buffer_pipe_buf_get,
7588 };
7589 
7590 /*
7591  * Callback from splice_to_pipe(): release any remaining pages at the
7592  * end of the spd in case we errored out while filling the pipe.
7593  */
7594 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7595 {
7596 	struct buffer_ref *ref =
7597 		(struct buffer_ref *)spd->partial[i].private;
7598 
7599 	buffer_ref_release(ref);
7600 	spd->partial[i].private = 0;
7601 }
7602 
7603 static ssize_t
7604 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7605 			    struct pipe_inode_info *pipe, size_t len,
7606 			    unsigned int flags)
7607 {
7608 	struct ftrace_buffer_info *info = file->private_data;
7609 	struct trace_iterator *iter = &info->iter;
7610 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7611 	struct page *pages_def[PIPE_DEF_BUFFERS];
7612 	struct splice_pipe_desc spd = {
7613 		.pages		= pages_def,
7614 		.partial	= partial_def,
7615 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7616 		.ops		= &buffer_pipe_buf_ops,
7617 		.spd_release	= buffer_spd_release,
7618 	};
7619 	struct buffer_ref *ref;
7620 	int entries, i;
7621 	ssize_t ret = 0;
7622 
7623 #ifdef CONFIG_TRACER_MAX_TRACE
7624 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7625 		return -EBUSY;
7626 #endif
7627 
7628 	if (*ppos & (PAGE_SIZE - 1))
7629 		return -EINVAL;
7630 
7631 	if (len & (PAGE_SIZE - 1)) {
7632 		if (len < PAGE_SIZE)
7633 			return -EINVAL;
7634 		len &= PAGE_MASK;
7635 	}
7636 
7637 	if (splice_grow_spd(pipe, &spd))
7638 		return -ENOMEM;
7639 
7640  again:
7641 	trace_access_lock(iter->cpu_file);
7642 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7643 
7644 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7645 		struct page *page;
7646 		int r;
7647 
7648 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7649 		if (!ref) {
7650 			ret = -ENOMEM;
7651 			break;
7652 		}
7653 
7654 		refcount_set(&ref->refcount, 1);
7655 		ref->buffer = iter->array_buffer->buffer;
7656 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7657 		if (IS_ERR(ref->page)) {
7658 			ret = PTR_ERR(ref->page);
7659 			ref->page = NULL;
7660 			kfree(ref);
7661 			break;
7662 		}
7663 		ref->cpu = iter->cpu_file;
7664 
7665 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7666 					  len, iter->cpu_file, 1);
7667 		if (r < 0) {
7668 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7669 						   ref->page);
7670 			kfree(ref);
7671 			break;
7672 		}
7673 
7674 		page = virt_to_page(ref->page);
7675 
7676 		spd.pages[i] = page;
7677 		spd.partial[i].len = PAGE_SIZE;
7678 		spd.partial[i].offset = 0;
7679 		spd.partial[i].private = (unsigned long)ref;
7680 		spd.nr_pages++;
7681 		*ppos += PAGE_SIZE;
7682 
7683 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7684 	}
7685 
7686 	trace_access_unlock(iter->cpu_file);
7687 	spd.nr_pages = i;
7688 
7689 	/* did we read anything? */
7690 	if (!spd.nr_pages) {
7691 		if (ret)
7692 			goto out;
7693 
7694 		ret = -EAGAIN;
7695 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7696 			goto out;
7697 
7698 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7699 		if (ret)
7700 			goto out;
7701 
7702 		goto again;
7703 	}
7704 
7705 	ret = splice_to_pipe(pipe, &spd);
7706 out:
7707 	splice_shrink_spd(&spd);
7708 
7709 	return ret;
7710 }
7711 
7712 static const struct file_operations tracing_buffers_fops = {
7713 	.open		= tracing_buffers_open,
7714 	.read		= tracing_buffers_read,
7715 	.poll		= tracing_buffers_poll,
7716 	.release	= tracing_buffers_release,
7717 	.splice_read	= tracing_buffers_splice_read,
7718 	.llseek		= no_llseek,
7719 };
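
/*
 * Illustrative use of the per-CPU trace_pipe_raw files served by the
 * operations above (a user-space sketch, not kernel code): the file hands
 * out whole binary ring-buffer pages, so it is read in page-sized chunks,
 * e.g. with a 4K page size:
 *
 *   dd if=/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw of=cpu0.raw bs=4096
 *
 * The splice_read callback lets readers such as trace-cmd move those pages
 * straight into a pipe, and O_NONBLOCK readers get -EAGAIN when the buffer
 * is empty, as implemented in tracing_buffers_read() above.
 */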
7720 
7721 static ssize_t
7722 tracing_stats_read(struct file *filp, char __user *ubuf,
7723 		   size_t count, loff_t *ppos)
7724 {
7725 	struct inode *inode = file_inode(filp);
7726 	struct trace_array *tr = inode->i_private;
7727 	struct array_buffer *trace_buf = &tr->array_buffer;
7728 	int cpu = tracing_get_cpu(inode);
7729 	struct trace_seq *s;
7730 	unsigned long cnt;
7731 	unsigned long long t;
7732 	unsigned long usec_rem;
7733 
7734 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7735 	if (!s)
7736 		return -ENOMEM;
7737 
7738 	trace_seq_init(s);
7739 
7740 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7741 	trace_seq_printf(s, "entries: %ld\n", cnt);
7742 
7743 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7744 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7745 
7746 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7747 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7748 
7749 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7750 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7751 
7752 	if (trace_clocks[tr->clock_id].in_ns) {
7753 		/* local or global for trace_clock */
7754 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7755 		usec_rem = do_div(t, USEC_PER_SEC);
7756 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7757 								t, usec_rem);
7758 
7759 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7760 		usec_rem = do_div(t, USEC_PER_SEC);
7761 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7762 	} else {
7763 		/* counter or tsc mode for trace_clock */
7764 		trace_seq_printf(s, "oldest event ts: %llu\n",
7765 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7766 
7767 		trace_seq_printf(s, "now ts: %llu\n",
7768 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7769 	}
7770 
7771 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7772 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7773 
7774 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7775 	trace_seq_printf(s, "read events: %ld\n", cnt);
7776 
7777 	count = simple_read_from_buffer(ubuf, count, ppos,
7778 					s->buffer, trace_seq_used(s));
7779 
7780 	kfree(s);
7781 
7782 	return count;
7783 }
7784 
7785 static const struct file_operations tracing_stats_fops = {
7786 	.open		= tracing_open_generic_tr,
7787 	.read		= tracing_stats_read,
7788 	.llseek		= generic_file_llseek,
7789 	.release	= tracing_release_generic_tr,
7790 };
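
/*
 * Example of what tracing_stats_read() emits through per_cpu/cpuN/stats
 * for a nanosecond-based trace clock (values are illustrative only):
 *
 *   entries: 129
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 7392
 *   oldest event ts: 52335.849400
 *   now ts: 52349.650916
 *   dropped events: 0
 *   read events: 107
 *
 * With a counter or TSC clock the two timestamp lines are printed as raw
 * 64-bit values instead, as handled in the else branch above.
 */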
7791 
7792 #ifdef CONFIG_DYNAMIC_FTRACE
7793 
7794 static ssize_t
7795 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7796 		  size_t cnt, loff_t *ppos)
7797 {
7798 	ssize_t ret;
7799 	char *buf;
7800 	int r;
7801 
7802 	/* 256 should be plenty to hold the amount needed */
7803 	buf = kmalloc(256, GFP_KERNEL);
7804 	if (!buf)
7805 		return -ENOMEM;
7806 
7807 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7808 		      ftrace_update_tot_cnt,
7809 		      ftrace_number_of_pages,
7810 		      ftrace_number_of_groups);
7811 
7812 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7813 	kfree(buf);
7814 	return ret;
7815 }
7816 
7817 static const struct file_operations tracing_dyn_info_fops = {
7818 	.open		= tracing_open_generic,
7819 	.read		= tracing_read_dyn_info,
7820 	.llseek		= generic_file_llseek,
7821 };
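
/*
 * The format above produces a single line in dyn_ftrace_total_info, e.g.
 * (numbers are illustrative only):
 *
 *   45362 pages:312 groups: 26
 *
 * i.e. the number of call sites converted by dynamic ftrace, the pages
 * allocated to hold their records, and the number of page groups.
 */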
7822 #endif /* CONFIG_DYNAMIC_FTRACE */
7823 
7824 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7825 static void
7826 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7827 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7828 		void *data)
7829 {
7830 	tracing_snapshot_instance(tr);
7831 }
7832 
7833 static void
7834 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7835 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7836 		      void *data)
7837 {
7838 	struct ftrace_func_mapper *mapper = data;
7839 	long *count = NULL;
7840 
7841 	if (mapper)
7842 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7843 
7844 	if (count) {
7845 
7846 		if (*count <= 0)
7847 			return;
7848 
7849 		(*count)--;
7850 	}
7851 
7852 	tracing_snapshot_instance(tr);
7853 }
7854 
7855 static int
7856 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7857 		      struct ftrace_probe_ops *ops, void *data)
7858 {
7859 	struct ftrace_func_mapper *mapper = data;
7860 	long *count = NULL;
7861 
7862 	seq_printf(m, "%ps:", (void *)ip);
7863 
7864 	seq_puts(m, "snapshot");
7865 
7866 	if (mapper)
7867 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7868 
7869 	if (count)
7870 		seq_printf(m, ":count=%ld\n", *count);
7871 	else
7872 		seq_puts(m, ":unlimited\n");
7873 
7874 	return 0;
7875 }
7876 
7877 static int
7878 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7879 		     unsigned long ip, void *init_data, void **data)
7880 {
7881 	struct ftrace_func_mapper *mapper = *data;
7882 
7883 	if (!mapper) {
7884 		mapper = allocate_ftrace_func_mapper();
7885 		if (!mapper)
7886 			return -ENOMEM;
7887 		*data = mapper;
7888 	}
7889 
7890 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7891 }
7892 
7893 static void
7894 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7895 		     unsigned long ip, void *data)
7896 {
7897 	struct ftrace_func_mapper *mapper = data;
7898 
7899 	if (!ip) {
7900 		if (!mapper)
7901 			return;
7902 		free_ftrace_func_mapper(mapper, NULL);
7903 		return;
7904 	}
7905 
7906 	ftrace_func_mapper_remove_ip(mapper, ip);
7907 }
7908 
7909 static struct ftrace_probe_ops snapshot_probe_ops = {
7910 	.func			= ftrace_snapshot,
7911 	.print			= ftrace_snapshot_print,
7912 };
7913 
7914 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7915 	.func			= ftrace_count_snapshot,
7916 	.print			= ftrace_snapshot_print,
7917 	.init			= ftrace_snapshot_init,
7918 	.free			= ftrace_snapshot_free,
7919 };
7920 
7921 static int
7922 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7923 			       char *glob, char *cmd, char *param, int enable)
7924 {
7925 	struct ftrace_probe_ops *ops;
7926 	void *count = (void *)-1;
7927 	char *number;
7928 	int ret;
7929 
7930 	if (!tr)
7931 		return -ENODEV;
7932 
7933 	/* hash funcs only work with set_ftrace_filter */
7934 	if (!enable)
7935 		return -EINVAL;
7936 
7937 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7938 
7939 	if (glob[0] == '!')
7940 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7941 
7942 	if (!param)
7943 		goto out_reg;
7944 
7945 	number = strsep(&param, ":");
7946 
7947 	if (!strlen(number))
7948 		goto out_reg;
7949 
7950 	/*
7951 	 * We use the callback data field (which is a pointer)
7952 	 * as our counter.
7953 	 */
7954 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7955 	if (ret)
7956 		return ret;
7957 
7958  out_reg:
7959 	ret = tracing_alloc_snapshot_instance(tr);
7960 	if (ret < 0)
7961 		goto out;
7962 
7963 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7964 
7965  out:
7966 	return ret < 0 ? ret : 0;
7967 }
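
/*
 * Example of the syntax parsed above, written to set_ftrace_filter (the
 * function name is only an example):
 *
 *   echo 'do_sys_open:snapshot' > set_ftrace_filter
 *   echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *   echo '!do_sys_open:snapshot' > set_ftrace_filter
 *
 * The first form takes a snapshot on every hit of the function, the second
 * only for the first 5 hits (the count parsed from the param above), and
 * the leading '!' unregisters the probe again.
 */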
7968 
7969 static struct ftrace_func_command ftrace_snapshot_cmd = {
7970 	.name			= "snapshot",
7971 	.func			= ftrace_trace_snapshot_callback,
7972 };
7973 
7974 static __init int register_snapshot_cmd(void)
7975 {
7976 	return register_ftrace_command(&ftrace_snapshot_cmd);
7977 }
7978 #else
7979 static inline __init int register_snapshot_cmd(void) { return 0; }
7980 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7981 
7982 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7983 {
7984 	if (WARN_ON(!tr->dir))
7985 		return ERR_PTR(-ENODEV);
7986 
7987 	/* Top directory uses NULL as the parent */
7988 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7989 		return NULL;
7990 
7991 	/* All sub buffers have a descriptor */
7992 	return tr->dir;
7993 }
7994 
7995 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7996 {
7997 	struct dentry *d_tracer;
7998 
7999 	if (tr->percpu_dir)
8000 		return tr->percpu_dir;
8001 
8002 	d_tracer = tracing_get_dentry(tr);
8003 	if (IS_ERR(d_tracer))
8004 		return NULL;
8005 
8006 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8007 
8008 	MEM_FAIL(!tr->percpu_dir,
8009 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8010 
8011 	return tr->percpu_dir;
8012 }
8013 
8014 static struct dentry *
8015 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8016 		      void *data, long cpu, const struct file_operations *fops)
8017 {
8018 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8019 
8020 	if (ret) /* See tracing_get_cpu() */
8021 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8022 	return ret;
8023 }
8024 
8025 static void
8026 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8027 {
8028 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8029 	struct dentry *d_cpu;
8030 	char cpu_dir[30]; /* 30 characters should be more than enough */
8031 
8032 	if (!d_percpu)
8033 		return;
8034 
8035 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8036 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8037 	if (!d_cpu) {
8038 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8039 		return;
8040 	}
8041 
8042 	/* per cpu trace_pipe */
8043 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8044 				tr, cpu, &tracing_pipe_fops);
8045 
8046 	/* per cpu trace */
8047 	trace_create_cpu_file("trace", 0644, d_cpu,
8048 				tr, cpu, &tracing_fops);
8049 
8050 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8051 				tr, cpu, &tracing_buffers_fops);
8052 
8053 	trace_create_cpu_file("stats", 0444, d_cpu,
8054 				tr, cpu, &tracing_stats_fops);
8055 
8056 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8057 				tr, cpu, &tracing_entries_fops);
8058 
8059 #ifdef CONFIG_TRACER_SNAPSHOT
8060 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8061 				tr, cpu, &snapshot_fops);
8062 
8063 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8064 				tr, cpu, &snapshot_raw_fops);
8065 #endif
8066 }
8067 
8068 #ifdef CONFIG_FTRACE_SELFTEST
8069 /* Let selftest have access to static functions in this file */
8070 #include "trace_selftest.c"
8071 #endif
8072 
8073 static ssize_t
8074 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8075 			loff_t *ppos)
8076 {
8077 	struct trace_option_dentry *topt = filp->private_data;
8078 	char *buf;
8079 
8080 	if (topt->flags->val & topt->opt->bit)
8081 		buf = "1\n";
8082 	else
8083 		buf = "0\n";
8084 
8085 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8086 }
8087 
8088 static ssize_t
8089 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8090 			 loff_t *ppos)
8091 {
8092 	struct trace_option_dentry *topt = filp->private_data;
8093 	unsigned long val;
8094 	int ret;
8095 
8096 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8097 	if (ret)
8098 		return ret;
8099 
8100 	if (val != 0 && val != 1)
8101 		return -EINVAL;
8102 
8103 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8104 		mutex_lock(&trace_types_lock);
8105 		ret = __set_tracer_option(topt->tr, topt->flags,
8106 					  topt->opt, !val);
8107 		mutex_unlock(&trace_types_lock);
8108 		if (ret)
8109 			return ret;
8110 	}
8111 
8112 	*ppos += cnt;
8113 
8114 	return cnt;
8115 }
8116 
8117 
8118 static const struct file_operations trace_options_fops = {
8119 	.open = tracing_open_generic,
8120 	.read = trace_options_read,
8121 	.write = trace_options_write,
8122 	.llseek	= generic_file_llseek,
8123 };
8124 
8125 /*
8126  * In order to pass in both the trace_array descriptor as well as the index
8127  * to the flag that the trace option file represents, the trace_array
8128  * has a character array of trace_flags_index[], which holds the index
8129  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8130  * The address of this character array is passed to the flag option file
8131  * read/write callbacks.
8132  *
8133  * In order to extract both the index and the trace_array descriptor,
8134  * get_tr_index() uses the following algorithm.
8135  *
8136  *   idx = *ptr;
8137  *
8138  * This works because the pointer is the address of an entry in the
8139  * index array whose value equals its own position (remember index[1] == 1).
8140  *
8141  * Then, to get the trace_array descriptor, we subtract that index
8142  * from the pointer, which lands us at the start of the index array:
8143  *
8144  *   ptr - idx == &index[0]
8145  *
8146  * Then a simple container_of() from that pointer gets us to the
8147  * trace_array descriptor.
8148  */
8149 static void get_tr_index(void *data, struct trace_array **ptr,
8150 			 unsigned int *pindex)
8151 {
8152 	*pindex = *(unsigned char *)data;
8153 
8154 	*ptr = container_of(data - *pindex, struct trace_array,
8155 			    trace_flags_index);
8156 }
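
/*
 * Worked example of the scheme above (offsets are hypothetical): if @data
 * points at tr->trace_flags_index[3], then *pindex becomes 3 and
 * data - 3 == &tr->trace_flags_index[0], so the container_of() recovers
 * the owning trace_array.
 */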
8157 
8158 static ssize_t
8159 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8160 			loff_t *ppos)
8161 {
8162 	void *tr_index = filp->private_data;
8163 	struct trace_array *tr;
8164 	unsigned int index;
8165 	char *buf;
8166 
8167 	get_tr_index(tr_index, &tr, &index);
8168 
8169 	if (tr->trace_flags & (1 << index))
8170 		buf = "1\n";
8171 	else
8172 		buf = "0\n";
8173 
8174 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8175 }
8176 
8177 static ssize_t
8178 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8179 			 loff_t *ppos)
8180 {
8181 	void *tr_index = filp->private_data;
8182 	struct trace_array *tr;
8183 	unsigned int index;
8184 	unsigned long val;
8185 	int ret;
8186 
8187 	get_tr_index(tr_index, &tr, &index);
8188 
8189 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8190 	if (ret)
8191 		return ret;
8192 
8193 	if (val != 0 && val != 1)
8194 		return -EINVAL;
8195 
8196 	mutex_lock(&event_mutex);
8197 	mutex_lock(&trace_types_lock);
8198 	ret = set_tracer_flag(tr, 1 << index, val);
8199 	mutex_unlock(&trace_types_lock);
8200 	mutex_unlock(&event_mutex);
8201 
8202 	if (ret < 0)
8203 		return ret;
8204 
8205 	*ppos += cnt;
8206 
8207 	return cnt;
8208 }
8209 
8210 static const struct file_operations trace_options_core_fops = {
8211 	.open = tracing_open_generic,
8212 	.read = trace_options_core_read,
8213 	.write = trace_options_core_write,
8214 	.llseek = generic_file_llseek,
8215 };
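
/*
 * These operations back the per-flag files in the instance's options/
 * directory (created by create_trace_option_core_file() below).  Typical
 * use from user space, using one of the core flags as an example:
 *
 *   echo 1 > /sys/kernel/tracing/options/sym-offset
 *   cat /sys/kernel/tracing/options/sym-offset
 *
 * Writing anything other than 0 or 1 fails with -EINVAL, as checked above.
 */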
8216 
8217 struct dentry *trace_create_file(const char *name,
8218 				 umode_t mode,
8219 				 struct dentry *parent,
8220 				 void *data,
8221 				 const struct file_operations *fops)
8222 {
8223 	struct dentry *ret;
8224 
8225 	ret = tracefs_create_file(name, mode, parent, data, fops);
8226 	if (!ret)
8227 		pr_warn("Could not create tracefs '%s' entry\n", name);
8228 
8229 	return ret;
8230 }
8231 
8232 
8233 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8234 {
8235 	struct dentry *d_tracer;
8236 
8237 	if (tr->options)
8238 		return tr->options;
8239 
8240 	d_tracer = tracing_get_dentry(tr);
8241 	if (IS_ERR(d_tracer))
8242 		return NULL;
8243 
8244 	tr->options = tracefs_create_dir("options", d_tracer);
8245 	if (!tr->options) {
8246 		pr_warn("Could not create tracefs directory 'options'\n");
8247 		return NULL;
8248 	}
8249 
8250 	return tr->options;
8251 }
8252 
8253 static void
8254 create_trace_option_file(struct trace_array *tr,
8255 			 struct trace_option_dentry *topt,
8256 			 struct tracer_flags *flags,
8257 			 struct tracer_opt *opt)
8258 {
8259 	struct dentry *t_options;
8260 
8261 	t_options = trace_options_init_dentry(tr);
8262 	if (!t_options)
8263 		return;
8264 
8265 	topt->flags = flags;
8266 	topt->opt = opt;
8267 	topt->tr = tr;
8268 
8269 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8270 				    &trace_options_fops);
8271 
8272 }
8273 
8274 static void
8275 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8276 {
8277 	struct trace_option_dentry *topts;
8278 	struct trace_options *tr_topts;
8279 	struct tracer_flags *flags;
8280 	struct tracer_opt *opts;
8281 	int cnt;
8282 	int i;
8283 
8284 	if (!tracer)
8285 		return;
8286 
8287 	flags = tracer->flags;
8288 
8289 	if (!flags || !flags->opts)
8290 		return;
8291 
8292 	/*
8293 	 * If this is an instance, only create flags for tracers
8294 	 * the instance may have.
8295 	 */
8296 	if (!trace_ok_for_array(tracer, tr))
8297 		return;
8298 
8299 	for (i = 0; i < tr->nr_topts; i++) {
8300 		/* Make sure there are no duplicate flags. */
8301 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8302 			return;
8303 	}
8304 
8305 	opts = flags->opts;
8306 
8307 	for (cnt = 0; opts[cnt].name; cnt++)
8308 		;
8309 
8310 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8311 	if (!topts)
8312 		return;
8313 
8314 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8315 			    GFP_KERNEL);
8316 	if (!tr_topts) {
8317 		kfree(topts);
8318 		return;
8319 	}
8320 
8321 	tr->topts = tr_topts;
8322 	tr->topts[tr->nr_topts].tracer = tracer;
8323 	tr->topts[tr->nr_topts].topts = topts;
8324 	tr->nr_topts++;
8325 
8326 	for (cnt = 0; opts[cnt].name; cnt++) {
8327 		create_trace_option_file(tr, &topts[cnt], flags,
8328 					 &opts[cnt]);
8329 		MEM_FAIL(topts[cnt].entry == NULL,
8330 			  "Failed to create trace option: %s",
8331 			  opts[cnt].name);
8332 	}
8333 }
8334 
8335 static struct dentry *
8336 create_trace_option_core_file(struct trace_array *tr,
8337 			      const char *option, long index)
8338 {
8339 	struct dentry *t_options;
8340 
8341 	t_options = trace_options_init_dentry(tr);
8342 	if (!t_options)
8343 		return NULL;
8344 
8345 	return trace_create_file(option, 0644, t_options,
8346 				 (void *)&tr->trace_flags_index[index],
8347 				 &trace_options_core_fops);
8348 }
8349 
8350 static void create_trace_options_dir(struct trace_array *tr)
8351 {
8352 	struct dentry *t_options;
8353 	bool top_level = tr == &global_trace;
8354 	int i;
8355 
8356 	t_options = trace_options_init_dentry(tr);
8357 	if (!t_options)
8358 		return;
8359 
8360 	for (i = 0; trace_options[i]; i++) {
8361 		if (top_level ||
8362 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8363 			create_trace_option_core_file(tr, trace_options[i], i);
8364 	}
8365 }
8366 
8367 static ssize_t
8368 rb_simple_read(struct file *filp, char __user *ubuf,
8369 	       size_t cnt, loff_t *ppos)
8370 {
8371 	struct trace_array *tr = filp->private_data;
8372 	char buf[64];
8373 	int r;
8374 
8375 	r = tracer_tracing_is_on(tr);
8376 	r = sprintf(buf, "%d\n", r);
8377 
8378 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8379 }
8380 
8381 static ssize_t
8382 rb_simple_write(struct file *filp, const char __user *ubuf,
8383 		size_t cnt, loff_t *ppos)
8384 {
8385 	struct trace_array *tr = filp->private_data;
8386 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8387 	unsigned long val;
8388 	int ret;
8389 
8390 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8391 	if (ret)
8392 		return ret;
8393 
8394 	if (buffer) {
8395 		mutex_lock(&trace_types_lock);
8396 		if (!!val == tracer_tracing_is_on(tr)) {
8397 			val = 0; /* do nothing */
8398 		} else if (val) {
8399 			tracer_tracing_on(tr);
8400 			if (tr->current_trace->start)
8401 				tr->current_trace->start(tr);
8402 		} else {
8403 			tracer_tracing_off(tr);
8404 			if (tr->current_trace->stop)
8405 				tr->current_trace->stop(tr);
8406 		}
8407 		mutex_unlock(&trace_types_lock);
8408 	}
8409 
8410 	(*ppos)++;
8411 
8412 	return cnt;
8413 }
8414 
8415 static const struct file_operations rb_simple_fops = {
8416 	.open		= tracing_open_generic_tr,
8417 	.read		= rb_simple_read,
8418 	.write		= rb_simple_write,
8419 	.release	= tracing_release_generic_tr,
8420 	.llseek		= default_llseek,
8421 };
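
/*
 * rb_simple_fops backs the "tracing_on" file created in
 * init_tracer_tracefs() below.  Typical use from user space:
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on	# stop recording
 *   echo 1 > /sys/kernel/tracing/tracing_on	# resume recording
 *
 * This only toggles whether events are written into the ring buffer; the
 * current tracer stays loaded, and its start/stop callbacks are invoked
 * as shown in rb_simple_write().
 */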
8422 
8423 static ssize_t
8424 buffer_percent_read(struct file *filp, char __user *ubuf,
8425 		    size_t cnt, loff_t *ppos)
8426 {
8427 	struct trace_array *tr = filp->private_data;
8428 	char buf[64];
8429 	int r;
8430 
8431 	r = tr->buffer_percent;
8432 	r = sprintf(buf, "%d\n", r);
8433 
8434 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8435 }
8436 
8437 static ssize_t
8438 buffer_percent_write(struct file *filp, const char __user *ubuf,
8439 		     size_t cnt, loff_t *ppos)
8440 {
8441 	struct trace_array *tr = filp->private_data;
8442 	unsigned long val;
8443 	int ret;
8444 
8445 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8446 	if (ret)
8447 		return ret;
8448 
8449 	if (val > 100)
8450 		return -EINVAL;
8451 
8452 	if (!val)
8453 		val = 1;
8454 
8455 	tr->buffer_percent = val;
8456 
8457 	(*ppos)++;
8458 
8459 	return cnt;
8460 }
8461 
8462 static const struct file_operations buffer_percent_fops = {
8463 	.open		= tracing_open_generic_tr,
8464 	.read		= buffer_percent_read,
8465 	.write		= buffer_percent_write,
8466 	.release	= tracing_release_generic_tr,
8467 	.llseek		= default_llseek,
8468 };
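
/*
 * buffer_percent_fops backs the "buffer_percent" file.  The value is the
 * fill level a per-CPU buffer must reach before blocked trace_pipe_raw
 * readers are woken (it defaults to 50 in init_tracer_tracefs()).  For
 * example:
 *
 *   echo 10 > /sys/kernel/tracing/buffer_percent
 *
 * wakes poll()/splice() waiters once a buffer is 10% full.  Note that a
 * write of 0 is treated as 1 by the code above.
 */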
8469 
8470 static struct dentry *trace_instance_dir;
8471 
8472 static void
8473 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8474 
8475 static int
8476 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8477 {
8478 	enum ring_buffer_flags rb_flags;
8479 
8480 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8481 
8482 	buf->tr = tr;
8483 
8484 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8485 	if (!buf->buffer)
8486 		return -ENOMEM;
8487 
8488 	buf->data = alloc_percpu(struct trace_array_cpu);
8489 	if (!buf->data) {
8490 		ring_buffer_free(buf->buffer);
8491 		buf->buffer = NULL;
8492 		return -ENOMEM;
8493 	}
8494 
8495 	/* Allocate the first page for all buffers */
8496 	set_buffer_entries(&tr->array_buffer,
8497 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8498 
8499 	return 0;
8500 }
8501 
8502 static int allocate_trace_buffers(struct trace_array *tr, int size)
8503 {
8504 	int ret;
8505 
8506 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8507 	if (ret)
8508 		return ret;
8509 
8510 #ifdef CONFIG_TRACER_MAX_TRACE
8511 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8512 				    allocate_snapshot ? size : 1);
8513 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8514 		ring_buffer_free(tr->array_buffer.buffer);
8515 		tr->array_buffer.buffer = NULL;
8516 		free_percpu(tr->array_buffer.data);
8517 		tr->array_buffer.data = NULL;
8518 		return -ENOMEM;
8519 	}
8520 	tr->allocated_snapshot = allocate_snapshot;
8521 
8522 	/*
8523 	 * Only the top level trace array gets its snapshot allocated
8524 	 * from the kernel command line.
8525 	 */
8526 	allocate_snapshot = false;
8527 #endif
8528 	return 0;
8529 }
8530 
8531 static void free_trace_buffer(struct array_buffer *buf)
8532 {
8533 	if (buf->buffer) {
8534 		ring_buffer_free(buf->buffer);
8535 		buf->buffer = NULL;
8536 		free_percpu(buf->data);
8537 		buf->data = NULL;
8538 	}
8539 }
8540 
8541 static void free_trace_buffers(struct trace_array *tr)
8542 {
8543 	if (!tr)
8544 		return;
8545 
8546 	free_trace_buffer(&tr->array_buffer);
8547 
8548 #ifdef CONFIG_TRACER_MAX_TRACE
8549 	free_trace_buffer(&tr->max_buffer);
8550 #endif
8551 }
8552 
8553 static void init_trace_flags_index(struct trace_array *tr)
8554 {
8555 	int i;
8556 
8557 	/* Used by the trace options files */
8558 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8559 		tr->trace_flags_index[i] = i;
8560 }
8561 
8562 static void __update_tracer_options(struct trace_array *tr)
8563 {
8564 	struct tracer *t;
8565 
8566 	for (t = trace_types; t; t = t->next)
8567 		add_tracer_options(tr, t);
8568 }
8569 
8570 static void update_tracer_options(struct trace_array *tr)
8571 {
8572 	mutex_lock(&trace_types_lock);
8573 	__update_tracer_options(tr);
8574 	mutex_unlock(&trace_types_lock);
8575 }
8576 
8577 /* Must have trace_types_lock held */
8578 struct trace_array *trace_array_find(const char *instance)
8579 {
8580 	struct trace_array *tr, *found = NULL;
8581 
8582 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8583 		if (tr->name && strcmp(tr->name, instance) == 0) {
8584 			found = tr;
8585 			break;
8586 		}
8587 	}
8588 
8589 	return found;
8590 }
8591 
8592 struct trace_array *trace_array_find_get(const char *instance)
8593 {
8594 	struct trace_array *tr;
8595 
8596 	mutex_lock(&trace_types_lock);
8597 	tr = trace_array_find(instance);
8598 	if (tr)
8599 		tr->ref++;
8600 	mutex_unlock(&trace_types_lock);
8601 
8602 	return tr;
8603 }
8604 
8605 static struct trace_array *trace_array_create(const char *name)
8606 {
8607 	struct trace_array *tr;
8608 	int ret;
8609 
8610 	ret = -ENOMEM;
8611 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8612 	if (!tr)
8613 		return ERR_PTR(ret);
8614 
8615 	tr->name = kstrdup(name, GFP_KERNEL);
8616 	if (!tr->name)
8617 		goto out_free_tr;
8618 
8619 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8620 		goto out_free_tr;
8621 
8622 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8623 
8624 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8625 
8626 	raw_spin_lock_init(&tr->start_lock);
8627 
8628 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8629 
8630 	tr->current_trace = &nop_trace;
8631 
8632 	INIT_LIST_HEAD(&tr->systems);
8633 	INIT_LIST_HEAD(&tr->events);
8634 	INIT_LIST_HEAD(&tr->hist_vars);
8635 	INIT_LIST_HEAD(&tr->err_log);
8636 
8637 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8638 		goto out_free_tr;
8639 
8640 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8641 	if (!tr->dir)
8642 		goto out_free_tr;
8643 
8644 	ret = event_trace_add_tracer(tr->dir, tr);
8645 	if (ret) {
8646 		tracefs_remove(tr->dir);
8647 		goto out_free_tr;
8648 	}
8649 
8650 	ftrace_init_trace_array(tr);
8651 
8652 	init_tracer_tracefs(tr, tr->dir);
8653 	init_trace_flags_index(tr);
8654 	__update_tracer_options(tr);
8655 
8656 	list_add(&tr->list, &ftrace_trace_arrays);
8657 
8658 	tr->ref++;
8659 
8660 
8661 	return tr;
8662 
8663  out_free_tr:
8664 	free_trace_buffers(tr);
8665 	free_cpumask_var(tr->tracing_cpumask);
8666 	kfree(tr->name);
8667 	kfree(tr);
8668 
8669 	return ERR_PTR(ret);
8670 }
8671 
8672 static int instance_mkdir(const char *name)
8673 {
8674 	struct trace_array *tr;
8675 	int ret;
8676 
8677 	mutex_lock(&event_mutex);
8678 	mutex_lock(&trace_types_lock);
8679 
8680 	ret = -EEXIST;
8681 	if (trace_array_find(name))
8682 		goto out_unlock;
8683 
8684 	tr = trace_array_create(name);
8685 
8686 	ret = PTR_ERR_OR_ZERO(tr);
8687 
8688 out_unlock:
8689 	mutex_unlock(&trace_types_lock);
8690 	mutex_unlock(&event_mutex);
8691 	return ret;
8692 }
8693 
8694 /**
8695  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8696  * @name: The name of the trace array to be looked up/created.
8697  *
8698  * Returns a pointer to the trace array with the given name, or NULL
8699  * if it cannot be created.
8700  *
8701  * NOTE: This function increments the reference counter associated with the
8702  * trace array returned. This makes sure it cannot be freed while in use.
8703  * Use trace_array_put() once the trace array is no longer needed.
8704  * If the trace_array is to be freed, trace_array_destroy() needs to
8705  * be called after the trace_array_put(), or simply let user space delete
8706  * it from the tracefs instances directory. But until the
8707  * trace_array_put() is called, user space can not delete it.
8708  *
8709  */
8710 struct trace_array *trace_array_get_by_name(const char *name)
8711 {
8712 	struct trace_array *tr;
8713 
8714 	mutex_lock(&event_mutex);
8715 	mutex_lock(&trace_types_lock);
8716 
8717 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8718 		if (tr->name && strcmp(tr->name, name) == 0)
8719 			goto out_unlock;
8720 	}
8721 
8722 	tr = trace_array_create(name);
8723 
8724 	if (IS_ERR(tr))
8725 		tr = NULL;
8726 out_unlock:
8727 	if (tr)
8728 		tr->ref++;
8729 
8730 	mutex_unlock(&trace_types_lock);
8731 	mutex_unlock(&event_mutex);
8732 	return tr;
8733 }
8734 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
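
/*
 * Sketch of how a module might use this interface (error handling trimmed,
 * instance name arbitrary):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 *
 * trace_array_destroy() is only needed if the instance should go away, and
 * as the comment above notes, it must come after the trace_array_put().
 */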
8735 
8736 static int __remove_instance(struct trace_array *tr)
8737 {
8738 	int i;
8739 
8740 	/* Reference counter for a newly created trace array = 1. */
8741 	if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8742 		return -EBUSY;
8743 
8744 	list_del(&tr->list);
8745 
8746 	/* Disable all the flags that were enabled coming in */
8747 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8748 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8749 			set_tracer_flag(tr, 1 << i, 0);
8750 	}
8751 
8752 	tracing_set_nop(tr);
8753 	clear_ftrace_function_probes(tr);
8754 	event_trace_del_tracer(tr);
8755 	ftrace_clear_pids(tr);
8756 	ftrace_destroy_function_files(tr);
8757 	tracefs_remove(tr->dir);
8758 	free_trace_buffers(tr);
8759 
8760 	for (i = 0; i < tr->nr_topts; i++) {
8761 		kfree(tr->topts[i].topts);
8762 	}
8763 	kfree(tr->topts);
8764 
8765 	free_cpumask_var(tr->tracing_cpumask);
8766 	kfree(tr->name);
8767 	kfree(tr);
8768 	tr = NULL;
8769 
8770 	return 0;
8771 }
8772 
8773 int trace_array_destroy(struct trace_array *this_tr)
8774 {
8775 	struct trace_array *tr;
8776 	int ret;
8777 
8778 	if (!this_tr)
8779 		return -EINVAL;
8780 
8781 	mutex_lock(&event_mutex);
8782 	mutex_lock(&trace_types_lock);
8783 
8784 	ret = -ENODEV;
8785 
8786 	/* Making sure trace array exists before destroying it. */
8787 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8788 		if (tr == this_tr) {
8789 			ret = __remove_instance(tr);
8790 			break;
8791 		}
8792 	}
8793 
8794 	mutex_unlock(&trace_types_lock);
8795 	mutex_unlock(&event_mutex);
8796 
8797 	return ret;
8798 }
8799 EXPORT_SYMBOL_GPL(trace_array_destroy);
8800 
8801 static int instance_rmdir(const char *name)
8802 {
8803 	struct trace_array *tr;
8804 	int ret;
8805 
8806 	mutex_lock(&event_mutex);
8807 	mutex_lock(&trace_types_lock);
8808 
8809 	ret = -ENODEV;
8810 	tr = trace_array_find(name);
8811 	if (tr)
8812 		ret = __remove_instance(tr);
8813 
8814 	mutex_unlock(&trace_types_lock);
8815 	mutex_unlock(&event_mutex);
8816 
8817 	return ret;
8818 }
8819 
8820 static __init void create_trace_instances(struct dentry *d_tracer)
8821 {
8822 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8823 							 instance_mkdir,
8824 							 instance_rmdir);
8825 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8826 		return;
8827 }
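
/*
 * With the instances directory in place, user space can create and remove
 * ring-buffer instances directly:
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   rmdir /sys/kernel/tracing/instances/foo
 *
 * which end up in instance_mkdir() and instance_rmdir() above.  The rmdir
 * fails with -EBUSY while the instance still has users (see
 * __remove_instance()).
 */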
8828 
8829 static void
8830 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8831 {
8832 	struct trace_event_file *file;
8833 	int cpu;
8834 
8835 	trace_create_file("available_tracers", 0444, d_tracer,
8836 			tr, &show_traces_fops);
8837 
8838 	trace_create_file("current_tracer", 0644, d_tracer,
8839 			tr, &set_tracer_fops);
8840 
8841 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8842 			  tr, &tracing_cpumask_fops);
8843 
8844 	trace_create_file("trace_options", 0644, d_tracer,
8845 			  tr, &tracing_iter_fops);
8846 
8847 	trace_create_file("trace", 0644, d_tracer,
8848 			  tr, &tracing_fops);
8849 
8850 	trace_create_file("trace_pipe", 0444, d_tracer,
8851 			  tr, &tracing_pipe_fops);
8852 
8853 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8854 			  tr, &tracing_entries_fops);
8855 
8856 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8857 			  tr, &tracing_total_entries_fops);
8858 
8859 	trace_create_file("free_buffer", 0200, d_tracer,
8860 			  tr, &tracing_free_buffer_fops);
8861 
8862 	trace_create_file("trace_marker", 0220, d_tracer,
8863 			  tr, &tracing_mark_fops);
8864 
8865 	file = __find_event_file(tr, "ftrace", "print");
8866 	if (file && file->dir)
8867 		trace_create_file("trigger", 0644, file->dir, file,
8868 				  &event_trigger_fops);
8869 	tr->trace_marker_file = file;
8870 
8871 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8872 			  tr, &tracing_mark_raw_fops);
8873 
8874 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8875 			  &trace_clock_fops);
8876 
8877 	trace_create_file("tracing_on", 0644, d_tracer,
8878 			  tr, &rb_simple_fops);
8879 
8880 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8881 			  &trace_time_stamp_mode_fops);
8882 
8883 	tr->buffer_percent = 50;
8884 
8885 	trace_create_file("buffer_percent", 0444, d_tracer,
8886 			tr, &buffer_percent_fops);
8887 
8888 	create_trace_options_dir(tr);
8889 
8890 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8891 	trace_create_maxlat_file(tr, d_tracer);
8892 #endif
8893 
8894 	if (ftrace_create_function_files(tr, d_tracer))
8895 		MEM_FAIL(1, "Could not allocate function filter files");
8896 
8897 #ifdef CONFIG_TRACER_SNAPSHOT
8898 	trace_create_file("snapshot", 0644, d_tracer,
8899 			  tr, &snapshot_fops);
8900 #endif
8901 
8902 	trace_create_file("error_log", 0644, d_tracer,
8903 			  tr, &tracing_err_log_fops);
8904 
8905 	for_each_tracing_cpu(cpu)
8906 		tracing_init_tracefs_percpu(tr, cpu);
8907 
8908 	ftrace_init_tracefs(tr, d_tracer);
8909 }
8910 
8911 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8912 {
8913 	struct vfsmount *mnt;
8914 	struct file_system_type *type;
8915 
8916 	/*
8917 	 * To maintain backward compatibility for tools that mount
8918 	 * debugfs to get to the tracing facility, tracefs is automatically
8919 	 * mounted to the debugfs/tracing directory.
8920 	 */
8921 	type = get_fs_type("tracefs");
8922 	if (!type)
8923 		return NULL;
8924 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8925 	put_filesystem(type);
8926 	if (IS_ERR(mnt))
8927 		return NULL;
8928 	mntget(mnt);
8929 
8930 	return mnt;
8931 }
8932 
8933 /**
8934  * tracing_init_dentry - initialize top level trace array
8935  *
8936  * This is called when creating files or directories in the tracing
8937  * directory. It is called via fs_initcall() by any of the boot up code
8938  * and expects to return the dentry of the top level tracing directory.
8939  */
8940 struct dentry *tracing_init_dentry(void)
8941 {
8942 	struct trace_array *tr = &global_trace;
8943 
8944 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
8945 		pr_warn("Tracing disabled due to lockdown\n");
8946 		return ERR_PTR(-EPERM);
8947 	}
8948 
8949 	/* The top level trace array uses NULL as parent */
8950 	if (tr->dir)
8951 		return NULL;
8952 
8953 	if (WARN_ON(!tracefs_initialized()) ||
8954 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8955 		 WARN_ON(!debugfs_initialized())))
8956 		return ERR_PTR(-ENODEV);
8957 
8958 	/*
8959 	 * As there may still be users that expect the tracing
8960 	 * files to exist in debugfs/tracing, we must automount
8961 	 * the tracefs file system there, so older tools still
8962 	 * work with the newer kernel.
8963 	 */
8964 	tr->dir = debugfs_create_automount("tracing", NULL,
8965 					   trace_automount, NULL);
8966 
8967 	return NULL;
8968 }
8969 
8970 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8971 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8972 
8973 static void __init trace_eval_init(void)
8974 {
8975 	int len;
8976 
8977 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8978 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8979 }
8980 
8981 #ifdef CONFIG_MODULES
8982 static void trace_module_add_evals(struct module *mod)
8983 {
8984 	if (!mod->num_trace_evals)
8985 		return;
8986 
8987 	/*
8988 	 * Modules with bad taint do not have events created, do
8989 	 * not bother with enums either.
8990 	 */
8991 	if (trace_module_has_bad_taint(mod))
8992 		return;
8993 
8994 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8995 }
8996 
8997 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8998 static void trace_module_remove_evals(struct module *mod)
8999 {
9000 	union trace_eval_map_item *map;
9001 	union trace_eval_map_item **last = &trace_eval_maps;
9002 
9003 	if (!mod->num_trace_evals)
9004 		return;
9005 
9006 	mutex_lock(&trace_eval_mutex);
9007 
9008 	map = trace_eval_maps;
9009 
9010 	while (map) {
9011 		if (map->head.mod == mod)
9012 			break;
9013 		map = trace_eval_jmp_to_tail(map);
9014 		last = &map->tail.next;
9015 		map = map->tail.next;
9016 	}
9017 	if (!map)
9018 		goto out;
9019 
9020 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9021 	kfree(map);
9022  out:
9023 	mutex_unlock(&trace_eval_mutex);
9024 }
9025 #else
9026 static inline void trace_module_remove_evals(struct module *mod) { }
9027 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9028 
9029 static int trace_module_notify(struct notifier_block *self,
9030 			       unsigned long val, void *data)
9031 {
9032 	struct module *mod = data;
9033 
9034 	switch (val) {
9035 	case MODULE_STATE_COMING:
9036 		trace_module_add_evals(mod);
9037 		break;
9038 	case MODULE_STATE_GOING:
9039 		trace_module_remove_evals(mod);
9040 		break;
9041 	}
9042 
9043 	return 0;
9044 }
9045 
9046 static struct notifier_block trace_module_nb = {
9047 	.notifier_call = trace_module_notify,
9048 	.priority = 0,
9049 };
9050 #endif /* CONFIG_MODULES */
9051 
9052 static __init int tracer_init_tracefs(void)
9053 {
9054 	struct dentry *d_tracer;
9055 
9056 	trace_access_lock_init();
9057 
9058 	d_tracer = tracing_init_dentry();
9059 	if (IS_ERR(d_tracer))
9060 		return 0;
9061 
9062 	event_trace_init();
9063 
9064 	init_tracer_tracefs(&global_trace, d_tracer);
9065 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9066 
9067 	trace_create_file("tracing_thresh", 0644, d_tracer,
9068 			&global_trace, &tracing_thresh_fops);
9069 
9070 	trace_create_file("README", 0444, d_tracer,
9071 			NULL, &tracing_readme_fops);
9072 
9073 	trace_create_file("saved_cmdlines", 0444, d_tracer,
9074 			NULL, &tracing_saved_cmdlines_fops);
9075 
9076 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9077 			  NULL, &tracing_saved_cmdlines_size_fops);
9078 
9079 	trace_create_file("saved_tgids", 0444, d_tracer,
9080 			NULL, &tracing_saved_tgids_fops);
9081 
9082 	trace_eval_init();
9083 
9084 	trace_create_eval_file(d_tracer);
9085 
9086 #ifdef CONFIG_MODULES
9087 	register_module_notifier(&trace_module_nb);
9088 #endif
9089 
9090 #ifdef CONFIG_DYNAMIC_FTRACE
9091 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9092 			NULL, &tracing_dyn_info_fops);
9093 #endif
9094 
9095 	create_trace_instances(d_tracer);
9096 
9097 	update_tracer_options(&global_trace);
9098 
9099 	return 0;
9100 }
9101 
9102 static int trace_panic_handler(struct notifier_block *this,
9103 			       unsigned long event, void *unused)
9104 {
9105 	if (ftrace_dump_on_oops)
9106 		ftrace_dump(ftrace_dump_on_oops);
9107 	return NOTIFY_OK;
9108 }
9109 
9110 static struct notifier_block trace_panic_notifier = {
9111 	.notifier_call  = trace_panic_handler,
9112 	.next           = NULL,
9113 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9114 };
9115 
9116 static int trace_die_handler(struct notifier_block *self,
9117 			     unsigned long val,
9118 			     void *data)
9119 {
9120 	switch (val) {
9121 	case DIE_OOPS:
9122 		if (ftrace_dump_on_oops)
9123 			ftrace_dump(ftrace_dump_on_oops);
9124 		break;
9125 	default:
9126 		break;
9127 	}
9128 	return NOTIFY_OK;
9129 }
9130 
9131 static struct notifier_block trace_die_notifier = {
9132 	.notifier_call = trace_die_handler,
9133 	.priority = 200
9134 };
9135 
9136 /*
9137  * printk is set to a max of 1024, but we really don't need it that big.
9138  * Nothing should be printing 1000 characters anyway.
9139  */
9140 #define TRACE_MAX_PRINT		1000
9141 
9142 /*
9143  * Define here KERN_TRACE so that we have one place to modify
9144  * it if we decide to change what log level the ftrace dump
9145  * should be at.
9146  */
9147 #define KERN_TRACE		KERN_EMERG
9148 
9149 void
9150 trace_printk_seq(struct trace_seq *s)
9151 {
9152 	/* Probably should print a warning here. */
9153 	if (s->seq.len >= TRACE_MAX_PRINT)
9154 		s->seq.len = TRACE_MAX_PRINT;
9155 
9156 	/*
9157 	 * More paranoid code. Although the buffer size is set to
9158 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9159 	 * an extra layer of protection.
9160 	 */
9161 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9162 		s->seq.len = s->seq.size - 1;
9163 
9164 	/* Should already be NUL terminated, but we are paranoid. */
9165 	s->buffer[s->seq.len] = 0;
9166 
9167 	printk(KERN_TRACE "%s", s->buffer);
9168 
9169 	trace_seq_init(s);
9170 }
9171 
9172 void trace_init_global_iter(struct trace_iterator *iter)
9173 {
9174 	iter->tr = &global_trace;
9175 	iter->trace = iter->tr->current_trace;
9176 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9177 	iter->array_buffer = &global_trace.array_buffer;
9178 
9179 	if (iter->trace && iter->trace->open)
9180 		iter->trace->open(iter);
9181 
9182 	/* Annotate start of buffers if we had overruns */
9183 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9184 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9185 
9186 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9187 	if (trace_clocks[iter->tr->clock_id].in_ns)
9188 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9189 }
9190 
9191 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9192 {
9193 	/* use static because iter can be a bit big for the stack */
9194 	static struct trace_iterator iter;
9195 	static atomic_t dump_running;
9196 	struct trace_array *tr = &global_trace;
9197 	unsigned int old_userobj;
9198 	unsigned long flags;
9199 	int cnt = 0, cpu;
9200 
9201 	/* Only allow one dump user at a time. */
9202 	if (atomic_inc_return(&dump_running) != 1) {
9203 		atomic_dec(&dump_running);
9204 		return;
9205 	}
9206 
9207 	/*
9208 	 * Always turn off tracing when we dump.
9209 	 * We don't need to show trace output of what happens
9210 	 * between multiple crashes.
9211 	 *
9212 	 * If the user does a sysrq-z, then they can re-enable
9213 	 * tracing with echo 1 > tracing_on.
9214 	 */
9215 	tracing_off();
9216 
9217 	local_irq_save(flags);
9218 	printk_nmi_direct_enter();
9219 
9220 	/* Simulate the iterator */
9221 	trace_init_global_iter(&iter);
9222 	/* Can not use kmalloc for iter.temp */
9223 	iter.temp = static_temp_buf;
9224 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9225 
9226 	for_each_tracing_cpu(cpu) {
9227 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9228 	}
9229 
9230 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9231 
9232 	/* don't look at user memory in panic mode */
9233 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9234 
9235 	switch (oops_dump_mode) {
9236 	case DUMP_ALL:
9237 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9238 		break;
9239 	case DUMP_ORIG:
9240 		iter.cpu_file = raw_smp_processor_id();
9241 		break;
9242 	case DUMP_NONE:
9243 		goto out_enable;
9244 	default:
9245 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9246 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9247 	}
9248 
9249 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9250 
9251 	/* Did function tracer already get disabled? */
9252 	if (ftrace_is_dead()) {
9253 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9254 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9255 	}
9256 
9257 	/*
9258 	 * We need to stop all tracing on all CPUs to read
9259 	 * the next buffer. This is a bit expensive, but is
9260 	 * not done often. We read everything we can,
9261 	 * and then release the locks again.
9262 	 */
9263 
9264 	while (!trace_empty(&iter)) {
9265 
9266 		if (!cnt)
9267 			printk(KERN_TRACE "---------------------------------\n");
9268 
9269 		cnt++;
9270 
9271 		trace_iterator_reset(&iter);
9272 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9273 
9274 		if (trace_find_next_entry_inc(&iter) != NULL) {
9275 			int ret;
9276 
9277 			ret = print_trace_line(&iter);
9278 			if (ret != TRACE_TYPE_NO_CONSUME)
9279 				trace_consume(&iter);
9280 		}
9281 		touch_nmi_watchdog();
9282 
9283 		trace_printk_seq(&iter.seq);
9284 	}
9285 
9286 	if (!cnt)
9287 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9288 	else
9289 		printk(KERN_TRACE "---------------------------------\n");
9290 
9291  out_enable:
9292 	tr->trace_flags |= old_userobj;
9293 
9294 	for_each_tracing_cpu(cpu) {
9295 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9296 	}
9297 	atomic_dec(&dump_running);
9298 	printk_nmi_direct_exit();
9299 	local_irq_restore(flags);
9300 }
9301 EXPORT_SYMBOL_GPL(ftrace_dump);
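
/*
 * Besides being callable from kernel code, this dump path is what the
 * panic and die notifiers above use when the ftrace_dump_on_oops parameter
 * is set, e.g. booting with:
 *
 *   ftrace_dump_on_oops			dump all CPUs (DUMP_ALL)
 *   ftrace_dump_on_oops=orig_cpu	dump only the oopsing CPU (DUMP_ORIG)
 *
 * The same dump can be requested manually with the sysrq-z trigger.
 */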
9302 
9303 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9304 {
9305 	char **argv;
9306 	int argc, ret;
9307 
9308 	argc = 0;
9309 	ret = 0;
9310 	argv = argv_split(GFP_KERNEL, buf, &argc);
9311 	if (!argv)
9312 		return -ENOMEM;
9313 
9314 	if (argc)
9315 		ret = createfn(argc, argv);
9316 
9317 	argv_free(argv);
9318 
9319 	return ret;
9320 }
9321 
9322 #define WRITE_BUFSIZE  4096
9323 
9324 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9325 				size_t count, loff_t *ppos,
9326 				int (*createfn)(int, char **))
9327 {
9328 	char *kbuf, *buf, *tmp;
9329 	int ret = 0;
9330 	size_t done = 0;
9331 	size_t size;
9332 
9333 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9334 	if (!kbuf)
9335 		return -ENOMEM;
9336 
9337 	while (done < count) {
9338 		size = count - done;
9339 
9340 		if (size >= WRITE_BUFSIZE)
9341 			size = WRITE_BUFSIZE - 1;
9342 
9343 		if (copy_from_user(kbuf, buffer + done, size)) {
9344 			ret = -EFAULT;
9345 			goto out;
9346 		}
9347 		kbuf[size] = '\0';
9348 		buf = kbuf;
9349 		do {
9350 			tmp = strchr(buf, '\n');
9351 			if (tmp) {
9352 				*tmp = '\0';
9353 				size = tmp - buf + 1;
9354 			} else {
9355 				size = strlen(buf);
9356 				if (done + size < count) {
9357 					if (buf != kbuf)
9358 						break;
9359 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9360 					pr_warn("Line length is too long: Should be less than %d\n",
9361 						WRITE_BUFSIZE - 2);
9362 					ret = -EINVAL;
9363 					goto out;
9364 				}
9365 			}
9366 			done += size;
9367 
9368 			/* Remove comments */
9369 			tmp = strchr(buf, '#');
9370 
9371 			if (tmp)
9372 				*tmp = '\0';
9373 
9374 			ret = trace_run_command(buf, createfn);
9375 			if (ret)
9376 				goto out;
9377 			buf += size;
9378 
9379 		} while (done < count);
9380 	}
9381 	ret = done;
9382 
9383 out:
9384 	kfree(kbuf);
9385 
9386 	return ret;
9387 }
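
/*
 * Sketch of the input this parser accepts.  The dynamic event interfaces
 * (e.g. kprobe_events) route their writes through here, so a single write
 * may carry several newline-separated commands, with '#' starting a
 * comment:
 *
 *   echo 'p:myprobe do_sys_open  # probe opens' \
 *	> /sys/kernel/tracing/kprobe_events
 *
 * Each resulting line is split with argv_split() in trace_run_command()
 * and handed to the createfn callback supplied by the caller.
 */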
9388 
9389 __init static int tracer_alloc_buffers(void)
9390 {
9391 	int ring_buf_size;
9392 	int ret = -ENOMEM;
9393 
9394 
9395 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9396 		pr_warn("Tracing disabled due to lockdown\n");
9397 		return -EPERM;
9398 	}
9399 
9400 	/*
9401 	 * Make sure we don't accidentally add more trace options
9402 	 * than we have bits for.
9403 	 */
9404 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9405 
9406 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9407 		goto out;
9408 
9409 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9410 		goto out_free_buffer_mask;
9411 
9412 	/* Only allocate trace_printk buffers if a trace_printk exists */
9413 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9414 		/* Must be called before global_trace.buffer is allocated */
9415 		trace_printk_init_buffers();
9416 
9417 	/* To save memory, keep the ring buffer size to its minimum */
9418 	if (ring_buffer_expanded)
9419 		ring_buf_size = trace_buf_size;
9420 	else
9421 		ring_buf_size = 1;
9422 
9423 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9424 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9425 
9426 	raw_spin_lock_init(&global_trace.start_lock);
9427 
9428 	/*
9429 	 * The prepare callback allocates some memory for the ring buffer. We
9430 	 * don't free the buffer if the CPU goes down. If we were to free
9431 	 * the buffer, then the user would lose any trace that was in the
9432 	 * buffer. The memory will be removed once the "instance" is removed.
9433 	 */
9434 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9435 				      "trace/RB:prepare", trace_rb_cpu_prepare,
9436 				      NULL);
9437 	if (ret < 0)
9438 		goto out_free_cpumask;
9439 	/* Used for event triggers */
9440 	ret = -ENOMEM;
9441 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9442 	if (!temp_buffer)
9443 		goto out_rm_hp_state;
9444 
9445 	if (trace_create_savedcmd() < 0)
9446 		goto out_free_temp_buffer;
9447 
9448 	/* TODO: make the number of buffers hot pluggable with CPUs */
9449 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9450 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9451 		goto out_free_savedcmd;
9452 	}
9453 
9454 	if (global_trace.buffer_disabled)
9455 		tracing_off();
9456 
9457 	if (trace_boot_clock) {
9458 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9459 		if (ret < 0)
9460 			pr_warn("Trace clock %s not defined, going back to default\n",
9461 				trace_boot_clock);
9462 	}
9463 
9464 	/*
9465 	 * register_tracer() might reference current_trace, so it
9466 	 * needs to be set before we register anything. This is
9467 	 * just a bootstrap of current_trace anyway.
9468 	 */
9469 	global_trace.current_trace = &nop_trace;
9470 
9471 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9472 
9473 	ftrace_init_global_array_ops(&global_trace);
9474 
9475 	init_trace_flags_index(&global_trace);
9476 
9477 	register_tracer(&nop_trace);
9478 
9479 	/* Function tracing may start here (via kernel command line) */
9480 	init_function_trace();
9481 
9482 	/* All seems OK, enable tracing */
9483 	tracing_disabled = 0;
9484 
9485 	atomic_notifier_chain_register(&panic_notifier_list,
9486 				       &trace_panic_notifier);
9487 
9488 	register_die_notifier(&trace_die_notifier);
9489 
9490 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9491 
9492 	INIT_LIST_HEAD(&global_trace.systems);
9493 	INIT_LIST_HEAD(&global_trace.events);
9494 	INIT_LIST_HEAD(&global_trace.hist_vars);
9495 	INIT_LIST_HEAD(&global_trace.err_log);
9496 	list_add(&global_trace.list, &ftrace_trace_arrays);
9497 
9498 	apply_trace_boot_options();
9499 
9500 	register_snapshot_cmd();
9501 
9502 	return 0;
9503 
9504 out_free_savedcmd:
9505 	free_saved_cmdlines_buffer(savedcmd);
9506 out_free_temp_buffer:
9507 	ring_buffer_free(temp_buffer);
9508 out_rm_hp_state:
9509 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9510 out_free_cpumask:
9511 	free_cpumask_var(global_trace.tracing_cpumask);
9512 out_free_buffer_mask:
9513 	free_cpumask_var(tracing_buffer_mask);
9514 out:
9515 	return ret;
9516 }
9517 
9518 void __init early_trace_init(void)
9519 {
9520 	if (tracepoint_printk) {
9521 		tracepoint_print_iter =
9522 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9523 		if (MEM_FAIL(!tracepoint_print_iter,
9524 			     "Failed to allocate trace iterator\n"))
9525 			tracepoint_printk = 0;
9526 		else
9527 			static_key_enable(&tracepoint_printk_key.key);
9528 	}
9529 	tracer_alloc_buffers();
9530 }
9531 
9532 void __init trace_init(void)
9533 {
9534 	trace_event_init();
9535 }
9536 
9537 __init static int clear_boot_tracer(void)
9538 {
9539 	/*
9540 	 * The default bootup tracer name is stored in an init section buffer.
9541 	 * This function is called in lateinit. If we did not
9542 	 * find the boot tracer, then clear it out, to prevent
9543 	 * later registration from accessing the buffer that is
9544 	 * about to be freed.
9545 	 */
9546 	if (!default_bootup_tracer)
9547 		return 0;
9548 
9549 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9550 	       default_bootup_tracer);
9551 	default_bootup_tracer = NULL;
9552 
9553 	return 0;
9554 }
9555 
9556 fs_initcall(tracer_init_tracefs);
9557 late_initcall_sync(clear_boot_tracer);
9558 
9559 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9560 __init static int tracing_set_default_clock(void)
9561 {
9562 	/* sched_clock_stable() is determined in late_initcall */
9563 	if (!trace_boot_clock && !sched_clock_stable()) {
9564 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9565 			pr_warn("Can not set tracing clock due to lockdown\n");
9566 			return -EPERM;
9567 		}
9568 
9569 		printk(KERN_WARNING
9570 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9571 		       "If you want to keep using the local clock, then add:\n"
9572 		       "  \"trace_clock=local\"\n"
9573 		       "on the kernel command line\n");
9574 		tracing_set_clock(&global_trace, "global");
9575 	}
9576 
9577 	return 0;
9578 }
9579 late_initcall_sync(tracing_set_default_clock);
9580 #endif
9581