xref: /linux/kernel/trace/trace.c (revision 68550cbc6129159b7a6434796b721e8b66ee12f6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" is first and points to NULL as it must be different
154 	 * than "mod" or "eval_string"
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
174 
175 static union trace_eval_map_item *trace_eval_maps;
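
/*
 * Illustrative sketch (not part of this file): walking the saved maps
 * laid out as described above. The function name is hypothetical and a
 * caller would need to hold trace_eval_mutex; the real iteration is done
 * by the "eval_map" seq_file code later in this file.
 */
static void __maybe_unused example_walk_eval_maps(union trace_eval_map_item *ptr)
{
	unsigned long i;

	while (ptr) {
		unsigned long len = ptr->head.length;
		union trace_eval_map_item *map = ptr + 1;	/* first saved map */

		for (i = 0; i < len; i++)
			pr_info("%s: %s -> %lu\n", map[i].map.system,
				map[i].map.eval_string, map[i].map.eval_value);

		/* The tail element links to the next saved array (or NULL) */
		ptr = map[len].tail.next;
	}
}
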
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 	default_bootup_tracer = bootup_tracer_buf;
193 	/* We are using ftrace early, expand it */
194 	ring_buffer_expanded = true;
195 	return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198 
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 		ftrace_dump_on_oops = DUMP_ALL;
203 		return 1;
204 	}
205 
206 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 		ftrace_dump_on_oops = DUMP_ORIG;
208 		return 1;
209 	}
210 
211 	return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214 
215 static int __init stop_trace_on_warning(char *str)
216 {
217 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 		__disable_trace_on_warning = 1;
219 	return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222 
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 	allocate_snapshot = true;
226 	/* We also need the main ring buffer expanded */
227 	ring_buffer_expanded = true;
228 	return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231 
232 
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234 
235 static int __init set_trace_boot_options(char *str)
236 {
237 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 	return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241 
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244 
245 static int __init set_trace_boot_clock(char *str)
246 {
247 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 	trace_boot_clock = trace_boot_clock_buf;
249 	return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252 
253 static int __init set_tracepoint_printk(char *str)
254 {
255 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256 		tracepoint_printk = 1;
257 	return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260 
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263 	tracepoint_printk_stop_on_boot = true;
264 	return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267 
268 unsigned long long ns2usecs(u64 nsec)
269 {
270 	nsec += 500;
271 	do_div(nsec, 1000);
272 	return nsec;
273 }
274 
275 static void
276 trace_process_export(struct trace_export *export,
277 	       struct ring_buffer_event *event, int flag)
278 {
279 	struct trace_entry *entry;
280 	unsigned int size = 0;
281 
282 	if (export->flags & flag) {
283 		entry = ring_buffer_event_data(event);
284 		size = ring_buffer_event_length(event);
285 		export->write(export, entry, size);
286 	}
287 }
288 
289 static DEFINE_MUTEX(ftrace_export_lock);
290 
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292 
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296 
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299 	if (export->flags & TRACE_EXPORT_FUNCTION)
300 		static_branch_inc(&trace_function_exports_enabled);
301 
302 	if (export->flags & TRACE_EXPORT_EVENT)
303 		static_branch_inc(&trace_event_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_MARKER)
306 		static_branch_inc(&trace_marker_exports_enabled);
307 }
308 
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311 	if (export->flags & TRACE_EXPORT_FUNCTION)
312 		static_branch_dec(&trace_function_exports_enabled);
313 
314 	if (export->flags & TRACE_EXPORT_EVENT)
315 		static_branch_dec(&trace_event_exports_enabled);
316 
317 	if (export->flags & TRACE_EXPORT_MARKER)
318 		static_branch_dec(&trace_marker_exports_enabled);
319 }
320 
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323 	struct trace_export *export;
324 
325 	preempt_disable_notrace();
326 
327 	export = rcu_dereference_raw_check(ftrace_exports_list);
328 	while (export) {
329 		trace_process_export(export, event, flag);
330 		export = rcu_dereference_raw_check(export->next);
331 	}
332 
333 	preempt_enable_notrace();
334 }
335 
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339 	rcu_assign_pointer(export->next, *list);
340 	/*
341 	 * We are entering export into the list but another
342 	 * CPU might be walking that list. We need to make sure
343 	 * the export->next pointer is valid before another CPU sees
344 	 * the export pointer included into the list.
345 	 */
346 	rcu_assign_pointer(*list, export);
347 }
348 
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352 	struct trace_export **p;
353 
354 	for (p = list; *p != NULL; p = &(*p)->next)
355 		if (*p == export)
356 			break;
357 
358 	if (*p != export)
359 		return -1;
360 
361 	rcu_assign_pointer(*p, (*p)->next);
362 
363 	return 0;
364 }
365 
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369 	ftrace_exports_enable(export);
370 
371 	add_trace_export(list, export);
372 }
373 
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377 	int ret;
378 
379 	ret = rm_trace_export(list, export);
380 	ftrace_exports_disable(export);
381 
382 	return ret;
383 }
384 
385 int register_ftrace_export(struct trace_export *export)
386 {
387 	if (WARN_ON_ONCE(!export->write))
388 		return -1;
389 
390 	mutex_lock(&ftrace_export_lock);
391 
392 	add_ftrace_export(&ftrace_exports_list, export);
393 
394 	mutex_unlock(&ftrace_export_lock);
395 
396 	return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399 
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402 	int ret;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	ret = rm_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
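
/*
 * Illustrative sketch (not part of this file): a minimal trace_export
 * user. The names and the pr_info() body are hypothetical; the write()
 * prototype is assumed to match struct trace_export in
 * include/linux/trace.h at this revision.
 */
static void example_export_write(struct trace_export *export, const void *entry,
				 unsigned int size)
{
	/* Forward the raw trace entry to some external sink */
	pr_info("exporting %u bytes\n", size);
}

static struct trace_export example_export __maybe_unused = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT,
};

/*
 * A module would then pair:
 *	register_ftrace_export(&example_export);
 * with:
 *	unregister_ftrace_export(&example_export);
 */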
413 
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS						\
416 	(FUNCTION_DEFAULT_FLAGS |					\
417 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
418 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
419 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
420 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
421 	 TRACE_ITER_HASH_PTR)
422 
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
425 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426 
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430 
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436 	.trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438 
439 LIST_HEAD(ftrace_trace_arrays);
440 
441 int trace_array_get(struct trace_array *this_tr)
442 {
443 	struct trace_array *tr;
444 	int ret = -ENODEV;
445 
446 	mutex_lock(&trace_types_lock);
447 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448 		if (tr == this_tr) {
449 			tr->ref++;
450 			ret = 0;
451 			break;
452 		}
453 	}
454 	mutex_unlock(&trace_types_lock);
455 
456 	return ret;
457 }
458 
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461 	WARN_ON(!this_tr->ref);
462 	this_tr->ref--;
463 }
464 
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476 	if (!this_tr)
477 		return;
478 
479 	mutex_lock(&trace_types_lock);
480 	__trace_array_put(this_tr);
481 	mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484 
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487 	int ret;
488 
489 	ret = security_locked_down(LOCKDOWN_TRACEFS);
490 	if (ret)
491 		return ret;
492 
493 	if (tracing_disabled)
494 		return -ENODEV;
495 
496 	if (tr && trace_array_get(tr) < 0)
497 		return -ENODEV;
498 
499 	return 0;
500 }
501 
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503 			      struct trace_buffer *buffer,
504 			      struct ring_buffer_event *event)
505 {
506 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507 	    !filter_match_preds(call->filter, rec)) {
508 		__trace_event_discard_commit(buffer, event);
509 		return 1;
510 	}
511 
512 	return 0;
513 }
514 
515 /**
516  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
517  * @filtered_pids: The list of pids to check
518  * @search_pid: The PID to find in @filtered_pids
519  *
520  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
521  */
522 bool
523 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
524 {
525 	return trace_pid_list_is_set(filtered_pids, search_pid);
526 }
527 
528 /**
529  * trace_ignore_this_task - should a task be ignored for tracing
530  * @filtered_pids: The list of pids to check
531  * @filtered_no_pids: The list of pids not to be traced
532  * @task: The task that should be ignored if not filtered
533  *
534  * Checks if @task should be traced or not from @filtered_pids.
535  * Returns true if @task should *NOT* be traced.
536  * Returns false if @task should be traced.
537  */
538 bool
539 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
540 		       struct trace_pid_list *filtered_no_pids,
541 		       struct task_struct *task)
542 {
543 	/*
544 	 * If filtered_no_pids is not empty, and the task's pid is listed
545 	 * in filtered_no_pids, then return true.
546 	 * Otherwise, if filtered_pids is empty, that means we can
547 	 * trace all tasks. If it has content, then only trace pids
548 	 * within filtered_pids.
549 	 */
550 
551 	return (filtered_pids &&
552 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
553 		(filtered_no_pids &&
554 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
555 }
556 
557 /**
558  * trace_filter_add_remove_task - Add or remove a task from a pid_list
559  * @pid_list: The list to modify
560  * @self: The current task for fork or NULL for exit
561  * @task: The task to add or remove
562  *
563  * If adding a task, if @self is defined, the task is only added if @self
564  * is also included in @pid_list. This happens on fork and tasks should
565  * only be added when the parent is listed. If @self is NULL, then the
566  * @task pid will be removed from the list, which would happen on exit
567  * of a task.
568  */
569 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
570 				  struct task_struct *self,
571 				  struct task_struct *task)
572 {
573 	if (!pid_list)
574 		return;
575 
576 	/* For forks, we only add if the forking task is listed */
577 	if (self) {
578 		if (!trace_find_filtered_pid(pid_list, self->pid))
579 			return;
580 	}
581 
582 	/* "self" is set for forks, and NULL for exits */
583 	if (self)
584 		trace_pid_list_set(pid_list, task->pid);
585 	else
586 		trace_pid_list_clear(pid_list, task->pid);
587 }
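
/*
 * Illustrative sketch (not part of this file): how the helper above is
 * driven from sched_process_fork/exit tracepoint handlers, mirroring the
 * event pid filter code in trace_events.c. Names are hypothetical.
 */
static void __maybe_unused
example_on_fork(struct trace_pid_list *pid_list,
		struct task_struct *parent, struct task_struct *child)
{
	/* The child is only added if the parent is already in the list */
	trace_filter_add_remove_task(pid_list, parent, child);
}

static void __maybe_unused
example_on_exit(struct trace_pid_list *pid_list, struct task_struct *task)
{
	/* A NULL "self" removes @task from the list */
	trace_filter_add_remove_task(pid_list, NULL, task);
}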
588 
589 /**
590  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
591  * @pid_list: The pid list to show
592  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
593  * @pos: The position of the file
594  *
595  * This is used by the seq_file "next" operation to iterate the pids
596  * listed in a trace_pid_list structure.
597  *
598  * Returns the pid+1 as we want to display pid of zero, but NULL would
599  * stop the iteration.
600  */
601 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
602 {
603 	long pid = (unsigned long)v;
604 	unsigned int next;
605 
606 	(*pos)++;
607 
608 	/* pid already is +1 of the actual previous bit */
609 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
610 		return NULL;
611 
612 	pid = next;
613 
614 	/* Return pid + 1 to allow zero to be represented */
615 	return (void *)(pid + 1);
616 }
617 
618 /**
619  * trace_pid_start - Used for seq_file to start reading pid lists
620  * @pid_list: The pid list to show
621  * @pos: The position of the file
622  *
623  * This is used by seq_file "start" operation to start the iteration
624  * of listing pids.
625  *
626  * Returns the pid+1 as we want to display pid of zero, but NULL would
627  * stop the iteration.
628  */
629 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
630 {
631 	unsigned long pid;
632 	unsigned int first;
633 	loff_t l = 0;
634 
635 	if (trace_pid_list_first(pid_list, &first) < 0)
636 		return NULL;
637 
638 	pid = first;
639 
640 	/* Return pid + 1 so that zero can be the exit value */
641 	for (pid++; pid && l < *pos;
642 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
643 		;
644 	return (void *)pid;
645 }
646 
647 /**
648  * trace_pid_show - show the current pid in seq_file processing
649  * @m: The seq_file structure to write into
650  * @v: A void pointer of the pid (+1) value to display
651  *
652  * Can be directly used by seq_file operations to display the current
653  * pid value.
654  */
655 int trace_pid_show(struct seq_file *m, void *v)
656 {
657 	unsigned long pid = (unsigned long)v - 1;
658 
659 	seq_printf(m, "%lu\n", pid);
660 	return 0;
661 }
662 
663 /* 128 should be much more than enough */
664 #define PID_BUF_SIZE		127
665 
666 int trace_pid_write(struct trace_pid_list *filtered_pids,
667 		    struct trace_pid_list **new_pid_list,
668 		    const char __user *ubuf, size_t cnt)
669 {
670 	struct trace_pid_list *pid_list;
671 	struct trace_parser parser;
672 	unsigned long val;
673 	int nr_pids = 0;
674 	ssize_t read = 0;
675 	ssize_t ret;
676 	loff_t pos;
677 	pid_t pid;
678 
679 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
680 		return -ENOMEM;
681 
682 	/*
683 	 * Always recreate a new array. The write is an all or nothing
684 	 * operation. Always create a new array when adding new pids by
685 	 * the user. If the operation fails, then the current list is
686 	 * not modified.
687 	 */
688 	pid_list = trace_pid_list_alloc();
689 	if (!pid_list) {
690 		trace_parser_put(&parser);
691 		return -ENOMEM;
692 	}
693 
694 	if (filtered_pids) {
695 		/* copy the current bits to the new max */
696 		ret = trace_pid_list_first(filtered_pids, &pid);
697 		while (!ret) {
698 			trace_pid_list_set(pid_list, pid);
699 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
700 			nr_pids++;
701 		}
702 	}
703 
704 	ret = 0;
705 	while (cnt > 0) {
706 
707 		pos = 0;
708 
709 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
710 		if (ret < 0 || !trace_parser_loaded(&parser))
711 			break;
712 
713 		read += ret;
714 		ubuf += ret;
715 		cnt -= ret;
716 
717 		ret = -EINVAL;
718 		if (kstrtoul(parser.buffer, 0, &val))
719 			break;
720 
721 		pid = (pid_t)val;
722 
723 		if (trace_pid_list_set(pid_list, pid) < 0) {
724 			ret = -1;
725 			break;
726 		}
727 		nr_pids++;
728 
729 		trace_parser_clear(&parser);
730 		ret = 0;
731 	}
732 	trace_parser_put(&parser);
733 
734 	if (ret < 0) {
735 		trace_pid_list_free(pid_list);
736 		return ret;
737 	}
738 
739 	if (!nr_pids) {
740 		/* Cleared the list of pids */
741 		trace_pid_list_free(pid_list);
742 		read = ret;
743 		pid_list = NULL;
744 	}
745 
746 	*new_pid_list = pid_list;
747 
748 	return read;
749 }
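
/*
 * Illustrative sketch (not part of this file): the usual calling pattern
 * from a tracefs ->write() handler. The example_* name is hypothetical;
 * real callers also serialize against each other and publish the new
 * list with rcu_assign_pointer() before freeing the old one.
 */
static ssize_t __maybe_unused
example_pid_file_write(struct trace_pid_list *old_list,
		       struct trace_pid_list **out_list,
		       const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *new_list = NULL;
	ssize_t ret;

	ret = trace_pid_write(old_list, &new_list, ubuf, cnt);
	if (ret < 0)
		return ret;

	/* On success the caller owns new_list (NULL if all pids were cleared) */
	*out_list = new_list;
	return ret;
}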
750 
751 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
752 {
753 	u64 ts;
754 
755 	/* Early boot up does not have a buffer yet */
756 	if (!buf->buffer)
757 		return trace_clock_local();
758 
759 	ts = ring_buffer_time_stamp(buf->buffer);
760 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
761 
762 	return ts;
763 }
764 
765 u64 ftrace_now(int cpu)
766 {
767 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
768 }
769 
770 /**
771  * tracing_is_enabled - Show if global_trace has been enabled
772  *
773  * Shows if the global trace has been enabled or not. It uses the
774  * mirror flag "buffer_disabled", which is meant for fast paths such
775  * as the irqsoff tracer. But it may be inaccurate due to races. If you
776  * need to know the accurate state, use tracing_is_on() which is a little
777  * slower, but accurate.
778  */
779 int tracing_is_enabled(void)
780 {
781 	/*
782 	 * For quick access (irqsoff uses this in fast path), just
783 	 * return the mirror variable of the state of the ring buffer.
784 	 * It's a little racy, but we don't really care.
785 	 */
786 	smp_rmb();
787 	return !global_trace.buffer_disabled;
788 }
789 
790 /*
791  * trace_buf_size is the size in bytes that is allocated
792  * for a buffer. Note, the number of bytes is always rounded
793  * to page size.
794  *
795  * This number is purposely set to a low count of 16384 entries.
796  * If a dump on oops happens, it is much appreciated not to have
797  * to wait for all that output. Anyway, this is configurable at
798  * both boot time and run time.
799  */
800 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
801 
802 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
803 
804 /* trace_types holds a link list of available tracers. */
805 static struct tracer		*trace_types __read_mostly;
806 
807 /*
808  * trace_types_lock is used to protect the trace_types list.
809  */
810 DEFINE_MUTEX(trace_types_lock);
811 
812 /*
813  * serialize the access of the ring buffer
814  *
815  * ring buffer serializes readers, but it is low level protection.
816  * The validity of the events (which returns by ring_buffer_peek() ..etc)
817  * are not protected by ring buffer.
818  *
819  * The content of events may become garbage if we allow other process consumes
820  * these events concurrently:
821  *   A) the page of the consumed events may become a normal page
822  *      (not reader page) in ring buffer, and this page will be rewritten
823  *      by events producer.
824  *   B) The page of the consumed events may become a page for splice_read,
825  *      and this page will be returned to system.
826  *
827  * These primitives allow multi process access to different cpu ring buffer
828  * concurrently.
829  *
830  * These primitives don't distinguish read-only and read-consume access.
831  * Multi read-only access are also serialized.
832  */
833 
834 #ifdef CONFIG_SMP
835 static DECLARE_RWSEM(all_cpu_access_lock);
836 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
837 
838 static inline void trace_access_lock(int cpu)
839 {
840 	if (cpu == RING_BUFFER_ALL_CPUS) {
841 		/* gain it for accessing the whole ring buffer. */
842 		down_write(&all_cpu_access_lock);
843 	} else {
844 		/* gain it for accessing a cpu ring buffer. */
845 
846 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
847 		down_read(&all_cpu_access_lock);
848 
849 		/* Secondly block other access to this @cpu ring buffer. */
850 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
851 	}
852 }
853 
854 static inline void trace_access_unlock(int cpu)
855 {
856 	if (cpu == RING_BUFFER_ALL_CPUS) {
857 		up_write(&all_cpu_access_lock);
858 	} else {
859 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
860 		up_read(&all_cpu_access_lock);
861 	}
862 }
863 
864 static inline void trace_access_lock_init(void)
865 {
866 	int cpu;
867 
868 	for_each_possible_cpu(cpu)
869 		mutex_init(&per_cpu(cpu_access_lock, cpu));
870 }
871 
872 #else
873 
874 static DEFINE_MUTEX(access_lock);
875 
876 static inline void trace_access_lock(int cpu)
877 {
878 	(void)cpu;
879 	mutex_lock(&access_lock);
880 }
881 
882 static inline void trace_access_unlock(int cpu)
883 {
884 	(void)cpu;
885 	mutex_unlock(&access_lock);
886 }
887 
888 static inline void trace_access_lock_init(void)
889 {
890 }
891 
892 #endif
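
/*
 * Illustrative sketch (not part of this file): the intended pairing of
 * the primitives above around a consuming read of one CPU's buffer (or
 * all of them with RING_BUFFER_ALL_CPUS). The consume step is only a
 * placeholder comment.
 */
static void __maybe_unused example_consume_cpu(struct trace_buffer *buffer, int cpu)
{
	trace_access_lock(cpu);
	/* ... consume or splice events from @cpu's ring buffer here ... */
	trace_access_unlock(cpu);
}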
893 
894 #ifdef CONFIG_STACKTRACE
895 static void __ftrace_trace_stack(struct trace_buffer *buffer,
896 				 unsigned int trace_ctx,
897 				 int skip, struct pt_regs *regs);
898 static inline void ftrace_trace_stack(struct trace_array *tr,
899 				      struct trace_buffer *buffer,
900 				      unsigned int trace_ctx,
901 				      int skip, struct pt_regs *regs);
902 
903 #else
904 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
905 					unsigned int trace_ctx,
906 					int skip, struct pt_regs *regs)
907 {
908 }
909 static inline void ftrace_trace_stack(struct trace_array *tr,
910 				      struct trace_buffer *buffer,
911 				      unsigned long trace_ctx,
912 				      int skip, struct pt_regs *regs)
913 {
914 }
915 
916 #endif
917 
918 static __always_inline void
919 trace_event_setup(struct ring_buffer_event *event,
920 		  int type, unsigned int trace_ctx)
921 {
922 	struct trace_entry *ent = ring_buffer_event_data(event);
923 
924 	tracing_generic_entry_update(ent, type, trace_ctx);
925 }
926 
927 static __always_inline struct ring_buffer_event *
928 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
929 			  int type,
930 			  unsigned long len,
931 			  unsigned int trace_ctx)
932 {
933 	struct ring_buffer_event *event;
934 
935 	event = ring_buffer_lock_reserve(buffer, len);
936 	if (event != NULL)
937 		trace_event_setup(event, type, trace_ctx);
938 
939 	return event;
940 }
941 
942 void tracer_tracing_on(struct trace_array *tr)
943 {
944 	if (tr->array_buffer.buffer)
945 		ring_buffer_record_on(tr->array_buffer.buffer);
946 	/*
947 	 * This flag is looked at when buffers haven't been allocated
948 	 * yet, or by some tracers (like irqsoff), that just want to
949 	 * know if the ring buffer has been disabled, but it can handle
950 	 * races where it gets disabled but we still do a record.
951 	 * As the check is in the fast path of the tracers, it is more
952 	 * important to be fast than accurate.
953 	 */
954 	tr->buffer_disabled = 0;
955 	/* Make the flag seen by readers */
956 	smp_wmb();
957 }
958 
959 /**
960  * tracing_on - enable tracing buffers
961  *
962  * This function enables tracing buffers that may have been
963  * disabled with tracing_off.
964  */
965 void tracing_on(void)
966 {
967 	tracer_tracing_on(&global_trace);
968 }
969 EXPORT_SYMBOL_GPL(tracing_on);
970 
971 
972 static __always_inline void
973 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
974 {
975 	__this_cpu_write(trace_taskinfo_save, true);
976 
977 	/* If this is the temp buffer, we need to commit fully */
978 	if (this_cpu_read(trace_buffered_event) == event) {
979 		/* Length is in event->array[0] */
980 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
981 		/* Release the temp buffer */
982 		this_cpu_dec(trace_buffered_event_cnt);
983 	} else
984 		ring_buffer_unlock_commit(buffer, event);
985 }
986 
987 /**
988  * __trace_puts - write a constant string into the trace buffer.
989  * @ip:	   The address of the caller
990  * @str:   The constant string to write
991  * @size:  The size of the string.
992  */
993 int __trace_puts(unsigned long ip, const char *str, int size)
994 {
995 	struct ring_buffer_event *event;
996 	struct trace_buffer *buffer;
997 	struct print_entry *entry;
998 	unsigned int trace_ctx;
999 	int alloc;
1000 
1001 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1002 		return 0;
1003 
1004 	if (unlikely(tracing_selftest_running || tracing_disabled))
1005 		return 0;
1006 
1007 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1008 
1009 	trace_ctx = tracing_gen_ctx();
1010 	buffer = global_trace.array_buffer.buffer;
1011 	ring_buffer_nest_start(buffer);
1012 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1013 					    trace_ctx);
1014 	if (!event) {
1015 		size = 0;
1016 		goto out;
1017 	}
1018 
1019 	entry = ring_buffer_event_data(event);
1020 	entry->ip = ip;
1021 
1022 	memcpy(&entry->buf, str, size);
1023 
1024 	/* Add a newline if necessary */
1025 	if (entry->buf[size - 1] != '\n') {
1026 		entry->buf[size] = '\n';
1027 		entry->buf[size + 1] = '\0';
1028 	} else
1029 		entry->buf[size] = '\0';
1030 
1031 	__buffer_unlock_commit(buffer, event);
1032 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1033  out:
1034 	ring_buffer_nest_end(buffer);
1035 	return size;
1036 }
1037 EXPORT_SYMBOL_GPL(__trace_puts);
1038 
1039 /**
1040  * __trace_bputs - write the pointer to a constant string into trace buffer
1041  * @ip:	   The address of the caller
1042  * @str:   The constant string to write to the buffer to
1043  */
1044 int __trace_bputs(unsigned long ip, const char *str)
1045 {
1046 	struct ring_buffer_event *event;
1047 	struct trace_buffer *buffer;
1048 	struct bputs_entry *entry;
1049 	unsigned int trace_ctx;
1050 	int size = sizeof(struct bputs_entry);
1051 	int ret = 0;
1052 
1053 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1054 		return 0;
1055 
1056 	if (unlikely(tracing_selftest_running || tracing_disabled))
1057 		return 0;
1058 
1059 	trace_ctx = tracing_gen_ctx();
1060 	buffer = global_trace.array_buffer.buffer;
1061 
1062 	ring_buffer_nest_start(buffer);
1063 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1064 					    trace_ctx);
1065 	if (!event)
1066 		goto out;
1067 
1068 	entry = ring_buffer_event_data(event);
1069 	entry->ip			= ip;
1070 	entry->str			= str;
1071 
1072 	__buffer_unlock_commit(buffer, event);
1073 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1074 
1075 	ret = 1;
1076  out:
1077 	ring_buffer_nest_end(buffer);
1078 	return ret;
1079 }
1080 EXPORT_SYMBOL_GPL(__trace_bputs);
1081 
1082 #ifdef CONFIG_TRACER_SNAPSHOT
1083 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1084 					   void *cond_data)
1085 {
1086 	struct tracer *tracer = tr->current_trace;
1087 	unsigned long flags;
1088 
1089 	if (in_nmi()) {
1090 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1091 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1092 		return;
1093 	}
1094 
1095 	if (!tr->allocated_snapshot) {
1096 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1097 		internal_trace_puts("*** stopping trace here!   ***\n");
1098 		tracing_off();
1099 		return;
1100 	}
1101 
1102 	/* Note, snapshot can not be used when the tracer uses it */
1103 	if (tracer->use_max_tr) {
1104 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1105 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1106 		return;
1107 	}
1108 
1109 	local_irq_save(flags);
1110 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1111 	local_irq_restore(flags);
1112 }
1113 
1114 void tracing_snapshot_instance(struct trace_array *tr)
1115 {
1116 	tracing_snapshot_instance_cond(tr, NULL);
1117 }
1118 
1119 /**
1120  * tracing_snapshot - take a snapshot of the current buffer.
1121  *
1122  * This causes a swap between the snapshot buffer and the current live
1123  * tracing buffer. You can use this to take snapshots of the live
1124  * trace when some condition is triggered, but continue to trace.
1125  *
1126  * Note, make sure to allocate the snapshot either with
1127  * tracing_snapshot_alloc(), or manually with:
1128  *      echo 1 > /sys/kernel/debug/tracing/snapshot
1129  *
1130  * If the snapshot buffer is not allocated, this will stop tracing,
1131  * basically making a permanent snapshot.
1132  */
1133 void tracing_snapshot(void)
1134 {
1135 	struct trace_array *tr = &global_trace;
1136 
1137 	tracing_snapshot_instance(tr);
1138 }
1139 EXPORT_SYMBOL_GPL(tracing_snapshot);
1140 
1141 /**
1142  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1143  * @tr:		The tracing instance to snapshot
1144  * @cond_data:	The data to be tested conditionally, and possibly saved
1145  *
1146  * This is the same as tracing_snapshot() except that the snapshot is
1147  * conditional - the snapshot will only happen if the
1148  * cond_snapshot.update() implementation receiving the cond_data
1149  * returns true, which means that the trace array's cond_snapshot
1150  * update() operation used the cond_data to determine whether the
1151  * snapshot should be taken, and if it was, presumably saved it along
1152  * with the snapshot.
1153  */
1154 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1155 {
1156 	tracing_snapshot_instance_cond(tr, cond_data);
1157 }
1158 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1159 
1160 /**
1161  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1162  * @tr:		The tracing instance
1163  *
1164  * When the user enables a conditional snapshot using
1165  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1166  * with the snapshot.  This accessor is used to retrieve it.
1167  *
1168  * Should not be called from cond_snapshot.update(), since it takes
1169  * the tr->max_lock lock, which the code calling
1170  * cond_snapshot.update() has already taken.
1171  *
1172  * Returns the cond_data associated with the trace array's snapshot.
1173  */
1174 void *tracing_cond_snapshot_data(struct trace_array *tr)
1175 {
1176 	void *cond_data = NULL;
1177 
1178 	arch_spin_lock(&tr->max_lock);
1179 
1180 	if (tr->cond_snapshot)
1181 		cond_data = tr->cond_snapshot->cond_data;
1182 
1183 	arch_spin_unlock(&tr->max_lock);
1184 
1185 	return cond_data;
1186 }
1187 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1188 
1189 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1190 					struct array_buffer *size_buf, int cpu_id);
1191 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1192 
1193 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1194 {
1195 	int ret;
1196 
1197 	if (!tr->allocated_snapshot) {
1198 
1199 		/* allocate spare buffer */
1200 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1201 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1202 		if (ret < 0)
1203 			return ret;
1204 
1205 		tr->allocated_snapshot = true;
1206 	}
1207 
1208 	return 0;
1209 }
1210 
1211 static void free_snapshot(struct trace_array *tr)
1212 {
1213 	/*
1214 	 * We don't free the ring buffer; instead, we resize it because
1215 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1216 	 * we want to preserve it.
1217 	 */
1218 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1219 	set_buffer_entries(&tr->max_buffer, 1);
1220 	tracing_reset_online_cpus(&tr->max_buffer);
1221 	tr->allocated_snapshot = false;
1222 }
1223 
1224 /**
1225  * tracing_alloc_snapshot - allocate snapshot buffer.
1226  *
1227  * This only allocates the snapshot buffer if it isn't already
1228  * allocated - it doesn't also take a snapshot.
1229  *
1230  * This is meant to be used in cases where the snapshot buffer needs
1231  * to be set up for events that can't sleep but need to be able to
1232  * trigger a snapshot.
1233  */
1234 int tracing_alloc_snapshot(void)
1235 {
1236 	struct trace_array *tr = &global_trace;
1237 	int ret;
1238 
1239 	ret = tracing_alloc_snapshot_instance(tr);
1240 	WARN_ON(ret < 0);
1241 
1242 	return ret;
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
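
/*
 * Illustrative sketch (not part of this file): pre-allocating the
 * snapshot buffer from process context, then triggering snapshots from
 * a context that must not sleep. The example_* names are hypothetical.
 */
static int __maybe_unused example_snapshot_setup(void)
{
	return tracing_alloc_snapshot();	/* may sleep */
}

static void __maybe_unused example_snapshot_on_event(void)
{
	tracing_snapshot();			/* swaps in the spare buffer */
}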
1245 
1246 /**
1247  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1248  *
1249  * This is similar to tracing_snapshot(), but it will allocate the
1250  * snapshot buffer if it isn't already allocated. Use this only
1251  * where it is safe to sleep, as the allocation may sleep.
1252  *
1253  * This causes a swap between the snapshot buffer and the current live
1254  * tracing buffer. You can use this to take snapshots of the live
1255  * trace when some condition is triggered, but continue to trace.
1256  */
1257 void tracing_snapshot_alloc(void)
1258 {
1259 	int ret;
1260 
1261 	ret = tracing_alloc_snapshot();
1262 	if (ret < 0)
1263 		return;
1264 
1265 	tracing_snapshot();
1266 }
1267 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1268 
1269 /**
1270  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1271  * @tr:		The tracing instance
1272  * @cond_data:	User data to associate with the snapshot
1273  * @update:	Implementation of the cond_snapshot update function
1274  *
1275  * Check whether the conditional snapshot for the given instance has
1276  * already been enabled, or if the current tracer is already using a
1277  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1278  * save the cond_data and update function inside.
1279  *
1280  * Returns 0 if successful, error otherwise.
1281  */
1282 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1283 				 cond_update_fn_t update)
1284 {
1285 	struct cond_snapshot *cond_snapshot;
1286 	int ret = 0;
1287 
1288 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1289 	if (!cond_snapshot)
1290 		return -ENOMEM;
1291 
1292 	cond_snapshot->cond_data = cond_data;
1293 	cond_snapshot->update = update;
1294 
1295 	mutex_lock(&trace_types_lock);
1296 
1297 	ret = tracing_alloc_snapshot_instance(tr);
1298 	if (ret)
1299 		goto fail_unlock;
1300 
1301 	if (tr->current_trace->use_max_tr) {
1302 		ret = -EBUSY;
1303 		goto fail_unlock;
1304 	}
1305 
1306 	/*
1307 	 * The cond_snapshot can only change to NULL without the
1308 	 * trace_types_lock. We don't care if we race with it going
1309 	 * to NULL, but we want to make sure that it's not set to
1310 	 * something other than NULL when we get here, which we can
1311 	 * do safely with only holding the trace_types_lock and not
1312 	 * having to take the max_lock.
1313 	 */
1314 	if (tr->cond_snapshot) {
1315 		ret = -EBUSY;
1316 		goto fail_unlock;
1317 	}
1318 
1319 	arch_spin_lock(&tr->max_lock);
1320 	tr->cond_snapshot = cond_snapshot;
1321 	arch_spin_unlock(&tr->max_lock);
1322 
1323 	mutex_unlock(&trace_types_lock);
1324 
1325 	return ret;
1326 
1327  fail_unlock:
1328 	mutex_unlock(&trace_types_lock);
1329 	kfree(cond_snapshot);
1330 	return ret;
1331 }
1332 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
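
/*
 * Illustrative sketch (not part of this file): a user-supplied update()
 * callback for a conditional snapshot. The threshold logic and names are
 * hypothetical; the prototype is assumed to match cond_update_fn_t in
 * trace.h.
 */
static bool __maybe_unused
example_snapshot_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *threshold = cond_data;

	/* Only allow the snapshot when the tracked value is large enough */
	return *threshold > 100;
}

/*
 * A caller would then do roughly:
 *	tracing_snapshot_cond_enable(tr, &my_value, example_snapshot_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_value);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */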
1333 
1334 /**
1335  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1336  * @tr:		The tracing instance
1337  *
1338  * Check whether the conditional snapshot for the given instance is
1339  * enabled; if so, free the cond_snapshot associated with it,
1340  * otherwise return -EINVAL.
1341  *
1342  * Returns 0 if successful, error otherwise.
1343  */
1344 int tracing_snapshot_cond_disable(struct trace_array *tr)
1345 {
1346 	int ret = 0;
1347 
1348 	arch_spin_lock(&tr->max_lock);
1349 
1350 	if (!tr->cond_snapshot)
1351 		ret = -EINVAL;
1352 	else {
1353 		kfree(tr->cond_snapshot);
1354 		tr->cond_snapshot = NULL;
1355 	}
1356 
1357 	arch_spin_unlock(&tr->max_lock);
1358 
1359 	return ret;
1360 }
1361 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1362 #else
1363 void tracing_snapshot(void)
1364 {
1365 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1366 }
1367 EXPORT_SYMBOL_GPL(tracing_snapshot);
1368 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1369 {
1370 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1371 }
1372 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1373 int tracing_alloc_snapshot(void)
1374 {
1375 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1376 	return -ENODEV;
1377 }
1378 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1379 void tracing_snapshot_alloc(void)
1380 {
1381 	/* Give warning */
1382 	tracing_snapshot();
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1385 void *tracing_cond_snapshot_data(struct trace_array *tr)
1386 {
1387 	return NULL;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1390 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1391 {
1392 	return -ENODEV;
1393 }
1394 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1395 int tracing_snapshot_cond_disable(struct trace_array *tr)
1396 {
1397 	return false;
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1400 #endif /* CONFIG_TRACER_SNAPSHOT */
1401 
1402 void tracer_tracing_off(struct trace_array *tr)
1403 {
1404 	if (tr->array_buffer.buffer)
1405 		ring_buffer_record_off(tr->array_buffer.buffer);
1406 	/*
1407 	 * This flag is looked at when buffers haven't been allocated
1408 	 * yet, or by some tracers (like irqsoff), that just want to
1409 	 * know if the ring buffer has been disabled, but it can handle
1410 	 * races where it gets disabled but we still do a record.
1411 	 * As the check is in the fast path of the tracers, it is more
1412 	 * important to be fast than accurate.
1413 	 */
1414 	tr->buffer_disabled = 1;
1415 	/* Make the flag seen by readers */
1416 	smp_wmb();
1417 }
1418 
1419 /**
1420  * tracing_off - turn off tracing buffers
1421  *
1422  * This function stops the tracing buffers from recording data.
1423  * It does not disable any overhead the tracers themselves may
1424  * be causing. This function simply causes all recording to
1425  * the ring buffers to fail.
1426  */
1427 void tracing_off(void)
1428 {
1429 	tracer_tracing_off(&global_trace);
1430 }
1431 EXPORT_SYMBOL_GPL(tracing_off);
1432 
1433 void disable_trace_on_warning(void)
1434 {
1435 	if (__disable_trace_on_warning) {
1436 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1437 			"Disabling tracing due to warning\n");
1438 		tracing_off();
1439 	}
1440 }
1441 
1442 /**
1443  * tracer_tracing_is_on - show real state of ring buffer enabled
1444  * @tr : the trace array to know if ring buffer is enabled
1445  *
1446  * Shows real state of the ring buffer if it is enabled or not.
1447  */
1448 bool tracer_tracing_is_on(struct trace_array *tr)
1449 {
1450 	if (tr->array_buffer.buffer)
1451 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1452 	return !tr->buffer_disabled;
1453 }
1454 
1455 /**
1456  * tracing_is_on - show state of ring buffers enabled
1457  */
1458 int tracing_is_on(void)
1459 {
1460 	return tracer_tracing_is_on(&global_trace);
1461 }
1462 EXPORT_SYMBOL_GPL(tracing_is_on);
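
/*
 * Illustrative sketch (not part of this file): a driver-style helper
 * that freezes the ring buffers when a condition of interest is hit,
 * preserving the events that led up to it. The name is hypothetical.
 */
static void __maybe_unused example_freeze_trace_on_error(bool error)
{
	if (error && tracing_is_on())
		tracing_off();
}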
1463 
1464 static int __init set_buf_size(char *str)
1465 {
1466 	unsigned long buf_size;
1467 
1468 	if (!str)
1469 		return 0;
1470 	buf_size = memparse(str, &str);
1471 	/* nr_entries can not be zero */
1472 	if (buf_size == 0)
1473 		return 0;
1474 	trace_buf_size = buf_size;
1475 	return 1;
1476 }
1477 __setup("trace_buf_size=", set_buf_size);
1478 
1479 static int __init set_tracing_thresh(char *str)
1480 {
1481 	unsigned long threshold;
1482 	int ret;
1483 
1484 	if (!str)
1485 		return 0;
1486 	ret = kstrtoul(str, 0, &threshold);
1487 	if (ret < 0)
1488 		return 0;
1489 	tracing_thresh = threshold * 1000;
1490 	return 1;
1491 }
1492 __setup("tracing_thresh=", set_tracing_thresh);
1493 
1494 unsigned long nsecs_to_usecs(unsigned long nsecs)
1495 {
1496 	return nsecs / 1000;
1497 }
1498 
1499 /*
1500  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1501  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1502  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1503  * of strings in the order that the evals (enum) were defined.
1504  */
1505 #undef C
1506 #define C(a, b) b
1507 
1508 /* These must match the bit positions in trace_iterator_flags */
1509 static const char *trace_options[] = {
1510 	TRACE_FLAGS
1511 	NULL
1512 };
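
/*
 * Illustrative sketch (not part of this file): the same X-macro pattern
 * with made-up flags and a differently named expander (EC) so it cannot
 * clash with C() above. One list yields both the enum bits and the
 * matching strings in the same order.
 */
#define EXAMPLE_FLAGS		\
	EC(FOO,	"foo"),		\
	EC(BAR,	"bar"),

#define EC(a, b) EXAMPLE_ITER_##a##_BIT
enum { EXAMPLE_FLAGS EXAMPLE_ITER_LAST_BIT };

#undef EC
#define EC(a, b) b
static const char *example_options[] __maybe_unused = { EXAMPLE_FLAGS NULL };
#undef EC
#undef EXAMPLE_FLAGS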
1513 
1514 static struct {
1515 	u64 (*func)(void);
1516 	const char *name;
1517 	int in_ns;		/* is this clock in nanoseconds? */
1518 } trace_clocks[] = {
1519 	{ trace_clock_local,		"local",	1 },
1520 	{ trace_clock_global,		"global",	1 },
1521 	{ trace_clock_counter,		"counter",	0 },
1522 	{ trace_clock_jiffies,		"uptime",	0 },
1523 	{ trace_clock,			"perf",		1 },
1524 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1525 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1526 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1527 	ARCH_TRACE_CLOCKS
1528 };
1529 
1530 bool trace_clock_in_ns(struct trace_array *tr)
1531 {
1532 	if (trace_clocks[tr->clock_id].in_ns)
1533 		return true;
1534 
1535 	return false;
1536 }
1537 
1538 /*
1539  * trace_parser_get_init - gets the buffer for trace parser
1540  */
1541 int trace_parser_get_init(struct trace_parser *parser, int size)
1542 {
1543 	memset(parser, 0, sizeof(*parser));
1544 
1545 	parser->buffer = kmalloc(size, GFP_KERNEL);
1546 	if (!parser->buffer)
1547 		return 1;
1548 
1549 	parser->size = size;
1550 	return 0;
1551 }
1552 
1553 /*
1554  * trace_parser_put - frees the buffer for trace parser
1555  */
1556 void trace_parser_put(struct trace_parser *parser)
1557 {
1558 	kfree(parser->buffer);
1559 	parser->buffer = NULL;
1560 }
1561 
1562 /*
1563  * trace_get_user - reads the user input string separated by space
1564  * (matched by isspace(ch))
1565  *
1566  * For each string found the 'struct trace_parser' is updated,
1567  * and the function returns.
1568  *
1569  * Returns number of bytes read.
1570  *
1571  * See kernel/trace/trace.h for 'struct trace_parser' details.
1572  */
1573 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1574 	size_t cnt, loff_t *ppos)
1575 {
1576 	char ch;
1577 	size_t read = 0;
1578 	ssize_t ret;
1579 
1580 	if (!*ppos)
1581 		trace_parser_clear(parser);
1582 
1583 	ret = get_user(ch, ubuf++);
1584 	if (ret)
1585 		goto out;
1586 
1587 	read++;
1588 	cnt--;
1589 
1590 	/*
1591 	 * If the parser is not finished with the last write,
1592 	 * continue reading the user input without skipping spaces.
1593 	 */
1594 	if (!parser->cont) {
1595 		/* skip white space */
1596 		while (cnt && isspace(ch)) {
1597 			ret = get_user(ch, ubuf++);
1598 			if (ret)
1599 				goto out;
1600 			read++;
1601 			cnt--;
1602 		}
1603 
1604 		parser->idx = 0;
1605 
1606 		/* only spaces were written */
1607 		if (isspace(ch) || !ch) {
1608 			*ppos += read;
1609 			ret = read;
1610 			goto out;
1611 		}
1612 	}
1613 
1614 	/* read the non-space input */
1615 	while (cnt && !isspace(ch) && ch) {
1616 		if (parser->idx < parser->size - 1)
1617 			parser->buffer[parser->idx++] = ch;
1618 		else {
1619 			ret = -EINVAL;
1620 			goto out;
1621 		}
1622 		ret = get_user(ch, ubuf++);
1623 		if (ret)
1624 			goto out;
1625 		read++;
1626 		cnt--;
1627 	}
1628 
1629 	/* We either got finished input or we have to wait for another call. */
1630 	if (isspace(ch) || !ch) {
1631 		parser->buffer[parser->idx] = 0;
1632 		parser->cont = false;
1633 	} else if (parser->idx < parser->size - 1) {
1634 		parser->cont = true;
1635 		parser->buffer[parser->idx++] = ch;
1636 		/* Make sure the parsed string always terminates with '\0'. */
1637 		parser->buffer[parser->idx] = 0;
1638 	} else {
1639 		ret = -EINVAL;
1640 		goto out;
1641 	}
1642 
1643 	*ppos += read;
1644 	ret = read;
1645 
1646 out:
1647 	return ret;
1648 }
1649 
1650 /* TODO add a seq_buf_to_buffer() */
1651 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1652 {
1653 	int len;
1654 
1655 	if (trace_seq_used(s) <= s->seq.readpos)
1656 		return -EBUSY;
1657 
1658 	len = trace_seq_used(s) - s->seq.readpos;
1659 	if (cnt > len)
1660 		cnt = len;
1661 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1662 
1663 	s->seq.readpos += cnt;
1664 	return cnt;
1665 }
1666 
1667 unsigned long __read_mostly	tracing_thresh;
1668 static const struct file_operations tracing_max_lat_fops;
1669 
1670 #ifdef LATENCY_FS_NOTIFY
1671 
1672 static struct workqueue_struct *fsnotify_wq;
1673 
1674 static void latency_fsnotify_workfn(struct work_struct *work)
1675 {
1676 	struct trace_array *tr = container_of(work, struct trace_array,
1677 					      fsnotify_work);
1678 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1679 }
1680 
1681 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1682 {
1683 	struct trace_array *tr = container_of(iwork, struct trace_array,
1684 					      fsnotify_irqwork);
1685 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1686 }
1687 
1688 static void trace_create_maxlat_file(struct trace_array *tr,
1689 				     struct dentry *d_tracer)
1690 {
1691 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1692 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1693 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1694 					      TRACE_MODE_WRITE,
1695 					      d_tracer, &tr->max_latency,
1696 					      &tracing_max_lat_fops);
1697 }
1698 
1699 __init static int latency_fsnotify_init(void)
1700 {
1701 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1702 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1703 	if (!fsnotify_wq) {
1704 		pr_err("Unable to allocate tr_max_lat_wq\n");
1705 		return -ENOMEM;
1706 	}
1707 	return 0;
1708 }
1709 
1710 late_initcall_sync(latency_fsnotify_init);
1711 
1712 void latency_fsnotify(struct trace_array *tr)
1713 {
1714 	if (!fsnotify_wq)
1715 		return;
1716 	/*
1717 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1718 	 * possible that we are called from __schedule() or do_idle(), which
1719 	 * could cause a deadlock.
1720 	 */
1721 	irq_work_queue(&tr->fsnotify_irqwork);
1722 }
1723 
1724 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)	\
1725 	|| defined(CONFIG_OSNOISE_TRACER)
1726 
1727 #define trace_create_maxlat_file(tr, d_tracer)				\
1728 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1729 			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1730 
1731 #else
1732 #define trace_create_maxlat_file(tr, d_tracer)	 do { } while (0)
1733 #endif
1734 
1735 #ifdef CONFIG_TRACER_MAX_TRACE
1736 /*
1737  * Copy the new maximum trace into the separate maximum-trace
1738  * structure. (this way the maximum trace is permanently saved,
1739  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1740  */
1741 static void
1742 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1743 {
1744 	struct array_buffer *trace_buf = &tr->array_buffer;
1745 	struct array_buffer *max_buf = &tr->max_buffer;
1746 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1747 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1748 
1749 	max_buf->cpu = cpu;
1750 	max_buf->time_start = data->preempt_timestamp;
1751 
1752 	max_data->saved_latency = tr->max_latency;
1753 	max_data->critical_start = data->critical_start;
1754 	max_data->critical_end = data->critical_end;
1755 
1756 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1757 	max_data->pid = tsk->pid;
1758 	/*
1759 	 * If tsk == current, then use current_uid(), as that does not use
1760 	 * RCU. The irq tracer can be called out of RCU scope.
1761 	 */
1762 	if (tsk == current)
1763 		max_data->uid = current_uid();
1764 	else
1765 		max_data->uid = task_uid(tsk);
1766 
1767 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1768 	max_data->policy = tsk->policy;
1769 	max_data->rt_priority = tsk->rt_priority;
1770 
1771 	/* record this tasks comm */
1772 	tracing_record_cmdline(tsk);
1773 	latency_fsnotify(tr);
1774 }
1775 
1776 /**
1777  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1778  * @tr: tracer
1779  * @tsk: the task with the latency
1780  * @cpu: The cpu that initiated the trace.
1781  * @cond_data: User data associated with a conditional snapshot
1782  *
1783  * Flip the buffers between the @tr and the max_tr and record information
1784  * about which task was the cause of this latency.
1785  */
1786 void
1787 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1788 	      void *cond_data)
1789 {
1790 	if (tr->stop_count)
1791 		return;
1792 
1793 	WARN_ON_ONCE(!irqs_disabled());
1794 
1795 	if (!tr->allocated_snapshot) {
1796 		/* Only the nop tracer should hit this when disabling */
1797 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1798 		return;
1799 	}
1800 
1801 	arch_spin_lock(&tr->max_lock);
1802 
1803 	/* Inherit the recordable setting from array_buffer */
1804 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1805 		ring_buffer_record_on(tr->max_buffer.buffer);
1806 	else
1807 		ring_buffer_record_off(tr->max_buffer.buffer);
1808 
1809 #ifdef CONFIG_TRACER_SNAPSHOT
1810 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1811 		goto out_unlock;
1812 #endif
1813 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1814 
1815 	__update_max_tr(tr, tsk, cpu);
1816 
1817  out_unlock:
1818 	arch_spin_unlock(&tr->max_lock);
1819 }
1820 
1821 /**
1822  * update_max_tr_single - only copy one trace over, and reset the rest
1823  * @tr: tracer
1824  * @tsk: task with the latency
1825  * @cpu: the cpu of the buffer to copy.
1826  *
1827  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1828  */
1829 void
1830 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1831 {
1832 	int ret;
1833 
1834 	if (tr->stop_count)
1835 		return;
1836 
1837 	WARN_ON_ONCE(!irqs_disabled());
1838 	if (!tr->allocated_snapshot) {
1839 		/* Only the nop tracer should hit this when disabling */
1840 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1841 		return;
1842 	}
1843 
1844 	arch_spin_lock(&tr->max_lock);
1845 
1846 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1847 
1848 	if (ret == -EBUSY) {
1849 		/*
1850 		 * We failed to swap the buffer due to a commit taking
1851 		 * place on this CPU. We fail to record, but we reset
1852 		 * the max trace buffer (no one writes directly to it)
1853 		 * and flag that it failed.
1854 		 */
1855 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1856 			"Failed to swap buffers due to commit in progress\n");
1857 	}
1858 
1859 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1860 
1861 	__update_max_tr(tr, tsk, cpu);
1862 	arch_spin_unlock(&tr->max_lock);
1863 }
1864 #endif /* CONFIG_TRACER_MAX_TRACE */
1865 
1866 static int wait_on_pipe(struct trace_iterator *iter, int full)
1867 {
1868 	/* Iterators are static, they should be filled or empty */
1869 	if (trace_buffer_iter(iter, iter->cpu_file))
1870 		return 0;
1871 
1872 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1873 				full);
1874 }
1875 
1876 #ifdef CONFIG_FTRACE_STARTUP_TEST
1877 static bool selftests_can_run;
1878 
1879 struct trace_selftests {
1880 	struct list_head		list;
1881 	struct tracer			*type;
1882 };
1883 
1884 static LIST_HEAD(postponed_selftests);
1885 
1886 static int save_selftest(struct tracer *type)
1887 {
1888 	struct trace_selftests *selftest;
1889 
1890 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1891 	if (!selftest)
1892 		return -ENOMEM;
1893 
1894 	selftest->type = type;
1895 	list_add(&selftest->list, &postponed_selftests);
1896 	return 0;
1897 }
1898 
1899 static int run_tracer_selftest(struct tracer *type)
1900 {
1901 	struct trace_array *tr = &global_trace;
1902 	struct tracer *saved_tracer = tr->current_trace;
1903 	int ret;
1904 
1905 	if (!type->selftest || tracing_selftest_disabled)
1906 		return 0;
1907 
1908 	/*
1909 	 * If a tracer registers early in boot up (before scheduling is
1910 	 * initialized and such), then do not run its selftests yet.
1911 	 * Instead, run them a little later in the boot process.
1912 	 */
1913 	if (!selftests_can_run)
1914 		return save_selftest(type);
1915 
1916 	if (!tracing_is_on()) {
1917 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1918 			type->name);
1919 		return 0;
1920 	}
1921 
1922 	/*
1923 	 * Run a selftest on this tracer.
1924 	 * Here we reset the trace buffer, and set the current
1925 	 * tracer to be this tracer. The tracer can then run some
1926 	 * internal tracing to verify that everything is in order.
1927 	 * If we fail, we do not register this tracer.
1928 	 */
1929 	tracing_reset_online_cpus(&tr->array_buffer);
1930 
1931 	tr->current_trace = type;
1932 
1933 #ifdef CONFIG_TRACER_MAX_TRACE
1934 	if (type->use_max_tr) {
1935 		/* If we expanded the buffers, make sure the max is expanded too */
1936 		if (ring_buffer_expanded)
1937 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1938 					   RING_BUFFER_ALL_CPUS);
1939 		tr->allocated_snapshot = true;
1940 	}
1941 #endif
1942 
1943 	/* the test is responsible for initializing and enabling */
1944 	pr_info("Testing tracer %s: ", type->name);
1945 	ret = type->selftest(type, tr);
1946 	/* the test is responsible for resetting too */
1947 	tr->current_trace = saved_tracer;
1948 	if (ret) {
1949 		printk(KERN_CONT "FAILED!\n");
1950 		/* Add the warning after printing 'FAILED' */
1951 		WARN_ON(1);
1952 		return -1;
1953 	}
1954 	/* Only reset on passing, to avoid touching corrupted buffers */
1955 	tracing_reset_online_cpus(&tr->array_buffer);
1956 
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958 	if (type->use_max_tr) {
1959 		tr->allocated_snapshot = false;
1960 
1961 		/* Shrink the max buffer again */
1962 		if (ring_buffer_expanded)
1963 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1964 					   RING_BUFFER_ALL_CPUS);
1965 	}
1966 #endif
1967 
1968 	printk(KERN_CONT "PASSED\n");
1969 	return 0;
1970 }
1971 
1972 static __init int init_trace_selftests(void)
1973 {
1974 	struct trace_selftests *p, *n;
1975 	struct tracer *t, **last;
1976 	int ret;
1977 
1978 	selftests_can_run = true;
1979 
1980 	mutex_lock(&trace_types_lock);
1981 
1982 	if (list_empty(&postponed_selftests))
1983 		goto out;
1984 
1985 	pr_info("Running postponed tracer tests:\n");
1986 
1987 	tracing_selftest_running = true;
1988 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1989 		/* This loop can take minutes when sanitizers are enabled, so
1990 		 * let's make sure we allow RCU processing.
1991 		 */
1992 		cond_resched();
1993 		ret = run_tracer_selftest(p->type);
1994 		/* If the test fails, then warn and remove from available_tracers */
1995 		if (ret < 0) {
1996 			WARN(1, "tracer: %s failed selftest, disabling\n",
1997 			     p->type->name);
1998 			last = &trace_types;
1999 			for (t = trace_types; t; t = t->next) {
2000 				if (t == p->type) {
2001 					*last = t->next;
2002 					break;
2003 				}
2004 				last = &t->next;
2005 			}
2006 		}
2007 		list_del(&p->list);
2008 		kfree(p);
2009 	}
2010 	tracing_selftest_running = false;
2011 
2012  out:
2013 	mutex_unlock(&trace_types_lock);
2014 
2015 	return 0;
2016 }
2017 core_initcall(init_trace_selftests);
2018 #else
2019 static inline int run_tracer_selftest(struct tracer *type)
2020 {
2021 	return 0;
2022 }
2023 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2024 
2025 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2026 
2027 static void __init apply_trace_boot_options(void);
2028 
2029 /**
2030  * register_tracer - register a tracer with the ftrace system.
2031  * @type: the plugin for the tracer
2032  *
2033  * Register a new plugin tracer.
2034  */
2035 int __init register_tracer(struct tracer *type)
2036 {
2037 	struct tracer *t;
2038 	int ret = 0;
2039 
2040 	if (!type->name) {
2041 		pr_info("Tracer must have a name\n");
2042 		return -1;
2043 	}
2044 
2045 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2046 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2047 		return -1;
2048 	}
2049 
2050 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2051 		pr_warn("Can not register tracer %s due to lockdown\n",
2052 			   type->name);
2053 		return -EPERM;
2054 	}
2055 
2056 	mutex_lock(&trace_types_lock);
2057 
2058 	tracing_selftest_running = true;
2059 
2060 	for (t = trace_types; t; t = t->next) {
2061 		if (strcmp(type->name, t->name) == 0) {
2062 			/* already found */
2063 			pr_info("Tracer %s already registered\n",
2064 				type->name);
2065 			ret = -1;
2066 			goto out;
2067 		}
2068 	}
2069 
2070 	if (!type->set_flag)
2071 		type->set_flag = &dummy_set_flag;
2072 	if (!type->flags) {
2073 		/* allocate a dummy tracer_flags */
2074 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2075 		if (!type->flags) {
2076 			ret = -ENOMEM;
2077 			goto out;
2078 		}
2079 		type->flags->val = 0;
2080 		type->flags->opts = dummy_tracer_opt;
2081 	} else
2082 		if (!type->flags->opts)
2083 			type->flags->opts = dummy_tracer_opt;
2084 
2085 	/* store the tracer for __set_tracer_option */
2086 	type->flags->trace = type;
2087 
2088 	ret = run_tracer_selftest(type);
2089 	if (ret < 0)
2090 		goto out;
2091 
2092 	type->next = trace_types;
2093 	trace_types = type;
2094 	add_tracer_options(&global_trace, type);
2095 
2096  out:
2097 	tracing_selftest_running = false;
2098 	mutex_unlock(&trace_types_lock);
2099 
2100 	if (ret || !default_bootup_tracer)
2101 		goto out_unlock;
2102 
2103 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2104 		goto out_unlock;
2105 
2106 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2107 	/* Do we want this tracer to start on bootup? */
2108 	tracing_set_tracer(&global_trace, type->name);
2109 	default_bootup_tracer = NULL;
2110 
2111 	apply_trace_boot_options();
2112 
2113 	/* disable other selftests, since this will break them. */
2114 	disable_tracing_selftest("running a tracer");
2115 
2116  out_unlock:
2117 	return ret;
2118 }
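/*
 * Illustrative only: a minimal sketch of how a tracer plugin typically
 * registers itself (the names below are hypothetical):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */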
2119 
2120 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2121 {
2122 	struct trace_buffer *buffer = buf->buffer;
2123 
2124 	if (!buffer)
2125 		return;
2126 
2127 	ring_buffer_record_disable(buffer);
2128 
2129 	/* Make sure all commits have finished */
2130 	synchronize_rcu();
2131 	ring_buffer_reset_cpu(buffer, cpu);
2132 
2133 	ring_buffer_record_enable(buffer);
2134 }
2135 
2136 void tracing_reset_online_cpus(struct array_buffer *buf)
2137 {
2138 	struct trace_buffer *buffer = buf->buffer;
2139 
2140 	if (!buffer)
2141 		return;
2142 
2143 	ring_buffer_record_disable(buffer);
2144 
2145 	/* Make sure all commits have finished */
2146 	synchronize_rcu();
2147 
2148 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2149 
2150 	ring_buffer_reset_online_cpus(buffer);
2151 
2152 	ring_buffer_record_enable(buffer);
2153 }
2154 
2155 /* Must have trace_types_lock held */
2156 void tracing_reset_all_online_cpus(void)
2157 {
2158 	struct trace_array *tr;
2159 
2160 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2161 		if (!tr->clear_trace)
2162 			continue;
2163 		tr->clear_trace = false;
2164 		tracing_reset_online_cpus(&tr->array_buffer);
2165 #ifdef CONFIG_TRACER_MAX_TRACE
2166 		tracing_reset_online_cpus(&tr->max_buffer);
2167 #endif
2168 	}
2169 }
2170 
2171 /*
2172  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2173  * is the tgid last observed corresponding to pid=i.
2174  */
2175 static int *tgid_map;
2176 
2177 /* The maximum valid index into tgid_map. */
2178 static size_t tgid_map_max;
2179 
2180 #define SAVED_CMDLINES_DEFAULT 128
2181 #define NO_CMDLINE_MAP UINT_MAX
2182 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2183 struct saved_cmdlines_buffer {
2184 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2185 	unsigned *map_cmdline_to_pid;
2186 	unsigned cmdline_num;
2187 	int cmdline_idx;
2188 	char *saved_cmdlines;
2189 };
2190 static struct saved_cmdlines_buffer *savedcmd;
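/*
 * A rough sketch of how this cache is used by trace_save_cmdline() and
 * __trace_find_cmdline() below:
 *
 *   - The pid is masked to PID_MAX_DEFAULT - 1, so different pids may
 *     alias to the same map_pid_to_cmdline[] slot.
 *   - That slot holds an index into the circular saved_cmdlines[] array
 *     (cmdline_num entries of TASK_COMM_LEN bytes each).
 *   - map_cmdline_to_pid[idx] remembers which pid last owned slot idx,
 *     letting a lookup detect that the comm belongs to a different task
 *     and fall back to printing "<...>".
 */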
2191 
2192 static inline char *get_saved_cmdlines(int idx)
2193 {
2194 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2195 }
2196 
2197 static inline void set_cmdline(int idx, const char *cmdline)
2198 {
2199 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2200 }
2201 
2202 static int allocate_cmdlines_buffer(unsigned int val,
2203 				    struct saved_cmdlines_buffer *s)
2204 {
2205 	s->map_cmdline_to_pid = kmalloc_array(val,
2206 					      sizeof(*s->map_cmdline_to_pid),
2207 					      GFP_KERNEL);
2208 	if (!s->map_cmdline_to_pid)
2209 		return -ENOMEM;
2210 
2211 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2212 	if (!s->saved_cmdlines) {
2213 		kfree(s->map_cmdline_to_pid);
2214 		return -ENOMEM;
2215 	}
2216 
2217 	s->cmdline_idx = 0;
2218 	s->cmdline_num = val;
2219 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2220 	       sizeof(s->map_pid_to_cmdline));
2221 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2222 	       val * sizeof(*s->map_cmdline_to_pid));
2223 
2224 	return 0;
2225 }
2226 
2227 static int trace_create_savedcmd(void)
2228 {
2229 	int ret;
2230 
2231 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2232 	if (!savedcmd)
2233 		return -ENOMEM;
2234 
2235 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2236 	if (ret < 0) {
2237 		kfree(savedcmd);
2238 		savedcmd = NULL;
2239 		return -ENOMEM;
2240 	}
2241 
2242 	return 0;
2243 }
2244 
2245 int is_tracing_stopped(void)
2246 {
2247 	return global_trace.stop_count;
2248 }
2249 
2250 /**
2251  * tracing_start - quick start of the tracer
2252  *
2253  * If tracing is enabled but was stopped by tracing_stop,
2254  * this will start the tracer back up.
2255  */
2256 void tracing_start(void)
2257 {
2258 	struct trace_buffer *buffer;
2259 	unsigned long flags;
2260 
2261 	if (tracing_disabled)
2262 		return;
2263 
2264 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2265 	if (--global_trace.stop_count) {
2266 		if (global_trace.stop_count < 0) {
2267 			/* Someone screwed up their debugging */
2268 			WARN_ON_ONCE(1);
2269 			global_trace.stop_count = 0;
2270 		}
2271 		goto out;
2272 	}
2273 
2274 	/* Prevent the buffers from switching */
2275 	arch_spin_lock(&global_trace.max_lock);
2276 
2277 	buffer = global_trace.array_buffer.buffer;
2278 	if (buffer)
2279 		ring_buffer_record_enable(buffer);
2280 
2281 #ifdef CONFIG_TRACER_MAX_TRACE
2282 	buffer = global_trace.max_buffer.buffer;
2283 	if (buffer)
2284 		ring_buffer_record_enable(buffer);
2285 #endif
2286 
2287 	arch_spin_unlock(&global_trace.max_lock);
2288 
2289  out:
2290 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2291 }
2292 
2293 static void tracing_start_tr(struct trace_array *tr)
2294 {
2295 	struct trace_buffer *buffer;
2296 	unsigned long flags;
2297 
2298 	if (tracing_disabled)
2299 		return;
2300 
2301 	/* If global, we need to also start the max tracer */
2302 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2303 		return tracing_start();
2304 
2305 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2306 
2307 	if (--tr->stop_count) {
2308 		if (tr->stop_count < 0) {
2309 			/* Someone screwed up their debugging */
2310 			WARN_ON_ONCE(1);
2311 			tr->stop_count = 0;
2312 		}
2313 		goto out;
2314 	}
2315 
2316 	buffer = tr->array_buffer.buffer;
2317 	if (buffer)
2318 		ring_buffer_record_enable(buffer);
2319 
2320  out:
2321 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2322 }
2323 
2324 /**
2325  * tracing_stop - quick stop of the tracer
2326  *
2327  * Lightweight way to stop tracing. Use in conjunction with
2328  * tracing_start.
2329  */
2330 void tracing_stop(void)
2331 {
2332 	struct trace_buffer *buffer;
2333 	unsigned long flags;
2334 
2335 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2336 	if (global_trace.stop_count++)
2337 		goto out;
2338 
2339 	/* Prevent the buffers from switching */
2340 	arch_spin_lock(&global_trace.max_lock);
2341 
2342 	buffer = global_trace.array_buffer.buffer;
2343 	if (buffer)
2344 		ring_buffer_record_disable(buffer);
2345 
2346 #ifdef CONFIG_TRACER_MAX_TRACE
2347 	buffer = global_trace.max_buffer.buffer;
2348 	if (buffer)
2349 		ring_buffer_record_disable(buffer);
2350 #endif
2351 
2352 	arch_spin_unlock(&global_trace.max_lock);
2353 
2354  out:
2355 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2356 }
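/*
 * Illustrative only: tracing_stop()/tracing_start() are counted, so a
 * hypothetical caller that wants to freeze an interesting trace would
 * typically pair them:
 *
 *	tracing_stop();
 *	... inspect or dump the frozen buffers ...
 *	tracing_start();
 */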
2357 
2358 static void tracing_stop_tr(struct trace_array *tr)
2359 {
2360 	struct trace_buffer *buffer;
2361 	unsigned long flags;
2362 
2363 	/* If global, we need to also stop the max tracer */
2364 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2365 		return tracing_stop();
2366 
2367 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2368 	if (tr->stop_count++)
2369 		goto out;
2370 
2371 	buffer = tr->array_buffer.buffer;
2372 	if (buffer)
2373 		ring_buffer_record_disable(buffer);
2374 
2375  out:
2376 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2377 }
2378 
2379 static int trace_save_cmdline(struct task_struct *tsk)
2380 {
2381 	unsigned tpid, idx;
2382 
2383 	/* treat recording of idle task as a success */
2384 	if (!tsk->pid)
2385 		return 1;
2386 
2387 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2388 
2389 	/*
2390 	 * It's not the end of the world if we don't get
2391 	 * the lock, but we also don't want to spin
2392 	 * nor do we want to disable interrupts,
2393 	 * so if we miss here, then better luck next time.
2394 	 */
2395 	if (!arch_spin_trylock(&trace_cmdline_lock))
2396 		return 0;
2397 
2398 	idx = savedcmd->map_pid_to_cmdline[tpid];
2399 	if (idx == NO_CMDLINE_MAP) {
2400 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2401 
2402 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2403 		savedcmd->cmdline_idx = idx;
2404 	}
2405 
2406 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2407 	set_cmdline(idx, tsk->comm);
2408 
2409 	arch_spin_unlock(&trace_cmdline_lock);
2410 
2411 	return 1;
2412 }
2413 
2414 static void __trace_find_cmdline(int pid, char comm[])
2415 {
2416 	unsigned map;
2417 	int tpid;
2418 
2419 	if (!pid) {
2420 		strcpy(comm, "<idle>");
2421 		return;
2422 	}
2423 
2424 	if (WARN_ON_ONCE(pid < 0)) {
2425 		strcpy(comm, "<XXX>");
2426 		return;
2427 	}
2428 
2429 	tpid = pid & (PID_MAX_DEFAULT - 1);
2430 	map = savedcmd->map_pid_to_cmdline[tpid];
2431 	if (map != NO_CMDLINE_MAP) {
2432 		tpid = savedcmd->map_cmdline_to_pid[map];
2433 		if (tpid == pid) {
2434 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2435 			return;
2436 		}
2437 	}
2438 	strcpy(comm, "<...>");
2439 }
2440 
2441 void trace_find_cmdline(int pid, char comm[])
2442 {
2443 	preempt_disable();
2444 	arch_spin_lock(&trace_cmdline_lock);
2445 
2446 	__trace_find_cmdline(pid, comm);
2447 
2448 	arch_spin_unlock(&trace_cmdline_lock);
2449 	preempt_enable();
2450 }
2451 
2452 static int *trace_find_tgid_ptr(int pid)
2453 {
2454 	/*
2455 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2456 	 * if we observe a non-NULL tgid_map then we also observe the correct
2457 	 * tgid_map_max.
2458 	 */
2459 	int *map = smp_load_acquire(&tgid_map);
2460 
2461 	if (unlikely(!map || pid > tgid_map_max))
2462 		return NULL;
2463 
2464 	return &map[pid];
2465 }
2466 
2467 int trace_find_tgid(int pid)
2468 {
2469 	int *ptr = trace_find_tgid_ptr(pid);
2470 
2471 	return ptr ? *ptr : 0;
2472 }
2473 
2474 static int trace_save_tgid(struct task_struct *tsk)
2475 {
2476 	int *ptr;
2477 
2478 	/* treat recording of idle task as a success */
2479 	if (!tsk->pid)
2480 		return 1;
2481 
2482 	ptr = trace_find_tgid_ptr(tsk->pid);
2483 	if (!ptr)
2484 		return 0;
2485 
2486 	*ptr = tsk->tgid;
2487 	return 1;
2488 }
2489 
2490 static bool tracing_record_taskinfo_skip(int flags)
2491 {
2492 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2493 		return true;
2494 	if (!__this_cpu_read(trace_taskinfo_save))
2495 		return true;
2496 	return false;
2497 }
2498 
2499 /**
2500  * tracing_record_taskinfo - record the task info of a task
2501  *
2502  * @task:  task to record
2503  * @flags: TRACE_RECORD_CMDLINE for recording comm
2504  *         TRACE_RECORD_TGID for recording tgid
2505  */
2506 void tracing_record_taskinfo(struct task_struct *task, int flags)
2507 {
2508 	bool done;
2509 
2510 	if (tracing_record_taskinfo_skip(flags))
2511 		return;
2512 
2513 	/*
2514 	 * Record as much task information as possible. If some fail, continue
2515 	 * to try to record the others.
2516 	 */
2517 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2518 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2519 
2520 	/* If recording any information failed, retry soon. */
2521 	if (!done)
2522 		return;
2523 
2524 	__this_cpu_write(trace_taskinfo_save, false);
2525 }
2526 
2527 /**
2528  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2529  *
2530  * @prev: previous task during sched_switch
2531  * @next: next task during sched_switch
2532  * @flags: TRACE_RECORD_CMDLINE for recording comm
2533  *         TRACE_RECORD_TGID for recording tgid
2534  */
2535 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2536 					  struct task_struct *next, int flags)
2537 {
2538 	bool done;
2539 
2540 	if (tracing_record_taskinfo_skip(flags))
2541 		return;
2542 
2543 	/*
2544 	 * Record as much task information as possible. If some fail, continue
2545 	 * to try to record the others.
2546 	 */
2547 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2548 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2549 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2550 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2551 
2552 	/* If recording any information failed, retry soon. */
2553 	if (!done)
2554 		return;
2555 
2556 	__this_cpu_write(trace_taskinfo_save, false);
2557 }
2558 
2559 /* Helpers to record a specific task information */
2560 void tracing_record_cmdline(struct task_struct *task)
2561 {
2562 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2563 }
2564 
2565 void tracing_record_tgid(struct task_struct *task)
2566 {
2567 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2568 }
2569 
2570 /*
2571  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2572  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2573  * simplifies those functions and keeps them in sync.
2574  */
2575 enum print_line_t trace_handle_return(struct trace_seq *s)
2576 {
2577 	return trace_seq_has_overflowed(s) ?
2578 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2579 }
2580 EXPORT_SYMBOL_GPL(trace_handle_return);
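/*
 * A minimal sketch of the pattern the helper above supports (the names
 * here are hypothetical):
 *
 *	static enum print_line_t example_print(struct trace_iterator *iter,
 *					       int flags,
 *					       struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example: %d\n", 0);
 *		return trace_handle_return(&iter->seq);
 *	}
 */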
2581 
2582 static unsigned short migration_disable_value(void)
2583 {
2584 #if defined(CONFIG_SMP)
2585 	return current->migration_disabled;
2586 #else
2587 	return 0;
2588 #endif
2589 }
2590 
2591 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2592 {
2593 	unsigned int trace_flags = irqs_status;
2594 	unsigned int pc;
2595 
2596 	pc = preempt_count();
2597 
2598 	if (pc & NMI_MASK)
2599 		trace_flags |= TRACE_FLAG_NMI;
2600 	if (pc & HARDIRQ_MASK)
2601 		trace_flags |= TRACE_FLAG_HARDIRQ;
2602 	if (in_serving_softirq())
2603 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2604 
2605 	if (tif_need_resched())
2606 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2607 	if (test_preempt_need_resched())
2608 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2609 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2610 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2611 }
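/*
 * Layout of the value packed above, derived from the shifts in this
 * function:
 *
 *	bits  0- 3: preempt count, capped at 0xf
 *	bits  4- 7: migration-disable depth, capped at 0xf
 *	bits 16+  : TRACE_FLAG_* bits (irq state, NMI, hardirq, softirq,
 *		    need-resched), including whatever irqs_status supplied
 *
 * For example, a hard interrupt with a preempt count of 2 yields roughly
 * (TRACE_FLAG_HARDIRQ << 16) | 0x2.
 */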
2612 
2613 struct ring_buffer_event *
2614 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2615 			  int type,
2616 			  unsigned long len,
2617 			  unsigned int trace_ctx)
2618 {
2619 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2620 }
2621 
2622 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2623 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2624 static int trace_buffered_event_ref;
2625 
2626 /**
2627  * trace_buffered_event_enable - enable buffering events
2628  *
2629  * When events are being filtered, it is quicker to use a temporary
2630  * buffer to write the event data into if there's a likely chance
2631  * that it will not be committed. Discarding an event from the ring
2632  * buffer is not as fast as committing one, and is much slower than
2633  * the extra copy needed to commit from a temporary buffer.
2634  *
2635  * When an event is to be filtered, allocate per-CPU buffers to
2636  * write the event data into. If the event is then filtered and
2637  * discarded, it is simply dropped; otherwise the entire data is
2638  * committed in one shot.
2639  */
2640 void trace_buffered_event_enable(void)
2641 {
2642 	struct ring_buffer_event *event;
2643 	struct page *page;
2644 	int cpu;
2645 
2646 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2647 
2648 	if (trace_buffered_event_ref++)
2649 		return;
2650 
2651 	for_each_tracing_cpu(cpu) {
2652 		page = alloc_pages_node(cpu_to_node(cpu),
2653 					GFP_KERNEL | __GFP_NORETRY, 0);
2654 		if (!page)
2655 			goto failed;
2656 
2657 		event = page_address(page);
2658 		memset(event, 0, sizeof(*event));
2659 
2660 		per_cpu(trace_buffered_event, cpu) = event;
2661 
2662 		preempt_disable();
2663 		if (cpu == smp_processor_id() &&
2664 		    __this_cpu_read(trace_buffered_event) !=
2665 		    per_cpu(trace_buffered_event, cpu))
2666 			WARN_ON_ONCE(1);
2667 		preempt_enable();
2668 	}
2669 
2670 	return;
2671  failed:
2672 	trace_buffered_event_disable();
2673 }
2674 
2675 static void enable_trace_buffered_event(void *data)
2676 {
2677 	/* Probably not needed, but do it anyway */
2678 	smp_rmb();
2679 	this_cpu_dec(trace_buffered_event_cnt);
2680 }
2681 
2682 static void disable_trace_buffered_event(void *data)
2683 {
2684 	this_cpu_inc(trace_buffered_event_cnt);
2685 }
2686 
2687 /**
2688  * trace_buffered_event_disable - disable buffering events
2689  *
2690  * When a filter is removed, it is faster to not use the buffered
2691  * events, and to commit directly into the ring buffer. Free up
2692  * the temp buffers when there are no more users. This requires
2693  * special synchronization with current events.
2694  */
2695 void trace_buffered_event_disable(void)
2696 {
2697 	int cpu;
2698 
2699 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2700 
2701 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2702 		return;
2703 
2704 	if (--trace_buffered_event_ref)
2705 		return;
2706 
2707 	preempt_disable();
2708 	/* For each CPU, set the buffer as used. */
2709 	smp_call_function_many(tracing_buffer_mask,
2710 			       disable_trace_buffered_event, NULL, 1);
2711 	preempt_enable();
2712 
2713 	/* Wait for all current users to finish */
2714 	synchronize_rcu();
2715 
2716 	for_each_tracing_cpu(cpu) {
2717 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2718 		per_cpu(trace_buffered_event, cpu) = NULL;
2719 	}
2720 	/*
2721 	 * Make sure trace_buffered_event is NULL before clearing
2722 	 * trace_buffered_event_cnt.
2723 	 */
2724 	smp_wmb();
2725 
2726 	preempt_disable();
2727 	/* Do the work on each cpu */
2728 	smp_call_function_many(tracing_buffer_mask,
2729 			       enable_trace_buffered_event, NULL, 1);
2730 	preempt_enable();
2731 }
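/*
 * Illustrative only: the enable/disable pair above is reference counted
 * and must be called under event_mutex, so a hypothetical user installing
 * and later removing an event filter would bracket it roughly as:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the filter ...
 *	mutex_unlock(&event_mutex);
 *
 * and call trace_buffered_event_disable(), again under event_mutex, when
 * the filter is removed.
 */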
2732 
2733 static struct trace_buffer *temp_buffer;
2734 
2735 struct ring_buffer_event *
2736 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2737 			  struct trace_event_file *trace_file,
2738 			  int type, unsigned long len,
2739 			  unsigned int trace_ctx)
2740 {
2741 	struct ring_buffer_event *entry;
2742 	struct trace_array *tr = trace_file->tr;
2743 	int val;
2744 
2745 	*current_rb = tr->array_buffer.buffer;
2746 
2747 	if (!tr->no_filter_buffering_ref &&
2748 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2749 	    (entry = this_cpu_read(trace_buffered_event))) {
2750 		/*
2751 		 * Filtering is on, so try to use the per cpu buffer first.
2752 		 * This buffer will simulate a ring_buffer_event,
2753 		 * where the type_len is zero and the array[0] will
2754 		 * hold the full length.
2755 		 * (see include/linux/ring_buffer.h for details on
2756 		 *  how the ring_buffer_event is structured).
2757 		 *
2758 		 * Using a temp buffer during filtering and copying it
2759 		 * on a matched filter is quicker than writing directly
2760 		 * into the ring buffer and then discarding it when
2761 		 * it doesn't match. That is because the discard
2762 		 * requires several atomic operations to get right.
2763 		 * Copying on a match and doing nothing on a failed match
2764 		 * is still quicker than skipping the copy on a match but
2765 		 * having to discard from the ring buffer on a failed match.
2766 		 */
2767 		int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2768 
2769 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2770 
2771 		/*
2772 		 * Preemption is disabled, but interrupts and NMIs
2773 		 * can still come in now. If that happens after
2774 		 * the above increment, then it will have to go
2775 		 * back to the old method of allocating the event
2776 		 * on the ring buffer, and if the filter fails, it
2777 		 * will have to call ring_buffer_discard_commit()
2778 		 * to remove it.
2779 		 *
2780 		 * Need to also check the unlikely case that the
2781 		 * length is bigger than the temp buffer size.
2782 		 * If that happens, then the reserve is pretty much
2783 		 * guaranteed to fail, as the ring buffer currently
2784 		 * only allows events less than a page. But that may
2785 		 * change in the future, so let the ring buffer reserve
2786 		 * handle the failure in that case.
2787 		 */
2788 		if (val == 1 && likely(len <= max_len)) {
2789 			trace_event_setup(entry, type, trace_ctx);
2790 			entry->array[0] = len;
2791 			return entry;
2792 		}
2793 		this_cpu_dec(trace_buffered_event_cnt);
2794 	}
2795 
2796 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2797 					    trace_ctx);
2798 	/*
2799 	 * If tracing is off, but we have triggers enabled,
2800 	 * we still need to look at the event data. Use the temp_buffer
2801 	 * to store the trace event for the trigger to use. It's recursion
2802 	 * safe and will not be recorded anywhere.
2803 	 */
2804 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2805 		*current_rb = temp_buffer;
2806 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2807 						    trace_ctx);
2808 	}
2809 	return entry;
2810 }
2811 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
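/*
 * The event returned above is normally filled in by the generated
 * trace_event_raw_event_*() code and then handed to
 * trace_event_buffer_commit() below, which handles triggers, tp_printk
 * output and the actual (or discarded) commit.
 */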
2812 
2813 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2814 static DEFINE_MUTEX(tracepoint_printk_mutex);
2815 
2816 static void output_printk(struct trace_event_buffer *fbuffer)
2817 {
2818 	struct trace_event_call *event_call;
2819 	struct trace_event_file *file;
2820 	struct trace_event *event;
2821 	unsigned long flags;
2822 	struct trace_iterator *iter = tracepoint_print_iter;
2823 
2824 	/* We should never get here if iter is NULL */
2825 	if (WARN_ON_ONCE(!iter))
2826 		return;
2827 
2828 	event_call = fbuffer->trace_file->event_call;
2829 	if (!event_call || !event_call->event.funcs ||
2830 	    !event_call->event.funcs->trace)
2831 		return;
2832 
2833 	file = fbuffer->trace_file;
2834 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2835 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2836 	     !filter_match_preds(file->filter, fbuffer->entry)))
2837 		return;
2838 
2839 	event = &fbuffer->trace_file->event_call->event;
2840 
2841 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2842 	trace_seq_init(&iter->seq);
2843 	iter->ent = fbuffer->entry;
2844 	event_call->event.funcs->trace(iter, 0, event);
2845 	trace_seq_putc(&iter->seq, 0);
2846 	printk("%s", iter->seq.buffer);
2847 
2848 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2849 }
2850 
2851 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2852 			     void *buffer, size_t *lenp,
2853 			     loff_t *ppos)
2854 {
2855 	int save_tracepoint_printk;
2856 	int ret;
2857 
2858 	mutex_lock(&tracepoint_printk_mutex);
2859 	save_tracepoint_printk = tracepoint_printk;
2860 
2861 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2862 
2863 	/*
2864 	 * This will force exiting early, as tracepoint_printk
2865 	 * is always zero when tracepoint_print_iter is not allocated.
2866 	 */
2867 	if (!tracepoint_print_iter)
2868 		tracepoint_printk = 0;
2869 
2870 	if (save_tracepoint_printk == tracepoint_printk)
2871 		goto out;
2872 
2873 	if (tracepoint_printk)
2874 		static_key_enable(&tracepoint_printk_key.key);
2875 	else
2876 		static_key_disable(&tracepoint_printk_key.key);
2877 
2878  out:
2879 	mutex_unlock(&tracepoint_printk_mutex);
2880 
2881 	return ret;
2882 }
2883 
2884 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2885 {
2886 	enum event_trigger_type tt = ETT_NONE;
2887 	struct trace_event_file *file = fbuffer->trace_file;
2888 
2889 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2890 			fbuffer->entry, &tt))
2891 		goto discard;
2892 
2893 	if (static_key_false(&tracepoint_printk_key.key))
2894 		output_printk(fbuffer);
2895 
2896 	if (static_branch_unlikely(&trace_event_exports_enabled))
2897 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2898 
2899 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2900 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2901 
2902 discard:
2903 	if (tt)
2904 		event_triggers_post_call(file, tt);
2905 
2906 }
2907 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2908 
2909 /*
2910  * Skip 3:
2911  *
2912  *   trace_buffer_unlock_commit_regs()
2913  *   trace_event_buffer_commit()
2914  *   trace_event_raw_event_xxx()
2915  */
2916 # define STACK_SKIP 3
2917 
2918 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2919 				     struct trace_buffer *buffer,
2920 				     struct ring_buffer_event *event,
2921 				     unsigned int trace_ctx,
2922 				     struct pt_regs *regs)
2923 {
2924 	__buffer_unlock_commit(buffer, event);
2925 
2926 	/*
2927 	 * If regs is not set, then skip the necessary functions.
2928 	 * Note, we can still get here via blktrace, wakeup tracer
2929 	 * and mmiotrace, but that's ok if they lose a function or
2930 	 * two. They are not that meaningful.
2931 	 */
2932 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2933 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2934 }
2935 
2936 /*
2937  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2938  */
2939 void
2940 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2941 				   struct ring_buffer_event *event)
2942 {
2943 	__buffer_unlock_commit(buffer, event);
2944 }
2945 
2946 void
2947 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2948 	       parent_ip, unsigned int trace_ctx)
2949 {
2950 	struct trace_event_call *call = &event_function;
2951 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2952 	struct ring_buffer_event *event;
2953 	struct ftrace_entry *entry;
2954 
2955 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2956 					    trace_ctx);
2957 	if (!event)
2958 		return;
2959 	entry	= ring_buffer_event_data(event);
2960 	entry->ip			= ip;
2961 	entry->parent_ip		= parent_ip;
2962 
2963 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2964 		if (static_branch_unlikely(&trace_function_exports_enabled))
2965 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2966 		__buffer_unlock_commit(buffer, event);
2967 	}
2968 }
2969 
2970 #ifdef CONFIG_STACKTRACE
2971 
2972 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2973 #define FTRACE_KSTACK_NESTING	4
2974 
2975 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2976 
2977 struct ftrace_stack {
2978 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2979 };
2980 
2981 
2982 struct ftrace_stacks {
2983 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2984 };
2985 
2986 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2987 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2988 
2989 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2990 				 unsigned int trace_ctx,
2991 				 int skip, struct pt_regs *regs)
2992 {
2993 	struct trace_event_call *call = &event_kernel_stack;
2994 	struct ring_buffer_event *event;
2995 	unsigned int size, nr_entries;
2996 	struct ftrace_stack *fstack;
2997 	struct stack_entry *entry;
2998 	int stackidx;
2999 
3000 	/*
3001 	 * Add one, for this function and the call to stack_trace_save().
3002 	 * If regs is set, then these functions will not be in the way.
3003 	 */
3004 #ifndef CONFIG_UNWINDER_ORC
3005 	if (!regs)
3006 		skip++;
3007 #endif
3008 
3009 	preempt_disable_notrace();
3010 
3011 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3012 
3013 	/* This should never happen. If it does, yell once and skip */
3014 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3015 		goto out;
3016 
3017 	/*
3018 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3019 	 * interrupt will either see the value pre increment or post
3020 	 * increment. If the interrupt happens pre increment it will have
3021 	 * restored the counter when it returns.  We just need a barrier to
3022 	 * keep gcc from moving things around.
3023 	 */
3024 	barrier();
3025 
3026 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3027 	size = ARRAY_SIZE(fstack->calls);
3028 
3029 	if (regs) {
3030 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3031 						   size, skip);
3032 	} else {
3033 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3034 	}
3035 
3036 	size = nr_entries * sizeof(unsigned long);
3037 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3038 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3039 				    trace_ctx);
3040 	if (!event)
3041 		goto out;
3042 	entry = ring_buffer_event_data(event);
3043 
3044 	memcpy(&entry->caller, fstack->calls, size);
3045 	entry->size = nr_entries;
3046 
3047 	if (!call_filter_check_discard(call, entry, buffer, event))
3048 		__buffer_unlock_commit(buffer, event);
3049 
3050  out:
3051 	/* Again, don't let gcc optimize things here */
3052 	barrier();
3053 	__this_cpu_dec(ftrace_stack_reserve);
3054 	preempt_enable_notrace();
3055 
3056 }
3057 
3058 static inline void ftrace_trace_stack(struct trace_array *tr,
3059 				      struct trace_buffer *buffer,
3060 				      unsigned int trace_ctx,
3061 				      int skip, struct pt_regs *regs)
3062 {
3063 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3064 		return;
3065 
3066 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3067 }
3068 
3069 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3070 		   int skip)
3071 {
3072 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3073 
3074 	if (rcu_is_watching()) {
3075 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3076 		return;
3077 	}
3078 
3079 	/*
3080 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3081 	 * but if the above rcu_is_watching() failed, then the NMI
3082 	 * triggered someplace critical, and rcu_irq_enter() should
3083 	 * not be called from NMI.
3084 	 */
3085 	if (unlikely(in_nmi()))
3086 		return;
3087 
3088 	rcu_irq_enter_irqson();
3089 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3090 	rcu_irq_exit_irqson();
3091 }
3092 
3093 /**
3094  * trace_dump_stack - record a stack back trace in the trace buffer
3095  * @skip: Number of functions to skip (helper handlers)
3096  */
3097 void trace_dump_stack(int skip)
3098 {
3099 	if (tracing_disabled || tracing_selftest_running)
3100 		return;
3101 
3102 #ifndef CONFIG_UNWINDER_ORC
3103 	/* Skip 1 to skip this function. */
3104 	skip++;
3105 #endif
3106 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3107 			     tracing_gen_ctx(), skip, NULL);
3108 }
3109 EXPORT_SYMBOL_GPL(trace_dump_stack);
3110 
3111 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3112 static DEFINE_PER_CPU(int, user_stack_count);
3113 
3114 static void
3115 ftrace_trace_userstack(struct trace_array *tr,
3116 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3117 {
3118 	struct trace_event_call *call = &event_user_stack;
3119 	struct ring_buffer_event *event;
3120 	struct userstack_entry *entry;
3121 
3122 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3123 		return;
3124 
3125 	/*
3126 	 * NMIs can not handle page faults, even with fixups.
3127 	 * Saving the user stack can (and often does) fault.
3128 	 */
3129 	if (unlikely(in_nmi()))
3130 		return;
3131 
3132 	/*
3133 	 * prevent recursion, since the user stack tracing may
3134 	 * trigger other kernel events.
3135 	 */
3136 	preempt_disable();
3137 	if (__this_cpu_read(user_stack_count))
3138 		goto out;
3139 
3140 	__this_cpu_inc(user_stack_count);
3141 
3142 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3143 					    sizeof(*entry), trace_ctx);
3144 	if (!event)
3145 		goto out_drop_count;
3146 	entry	= ring_buffer_event_data(event);
3147 
3148 	entry->tgid		= current->tgid;
3149 	memset(&entry->caller, 0, sizeof(entry->caller));
3150 
3151 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3152 	if (!call_filter_check_discard(call, entry, buffer, event))
3153 		__buffer_unlock_commit(buffer, event);
3154 
3155  out_drop_count:
3156 	__this_cpu_dec(user_stack_count);
3157  out:
3158 	preempt_enable();
3159 }
3160 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3161 static void ftrace_trace_userstack(struct trace_array *tr,
3162 				   struct trace_buffer *buffer,
3163 				   unsigned int trace_ctx)
3164 {
3165 }
3166 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3167 
3168 #endif /* CONFIG_STACKTRACE */
3169 
3170 static inline void
3171 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3172 			  unsigned long long delta)
3173 {
3174 	entry->bottom_delta_ts = delta & U32_MAX;
3175 	entry->top_delta_ts = (delta >> 32);
3176 }
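/*
 * Worked example of the split above: a delta of 0x100000002 is stored as
 * bottom_delta_ts = 0x2 and top_delta_ts = 0x1, and can be rebuilt on the
 * output side as ((u64)top_delta_ts << 32) | bottom_delta_ts.
 */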
3177 
3178 void trace_last_func_repeats(struct trace_array *tr,
3179 			     struct trace_func_repeats *last_info,
3180 			     unsigned int trace_ctx)
3181 {
3182 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3183 	struct func_repeats_entry *entry;
3184 	struct ring_buffer_event *event;
3185 	u64 delta;
3186 
3187 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3188 					    sizeof(*entry), trace_ctx);
3189 	if (!event)
3190 		return;
3191 
3192 	delta = ring_buffer_event_time_stamp(buffer, event) -
3193 		last_info->ts_last_call;
3194 
3195 	entry = ring_buffer_event_data(event);
3196 	entry->ip = last_info->ip;
3197 	entry->parent_ip = last_info->parent_ip;
3198 	entry->count = last_info->count;
3199 	func_repeats_set_delta_ts(entry, delta);
3200 
3201 	__buffer_unlock_commit(buffer, event);
3202 }
3203 
3204 /* created for use with alloc_percpu */
3205 struct trace_buffer_struct {
3206 	int nesting;
3207 	char buffer[4][TRACE_BUF_SIZE];
3208 };
3209 
3210 static struct trace_buffer_struct *trace_percpu_buffer;
3211 
3212 /*
3213  * This allows for lockless recording.  If we're nested too deeply, then
3214  * this returns NULL.
3215  */
3216 static char *get_trace_buf(void)
3217 {
3218 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3219 
3220 	if (!buffer || buffer->nesting >= 4)
3221 		return NULL;
3222 
3223 	buffer->nesting++;
3224 
3225 	/* Interrupts must see nesting incremented before we use the buffer */
3226 	barrier();
3227 	return &buffer->buffer[buffer->nesting - 1][0];
3228 }
3229 
3230 static void put_trace_buf(void)
3231 {
3232 	/* Don't let the decrement of nesting leak before this */
3233 	barrier();
3234 	this_cpu_dec(trace_percpu_buffer->nesting);
3235 }
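/*
 * A minimal sketch of the intended calling pattern (trace_vbprintk()
 * below is a real user):
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format up to TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */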
3236 
3237 static int alloc_percpu_trace_buffer(void)
3238 {
3239 	struct trace_buffer_struct *buffers;
3240 
3241 	if (trace_percpu_buffer)
3242 		return 0;
3243 
3244 	buffers = alloc_percpu(struct trace_buffer_struct);
3245 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3246 		return -ENOMEM;
3247 
3248 	trace_percpu_buffer = buffers;
3249 	return 0;
3250 }
3251 
3252 static int buffers_allocated;
3253 
3254 void trace_printk_init_buffers(void)
3255 {
3256 	if (buffers_allocated)
3257 		return;
3258 
3259 	if (alloc_percpu_trace_buffer())
3260 		return;
3261 
3262 	/* trace_printk() is for debug use only. Don't use it in production. */
3263 
3264 	pr_warn("\n");
3265 	pr_warn("**********************************************************\n");
3266 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3267 	pr_warn("**                                                      **\n");
3268 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3269 	pr_warn("**                                                      **\n");
3270 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3271 	pr_warn("** unsafe for production use.                           **\n");
3272 	pr_warn("**                                                      **\n");
3273 	pr_warn("** If you see this message and you are not debugging    **\n");
3274 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3275 	pr_warn("**                                                      **\n");
3276 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3277 	pr_warn("**********************************************************\n");
3278 
3279 	/* Expand the buffers to set size */
3280 	tracing_update_buffers();
3281 
3282 	buffers_allocated = 1;
3283 
3284 	/*
3285 	 * trace_printk_init_buffers() can be called by modules.
3286 	 * If that happens, then we need to start cmdline recording
3287 	 * directly here. If the global_trace.buffer is already
3288 	 * allocated here, then this was called by module code.
3289 	 */
3290 	if (global_trace.array_buffer.buffer)
3291 		tracing_start_cmdline_record();
3292 }
3293 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3294 
3295 void trace_printk_start_comm(void)
3296 {
3297 	/* Start tracing comms if trace printk is set */
3298 	if (!buffers_allocated)
3299 		return;
3300 	tracing_start_cmdline_record();
3301 }
3302 
3303 static void trace_printk_start_stop_comm(int enabled)
3304 {
3305 	if (!buffers_allocated)
3306 		return;
3307 
3308 	if (enabled)
3309 		tracing_start_cmdline_record();
3310 	else
3311 		tracing_stop_cmdline_record();
3312 }
3313 
3314 /**
3315  * trace_vbprintk - write binary msg to tracing buffer
3316  * @ip:    The address of the caller
3317  * @fmt:   The string format to write to the buffer
3318  * @args:  Arguments for @fmt
3319  */
3320 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3321 {
3322 	struct trace_event_call *call = &event_bprint;
3323 	struct ring_buffer_event *event;
3324 	struct trace_buffer *buffer;
3325 	struct trace_array *tr = &global_trace;
3326 	struct bprint_entry *entry;
3327 	unsigned int trace_ctx;
3328 	char *tbuffer;
3329 	int len = 0, size;
3330 
3331 	if (unlikely(tracing_selftest_running || tracing_disabled))
3332 		return 0;
3333 
3334 	/* Don't pollute graph traces with trace_vprintk internals */
3335 	pause_graph_tracing();
3336 
3337 	trace_ctx = tracing_gen_ctx();
3338 	preempt_disable_notrace();
3339 
3340 	tbuffer = get_trace_buf();
3341 	if (!tbuffer) {
3342 		len = 0;
3343 		goto out_nobuffer;
3344 	}
3345 
3346 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3347 
3348 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3349 		goto out_put;
3350 
3351 	size = sizeof(*entry) + sizeof(u32) * len;
3352 	buffer = tr->array_buffer.buffer;
3353 	ring_buffer_nest_start(buffer);
3354 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3355 					    trace_ctx);
3356 	if (!event)
3357 		goto out;
3358 	entry = ring_buffer_event_data(event);
3359 	entry->ip			= ip;
3360 	entry->fmt			= fmt;
3361 
3362 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3363 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3364 		__buffer_unlock_commit(buffer, event);
3365 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3366 	}
3367 
3368 out:
3369 	ring_buffer_nest_end(buffer);
3370 out_put:
3371 	put_trace_buf();
3372 
3373 out_nobuffer:
3374 	preempt_enable_notrace();
3375 	unpause_graph_tracing();
3376 
3377 	return len;
3378 }
3379 EXPORT_SYMBOL_GPL(trace_vbprintk);
3380 
3381 __printf(3, 0)
3382 static int
3383 __trace_array_vprintk(struct trace_buffer *buffer,
3384 		      unsigned long ip, const char *fmt, va_list args)
3385 {
3386 	struct trace_event_call *call = &event_print;
3387 	struct ring_buffer_event *event;
3388 	int len = 0, size;
3389 	struct print_entry *entry;
3390 	unsigned int trace_ctx;
3391 	char *tbuffer;
3392 
3393 	if (tracing_disabled || tracing_selftest_running)
3394 		return 0;
3395 
3396 	/* Don't pollute graph traces with trace_vprintk internals */
3397 	pause_graph_tracing();
3398 
3399 	trace_ctx = tracing_gen_ctx();
3400 	preempt_disable_notrace();
3401 
3402 
3403 	tbuffer = get_trace_buf();
3404 	if (!tbuffer) {
3405 		len = 0;
3406 		goto out_nobuffer;
3407 	}
3408 
3409 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3410 
3411 	size = sizeof(*entry) + len + 1;
3412 	ring_buffer_nest_start(buffer);
3413 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3414 					    trace_ctx);
3415 	if (!event)
3416 		goto out;
3417 	entry = ring_buffer_event_data(event);
3418 	entry->ip = ip;
3419 
3420 	memcpy(&entry->buf, tbuffer, len + 1);
3421 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3422 		__buffer_unlock_commit(buffer, event);
3423 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3424 	}
3425 
3426 out:
3427 	ring_buffer_nest_end(buffer);
3428 	put_trace_buf();
3429 
3430 out_nobuffer:
3431 	preempt_enable_notrace();
3432 	unpause_graph_tracing();
3433 
3434 	return len;
3435 }
3436 
3437 __printf(3, 0)
3438 int trace_array_vprintk(struct trace_array *tr,
3439 			unsigned long ip, const char *fmt, va_list args)
3440 {
3441 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3442 }
3443 
3444 /**
3445  * trace_array_printk - Print a message to a specific instance
3446  * @tr: The instance trace_array descriptor
3447  * @ip: The instruction pointer that this is called from.
3448  * @fmt: The format to print (printf format)
3449  *
3450  * If a subsystem sets up its own instance, it has the right to
3451  * printk strings into its tracing instance buffer using this
3452  * function. Note, this function will not write into the top level
3453  * buffer (use trace_printk() for that), as the top level buffer
3454  * should only hold events that can be individually disabled.
3455  * trace_printk() is only used for debugging a kernel, and should
3456  * never be incorporated into normal use.
3457  *
3458  * trace_array_printk() can be used, as it will not add noise to the
3459  * top level tracing buffer.
3460  *
3461  * Note, trace_array_init_printk() must be called on @tr before this
3462  * can be used.
3463  */
3464 __printf(3, 0)
3465 int trace_array_printk(struct trace_array *tr,
3466 		       unsigned long ip, const char *fmt, ...)
3467 {
3468 	int ret;
3469 	va_list ap;
3470 
3471 	if (!tr)
3472 		return -ENOENT;
3473 
3474 	/* This is only allowed for created instances */
3475 	if (tr == &global_trace)
3476 		return 0;
3477 
3478 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3479 		return 0;
3480 
3481 	va_start(ap, fmt);
3482 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3483 	va_end(ap);
3484 	return ret;
3485 }
3486 EXPORT_SYMBOL_GPL(trace_array_printk);
3487 
3488 /**
3489  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3490  * @tr: The trace array to initialize the buffers for
3491  *
3492  * As trace_array_printk() only writes into instances, such calls are
3493  * OK to have in the kernel (unlike trace_printk()). This needs to be called
3494  * before trace_array_printk() can be used on a trace_array.
3495  */
3496 int trace_array_init_printk(struct trace_array *tr)
3497 {
3498 	if (!tr)
3499 		return -ENOENT;
3500 
3501 	/* This is only allowed for created instances */
3502 	if (tr == &global_trace)
3503 		return -EINVAL;
3504 
3505 	return alloc_percpu_trace_buffer();
3506 }
3507 EXPORT_SYMBOL_GPL(trace_array_init_printk);
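/*
 * Illustrative only: a hypothetical subsystem with its own instance would
 * typically do something like:
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 */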
3508 
3509 __printf(3, 4)
3510 int trace_array_printk_buf(struct trace_buffer *buffer,
3511 			   unsigned long ip, const char *fmt, ...)
3512 {
3513 	int ret;
3514 	va_list ap;
3515 
3516 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3517 		return 0;
3518 
3519 	va_start(ap, fmt);
3520 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3521 	va_end(ap);
3522 	return ret;
3523 }
3524 
3525 __printf(2, 0)
3526 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3527 {
3528 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3529 }
3530 EXPORT_SYMBOL_GPL(trace_vprintk);
3531 
3532 static void trace_iterator_increment(struct trace_iterator *iter)
3533 {
3534 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3535 
3536 	iter->idx++;
3537 	if (buf_iter)
3538 		ring_buffer_iter_advance(buf_iter);
3539 }
3540 
3541 static struct trace_entry *
3542 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3543 		unsigned long *lost_events)
3544 {
3545 	struct ring_buffer_event *event;
3546 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3547 
3548 	if (buf_iter) {
3549 		event = ring_buffer_iter_peek(buf_iter, ts);
3550 		if (lost_events)
3551 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3552 				(unsigned long)-1 : 0;
3553 	} else {
3554 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3555 					 lost_events);
3556 	}
3557 
3558 	if (event) {
3559 		iter->ent_size = ring_buffer_event_length(event);
3560 		return ring_buffer_event_data(event);
3561 	}
3562 	iter->ent_size = 0;
3563 	return NULL;
3564 }
3565 
3566 static struct trace_entry *
3567 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3568 		  unsigned long *missing_events, u64 *ent_ts)
3569 {
3570 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3571 	struct trace_entry *ent, *next = NULL;
3572 	unsigned long lost_events = 0, next_lost = 0;
3573 	int cpu_file = iter->cpu_file;
3574 	u64 next_ts = 0, ts;
3575 	int next_cpu = -1;
3576 	int next_size = 0;
3577 	int cpu;
3578 
3579 	/*
3580 	 * If we are in a per_cpu trace file, don't bother iterating over
3581 	 * all CPUs, just peek at that CPU directly.
3582 	 */
3583 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3584 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3585 			return NULL;
3586 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3587 		if (ent_cpu)
3588 			*ent_cpu = cpu_file;
3589 
3590 		return ent;
3591 	}
3592 
3593 	for_each_tracing_cpu(cpu) {
3594 
3595 		if (ring_buffer_empty_cpu(buffer, cpu))
3596 			continue;
3597 
3598 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3599 
3600 		/*
3601 		 * Pick the entry with the smallest timestamp:
3602 		 */
3603 		if (ent && (!next || ts < next_ts)) {
3604 			next = ent;
3605 			next_cpu = cpu;
3606 			next_ts = ts;
3607 			next_lost = lost_events;
3608 			next_size = iter->ent_size;
3609 		}
3610 	}
3611 
3612 	iter->ent_size = next_size;
3613 
3614 	if (ent_cpu)
3615 		*ent_cpu = next_cpu;
3616 
3617 	if (ent_ts)
3618 		*ent_ts = next_ts;
3619 
3620 	if (missing_events)
3621 		*missing_events = next_lost;
3622 
3623 	return next;
3624 }
3625 
3626 #define STATIC_FMT_BUF_SIZE	128
3627 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3628 
3629 static char *trace_iter_expand_format(struct trace_iterator *iter)
3630 {
3631 	char *tmp;
3632 
3633 	/*
3634 	 * iter->tr is NULL when used with tp_printk, which means
3635 	 * this can get called where it is not safe to call krealloc().
3636 	 */
3637 	if (!iter->tr || iter->fmt == static_fmt_buf)
3638 		return NULL;
3639 
3640 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3641 		       GFP_KERNEL);
3642 	if (tmp) {
3643 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3644 		iter->fmt = tmp;
3645 	}
3646 
3647 	return tmp;
3648 }
3649 
3650 /* Returns true if the string is safe to dereference from an event */
3651 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3652 {
3653 	unsigned long addr = (unsigned long)str;
3654 	struct trace_event *trace_event;
3655 	struct trace_event_call *event;
3656 
3657 	/* OK if part of the event data */
3658 	if ((addr >= (unsigned long)iter->ent) &&
3659 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3660 		return true;
3661 
3662 	/* OK if part of the temp seq buffer */
3663 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3664 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3665 		return true;
3666 
3667 	/* Core rodata can not be freed */
3668 	if (is_kernel_rodata(addr))
3669 		return true;
3670 
3671 	if (trace_is_tracepoint_string(str))
3672 		return true;
3673 
3674 	/*
3675 	 * Now this could be a module event, referencing core module
3676 	 * data, which is OK.
3677 	 */
3678 	if (!iter->ent)
3679 		return false;
3680 
3681 	trace_event = ftrace_find_event(iter->ent->type);
3682 	if (!trace_event)
3683 		return false;
3684 
3685 	event = container_of(trace_event, struct trace_event_call, event);
3686 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3687 		return false;
3688 
3689 	/* Would rather have rodata, but this will suffice */
3690 	if (within_module_core(addr, event->module))
3691 		return true;
3692 
3693 	return false;
3694 }
3695 
3696 static const char *show_buffer(struct trace_seq *s)
3697 {
3698 	struct seq_buf *seq = &s->seq;
3699 
3700 	seq_buf_terminate(seq);
3701 
3702 	return seq->buffer;
3703 }
3704 
3705 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3706 
3707 static int test_can_verify_check(const char *fmt, ...)
3708 {
3709 	char buf[16];
3710 	va_list ap;
3711 	int ret;
3712 
3713 	/*
3714 	 * The verifier depends on vsnprintf() modifying the va_list that is
3715 	 * passed to it, i.e. on the va_list being passed by reference. Some
3716 	 * architectures (like x86_32) pass it by value, which means that
3717 	 * vsnprintf() does not modify the caller's va_list, and the verifier
3718 	 * would then need to understand every conversion that vsnprintf()
3719 	 * can perform. If the va_list is passed by value, the verifier is
3720 	 * disabled.
3721 	 */
3722 	va_start(ap, fmt);
3723 	vsnprintf(buf, 16, "%d", ap);
3724 	ret = va_arg(ap, int);
3725 	va_end(ap);
3726 
3727 	return ret;
3728 }
3729 
3730 static void test_can_verify(void)
3731 {
3732 	if (!test_can_verify_check("%d %d", 0, 1)) {
3733 		pr_info("trace event string verifier disabled\n");
3734 		static_branch_inc(&trace_no_verify);
3735 	}
3736 }
3737 
3738 /**
3739  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3740  * @iter: The iterator that holds the seq buffer and the event being printed
3741  * @fmt: The format used to print the event
3742  * @ap: The va_list holding the data to print from @fmt.
3743  *
3744  * This writes the data into the @iter->seq buffer using the data from
3745  * @fmt and @ap. If the format has a %s, then the source of the string
3746  * is examined to make sure it is safe to print, otherwise it will
3747  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3748  * pointer.
3749  */
3750 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3751 			 va_list ap)
3752 {
3753 	const char *p = fmt;
3754 	const char *str;
3755 	int i, j;
3756 
3757 	if (WARN_ON_ONCE(!fmt))
3758 		return;
3759 
3760 	if (static_branch_unlikely(&trace_no_verify))
3761 		goto print;
3762 
3763 	/* Don't bother checking when doing a ftrace_dump() */
3764 	if (iter->fmt == static_fmt_buf)
3765 		goto print;
3766 
3767 	while (*p) {
3768 		bool star = false;
3769 		int len = 0;
3770 
3771 		j = 0;
3772 
3773 		/* We only care about %s and variants */
3774 		for (i = 0; p[i]; i++) {
3775 			if (i + 1 >= iter->fmt_size) {
3776 				/*
3777 				 * If we can't expand the copy buffer,
3778 				 * just print it.
3779 				 */
3780 				if (!trace_iter_expand_format(iter))
3781 					goto print;
3782 			}
3783 
3784 			if (p[i] == '\\' && p[i+1]) {
3785 				i++;
3786 				continue;
3787 			}
3788 			if (p[i] == '%') {
3789 				/* Need to test cases like %08.*s */
3790 				for (j = 1; p[i+j]; j++) {
3791 					if (isdigit(p[i+j]) ||
3792 					    p[i+j] == '.')
3793 						continue;
3794 					if (p[i+j] == '*') {
3795 						star = true;
3796 						continue;
3797 					}
3798 					break;
3799 				}
3800 				if (p[i+j] == 's')
3801 					break;
3802 				star = false;
3803 			}
3804 			j = 0;
3805 		}
3806 		/* If no %s found then just print normally */
3807 		if (!p[i])
3808 			break;
3809 
3810 		/* Copy up to the %s, and print that */
3811 		strncpy(iter->fmt, p, i);
3812 		iter->fmt[i] = '\0';
3813 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3814 
3815 		/*
3816 		 * If iter->seq is full, the above call no longer guarantees
3817 		 * that ap is in sync with fmt processing, and further calls
3818 		 * to va_arg() can return wrong positional arguments.
3819 		 *
3820 		 * Ensure that ap is no longer used in this case.
3821 		 */
3822 		if (iter->seq.full) {
3823 			p = "";
3824 			break;
3825 		}
3826 
3827 		if (star)
3828 			len = va_arg(ap, int);
3829 
3830 		/* The ap now points to the string data of the %s */
3831 		str = va_arg(ap, const char *);
3832 
3833 		/*
3834 		 * If you hit this warning, it is likely that the
3835 		 * trace event in question used %s on a string that
3836 		 * was saved at the time of the event, but may not be
3837 		 * around when the trace is read. Use __string(),
3838 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3839 		 * instead. See samples/trace_events/trace-events-sample.h
3840 		 * for reference.
3841 		 */
3842 		if (WARN_ONCE(!trace_safe_str(iter, str),
3843 			      "fmt: '%s' current_buffer: '%s'",
3844 			      fmt, show_buffer(&iter->seq))) {
3845 			int ret;
3846 
3847 			/* Try to safely read the string */
3848 			if (star) {
3849 				if (len + 1 > iter->fmt_size)
3850 					len = iter->fmt_size - 1;
3851 				if (len < 0)
3852 					len = 0;
3853 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3854 				iter->fmt[len] = 0;
3855 				star = false;
3856 			} else {
3857 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3858 								  iter->fmt_size);
3859 			}
3860 			if (ret < 0)
3861 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3862 			else
3863 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3864 						 str, iter->fmt);
3865 			str = "[UNSAFE-MEMORY]";
3866 			strcpy(iter->fmt, "%s");
3867 		} else {
3868 			strncpy(iter->fmt, p + i, j + 1);
3869 			iter->fmt[j+1] = '\0';
3870 		}
3871 		if (star)
3872 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3873 		else
3874 			trace_seq_printf(&iter->seq, iter->fmt, str);
3875 
3876 		p += i + j + 1;
3877 	}
3878  print:
3879 	if (*p)
3880 		trace_seq_vprintf(&iter->seq, p, ap);
3881 }
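/*
 * A minimal sketch (hypothetical event and field names) of the
 * __string()/__assign_str()/__get_str() pattern that the warning above
 * refers to. It copies the string into the event record itself, so the
 * "%s" pointer handed to this function stays inside iter->ent and passes
 * trace_safe_str():
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * See samples/trace_events/trace-events-sample.h for the complete example.
 */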
3882 
3883 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3884 {
3885 	const char *p, *new_fmt;
3886 	char *q;
3887 
3888 	if (WARN_ON_ONCE(!fmt))
3889 		return fmt;
3890 
3891 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3892 		return fmt;
3893 
3894 	p = fmt;
3895 	new_fmt = q = iter->fmt;
3896 	while (*p) {
3897 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3898 			if (!trace_iter_expand_format(iter))
3899 				return fmt;
3900 
3901 			q += iter->fmt - new_fmt;
3902 			new_fmt = iter->fmt;
3903 		}
3904 
3905 		*q++ = *p++;
3906 
3907 		/* Replace %p with %px */
3908 		if (p[-1] == '%') {
3909 			if (p[0] == '%') {
3910 				*q++ = *p++;
3911 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3912 				*q++ = *p++;
3913 				*q++ = 'x';
3914 			}
3915 		}
3916 	}
3917 	*q = '\0';
3918 
3919 	return new_fmt;
3920 }
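/*
 * Example of the rewrite above: with the hash-ptr trace option cleared, a
 * format such as
 *
 *	"comm=%s ptr=%p flags=%d"
 *
 * is copied into iter->fmt as
 *
 *	"comm=%s ptr=%px flags=%d"
 *
 * while "%%" and extended specifiers such as "%pS" or "%pK" (where the
 * character after the 'p' is alphanumeric) are left untouched.
 */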
3921 
3922 #define STATIC_TEMP_BUF_SIZE	128
3923 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3924 
3925 /* Find the next real entry, without updating the iterator itself */
3926 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3927 					  int *ent_cpu, u64 *ent_ts)
3928 {
3929 	/* __find_next_entry will reset ent_size */
3930 	int ent_size = iter->ent_size;
3931 	struct trace_entry *entry;
3932 
3933 	/*
3934 	 * If called from ftrace_dump(), then the iter->temp buffer
3935 	 * will be the static_temp_buf and not created from kmalloc.
3936 	 * If the entry size is greater than the buffer, we cannot
3937 	 * save it. Just return NULL in that case. This is only
3938 	 * used to add markers when two consecutive events' time
3939 	 * stamps have a large delta. See trace_print_lat_context().
3940 	 */
3941 	if (iter->temp == static_temp_buf &&
3942 	    STATIC_TEMP_BUF_SIZE < ent_size)
3943 		return NULL;
3944 
3945 	/*
3946 	 * The __find_next_entry() may call peek_next_entry(), which may
3947 	 * call ring_buffer_peek() that may make the contents of iter->ent
3948 	 * undefined. Need to copy iter->ent now.
3949 	 */
3950 	if (iter->ent && iter->ent != iter->temp) {
3951 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3952 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3953 			void *temp;
3954 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3955 			if (!temp)
3956 				return NULL;
3957 			kfree(iter->temp);
3958 			iter->temp = temp;
3959 			iter->temp_size = iter->ent_size;
3960 		}
3961 		memcpy(iter->temp, iter->ent, iter->ent_size);
3962 		iter->ent = iter->temp;
3963 	}
3964 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3965 	/* Put back the original ent_size */
3966 	iter->ent_size = ent_size;
3967 
3968 	return entry;
3969 }
3970 
3971 /* Find the next real entry, and increment the iterator to the next entry */
3972 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3973 {
3974 	iter->ent = __find_next_entry(iter, &iter->cpu,
3975 				      &iter->lost_events, &iter->ts);
3976 
3977 	if (iter->ent)
3978 		trace_iterator_increment(iter);
3979 
3980 	return iter->ent ? iter : NULL;
3981 }
3982 
3983 static void trace_consume(struct trace_iterator *iter)
3984 {
3985 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3986 			    &iter->lost_events);
3987 }
3988 
3989 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3990 {
3991 	struct trace_iterator *iter = m->private;
3992 	int i = (int)*pos;
3993 	void *ent;
3994 
3995 	WARN_ON_ONCE(iter->leftover);
3996 
3997 	(*pos)++;
3998 
3999 	/* can't go backwards */
4000 	if (iter->idx > i)
4001 		return NULL;
4002 
4003 	if (iter->idx < 0)
4004 		ent = trace_find_next_entry_inc(iter);
4005 	else
4006 		ent = iter;
4007 
4008 	while (ent && iter->idx < i)
4009 		ent = trace_find_next_entry_inc(iter);
4010 
4011 	iter->pos = *pos;
4012 
4013 	return ent;
4014 }
4015 
4016 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4017 {
4018 	struct ring_buffer_iter *buf_iter;
4019 	unsigned long entries = 0;
4020 	u64 ts;
4021 
4022 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4023 
4024 	buf_iter = trace_buffer_iter(iter, cpu);
4025 	if (!buf_iter)
4026 		return;
4027 
4028 	ring_buffer_iter_reset(buf_iter);
4029 
4030 	/*
4031 	 * With the max latency tracers we could have the case that a
4032 	 * reset never took place on a cpu. This is evident from the
4033 	 * timestamp being before the start of the buffer.
4034 	 */
4035 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4036 		if (ts >= iter->array_buffer->time_start)
4037 			break;
4038 		entries++;
4039 		ring_buffer_iter_advance(buf_iter);
4040 	}
4041 
4042 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4043 }
4044 
4045 /*
4046  * The current tracer is copied to avoid using a global lock
4047  * all around.
4048  */
4049 static void *s_start(struct seq_file *m, loff_t *pos)
4050 {
4051 	struct trace_iterator *iter = m->private;
4052 	struct trace_array *tr = iter->tr;
4053 	int cpu_file = iter->cpu_file;
4054 	void *p = NULL;
4055 	loff_t l = 0;
4056 	int cpu;
4057 
4058 	/*
4059 	 * Copy the tracer to avoid using a global lock all around.
4060 	 * iter->trace is a copy of current_trace, so the name pointers
4061 	 * can be compared instead of calling strcmp(), as iter->trace->name
4062 	 * will point to the same string as current_trace->name.
4063 	 */
4064 	mutex_lock(&trace_types_lock);
4065 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4066 		*iter->trace = *tr->current_trace;
4067 	mutex_unlock(&trace_types_lock);
4068 
4069 #ifdef CONFIG_TRACER_MAX_TRACE
4070 	if (iter->snapshot && iter->trace->use_max_tr)
4071 		return ERR_PTR(-EBUSY);
4072 #endif
4073 
4074 	if (*pos != iter->pos) {
4075 		iter->ent = NULL;
4076 		iter->cpu = 0;
4077 		iter->idx = -1;
4078 
4079 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4080 			for_each_tracing_cpu(cpu)
4081 				tracing_iter_reset(iter, cpu);
4082 		} else
4083 			tracing_iter_reset(iter, cpu_file);
4084 
4085 		iter->leftover = 0;
4086 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4087 			;
4088 
4089 	} else {
4090 		/*
4091 		 * If we overflowed the seq_file before, then we want
4092 		 * to just reuse the trace_seq buffer again.
4093 		 */
4094 		if (iter->leftover)
4095 			p = iter;
4096 		else {
4097 			l = *pos - 1;
4098 			p = s_next(m, p, &l);
4099 		}
4100 	}
4101 
4102 	trace_event_read_lock();
4103 	trace_access_lock(cpu_file);
4104 	return p;
4105 }
4106 
4107 static void s_stop(struct seq_file *m, void *p)
4108 {
4109 	struct trace_iterator *iter = m->private;
4110 
4111 #ifdef CONFIG_TRACER_MAX_TRACE
4112 	if (iter->snapshot && iter->trace->use_max_tr)
4113 		return;
4114 #endif
4115 
4116 	trace_access_unlock(iter->cpu_file);
4117 	trace_event_read_unlock();
4118 }
4119 
4120 static void
4121 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4122 		      unsigned long *entries, int cpu)
4123 {
4124 	unsigned long count;
4125 
4126 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4127 	/*
4128 	 * If this buffer has skipped entries, then we hold all
4129 	 * entries for the trace and we need to ignore the
4130 	 * ones before the time stamp.
4131 	 */
4132 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4133 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4134 		/* total is the same as the entries */
4135 		*total = count;
4136 	} else
4137 		*total = count +
4138 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4139 	*entries = count;
4140 }
4141 
4142 static void
4143 get_total_entries(struct array_buffer *buf,
4144 		  unsigned long *total, unsigned long *entries)
4145 {
4146 	unsigned long t, e;
4147 	int cpu;
4148 
4149 	*total = 0;
4150 	*entries = 0;
4151 
4152 	for_each_tracing_cpu(cpu) {
4153 		get_total_entries_cpu(buf, &t, &e, cpu);
4154 		*total += t;
4155 		*entries += e;
4156 	}
4157 }
4158 
4159 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4160 {
4161 	unsigned long total, entries;
4162 
4163 	if (!tr)
4164 		tr = &global_trace;
4165 
4166 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4167 
4168 	return entries;
4169 }
4170 
4171 unsigned long trace_total_entries(struct trace_array *tr)
4172 {
4173 	unsigned long total, entries;
4174 
4175 	if (!tr)
4176 		tr = &global_trace;
4177 
4178 	get_total_entries(&tr->array_buffer, &total, &entries);
4179 
4180 	return entries;
4181 }
4182 
4183 static void print_lat_help_header(struct seq_file *m)
4184 {
4185 	seq_puts(m, "#                    _------=> CPU#            \n"
4186 		    "#                   / _-----=> irqs-off        \n"
4187 		    "#                  | / _----=> need-resched    \n"
4188 		    "#                  || / _---=> hardirq/softirq \n"
4189 		    "#                  ||| / _--=> preempt-depth   \n"
4190 		    "#                  |||| / _-=> migrate-disable \n"
4191 		    "#                  ||||| /     delay           \n"
4192 		    "#  cmd     pid     |||||| time  |   caller     \n"
4193 		    "#     \\   /        ||||||  \\    |    /       \n");
4194 }
4195 
4196 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4197 {
4198 	unsigned long total;
4199 	unsigned long entries;
4200 
4201 	get_total_entries(buf, &total, &entries);
4202 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4203 		   entries, total, num_online_cpus());
4204 	seq_puts(m, "#\n");
4205 }
4206 
4207 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4208 				   unsigned int flags)
4209 {
4210 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4211 
4212 	print_event_info(buf, m);
4213 
4214 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4215 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4216 }
4217 
4218 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4219 				       unsigned int flags)
4220 {
4221 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4222 	const char *space = "            ";
4223 	int prec = tgid ? 12 : 2;
4224 
4225 	print_event_info(buf, m);
4226 
4227 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4228 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4229 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4230 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4231 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4232 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4233 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4234 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4235 }
4236 
4237 void
4238 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4239 {
4240 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4241 	struct array_buffer *buf = iter->array_buffer;
4242 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4243 	struct tracer *type = iter->trace;
4244 	unsigned long entries;
4245 	unsigned long total;
4246 	const char *name = type->name;
4247 
4250 	get_total_entries(buf, &total, &entries);
4251 
4252 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4253 		   name, UTS_RELEASE);
4254 	seq_puts(m, "# -----------------------------------"
4255 		 "---------------------------------\n");
4256 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4257 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4258 		   nsecs_to_usecs(data->saved_latency),
4259 		   entries,
4260 		   total,
4261 		   buf->cpu,
4262 #if defined(CONFIG_PREEMPT_NONE)
4263 		   "server",
4264 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4265 		   "desktop",
4266 #elif defined(CONFIG_PREEMPT)
4267 		   "preempt",
4268 #elif defined(CONFIG_PREEMPT_RT)
4269 		   "preempt_rt",
4270 #else
4271 		   "unknown",
4272 #endif
4273 		   /* These are reserved for later use */
4274 		   0, 0, 0, 0);
4275 #ifdef CONFIG_SMP
4276 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4277 #else
4278 	seq_puts(m, ")\n");
4279 #endif
4280 	seq_puts(m, "#    -----------------\n");
4281 	seq_printf(m, "#    | task: %.16s-%d "
4282 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4283 		   data->comm, data->pid,
4284 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4285 		   data->policy, data->rt_priority);
4286 	seq_puts(m, "#    -----------------\n");
4287 
4288 	if (data->critical_start) {
4289 		seq_puts(m, "#  => started at: ");
4290 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4291 		trace_print_seq(m, &iter->seq);
4292 		seq_puts(m, "\n#  => ended at:   ");
4293 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4294 		trace_print_seq(m, &iter->seq);
4295 		seq_puts(m, "\n#\n");
4296 	}
4297 
4298 	seq_puts(m, "#\n");
4299 }
4300 
4301 static void test_cpu_buff_start(struct trace_iterator *iter)
4302 {
4303 	struct trace_seq *s = &iter->seq;
4304 	struct trace_array *tr = iter->tr;
4305 
4306 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4307 		return;
4308 
4309 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4310 		return;
4311 
4312 	if (cpumask_available(iter->started) &&
4313 	    cpumask_test_cpu(iter->cpu, iter->started))
4314 		return;
4315 
4316 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4317 		return;
4318 
4319 	if (cpumask_available(iter->started))
4320 		cpumask_set_cpu(iter->cpu, iter->started);
4321 
4322 	/* Don't print started cpu buffer for the first entry of the trace */
4323 	if (iter->idx > 1)
4324 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4325 				iter->cpu);
4326 }
4327 
4328 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4329 {
4330 	struct trace_array *tr = iter->tr;
4331 	struct trace_seq *s = &iter->seq;
4332 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4333 	struct trace_entry *entry;
4334 	struct trace_event *event;
4335 
4336 	entry = iter->ent;
4337 
4338 	test_cpu_buff_start(iter);
4339 
4340 	event = ftrace_find_event(entry->type);
4341 
4342 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4343 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4344 			trace_print_lat_context(iter);
4345 		else
4346 			trace_print_context(iter);
4347 	}
4348 
4349 	if (trace_seq_has_overflowed(s))
4350 		return TRACE_TYPE_PARTIAL_LINE;
4351 
4352 	if (event)
4353 		return event->funcs->trace(iter, sym_flags, event);
4354 
4355 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4356 
4357 	return trace_handle_return(s);
4358 }
4359 
4360 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4361 {
4362 	struct trace_array *tr = iter->tr;
4363 	struct trace_seq *s = &iter->seq;
4364 	struct trace_entry *entry;
4365 	struct trace_event *event;
4366 
4367 	entry = iter->ent;
4368 
4369 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4370 		trace_seq_printf(s, "%d %d %llu ",
4371 				 entry->pid, iter->cpu, iter->ts);
4372 
4373 	if (trace_seq_has_overflowed(s))
4374 		return TRACE_TYPE_PARTIAL_LINE;
4375 
4376 	event = ftrace_find_event(entry->type);
4377 	if (event)
4378 		return event->funcs->raw(iter, 0, event);
4379 
4380 	trace_seq_printf(s, "%d ?\n", entry->type);
4381 
4382 	return trace_handle_return(s);
4383 }
4384 
4385 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4386 {
4387 	struct trace_array *tr = iter->tr;
4388 	struct trace_seq *s = &iter->seq;
4389 	unsigned char newline = '\n';
4390 	struct trace_entry *entry;
4391 	struct trace_event *event;
4392 
4393 	entry = iter->ent;
4394 
4395 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4396 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4397 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4398 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4399 		if (trace_seq_has_overflowed(s))
4400 			return TRACE_TYPE_PARTIAL_LINE;
4401 	}
4402 
4403 	event = ftrace_find_event(entry->type);
4404 	if (event) {
4405 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4406 		if (ret != TRACE_TYPE_HANDLED)
4407 			return ret;
4408 	}
4409 
4410 	SEQ_PUT_FIELD(s, newline);
4411 
4412 	return trace_handle_return(s);
4413 }
4414 
4415 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4416 {
4417 	struct trace_array *tr = iter->tr;
4418 	struct trace_seq *s = &iter->seq;
4419 	struct trace_entry *entry;
4420 	struct trace_event *event;
4421 
4422 	entry = iter->ent;
4423 
4424 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4425 		SEQ_PUT_FIELD(s, entry->pid);
4426 		SEQ_PUT_FIELD(s, iter->cpu);
4427 		SEQ_PUT_FIELD(s, iter->ts);
4428 		if (trace_seq_has_overflowed(s))
4429 			return TRACE_TYPE_PARTIAL_LINE;
4430 	}
4431 
4432 	event = ftrace_find_event(entry->type);
4433 	return event ? event->funcs->binary(iter, 0, event) :
4434 		TRACE_TYPE_HANDLED;
4435 }
4436 
4437 int trace_empty(struct trace_iterator *iter)
4438 {
4439 	struct ring_buffer_iter *buf_iter;
4440 	int cpu;
4441 
4442 	/* If we are looking at one CPU buffer, only check that one */
4443 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4444 		cpu = iter->cpu_file;
4445 		buf_iter = trace_buffer_iter(iter, cpu);
4446 		if (buf_iter) {
4447 			if (!ring_buffer_iter_empty(buf_iter))
4448 				return 0;
4449 		} else {
4450 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451 				return 0;
4452 		}
4453 		return 1;
4454 	}
4455 
4456 	for_each_tracing_cpu(cpu) {
4457 		buf_iter = trace_buffer_iter(iter, cpu);
4458 		if (buf_iter) {
4459 			if (!ring_buffer_iter_empty(buf_iter))
4460 				return 0;
4461 		} else {
4462 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4463 				return 0;
4464 		}
4465 	}
4466 
4467 	return 1;
4468 }
4469 
4470 /*  Called with trace_event_read_lock() held. */
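/*
 * Output precedence: lost-event markers are printed first, then a
 * tracer-specific print_line() callback gets a chance, then the
 * printk-msg-only cases, and finally the bin/hex/raw/default formats in
 * that order.
 */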
4471 enum print_line_t print_trace_line(struct trace_iterator *iter)
4472 {
4473 	struct trace_array *tr = iter->tr;
4474 	unsigned long trace_flags = tr->trace_flags;
4475 	enum print_line_t ret;
4476 
4477 	if (iter->lost_events) {
4478 		if (iter->lost_events == (unsigned long)-1)
4479 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4480 					 iter->cpu);
4481 		else
4482 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4483 					 iter->cpu, iter->lost_events);
4484 		if (trace_seq_has_overflowed(&iter->seq))
4485 			return TRACE_TYPE_PARTIAL_LINE;
4486 	}
4487 
4488 	if (iter->trace && iter->trace->print_line) {
4489 		ret = iter->trace->print_line(iter);
4490 		if (ret != TRACE_TYPE_UNHANDLED)
4491 			return ret;
4492 	}
4493 
4494 	if (iter->ent->type == TRACE_BPUTS &&
4495 			trace_flags & TRACE_ITER_PRINTK &&
4496 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4497 		return trace_print_bputs_msg_only(iter);
4498 
4499 	if (iter->ent->type == TRACE_BPRINT &&
4500 			trace_flags & TRACE_ITER_PRINTK &&
4501 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4502 		return trace_print_bprintk_msg_only(iter);
4503 
4504 	if (iter->ent->type == TRACE_PRINT &&
4505 			trace_flags & TRACE_ITER_PRINTK &&
4506 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4507 		return trace_print_printk_msg_only(iter);
4508 
4509 	if (trace_flags & TRACE_ITER_BIN)
4510 		return print_bin_fmt(iter);
4511 
4512 	if (trace_flags & TRACE_ITER_HEX)
4513 		return print_hex_fmt(iter);
4514 
4515 	if (trace_flags & TRACE_ITER_RAW)
4516 		return print_raw_fmt(iter);
4517 
4518 	return print_trace_fmt(iter);
4519 }
4520 
4521 void trace_latency_header(struct seq_file *m)
4522 {
4523 	struct trace_iterator *iter = m->private;
4524 	struct trace_array *tr = iter->tr;
4525 
4526 	/* print nothing if the buffers are empty */
4527 	if (trace_empty(iter))
4528 		return;
4529 
4530 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4531 		print_trace_header(m, iter);
4532 
4533 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4534 		print_lat_help_header(m);
4535 }
4536 
4537 void trace_default_header(struct seq_file *m)
4538 {
4539 	struct trace_iterator *iter = m->private;
4540 	struct trace_array *tr = iter->tr;
4541 	unsigned long trace_flags = tr->trace_flags;
4542 
4543 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4544 		return;
4545 
4546 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4547 		/* print nothing if the buffers are empty */
4548 		if (trace_empty(iter))
4549 			return;
4550 		print_trace_header(m, iter);
4551 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4552 			print_lat_help_header(m);
4553 	} else {
4554 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4555 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4556 				print_func_help_header_irq(iter->array_buffer,
4557 							   m, trace_flags);
4558 			else
4559 				print_func_help_header(iter->array_buffer, m,
4560 						       trace_flags);
4561 		}
4562 	}
4563 }
4564 
4565 static void test_ftrace_alive(struct seq_file *m)
4566 {
4567 	if (!ftrace_is_dead())
4568 		return;
4569 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4570 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4571 }
4572 
4573 #ifdef CONFIG_TRACER_MAX_TRACE
4574 static void show_snapshot_main_help(struct seq_file *m)
4575 {
4576 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4577 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4578 		    "#                      Takes a snapshot of the main buffer.\n"
4579 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4580 		    "#                      (Doesn't have to be '2'; works with any number\n"
4581 		    "#                       that is not a '0' or '1')\n");
4582 }
4583 
4584 static void show_snapshot_percpu_help(struct seq_file *m)
4585 {
4586 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4587 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4588 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4590 #else
4591 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4592 		    "#                     Must use main snapshot file to allocate.\n");
4593 #endif
4594 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4595 		    "#                      (Doesn't have to be '2'; works with any number\n"
4596 		    "#                       that is not a '0' or '1')\n");
4597 }
4598 
4599 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4600 {
4601 	if (iter->tr->allocated_snapshot)
4602 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4603 	else
4604 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4605 
4606 	seq_puts(m, "# Snapshot commands:\n");
4607 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4608 		show_snapshot_main_help(m);
4609 	else
4610 		show_snapshot_percpu_help(m);
4611 }
4612 #else
4613 /* Should never be called */
4614 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4615 #endif
4616 
4617 static int s_show(struct seq_file *m, void *v)
4618 {
4619 	struct trace_iterator *iter = v;
4620 	int ret;
4621 
4622 	if (iter->ent == NULL) {
4623 		if (iter->tr) {
4624 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4625 			seq_puts(m, "#\n");
4626 			test_ftrace_alive(m);
4627 		}
4628 		if (iter->snapshot && trace_empty(iter))
4629 			print_snapshot_help(m, iter);
4630 		else if (iter->trace && iter->trace->print_header)
4631 			iter->trace->print_header(m);
4632 		else
4633 			trace_default_header(m);
4634 
4635 	} else if (iter->leftover) {
4636 		/*
4637 		 * If we filled the seq_file buffer earlier, we
4638 		 * want to just show it now.
4639 		 */
4640 		ret = trace_print_seq(m, &iter->seq);
4641 
4642 		/* ret should this time be zero, but you never know */
4643 		iter->leftover = ret;
4644 
4645 	} else {
4646 		print_trace_line(iter);
4647 		ret = trace_print_seq(m, &iter->seq);
4648 		/*
4649 		 * If we overflow the seq_file buffer, then it will
4650 		 * ask us for this data again at start up.
4651 		 * Use that instead.
4652 		 *  ret is 0 if seq_file write succeeded.
4653 		 *        -1 otherwise.
4654 		 */
4655 		iter->leftover = ret;
4656 	}
4657 
4658 	return 0;
4659 }
4660 
4661 /*
4662  * Should be used after trace_array_get(); trace_types_lock
4663  * ensures that i_cdev was already initialized.
4664  */
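/*
 * The per-CPU files store cpu + 1 in i_cdev so that a NULL i_cdev (the top
 * level files) can mean "all CPUs"; e.g. opening per_cpu/cpu2/trace makes
 * this return 2.
 */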
4665 static inline int tracing_get_cpu(struct inode *inode)
4666 {
4667 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4668 		return (long)inode->i_cdev - 1;
4669 	return RING_BUFFER_ALL_CPUS;
4670 }
4671 
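/*
 * s_start(), s_next(), s_show() and s_stop() implement the seq_file
 * protocol for the "trace" file: seq_read() brackets each traversal with
 * s_start()/s_stop() and alternates s_show()/s_next() for the entries in
 * between, while s_start() handles rewinding to *pos when the file is
 * read in chunks.
 */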
4672 static const struct seq_operations tracer_seq_ops = {
4673 	.start		= s_start,
4674 	.next		= s_next,
4675 	.stop		= s_stop,
4676 	.show		= s_show,
4677 };
4678 
4679 static struct trace_iterator *
4680 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4681 {
4682 	struct trace_array *tr = inode->i_private;
4683 	struct trace_iterator *iter;
4684 	int cpu;
4685 
4686 	if (tracing_disabled)
4687 		return ERR_PTR(-ENODEV);
4688 
4689 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4690 	if (!iter)
4691 		return ERR_PTR(-ENOMEM);
4692 
4693 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4694 				    GFP_KERNEL);
4695 	if (!iter->buffer_iter)
4696 		goto release;
4697 
4698 	/*
4699 	 * trace_find_next_entry() may need to save off iter->ent.
4700 	 * It will place it into the iter->temp buffer. As most
4701 	 * events are less than 128 bytes, allocate a buffer of that size.
4702 	 * If one is greater, then trace_find_next_entry() will
4703 	 * allocate a new buffer to adjust for the bigger iter->ent.
4704 	 * It's not critical if it fails to get allocated here.
4705 	 */
4706 	iter->temp = kmalloc(128, GFP_KERNEL);
4707 	if (iter->temp)
4708 		iter->temp_size = 128;
4709 
4710 	/*
4711 	 * trace_event_printf() may need to modify the given format
4712 	 * string to replace %p with %px so that it shows the real address
4713 	 * instead of a hash value. However, that is only needed for event
4714 	 * tracing; other tracers may not need it. Defer the allocation
4715 	 * until it is needed.
4716 	 */
4717 	iter->fmt = NULL;
4718 	iter->fmt_size = 0;
4719 
4720 	/*
4721 	 * We make a copy of the current tracer to avoid concurrent
4722 	 * changes on it while we are reading.
4723 	 */
4724 	mutex_lock(&trace_types_lock);
4725 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4726 	if (!iter->trace)
4727 		goto fail;
4728 
4729 	*iter->trace = *tr->current_trace;
4730 
4731 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4732 		goto fail;
4733 
4734 	iter->tr = tr;
4735 
4736 #ifdef CONFIG_TRACER_MAX_TRACE
4737 	/* Currently only the top directory has a snapshot */
4738 	if (tr->current_trace->print_max || snapshot)
4739 		iter->array_buffer = &tr->max_buffer;
4740 	else
4741 #endif
4742 		iter->array_buffer = &tr->array_buffer;
4743 	iter->snapshot = snapshot;
4744 	iter->pos = -1;
4745 	iter->cpu_file = tracing_get_cpu(inode);
4746 	mutex_init(&iter->mutex);
4747 
4748 	/* Notify the tracer early; before we stop tracing. */
4749 	if (iter->trace->open)
4750 		iter->trace->open(iter);
4751 
4752 	/* Annotate start of buffers if we had overruns */
4753 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4754 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4755 
4756 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4757 	if (trace_clocks[tr->clock_id].in_ns)
4758 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4759 
4760 	/*
4761 	 * If pause-on-trace is enabled, then stop the trace while
4762 	 * dumping, unless this is the "snapshot" file
4763 	 */
4764 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4765 		tracing_stop_tr(tr);
4766 
4767 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4768 		for_each_tracing_cpu(cpu) {
4769 			iter->buffer_iter[cpu] =
4770 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4771 							 cpu, GFP_KERNEL);
4772 		}
4773 		ring_buffer_read_prepare_sync();
4774 		for_each_tracing_cpu(cpu) {
4775 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4776 			tracing_iter_reset(iter, cpu);
4777 		}
4778 	} else {
4779 		cpu = iter->cpu_file;
4780 		iter->buffer_iter[cpu] =
4781 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4782 						 cpu, GFP_KERNEL);
4783 		ring_buffer_read_prepare_sync();
4784 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4785 		tracing_iter_reset(iter, cpu);
4786 	}
4787 
4788 	mutex_unlock(&trace_types_lock);
4789 
4790 	return iter;
4791 
4792  fail:
4793 	mutex_unlock(&trace_types_lock);
4794 	kfree(iter->trace);
4795 	kfree(iter->temp);
4796 	kfree(iter->buffer_iter);
4797 release:
4798 	seq_release_private(inode, file);
4799 	return ERR_PTR(-ENOMEM);
4800 }
4801 
4802 int tracing_open_generic(struct inode *inode, struct file *filp)
4803 {
4804 	int ret;
4805 
4806 	ret = tracing_check_open_get_tr(NULL);
4807 	if (ret)
4808 		return ret;
4809 
4810 	filp->private_data = inode->i_private;
4811 	return 0;
4812 }
4813 
4814 bool tracing_is_disabled(void)
4815 {
4816 	return (tracing_disabled) ? true : false;
4817 }
4818 
4819 /*
4820  * Open and update trace_array ref count.
4821  * Must have the current trace_array passed to it.
4822  */
4823 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4824 {
4825 	struct trace_array *tr = inode->i_private;
4826 	int ret;
4827 
4828 	ret = tracing_check_open_get_tr(tr);
4829 	if (ret)
4830 		return ret;
4831 
4832 	filp->private_data = inode->i_private;
4833 
4834 	return 0;
4835 }
4836 
4837 static int tracing_release(struct inode *inode, struct file *file)
4838 {
4839 	struct trace_array *tr = inode->i_private;
4840 	struct seq_file *m = file->private_data;
4841 	struct trace_iterator *iter;
4842 	int cpu;
4843 
4844 	if (!(file->f_mode & FMODE_READ)) {
4845 		trace_array_put(tr);
4846 		return 0;
4847 	}
4848 
4849 	/* Writes do not use seq_file */
4850 	iter = m->private;
4851 	mutex_lock(&trace_types_lock);
4852 
4853 	for_each_tracing_cpu(cpu) {
4854 		if (iter->buffer_iter[cpu])
4855 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4856 	}
4857 
4858 	if (iter->trace && iter->trace->close)
4859 		iter->trace->close(iter);
4860 
4861 	if (!iter->snapshot && tr->stop_count)
4862 		/* reenable tracing if it was previously enabled */
4863 		tracing_start_tr(tr);
4864 
4865 	__trace_array_put(tr);
4866 
4867 	mutex_unlock(&trace_types_lock);
4868 
4869 	mutex_destroy(&iter->mutex);
4870 	free_cpumask_var(iter->started);
4871 	kfree(iter->fmt);
4872 	kfree(iter->temp);
4873 	kfree(iter->trace);
4874 	kfree(iter->buffer_iter);
4875 	seq_release_private(inode, file);
4876 
4877 	return 0;
4878 }
4879 
4880 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4881 {
4882 	struct trace_array *tr = inode->i_private;
4883 
4884 	trace_array_put(tr);
4885 	return 0;
4886 }
4887 
4888 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4889 {
4890 	struct trace_array *tr = inode->i_private;
4891 
4892 	trace_array_put(tr);
4893 
4894 	return single_release(inode, file);
4895 }
4896 
4897 static int tracing_open(struct inode *inode, struct file *file)
4898 {
4899 	struct trace_array *tr = inode->i_private;
4900 	struct trace_iterator *iter;
4901 	int ret;
4902 
4903 	ret = tracing_check_open_get_tr(tr);
4904 	if (ret)
4905 		return ret;
4906 
4907 	/* If this file was open for write, then erase contents */
4908 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4909 		int cpu = tracing_get_cpu(inode);
4910 		struct array_buffer *trace_buf = &tr->array_buffer;
4911 
4912 #ifdef CONFIG_TRACER_MAX_TRACE
4913 		if (tr->current_trace->print_max)
4914 			trace_buf = &tr->max_buffer;
4915 #endif
4916 
4917 		if (cpu == RING_BUFFER_ALL_CPUS)
4918 			tracing_reset_online_cpus(trace_buf);
4919 		else
4920 			tracing_reset_cpu(trace_buf, cpu);
4921 	}
4922 
4923 	if (file->f_mode & FMODE_READ) {
4924 		iter = __tracing_open(inode, file, false);
4925 		if (IS_ERR(iter))
4926 			ret = PTR_ERR(iter);
4927 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4928 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4929 	}
4930 
4931 	if (ret < 0)
4932 		trace_array_put(tr);
4933 
4934 	return ret;
4935 }
4936 
4937 /*
4938  * Some tracers are not suitable for instance buffers.
4939  * A tracer is always available for the global array (toplevel)
4940  * or if it explicitly states that it is.
4941  */
4942 static bool
4943 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4944 {
4945 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4946 }
4947 
4948 /* Find the next tracer that this trace array may use */
4949 static struct tracer *
4950 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4951 {
4952 	while (t && !trace_ok_for_array(t, tr))
4953 		t = t->next;
4954 
4955 	return t;
4956 }
4957 
4958 static void *
4959 t_next(struct seq_file *m, void *v, loff_t *pos)
4960 {
4961 	struct trace_array *tr = m->private;
4962 	struct tracer *t = v;
4963 
4964 	(*pos)++;
4965 
4966 	if (t)
4967 		t = get_tracer_for_array(tr, t->next);
4968 
4969 	return t;
4970 }
4971 
4972 static void *t_start(struct seq_file *m, loff_t *pos)
4973 {
4974 	struct trace_array *tr = m->private;
4975 	struct tracer *t;
4976 	loff_t l = 0;
4977 
4978 	mutex_lock(&trace_types_lock);
4979 
4980 	t = get_tracer_for_array(tr, trace_types);
4981 	for (; t && l < *pos; t = t_next(m, t, &l))
4982 			;
4983 
4984 	return t;
4985 }
4986 
4987 static void t_stop(struct seq_file *m, void *p)
4988 {
4989 	mutex_unlock(&trace_types_lock);
4990 }
4991 
4992 static int t_show(struct seq_file *m, void *v)
4993 {
4994 	struct tracer *t = v;
4995 
4996 	if (!t)
4997 		return 0;
4998 
4999 	seq_puts(m, t->name);
5000 	if (t->next)
5001 		seq_putc(m, ' ');
5002 	else
5003 		seq_putc(m, '\n');
5004 
5005 	return 0;
5006 }
5007 
5008 static const struct seq_operations show_traces_seq_ops = {
5009 	.start		= t_start,
5010 	.next		= t_next,
5011 	.stop		= t_stop,
5012 	.show		= t_show,
5013 };
5014 
5015 static int show_traces_open(struct inode *inode, struct file *file)
5016 {
5017 	struct trace_array *tr = inode->i_private;
5018 	struct seq_file *m;
5019 	int ret;
5020 
5021 	ret = tracing_check_open_get_tr(tr);
5022 	if (ret)
5023 		return ret;
5024 
5025 	ret = seq_open(file, &show_traces_seq_ops);
5026 	if (ret) {
5027 		trace_array_put(tr);
5028 		return ret;
5029 	}
5030 
5031 	m = file->private_data;
5032 	m->private = tr;
5033 
5034 	return 0;
5035 }
5036 
5037 static int show_traces_release(struct inode *inode, struct file *file)
5038 {
5039 	struct trace_array *tr = inode->i_private;
5040 
5041 	trace_array_put(tr);
5042 	return seq_release(inode, file);
5043 }
5044 
5045 static ssize_t
5046 tracing_write_stub(struct file *filp, const char __user *ubuf,
5047 		   size_t count, loff_t *ppos)
5048 {
5049 	return count;
5050 }
5051 
5052 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5053 {
5054 	int ret;
5055 
5056 	if (file->f_mode & FMODE_READ)
5057 		ret = seq_lseek(file, offset, whence);
5058 	else
5059 		file->f_pos = ret = 0;
5060 
5061 	return ret;
5062 }
5063 
5064 static const struct file_operations tracing_fops = {
5065 	.open		= tracing_open,
5066 	.read		= seq_read,
5067 	.write		= tracing_write_stub,
5068 	.llseek		= tracing_lseek,
5069 	.release	= tracing_release,
5070 };
5071 
5072 static const struct file_operations show_traces_fops = {
5073 	.open		= show_traces_open,
5074 	.read		= seq_read,
5075 	.llseek		= seq_lseek,
5076 	.release	= show_traces_release,
5077 };
5078 
5079 static ssize_t
5080 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5081 		     size_t count, loff_t *ppos)
5082 {
5083 	struct trace_array *tr = file_inode(filp)->i_private;
5084 	char *mask_str;
5085 	int len;
5086 
5087 	len = snprintf(NULL, 0, "%*pb\n",
5088 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5089 	mask_str = kmalloc(len, GFP_KERNEL);
5090 	if (!mask_str)
5091 		return -ENOMEM;
5092 
5093 	len = snprintf(mask_str, len, "%*pb\n",
5094 		       cpumask_pr_args(tr->tracing_cpumask));
5095 	if (len >= count) {
5096 		count = -EINVAL;
5097 		goto out_err;
5098 	}
5099 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5100 
5101 out_err:
5102 	kfree(mask_str);
5103 
5104 	return count;
5105 }
5106 
5107 int tracing_set_cpumask(struct trace_array *tr,
5108 			cpumask_var_t tracing_cpumask_new)
5109 {
5110 	int cpu;
5111 
5112 	if (!tr)
5113 		return -EINVAL;
5114 
5115 	local_irq_disable();
5116 	arch_spin_lock(&tr->max_lock);
5117 	for_each_tracing_cpu(cpu) {
5118 		/*
5119 		 * Increase/decrease the disabled counter if we are
5120 		 * about to flip a bit in the cpumask:
5121 		 */
5122 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5123 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5124 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5125 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5126 		}
5127 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5128 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5129 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5130 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5131 		}
5132 	}
5133 	arch_spin_unlock(&tr->max_lock);
5134 	local_irq_enable();
5135 
5136 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5137 
5138 	return 0;
5139 }
5140 
5141 static ssize_t
5142 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5143 		      size_t count, loff_t *ppos)
5144 {
5145 	struct trace_array *tr = file_inode(filp)->i_private;
5146 	cpumask_var_t tracing_cpumask_new;
5147 	int err;
5148 
5149 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5150 		return -ENOMEM;
5151 
5152 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5153 	if (err)
5154 		goto err_free;
5155 
5156 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5157 	if (err)
5158 		goto err_free;
5159 
5160 	free_cpumask_var(tracing_cpumask_new);
5161 
5162 	return count;
5163 
5164 err_free:
5165 	free_cpumask_var(tracing_cpumask_new);
5166 
5167 	return err;
5168 }
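/*
 * Example: "echo 3 > tracing_cpumask" limits tracing to CPUs 0 and 1.  The
 * mask is parsed and printed in the usual hex bitmap format, so reading the
 * file shows the currently traced CPUs in the same form.
 */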
5169 
5170 static const struct file_operations tracing_cpumask_fops = {
5171 	.open		= tracing_open_generic_tr,
5172 	.read		= tracing_cpumask_read,
5173 	.write		= tracing_cpumask_write,
5174 	.release	= tracing_release_generic_tr,
5175 	.llseek		= generic_file_llseek,
5176 };
5177 
5178 static int tracing_trace_options_show(struct seq_file *m, void *v)
5179 {
5180 	struct tracer_opt *trace_opts;
5181 	struct trace_array *tr = m->private;
5182 	u32 tracer_flags;
5183 	int i;
5184 
5185 	mutex_lock(&trace_types_lock);
5186 	tracer_flags = tr->current_trace->flags->val;
5187 	trace_opts = tr->current_trace->flags->opts;
5188 
5189 	for (i = 0; trace_options[i]; i++) {
5190 		if (tr->trace_flags & (1 << i))
5191 			seq_printf(m, "%s\n", trace_options[i]);
5192 		else
5193 			seq_printf(m, "no%s\n", trace_options[i]);
5194 	}
5195 
5196 	for (i = 0; trace_opts[i].name; i++) {
5197 		if (tracer_flags & trace_opts[i].bit)
5198 			seq_printf(m, "%s\n", trace_opts[i].name);
5199 		else
5200 			seq_printf(m, "no%s\n", trace_opts[i].name);
5201 	}
5202 	mutex_unlock(&trace_types_lock);
5203 
5204 	return 0;
5205 }
5206 
5207 static int __set_tracer_option(struct trace_array *tr,
5208 			       struct tracer_flags *tracer_flags,
5209 			       struct tracer_opt *opts, int neg)
5210 {
5211 	struct tracer *trace = tracer_flags->trace;
5212 	int ret;
5213 
5214 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5215 	if (ret)
5216 		return ret;
5217 
5218 	if (neg)
5219 		tracer_flags->val &= ~opts->bit;
5220 	else
5221 		tracer_flags->val |= opts->bit;
5222 	return 0;
5223 }
5224 
5225 /* Try to assign a tracer specific option */
5226 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5227 {
5228 	struct tracer *trace = tr->current_trace;
5229 	struct tracer_flags *tracer_flags = trace->flags;
5230 	struct tracer_opt *opts = NULL;
5231 	int i;
5232 
5233 	for (i = 0; tracer_flags->opts[i].name; i++) {
5234 		opts = &tracer_flags->opts[i];
5235 
5236 		if (strcmp(cmp, opts->name) == 0)
5237 			return __set_tracer_option(tr, trace->flags, opts, neg);
5238 	}
5239 
5240 	return -EINVAL;
5241 }
5242 
5243 /* Some tracers require overwrite to stay enabled */
5244 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5245 {
5246 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5247 		return -1;
5248 
5249 	return 0;
5250 }
5251 
5252 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5253 {
5254 	int *map;
5255 
5256 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5257 	    (mask == TRACE_ITER_RECORD_CMD))
5258 		lockdep_assert_held(&event_mutex);
5259 
5260 	/* do nothing if flag is already set */
5261 	if (!!(tr->trace_flags & mask) == !!enabled)
5262 		return 0;
5263 
5264 	/* Give the tracer a chance to approve the change */
5265 	if (tr->current_trace->flag_changed)
5266 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5267 			return -EINVAL;
5268 
5269 	if (enabled)
5270 		tr->trace_flags |= mask;
5271 	else
5272 		tr->trace_flags &= ~mask;
5273 
5274 	if (mask == TRACE_ITER_RECORD_CMD)
5275 		trace_event_enable_cmd_record(enabled);
5276 
5277 	if (mask == TRACE_ITER_RECORD_TGID) {
5278 		if (!tgid_map) {
5279 			tgid_map_max = pid_max;
5280 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5281 				       GFP_KERNEL);
5282 
5283 			/*
5284 			 * Pairs with smp_load_acquire() in
5285 			 * trace_find_tgid_ptr() to ensure that if it observes
5286 			 * the tgid_map we just allocated then it also observes
5287 			 * the corresponding tgid_map_max value.
5288 			 */
5289 			smp_store_release(&tgid_map, map);
5290 		}
5291 		if (!tgid_map) {
5292 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5293 			return -ENOMEM;
5294 		}
5295 
5296 		trace_event_enable_tgid_record(enabled);
5297 	}
5298 
5299 	if (mask == TRACE_ITER_EVENT_FORK)
5300 		trace_event_follow_fork(tr, enabled);
5301 
5302 	if (mask == TRACE_ITER_FUNC_FORK)
5303 		ftrace_pid_follow_fork(tr, enabled);
5304 
5305 	if (mask == TRACE_ITER_OVERWRITE) {
5306 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5307 #ifdef CONFIG_TRACER_MAX_TRACE
5308 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5309 #endif
5310 	}
5311 
5312 	if (mask == TRACE_ITER_PRINTK) {
5313 		trace_printk_start_stop_comm(enabled);
5314 		trace_printk_control(enabled);
5315 	}
5316 
5317 	return 0;
5318 }
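/*
 * Example: set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 0) first lets the
 * current tracer veto the change (tracers that depend on overwrite use
 * trace_keep_overwrite() above for this), then switches the ring buffer(s)
 * so that new events are dropped instead of overwriting old ones when the
 * buffer is full.
 */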
5319 
5320 int trace_set_options(struct trace_array *tr, char *option)
5321 {
5322 	char *cmp;
5323 	int neg = 0;
5324 	int ret;
5325 	size_t orig_len = strlen(option);
5326 	int len;
5327 
5328 	cmp = strstrip(option);
5329 
5330 	len = str_has_prefix(cmp, "no");
5331 	if (len)
5332 		neg = 1;
5333 
5334 	cmp += len;
5335 
5336 	mutex_lock(&event_mutex);
5337 	mutex_lock(&trace_types_lock);
5338 
5339 	ret = match_string(trace_options, -1, cmp);
5340 	/* If no option could be set, test the specific tracer options */
5341 	if (ret < 0)
5342 		ret = set_tracer_option(tr, cmp, neg);
5343 	else
5344 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5345 
5346 	mutex_unlock(&trace_types_lock);
5347 	mutex_unlock(&event_mutex);
5348 
5349 	/*
5350 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5351 	 * turn it back into a space.
5352 	 */
5353 	if (orig_len > strlen(option))
5354 		option[strlen(option)] = ' ';
5355 
5356 	return ret;
5357 }
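/*
 * Example: trace_set_options(tr, "nostacktrace") clears the global
 * stacktrace option, while a name that is not in trace_options (for
 * instance one of function_graph's funcgraph-* options) falls through to
 * set_tracer_option() above.
 */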
5358 
5359 static void __init apply_trace_boot_options(void)
5360 {
5361 	char *buf = trace_boot_options_buf;
5362 	char *option;
5363 
5364 	while (true) {
5365 		option = strsep(&buf, ",");
5366 
5367 		if (!option)
5368 			break;
5369 
5370 		if (*option)
5371 			trace_set_options(&global_trace, option);
5372 
5373 		/* Put back the comma to allow this to be called again */
5374 		if (buf)
5375 			*(buf - 1) = ',';
5376 	}
5377 }
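/*
 * trace_boot_options_buf is filled from the "trace_options=" kernel command
 * line parameter, so booting with e.g.
 *
 *	trace_options=sym-offset,nooverwrite
 *
 * applies those two options to the global trace array while the tracer
 * initializes.
 */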
5378 
5379 static ssize_t
5380 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5381 			size_t cnt, loff_t *ppos)
5382 {
5383 	struct seq_file *m = filp->private_data;
5384 	struct trace_array *tr = m->private;
5385 	char buf[64];
5386 	int ret;
5387 
5388 	if (cnt >= sizeof(buf))
5389 		return -EINVAL;
5390 
5391 	if (copy_from_user(buf, ubuf, cnt))
5392 		return -EFAULT;
5393 
5394 	buf[cnt] = 0;
5395 
5396 	ret = trace_set_options(tr, buf);
5397 	if (ret < 0)
5398 		return ret;
5399 
5400 	*ppos += cnt;
5401 
5402 	return cnt;
5403 }
5404 
5405 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5406 {
5407 	struct trace_array *tr = inode->i_private;
5408 	int ret;
5409 
5410 	ret = tracing_check_open_get_tr(tr);
5411 	if (ret)
5412 		return ret;
5413 
5414 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5415 	if (ret < 0)
5416 		trace_array_put(tr);
5417 
5418 	return ret;
5419 }
5420 
5421 static const struct file_operations tracing_iter_fops = {
5422 	.open		= tracing_trace_options_open,
5423 	.read		= seq_read,
5424 	.llseek		= seq_lseek,
5425 	.release	= tracing_single_release_tr,
5426 	.write		= tracing_trace_options_write,
5427 };
5428 
5429 static const char readme_msg[] =
5430 	"tracing mini-HOWTO:\n\n"
5431 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5432 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5433 	" Important files:\n"
5434 	"  trace\t\t\t- The static contents of the buffer\n"
5435 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5436 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5437 	"  current_tracer\t- function and latency tracers\n"
5438 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5439 	"  error_log\t- error log for failed commands (that support it)\n"
5440 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5441 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5442 	"  trace_clock\t\t- change the clock used to order events\n"
5443 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5444 	"      global:   Synced across CPUs but slows tracing down.\n"
5445 	"     counter:   Not a clock, but just an increment\n"
5446 	"      uptime:   Jiffy counter from time of boot\n"
5447 	"        perf:   Same clock that perf events use\n"
5448 #ifdef CONFIG_X86_64
5449 	"     x86-tsc:   TSC cycle counter\n"
5450 #endif
5451 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5452 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5453 	"    absolute:   Absolute (standalone) timestamp\n"
5454 	"\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5455 	"\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5456 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5457 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5458 	"\t\t\t  Remove sub-buffer with rmdir\n"
5459 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5460 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5461 	"\t\t\t  option name\n"
5462 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5463 #ifdef CONFIG_DYNAMIC_FTRACE
5464 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5465 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5466 	"\t\t\t  functions\n"
5467 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5468 	"\t     modules: Can select a group via module\n"
5469 	"\t      Format: :mod:<module-name>\n"
5470 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5471 	"\t    triggers: a command to perform when function is hit\n"
5472 	"\t      Format: <function>:<trigger>[:count]\n"
5473 	"\t     trigger: traceon, traceoff\n"
5474 	"\t\t      enable_event:<system>:<event>\n"
5475 	"\t\t      disable_event:<system>:<event>\n"
5476 #ifdef CONFIG_STACKTRACE
5477 	"\t\t      stacktrace\n"
5478 #endif
5479 #ifdef CONFIG_TRACER_SNAPSHOT
5480 	"\t\t      snapshot\n"
5481 #endif
5482 	"\t\t      dump\n"
5483 	"\t\t      cpudump\n"
5484 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5485 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5486 	"\t     The first one will disable tracing every time do_fault is hit\n"
5487 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5488 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5489 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5490 	"\t       the counter will not decrement. It only decrements when the\n"
5491 	"\t       trigger did work\n"
5492 	"\t     To remove trigger without count:\n"
5493 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5494 	"\t     To remove trigger with a count:\n"
5495 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5496 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5497 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5498 	"\t    modules: Can select a group via module command :mod:\n"
5499 	"\t    Does not accept triggers\n"
5500 #endif /* CONFIG_DYNAMIC_FTRACE */
5501 #ifdef CONFIG_FUNCTION_TRACER
5502 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5503 	"\t\t    (function)\n"
5504 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5505 	"\t\t    (function)\n"
5506 #endif
5507 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5508 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5509 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5510 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5511 #endif
5512 #ifdef CONFIG_TRACER_SNAPSHOT
5513 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5514 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5515 	"\t\t\t  information\n"
5516 #endif
5517 #ifdef CONFIG_STACK_TRACER
5518 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5519 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5520 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5521 	"\t\t\t  new trace)\n"
5522 #ifdef CONFIG_DYNAMIC_FTRACE
5523 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5524 	"\t\t\t  traces\n"
5525 #endif
5526 #endif /* CONFIG_STACK_TRACER */
5527 #ifdef CONFIG_DYNAMIC_EVENTS
5528 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5529 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5530 #endif
5531 #ifdef CONFIG_KPROBE_EVENTS
5532 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5533 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5534 #endif
5535 #ifdef CONFIG_UPROBE_EVENTS
5536 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5537 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5538 #endif
5539 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5540 	"\t  accepts: event-definitions (one definition per line)\n"
5541 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5542 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5543 #ifdef CONFIG_HIST_TRIGGERS
5544 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5545 #endif
5546 	"\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5547 	"\t           -:[<group>/]<event>\n"
5548 #ifdef CONFIG_KPROBE_EVENTS
5549 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5550 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5551 #endif
5552 #ifdef CONFIG_UPROBE_EVENTS
5553 	"\t    place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5554 #endif
5555 	"\t     args: <name>=fetcharg[:type]\n"
5556 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5557 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5558 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5559 #else
5560 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5561 #endif
5562 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5563 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5564 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5565 	"\t           <type>\\[<array-size>\\]\n"
5566 #ifdef CONFIG_HIST_TRIGGERS
5567 	"\t    field: <stype> <name>;\n"
5568 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5569 	"\t           [unsigned] char/int/long\n"
5570 #endif
5571 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5572 	"\t            of the <attached-group>/<attached-event>.\n"
5573 #endif
5574 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5575 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5576 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5577 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5578 	"\t\t\t  events\n"
5579 	"      filter\t\t- If set, only events passing filter are traced\n"
5580 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5581 	"\t\t\t  <event>:\n"
5582 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5583 	"      filter\t\t- If set, only events passing filter are traced\n"
5584 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5585 	"\t    Format: <trigger>[:count][if <filter>]\n"
5586 	"\t   trigger: traceon, traceoff\n"
5587 	"\t            enable_event:<system>:<event>\n"
5588 	"\t            disable_event:<system>:<event>\n"
5589 #ifdef CONFIG_HIST_TRIGGERS
5590 	"\t            enable_hist:<system>:<event>\n"
5591 	"\t            disable_hist:<system>:<event>\n"
5592 #endif
5593 #ifdef CONFIG_STACKTRACE
5594 	"\t\t    stacktrace\n"
5595 #endif
5596 #ifdef CONFIG_TRACER_SNAPSHOT
5597 	"\t\t    snapshot\n"
5598 #endif
5599 #ifdef CONFIG_HIST_TRIGGERS
5600 	"\t\t    hist (see below)\n"
5601 #endif
5602 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5603 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5604 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5605 	"\t                  events/block/block_unplug/trigger\n"
5606 	"\t   The first disables tracing every time block_unplug is hit.\n"
5607 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5608 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5609 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5610 	"\t   Like function triggers, the counter is only decremented if it\n"
5611 	"\t    enabled or disabled tracing.\n"
5612 	"\t   To remove a trigger without a count:\n"
5613 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5614 	"\t   To remove a trigger with a count:\n"
5615 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5616 	"\t   Filters can be ignored when removing a trigger.\n"
5617 #ifdef CONFIG_HIST_TRIGGERS
5618 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5619 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5620 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5621 	"\t            [:values=<field1[,field2,...]>]\n"
5622 	"\t            [:sort=<field1[,field2,...]>]\n"
5623 	"\t            [:size=#entries]\n"
5624 	"\t            [:pause][:continue][:clear]\n"
5625 	"\t            [:name=histname1]\n"
5626 	"\t            [:<handler>.<action>]\n"
5627 	"\t            [if <filter>]\n\n"
5628 	"\t    Note, special fields can be used as well:\n"
5629 	"\t            common_timestamp - to record current timestamp\n"
5630 	"\t            common_cpu - to record the CPU the event happened on\n"
5631 	"\n"
5632 	"\t    A hist trigger variable can be:\n"
5633 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5634 	"\t        - a reference to another variable e.g. y=$x,\n"
5635 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5636 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5637 	"\n"
5638 	"\t    hist trigger aritmethic expressions support addition(+), subtraction(-),\n"
5639 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5640 	"\t    variable reference, field or numeric literal.\n"
5641 	"\n"
5642 	"\t    When a matching event is hit, an entry is added to a hash\n"
5643 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5644 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5645 	"\t    correspond to fields in the event's format description.  Keys\n"
5646 	"\t    can be any field, or the special string 'stacktrace'.\n"
5647 	"\t    Compound keys consisting of up to two fields can be specified\n"
5648 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5649 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5650 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5651 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5652 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5653 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5654 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5655 	"\t    its histogram data will be shared with other triggers of the\n"
5656 	"\t    same name, and trigger hits will update this common data.\n\n"
5657 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5658 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5659 	"\t    triggers attached to an event, there will be a table for each\n"
5660 	"\t    trigger in the output.  The table displayed for a named\n"
5661 	"\t    trigger will be the same as any other instance having the\n"
5662 	"\t    same name.  The default format used to display a given field\n"
5663 	"\t    can be modified by appending any of the following modifiers\n"
5664 	"\t    to the field name, as applicable:\n\n"
5665 	"\t            .hex        display a number as a hex value\n"
5666 	"\t            .sym        display an address as a symbol\n"
5667 	"\t            .sym-offset display an address as a symbol and offset\n"
5668 	"\t            .execname   display a common_pid as a program name\n"
5669 	"\t            .syscall    display a syscall id as a syscall name\n"
5670 	"\t            .log2       display log2 value rather than raw number\n"
5671 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5672 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5673 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5674 	"\t    trigger or to start a hist trigger but not log any events\n"
5675 	"\t    until told to do so.  'continue' can be used to start or\n"
5676 	"\t    restart a paused hist trigger.\n\n"
5677 	"\t    The 'clear' parameter will clear the contents of a running\n"
5678 	"\t    hist trigger and leave its current paused/active state\n"
5679 	"\t    unchanged.\n\n"
5680 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5681 	"\t    have one event conditionally start and stop another event's\n"
5682 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5683 	"\t    the enable_event and disable_event triggers.\n\n"
5684 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5685 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5686 	"\t        <handler>.<action>\n\n"
5687 	"\t    The available handlers are:\n\n"
5688 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5689 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5690 	"\t        onchange(var)            - invoke action if var changes\n\n"
5691 	"\t    The available actions are:\n\n"
5692 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5693 	"\t        save(field,...)                      - save current event fields\n"
5694 #ifdef CONFIG_TRACER_SNAPSHOT
5695 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5696 #endif
5697 #ifdef CONFIG_SYNTH_EVENTS
5698 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5699 	"\t  Write into this file to define/undefine new synthetic events.\n"
5700 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5701 #endif
5702 #endif
5703 ;
5704 
5705 static ssize_t
5706 tracing_readme_read(struct file *filp, char __user *ubuf,
5707 		       size_t cnt, loff_t *ppos)
5708 {
5709 	return simple_read_from_buffer(ubuf, cnt, ppos,
5710 					readme_msg, strlen(readme_msg));
5711 }
5712 
5713 static const struct file_operations tracing_readme_fops = {
5714 	.open		= tracing_open_generic,
5715 	.read		= tracing_readme_read,
5716 	.llseek		= generic_file_llseek,
5717 };
5718 
5719 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5720 {
5721 	int pid = ++(*pos);
5722 
5723 	return trace_find_tgid_ptr(pid);
5724 }
5725 
5726 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5727 {
5728 	int pid = *pos;
5729 
5730 	return trace_find_tgid_ptr(pid);
5731 }
5732 
5733 static void saved_tgids_stop(struct seq_file *m, void *v)
5734 {
5735 }
5736 
5737 static int saved_tgids_show(struct seq_file *m, void *v)
5738 {
5739 	int *entry = (int *)v;
5740 	int pid = entry - tgid_map;
5741 	int tgid = *entry;
5742 
5743 	if (tgid == 0)
5744 		return SEQ_SKIP;
5745 
5746 	seq_printf(m, "%d %d\n", pid, tgid);
5747 	return 0;
5748 }
5749 
5750 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5751 	.start		= saved_tgids_start,
5752 	.stop		= saved_tgids_stop,
5753 	.next		= saved_tgids_next,
5754 	.show		= saved_tgids_show,
5755 };
5756 
5757 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5758 {
5759 	int ret;
5760 
5761 	ret = tracing_check_open_get_tr(NULL);
5762 	if (ret)
5763 		return ret;
5764 
5765 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5766 }
5767 
5768 
5769 static const struct file_operations tracing_saved_tgids_fops = {
5770 	.open		= tracing_saved_tgids_open,
5771 	.read		= seq_read,
5772 	.llseek		= seq_lseek,
5773 	.release	= seq_release,
5774 };
5775 
5776 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5777 {
5778 	unsigned int *ptr = v;
5779 
5780 	if (*pos || m->count)
5781 		ptr++;
5782 
5783 	(*pos)++;
5784 
5785 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5786 	     ptr++) {
5787 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5788 			continue;
5789 
5790 		return ptr;
5791 	}
5792 
5793 	return NULL;
5794 }
5795 
5796 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5797 {
5798 	void *v;
5799 	loff_t l = 0;
5800 
5801 	preempt_disable();
5802 	arch_spin_lock(&trace_cmdline_lock);
5803 
5804 	v = &savedcmd->map_cmdline_to_pid[0];
5805 	while (l <= *pos) {
5806 		v = saved_cmdlines_next(m, v, &l);
5807 		if (!v)
5808 			return NULL;
5809 	}
5810 
5811 	return v;
5812 }
5813 
5814 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5815 {
5816 	arch_spin_unlock(&trace_cmdline_lock);
5817 	preempt_enable();
5818 }
5819 
5820 static int saved_cmdlines_show(struct seq_file *m, void *v)
5821 {
5822 	char buf[TASK_COMM_LEN];
5823 	unsigned int *pid = v;
5824 
5825 	__trace_find_cmdline(*pid, buf);
5826 	seq_printf(m, "%d %s\n", *pid, buf);
5827 	return 0;
5828 }
5829 
5830 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5831 	.start		= saved_cmdlines_start,
5832 	.next		= saved_cmdlines_next,
5833 	.stop		= saved_cmdlines_stop,
5834 	.show		= saved_cmdlines_show,
5835 };
5836 
5837 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5838 {
5839 	int ret;
5840 
5841 	ret = tracing_check_open_get_tr(NULL);
5842 	if (ret)
5843 		return ret;
5844 
5845 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5846 }
5847 
5848 static const struct file_operations tracing_saved_cmdlines_fops = {
5849 	.open		= tracing_saved_cmdlines_open,
5850 	.read		= seq_read,
5851 	.llseek		= seq_lseek,
5852 	.release	= seq_release,
5853 };
5854 
5855 static ssize_t
5856 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5857 				 size_t cnt, loff_t *ppos)
5858 {
5859 	char buf[64];
5860 	int r;
5861 
5862 	arch_spin_lock(&trace_cmdline_lock);
5863 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5864 	arch_spin_unlock(&trace_cmdline_lock);
5865 
5866 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5867 }
5868 
5869 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5870 {
5871 	kfree(s->saved_cmdlines);
5872 	kfree(s->map_cmdline_to_pid);
5873 	kfree(s);
5874 }
5875 
5876 static int tracing_resize_saved_cmdlines(unsigned int val)
5877 {
5878 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5879 
5880 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5881 	if (!s)
5882 		return -ENOMEM;
5883 
5884 	if (allocate_cmdlines_buffer(val, s) < 0) {
5885 		kfree(s);
5886 		return -ENOMEM;
5887 	}
5888 
5889 	arch_spin_lock(&trace_cmdline_lock);
5890 	savedcmd_temp = savedcmd;
5891 	savedcmd = s;
5892 	arch_spin_unlock(&trace_cmdline_lock);
5893 	free_saved_cmdlines_buffer(savedcmd_temp);
5894 
5895 	return 0;
5896 }
5897 
5898 static ssize_t
5899 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5900 				  size_t cnt, loff_t *ppos)
5901 {
5902 	unsigned long val;
5903 	int ret;
5904 
5905 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5906 	if (ret)
5907 		return ret;
5908 
5909 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5910 	if (!val || val > PID_MAX_DEFAULT)
5911 		return -EINVAL;
5912 
5913 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5914 	if (ret < 0)
5915 		return ret;
5916 
5917 	*ppos += cnt;
5918 
5919 	return cnt;
5920 }
5921 
5922 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5923 	.open		= tracing_open_generic,
5924 	.read		= tracing_saved_cmdlines_size_read,
5925 	.write		= tracing_saved_cmdlines_size_write,
5926 };
5927 
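/*
 * Illustrative usage of the saved_cmdlines_size interface backed by the
 * handlers above (assuming the usual tracefs mount point; the value is an
 * example only):
 *
 *	echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * The write handler rejects 0 and anything larger than PID_MAX_DEFAULT.
 */
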
5928 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5929 static union trace_eval_map_item *
5930 update_eval_map(union trace_eval_map_item *ptr)
5931 {
5932 	if (!ptr->map.eval_string) {
5933 		if (ptr->tail.next) {
5934 			ptr = ptr->tail.next;
5935 			/* Set ptr to the next real item (skip head) */
5936 			ptr++;
5937 		} else
5938 			return NULL;
5939 	}
5940 	return ptr;
5941 }
5942 
5943 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5944 {
5945 	union trace_eval_map_item *ptr = v;
5946 
5947 	/*
5948 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5949 	 * This really should never happen.
5950 	 */
5951 	(*pos)++;
5952 	ptr = update_eval_map(ptr);
5953 	if (WARN_ON_ONCE(!ptr))
5954 		return NULL;
5955 
5956 	ptr++;
5957 	ptr = update_eval_map(ptr);
5958 
5959 	return ptr;
5960 }
5961 
5962 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5963 {
5964 	union trace_eval_map_item *v;
5965 	loff_t l = 0;
5966 
5967 	mutex_lock(&trace_eval_mutex);
5968 
5969 	v = trace_eval_maps;
5970 	if (v)
5971 		v++;
5972 
5973 	while (v && l < *pos) {
5974 		v = eval_map_next(m, v, &l);
5975 	}
5976 
5977 	return v;
5978 }
5979 
5980 static void eval_map_stop(struct seq_file *m, void *v)
5981 {
5982 	mutex_unlock(&trace_eval_mutex);
5983 }
5984 
5985 static int eval_map_show(struct seq_file *m, void *v)
5986 {
5987 	union trace_eval_map_item *ptr = v;
5988 
5989 	seq_printf(m, "%s %ld (%s)\n",
5990 		   ptr->map.eval_string, ptr->map.eval_value,
5991 		   ptr->map.system);
5992 
5993 	return 0;
5994 }
5995 
5996 static const struct seq_operations tracing_eval_map_seq_ops = {
5997 	.start		= eval_map_start,
5998 	.next		= eval_map_next,
5999 	.stop		= eval_map_stop,
6000 	.show		= eval_map_show,
6001 };
6002 
6003 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6004 {
6005 	int ret;
6006 
6007 	ret = tracing_check_open_get_tr(NULL);
6008 	if (ret)
6009 		return ret;
6010 
6011 	return seq_open(filp, &tracing_eval_map_seq_ops);
6012 }
6013 
6014 static const struct file_operations tracing_eval_map_fops = {
6015 	.open		= tracing_eval_map_open,
6016 	.read		= seq_read,
6017 	.llseek		= seq_lseek,
6018 	.release	= seq_release,
6019 };
6020 
6021 static inline union trace_eval_map_item *
6022 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6023 {
6024 	/* Return tail of array given the head */
6025 	return ptr + ptr->head.length + 1;
6026 }
6027 
6028 static void
6029 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6030 			   int len)
6031 {
6032 	struct trace_eval_map **stop;
6033 	struct trace_eval_map **map;
6034 	union trace_eval_map_item *map_array;
6035 	union trace_eval_map_item *ptr;
6036 
6037 	stop = start + len;
6038 
6039 	/*
6040 	 * The trace_eval_maps contains the map plus a head and tail item,
6041 	 * where the head holds the module and length of array, and the
6042 	 * tail holds a pointer to the next list.
6043 	 */
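	/*
	 * Illustrative layout sketch (an assumption drawn from the code
	 * below, not part of the original comment): for len == 3 the
	 * allocated array looks like
	 *
	 *	[ head(mod, length=3) ][ map 0 ][ map 1 ][ map 2 ][ tail ]
	 *
	 * The final, zeroed item is the tail; its ->tail.next is later set
	 * to point at the next module's array, if any.
	 */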
6044 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6045 	if (!map_array) {
6046 		pr_warn("Unable to allocate trace eval mapping\n");
6047 		return;
6048 	}
6049 
6050 	mutex_lock(&trace_eval_mutex);
6051 
6052 	if (!trace_eval_maps)
6053 		trace_eval_maps = map_array;
6054 	else {
6055 		ptr = trace_eval_maps;
6056 		for (;;) {
6057 			ptr = trace_eval_jmp_to_tail(ptr);
6058 			if (!ptr->tail.next)
6059 				break;
6060 			ptr = ptr->tail.next;
6061 
6062 		}
6063 		ptr->tail.next = map_array;
6064 	}
6065 	map_array->head.mod = mod;
6066 	map_array->head.length = len;
6067 	map_array++;
6068 
6069 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6070 		map_array->map = **map;
6071 		map_array++;
6072 	}
6073 	memset(map_array, 0, sizeof(*map_array));
6074 
6075 	mutex_unlock(&trace_eval_mutex);
6076 }
6077 
6078 static void trace_create_eval_file(struct dentry *d_tracer)
6079 {
6080 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6081 			  NULL, &tracing_eval_map_fops);
6082 }
6083 
6084 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6085 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6086 static inline void trace_insert_eval_map_file(struct module *mod,
6087 			      struct trace_eval_map **start, int len) { }
6088 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6089 
6090 static void trace_insert_eval_map(struct module *mod,
6091 				  struct trace_eval_map **start, int len)
6092 {
6093 	struct trace_eval_map **map;
6094 
6095 	if (len <= 0)
6096 		return;
6097 
6098 	map = start;
6099 
6100 	trace_event_eval_update(map, len);
6101 
6102 	trace_insert_eval_map_file(mod, start, len);
6103 }
6104 
6105 static ssize_t
6106 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6107 		       size_t cnt, loff_t *ppos)
6108 {
6109 	struct trace_array *tr = filp->private_data;
6110 	char buf[MAX_TRACER_SIZE+2];
6111 	int r;
6112 
6113 	mutex_lock(&trace_types_lock);
6114 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6115 	mutex_unlock(&trace_types_lock);
6116 
6117 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6118 }
6119 
6120 int tracer_init(struct tracer *t, struct trace_array *tr)
6121 {
6122 	tracing_reset_online_cpus(&tr->array_buffer);
6123 	return t->init(tr);
6124 }
6125 
6126 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6127 {
6128 	int cpu;
6129 
6130 	for_each_tracing_cpu(cpu)
6131 		per_cpu_ptr(buf->data, cpu)->entries = val;
6132 }
6133 
6134 #ifdef CONFIG_TRACER_MAX_TRACE
6135 /* resize @trace_buf's entries to match @size_buf's entries */
6136 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6137 					struct array_buffer *size_buf, int cpu_id)
6138 {
6139 	int cpu, ret = 0;
6140 
6141 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6142 		for_each_tracing_cpu(cpu) {
6143 			ret = ring_buffer_resize(trace_buf->buffer,
6144 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6145 			if (ret < 0)
6146 				break;
6147 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6148 				per_cpu_ptr(size_buf->data, cpu)->entries;
6149 		}
6150 	} else {
6151 		ret = ring_buffer_resize(trace_buf->buffer,
6152 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6153 		if (ret == 0)
6154 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6155 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6156 	}
6157 
6158 	return ret;
6159 }
6160 #endif /* CONFIG_TRACER_MAX_TRACE */
6161 
6162 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6163 					unsigned long size, int cpu)
6164 {
6165 	int ret;
6166 
6167 	/*
6168 	 * If kernel or user changes the size of the ring buffer
6169 	 * we use the size that was given, and we can forget about
6170 	 * expanding it later.
6171 	 */
6172 	ring_buffer_expanded = true;
6173 
6174 	/* May be called before buffers are initialized */
6175 	if (!tr->array_buffer.buffer)
6176 		return 0;
6177 
6178 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6179 	if (ret < 0)
6180 		return ret;
6181 
6182 #ifdef CONFIG_TRACER_MAX_TRACE
6183 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6184 	    !tr->current_trace->use_max_tr)
6185 		goto out;
6186 
6187 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6188 	if (ret < 0) {
6189 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6190 						     &tr->array_buffer, cpu);
6191 		if (r < 0) {
6192 			/*
6193 			 * AARGH! We are left with a max buffer of a
6194 			 * different size!
6195 			 * The max buffer is our "snapshot" buffer.
6196 			 * When a tracer needs a snapshot (one of the
6197 			 * latency tracers), it swaps the max buffer
6198 			 * with the saved snapshot. We succeeded in
6199 			 * updating the size of the main buffer, but failed
6200 			 * to update the size of the max buffer. Then, when
6201 			 * we tried to reset the main buffer to its original
6202 			 * size, we failed there too. This is very unlikely to
6203 			 * happen, but if it does, warn and kill all
6204 			 * tracing.
6205 			 */
6206 			WARN_ON(1);
6207 			tracing_disabled = 1;
6208 		}
6209 		return ret;
6210 	}
6211 
6212 	if (cpu == RING_BUFFER_ALL_CPUS)
6213 		set_buffer_entries(&tr->max_buffer, size);
6214 	else
6215 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6216 
6217  out:
6218 #endif /* CONFIG_TRACER_MAX_TRACE */
6219 
6220 	if (cpu == RING_BUFFER_ALL_CPUS)
6221 		set_buffer_entries(&tr->array_buffer, size);
6222 	else
6223 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6224 
6225 	return ret;
6226 }
6227 
6228 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6229 				  unsigned long size, int cpu_id)
6230 {
6231 	int ret;
6232 
6233 	mutex_lock(&trace_types_lock);
6234 
6235 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6236 		/* make sure this cpu is enabled in the mask */
6237 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6238 			ret = -EINVAL;
6239 			goto out;
6240 		}
6241 	}
6242 
6243 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6244 	if (ret < 0)
6245 		ret = -ENOMEM;
6246 
6247 out:
6248 	mutex_unlock(&trace_types_lock);
6249 
6250 	return ret;
6251 }
6252 
6253 
6254 /**
6255  * tracing_update_buffers - used by tracing facility to expand ring buffers
6256  *
6257  * To save memory when tracing is never used on a system that has it
6258  * configured in, the ring buffers start at a minimum size. Once a user
6259  * starts to use the tracing facility, they need to grow to their
6260  * default size.
6261  *
6262  * This function is to be called when a tracer is about to be used.
6263  */
6264 int tracing_update_buffers(void)
6265 {
6266 	int ret = 0;
6267 
6268 	mutex_lock(&trace_types_lock);
6269 	if (!ring_buffer_expanded)
6270 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6271 						RING_BUFFER_ALL_CPUS);
6272 	mutex_unlock(&trace_types_lock);
6273 
6274 	return ret;
6275 }
6276 
6277 struct trace_option_dentry;
6278 
6279 static void
6280 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6281 
6282 /*
6283  * Used to clear out the tracer before deletion of an instance.
6284  * Must have trace_types_lock held.
6285  */
6286 static void tracing_set_nop(struct trace_array *tr)
6287 {
6288 	if (tr->current_trace == &nop_trace)
6289 		return;
6290 
6291 	tr->current_trace->enabled--;
6292 
6293 	if (tr->current_trace->reset)
6294 		tr->current_trace->reset(tr);
6295 
6296 	tr->current_trace = &nop_trace;
6297 }
6298 
6299 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6300 {
6301 	/* Only enable if the directory has been created already. */
6302 	if (!tr->dir)
6303 		return;
6304 
6305 	create_trace_option_files(tr, t);
6306 }
6307 
6308 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6309 {
6310 	struct tracer *t;
6311 #ifdef CONFIG_TRACER_MAX_TRACE
6312 	bool had_max_tr;
6313 #endif
6314 	int ret = 0;
6315 
6316 	mutex_lock(&trace_types_lock);
6317 
6318 	if (!ring_buffer_expanded) {
6319 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6320 						RING_BUFFER_ALL_CPUS);
6321 		if (ret < 0)
6322 			goto out;
6323 		ret = 0;
6324 	}
6325 
6326 	for (t = trace_types; t; t = t->next) {
6327 		if (strcmp(t->name, buf) == 0)
6328 			break;
6329 	}
6330 	if (!t) {
6331 		ret = -EINVAL;
6332 		goto out;
6333 	}
6334 	if (t == tr->current_trace)
6335 		goto out;
6336 
6337 #ifdef CONFIG_TRACER_SNAPSHOT
6338 	if (t->use_max_tr) {
6339 		arch_spin_lock(&tr->max_lock);
6340 		if (tr->cond_snapshot)
6341 			ret = -EBUSY;
6342 		arch_spin_unlock(&tr->max_lock);
6343 		if (ret)
6344 			goto out;
6345 	}
6346 #endif
6347 	/* Some tracers won't work on kernel command line */
6348 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6349 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6350 			t->name);
6351 		goto out;
6352 	}
6353 
6354 	/* Some tracers are only allowed for the top level buffer */
6355 	if (!trace_ok_for_array(t, tr)) {
6356 		ret = -EINVAL;
6357 		goto out;
6358 	}
6359 
6360 	/* If trace pipe files are being read, we can't change the tracer */
6361 	if (tr->trace_ref) {
6362 		ret = -EBUSY;
6363 		goto out;
6364 	}
6365 
6366 	trace_branch_disable();
6367 
6368 	tr->current_trace->enabled--;
6369 
6370 	if (tr->current_trace->reset)
6371 		tr->current_trace->reset(tr);
6372 
6373 	/* Current trace needs to be nop_trace before synchronize_rcu */
6374 	tr->current_trace = &nop_trace;
6375 
6376 #ifdef CONFIG_TRACER_MAX_TRACE
6377 	had_max_tr = tr->allocated_snapshot;
6378 
6379 	if (had_max_tr && !t->use_max_tr) {
6380 		/*
6381 		 * We need to make sure that the update_max_tr sees that
6382 		 * current_trace changed to nop_trace to keep it from
6383 		 * swapping the buffers after we resize it.
6384 		 * update_max_tr() is called with interrupts disabled,
6385 		 * so a synchronize_rcu() is sufficient.
6386 		 */
6387 		synchronize_rcu();
6388 		free_snapshot(tr);
6389 	}
6390 #endif
6391 
6392 #ifdef CONFIG_TRACER_MAX_TRACE
6393 	if (t->use_max_tr && !had_max_tr) {
6394 		ret = tracing_alloc_snapshot_instance(tr);
6395 		if (ret < 0)
6396 			goto out;
6397 	}
6398 #endif
6399 
6400 	if (t->init) {
6401 		ret = tracer_init(t, tr);
6402 		if (ret)
6403 			goto out;
6404 	}
6405 
6406 	tr->current_trace = t;
6407 	tr->current_trace->enabled++;
6408 	trace_branch_enable(tr);
6409  out:
6410 	mutex_unlock(&trace_types_lock);
6411 
6412 	return ret;
6413 }
6414 
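/*
 * tracing_set_trace_write() is the write handler for selecting a tracer
 * (conventionally exposed as the "current_tracer" file; path assumed for
 * illustration): writing a tracer name switches to it, writing "nop"
 * effectively disables it, e.g.:
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *	echo nop > /sys/kernel/tracing/current_tracer
 */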
6415 static ssize_t
6416 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6417 			size_t cnt, loff_t *ppos)
6418 {
6419 	struct trace_array *tr = filp->private_data;
6420 	char buf[MAX_TRACER_SIZE+1];
6421 	int i;
6422 	size_t ret;
6423 	int err;
6424 
6425 	ret = cnt;
6426 
6427 	if (cnt > MAX_TRACER_SIZE)
6428 		cnt = MAX_TRACER_SIZE;
6429 
6430 	if (copy_from_user(buf, ubuf, cnt))
6431 		return -EFAULT;
6432 
6433 	buf[cnt] = 0;
6434 
6435 	/* strip trailing whitespace. */
6436 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6437 		buf[i] = 0;
6438 
6439 	err = tracing_set_tracer(tr, buf);
6440 	if (err)
6441 		return err;
6442 
6443 	*ppos += ret;
6444 
6445 	return ret;
6446 }
6447 
6448 static ssize_t
6449 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6450 		   size_t cnt, loff_t *ppos)
6451 {
6452 	char buf[64];
6453 	int r;
6454 
6455 	r = snprintf(buf, sizeof(buf), "%ld\n",
6456 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6457 	if (r > sizeof(buf))
6458 		r = sizeof(buf);
6459 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6460 }
6461 
6462 static ssize_t
6463 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6464 		    size_t cnt, loff_t *ppos)
6465 {
6466 	unsigned long val;
6467 	int ret;
6468 
6469 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6470 	if (ret)
6471 		return ret;
6472 
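	/* Input is in microseconds; store it in nanoseconds (e.g. 50 -> 50000). */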
6473 	*ptr = val * 1000;
6474 
6475 	return cnt;
6476 }
6477 
6478 static ssize_t
6479 tracing_thresh_read(struct file *filp, char __user *ubuf,
6480 		    size_t cnt, loff_t *ppos)
6481 {
6482 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6483 }
6484 
6485 static ssize_t
6486 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6487 		     size_t cnt, loff_t *ppos)
6488 {
6489 	struct trace_array *tr = filp->private_data;
6490 	int ret;
6491 
6492 	mutex_lock(&trace_types_lock);
6493 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6494 	if (ret < 0)
6495 		goto out;
6496 
6497 	if (tr->current_trace->update_thresh) {
6498 		ret = tr->current_trace->update_thresh(tr);
6499 		if (ret < 0)
6500 			goto out;
6501 	}
6502 
6503 	ret = cnt;
6504 out:
6505 	mutex_unlock(&trace_types_lock);
6506 
6507 	return ret;
6508 }
6509 
6510 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6511 
6512 static ssize_t
6513 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6514 		     size_t cnt, loff_t *ppos)
6515 {
6516 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6517 }
6518 
6519 static ssize_t
6520 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6521 		      size_t cnt, loff_t *ppos)
6522 {
6523 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6524 }
6525 
6526 #endif
6527 
6528 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6529 {
6530 	struct trace_array *tr = inode->i_private;
6531 	struct trace_iterator *iter;
6532 	int ret;
6533 
6534 	ret = tracing_check_open_get_tr(tr);
6535 	if (ret)
6536 		return ret;
6537 
6538 	mutex_lock(&trace_types_lock);
6539 
6540 	/* create a buffer to store the information to pass to userspace */
6541 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6542 	if (!iter) {
6543 		ret = -ENOMEM;
6544 		__trace_array_put(tr);
6545 		goto out;
6546 	}
6547 
6548 	trace_seq_init(&iter->seq);
6549 	iter->trace = tr->current_trace;
6550 
6551 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6552 		ret = -ENOMEM;
6553 		goto fail;
6554 	}
6555 
6556 	/* trace pipe does not show start of buffer */
6557 	cpumask_setall(iter->started);
6558 
6559 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6560 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6561 
6562 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6563 	if (trace_clocks[tr->clock_id].in_ns)
6564 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6565 
6566 	iter->tr = tr;
6567 	iter->array_buffer = &tr->array_buffer;
6568 	iter->cpu_file = tracing_get_cpu(inode);
6569 	mutex_init(&iter->mutex);
6570 	filp->private_data = iter;
6571 
6572 	if (iter->trace->pipe_open)
6573 		iter->trace->pipe_open(iter);
6574 
6575 	nonseekable_open(inode, filp);
6576 
6577 	tr->trace_ref++;
6578 out:
6579 	mutex_unlock(&trace_types_lock);
6580 	return ret;
6581 
6582 fail:
6583 	kfree(iter);
6584 	__trace_array_put(tr);
6585 	mutex_unlock(&trace_types_lock);
6586 	return ret;
6587 }
6588 
6589 static int tracing_release_pipe(struct inode *inode, struct file *file)
6590 {
6591 	struct trace_iterator *iter = file->private_data;
6592 	struct trace_array *tr = inode->i_private;
6593 
6594 	mutex_lock(&trace_types_lock);
6595 
6596 	tr->trace_ref--;
6597 
6598 	if (iter->trace->pipe_close)
6599 		iter->trace->pipe_close(iter);
6600 
6601 	mutex_unlock(&trace_types_lock);
6602 
6603 	free_cpumask_var(iter->started);
6604 	mutex_destroy(&iter->mutex);
6605 	kfree(iter);
6606 
6607 	trace_array_put(tr);
6608 
6609 	return 0;
6610 }
6611 
6612 static __poll_t
6613 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6614 {
6615 	struct trace_array *tr = iter->tr;
6616 
6617 	/* Iterators are static; they should be either filled or empty */
6618 	if (trace_buffer_iter(iter, iter->cpu_file))
6619 		return EPOLLIN | EPOLLRDNORM;
6620 
6621 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6622 		/*
6623 		 * Always select as readable when in blocking mode
6624 		 */
6625 		return EPOLLIN | EPOLLRDNORM;
6626 	else
6627 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6628 					     filp, poll_table);
6629 }
6630 
6631 static __poll_t
6632 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6633 {
6634 	struct trace_iterator *iter = filp->private_data;
6635 
6636 	return trace_poll(iter, filp, poll_table);
6637 }
6638 
6639 /* Must be called with iter->mutex held. */
6640 static int tracing_wait_pipe(struct file *filp)
6641 {
6642 	struct trace_iterator *iter = filp->private_data;
6643 	int ret;
6644 
6645 	while (trace_empty(iter)) {
6646 
6647 		if ((filp->f_flags & O_NONBLOCK)) {
6648 			return -EAGAIN;
6649 		}
6650 
6651 		/*
6652 		 * We block until we read something and tracing is disabled.
6653 		 * We still block if tracing is disabled, but we have never
6654 		 * read anything. This allows a user to cat this file, and
6655 		 * then enable tracing. But after we have read something,
6656 		 * we give an EOF when tracing is again disabled.
6657 		 *
6658 		 * iter->pos will be 0 if we haven't read anything.
6659 		 */
6660 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6661 			break;
6662 
6663 		mutex_unlock(&iter->mutex);
6664 
6665 		ret = wait_on_pipe(iter, 0);
6666 
6667 		mutex_lock(&iter->mutex);
6668 
6669 		if (ret)
6670 			return ret;
6671 	}
6672 
6673 	return 1;
6674 }
6675 
6676 /*
6677  * Consumer reader.
6678  */
6679 static ssize_t
6680 tracing_read_pipe(struct file *filp, char __user *ubuf,
6681 		  size_t cnt, loff_t *ppos)
6682 {
6683 	struct trace_iterator *iter = filp->private_data;
6684 	ssize_t sret;
6685 
6686 	/*
6687 	 * Avoid more than one consumer on a single file descriptor.
6688 	 * This is just a matter of trace coherency; the ring buffer itself
6689 	 * is protected.
6690 	 */
6691 	mutex_lock(&iter->mutex);
6692 
6693 	/* return any leftover data */
6694 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6695 	if (sret != -EBUSY)
6696 		goto out;
6697 
6698 	trace_seq_init(&iter->seq);
6699 
6700 	if (iter->trace->read) {
6701 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6702 		if (sret)
6703 			goto out;
6704 	}
6705 
6706 waitagain:
6707 	sret = tracing_wait_pipe(filp);
6708 	if (sret <= 0)
6709 		goto out;
6710 
6711 	/* stop when tracing is finished */
6712 	if (trace_empty(iter)) {
6713 		sret = 0;
6714 		goto out;
6715 	}
6716 
6717 	if (cnt >= PAGE_SIZE)
6718 		cnt = PAGE_SIZE - 1;
6719 
6720 	/* reset all but tr, trace, and overruns */
6721 	memset_startat(iter, 0, seq);
6722 	cpumask_clear(iter->started);
6723 	trace_seq_init(&iter->seq);
6724 	iter->pos = -1;
6725 
6726 	trace_event_read_lock();
6727 	trace_access_lock(iter->cpu_file);
6728 	while (trace_find_next_entry_inc(iter) != NULL) {
6729 		enum print_line_t ret;
6730 		int save_len = iter->seq.seq.len;
6731 
6732 		ret = print_trace_line(iter);
6733 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6734 			/* don't print partial lines */
6735 			iter->seq.seq.len = save_len;
6736 			break;
6737 		}
6738 		if (ret != TRACE_TYPE_NO_CONSUME)
6739 			trace_consume(iter);
6740 
6741 		if (trace_seq_used(&iter->seq) >= cnt)
6742 			break;
6743 
6744 		/*
6745 		 * Setting the full flag means we reached the trace_seq buffer
6746 		 * size, so we should have left via the partial-line condition above.
6747 		 * If not, one of the trace_seq_* functions is not used properly.
6748 		 */
6749 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6750 			  iter->ent->type);
6751 	}
6752 	trace_access_unlock(iter->cpu_file);
6753 	trace_event_read_unlock();
6754 
6755 	/* Now copy what we have to the user */
6756 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6757 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6758 		trace_seq_init(&iter->seq);
6759 
6760 	/*
6761 	 * If there was nothing to send to user, in spite of consuming trace
6762 	 * entries, go back to wait for more entries.
6763 	 */
6764 	if (sret == -EBUSY)
6765 		goto waitagain;
6766 
6767 out:
6768 	mutex_unlock(&iter->mutex);
6769 
6770 	return sret;
6771 }
6772 
6773 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6774 				     unsigned int idx)
6775 {
6776 	__free_page(spd->pages[idx]);
6777 }
6778 
6779 static size_t
6780 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6781 {
6782 	size_t count;
6783 	int save_len;
6784 	int ret;
6785 
6786 	/* Seq buffer is page-sized, exactly what we need. */
6787 	for (;;) {
6788 		save_len = iter->seq.seq.len;
6789 		ret = print_trace_line(iter);
6790 
6791 		if (trace_seq_has_overflowed(&iter->seq)) {
6792 			iter->seq.seq.len = save_len;
6793 			break;
6794 		}
6795 
6796 		/*
6797 		 * This should not be hit, because it should only
6798 		 * be set if the iter->seq overflowed. But check it
6799 		 * anyway to be safe.
6800 		 */
6801 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6802 			iter->seq.seq.len = save_len;
6803 			break;
6804 		}
6805 
6806 		count = trace_seq_used(&iter->seq) - save_len;
6807 		if (rem < count) {
6808 			rem = 0;
6809 			iter->seq.seq.len = save_len;
6810 			break;
6811 		}
6812 
6813 		if (ret != TRACE_TYPE_NO_CONSUME)
6814 			trace_consume(iter);
6815 		rem -= count;
6816 		if (!trace_find_next_entry_inc(iter))	{
6817 			rem = 0;
6818 			iter->ent = NULL;
6819 			break;
6820 		}
6821 	}
6822 
6823 	return rem;
6824 }
6825 
6826 static ssize_t tracing_splice_read_pipe(struct file *filp,
6827 					loff_t *ppos,
6828 					struct pipe_inode_info *pipe,
6829 					size_t len,
6830 					unsigned int flags)
6831 {
6832 	struct page *pages_def[PIPE_DEF_BUFFERS];
6833 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6834 	struct trace_iterator *iter = filp->private_data;
6835 	struct splice_pipe_desc spd = {
6836 		.pages		= pages_def,
6837 		.partial	= partial_def,
6838 		.nr_pages	= 0, /* This gets updated below. */
6839 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6840 		.ops		= &default_pipe_buf_ops,
6841 		.spd_release	= tracing_spd_release_pipe,
6842 	};
6843 	ssize_t ret;
6844 	size_t rem;
6845 	unsigned int i;
6846 
6847 	if (splice_grow_spd(pipe, &spd))
6848 		return -ENOMEM;
6849 
6850 	mutex_lock(&iter->mutex);
6851 
6852 	if (iter->trace->splice_read) {
6853 		ret = iter->trace->splice_read(iter, filp,
6854 					       ppos, pipe, len, flags);
6855 		if (ret)
6856 			goto out_err;
6857 	}
6858 
6859 	ret = tracing_wait_pipe(filp);
6860 	if (ret <= 0)
6861 		goto out_err;
6862 
6863 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6864 		ret = -EFAULT;
6865 		goto out_err;
6866 	}
6867 
6868 	trace_event_read_lock();
6869 	trace_access_lock(iter->cpu_file);
6870 
6871 	/* Fill as many pages as possible. */
6872 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6873 		spd.pages[i] = alloc_page(GFP_KERNEL);
6874 		if (!spd.pages[i])
6875 			break;
6876 
6877 		rem = tracing_fill_pipe_page(rem, iter);
6878 
6879 		/* Copy the data into the page, so we can start over. */
6880 		ret = trace_seq_to_buffer(&iter->seq,
6881 					  page_address(spd.pages[i]),
6882 					  trace_seq_used(&iter->seq));
6883 		if (ret < 0) {
6884 			__free_page(spd.pages[i]);
6885 			break;
6886 		}
6887 		spd.partial[i].offset = 0;
6888 		spd.partial[i].len = trace_seq_used(&iter->seq);
6889 
6890 		trace_seq_init(&iter->seq);
6891 	}
6892 
6893 	trace_access_unlock(iter->cpu_file);
6894 	trace_event_read_unlock();
6895 	mutex_unlock(&iter->mutex);
6896 
6897 	spd.nr_pages = i;
6898 
6899 	if (i)
6900 		ret = splice_to_pipe(pipe, &spd);
6901 	else
6902 		ret = 0;
6903 out:
6904 	splice_shrink_spd(&spd);
6905 	return ret;
6906 
6907 out_err:
6908 	mutex_unlock(&iter->mutex);
6909 	goto out;
6910 }
6911 
6912 static ssize_t
6913 tracing_entries_read(struct file *filp, char __user *ubuf,
6914 		     size_t cnt, loff_t *ppos)
6915 {
6916 	struct inode *inode = file_inode(filp);
6917 	struct trace_array *tr = inode->i_private;
6918 	int cpu = tracing_get_cpu(inode);
6919 	char buf[64];
6920 	int r = 0;
6921 	ssize_t ret;
6922 
6923 	mutex_lock(&trace_types_lock);
6924 
6925 	if (cpu == RING_BUFFER_ALL_CPUS) {
6926 		int cpu, buf_size_same;
6927 		unsigned long size;
6928 
6929 		size = 0;
6930 		buf_size_same = 1;
6931 		/* check if all cpu sizes are the same */
6932 		for_each_tracing_cpu(cpu) {
6933 			/* fill in the size from the first enabled cpu */
6934 			if (size == 0)
6935 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6936 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6937 				buf_size_same = 0;
6938 				break;
6939 			}
6940 		}
6941 
6942 		if (buf_size_same) {
6943 			if (!ring_buffer_expanded)
6944 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6945 					    size >> 10,
6946 					    trace_buf_size >> 10);
6947 			else
6948 				r = sprintf(buf, "%lu\n", size >> 10);
6949 		} else
6950 			r = sprintf(buf, "X\n");
6951 	} else
6952 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6953 
6954 	mutex_unlock(&trace_types_lock);
6955 
6956 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6957 	return ret;
6958 }
6959 
6960 static ssize_t
6961 tracing_entries_write(struct file *filp, const char __user *ubuf,
6962 		      size_t cnt, loff_t *ppos)
6963 {
6964 	struct inode *inode = file_inode(filp);
6965 	struct trace_array *tr = inode->i_private;
6966 	unsigned long val;
6967 	int ret;
6968 
6969 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6970 	if (ret)
6971 		return ret;
6972 
6973 	/* must have at least 1 entry */
6974 	if (!val)
6975 		return -EINVAL;
6976 
6977 	/* value is in KB */
6978 	val <<= 10;
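	/* e.g. writing "1024" requests a 1024 KiB (1 MiB) buffer per targeted CPU */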
6979 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6980 	if (ret < 0)
6981 		return ret;
6982 
6983 	*ppos += cnt;
6984 
6985 	return cnt;
6986 }
6987 
6988 static ssize_t
6989 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6990 				size_t cnt, loff_t *ppos)
6991 {
6992 	struct trace_array *tr = filp->private_data;
6993 	char buf[64];
6994 	int r, cpu;
6995 	unsigned long size = 0, expanded_size = 0;
6996 
6997 	mutex_lock(&trace_types_lock);
6998 	for_each_tracing_cpu(cpu) {
6999 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7000 		if (!ring_buffer_expanded)
7001 			expanded_size += trace_buf_size >> 10;
7002 	}
7003 	if (ring_buffer_expanded)
7004 		r = sprintf(buf, "%lu\n", size);
7005 	else
7006 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7007 	mutex_unlock(&trace_types_lock);
7008 
7009 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7010 }
7011 
7012 static ssize_t
7013 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7014 			  size_t cnt, loff_t *ppos)
7015 {
7016 	/*
7017 	 * There is no need to read what the user has written; this function
7018 	 * only exists so that writing (e.g. via "echo") does not return an error.
7019 	 */
7020 
7021 	*ppos += cnt;
7022 
7023 	return cnt;
7024 }
7025 
7026 static int
7027 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7028 {
7029 	struct trace_array *tr = inode->i_private;
7030 
7031 	/* disable tracing ? */
7032 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7033 		tracer_tracing_off(tr);
7034 	/* resize the ring buffer to 0 */
7035 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7036 
7037 	trace_array_put(tr);
7038 
7039 	return 0;
7040 }
7041 
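/*
 * tracing_mark_write() backs the user-facing marker interface
 * (conventionally the "trace_marker" file; path assumed for illustration):
 * a write injects the string into the ring buffer as a TRACE_PRINT event,
 * e.g.:
 *
 *	echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */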
7042 static ssize_t
7043 tracing_mark_write(struct file *filp, const char __user *ubuf,
7044 					size_t cnt, loff_t *fpos)
7045 {
7046 	struct trace_array *tr = filp->private_data;
7047 	struct ring_buffer_event *event;
7048 	enum event_trigger_type tt = ETT_NONE;
7049 	struct trace_buffer *buffer;
7050 	struct print_entry *entry;
7051 	ssize_t written;
7052 	int size;
7053 	int len;
7054 
7055 /* Used in tracing_mark_raw_write() as well */
7056 #define FAULTED_STR "<faulted>"
7057 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7058 
7059 	if (tracing_disabled)
7060 		return -EINVAL;
7061 
7062 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7063 		return -EINVAL;
7064 
7065 	if (cnt > TRACE_BUF_SIZE)
7066 		cnt = TRACE_BUF_SIZE;
7067 
7068 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7069 
7070 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7071 
7072 	/* If less than "<faulted>", then make sure we can still add that */
7073 	if (cnt < FAULTED_SIZE)
7074 		size += FAULTED_SIZE - cnt;
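	/*
	 * Worked example (illustrative): with cnt == 3 and FAULTED_SIZE == 9,
	 * size is sizeof(*entry) + 3 + 2 + (9 - 3), i.e. there is always room
	 * to replace the payload with "<faulted>" plus the '\n' and '\0'.
	 */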
7075 
7076 	buffer = tr->array_buffer.buffer;
7077 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7078 					    tracing_gen_ctx());
7079 	if (unlikely(!event))
7080 		/* Ring buffer disabled, return as if not open for write */
7081 		return -EBADF;
7082 
7083 	entry = ring_buffer_event_data(event);
7084 	entry->ip = _THIS_IP_;
7085 
7086 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7087 	if (len) {
7088 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7089 		cnt = FAULTED_SIZE;
7090 		written = -EFAULT;
7091 	} else
7092 		written = cnt;
7093 
7094 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7095 		/* do not add \n before testing triggers, but add \0 */
7096 		entry->buf[cnt] = '\0';
7097 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7098 	}
7099 
7100 	if (entry->buf[cnt - 1] != '\n') {
7101 		entry->buf[cnt] = '\n';
7102 		entry->buf[cnt + 1] = '\0';
7103 	} else
7104 		entry->buf[cnt] = '\0';
7105 
7106 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7107 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7108 	__buffer_unlock_commit(buffer, event);
7109 
7110 	if (tt)
7111 		event_triggers_post_call(tr->trace_marker_file, tt);
7112 
7113 	if (written > 0)
7114 		*fpos += written;
7115 
7116 	return written;
7117 }
7118 
7119 /* Limit it for now to 3K (including tag) */
7120 #define RAW_DATA_MAX_SIZE (1024*3)
7121 
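/*
 * Unlike the plain marker, the raw interface expects a binary payload
 * that starts with an unsigned int tag id. A minimal user-space sketch
 * (names and values are illustrative only):
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *	write(fd, &rec, sizeof(rec));
 */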
7122 static ssize_t
7123 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7124 					size_t cnt, loff_t *fpos)
7125 {
7126 	struct trace_array *tr = filp->private_data;
7127 	struct ring_buffer_event *event;
7128 	struct trace_buffer *buffer;
7129 	struct raw_data_entry *entry;
7130 	ssize_t written;
7131 	int size;
7132 	int len;
7133 
7134 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7135 
7136 	if (tracing_disabled)
7137 		return -EINVAL;
7138 
7139 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7140 		return -EINVAL;
7141 
7142 	/* The marker must at least have a tag id */
7143 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7144 		return -EINVAL;
7145 
7146 	if (cnt > TRACE_BUF_SIZE)
7147 		cnt = TRACE_BUF_SIZE;
7148 
7149 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7150 
7151 	size = sizeof(*entry) + cnt;
7152 	if (cnt < FAULT_SIZE_ID)
7153 		size += FAULT_SIZE_ID - cnt;
7154 
7155 	buffer = tr->array_buffer.buffer;
7156 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7157 					    tracing_gen_ctx());
7158 	if (!event)
7159 		/* Ring buffer disabled, return as if not open for write */
7160 		return -EBADF;
7161 
7162 	entry = ring_buffer_event_data(event);
7163 
7164 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7165 	if (len) {
7166 		entry->id = -1;
7167 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7168 		written = -EFAULT;
7169 	} else
7170 		written = cnt;
7171 
7172 	__buffer_unlock_commit(buffer, event);
7173 
7174 	if (written > 0)
7175 		*fpos += written;
7176 
7177 	return written;
7178 }
7179 
7180 static int tracing_clock_show(struct seq_file *m, void *v)
7181 {
7182 	struct trace_array *tr = m->private;
7183 	int i;
7184 
7185 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7186 		seq_printf(m,
7187 			"%s%s%s%s", i ? " " : "",
7188 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7189 			i == tr->clock_id ? "]" : "");
7190 	seq_putc(m, '\n');
7191 
7192 	return 0;
7193 }
7194 
7195 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7196 {
7197 	int i;
7198 
7199 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7200 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7201 			break;
7202 	}
7203 	if (i == ARRAY_SIZE(trace_clocks))
7204 		return -EINVAL;
7205 
7206 	mutex_lock(&trace_types_lock);
7207 
7208 	tr->clock_id = i;
7209 
7210 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7211 
7212 	/*
7213 	 * New clock may not be consistent with the previous clock.
7214 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7215 	 */
7216 	tracing_reset_online_cpus(&tr->array_buffer);
7217 
7218 #ifdef CONFIG_TRACER_MAX_TRACE
7219 	if (tr->max_buffer.buffer)
7220 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7221 	tracing_reset_online_cpus(&tr->max_buffer);
7222 #endif
7223 
7224 	mutex_unlock(&trace_types_lock);
7225 
7226 	return 0;
7227 }
7228 
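/*
 * Writing a clock name selects that clock for the trace buffer
 * (conventionally via the "trace_clock" file; path assumed for
 * illustration) and resets the buffers so timestamps stay comparable:
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 */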
7229 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7230 				   size_t cnt, loff_t *fpos)
7231 {
7232 	struct seq_file *m = filp->private_data;
7233 	struct trace_array *tr = m->private;
7234 	char buf[64];
7235 	const char *clockstr;
7236 	int ret;
7237 
7238 	if (cnt >= sizeof(buf))
7239 		return -EINVAL;
7240 
7241 	if (copy_from_user(buf, ubuf, cnt))
7242 		return -EFAULT;
7243 
7244 	buf[cnt] = 0;
7245 
7246 	clockstr = strstrip(buf);
7247 
7248 	ret = tracing_set_clock(tr, clockstr);
7249 	if (ret)
7250 		return ret;
7251 
7252 	*fpos += cnt;
7253 
7254 	return cnt;
7255 }
7256 
7257 static int tracing_clock_open(struct inode *inode, struct file *file)
7258 {
7259 	struct trace_array *tr = inode->i_private;
7260 	int ret;
7261 
7262 	ret = tracing_check_open_get_tr(tr);
7263 	if (ret)
7264 		return ret;
7265 
7266 	ret = single_open(file, tracing_clock_show, inode->i_private);
7267 	if (ret < 0)
7268 		trace_array_put(tr);
7269 
7270 	return ret;
7271 }
7272 
7273 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7274 {
7275 	struct trace_array *tr = m->private;
7276 
7277 	mutex_lock(&trace_types_lock);
7278 
7279 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7280 		seq_puts(m, "delta [absolute]\n");
7281 	else
7282 		seq_puts(m, "[delta] absolute\n");
7283 
7284 	mutex_unlock(&trace_types_lock);
7285 
7286 	return 0;
7287 }
7288 
7289 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7290 {
7291 	struct trace_array *tr = inode->i_private;
7292 	int ret;
7293 
7294 	ret = tracing_check_open_get_tr(tr);
7295 	if (ret)
7296 		return ret;
7297 
7298 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7299 	if (ret < 0)
7300 		trace_array_put(tr);
7301 
7302 	return ret;
7303 }
7304 
7305 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7306 {
7307 	if (rbe == this_cpu_read(trace_buffered_event))
7308 		return ring_buffer_time_stamp(buffer);
7309 
7310 	return ring_buffer_event_time_stamp(buffer, rbe);
7311 }
7312 
7313 /*
7314  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7315  */
7316 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7317 {
7318 	int ret = 0;
7319 
7320 	mutex_lock(&trace_types_lock);
7321 
7322 	if (set && tr->no_filter_buffering_ref++)
7323 		goto out;
7324 
7325 	if (!set) {
7326 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7327 			ret = -EINVAL;
7328 			goto out;
7329 		}
7330 
7331 		--tr->no_filter_buffering_ref;
7332 	}
7333  out:
7334 	mutex_unlock(&trace_types_lock);
7335 
7336 	return ret;
7337 }
7338 
7339 struct ftrace_buffer_info {
7340 	struct trace_iterator	iter;
7341 	void			*spare;
7342 	unsigned int		spare_cpu;
7343 	unsigned int		read;
7344 };
7345 
7346 #ifdef CONFIG_TRACER_SNAPSHOT
7347 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7348 {
7349 	struct trace_array *tr = inode->i_private;
7350 	struct trace_iterator *iter;
7351 	struct seq_file *m;
7352 	int ret;
7353 
7354 	ret = tracing_check_open_get_tr(tr);
7355 	if (ret)
7356 		return ret;
7357 
7358 	if (file->f_mode & FMODE_READ) {
7359 		iter = __tracing_open(inode, file, true);
7360 		if (IS_ERR(iter))
7361 			ret = PTR_ERR(iter);
7362 	} else {
7363 		/* Writes still need the seq_file to hold the private data */
7364 		ret = -ENOMEM;
7365 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7366 		if (!m)
7367 			goto out;
7368 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7369 		if (!iter) {
7370 			kfree(m);
7371 			goto out;
7372 		}
7373 		ret = 0;
7374 
7375 		iter->tr = tr;
7376 		iter->array_buffer = &tr->max_buffer;
7377 		iter->cpu_file = tracing_get_cpu(inode);
7378 		m->private = iter;
7379 		file->private_data = m;
7380 	}
7381 out:
7382 	if (ret < 0)
7383 		trace_array_put(tr);
7384 
7385 	return ret;
7386 }
7387 
7388 static ssize_t
7389 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7390 		       loff_t *ppos)
7391 {
7392 	struct seq_file *m = filp->private_data;
7393 	struct trace_iterator *iter = m->private;
7394 	struct trace_array *tr = iter->tr;
7395 	unsigned long val;
7396 	int ret;
7397 
7398 	ret = tracing_update_buffers();
7399 	if (ret < 0)
7400 		return ret;
7401 
7402 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7403 	if (ret)
7404 		return ret;
7405 
7406 	mutex_lock(&trace_types_lock);
7407 
7408 	if (tr->current_trace->use_max_tr) {
7409 		ret = -EBUSY;
7410 		goto out;
7411 	}
7412 
7413 	arch_spin_lock(&tr->max_lock);
7414 	if (tr->cond_snapshot)
7415 		ret = -EBUSY;
7416 	arch_spin_unlock(&tr->max_lock);
7417 	if (ret)
7418 		goto out;
7419 
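	/*
	 * Value semantics, as implemented below: 0 frees the snapshot
	 * buffer, 1 allocates it (if needed) and takes a snapshot, and any
	 * other value clears the snapshot contents.
	 */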
7420 	switch (val) {
7421 	case 0:
7422 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7423 			ret = -EINVAL;
7424 			break;
7425 		}
7426 		if (tr->allocated_snapshot)
7427 			free_snapshot(tr);
7428 		break;
7429 	case 1:
7430 /* Only allow per-cpu swap if the ring buffer supports it */
7431 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7432 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7433 			ret = -EINVAL;
7434 			break;
7435 		}
7436 #endif
7437 		if (tr->allocated_snapshot)
7438 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7439 					&tr->array_buffer, iter->cpu_file);
7440 		else
7441 			ret = tracing_alloc_snapshot_instance(tr);
7442 		if (ret < 0)
7443 			break;
7444 		local_irq_disable();
7445 		/* Now, we're going to swap */
7446 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7447 			update_max_tr(tr, current, smp_processor_id(), NULL);
7448 		else
7449 			update_max_tr_single(tr, current, iter->cpu_file);
7450 		local_irq_enable();
7451 		break;
7452 	default:
7453 		if (tr->allocated_snapshot) {
7454 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7455 				tracing_reset_online_cpus(&tr->max_buffer);
7456 			else
7457 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7458 		}
7459 		break;
7460 	}
7461 
7462 	if (ret >= 0) {
7463 		*ppos += cnt;
7464 		ret = cnt;
7465 	}
7466 out:
7467 	mutex_unlock(&trace_types_lock);
7468 	return ret;
7469 }
7470 
7471 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7472 {
7473 	struct seq_file *m = file->private_data;
7474 	int ret;
7475 
7476 	ret = tracing_release(inode, file);
7477 
7478 	if (file->f_mode & FMODE_READ)
7479 		return ret;
7480 
7481 	/* If write only, the seq_file is just a stub */
7482 	if (m)
7483 		kfree(m->private);
7484 	kfree(m);
7485 
7486 	return 0;
7487 }
7488 
7489 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7490 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7491 				    size_t count, loff_t *ppos);
7492 static int tracing_buffers_release(struct inode *inode, struct file *file);
7493 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7494 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7495 
7496 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7497 {
7498 	struct ftrace_buffer_info *info;
7499 	int ret;
7500 
7501 	/* The following checks for tracefs lockdown */
7502 	ret = tracing_buffers_open(inode, filp);
7503 	if (ret < 0)
7504 		return ret;
7505 
7506 	info = filp->private_data;
7507 
7508 	if (info->iter.trace->use_max_tr) {
7509 		tracing_buffers_release(inode, filp);
7510 		return -EBUSY;
7511 	}
7512 
7513 	info->iter.snapshot = true;
7514 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7515 
7516 	return ret;
7517 }
7518 
7519 #endif /* CONFIG_TRACER_SNAPSHOT */
7520 
7521 
7522 static const struct file_operations tracing_thresh_fops = {
7523 	.open		= tracing_open_generic,
7524 	.read		= tracing_thresh_read,
7525 	.write		= tracing_thresh_write,
7526 	.llseek		= generic_file_llseek,
7527 };
7528 
7529 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7530 static const struct file_operations tracing_max_lat_fops = {
7531 	.open		= tracing_open_generic,
7532 	.read		= tracing_max_lat_read,
7533 	.write		= tracing_max_lat_write,
7534 	.llseek		= generic_file_llseek,
7535 };
7536 #endif
7537 
7538 static const struct file_operations set_tracer_fops = {
7539 	.open		= tracing_open_generic,
7540 	.read		= tracing_set_trace_read,
7541 	.write		= tracing_set_trace_write,
7542 	.llseek		= generic_file_llseek,
7543 };
7544 
7545 static const struct file_operations tracing_pipe_fops = {
7546 	.open		= tracing_open_pipe,
7547 	.poll		= tracing_poll_pipe,
7548 	.read		= tracing_read_pipe,
7549 	.splice_read	= tracing_splice_read_pipe,
7550 	.release	= tracing_release_pipe,
7551 	.llseek		= no_llseek,
7552 };
7553 
7554 static const struct file_operations tracing_entries_fops = {
7555 	.open		= tracing_open_generic_tr,
7556 	.read		= tracing_entries_read,
7557 	.write		= tracing_entries_write,
7558 	.llseek		= generic_file_llseek,
7559 	.release	= tracing_release_generic_tr,
7560 };
7561 
7562 static const struct file_operations tracing_total_entries_fops = {
7563 	.open		= tracing_open_generic_tr,
7564 	.read		= tracing_total_entries_read,
7565 	.llseek		= generic_file_llseek,
7566 	.release	= tracing_release_generic_tr,
7567 };
7568 
7569 static const struct file_operations tracing_free_buffer_fops = {
7570 	.open		= tracing_open_generic_tr,
7571 	.write		= tracing_free_buffer_write,
7572 	.release	= tracing_free_buffer_release,
7573 };
7574 
7575 static const struct file_operations tracing_mark_fops = {
7576 	.open		= tracing_open_generic_tr,
7577 	.write		= tracing_mark_write,
7578 	.llseek		= generic_file_llseek,
7579 	.release	= tracing_release_generic_tr,
7580 };
7581 
7582 static const struct file_operations tracing_mark_raw_fops = {
7583 	.open		= tracing_open_generic_tr,
7584 	.write		= tracing_mark_raw_write,
7585 	.llseek		= generic_file_llseek,
7586 	.release	= tracing_release_generic_tr,
7587 };
7588 
7589 static const struct file_operations trace_clock_fops = {
7590 	.open		= tracing_clock_open,
7591 	.read		= seq_read,
7592 	.llseek		= seq_lseek,
7593 	.release	= tracing_single_release_tr,
7594 	.write		= tracing_clock_write,
7595 };
7596 
7597 static const struct file_operations trace_time_stamp_mode_fops = {
7598 	.open		= tracing_time_stamp_mode_open,
7599 	.read		= seq_read,
7600 	.llseek		= seq_lseek,
7601 	.release	= tracing_single_release_tr,
7602 };
7603 
7604 #ifdef CONFIG_TRACER_SNAPSHOT
7605 static const struct file_operations snapshot_fops = {
7606 	.open		= tracing_snapshot_open,
7607 	.read		= seq_read,
7608 	.write		= tracing_snapshot_write,
7609 	.llseek		= tracing_lseek,
7610 	.release	= tracing_snapshot_release,
7611 };
7612 
7613 static const struct file_operations snapshot_raw_fops = {
7614 	.open		= snapshot_raw_open,
7615 	.read		= tracing_buffers_read,
7616 	.release	= tracing_buffers_release,
7617 	.splice_read	= tracing_buffers_splice_read,
7618 	.llseek		= no_llseek,
7619 };
7620 
7621 #endif /* CONFIG_TRACER_SNAPSHOT */
7622 
7623 /*
7624  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7625  * @filp: The active open file structure
7626  * @ubuf: The userspace provided buffer containing the value to write
7627  * @cnt: The maximum number of bytes to read from @ubuf
7628  * @ppos: The current "file" position
7629  *
7630  * This function implements the write interface for a struct trace_min_max_param.
7631  * The filp->private_data must point to a trace_min_max_param structure that
7632  * defines where to write the value, the min and the max acceptable values,
7633  * and a lock to protect the write.
7634  */
7635 static ssize_t
7636 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7637 {
7638 	struct trace_min_max_param *param = filp->private_data;
7639 	u64 val;
7640 	int err;
7641 
7642 	if (!param)
7643 		return -EFAULT;
7644 
7645 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7646 	if (err)
7647 		return err;
7648 
7649 	if (param->lock)
7650 		mutex_lock(param->lock);
7651 
7652 	if (param->min && val < *param->min)
7653 		err = -EINVAL;
7654 
7655 	if (param->max && val > *param->max)
7656 		err = -EINVAL;
7657 
7658 	if (!err)
7659 		*param->val = val;
7660 
7661 	if (param->lock)
7662 		mutex_unlock(param->lock);
7663 
7664 	if (err)
7665 		return err;
7666 
7667 	return cnt;
7668 }
7669 
7670 /*
7671  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7672  * @filp: The active open file structure
7673  * @ubuf: The userspace provided buffer to read value into
7674  * @cnt: The maximum number of bytes to read
7675  * @ppos: The current "file" position
7676  *
7677  * This function implements the read interface for a struct trace_min_max_param.
7678  * The filp->private_data must point to a trace_min_max_param struct with valid
7679  * data.
7680  */
7681 static ssize_t
7682 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7683 {
7684 	struct trace_min_max_param *param = filp->private_data;
7685 	char buf[U64_STR_SIZE];
7686 	int len;
7687 	u64 val;
7688 
7689 	if (!param)
7690 		return -EFAULT;
7691 
7692 	val = *param->val;
7693 
7694 	if (cnt > sizeof(buf))
7695 		cnt = sizeof(buf);
7696 
7697 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7698 
7699 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7700 }
7701 
7702 const struct file_operations trace_min_max_fops = {
7703 	.open		= tracing_open_generic,
7704 	.read		= trace_min_max_read,
7705 	.write		= trace_min_max_write,
7706 };
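
/*
 * Illustrative (hypothetical) wiring of trace_min_max_fops; the field names
 * follow the accesses made in trace_min_max_read()/trace_min_max_write():
 *
 *   static u64 my_val, my_min = 1, my_max = 100;      // hypothetical
 *   static DEFINE_MUTEX(my_lock);                     // hypothetical
 *
 *   static struct trace_min_max_param my_param = {
 *           .lock = &my_lock,
 *           .val  = &my_val,
 *           .min  = &my_min,
 *           .max  = &my_max,
 *   };
 *
 *   trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *                     &my_param, &trace_min_max_fops);
 */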
7707 
7708 #define TRACING_LOG_ERRS_MAX	8
7709 #define TRACING_LOG_LOC_MAX	128
7710 
7711 #define CMD_PREFIX "  Command: "
7712 
7713 struct err_info {
7714 	const char	**errs;	/* ptr to loc-specific array of err strings */
7715 	u8		type;	/* index into errs -> specific err string */
7716 	u8		pos;	/* caret position within cmd (< MAX_FILTER_STR_VAL = 256) */
7717 	u64		ts;
7718 };
7719 
7720 struct tracing_log_err {
7721 	struct list_head	list;
7722 	struct err_info		info;
7723 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7724 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7725 };
7726 
7727 static DEFINE_MUTEX(tracing_err_log_lock);
7728 
7729 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7730 {
7731 	struct tracing_log_err *err;
7732 
7733 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7734 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7735 		if (!err)
7736 			err = ERR_PTR(-ENOMEM);
7737 		tr->n_err_log_entries++;
7738 
7739 		return err;
7740 	}
7741 
7742 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7743 	list_del(&err->list);
7744 
7745 	return err;
7746 }
7747 
7748 /**
7749  * err_pos - find the position of a string within a command for error careting
7750  * @cmd: The tracing command that caused the error
7751  * @str: The string to position the caret at within @cmd
7752  *
7753  * Finds the position of the first occurrence of @str within @cmd.  The
7754  * return value can be passed to tracing_log_err() for caret placement
7755  * within @cmd.
7756  *
7757  * Returns the index within @cmd of the first occurrence of @str or 0
7758  * if @str was not found.
7759  */
7760 unsigned int err_pos(char *cmd, const char *str)
7761 {
7762 	char *found;
7763 
7764 	if (WARN_ON(!strlen(cmd)))
7765 		return 0;
7766 
7767 	found = strstr(cmd, str);
7768 	if (found)
7769 		return found - cmd;
7770 
7771 	return 0;
7772 }
7773 
7774 /**
7775  * tracing_log_err - write an error to the tracing error log
7776  * @tr: The associated trace array for the error (NULL for top level array)
7777  * @loc: A string describing where the error occurred
7778  * @cmd: The tracing command that caused the error
7779  * @errs: The array of loc-specific static error strings
7780  * @type: The index into errs[], which produces the specific static err string
7781  * @pos: The position the caret should be placed in the cmd
7782  *
7783  * Writes an error into tracing/error_log of the form:
7784  *
7785  * <loc>: error: <text>
7786  *   Command: <cmd>
7787  *              ^
7788  *
7789  * tracing/error_log is a small log file containing the last
7790  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7791  * unless there has been a tracing error, and the error log can be
7792  * cleared and have its memory freed by writing the empty string in
7793  * truncation mode to it i.e. echo > tracing/error_log.
7794  *
7795  * NOTE: the @errs array along with the @type param are used to
7796  * produce a static error string - this string is not copied and saved
7797  * when the error is logged - only a pointer to it is saved.  See
7798  * existing callers for examples of how static strings are typically
7799  * defined for use with tracing_log_err().
7800  */
7801 void tracing_log_err(struct trace_array *tr,
7802 		     const char *loc, const char *cmd,
7803 		     const char **errs, u8 type, u8 pos)
7804 {
7805 	struct tracing_log_err *err;
7806 
7807 	if (!tr)
7808 		tr = &global_trace;
7809 
7810 	mutex_lock(&tracing_err_log_lock);
7811 	err = get_tracing_log_err(tr);
7812 	if (PTR_ERR(err) == -ENOMEM) {
7813 		mutex_unlock(&tracing_err_log_lock);
7814 		return;
7815 	}
7816 
7817 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7818 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7819 
7820 	err->info.errs = errs;
7821 	err->info.type = type;
7822 	err->info.pos = pos;
7823 	err->info.ts = local_clock();
7824 
7825 	list_add_tail(&err->list, &tr->err_log);
7826 	mutex_unlock(&tracing_err_log_lock);
7827 }
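
/*
 * A minimal sketch of a typical caller (hypothetical strings and names),
 * combining err_pos() for caret placement with a static error array as
 * described in the NOTE above:
 *
 *   static const char *my_errs[] = { "Unknown keyword", "Missing value" };
 *
 *   if (bad_keyword)
 *           tracing_log_err(tr, "my_subsys", cmd, my_errs, 0,
 *                           err_pos(cmd, keyword));
 */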
7828 
7829 static void clear_tracing_err_log(struct trace_array *tr)
7830 {
7831 	struct tracing_log_err *err, *next;
7832 
7833 	mutex_lock(&tracing_err_log_lock);
7834 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7835 		list_del(&err->list);
7836 		kfree(err);
7837 	}
7838 
7839 	tr->n_err_log_entries = 0;
7840 	mutex_unlock(&tracing_err_log_lock);
7841 }
7842 
7843 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7844 {
7845 	struct trace_array *tr = m->private;
7846 
7847 	mutex_lock(&tracing_err_log_lock);
7848 
7849 	return seq_list_start(&tr->err_log, *pos);
7850 }
7851 
7852 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7853 {
7854 	struct trace_array *tr = m->private;
7855 
7856 	return seq_list_next(v, &tr->err_log, pos);
7857 }
7858 
7859 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7860 {
7861 	mutex_unlock(&tracing_err_log_lock);
7862 }
7863 
7864 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7865 {
7866 	u8 i;
7867 
7868 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7869 		seq_putc(m, ' ');
7870 	for (i = 0; i < pos; i++)
7871 		seq_putc(m, ' ');
7872 	seq_puts(m, "^\n");
7873 }
7874 
7875 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7876 {
7877 	struct tracing_log_err *err = v;
7878 
7879 	if (err) {
7880 		const char *err_text = err->info.errs[err->info.type];
7881 		u64 sec = err->info.ts;
7882 		u32 nsec;
7883 
7884 		nsec = do_div(sec, NSEC_PER_SEC);
7885 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7886 			   err->loc, err_text);
7887 		seq_printf(m, "%s", err->cmd);
7888 		tracing_err_log_show_pos(m, err->info.pos);
7889 	}
7890 
7891 	return 0;
7892 }
7893 
7894 static const struct seq_operations tracing_err_log_seq_ops = {
7895 	.start  = tracing_err_log_seq_start,
7896 	.next   = tracing_err_log_seq_next,
7897 	.stop   = tracing_err_log_seq_stop,
7898 	.show   = tracing_err_log_seq_show
7899 };
7900 
7901 static int tracing_err_log_open(struct inode *inode, struct file *file)
7902 {
7903 	struct trace_array *tr = inode->i_private;
7904 	int ret = 0;
7905 
7906 	ret = tracing_check_open_get_tr(tr);
7907 	if (ret)
7908 		return ret;
7909 
7910 	/* If this file was opened for write, then erase contents */
7911 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7912 		clear_tracing_err_log(tr);
7913 
7914 	if (file->f_mode & FMODE_READ) {
7915 		ret = seq_open(file, &tracing_err_log_seq_ops);
7916 		if (!ret) {
7917 			struct seq_file *m = file->private_data;
7918 			m->private = tr;
7919 		} else {
7920 			trace_array_put(tr);
7921 		}
7922 	}
7923 	return ret;
7924 }
7925 
7926 static ssize_t tracing_err_log_write(struct file *file,
7927 				     const char __user *buffer,
7928 				     size_t count, loff_t *ppos)
7929 {
7930 	return count;
7931 }
7932 
7933 static int tracing_err_log_release(struct inode *inode, struct file *file)
7934 {
7935 	struct trace_array *tr = inode->i_private;
7936 
7937 	trace_array_put(tr);
7938 
7939 	if (file->f_mode & FMODE_READ)
7940 		seq_release(inode, file);
7941 
7942 	return 0;
7943 }
7944 
7945 static const struct file_operations tracing_err_log_fops = {
7946 	.open           = tracing_err_log_open,
7947 	.write		= tracing_err_log_write,
7948 	.read           = seq_read,
7949 	.llseek         = seq_lseek,
7950 	.release        = tracing_err_log_release,
7951 };
7952 
7953 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7954 {
7955 	struct trace_array *tr = inode->i_private;
7956 	struct ftrace_buffer_info *info;
7957 	int ret;
7958 
7959 	ret = tracing_check_open_get_tr(tr);
7960 	if (ret)
7961 		return ret;
7962 
7963 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7964 	if (!info) {
7965 		trace_array_put(tr);
7966 		return -ENOMEM;
7967 	}
7968 
7969 	mutex_lock(&trace_types_lock);
7970 
7971 	info->iter.tr		= tr;
7972 	info->iter.cpu_file	= tracing_get_cpu(inode);
7973 	info->iter.trace	= tr->current_trace;
7974 	info->iter.array_buffer = &tr->array_buffer;
7975 	info->spare		= NULL;
7976 	/* Force reading ring buffer for first read */
7977 	info->read		= (unsigned int)-1;
7978 
7979 	filp->private_data = info;
7980 
7981 	tr->trace_ref++;
7982 
7983 	mutex_unlock(&trace_types_lock);
7984 
7985 	ret = nonseekable_open(inode, filp);
7986 	if (ret < 0)
7987 		trace_array_put(tr);
7988 
7989 	return ret;
7990 }
7991 
7992 static __poll_t
7993 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7994 {
7995 	struct ftrace_buffer_info *info = filp->private_data;
7996 	struct trace_iterator *iter = &info->iter;
7997 
7998 	return trace_poll(iter, filp, poll_table);
7999 }
8000 
8001 static ssize_t
8002 tracing_buffers_read(struct file *filp, char __user *ubuf,
8003 		     size_t count, loff_t *ppos)
8004 {
8005 	struct ftrace_buffer_info *info = filp->private_data;
8006 	struct trace_iterator *iter = &info->iter;
8007 	ssize_t ret = 0;
8008 	ssize_t size;
8009 
8010 	if (!count)
8011 		return 0;
8012 
8013 #ifdef CONFIG_TRACER_MAX_TRACE
8014 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8015 		return -EBUSY;
8016 #endif
8017 
8018 	if (!info->spare) {
8019 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8020 							  iter->cpu_file);
8021 		if (IS_ERR(info->spare)) {
8022 			ret = PTR_ERR(info->spare);
8023 			info->spare = NULL;
8024 		} else {
8025 			info->spare_cpu = iter->cpu_file;
8026 		}
8027 	}
8028 	if (!info->spare)
8029 		return ret;
8030 
8031 	/* Do we have previous read data to read? */
8032 	if (info->read < PAGE_SIZE)
8033 		goto read;
8034 
8035  again:
8036 	trace_access_lock(iter->cpu_file);
8037 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8038 				    &info->spare,
8039 				    count,
8040 				    iter->cpu_file, 0);
8041 	trace_access_unlock(iter->cpu_file);
8042 
8043 	if (ret < 0) {
8044 		if (trace_empty(iter)) {
8045 			if ((filp->f_flags & O_NONBLOCK))
8046 				return -EAGAIN;
8047 
8048 			ret = wait_on_pipe(iter, 0);
8049 			if (ret)
8050 				return ret;
8051 
8052 			goto again;
8053 		}
8054 		return 0;
8055 	}
8056 
8057 	info->read = 0;
8058  read:
8059 	size = PAGE_SIZE - info->read;
8060 	if (size > count)
8061 		size = count;
8062 
8063 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8064 	if (ret == size)
8065 		return -EFAULT;
8066 
8067 	size -= ret;
8068 
8069 	*ppos += size;
8070 	info->read += size;
8071 
8072 	return size;
8073 }
8074 
8075 static int tracing_buffers_release(struct inode *inode, struct file *file)
8076 {
8077 	struct ftrace_buffer_info *info = file->private_data;
8078 	struct trace_iterator *iter = &info->iter;
8079 
8080 	mutex_lock(&trace_types_lock);
8081 
8082 	iter->tr->trace_ref--;
8083 
8084 	__trace_array_put(iter->tr);
8085 
8086 	if (info->spare)
8087 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8088 					   info->spare_cpu, info->spare);
8089 	kvfree(info);
8090 
8091 	mutex_unlock(&trace_types_lock);
8092 
8093 	return 0;
8094 }
8095 
8096 struct buffer_ref {
8097 	struct trace_buffer	*buffer;
8098 	void			*page;
8099 	int			cpu;
8100 	refcount_t		refcount;
8101 };
8102 
8103 static void buffer_ref_release(struct buffer_ref *ref)
8104 {
8105 	if (!refcount_dec_and_test(&ref->refcount))
8106 		return;
8107 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8108 	kfree(ref);
8109 }
8110 
8111 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8112 				    struct pipe_buffer *buf)
8113 {
8114 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8115 
8116 	buffer_ref_release(ref);
8117 	buf->private = 0;
8118 }
8119 
8120 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8121 				struct pipe_buffer *buf)
8122 {
8123 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8124 
8125 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8126 		return false;
8127 
8128 	refcount_inc(&ref->refcount);
8129 	return true;
8130 }
8131 
8132 /* Pipe buffer operations for a buffer. */
8133 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8134 	.release		= buffer_pipe_buf_release,
8135 	.get			= buffer_pipe_buf_get,
8136 };
8137 
8138 /*
8139  * Callback from splice_to_pipe(), if we need to release some pages
8140  * at the end of the spd in case we errored out while filling the pipe.
8141  */
8142 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8143 {
8144 	struct buffer_ref *ref =
8145 		(struct buffer_ref *)spd->partial[i].private;
8146 
8147 	buffer_ref_release(ref);
8148 	spd->partial[i].private = 0;
8149 }
8150 
8151 static ssize_t
8152 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8153 			    struct pipe_inode_info *pipe, size_t len,
8154 			    unsigned int flags)
8155 {
8156 	struct ftrace_buffer_info *info = file->private_data;
8157 	struct trace_iterator *iter = &info->iter;
8158 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8159 	struct page *pages_def[PIPE_DEF_BUFFERS];
8160 	struct splice_pipe_desc spd = {
8161 		.pages		= pages_def,
8162 		.partial	= partial_def,
8163 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8164 		.ops		= &buffer_pipe_buf_ops,
8165 		.spd_release	= buffer_spd_release,
8166 	};
8167 	struct buffer_ref *ref;
8168 	int entries, i;
8169 	ssize_t ret = 0;
8170 
8171 #ifdef CONFIG_TRACER_MAX_TRACE
8172 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8173 		return -EBUSY;
8174 #endif
8175 
8176 	if (*ppos & (PAGE_SIZE - 1))
8177 		return -EINVAL;
8178 
8179 	if (len & (PAGE_SIZE - 1)) {
8180 		if (len < PAGE_SIZE)
8181 			return -EINVAL;
8182 		len &= PAGE_MASK;
8183 	}
8184 
8185 	if (splice_grow_spd(pipe, &spd))
8186 		return -ENOMEM;
8187 
8188  again:
8189 	trace_access_lock(iter->cpu_file);
8190 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8191 
8192 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8193 		struct page *page;
8194 		int r;
8195 
8196 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8197 		if (!ref) {
8198 			ret = -ENOMEM;
8199 			break;
8200 		}
8201 
8202 		refcount_set(&ref->refcount, 1);
8203 		ref->buffer = iter->array_buffer->buffer;
8204 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8205 		if (IS_ERR(ref->page)) {
8206 			ret = PTR_ERR(ref->page);
8207 			ref->page = NULL;
8208 			kfree(ref);
8209 			break;
8210 		}
8211 		ref->cpu = iter->cpu_file;
8212 
8213 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8214 					  len, iter->cpu_file, 1);
8215 		if (r < 0) {
8216 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8217 						   ref->page);
8218 			kfree(ref);
8219 			break;
8220 		}
8221 
8222 		page = virt_to_page(ref->page);
8223 
8224 		spd.pages[i] = page;
8225 		spd.partial[i].len = PAGE_SIZE;
8226 		spd.partial[i].offset = 0;
8227 		spd.partial[i].private = (unsigned long)ref;
8228 		spd.nr_pages++;
8229 		*ppos += PAGE_SIZE;
8230 
8231 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8232 	}
8233 
8234 	trace_access_unlock(iter->cpu_file);
8235 	spd.nr_pages = i;
8236 
8237 	/* did we read anything? */
8238 	if (!spd.nr_pages) {
8239 		if (ret)
8240 			goto out;
8241 
8242 		ret = -EAGAIN;
8243 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8244 			goto out;
8245 
8246 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8247 		if (ret)
8248 			goto out;
8249 
8250 		goto again;
8251 	}
8252 
8253 	ret = splice_to_pipe(pipe, &spd);
8254 out:
8255 	splice_shrink_spd(&spd);
8256 
8257 	return ret;
8258 }
8259 
8260 static const struct file_operations tracing_buffers_fops = {
8261 	.open		= tracing_buffers_open,
8262 	.read		= tracing_buffers_read,
8263 	.poll		= tracing_buffers_poll,
8264 	.release	= tracing_buffers_release,
8265 	.splice_read	= tracing_buffers_splice_read,
8266 	.llseek		= no_llseek,
8267 };
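
/*
 * These operations back the per_cpu/cpuN/trace_pipe_raw files (see
 * tracing_init_tracefs_percpu() below).  Reads hand back raw ring-buffer
 * pages, and tracing_buffers_splice_read() insists on page-aligned offsets
 * and lengths, so consumers typically splice() whole pages at a time, e.g.:
 *
 *   splice(raw_fd, NULL, pipe_wr, NULL, 4 * PAGE_SIZE, SPLICE_F_NONBLOCK);
 *
 * (illustrative call; tools such as trace-cmd wrap this pattern)
 */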
8268 
8269 static ssize_t
8270 tracing_stats_read(struct file *filp, char __user *ubuf,
8271 		   size_t count, loff_t *ppos)
8272 {
8273 	struct inode *inode = file_inode(filp);
8274 	struct trace_array *tr = inode->i_private;
8275 	struct array_buffer *trace_buf = &tr->array_buffer;
8276 	int cpu = tracing_get_cpu(inode);
8277 	struct trace_seq *s;
8278 	unsigned long cnt;
8279 	unsigned long long t;
8280 	unsigned long usec_rem;
8281 
8282 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8283 	if (!s)
8284 		return -ENOMEM;
8285 
8286 	trace_seq_init(s);
8287 
8288 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8289 	trace_seq_printf(s, "entries: %ld\n", cnt);
8290 
8291 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8292 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8293 
8294 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8295 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8296 
8297 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8298 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8299 
8300 	if (trace_clocks[tr->clock_id].in_ns) {
8301 		/* local or global for trace_clock */
8302 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8303 		usec_rem = do_div(t, USEC_PER_SEC);
8304 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8305 								t, usec_rem);
8306 
8307 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8308 		usec_rem = do_div(t, USEC_PER_SEC);
8309 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8310 	} else {
8311 		/* counter or tsc mode for trace_clock */
8312 		trace_seq_printf(s, "oldest event ts: %llu\n",
8313 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8314 
8315 		trace_seq_printf(s, "now ts: %llu\n",
8316 				ring_buffer_time_stamp(trace_buf->buffer));
8317 	}
8318 
8319 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8320 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8321 
8322 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8323 	trace_seq_printf(s, "read events: %ld\n", cnt);
8324 
8325 	count = simple_read_from_buffer(ubuf, count, ppos,
8326 					s->buffer, trace_seq_used(s));
8327 
8328 	kfree(s);
8329 
8330 	return count;
8331 }
8332 
8333 static const struct file_operations tracing_stats_fops = {
8334 	.open		= tracing_open_generic_tr,
8335 	.read		= tracing_stats_read,
8336 	.llseek		= generic_file_llseek,
8337 	.release	= tracing_release_generic_tr,
8338 };
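
/*
 * The per_cpu/cpuN/stats output produced above looks roughly like this
 * (field order taken from the trace_seq_printf() calls; values made up):
 *
 *   entries: 4096
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 163840
 *   oldest event ts: 12345.678901
 *   now ts: 12346.000000
 *   dropped events: 0
 *   read events: 128
 */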
8339 
8340 #ifdef CONFIG_DYNAMIC_FTRACE
8341 
8342 static ssize_t
8343 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8344 		  size_t cnt, loff_t *ppos)
8345 {
8346 	ssize_t ret;
8347 	char *buf;
8348 	int r;
8349 
8350 	/* 256 should be plenty to hold the amount needed */
8351 	buf = kmalloc(256, GFP_KERNEL);
8352 	if (!buf)
8353 		return -ENOMEM;
8354 
8355 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8356 		      ftrace_update_tot_cnt,
8357 		      ftrace_number_of_pages,
8358 		      ftrace_number_of_groups);
8359 
8360 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8361 	kfree(buf);
8362 	return ret;
8363 }
8364 
8365 static const struct file_operations tracing_dyn_info_fops = {
8366 	.open		= tracing_open_generic,
8367 	.read		= tracing_read_dyn_info,
8368 	.llseek		= generic_file_llseek,
8369 };
8370 #endif /* CONFIG_DYNAMIC_FTRACE */
8371 
8372 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8373 static void
8374 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8375 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8376 		void *data)
8377 {
8378 	tracing_snapshot_instance(tr);
8379 }
8380 
8381 static void
8382 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8383 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8384 		      void *data)
8385 {
8386 	struct ftrace_func_mapper *mapper = data;
8387 	long *count = NULL;
8388 
8389 	if (mapper)
8390 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8391 
8392 	if (count) {
8393 
8394 		if (*count <= 0)
8395 			return;
8396 
8397 		(*count)--;
8398 	}
8399 
8400 	tracing_snapshot_instance(tr);
8401 }
8402 
8403 static int
8404 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8405 		      struct ftrace_probe_ops *ops, void *data)
8406 {
8407 	struct ftrace_func_mapper *mapper = data;
8408 	long *count = NULL;
8409 
8410 	seq_printf(m, "%ps:", (void *)ip);
8411 
8412 	seq_puts(m, "snapshot");
8413 
8414 	if (mapper)
8415 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8416 
8417 	if (count)
8418 		seq_printf(m, ":count=%ld\n", *count);
8419 	else
8420 		seq_puts(m, ":unlimited\n");
8421 
8422 	return 0;
8423 }
8424 
8425 static int
8426 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8427 		     unsigned long ip, void *init_data, void **data)
8428 {
8429 	struct ftrace_func_mapper *mapper = *data;
8430 
8431 	if (!mapper) {
8432 		mapper = allocate_ftrace_func_mapper();
8433 		if (!mapper)
8434 			return -ENOMEM;
8435 		*data = mapper;
8436 	}
8437 
8438 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8439 }
8440 
8441 static void
8442 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8443 		     unsigned long ip, void *data)
8444 {
8445 	struct ftrace_func_mapper *mapper = data;
8446 
8447 	if (!ip) {
8448 		if (!mapper)
8449 			return;
8450 		free_ftrace_func_mapper(mapper, NULL);
8451 		return;
8452 	}
8453 
8454 	ftrace_func_mapper_remove_ip(mapper, ip);
8455 }
8456 
8457 static struct ftrace_probe_ops snapshot_probe_ops = {
8458 	.func			= ftrace_snapshot,
8459 	.print			= ftrace_snapshot_print,
8460 };
8461 
8462 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8463 	.func			= ftrace_count_snapshot,
8464 	.print			= ftrace_snapshot_print,
8465 	.init			= ftrace_snapshot_init,
8466 	.free			= ftrace_snapshot_free,
8467 };
8468 
8469 static int
8470 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8471 			       char *glob, char *cmd, char *param, int enable)
8472 {
8473 	struct ftrace_probe_ops *ops;
8474 	void *count = (void *)-1;
8475 	char *number;
8476 	int ret;
8477 
8478 	if (!tr)
8479 		return -ENODEV;
8480 
8481 	/* hash funcs only work with set_ftrace_filter */
8482 	if (!enable)
8483 		return -EINVAL;
8484 
8485 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8486 
8487 	if (glob[0] == '!')
8488 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8489 
8490 	if (!param)
8491 		goto out_reg;
8492 
8493 	number = strsep(&param, ":");
8494 
8495 	if (!strlen(number))
8496 		goto out_reg;
8497 
8498 	/*
8499 	 * We use the callback data field (which is a pointer)
8500 	 * as our counter.
8501 	 */
8502 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8503 	if (ret)
8504 		return ret;
8505 
8506  out_reg:
8507 	ret = tracing_alloc_snapshot_instance(tr);
8508 	if (ret < 0)
8509 		goto out;
8510 
8511 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8512 
8513  out:
8514 	return ret < 0 ? ret : 0;
8515 }
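
/*
 * Usage sketch for the "snapshot" command parsed above (the count after the
 * second ':' is optional and limits how many snapshots are taken; the
 * function name is only an example):
 *
 *   echo 'do_sys_open:snapshot'    > set_ftrace_filter
 *   echo 'do_sys_open:snapshot:5'  > set_ftrace_filter
 *   echo '!do_sys_open:snapshot'   > set_ftrace_filter   # remove the probe
 */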
8516 
8517 static struct ftrace_func_command ftrace_snapshot_cmd = {
8518 	.name			= "snapshot",
8519 	.func			= ftrace_trace_snapshot_callback,
8520 };
8521 
8522 static __init int register_snapshot_cmd(void)
8523 {
8524 	return register_ftrace_command(&ftrace_snapshot_cmd);
8525 }
8526 #else
8527 static inline __init int register_snapshot_cmd(void) { return 0; }
8528 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8529 
8530 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8531 {
8532 	if (WARN_ON(!tr->dir))
8533 		return ERR_PTR(-ENODEV);
8534 
8535 	/* Top directory uses NULL as the parent */
8536 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8537 		return NULL;
8538 
8539 	/* All sub buffers have a descriptor */
8540 	return tr->dir;
8541 }
8542 
8543 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8544 {
8545 	struct dentry *d_tracer;
8546 
8547 	if (tr->percpu_dir)
8548 		return tr->percpu_dir;
8549 
8550 	d_tracer = tracing_get_dentry(tr);
8551 	if (IS_ERR(d_tracer))
8552 		return NULL;
8553 
8554 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8555 
8556 	MEM_FAIL(!tr->percpu_dir,
8557 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8558 
8559 	return tr->percpu_dir;
8560 }
8561 
8562 static struct dentry *
8563 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8564 		      void *data, long cpu, const struct file_operations *fops)
8565 {
8566 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8567 
8568 	if (ret) /* See tracing_get_cpu() */
8569 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8570 	return ret;
8571 }
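
/*
 * The +1 above lets a NULL i_cdev mean "no specific CPU".  The decoder,
 * tracing_get_cpu() in trace.h, does roughly the inverse:
 *
 *   if (inode->i_cdev)
 *           return (long)inode->i_cdev - 1;
 *   return RING_BUFFER_ALL_CPUS;
 */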
8572 
8573 static void
8574 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8575 {
8576 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8577 	struct dentry *d_cpu;
8578 	char cpu_dir[30]; /* 30 characters should be more than enough */
8579 
8580 	if (!d_percpu)
8581 		return;
8582 
8583 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8584 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8585 	if (!d_cpu) {
8586 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8587 		return;
8588 	}
8589 
8590 	/* per cpu trace_pipe */
8591 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8592 				tr, cpu, &tracing_pipe_fops);
8593 
8594 	/* per cpu trace */
8595 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8596 				tr, cpu, &tracing_fops);
8597 
8598 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8599 				tr, cpu, &tracing_buffers_fops);
8600 
8601 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8602 				tr, cpu, &tracing_stats_fops);
8603 
8604 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8605 				tr, cpu, &tracing_entries_fops);
8606 
8607 #ifdef CONFIG_TRACER_SNAPSHOT
8608 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8609 				tr, cpu, &snapshot_fops);
8610 
8611 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8612 				tr, cpu, &snapshot_raw_fops);
8613 #endif
8614 }
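
/*
 * The resulting per-CPU layout under each instance looks like:
 *
 *   per_cpu/cpu0/trace_pipe
 *   per_cpu/cpu0/trace
 *   per_cpu/cpu0/trace_pipe_raw
 *   per_cpu/cpu0/stats
 *   per_cpu/cpu0/buffer_size_kb
 *   per_cpu/cpu0/snapshot          (CONFIG_TRACER_SNAPSHOT only)
 *   per_cpu/cpu0/snapshot_raw      (CONFIG_TRACER_SNAPSHOT only)
 */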
8615 
8616 #ifdef CONFIG_FTRACE_SELFTEST
8617 /* Let selftest have access to static functions in this file */
8618 #include "trace_selftest.c"
8619 #endif
8620 
8621 static ssize_t
8622 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8623 			loff_t *ppos)
8624 {
8625 	struct trace_option_dentry *topt = filp->private_data;
8626 	char *buf;
8627 
8628 	if (topt->flags->val & topt->opt->bit)
8629 		buf = "1\n";
8630 	else
8631 		buf = "0\n";
8632 
8633 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8634 }
8635 
8636 static ssize_t
8637 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8638 			 loff_t *ppos)
8639 {
8640 	struct trace_option_dentry *topt = filp->private_data;
8641 	unsigned long val;
8642 	int ret;
8643 
8644 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8645 	if (ret)
8646 		return ret;
8647 
8648 	if (val != 0 && val != 1)
8649 		return -EINVAL;
8650 
8651 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8652 		mutex_lock(&trace_types_lock);
8653 		ret = __set_tracer_option(topt->tr, topt->flags,
8654 					  topt->opt, !val);
8655 		mutex_unlock(&trace_types_lock);
8656 		if (ret)
8657 			return ret;
8658 	}
8659 
8660 	*ppos += cnt;
8661 
8662 	return cnt;
8663 }
8664 
8665 
8666 static const struct file_operations trace_options_fops = {
8667 	.open = tracing_open_generic,
8668 	.read = trace_options_read,
8669 	.write = trace_options_write,
8670 	.llseek	= generic_file_llseek,
8671 };
8672 
8673 /*
8674  * In order to pass in both the trace_array descriptor as well as the index
8675  * to the flag that the trace option file represents, the trace_array
8676  * has a character array of trace_flags_index[], which holds the index
8677  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8678  * The address of this character array is passed to the flag option file
8679  * read/write callbacks.
8680  *
8681  * In order to extract both the index and the trace_array descriptor,
8682  * get_tr_index() uses the following algorithm.
8683  *
8684  *   idx = *ptr;
8685  *
8686  * This works because the value stored at that address is the index itself
8687  * (remember, index[1] == 1).
8688  *
8689  * Then, to get to the trace_array descriptor, subtract that index from
8690  * the pointer to reach the start of the index array:
8691  *
8692  *   ptr - idx == &index[0]
8693  *
8694  * Then a simple container_of() from that pointer gets us to the
8695  * trace_array descriptor.
8696  */
8697 static void get_tr_index(void *data, struct trace_array **ptr,
8698 			 unsigned int *pindex)
8699 {
8700 	*pindex = *(unsigned char *)data;
8701 
8702 	*ptr = container_of(data - *pindex, struct trace_array,
8703 			    trace_flags_index);
8704 }
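
/*
 * Worked example: if data points at tr->trace_flags_index[3], then
 * *data == 3, data - 3 == &tr->trace_flags_index[0], and container_of()
 * on that address recovers the enclosing trace_array.
 */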
8705 
8706 static ssize_t
8707 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8708 			loff_t *ppos)
8709 {
8710 	void *tr_index = filp->private_data;
8711 	struct trace_array *tr;
8712 	unsigned int index;
8713 	char *buf;
8714 
8715 	get_tr_index(tr_index, &tr, &index);
8716 
8717 	if (tr->trace_flags & (1 << index))
8718 		buf = "1\n";
8719 	else
8720 		buf = "0\n";
8721 
8722 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8723 }
8724 
8725 static ssize_t
8726 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8727 			 loff_t *ppos)
8728 {
8729 	void *tr_index = filp->private_data;
8730 	struct trace_array *tr;
8731 	unsigned int index;
8732 	unsigned long val;
8733 	int ret;
8734 
8735 	get_tr_index(tr_index, &tr, &index);
8736 
8737 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8738 	if (ret)
8739 		return ret;
8740 
8741 	if (val != 0 && val != 1)
8742 		return -EINVAL;
8743 
8744 	mutex_lock(&event_mutex);
8745 	mutex_lock(&trace_types_lock);
8746 	ret = set_tracer_flag(tr, 1 << index, val);
8747 	mutex_unlock(&trace_types_lock);
8748 	mutex_unlock(&event_mutex);
8749 
8750 	if (ret < 0)
8751 		return ret;
8752 
8753 	*ppos += cnt;
8754 
8755 	return cnt;
8756 }
8757 
8758 static const struct file_operations trace_options_core_fops = {
8759 	.open = tracing_open_generic,
8760 	.read = trace_options_core_read,
8761 	.write = trace_options_core_write,
8762 	.llseek = generic_file_llseek,
8763 };
8764 
8765 struct dentry *trace_create_file(const char *name,
8766 				 umode_t mode,
8767 				 struct dentry *parent,
8768 				 void *data,
8769 				 const struct file_operations *fops)
8770 {
8771 	struct dentry *ret;
8772 
8773 	ret = tracefs_create_file(name, mode, parent, data, fops);
8774 	if (!ret)
8775 		pr_warn("Could not create tracefs '%s' entry\n", name);
8776 
8777 	return ret;
8778 }
8779 
8780 
8781 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8782 {
8783 	struct dentry *d_tracer;
8784 
8785 	if (tr->options)
8786 		return tr->options;
8787 
8788 	d_tracer = tracing_get_dentry(tr);
8789 	if (IS_ERR(d_tracer))
8790 		return NULL;
8791 
8792 	tr->options = tracefs_create_dir("options", d_tracer);
8793 	if (!tr->options) {
8794 		pr_warn("Could not create tracefs directory 'options'\n");
8795 		return NULL;
8796 	}
8797 
8798 	return tr->options;
8799 }
8800 
8801 static void
8802 create_trace_option_file(struct trace_array *tr,
8803 			 struct trace_option_dentry *topt,
8804 			 struct tracer_flags *flags,
8805 			 struct tracer_opt *opt)
8806 {
8807 	struct dentry *t_options;
8808 
8809 	t_options = trace_options_init_dentry(tr);
8810 	if (!t_options)
8811 		return;
8812 
8813 	topt->flags = flags;
8814 	topt->opt = opt;
8815 	topt->tr = tr;
8816 
8817 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8818 					t_options, topt, &trace_options_fops);
8819 
8820 }
8821 
8822 static void
8823 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8824 {
8825 	struct trace_option_dentry *topts;
8826 	struct trace_options *tr_topts;
8827 	struct tracer_flags *flags;
8828 	struct tracer_opt *opts;
8829 	int cnt;
8830 	int i;
8831 
8832 	if (!tracer)
8833 		return;
8834 
8835 	flags = tracer->flags;
8836 
8837 	if (!flags || !flags->opts)
8838 		return;
8839 
8840 	/*
8841 	 * If this is an instance, only create flags for tracers
8842 	 * the instance may have.
8843 	 */
8844 	if (!trace_ok_for_array(tracer, tr))
8845 		return;
8846 
8847 	for (i = 0; i < tr->nr_topts; i++) {
8848 		/* Make sure there are no duplicate flags. */
8849 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8850 			return;
8851 	}
8852 
8853 	opts = flags->opts;
8854 
8855 	for (cnt = 0; opts[cnt].name; cnt++)
8856 		;
8857 
8858 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8859 	if (!topts)
8860 		return;
8861 
8862 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8863 			    GFP_KERNEL);
8864 	if (!tr_topts) {
8865 		kfree(topts);
8866 		return;
8867 	}
8868 
8869 	tr->topts = tr_topts;
8870 	tr->topts[tr->nr_topts].tracer = tracer;
8871 	tr->topts[tr->nr_topts].topts = topts;
8872 	tr->nr_topts++;
8873 
8874 	for (cnt = 0; opts[cnt].name; cnt++) {
8875 		create_trace_option_file(tr, &topts[cnt], flags,
8876 					 &opts[cnt]);
8877 		MEM_FAIL(topts[cnt].entry == NULL,
8878 			  "Failed to create trace option: %s",
8879 			  opts[cnt].name);
8880 	}
8881 }
8882 
8883 static struct dentry *
8884 create_trace_option_core_file(struct trace_array *tr,
8885 			      const char *option, long index)
8886 {
8887 	struct dentry *t_options;
8888 
8889 	t_options = trace_options_init_dentry(tr);
8890 	if (!t_options)
8891 		return NULL;
8892 
8893 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8894 				 (void *)&tr->trace_flags_index[index],
8895 				 &trace_options_core_fops);
8896 }
8897 
8898 static void create_trace_options_dir(struct trace_array *tr)
8899 {
8900 	struct dentry *t_options;
8901 	bool top_level = tr == &global_trace;
8902 	int i;
8903 
8904 	t_options = trace_options_init_dentry(tr);
8905 	if (!t_options)
8906 		return;
8907 
8908 	for (i = 0; trace_options[i]; i++) {
8909 		if (top_level ||
8910 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8911 			create_trace_option_core_file(tr, trace_options[i], i);
8912 	}
8913 }
8914 
8915 static ssize_t
8916 rb_simple_read(struct file *filp, char __user *ubuf,
8917 	       size_t cnt, loff_t *ppos)
8918 {
8919 	struct trace_array *tr = filp->private_data;
8920 	char buf[64];
8921 	int r;
8922 
8923 	r = tracer_tracing_is_on(tr);
8924 	r = sprintf(buf, "%d\n", r);
8925 
8926 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8927 }
8928 
8929 static ssize_t
8930 rb_simple_write(struct file *filp, const char __user *ubuf,
8931 		size_t cnt, loff_t *ppos)
8932 {
8933 	struct trace_array *tr = filp->private_data;
8934 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8935 	unsigned long val;
8936 	int ret;
8937 
8938 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8939 	if (ret)
8940 		return ret;
8941 
8942 	if (buffer) {
8943 		mutex_lock(&trace_types_lock);
8944 		if (!!val == tracer_tracing_is_on(tr)) {
8945 			val = 0; /* do nothing */
8946 		} else if (val) {
8947 			tracer_tracing_on(tr);
8948 			if (tr->current_trace->start)
8949 				tr->current_trace->start(tr);
8950 		} else {
8951 			tracer_tracing_off(tr);
8952 			if (tr->current_trace->stop)
8953 				tr->current_trace->stop(tr);
8954 		}
8955 		mutex_unlock(&trace_types_lock);
8956 	}
8957 
8958 	(*ppos)++;
8959 
8960 	return cnt;
8961 }
8962 
8963 static const struct file_operations rb_simple_fops = {
8964 	.open		= tracing_open_generic_tr,
8965 	.read		= rb_simple_read,
8966 	.write		= rb_simple_write,
8967 	.release	= tracing_release_generic_tr,
8968 	.llseek		= default_llseek,
8969 };
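
/*
 * rb_simple_fops backs the "tracing_on" file (see init_tracer_tracefs()
 * below).  Typical use from user space:
 *
 *   echo 0 > tracing_on    # stop writing to the ring buffer, call ->stop()
 *   echo 1 > tracing_on    # resume writing, call the tracer's ->start()
 */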
8970 
8971 static ssize_t
8972 buffer_percent_read(struct file *filp, char __user *ubuf,
8973 		    size_t cnt, loff_t *ppos)
8974 {
8975 	struct trace_array *tr = filp->private_data;
8976 	char buf[64];
8977 	int r;
8978 
8979 	r = tr->buffer_percent;
8980 	r = sprintf(buf, "%d\n", r);
8981 
8982 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8983 }
8984 
8985 static ssize_t
8986 buffer_percent_write(struct file *filp, const char __user *ubuf,
8987 		     size_t cnt, loff_t *ppos)
8988 {
8989 	struct trace_array *tr = filp->private_data;
8990 	unsigned long val;
8991 	int ret;
8992 
8993 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8994 	if (ret)
8995 		return ret;
8996 
8997 	if (val > 100)
8998 		return -EINVAL;
8999 
9000 	if (!val)
9001 		val = 1;
9002 
9003 	tr->buffer_percent = val;
9004 
9005 	(*ppos)++;
9006 
9007 	return cnt;
9008 }
9009 
9010 static const struct file_operations buffer_percent_fops = {
9011 	.open		= tracing_open_generic_tr,
9012 	.read		= buffer_percent_read,
9013 	.write		= buffer_percent_write,
9014 	.release	= tracing_release_generic_tr,
9015 	.llseek		= default_llseek,
9016 };
9017 
9018 static struct dentry *trace_instance_dir;
9019 
9020 static void
9021 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9022 
9023 static int
9024 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9025 {
9026 	enum ring_buffer_flags rb_flags;
9027 
9028 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9029 
9030 	buf->tr = tr;
9031 
9032 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9033 	if (!buf->buffer)
9034 		return -ENOMEM;
9035 
9036 	buf->data = alloc_percpu(struct trace_array_cpu);
9037 	if (!buf->data) {
9038 		ring_buffer_free(buf->buffer);
9039 		buf->buffer = NULL;
9040 		return -ENOMEM;
9041 	}
9042 
9043 	/* Allocate the first page for all buffers */
9044 	set_buffer_entries(&tr->array_buffer,
9045 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9046 
9047 	return 0;
9048 }
9049 
9050 static int allocate_trace_buffers(struct trace_array *tr, int size)
9051 {
9052 	int ret;
9053 
9054 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9055 	if (ret)
9056 		return ret;
9057 
9058 #ifdef CONFIG_TRACER_MAX_TRACE
9059 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9060 				    allocate_snapshot ? size : 1);
9061 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9062 		ring_buffer_free(tr->array_buffer.buffer);
9063 		tr->array_buffer.buffer = NULL;
9064 		free_percpu(tr->array_buffer.data);
9065 		tr->array_buffer.data = NULL;
9066 		return -ENOMEM;
9067 	}
9068 	tr->allocated_snapshot = allocate_snapshot;
9069 
9070 	/*
9071 	 * Only the top level trace array gets its snapshot allocated
9072 	 * from the kernel command line.
9073 	 */
9074 	allocate_snapshot = false;
9075 #endif
9076 
9077 	return 0;
9078 }
9079 
9080 static void free_trace_buffer(struct array_buffer *buf)
9081 {
9082 	if (buf->buffer) {
9083 		ring_buffer_free(buf->buffer);
9084 		buf->buffer = NULL;
9085 		free_percpu(buf->data);
9086 		buf->data = NULL;
9087 	}
9088 }
9089 
9090 static void free_trace_buffers(struct trace_array *tr)
9091 {
9092 	if (!tr)
9093 		return;
9094 
9095 	free_trace_buffer(&tr->array_buffer);
9096 
9097 #ifdef CONFIG_TRACER_MAX_TRACE
9098 	free_trace_buffer(&tr->max_buffer);
9099 #endif
9100 }
9101 
9102 static void init_trace_flags_index(struct trace_array *tr)
9103 {
9104 	int i;
9105 
9106 	/* Used by the trace options files */
9107 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9108 		tr->trace_flags_index[i] = i;
9109 }
9110 
9111 static void __update_tracer_options(struct trace_array *tr)
9112 {
9113 	struct tracer *t;
9114 
9115 	for (t = trace_types; t; t = t->next)
9116 		add_tracer_options(tr, t);
9117 }
9118 
9119 static void update_tracer_options(struct trace_array *tr)
9120 {
9121 	mutex_lock(&trace_types_lock);
9122 	__update_tracer_options(tr);
9123 	mutex_unlock(&trace_types_lock);
9124 }
9125 
9126 /* Must have trace_types_lock held */
9127 struct trace_array *trace_array_find(const char *instance)
9128 {
9129 	struct trace_array *tr, *found = NULL;
9130 
9131 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9132 		if (tr->name && strcmp(tr->name, instance) == 0) {
9133 			found = tr;
9134 			break;
9135 		}
9136 	}
9137 
9138 	return found;
9139 }
9140 
9141 struct trace_array *trace_array_find_get(const char *instance)
9142 {
9143 	struct trace_array *tr;
9144 
9145 	mutex_lock(&trace_types_lock);
9146 	tr = trace_array_find(instance);
9147 	if (tr)
9148 		tr->ref++;
9149 	mutex_unlock(&trace_types_lock);
9150 
9151 	return tr;
9152 }
9153 
9154 static int trace_array_create_dir(struct trace_array *tr)
9155 {
9156 	int ret;
9157 
9158 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9159 	if (!tr->dir)
9160 		return -EINVAL;
9161 
9162 	ret = event_trace_add_tracer(tr->dir, tr);
9163 	if (ret) {
9164 		tracefs_remove(tr->dir);
9165 		return ret;
9166 	}
9167 
9168 	init_tracer_tracefs(tr, tr->dir);
9169 	__update_tracer_options(tr);
9170 
9171 	return ret;
9172 }
9173 
9174 static struct trace_array *trace_array_create(const char *name)
9175 {
9176 	struct trace_array *tr;
9177 	int ret;
9178 
9179 	ret = -ENOMEM;
9180 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9181 	if (!tr)
9182 		return ERR_PTR(ret);
9183 
9184 	tr->name = kstrdup(name, GFP_KERNEL);
9185 	if (!tr->name)
9186 		goto out_free_tr;
9187 
9188 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9189 		goto out_free_tr;
9190 
9191 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9192 
9193 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9194 
9195 	raw_spin_lock_init(&tr->start_lock);
9196 
9197 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9198 
9199 	tr->current_trace = &nop_trace;
9200 
9201 	INIT_LIST_HEAD(&tr->systems);
9202 	INIT_LIST_HEAD(&tr->events);
9203 	INIT_LIST_HEAD(&tr->hist_vars);
9204 	INIT_LIST_HEAD(&tr->err_log);
9205 
9206 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9207 		goto out_free_tr;
9208 
9209 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9210 		goto out_free_tr;
9211 
9212 	ftrace_init_trace_array(tr);
9213 
9214 	init_trace_flags_index(tr);
9215 
9216 	if (trace_instance_dir) {
9217 		ret = trace_array_create_dir(tr);
9218 		if (ret)
9219 			goto out_free_tr;
9220 	} else
9221 		__trace_early_add_events(tr);
9222 
9223 	list_add(&tr->list, &ftrace_trace_arrays);
9224 
9225 	tr->ref++;
9226 
9227 	return tr;
9228 
9229  out_free_tr:
9230 	ftrace_free_ftrace_ops(tr);
9231 	free_trace_buffers(tr);
9232 	free_cpumask_var(tr->tracing_cpumask);
9233 	kfree(tr->name);
9234 	kfree(tr);
9235 
9236 	return ERR_PTR(ret);
9237 }
9238 
9239 static int instance_mkdir(const char *name)
9240 {
9241 	struct trace_array *tr;
9242 	int ret;
9243 
9244 	mutex_lock(&event_mutex);
9245 	mutex_lock(&trace_types_lock);
9246 
9247 	ret = -EEXIST;
9248 	if (trace_array_find(name))
9249 		goto out_unlock;
9250 
9251 	tr = trace_array_create(name);
9252 
9253 	ret = PTR_ERR_OR_ZERO(tr);
9254 
9255 out_unlock:
9256 	mutex_unlock(&trace_types_lock);
9257 	mutex_unlock(&event_mutex);
9258 	return ret;
9259 }
9260 
9261 /**
9262  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9263  * @name: The name of the trace array to be looked up/created.
9264  *
9265  * Returns a pointer to the trace array with the given name, or NULL if
9266  * it cannot be created.
9267  *
9268  * NOTE: This function increments the reference counter associated with the
9269  * trace array returned. This makes sure it cannot be freed while in use.
9270  * Use trace_array_put() once the trace array is no longer needed.
9271  * If the trace_array is to be freed, trace_array_destroy() needs to
9272  * be called after the trace_array_put(), or simply let user space delete
9273  * it from the tracefs instances directory. But until the
9274  * trace_array_put() is called, user space can not delete it.
9275  *
9276  */
9277 struct trace_array *trace_array_get_by_name(const char *name)
9278 {
9279 	struct trace_array *tr;
9280 
9281 	mutex_lock(&event_mutex);
9282 	mutex_lock(&trace_types_lock);
9283 
9284 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9285 		if (tr->name && strcmp(tr->name, name) == 0)
9286 			goto out_unlock;
9287 	}
9288 
9289 	tr = trace_array_create(name);
9290 
9291 	if (IS_ERR(tr))
9292 		tr = NULL;
9293 out_unlock:
9294 	if (tr)
9295 		tr->ref++;
9296 
9297 	mutex_unlock(&trace_types_lock);
9298 	mutex_unlock(&event_mutex);
9299 	return tr;
9300 }
9301 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
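
/*
 * A minimal usage sketch for the instance API exported here (hypothetical
 * module code; error handling elided):
 *
 *   struct trace_array *tr;
 *
 *   tr = trace_array_get_by_name("my_instance");   // creates or looks up
 *   if (tr) {
 *           ...
 *           trace_array_put(tr);        // drop the reference taken above
 *           trace_array_destroy(tr);    // optional: remove the instance
 *   }
 */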
9302 
9303 static int __remove_instance(struct trace_array *tr)
9304 {
9305 	int i;
9306 
9307 	/* Reference counter for a newly created trace array = 1. */
9308 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9309 		return -EBUSY;
9310 
9311 	list_del(&tr->list);
9312 
9313 	/* Disable all the flags that were enabled coming in */
9314 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9315 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9316 			set_tracer_flag(tr, 1 << i, 0);
9317 	}
9318 
9319 	tracing_set_nop(tr);
9320 	clear_ftrace_function_probes(tr);
9321 	event_trace_del_tracer(tr);
9322 	ftrace_clear_pids(tr);
9323 	ftrace_destroy_function_files(tr);
9324 	tracefs_remove(tr->dir);
9325 	free_percpu(tr->last_func_repeats);
9326 	free_trace_buffers(tr);
9327 
9328 	for (i = 0; i < tr->nr_topts; i++) {
9329 		kfree(tr->topts[i].topts);
9330 	}
9331 	kfree(tr->topts);
9332 
9333 	free_cpumask_var(tr->tracing_cpumask);
9334 	kfree(tr->name);
9335 	kfree(tr);
9336 
9337 	return 0;
9338 }
9339 
9340 int trace_array_destroy(struct trace_array *this_tr)
9341 {
9342 	struct trace_array *tr;
9343 	int ret;
9344 
9345 	if (!this_tr)
9346 		return -EINVAL;
9347 
9348 	mutex_lock(&event_mutex);
9349 	mutex_lock(&trace_types_lock);
9350 
9351 	ret = -ENODEV;
9352 
9353 	/* Make sure the trace array exists before destroying it. */
9354 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9355 		if (tr == this_tr) {
9356 			ret = __remove_instance(tr);
9357 			break;
9358 		}
9359 	}
9360 
9361 	mutex_unlock(&trace_types_lock);
9362 	mutex_unlock(&event_mutex);
9363 
9364 	return ret;
9365 }
9366 EXPORT_SYMBOL_GPL(trace_array_destroy);
9367 
9368 static int instance_rmdir(const char *name)
9369 {
9370 	struct trace_array *tr;
9371 	int ret;
9372 
9373 	mutex_lock(&event_mutex);
9374 	mutex_lock(&trace_types_lock);
9375 
9376 	ret = -ENODEV;
9377 	tr = trace_array_find(name);
9378 	if (tr)
9379 		ret = __remove_instance(tr);
9380 
9381 	mutex_unlock(&trace_types_lock);
9382 	mutex_unlock(&event_mutex);
9383 
9384 	return ret;
9385 }
9386 
9387 static __init void create_trace_instances(struct dentry *d_tracer)
9388 {
9389 	struct trace_array *tr;
9390 
9391 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9392 							 instance_mkdir,
9393 							 instance_rmdir);
9394 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9395 		return;
9396 
9397 	mutex_lock(&event_mutex);
9398 	mutex_lock(&trace_types_lock);
9399 
9400 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9401 		if (!tr->name)
9402 			continue;
9403 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9404 			     "Failed to create instance directory\n"))
9405 			break;
9406 	}
9407 
9408 	mutex_unlock(&trace_types_lock);
9409 	mutex_unlock(&event_mutex);
9410 }
9411 
9412 static void
9413 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9414 {
9415 	struct trace_event_file *file;
9416 	int cpu;
9417 
9418 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9419 			tr, &show_traces_fops);
9420 
9421 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9422 			tr, &set_tracer_fops);
9423 
9424 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9425 			  tr, &tracing_cpumask_fops);
9426 
9427 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9428 			  tr, &tracing_iter_fops);
9429 
9430 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9431 			  tr, &tracing_fops);
9432 
9433 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9434 			  tr, &tracing_pipe_fops);
9435 
9436 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9437 			  tr, &tracing_entries_fops);
9438 
9439 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9440 			  tr, &tracing_total_entries_fops);
9441 
9442 	trace_create_file("free_buffer", 0200, d_tracer,
9443 			  tr, &tracing_free_buffer_fops);
9444 
9445 	trace_create_file("trace_marker", 0220, d_tracer,
9446 			  tr, &tracing_mark_fops);
9447 
9448 	file = __find_event_file(tr, "ftrace", "print");
9449 	if (file && file->dir)
9450 		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9451 				  file, &event_trigger_fops);
9452 	tr->trace_marker_file = file;
9453 
9454 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9455 			  tr, &tracing_mark_raw_fops);
9456 
9457 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9458 			  &trace_clock_fops);
9459 
9460 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9461 			  tr, &rb_simple_fops);
9462 
9463 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9464 			  &trace_time_stamp_mode_fops);
9465 
9466 	tr->buffer_percent = 50;
9467 
9468 	trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9469 			tr, &buffer_percent_fops);
9470 
9471 	create_trace_options_dir(tr);
9472 
9473 	trace_create_maxlat_file(tr, d_tracer);
9474 
9475 	if (ftrace_create_function_files(tr, d_tracer))
9476 		MEM_FAIL(1, "Could not allocate function filter files");
9477 
9478 #ifdef CONFIG_TRACER_SNAPSHOT
9479 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9480 			  tr, &snapshot_fops);
9481 #endif
9482 
9483 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9484 			  tr, &tracing_err_log_fops);
9485 
9486 	for_each_tracing_cpu(cpu)
9487 		tracing_init_tracefs_percpu(tr, cpu);
9488 
9489 	ftrace_init_tracefs(tr, d_tracer);
9490 }
9491 
9492 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9493 {
9494 	struct vfsmount *mnt;
9495 	struct file_system_type *type;
9496 
9497 	/*
9498 	 * To maintain backward compatibility for tools that mount
9499 	 * debugfs to get to the tracing facility, tracefs is automatically
9500 	 * mounted to the debugfs/tracing directory.
9501 	 */
9502 	type = get_fs_type("tracefs");
9503 	if (!type)
9504 		return NULL;
9505 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9506 	put_filesystem(type);
9507 	if (IS_ERR(mnt))
9508 		return NULL;
9509 	mntget(mnt);
9510 
9511 	return mnt;
9512 }
9513 
9514 /**
9515  * tracing_init_dentry - initialize top level trace array
9516  *
9517  * This is called when creating files or directories in the tracing
9518  * directory. It is called via fs_initcall() by any of the boot up code
9519  * and expects to return the dentry of the top level tracing directory.
9520  */
9521 int tracing_init_dentry(void)
9522 {
9523 	struct trace_array *tr = &global_trace;
9524 
9525 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9526 		pr_warn("Tracing disabled due to lockdown\n");
9527 		return -EPERM;
9528 	}
9529 
9530 	/* The top level trace array uses NULL as parent */
9531 	if (tr->dir)
9532 		return 0;
9533 
9534 	if (WARN_ON(!tracefs_initialized()))
9535 		return -ENODEV;
9536 
9537 	/*
9538 	 * As there may still be users that expect the tracing
9539 	 * files to exist in debugfs/tracing, we must automount
9540 	 * the tracefs file system there, so older tools still
9541 	 * work with the newer kernel.
9542 	 */
9543 	tr->dir = debugfs_create_automount("tracing", NULL,
9544 					   trace_automount, NULL);
9545 
9546 	return 0;
9547 }
9548 
9549 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9550 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9551 
9552 static struct workqueue_struct *eval_map_wq __initdata;
9553 static struct work_struct eval_map_work __initdata;
9554 
9555 static void __init eval_map_work_func(struct work_struct *work)
9556 {
9557 	int len;
9558 
9559 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9560 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9561 }
9562 
9563 static int __init trace_eval_init(void)
9564 {
9565 	INIT_WORK(&eval_map_work, eval_map_work_func);
9566 
9567 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9568 	if (!eval_map_wq) {
9569 		pr_err("Unable to allocate eval_map_wq\n");
9570 		/* Do work here */
9571 		eval_map_work_func(&eval_map_work);
9572 		return -ENOMEM;
9573 	}
9574 
9575 	queue_work(eval_map_wq, &eval_map_work);
9576 	return 0;
9577 }
9578 
9579 static int __init trace_eval_sync(void)
9580 {
9581 	/* Make sure the eval map updates are finished */
9582 	if (eval_map_wq)
9583 		destroy_workqueue(eval_map_wq);
9584 	return 0;
9585 }
9586 
9587 late_initcall_sync(trace_eval_sync);
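
/*
 * Illustrative sketch, not part of the kernel source: the eval maps inserted
 * above come from TRACE_DEFINE_ENUM() uses in trace event headers. A
 * hypothetical event header might contain something like the following so
 * that the enum name, rather than its raw value, shows up in trace output.
 */
#if 0	/* example only */
enum my_state {
	MY_STATE_IDLE,
	MY_STATE_BUSY,
};

TRACE_DEFINE_ENUM(MY_STATE_IDLE);
TRACE_DEFINE_ENUM(MY_STATE_BUSY);

/*
 * ...and the event's output format can then resolve the names:
 *	__print_symbolic(__entry->state,
 *			 { MY_STATE_IDLE, "idle" },
 *			 { MY_STATE_BUSY, "busy" })
 */
#endif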
9588 
9589 
9590 #ifdef CONFIG_MODULES
9591 static void trace_module_add_evals(struct module *mod)
9592 {
9593 	if (!mod->num_trace_evals)
9594 		return;
9595 
9596 	/*
9597 	 * Modules with bad taint do not have events created; do
9598 	 * not bother with their enum (eval) maps either.
9599 	 */
9600 	if (trace_module_has_bad_taint(mod))
9601 		return;
9602 
9603 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9604 }
9605 
9606 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9607 static void trace_module_remove_evals(struct module *mod)
9608 {
9609 	union trace_eval_map_item *map;
9610 	union trace_eval_map_item **last = &trace_eval_maps;
9611 
9612 	if (!mod->num_trace_evals)
9613 		return;
9614 
9615 	mutex_lock(&trace_eval_mutex);
9616 
9617 	map = trace_eval_maps;
9618 
9619 	while (map) {
9620 		if (map->head.mod == mod)
9621 			break;
9622 		map = trace_eval_jmp_to_tail(map);
9623 		last = &map->tail.next;
9624 		map = map->tail.next;
9625 	}
9626 	if (!map)
9627 		goto out;
9628 
9629 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9630 	kfree(map);
9631  out:
9632 	mutex_unlock(&trace_eval_mutex);
9633 }
9634 #else
9635 static inline void trace_module_remove_evals(struct module *mod) { }
9636 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
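
/*
 * Illustrative sketch, not part of the kernel source: the removal above uses
 * the classic "pointer to the previous link" idiom, shown here generically
 * with a hypothetical singly linked node type.
 */
#if 0	/* example only */
struct node {
	struct node *next;
	int key;
};

static void remove_key(struct node **head, int key)
{
	struct node **last = head;
	struct node *n;

	for (n = *head; n; last = &n->next, n = n->next) {
		if (n->key == key) {
			*last = n->next;	/* unlink without a "prev" node */
			kfree(n);
			return;
		}
	}
}
#endif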
9637 
9638 static int trace_module_notify(struct notifier_block *self,
9639 			       unsigned long val, void *data)
9640 {
9641 	struct module *mod = data;
9642 
9643 	switch (val) {
9644 	case MODULE_STATE_COMING:
9645 		trace_module_add_evals(mod);
9646 		break;
9647 	case MODULE_STATE_GOING:
9648 		trace_module_remove_evals(mod);
9649 		break;
9650 	}
9651 
9652 	return NOTIFY_OK;
9653 }
9654 
9655 static struct notifier_block trace_module_nb = {
9656 	.notifier_call = trace_module_notify,
9657 	.priority = 0,
9658 };
9659 #endif /* CONFIG_MODULES */
9660 
9661 static __init int tracer_init_tracefs(void)
9662 {
9663 	int ret;
9664 
9665 	trace_access_lock_init();
9666 
9667 	ret = tracing_init_dentry();
9668 	if (ret)
9669 		return 0;
9670 
9671 	event_trace_init();
9672 
9673 	init_tracer_tracefs(&global_trace, NULL);
9674 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9675 
9676 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9677 			&global_trace, &tracing_thresh_fops);
9678 
9679 	trace_create_file("README", TRACE_MODE_READ, NULL,
9680 			NULL, &tracing_readme_fops);
9681 
9682 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9683 			NULL, &tracing_saved_cmdlines_fops);
9684 
9685 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9686 			  NULL, &tracing_saved_cmdlines_size_fops);
9687 
9688 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9689 			NULL, &tracing_saved_tgids_fops);
9690 
9691 	trace_eval_init();
9692 
9693 	trace_create_eval_file(NULL);
9694 
9695 #ifdef CONFIG_MODULES
9696 	register_module_notifier(&trace_module_nb);
9697 #endif
9698 
9699 #ifdef CONFIG_DYNAMIC_FTRACE
9700 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9701 			NULL, &tracing_dyn_info_fops);
9702 #endif
9703 
9704 	create_trace_instances(NULL);
9705 
9706 	update_tracer_options(&global_trace);
9707 
9708 	return 0;
9709 }
9710 
9711 fs_initcall(tracer_init_tracefs);
9712 
9713 static int trace_panic_handler(struct notifier_block *this,
9714 			       unsigned long event, void *unused)
9715 {
9716 	if (ftrace_dump_on_oops)
9717 		ftrace_dump(ftrace_dump_on_oops);
9718 	return NOTIFY_OK;
9719 }
9720 
9721 static struct notifier_block trace_panic_notifier = {
9722 	.notifier_call  = trace_panic_handler,
9723 	.next           = NULL,
9724 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9725 };
9726 
9727 static int trace_die_handler(struct notifier_block *self,
9728 			     unsigned long val,
9729 			     void *data)
9730 {
9731 	switch (val) {
9732 	case DIE_OOPS:
9733 		if (ftrace_dump_on_oops)
9734 			ftrace_dump(ftrace_dump_on_oops);
9735 		break;
9736 	default:
9737 		break;
9738 	}
9739 	return NOTIFY_OK;
9740 }
9741 
9742 static struct notifier_block trace_die_notifier = {
9743 	.notifier_call = trace_die_handler,
9744 	.priority = 200
9745 };
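
/*
 * Illustrative sketch, not part of the kernel source: both notifiers above
 * only act when ftrace_dump_on_oops is set, either via the
 * "ftrace_dump_on_oops" boot parameter or, assuming the standard
 * kernel.ftrace_dump_on_oops sysctl, at run time from user space:
 */
#if 0	/* example only, user-space code */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/ftrace_dump_on_oops", "w");

	if (!f)
		return 1;
	/* 1 = dump all CPU buffers on oops, 2 = only the oops CPU's buffer */
	fputs("1\n", f);
	fclose(f);
	return 0;
}
#endif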
9746 
9747 /*
9748  * printk is limited to a max of 1024 bytes; we really don't need it that big.
9749  * Nothing should be printing 1000 characters anyway.
9750  */
9751 #define TRACE_MAX_PRINT		1000
9752 
9753 /*
9754  * Define KERN_TRACE here so that we have one place to modify
9755  * it if we decide to change what log level the ftrace dump
9756  * should be at.
9757  */
9758 #define KERN_TRACE		KERN_EMERG
9759 
9760 void
9761 trace_printk_seq(struct trace_seq *s)
9762 {
9763 	/* Probably should print a warning here. */
9764 	if (s->seq.len >= TRACE_MAX_PRINT)
9765 		s->seq.len = TRACE_MAX_PRINT;
9766 
9767 	/*
9768 	 * More paranoid code. Although the buffer size is set to
9769 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9770 	 * an extra layer of protection.
9771 	 */
9772 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9773 		s->seq.len = s->seq.size - 1;
9774 
9775 	/* should already be nul-terminated, but we are paranoid. */
9776 	s->buffer[s->seq.len] = 0;
9777 
9778 	printk(KERN_TRACE "%s", s->buffer);
9779 
9780 	trace_seq_init(s);
9781 }
9782 
9783 void trace_init_global_iter(struct trace_iterator *iter)
9784 {
9785 	iter->tr = &global_trace;
9786 	iter->trace = iter->tr->current_trace;
9787 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9788 	iter->array_buffer = &global_trace.array_buffer;
9789 
9790 	if (iter->trace && iter->trace->open)
9791 		iter->trace->open(iter);
9792 
9793 	/* Annotate start of buffers if we had overruns */
9794 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9795 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9796 
9797 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9798 	if (trace_clocks[iter->tr->clock_id].in_ns)
9799 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9800 }
9801 
9802 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9803 {
9804 	/* use static because iter can be a bit big for the stack */
9805 	static struct trace_iterator iter;
9806 	static atomic_t dump_running;
9807 	struct trace_array *tr = &global_trace;
9808 	unsigned int old_userobj;
9809 	unsigned long flags;
9810 	int cnt = 0, cpu;
9811 
9812 	/* Only allow one dump user at a time. */
9813 	if (atomic_inc_return(&dump_running) != 1) {
9814 		atomic_dec(&dump_running);
9815 		return;
9816 	}
9817 
9818 	/*
9819 	 * Always turn off tracing when we dump.
9820 	 * We don't need to show trace output of what happens
9821 	 * between multiple crashes.
9822 	 *
9823 	 * If the user does a sysrq-z, then they can re-enable
9824 	 * tracing with echo 1 > tracing_on.
9825 	 */
9826 	tracing_off();
9827 
9828 	local_irq_save(flags);
9829 
9830 	/* Simulate the iterator */
9831 	trace_init_global_iter(&iter);
9832 	/* Can not use kmalloc for iter.temp and iter.fmt */
9833 	iter.temp = static_temp_buf;
9834 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9835 	iter.fmt = static_fmt_buf;
9836 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9837 
9838 	for_each_tracing_cpu(cpu) {
9839 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9840 	}
9841 
9842 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9843 
9844 	/* don't look at user memory in panic mode */
9845 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9846 
9847 	switch (oops_dump_mode) {
9848 	case DUMP_ALL:
9849 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9850 		break;
9851 	case DUMP_ORIG:
9852 		iter.cpu_file = raw_smp_processor_id();
9853 		break;
9854 	case DUMP_NONE:
9855 		goto out_enable;
9856 	default:
9857 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9858 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9859 	}
9860 
9861 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9862 
9863 	/* Did function tracer already get disabled? */
9864 	if (ftrace_is_dead()) {
9865 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9866 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9867 	}
9868 
9869 	/*
9870 	 * We need to stop all tracing on all CPUs to read
9871 	 * the next buffer. This is a bit expensive, but it is
9872 	 * not done often. We print everything we can read,
9873 	 * and then release the locks again.
9874 	 */
9875 
9876 	while (!trace_empty(&iter)) {
9877 
9878 		if (!cnt)
9879 			printk(KERN_TRACE "---------------------------------\n");
9880 
9881 		cnt++;
9882 
9883 		trace_iterator_reset(&iter);
9884 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9885 
9886 		if (trace_find_next_entry_inc(&iter) != NULL) {
9887 			int ret;
9888 
9889 			ret = print_trace_line(&iter);
9890 			if (ret != TRACE_TYPE_NO_CONSUME)
9891 				trace_consume(&iter);
9892 		}
9893 		touch_nmi_watchdog();
9894 
9895 		trace_printk_seq(&iter.seq);
9896 	}
9897 
9898 	if (!cnt)
9899 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9900 	else
9901 		printk(KERN_TRACE "---------------------------------\n");
9902 
9903  out_enable:
9904 	tr->trace_flags |= old_userobj;
9905 
9906 	for_each_tracing_cpu(cpu) {
9907 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9908 	}
9909 	atomic_dec(&dump_running);
9910 	local_irq_restore(flags);
9911 }
9912 EXPORT_SYMBOL_GPL(ftrace_dump);
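
/*
 * Illustrative sketch, not part of the kernel source: since ftrace_dump() is
 * exported, a module could dump the trace buffer from its own error path.
 * The function name below is hypothetical.
 */
#if 0	/* example only */
static void my_driver_fatal_error(void)
{
	/* Dump only the current CPU's buffer to keep the output short. */
	ftrace_dump(DUMP_ORIG);
	/* DUMP_ALL would print every CPU's buffer instead. */
}
#endif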
9913 
9914 #define WRITE_BUFSIZE  4096
9915 
9916 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9917 				size_t count, loff_t *ppos,
9918 				int (*createfn)(const char *))
9919 {
9920 	char *kbuf, *buf, *tmp;
9921 	int ret = 0;
9922 	size_t done = 0;
9923 	size_t size;
9924 
9925 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9926 	if (!kbuf)
9927 		return -ENOMEM;
9928 
9929 	while (done < count) {
9930 		size = count - done;
9931 
9932 		if (size >= WRITE_BUFSIZE)
9933 			size = WRITE_BUFSIZE - 1;
9934 
9935 		if (copy_from_user(kbuf, buffer + done, size)) {
9936 			ret = -EFAULT;
9937 			goto out;
9938 		}
9939 		kbuf[size] = '\0';
9940 		buf = kbuf;
9941 		do {
9942 			tmp = strchr(buf, '\n');
9943 			if (tmp) {
9944 				*tmp = '\0';
9945 				size = tmp - buf + 1;
9946 			} else {
9947 				size = strlen(buf);
9948 				if (done + size < count) {
9949 					if (buf != kbuf)
9950 						break;
9951 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9952 					pr_warn("Line length is too long: Should be less than %d\n",
9953 						WRITE_BUFSIZE - 2);
9954 					ret = -EINVAL;
9955 					goto out;
9956 				}
9957 			}
9958 			done += size;
9959 
9960 			/* Remove comments */
9961 			tmp = strchr(buf, '#');
9962 
9963 			if (tmp)
9964 				*tmp = '\0';
9965 
9966 			ret = createfn(buf);
9967 			if (ret)
9968 				goto out;
9969 			buf += size;
9970 
9971 		} while (done < count);
9972 	}
9973 	ret = done;
9974 
9975 out:
9976 	kfree(kbuf);
9977 
9978 	return ret;
9979 }
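
/*
 * Illustrative sketch, not part of the kernel source: trace_parse_run_command()
 * is how dynamic event files (e.g. kprobe_events) consume user writes, invoking
 * the callback once per newline-terminated, comment-stripped command. The
 * callback and write handler below are hypothetical.
 */
#if 0	/* example only */
static int my_create_command(const char *raw_command)
{
	pr_info("got command: %s\n", raw_command);
	return 0;	/* a non-zero return aborts the remaining input */
}

static ssize_t my_events_write(struct file *file, const char __user *buffer,
			       size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       my_create_command);
}
#endif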
9980 
9981 __init static int tracer_alloc_buffers(void)
9982 {
9983 	int ring_buf_size;
9984 	int ret = -ENOMEM;
9985 
9986 
9987 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9988 		pr_warn("Tracing disabled due to lockdown\n");
9989 		return -EPERM;
9990 	}
9991 
9992 	/*
9993 	 * Make sure we don't accidentally add more trace options
9994 	 * than we have bits for.
9995 	 */
9996 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9997 
9998 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9999 		goto out;
10000 
10001 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10002 		goto out_free_buffer_mask;
10003 
10004 	/* Only allocate trace_printk buffers if a trace_printk exists */
10005 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10006 		/* Must be called before global_trace.buffer is allocated */
10007 		trace_printk_init_buffers();
10008 
10009 	/* To save memory, keep the ring buffer size to its minimum */
10010 	if (ring_buffer_expanded)
10011 		ring_buf_size = trace_buf_size;
10012 	else
10013 		ring_buf_size = 1;
10014 
10015 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10016 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10017 
10018 	raw_spin_lock_init(&global_trace.start_lock);
10019 
10020 	/*
10021 	 * The prepare callback allocates some memory for the ring buffer. We
10022 	 * don't free the buffer if the CPU goes down. If we were to free
10023 	 * the buffer, then the user would lose any trace that was in the
10024 	 * buffer. The memory will be removed once the "instance" is removed.
10025 	 */
10026 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10027 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10028 				      NULL);
10029 	if (ret < 0)
10030 		goto out_free_cpumask;
10031 	/* Used for event triggers */
10032 	ret = -ENOMEM;
10033 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10034 	if (!temp_buffer)
10035 		goto out_rm_hp_state;
10036 
10037 	if (trace_create_savedcmd() < 0)
10038 		goto out_free_temp_buffer;
10039 
10040 	/* TODO: make the number of buffers hot pluggable with CPUS */
10041 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10042 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10043 		goto out_free_savedcmd;
10044 	}
10045 
10046 	if (global_trace.buffer_disabled)
10047 		tracing_off();
10048 
10049 	if (trace_boot_clock) {
10050 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10051 		if (ret < 0)
10052 			pr_warn("Trace clock %s not defined, going back to default\n",
10053 				trace_boot_clock);
10054 	}
10055 
10056 	/*
10057 	 * register_tracer() might reference current_trace, so it
10058 	 * needs to be set before we register anything. This is
10059 	 * just a bootstrap of current_trace anyway.
10060 	 */
10061 	global_trace.current_trace = &nop_trace;
10062 
10063 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10064 
10065 	ftrace_init_global_array_ops(&global_trace);
10066 
10067 	init_trace_flags_index(&global_trace);
10068 
10069 	register_tracer(&nop_trace);
10070 
10071 	/* Function tracing may start here (via kernel command line) */
10072 	init_function_trace();
10073 
10074 	/* All seems OK, enable tracing */
10075 	tracing_disabled = 0;
10076 
10077 	atomic_notifier_chain_register(&panic_notifier_list,
10078 				       &trace_panic_notifier);
10079 
10080 	register_die_notifier(&trace_die_notifier);
10081 
10082 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10083 
10084 	INIT_LIST_HEAD(&global_trace.systems);
10085 	INIT_LIST_HEAD(&global_trace.events);
10086 	INIT_LIST_HEAD(&global_trace.hist_vars);
10087 	INIT_LIST_HEAD(&global_trace.err_log);
10088 	list_add(&global_trace.list, &ftrace_trace_arrays);
10089 
10090 	apply_trace_boot_options();
10091 
10092 	register_snapshot_cmd();
10093 
10094 	test_can_verify();
10095 
10096 	return 0;
10097 
10098 out_free_savedcmd:
10099 	free_saved_cmdlines_buffer(savedcmd);
10100 out_free_temp_buffer:
10101 	ring_buffer_free(temp_buffer);
10102 out_rm_hp_state:
10103 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10104 out_free_cpumask:
10105 	free_cpumask_var(global_trace.tracing_cpumask);
10106 out_free_buffer_mask:
10107 	free_cpumask_var(tracing_buffer_mask);
10108 out:
10109 	return ret;
10110 }
10111 
10112 void __init early_trace_init(void)
10113 {
10114 	if (tracepoint_printk) {
10115 		tracepoint_print_iter =
10116 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10117 		if (MEM_FAIL(!tracepoint_print_iter,
10118 			     "Failed to allocate trace iterator\n"))
10119 			tracepoint_printk = 0;
10120 		else
10121 			static_key_enable(&tracepoint_printk_key.key);
10122 	}
10123 	tracer_alloc_buffers();
10124 }
10125 
10126 void __init trace_init(void)
10127 {
10128 	trace_event_init();
10129 }
10130 
10131 __init static void clear_boot_tracer(void)
10132 {
10133 	/*
10134 	 * The default boot-up tracer name points into an init section.
10135 	 * This function is called at late init. If the boot tracer was
10136 	 * never registered, clear the pointer out to prevent a later
10137 	 * registration from accessing the name buffer that is about
10138 	 * to be freed.
10139 	 */
10140 	if (!default_bootup_tracer)
10141 		return;
10142 
10143 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10144 	       default_bootup_tracer);
10145 	default_bootup_tracer = NULL;
10146 }
10147 
10148 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10149 __init static void tracing_set_default_clock(void)
10150 {
10151 	/* sched_clock_stable() is determined in late_initcall */
10152 	if (!trace_boot_clock && !sched_clock_stable()) {
10153 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10154 			pr_warn("Can not set tracing clock due to lockdown\n");
10155 			return;
10156 		}
10157 
10158 		printk(KERN_WARNING
10159 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10160 		       "If you want to keep using the local clock, then add:\n"
10161 		       "  \"trace_clock=local\"\n"
10162 		       "on the kernel command line\n");
10163 		tracing_set_clock(&global_trace, "global");
10164 	}
10165 }
10166 #else
10167 static inline void tracing_set_default_clock(void) { }
10168 #endif
10169 
10170 __init static int late_trace_init(void)
10171 {
10172 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10173 		static_key_disable(&tracepoint_printk_key.key);
10174 		tracepoint_printk = 0;
10175 	}
10176 
10177 	tracing_set_default_clock();
10178 	clear_boot_tracer();
10179 	return 0;
10180 }
10181 
10182 late_initcall_sync(late_trace_init);
10183