xref: /linux/kernel/trace/trace.c (revision 4e9f63c9e5c2597692567ee1cb0851a21104a531)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
125  * is set, then ftrace_dump() is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it by either specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" is first and points to NULL as it must be different
154 	 * than "mod" or "eval_string"
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
174 
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 	default_bootup_tracer = bootup_tracer_buf;
193 	/* We are using ftrace early, expand it */
194 	ring_buffer_expanded = true;
195 	return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198 
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 		ftrace_dump_on_oops = DUMP_ALL;
203 		return 1;
204 	}
205 
206 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 		ftrace_dump_on_oops = DUMP_ORIG;
208 		return 1;
209 	}
210 
211 	return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214 
215 static int __init stop_trace_on_warning(char *str)
216 {
217 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 		__disable_trace_on_warning = 1;
219 	return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222 
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 	allocate_snapshot = true;
226 	/* We also need the main ring buffer expanded */
227 	ring_buffer_expanded = true;
228 	return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231 
232 
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234 
235 static int __init set_trace_boot_options(char *str)
236 {
237 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 	return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241 
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244 
245 static int __init set_trace_boot_clock(char *str)
246 {
247 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 	trace_boot_clock = trace_boot_clock_buf;
249 	return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252 
253 static int __init set_tracepoint_printk(char *str)
254 {
255 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256 		tracepoint_printk = 1;
257 	return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260 
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263 	tracepoint_printk_stop_on_boot = true;
264 	return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267 
268 unsigned long long ns2usecs(u64 nsec)
269 {
270 	nsec += 500;
271 	do_div(nsec, 1000);
272 	return nsec;
273 }
274 
275 static void
276 trace_process_export(struct trace_export *export,
277 	       struct ring_buffer_event *event, int flag)
278 {
279 	struct trace_entry *entry;
280 	unsigned int size = 0;
281 
282 	if (export->flags & flag) {
283 		entry = ring_buffer_event_data(event);
284 		size = ring_buffer_event_length(event);
285 		export->write(export, entry, size);
286 	}
287 }
288 
289 static DEFINE_MUTEX(ftrace_export_lock);
290 
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292 
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296 
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299 	if (export->flags & TRACE_EXPORT_FUNCTION)
300 		static_branch_inc(&trace_function_exports_enabled);
301 
302 	if (export->flags & TRACE_EXPORT_EVENT)
303 		static_branch_inc(&trace_event_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_MARKER)
306 		static_branch_inc(&trace_marker_exports_enabled);
307 }
308 
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311 	if (export->flags & TRACE_EXPORT_FUNCTION)
312 		static_branch_dec(&trace_function_exports_enabled);
313 
314 	if (export->flags & TRACE_EXPORT_EVENT)
315 		static_branch_dec(&trace_event_exports_enabled);
316 
317 	if (export->flags & TRACE_EXPORT_MARKER)
318 		static_branch_dec(&trace_marker_exports_enabled);
319 }
320 
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323 	struct trace_export *export;
324 
325 	preempt_disable_notrace();
326 
327 	export = rcu_dereference_raw_check(ftrace_exports_list);
328 	while (export) {
329 		trace_process_export(export, event, flag);
330 		export = rcu_dereference_raw_check(export->next);
331 	}
332 
333 	preempt_enable_notrace();
334 }
335 
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339 	rcu_assign_pointer(export->next, *list);
340 	/*
341 	 * We are adding the export to the list, but another
342 	 * CPU might be walking that list. We need to make sure
343 	 * the export->next pointer is valid before another CPU sees
344 	 * the export pointer included in the list.
345 	 */
346 	rcu_assign_pointer(*list, export);
347 }
348 
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352 	struct trace_export **p;
353 
354 	for (p = list; *p != NULL; p = &(*p)->next)
355 		if (*p == export)
356 			break;
357 
358 	if (*p != export)
359 		return -1;
360 
361 	rcu_assign_pointer(*p, (*p)->next);
362 
363 	return 0;
364 }
365 
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369 	ftrace_exports_enable(export);
370 
371 	add_trace_export(list, export);
372 }
373 
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377 	int ret;
378 
379 	ret = rm_trace_export(list, export);
380 	ftrace_exports_disable(export);
381 
382 	return ret;
383 }
384 
385 int register_ftrace_export(struct trace_export *export)
386 {
387 	if (WARN_ON_ONCE(!export->write))
388 		return -1;
389 
390 	mutex_lock(&ftrace_export_lock);
391 
392 	add_ftrace_export(&ftrace_exports_list, export);
393 
394 	mutex_unlock(&ftrace_export_lock);
395 
396 	return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399 
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402 	int ret;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	ret = rm_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
413 
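/*
 * Illustrative sketch (not part of this file): a module that wants a copy
 * of, e.g., every function trace entry could hook the export path roughly
 * like this. The names my_export_write and my_export are hypothetical.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry to some external transport
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	// module init:
 *	register_ftrace_export(&my_export);
 *	// module exit:
 *	unregister_ftrace_export(&my_export);
 */
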
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS						\
416 	(FUNCTION_DEFAULT_FLAGS |					\
417 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
418 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
419 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
420 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
421 	 TRACE_ITER_HASH_PTR)
422 
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
425 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426 
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430 
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436 	.trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438 
439 LIST_HEAD(ftrace_trace_arrays);
440 
441 int trace_array_get(struct trace_array *this_tr)
442 {
443 	struct trace_array *tr;
444 	int ret = -ENODEV;
445 
446 	mutex_lock(&trace_types_lock);
447 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448 		if (tr == this_tr) {
449 			tr->ref++;
450 			ret = 0;
451 			break;
452 		}
453 	}
454 	mutex_unlock(&trace_types_lock);
455 
456 	return ret;
457 }
458 
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461 	WARN_ON(!this_tr->ref);
462 	this_tr->ref--;
463 }
464 
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476 	if (!this_tr)
477 		return;
478 
479 	mutex_lock(&trace_types_lock);
480 	__trace_array_put(this_tr);
481 	mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484 
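/*
 * Illustrative sketch (not part of this file): a module using the instance
 * API pairs the lookup with trace_array_put() once it is done with the
 * array. "my_instance" is a hypothetical instance name.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		// ... use the instance (e.g. enable events on it) ...
 *		trace_array_put(tr);
 *	}
 */
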
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487 	int ret;
488 
489 	ret = security_locked_down(LOCKDOWN_TRACEFS);
490 	if (ret)
491 		return ret;
492 
493 	if (tracing_disabled)
494 		return -ENODEV;
495 
496 	if (tr && trace_array_get(tr) < 0)
497 		return -ENODEV;
498 
499 	return 0;
500 }
501 
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503 			      struct trace_buffer *buffer,
504 			      struct ring_buffer_event *event)
505 {
506 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507 	    !filter_match_preds(call->filter, rec)) {
508 		__trace_event_discard_commit(buffer, event);
509 		return 1;
510 	}
511 
512 	return 0;
513 }
514 
515 /**
516  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
517  * @filtered_pids: The list of pids to check
518  * @search_pid: The PID to find in @filtered_pids
519  *
520  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
521  */
522 bool
523 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
524 {
525 	return trace_pid_list_is_set(filtered_pids, search_pid);
526 }
527 
528 /**
529  * trace_ignore_this_task - should a task be ignored for tracing
530  * @filtered_pids: The list of pids to check
531  * @filtered_no_pids: The list of pids not to be traced
532  * @task: The task that should be ignored if not filtered
533  *
534  * Checks if @task should be traced or not from @filtered_pids.
535  * Returns true if @task should *NOT* be traced.
536  * Returns false if @task should be traced.
537  */
538 bool
539 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
540 		       struct trace_pid_list *filtered_no_pids,
541 		       struct task_struct *task)
542 {
543 	/*
544 	 * If filtered_no_pids is not empty, and the task's pid is listed
545 	 * in filtered_no_pids, then return true.
546 	 * Otherwise, if filtered_pids is empty, that means we can
547 	 * trace all tasks. If it has content, then only trace pids
548 	 * within filtered_pids.
549 	 */
550 
551 	return (filtered_pids &&
552 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
553 		(filtered_no_pids &&
554 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
555 }
556 
557 /**
558  * trace_filter_add_remove_task - Add or remove a task from a pid_list
559  * @pid_list: The list to modify
560  * @self: The current task for fork or NULL for exit
561  * @task: The task to add or remove
562  *
563  * If adding a task, if @self is defined, the task is only added if @self
564  * is also included in @pid_list. This happens on fork and tasks should
565  * only be added when the parent is listed. If @self is NULL, then the
566  * @task pid will be removed from the list, which would happen on exit
567  * of a task.
568  */
569 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
570 				  struct task_struct *self,
571 				  struct task_struct *task)
572 {
573 	if (!pid_list)
574 		return;
575 
576 	/* For forks, we only add if the forking task is listed */
577 	if (self) {
578 		if (!trace_find_filtered_pid(pid_list, self->pid))
579 			return;
580 	}
581 
582 	/* "self" is set for forks, and NULL for exits */
583 	if (self)
584 		trace_pid_list_set(pid_list, task->pid);
585 	else
586 		trace_pid_list_clear(pid_list, task->pid);
587 }
588 
589 /**
590  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
591  * @pid_list: The pid list to show
592  * @v: The last pid that was shown (+1 of the actual pid so that zero can be displayed)
593  * @pos: The position of the file
594  *
595  * This is used by the seq_file "next" operation to iterate the pids
596  * listed in a trace_pid_list structure.
597  *
598  * Returns the pid+1 as we want to display pid of zero, but NULL would
599  * stop the iteration.
600  */
601 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
602 {
603 	long pid = (unsigned long)v;
604 	unsigned int next;
605 
606 	(*pos)++;
607 
608 	/* pid already is +1 of the actual previous bit */
609 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
610 		return NULL;
611 
612 	pid = next;
613 
614 	/* Return pid + 1 to allow zero to be represented */
615 	return (void *)(pid + 1);
616 }
617 
618 /**
619  * trace_pid_start - Used for seq_file to start reading pid lists
620  * @pid_list: The pid list to show
621  * @pos: The position of the file
622  *
623  * This is used by seq_file "start" operation to start the iteration
624  * of listing pids.
625  *
626  * Returns the pid+1 as we want to display pid of zero, but NULL would
627  * stop the iteration.
628  */
629 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
630 {
631 	unsigned long pid;
632 	unsigned int first;
633 	loff_t l = 0;
634 
635 	if (trace_pid_list_first(pid_list, &first) < 0)
636 		return NULL;
637 
638 	pid = first;
639 
640 	/* Return pid + 1 so that zero can be the exit value */
641 	for (pid++; pid && l < *pos;
642 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
643 		;
644 	return (void *)pid;
645 }
646 
647 /**
648  * trace_pid_show - show the current pid in seq_file processing
649  * @m: The seq_file structure to write into
650  * @v: A void pointer of the pid (+1) value to display
651  *
652  * Can be directly used by seq_file operations to display the current
653  * pid value.
654  */
655 int trace_pid_show(struct seq_file *m, void *v)
656 {
657 	unsigned long pid = (unsigned long)v - 1;
658 
659 	seq_printf(m, "%lu\n", pid);
660 	return 0;
661 }
662 
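/*
 * Illustrative sketch (not part of this file): the three helpers above are
 * meant to slot into a seq_file. The wrappers below are hypothetical; real
 * users (e.g. the event pid filter files) look up their pid_list under the
 * proper locking before delegating.
 *
 *	static void *my_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static void my_pids_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pids_seq_ops = {
 *		.start	= my_pids_start,
 *		.next	= my_pids_next,
 *		.stop	= my_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */
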
663 /* 128 should be much more than enough */
664 #define PID_BUF_SIZE		127
665 
666 int trace_pid_write(struct trace_pid_list *filtered_pids,
667 		    struct trace_pid_list **new_pid_list,
668 		    const char __user *ubuf, size_t cnt)
669 {
670 	struct trace_pid_list *pid_list;
671 	struct trace_parser parser;
672 	unsigned long val;
673 	int nr_pids = 0;
674 	ssize_t read = 0;
675 	ssize_t ret;
676 	loff_t pos;
677 	pid_t pid;
678 
679 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
680 		return -ENOMEM;
681 
682 	/*
683 	 * Always recreate a new array. The write is an all or nothing
684 	 * operation. Always create a new array when adding new pids by
685 	 * the user. If the operation fails, then the current list is
686 	 * not modified.
687 	 */
688 	pid_list = trace_pid_list_alloc();
689 	if (!pid_list) {
690 		trace_parser_put(&parser);
691 		return -ENOMEM;
692 	}
693 
694 	if (filtered_pids) {
695 		/* copy the current bits to the new max */
696 		ret = trace_pid_list_first(filtered_pids, &pid);
697 		while (!ret) {
698 			trace_pid_list_set(pid_list, pid);
699 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
700 			nr_pids++;
701 		}
702 	}
703 
704 	ret = 0;
705 	while (cnt > 0) {
706 
707 		pos = 0;
708 
709 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
710 		if (ret < 0 || !trace_parser_loaded(&parser))
711 			break;
712 
713 		read += ret;
714 		ubuf += ret;
715 		cnt -= ret;
716 
717 		ret = -EINVAL;
718 		if (kstrtoul(parser.buffer, 0, &val))
719 			break;
720 
721 		pid = (pid_t)val;
722 
723 		if (trace_pid_list_set(pid_list, pid) < 0) {
724 			ret = -1;
725 			break;
726 		}
727 		nr_pids++;
728 
729 		trace_parser_clear(&parser);
730 		ret = 0;
731 	}
732 	trace_parser_put(&parser);
733 
734 	if (ret < 0) {
735 		trace_pid_list_free(pid_list);
736 		return ret;
737 	}
738 
739 	if (!nr_pids) {
740 		/* Cleared the list of pids */
741 		trace_pid_list_free(pid_list);
742 		read = ret;
743 		pid_list = NULL;
744 	}
745 
746 	*new_pid_list = pid_list;
747 
748 	return read;
749 }
750 
751 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
752 {
753 	u64 ts;
754 
755 	/* Early boot up does not have a buffer yet */
756 	if (!buf->buffer)
757 		return trace_clock_local();
758 
759 	ts = ring_buffer_time_stamp(buf->buffer);
760 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
761 
762 	return ts;
763 }
764 
765 u64 ftrace_now(int cpu)
766 {
767 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
768 }
769 
770 /**
771  * tracing_is_enabled - Show if global_trace has been enabled
772  *
773  * Shows if the global trace has been enabled or not. It uses the
774  * mirror flag "buffer_disabled" to be used in fast paths such as for
775  * the irqsoff tracer. But it may be inaccurate due to races. If you
776  * need to know the accurate state, use tracing_is_on() which is a little
777  * slower, but accurate.
778  */
779 int tracing_is_enabled(void)
780 {
781 	/*
782 	 * For quick access (irqsoff uses this in fast path), just
783 	 * return the mirror variable of the state of the ring buffer.
784 	 * It's a little racy, but we don't really care.
785 	 */
786 	smp_rmb();
787 	return !global_trace.buffer_disabled;
788 }
789 
790 /*
791  * trace_buf_size is the size in bytes that is allocated
792  * for a buffer. Note, the number of bytes is always rounded
793  * to page size.
794  *
795  * This number is purposely set to a low number of 16384.
796  * If a dump on oops happens, it is much appreciated not to
797  * have to wait for all that output. Anyway, this is configurable
798  * at both boot time and run time.
799  */
800 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
801 
802 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
803 
804 /* trace_types holds a link list of available tracers. */
805 static struct tracer		*trace_types __read_mostly;
806 
807 /*
808  * trace_types_lock is used to protect the trace_types list.
809  */
810 DEFINE_MUTEX(trace_types_lock);
811 
812 /*
813  * serialize access to the ring buffer
814  *
815  * The ring buffer serializes readers, but that is only low-level protection.
816  * The validity of the events (returned by ring_buffer_peek() etc.)
817  * is not protected by the ring buffer.
818  *
819  * The content of events may become garbage if we allow other processes to
820  * consume these events concurrently:
821  *   A) the page of the consumed events may become a normal page
822  *      (not a reader page) in the ring buffer, and this page will be
823  *      rewritten by the events producer.
824  *   B) the page of the consumed events may become a page for splice_read,
825  *      and this page will be returned to the system.
826  *
827  * These primitives allow multiple processes to access different per-cpu
828  * ring buffers concurrently.
829  *
830  * These primitives don't distinguish read-only and read-consume access.
831  * Multiple read-only accesses are also serialized.
832  */
833 
834 #ifdef CONFIG_SMP
835 static DECLARE_RWSEM(all_cpu_access_lock);
836 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
837 
838 static inline void trace_access_lock(int cpu)
839 {
840 	if (cpu == RING_BUFFER_ALL_CPUS) {
841 		/* gain it for accessing the whole ring buffer. */
842 		down_write(&all_cpu_access_lock);
843 	} else {
844 		/* gain it for accessing a cpu ring buffer. */
845 
846 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
847 		down_read(&all_cpu_access_lock);
848 
849 		/* Secondly block other access to this @cpu ring buffer. */
850 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
851 	}
852 }
853 
854 static inline void trace_access_unlock(int cpu)
855 {
856 	if (cpu == RING_BUFFER_ALL_CPUS) {
857 		up_write(&all_cpu_access_lock);
858 	} else {
859 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
860 		up_read(&all_cpu_access_lock);
861 	}
862 }
863 
864 static inline void trace_access_lock_init(void)
865 {
866 	int cpu;
867 
868 	for_each_possible_cpu(cpu)
869 		mutex_init(&per_cpu(cpu_access_lock, cpu));
870 }
871 
872 #else
873 
874 static DEFINE_MUTEX(access_lock);
875 
876 static inline void trace_access_lock(int cpu)
877 {
878 	(void)cpu;
879 	mutex_lock(&access_lock);
880 }
881 
882 static inline void trace_access_unlock(int cpu)
883 {
884 	(void)cpu;
885 	mutex_unlock(&access_lock);
886 }
887 
888 static inline void trace_access_lock_init(void)
889 {
890 }
891 
892 #endif
893 
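/*
 * Illustrative sketch (not part of this file): consuming readers in this
 * file bracket their ring buffer accesses with the helpers above, e.g.
 *
 *	trace_access_lock(iter->cpu_file);
 *	// ... peek at or consume events for that cpu (or all cpus) ...
 *	trace_access_unlock(iter->cpu_file);
 */
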
894 #ifdef CONFIG_STACKTRACE
895 static void __ftrace_trace_stack(struct trace_buffer *buffer,
896 				 unsigned int trace_ctx,
897 				 int skip, struct pt_regs *regs);
898 static inline void ftrace_trace_stack(struct trace_array *tr,
899 				      struct trace_buffer *buffer,
900 				      unsigned int trace_ctx,
901 				      int skip, struct pt_regs *regs);
902 
903 #else
904 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
905 					unsigned int trace_ctx,
906 					int skip, struct pt_regs *regs)
907 {
908 }
909 static inline void ftrace_trace_stack(struct trace_array *tr,
910 				      struct trace_buffer *buffer,
911 				      unsigned long trace_ctx,
912 				      int skip, struct pt_regs *regs)
913 {
914 }
915 
916 #endif
917 
918 static __always_inline void
919 trace_event_setup(struct ring_buffer_event *event,
920 		  int type, unsigned int trace_ctx)
921 {
922 	struct trace_entry *ent = ring_buffer_event_data(event);
923 
924 	tracing_generic_entry_update(ent, type, trace_ctx);
925 }
926 
927 static __always_inline struct ring_buffer_event *
928 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
929 			  int type,
930 			  unsigned long len,
931 			  unsigned int trace_ctx)
932 {
933 	struct ring_buffer_event *event;
934 
935 	event = ring_buffer_lock_reserve(buffer, len);
936 	if (event != NULL)
937 		trace_event_setup(event, type, trace_ctx);
938 
939 	return event;
940 }
941 
942 void tracer_tracing_on(struct trace_array *tr)
943 {
944 	if (tr->array_buffer.buffer)
945 		ring_buffer_record_on(tr->array_buffer.buffer);
946 	/*
947 	 * This flag is looked at when buffers haven't been allocated
948 	 * yet, or by some tracers (like irqsoff), that just want to
949 	 * know if the ring buffer has been disabled, but it can handle
950 	 * the race where it gets disabled while we still do a record.
951 	 * As the check is in the fast path of the tracers, it is more
952 	 * important to be fast than accurate.
953 	 */
954 	tr->buffer_disabled = 0;
955 	/* Make the flag seen by readers */
956 	smp_wmb();
957 }
958 
959 /**
960  * tracing_on - enable tracing buffers
961  *
962  * This function enables tracing buffers that may have been
963  * disabled with tracing_off.
964  */
965 void tracing_on(void)
966 {
967 	tracer_tracing_on(&global_trace);
968 }
969 EXPORT_SYMBOL_GPL(tracing_on);
970 
971 
972 static __always_inline void
973 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
974 {
975 	__this_cpu_write(trace_taskinfo_save, true);
976 
977 	/* If this is the temp buffer, we need to commit fully */
978 	if (this_cpu_read(trace_buffered_event) == event) {
979 		/* Length is in event->array[0] */
980 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
981 		/* Release the temp buffer */
982 		this_cpu_dec(trace_buffered_event_cnt);
983 	} else
984 		ring_buffer_unlock_commit(buffer, event);
985 }
986 
987 /**
988  * __trace_puts - write a constant string into the trace buffer.
989  * @ip:	   The address of the caller
990  * @str:   The constant string to write
991  * @size:  The size of the string.
992  */
993 int __trace_puts(unsigned long ip, const char *str, int size)
994 {
995 	struct ring_buffer_event *event;
996 	struct trace_buffer *buffer;
997 	struct print_entry *entry;
998 	unsigned int trace_ctx;
999 	int alloc;
1000 
1001 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1002 		return 0;
1003 
1004 	if (unlikely(tracing_selftest_running || tracing_disabled))
1005 		return 0;
1006 
1007 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1008 
1009 	trace_ctx = tracing_gen_ctx();
1010 	buffer = global_trace.array_buffer.buffer;
1011 	ring_buffer_nest_start(buffer);
1012 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1013 					    trace_ctx);
1014 	if (!event) {
1015 		size = 0;
1016 		goto out;
1017 	}
1018 
1019 	entry = ring_buffer_event_data(event);
1020 	entry->ip = ip;
1021 
1022 	memcpy(&entry->buf, str, size);
1023 
1024 	/* Add a newline if necessary */
1025 	if (entry->buf[size - 1] != '\n') {
1026 		entry->buf[size] = '\n';
1027 		entry->buf[size + 1] = '\0';
1028 	} else
1029 		entry->buf[size] = '\0';
1030 
1031 	__buffer_unlock_commit(buffer, event);
1032 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1033  out:
1034 	ring_buffer_nest_end(buffer);
1035 	return size;
1036 }
1037 EXPORT_SYMBOL_GPL(__trace_puts);
1038 
1039 /**
1040  * __trace_bputs - write the pointer to a constant string into trace buffer
1041  * @ip:	   The address of the caller
1042  * @str:   The constant string to write to the buffer to
1043  */
1044 int __trace_bputs(unsigned long ip, const char *str)
1045 {
1046 	struct ring_buffer_event *event;
1047 	struct trace_buffer *buffer;
1048 	struct bputs_entry *entry;
1049 	unsigned int trace_ctx;
1050 	int size = sizeof(struct bputs_entry);
1051 	int ret = 0;
1052 
1053 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1054 		return 0;
1055 
1056 	if (unlikely(tracing_selftest_running || tracing_disabled))
1057 		return 0;
1058 
1059 	trace_ctx = tracing_gen_ctx();
1060 	buffer = global_trace.array_buffer.buffer;
1061 
1062 	ring_buffer_nest_start(buffer);
1063 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1064 					    trace_ctx);
1065 	if (!event)
1066 		goto out;
1067 
1068 	entry = ring_buffer_event_data(event);
1069 	entry->ip			= ip;
1070 	entry->str			= str;
1071 
1072 	__buffer_unlock_commit(buffer, event);
1073 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1074 
1075 	ret = 1;
1076  out:
1077 	ring_buffer_nest_end(buffer);
1078 	return ret;
1079 }
1080 EXPORT_SYMBOL_GPL(__trace_bputs);
1081 
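/*
 * Illustrative sketch (not part of this file): callers normally do not use
 * __trace_puts()/__trace_bputs() directly but go through the trace_puts()
 * macro, which picks one of the two depending on whether the string is a
 * compile-time constant:
 *
 *	trace_puts("entering the slow path\n");
 */
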
1082 #ifdef CONFIG_TRACER_SNAPSHOT
1083 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1084 					   void *cond_data)
1085 {
1086 	struct tracer *tracer = tr->current_trace;
1087 	unsigned long flags;
1088 
1089 	if (in_nmi()) {
1090 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1091 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1092 		return;
1093 	}
1094 
1095 	if (!tr->allocated_snapshot) {
1096 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1097 		internal_trace_puts("*** stopping trace here!   ***\n");
1098 		tracing_off();
1099 		return;
1100 	}
1101 
1102 	/* Note, snapshot can not be used when the tracer uses it */
1103 	if (tracer->use_max_tr) {
1104 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1105 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1106 		return;
1107 	}
1108 
1109 	local_irq_save(flags);
1110 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1111 	local_irq_restore(flags);
1112 }
1113 
1114 void tracing_snapshot_instance(struct trace_array *tr)
1115 {
1116 	tracing_snapshot_instance_cond(tr, NULL);
1117 }
1118 
1119 /**
1120  * tracing_snapshot - take a snapshot of the current buffer.
1121  *
1122  * This causes a swap between the snapshot buffer and the current live
1123  * tracing buffer. You can use this to take snapshots of the live
1124  * trace when some condition is triggered, but continue to trace.
1125  *
1126  * Note, make sure to allocate the snapshot with either
1127  * a tracing_snapshot_alloc(), or by doing it manually
1128  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1129  *
1130  * If the snapshot buffer is not allocated, it will stop tracing.
1131  * Basically making a permanent snapshot.
1132  */
1133 void tracing_snapshot(void)
1134 {
1135 	struct trace_array *tr = &global_trace;
1136 
1137 	tracing_snapshot_instance(tr);
1138 }
1139 EXPORT_SYMBOL_GPL(tracing_snapshot);
1140 
1141 /**
1142  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1143  * @tr:		The tracing instance to snapshot
1144  * @cond_data:	The data to be tested conditionally, and possibly saved
1145  *
1146  * This is the same as tracing_snapshot() except that the snapshot is
1147  * conditional - the snapshot will only happen if the
1148  * cond_snapshot.update() implementation receiving the cond_data
1149  * returns true, which means that the trace array's cond_snapshot
1150  * update() operation used the cond_data to determine whether the
1151  * snapshot should be taken, and if it was, presumably saved it along
1152  * with the snapshot.
1153  */
1154 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1155 {
1156 	tracing_snapshot_instance_cond(tr, cond_data);
1157 }
1158 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1159 
1160 /**
1161  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1162  * @tr:		The tracing instance
1163  *
1164  * When the user enables a conditional snapshot using
1165  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1166  * with the snapshot.  This accessor is used to retrieve it.
1167  *
1168  * Should not be called from cond_snapshot.update(), since it takes
1169  * the tr->max_lock lock, which the code calling
1170  * cond_snapshot.update() has already done.
1171  *
1172  * Returns the cond_data associated with the trace array's snapshot.
1173  */
1174 void *tracing_cond_snapshot_data(struct trace_array *tr)
1175 {
1176 	void *cond_data = NULL;
1177 
1178 	arch_spin_lock(&tr->max_lock);
1179 
1180 	if (tr->cond_snapshot)
1181 		cond_data = tr->cond_snapshot->cond_data;
1182 
1183 	arch_spin_unlock(&tr->max_lock);
1184 
1185 	return cond_data;
1186 }
1187 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1188 
1189 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1190 					struct array_buffer *size_buf, int cpu_id);
1191 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1192 
1193 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1194 {
1195 	int ret;
1196 
1197 	if (!tr->allocated_snapshot) {
1198 
1199 		/* allocate spare buffer */
1200 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1201 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1202 		if (ret < 0)
1203 			return ret;
1204 
1205 		tr->allocated_snapshot = true;
1206 	}
1207 
1208 	return 0;
1209 }
1210 
1211 static void free_snapshot(struct trace_array *tr)
1212 {
1213 	/*
1214 	 * We don't free the ring buffer. Instead, we resize it because
1215 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1216 	 * we want to preserve it.
1217 	 */
1218 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1219 	set_buffer_entries(&tr->max_buffer, 1);
1220 	tracing_reset_online_cpus(&tr->max_buffer);
1221 	tr->allocated_snapshot = false;
1222 }
1223 
1224 /**
1225  * tracing_alloc_snapshot - allocate snapshot buffer.
1226  *
1227  * This only allocates the snapshot buffer if it isn't already
1228  * allocated - it doesn't also take a snapshot.
1229  *
1230  * This is meant to be used in cases where the snapshot buffer needs
1231  * to be set up for events that can't sleep but need to be able to
1232  * trigger a snapshot.
1233  */
1234 int tracing_alloc_snapshot(void)
1235 {
1236 	struct trace_array *tr = &global_trace;
1237 	int ret;
1238 
1239 	ret = tracing_alloc_snapshot_instance(tr);
1240 	WARN_ON(ret < 0);
1241 
1242 	return ret;
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1245 
1246 /**
1247  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1248  *
1249  * This is similar to tracing_snapshot(), but it will allocate the
1250  * snapshot buffer if it isn't already allocated. Use this only
1251  * where it is safe to sleep, as the allocation may sleep.
1252  *
1253  * This causes a swap between the snapshot buffer and the current live
1254  * tracing buffer. You can use this to take snapshots of the live
1255  * trace when some condition is triggered, but continue to trace.
1256  */
1257 void tracing_snapshot_alloc(void)
1258 {
1259 	int ret;
1260 
1261 	ret = tracing_alloc_snapshot();
1262 	if (ret < 0)
1263 		return;
1264 
1265 	tracing_snapshot();
1266 }
1267 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1268 
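/*
 * Illustrative sketch (not part of this file): a debugging hook that wants
 * to freeze the trace leading up to a rare condition could do something
 * like this (my_rare_condition is a hypothetical flag):
 *
 *	// at init time, where sleeping is allowed:
 *	tracing_alloc_snapshot();
 *
 *	// later, from a context that cannot sleep (but not NMI):
 *	if (my_rare_condition)
 *		tracing_snapshot();
 *
 * The captured data can then be read from the "snapshot" file in tracefs.
 */
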
1269 /**
1270  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1271  * @tr:		The tracing instance
1272  * @cond_data:	User data to associate with the snapshot
1273  * @update:	Implementation of the cond_snapshot update function
1274  *
1275  * Check whether the conditional snapshot for the given instance has
1276  * already been enabled, or if the current tracer is already using a
1277  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1278  * save the cond_data and update function inside.
1279  *
1280  * Returns 0 if successful, error otherwise.
1281  */
1282 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1283 				 cond_update_fn_t update)
1284 {
1285 	struct cond_snapshot *cond_snapshot;
1286 	int ret = 0;
1287 
1288 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1289 	if (!cond_snapshot)
1290 		return -ENOMEM;
1291 
1292 	cond_snapshot->cond_data = cond_data;
1293 	cond_snapshot->update = update;
1294 
1295 	mutex_lock(&trace_types_lock);
1296 
1297 	ret = tracing_alloc_snapshot_instance(tr);
1298 	if (ret)
1299 		goto fail_unlock;
1300 
1301 	if (tr->current_trace->use_max_tr) {
1302 		ret = -EBUSY;
1303 		goto fail_unlock;
1304 	}
1305 
1306 	/*
1307 	 * The cond_snapshot can only change to NULL without the
1308 	 * trace_types_lock. We don't care if we race with it going
1309 	 * to NULL, but we want to make sure that it's not set to
1310 	 * something other than NULL when we get here, which we can
1311 	 * do safely with only holding the trace_types_lock and not
1312 	 * having to take the max_lock.
1313 	 */
1314 	if (tr->cond_snapshot) {
1315 		ret = -EBUSY;
1316 		goto fail_unlock;
1317 	}
1318 
1319 	arch_spin_lock(&tr->max_lock);
1320 	tr->cond_snapshot = cond_snapshot;
1321 	arch_spin_unlock(&tr->max_lock);
1322 
1323 	mutex_unlock(&trace_types_lock);
1324 
1325 	return ret;
1326 
1327  fail_unlock:
1328 	mutex_unlock(&trace_types_lock);
1329 	kfree(cond_snapshot);
1330 	return ret;
1331 }
1332 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1333 
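/*
 * Illustrative sketch (not part of this file): the update callback decides,
 * on every tracing_snapshot_cond() call, whether the snapshot is taken.
 * my_update and my_flag are hypothetical.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		// return true to take the snapshot for this call
 *		return *(bool *)cond_data;
 *	}
 *
 *	// once, with a valid trace_array:
 *	tracing_snapshot_cond_enable(tr, NULL, my_update);
 *
 *	// at the interesting spot:
 *	tracing_snapshot_cond(tr, &my_flag);
 */
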
1334 /**
1335  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1336  * @tr:		The tracing instance
1337  *
1338  * Check whether the conditional snapshot for the given instance is
1339  * enabled; if so, free the cond_snapshot associated with it,
1340  * otherwise return -EINVAL.
1341  *
1342  * Returns 0 if successful, error otherwise.
1343  */
1344 int tracing_snapshot_cond_disable(struct trace_array *tr)
1345 {
1346 	int ret = 0;
1347 
1348 	arch_spin_lock(&tr->max_lock);
1349 
1350 	if (!tr->cond_snapshot)
1351 		ret = -EINVAL;
1352 	else {
1353 		kfree(tr->cond_snapshot);
1354 		tr->cond_snapshot = NULL;
1355 	}
1356 
1357 	arch_spin_unlock(&tr->max_lock);
1358 
1359 	return ret;
1360 }
1361 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1362 #else
1363 void tracing_snapshot(void)
1364 {
1365 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1366 }
1367 EXPORT_SYMBOL_GPL(tracing_snapshot);
1368 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1369 {
1370 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1371 }
1372 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1373 int tracing_alloc_snapshot(void)
1374 {
1375 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1376 	return -ENODEV;
1377 }
1378 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1379 void tracing_snapshot_alloc(void)
1380 {
1381 	/* Give warning */
1382 	tracing_snapshot();
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1385 void *tracing_cond_snapshot_data(struct trace_array *tr)
1386 {
1387 	return NULL;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1390 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1391 {
1392 	return -ENODEV;
1393 }
1394 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1395 int tracing_snapshot_cond_disable(struct trace_array *tr)
1396 {
1397 	return false;
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1400 #endif /* CONFIG_TRACER_SNAPSHOT */
1401 
1402 void tracer_tracing_off(struct trace_array *tr)
1403 {
1404 	if (tr->array_buffer.buffer)
1405 		ring_buffer_record_off(tr->array_buffer.buffer);
1406 	/*
1407 	 * This flag is looked at when buffers haven't been allocated
1408 	 * yet, or by some tracers (like irqsoff), that just want to
1409 	 * know if the ring buffer has been disabled, but it can handle
1410 	 * the race where it gets disabled while we still do a record.
1411 	 * As the check is in the fast path of the tracers, it is more
1412 	 * important to be fast than accurate.
1413 	 */
1414 	tr->buffer_disabled = 1;
1415 	/* Make the flag seen by readers */
1416 	smp_wmb();
1417 }
1418 
1419 /**
1420  * tracing_off - turn off tracing buffers
1421  *
1422  * This function stops the tracing buffers from recording data.
1423  * It does not disable any overhead the tracers themselves may
1424  * be causing. This function simply causes all recording to
1425  * the ring buffers to fail.
1426  */
1427 void tracing_off(void)
1428 {
1429 	tracer_tracing_off(&global_trace);
1430 }
1431 EXPORT_SYMBOL_GPL(tracing_off);
1432 
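/*
 * Illustrative sketch (not part of this file): a common debugging pattern
 * is to stop the ring buffer as soon as a suspicious condition is seen, so
 * that the events leading up to it are preserved for later inspection
 * (suspicious_condition is hypothetical):
 *
 *	if (suspicious_condition) {
 *		trace_printk("state went bad here\n");
 *		tracing_off();
 *	}
 */
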
1433 void disable_trace_on_warning(void)
1434 {
1435 	if (__disable_trace_on_warning) {
1436 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1437 			"Disabling tracing due to warning\n");
1438 		tracing_off();
1439 	}
1440 }
1441 
1442 /**
1443  * tracer_tracing_is_on - show real state of ring buffer enabled
1444  * @tr : the trace array to know if ring buffer is enabled
1445  *
1446  * Shows real state of the ring buffer if it is enabled or not.
1447  */
1448 bool tracer_tracing_is_on(struct trace_array *tr)
1449 {
1450 	if (tr->array_buffer.buffer)
1451 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1452 	return !tr->buffer_disabled;
1453 }
1454 
1455 /**
1456  * tracing_is_on - show state of ring buffers enabled
1457  */
1458 int tracing_is_on(void)
1459 {
1460 	return tracer_tracing_is_on(&global_trace);
1461 }
1462 EXPORT_SYMBOL_GPL(tracing_is_on);
1463 
1464 static int __init set_buf_size(char *str)
1465 {
1466 	unsigned long buf_size;
1467 
1468 	if (!str)
1469 		return 0;
1470 	buf_size = memparse(str, &str);
1471 	/* nr_entries can not be zero */
1472 	if (buf_size == 0)
1473 		return 0;
1474 	trace_buf_size = buf_size;
1475 	return 1;
1476 }
1477 __setup("trace_buf_size=", set_buf_size);
1478 
1479 static int __init set_tracing_thresh(char *str)
1480 {
1481 	unsigned long threshold;
1482 	int ret;
1483 
1484 	if (!str)
1485 		return 0;
1486 	ret = kstrtoul(str, 0, &threshold);
1487 	if (ret < 0)
1488 		return 0;
1489 	tracing_thresh = threshold * 1000;
1490 	return 1;
1491 }
1492 __setup("tracing_thresh=", set_tracing_thresh);
1493 
1494 unsigned long nsecs_to_usecs(unsigned long nsecs)
1495 {
1496 	return nsecs / 1000;
1497 }
1498 
1499 /*
1500  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1501  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1502  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1503  * of strings in the order that the evals (enum) were defined.
1504  */
1505 #undef C
1506 #define C(a, b) b
1507 
1508 /* These must match the bit positions in trace_iterator_flags */
1509 static const char *trace_options[] = {
1510 	TRACE_FLAGS
1511 	NULL
1512 };
1513 
1514 static struct {
1515 	u64 (*func)(void);
1516 	const char *name;
1517 	int in_ns;		/* is this clock in nanoseconds? */
1518 } trace_clocks[] = {
1519 	{ trace_clock_local,		"local",	1 },
1520 	{ trace_clock_global,		"global",	1 },
1521 	{ trace_clock_counter,		"counter",	0 },
1522 	{ trace_clock_jiffies,		"uptime",	0 },
1523 	{ trace_clock,			"perf",		1 },
1524 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1525 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1526 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1527 	ARCH_TRACE_CLOCKS
1528 };
1529 
1530 bool trace_clock_in_ns(struct trace_array *tr)
1531 {
1532 	if (trace_clocks[tr->clock_id].in_ns)
1533 		return true;
1534 
1535 	return false;
1536 }
1537 
1538 /*
1539  * trace_parser_get_init - gets the buffer for trace parser
1540  */
1541 int trace_parser_get_init(struct trace_parser *parser, int size)
1542 {
1543 	memset(parser, 0, sizeof(*parser));
1544 
1545 	parser->buffer = kmalloc(size, GFP_KERNEL);
1546 	if (!parser->buffer)
1547 		return 1;
1548 
1549 	parser->size = size;
1550 	return 0;
1551 }
1552 
1553 /*
1554  * trace_parser_put - frees the buffer for trace parser
1555  */
1556 void trace_parser_put(struct trace_parser *parser)
1557 {
1558 	kfree(parser->buffer);
1559 	parser->buffer = NULL;
1560 }
1561 
1562 /*
1563  * trace_get_user - reads the user input string separated by  space
1564  * (matched by isspace(ch))
1565  *
1566  * For each string found the 'struct trace_parser' is updated,
1567  * and the function returns.
1568  *
1569  * Returns number of bytes read.
1570  *
1571  * See kernel/trace/trace.h for 'struct trace_parser' details.
1572  */
1573 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1574 	size_t cnt, loff_t *ppos)
1575 {
1576 	char ch;
1577 	size_t read = 0;
1578 	ssize_t ret;
1579 
1580 	if (!*ppos)
1581 		trace_parser_clear(parser);
1582 
1583 	ret = get_user(ch, ubuf++);
1584 	if (ret)
1585 		goto out;
1586 
1587 	read++;
1588 	cnt--;
1589 
1590 	/*
1591 	 * The parser is not finished with the last write,
1592 	 * continue reading the user input without skipping spaces.
1593 	 */
1594 	if (!parser->cont) {
1595 		/* skip white space */
1596 		while (cnt && isspace(ch)) {
1597 			ret = get_user(ch, ubuf++);
1598 			if (ret)
1599 				goto out;
1600 			read++;
1601 			cnt--;
1602 		}
1603 
1604 		parser->idx = 0;
1605 
1606 		/* only spaces were written */
1607 		if (isspace(ch) || !ch) {
1608 			*ppos += read;
1609 			ret = read;
1610 			goto out;
1611 		}
1612 	}
1613 
1614 	/* read the non-space input */
1615 	while (cnt && !isspace(ch) && ch) {
1616 		if (parser->idx < parser->size - 1)
1617 			parser->buffer[parser->idx++] = ch;
1618 		else {
1619 			ret = -EINVAL;
1620 			goto out;
1621 		}
1622 		ret = get_user(ch, ubuf++);
1623 		if (ret)
1624 			goto out;
1625 		read++;
1626 		cnt--;
1627 	}
1628 
1629 	/* We either got finished input or we have to wait for another call. */
1630 	if (isspace(ch) || !ch) {
1631 		parser->buffer[parser->idx] = 0;
1632 		parser->cont = false;
1633 	} else if (parser->idx < parser->size - 1) {
1634 		parser->cont = true;
1635 		parser->buffer[parser->idx++] = ch;
1636 		/* Make sure the parsed string always terminates with '\0'. */
1637 		parser->buffer[parser->idx] = 0;
1638 	} else {
1639 		ret = -EINVAL;
1640 		goto out;
1641 	}
1642 
1643 	*ppos += read;
1644 	ret = read;
1645 
1646 out:
1647 	return ret;
1648 }
1649 
1650 /* TODO add a seq_buf_to_buffer() */
1651 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1652 {
1653 	int len;
1654 
1655 	if (trace_seq_used(s) <= s->seq.readpos)
1656 		return -EBUSY;
1657 
1658 	len = trace_seq_used(s) - s->seq.readpos;
1659 	if (cnt > len)
1660 		cnt = len;
1661 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1662 
1663 	s->seq.readpos += cnt;
1664 	return cnt;
1665 }
1666 
1667 unsigned long __read_mostly	tracing_thresh;
1668 static const struct file_operations tracing_max_lat_fops;
1669 
1670 #ifdef LATENCY_FS_NOTIFY
1671 
1672 static struct workqueue_struct *fsnotify_wq;
1673 
1674 static void latency_fsnotify_workfn(struct work_struct *work)
1675 {
1676 	struct trace_array *tr = container_of(work, struct trace_array,
1677 					      fsnotify_work);
1678 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1679 }
1680 
1681 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1682 {
1683 	struct trace_array *tr = container_of(iwork, struct trace_array,
1684 					      fsnotify_irqwork);
1685 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1686 }
1687 
1688 static void trace_create_maxlat_file(struct trace_array *tr,
1689 				     struct dentry *d_tracer)
1690 {
1691 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1692 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1693 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1694 					      TRACE_MODE_WRITE,
1695 					      d_tracer, &tr->max_latency,
1696 					      &tracing_max_lat_fops);
1697 }
1698 
1699 __init static int latency_fsnotify_init(void)
1700 {
1701 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1702 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1703 	if (!fsnotify_wq) {
1704 		pr_err("Unable to allocate tr_max_lat_wq\n");
1705 		return -ENOMEM;
1706 	}
1707 	return 0;
1708 }
1709 
1710 late_initcall_sync(latency_fsnotify_init);
1711 
1712 void latency_fsnotify(struct trace_array *tr)
1713 {
1714 	if (!fsnotify_wq)
1715 		return;
1716 	/*
1717 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1718 	 * possible that we are called from __schedule() or do_idle(), which
1719 	 * could cause a deadlock.
1720 	 */
1721 	irq_work_queue(&tr->fsnotify_irqwork);
1722 }
1723 
1724 /*
1725  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1726  *  defined(CONFIG_FSNOTIFY)
1727  */
1728 #else
1729 
1730 #define trace_create_maxlat_file(tr, d_tracer)				\
1731 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1732 			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1733 
1734 #endif
1735 
1736 #ifdef CONFIG_TRACER_MAX_TRACE
1737 /*
1738  * Copy the new maximum trace into the separate maximum-trace
1739  * structure. (this way the maximum trace is permanently saved,
1740  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1741  */
1742 static void
1743 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1744 {
1745 	struct array_buffer *trace_buf = &tr->array_buffer;
1746 	struct array_buffer *max_buf = &tr->max_buffer;
1747 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1748 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1749 
1750 	max_buf->cpu = cpu;
1751 	max_buf->time_start = data->preempt_timestamp;
1752 
1753 	max_data->saved_latency = tr->max_latency;
1754 	max_data->critical_start = data->critical_start;
1755 	max_data->critical_end = data->critical_end;
1756 
1757 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1758 	max_data->pid = tsk->pid;
1759 	/*
1760 	 * If tsk == current, then use current_uid(), as that does not use
1761 	 * RCU. The irq tracer can be called out of RCU scope.
1762 	 */
1763 	if (tsk == current)
1764 		max_data->uid = current_uid();
1765 	else
1766 		max_data->uid = task_uid(tsk);
1767 
1768 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1769 	max_data->policy = tsk->policy;
1770 	max_data->rt_priority = tsk->rt_priority;
1771 
1772 	/* record this tasks comm */
1773 	tracing_record_cmdline(tsk);
1774 	latency_fsnotify(tr);
1775 }
1776 
1777 /**
1778  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1779  * @tr: tracer
1780  * @tsk: the task with the latency
1781  * @cpu: The cpu that initiated the trace.
1782  * @cond_data: User data associated with a conditional snapshot
1783  *
1784  * Flip the buffers between the @tr and the max_tr and record information
1785  * about which task was the cause of this latency.
1786  */
1787 void
1788 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1789 	      void *cond_data)
1790 {
1791 	if (tr->stop_count)
1792 		return;
1793 
1794 	WARN_ON_ONCE(!irqs_disabled());
1795 
1796 	if (!tr->allocated_snapshot) {
1797 		/* Only the nop tracer should hit this when disabling */
1798 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1799 		return;
1800 	}
1801 
1802 	arch_spin_lock(&tr->max_lock);
1803 
1804 	/* Inherit the recordable setting from array_buffer */
1805 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1806 		ring_buffer_record_on(tr->max_buffer.buffer);
1807 	else
1808 		ring_buffer_record_off(tr->max_buffer.buffer);
1809 
1810 #ifdef CONFIG_TRACER_SNAPSHOT
1811 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1812 		goto out_unlock;
1813 #endif
1814 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1815 
1816 	__update_max_tr(tr, tsk, cpu);
1817 
1818  out_unlock:
1819 	arch_spin_unlock(&tr->max_lock);
1820 }
1821 
1822 /**
1823  * update_max_tr_single - only copy one trace over, and reset the rest
1824  * @tr: tracer
1825  * @tsk: task with the latency
1826  * @cpu: the cpu of the buffer to copy.
1827  *
1828  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1829  */
1830 void
1831 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1832 {
1833 	int ret;
1834 
1835 	if (tr->stop_count)
1836 		return;
1837 
1838 	WARN_ON_ONCE(!irqs_disabled());
1839 	if (!tr->allocated_snapshot) {
1840 		/* Only the nop tracer should hit this when disabling */
1841 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1842 		return;
1843 	}
1844 
1845 	arch_spin_lock(&tr->max_lock);
1846 
1847 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1848 
1849 	if (ret == -EBUSY) {
1850 		/*
1851 		 * We failed to swap the buffer due to a commit taking
1852 		 * place on this CPU. We fail to record, but we reset
1853 		 * the max trace buffer (no one writes directly to it)
1854 		 * and flag that it failed.
1855 		 */
1856 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1857 			"Failed to swap buffers due to commit in progress\n");
1858 	}
1859 
1860 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1861 
1862 	__update_max_tr(tr, tsk, cpu);
1863 	arch_spin_unlock(&tr->max_lock);
1864 }
1865 #endif /* CONFIG_TRACER_MAX_TRACE */
1866 
1867 static int wait_on_pipe(struct trace_iterator *iter, int full)
1868 {
1869 	/* Iterators are static, they should be filled or empty */
1870 	if (trace_buffer_iter(iter, iter->cpu_file))
1871 		return 0;
1872 
1873 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1874 				full);
1875 }
1876 
1877 #ifdef CONFIG_FTRACE_STARTUP_TEST
1878 static bool selftests_can_run;
1879 
1880 struct trace_selftests {
1881 	struct list_head		list;
1882 	struct tracer			*type;
1883 };
1884 
1885 static LIST_HEAD(postponed_selftests);
1886 
1887 static int save_selftest(struct tracer *type)
1888 {
1889 	struct trace_selftests *selftest;
1890 
1891 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1892 	if (!selftest)
1893 		return -ENOMEM;
1894 
1895 	selftest->type = type;
1896 	list_add(&selftest->list, &postponed_selftests);
1897 	return 0;
1898 }
1899 
1900 static int run_tracer_selftest(struct tracer *type)
1901 {
1902 	struct trace_array *tr = &global_trace;
1903 	struct tracer *saved_tracer = tr->current_trace;
1904 	int ret;
1905 
1906 	if (!type->selftest || tracing_selftest_disabled)
1907 		return 0;
1908 
1909 	/*
1910 	 * If a tracer registers early in boot up (before scheduling is
1911 	 * initialized and such), then do not run its selftests yet.
1912 	 * Instead, run it a little later in the boot process.
1913 	 */
1914 	if (!selftests_can_run)
1915 		return save_selftest(type);
1916 
1917 	if (!tracing_is_on()) {
1918 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1919 			type->name);
1920 		return 0;
1921 	}
1922 
1923 	/*
1924 	 * Run a selftest on this tracer.
1925 	 * Here we reset the trace buffer, and set the current
1926 	 * tracer to be this tracer. The tracer can then run some
1927 	 * internal tracing to verify that everything is in order.
1928 	 * If we fail, we do not register this tracer.
1929 	 */
1930 	tracing_reset_online_cpus(&tr->array_buffer);
1931 
1932 	tr->current_trace = type;
1933 
1934 #ifdef CONFIG_TRACER_MAX_TRACE
1935 	if (type->use_max_tr) {
1936 		/* If we expanded the buffers, make sure the max is expanded too */
1937 		if (ring_buffer_expanded)
1938 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1939 					   RING_BUFFER_ALL_CPUS);
1940 		tr->allocated_snapshot = true;
1941 	}
1942 #endif
1943 
1944 	/* the test is responsible for initializing and enabling */
1945 	pr_info("Testing tracer %s: ", type->name);
1946 	ret = type->selftest(type, tr);
1947 	/* the test is responsible for resetting too */
1948 	tr->current_trace = saved_tracer;
1949 	if (ret) {
1950 		printk(KERN_CONT "FAILED!\n");
1951 		/* Add the warning after printing 'FAILED' */
1952 		WARN_ON(1);
1953 		return -1;
1954 	}
1955 	/* Only reset on passing, to avoid touching corrupted buffers */
1956 	tracing_reset_online_cpus(&tr->array_buffer);
1957 
1958 #ifdef CONFIG_TRACER_MAX_TRACE
1959 	if (type->use_max_tr) {
1960 		tr->allocated_snapshot = false;
1961 
1962 		/* Shrink the max buffer again */
1963 		if (ring_buffer_expanded)
1964 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1965 					   RING_BUFFER_ALL_CPUS);
1966 	}
1967 #endif
1968 
1969 	printk(KERN_CONT "PASSED\n");
1970 	return 0;
1971 }
1972 
1973 static __init int init_trace_selftests(void)
1974 {
1975 	struct trace_selftests *p, *n;
1976 	struct tracer *t, **last;
1977 	int ret;
1978 
1979 	selftests_can_run = true;
1980 
1981 	mutex_lock(&trace_types_lock);
1982 
1983 	if (list_empty(&postponed_selftests))
1984 		goto out;
1985 
1986 	pr_info("Running postponed tracer tests:\n");
1987 
1988 	tracing_selftest_running = true;
1989 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1990 		/* This loop can take minutes when sanitizers are enabled, so
1991 		 * let's make sure we allow RCU processing.
1992 		 */
1993 		cond_resched();
1994 		ret = run_tracer_selftest(p->type);
1995 		/* If the test fails, then warn and remove from available_tracers */
1996 		if (ret < 0) {
1997 			WARN(1, "tracer: %s failed selftest, disabling\n",
1998 			     p->type->name);
1999 			last = &trace_types;
2000 			for (t = trace_types; t; t = t->next) {
2001 				if (t == p->type) {
2002 					*last = t->next;
2003 					break;
2004 				}
2005 				last = &t->next;
2006 			}
2007 		}
2008 		list_del(&p->list);
2009 		kfree(p);
2010 	}
2011 	tracing_selftest_running = false;
2012 
2013  out:
2014 	mutex_unlock(&trace_types_lock);
2015 
2016 	return 0;
2017 }
2018 core_initcall(init_trace_selftests);
2019 #else
2020 static inline int run_tracer_selftest(struct tracer *type)
2021 {
2022 	return 0;
2023 }
2024 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2025 
2026 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2027 
2028 static void __init apply_trace_boot_options(void);
2029 
2030 /**
2031  * register_tracer - register a tracer with the ftrace system.
2032  * @type: the plugin for the tracer
2033  *
2034  * Register a new plugin tracer.
2035  */
2036 int __init register_tracer(struct tracer *type)
2037 {
2038 	struct tracer *t;
2039 	int ret = 0;
2040 
2041 	if (!type->name) {
2042 		pr_info("Tracer must have a name\n");
2043 		return -1;
2044 	}
2045 
2046 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2047 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2048 		return -1;
2049 	}
2050 
2051 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2052 		pr_warn("Can not register tracer %s due to lockdown\n",
2053 			   type->name);
2054 		return -EPERM;
2055 	}
2056 
2057 	mutex_lock(&trace_types_lock);
2058 
2059 	tracing_selftest_running = true;
2060 
2061 	for (t = trace_types; t; t = t->next) {
2062 		if (strcmp(type->name, t->name) == 0) {
2063 			/* already found */
2064 			pr_info("Tracer %s already registered\n",
2065 				type->name);
2066 			ret = -1;
2067 			goto out;
2068 		}
2069 	}
2070 
2071 	if (!type->set_flag)
2072 		type->set_flag = &dummy_set_flag;
2073 	if (!type->flags) {
2074 		/* allocate a dummy tracer_flags */
2075 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2076 		if (!type->flags) {
2077 			ret = -ENOMEM;
2078 			goto out;
2079 		}
2080 		type->flags->val = 0;
2081 		type->flags->opts = dummy_tracer_opt;
2082 	} else
2083 		if (!type->flags->opts)
2084 			type->flags->opts = dummy_tracer_opt;
2085 
2086 	/* store the tracer for __set_tracer_option */
2087 	type->flags->trace = type;
2088 
2089 	ret = run_tracer_selftest(type);
2090 	if (ret < 0)
2091 		goto out;
2092 
2093 	type->next = trace_types;
2094 	trace_types = type;
2095 	add_tracer_options(&global_trace, type);
2096 
2097  out:
2098 	tracing_selftest_running = false;
2099 	mutex_unlock(&trace_types_lock);
2100 
2101 	if (ret || !default_bootup_tracer)
2102 		goto out_unlock;
2103 
2104 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2105 		goto out_unlock;
2106 
2107 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2108 	/* Do we want this tracer to start on bootup? */
2109 	tracing_set_tracer(&global_trace, type->name);
2110 	default_bootup_tracer = NULL;
2111 
2112 	apply_trace_boot_options();
2113 
2114 	/* Disable other selftests, since running this tracer would break them. */
2115 	disable_tracing_selftest("running a tracer");
2116 
2117  out_unlock:
2118 	return ret;
2119 }
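
/*
 * Illustrative registration pattern (hypothetical "foo" tracer; mirrors
 * how the built-in tracers register themselves at boot):
 *
 *	static struct tracer foo_tracer __read_mostly = {
 *		.name	= "foo",
 *		.init	= foo_tracer_init,
 *		.reset	= foo_tracer_reset,
 *	};
 *
 *	static __init int init_foo_tracer(void)
 *	{
 *		return register_tracer(&foo_tracer);
 *	}
 *	core_initcall(init_foo_tracer);
 */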
2120 
2121 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2122 {
2123 	struct trace_buffer *buffer = buf->buffer;
2124 
2125 	if (!buffer)
2126 		return;
2127 
2128 	ring_buffer_record_disable(buffer);
2129 
2130 	/* Make sure all commits have finished */
2131 	synchronize_rcu();
2132 	ring_buffer_reset_cpu(buffer, cpu);
2133 
2134 	ring_buffer_record_enable(buffer);
2135 }
2136 
2137 void tracing_reset_online_cpus(struct array_buffer *buf)
2138 {
2139 	struct trace_buffer *buffer = buf->buffer;
2140 
2141 	if (!buffer)
2142 		return;
2143 
2144 	ring_buffer_record_disable(buffer);
2145 
2146 	/* Make sure all commits have finished */
2147 	synchronize_rcu();
2148 
2149 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2150 
2151 	ring_buffer_reset_online_cpus(buffer);
2152 
2153 	ring_buffer_record_enable(buffer);
2154 }
2155 
2156 /* Must have trace_types_lock held */
2157 void tracing_reset_all_online_cpus(void)
2158 {
2159 	struct trace_array *tr;
2160 
2161 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2162 		if (!tr->clear_trace)
2163 			continue;
2164 		tr->clear_trace = false;
2165 		tracing_reset_online_cpus(&tr->array_buffer);
2166 #ifdef CONFIG_TRACER_MAX_TRACE
2167 		tracing_reset_online_cpus(&tr->max_buffer);
2168 #endif
2169 	}
2170 }
2171 
2172 /*
2173  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2174  * is the tgid last observed corresponding to pid=i.
2175  */
2176 static int *tgid_map;
2177 
2178 /* The maximum valid index into tgid_map. */
2179 static size_t tgid_map_max;
2180 
2181 #define SAVED_CMDLINES_DEFAULT 128
2182 #define NO_CMDLINE_MAP UINT_MAX
2183 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2184 struct saved_cmdlines_buffer {
2185 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2186 	unsigned *map_cmdline_to_pid;
2187 	unsigned cmdline_num;
2188 	int cmdline_idx;
2189 	char *saved_cmdlines;
2190 };
2191 static struct saved_cmdlines_buffer *savedcmd;
2192 
2193 static inline char *get_saved_cmdlines(int idx)
2194 {
2195 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2196 }
2197 
2198 static inline void set_cmdline(int idx, const char *cmdline)
2199 {
2200 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2201 }
2202 
2203 static int allocate_cmdlines_buffer(unsigned int val,
2204 				    struct saved_cmdlines_buffer *s)
2205 {
2206 	s->map_cmdline_to_pid = kmalloc_array(val,
2207 					      sizeof(*s->map_cmdline_to_pid),
2208 					      GFP_KERNEL);
2209 	if (!s->map_cmdline_to_pid)
2210 		return -ENOMEM;
2211 
2212 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2213 	if (!s->saved_cmdlines) {
2214 		kfree(s->map_cmdline_to_pid);
2215 		return -ENOMEM;
2216 	}
2217 
2218 	s->cmdline_idx = 0;
2219 	s->cmdline_num = val;
2220 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2221 	       sizeof(s->map_pid_to_cmdline));
2222 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2223 	       val * sizeof(*s->map_cmdline_to_pid));
2224 
2225 	return 0;
2226 }
2227 
2228 static int trace_create_savedcmd(void)
2229 {
2230 	int ret;
2231 
2232 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2233 	if (!savedcmd)
2234 		return -ENOMEM;
2235 
2236 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2237 	if (ret < 0) {
2238 		kfree(savedcmd);
2239 		savedcmd = NULL;
2240 		return -ENOMEM;
2241 	}
2242 
2243 	return 0;
2244 }
2245 
2246 int is_tracing_stopped(void)
2247 {
2248 	return global_trace.stop_count;
2249 }
2250 
2251 /**
2252  * tracing_start - quick start of the tracer
2253  *
2254  * If tracing is enabled but was stopped by tracing_stop,
2255  * this will start the tracer back up.
2256  */
2257 void tracing_start(void)
2258 {
2259 	struct trace_buffer *buffer;
2260 	unsigned long flags;
2261 
2262 	if (tracing_disabled)
2263 		return;
2264 
2265 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2266 	if (--global_trace.stop_count) {
2267 		if (global_trace.stop_count < 0) {
2268 			/* Someone screwed up their debugging */
2269 			WARN_ON_ONCE(1);
2270 			global_trace.stop_count = 0;
2271 		}
2272 		goto out;
2273 	}
2274 
2275 	/* Prevent the buffers from switching */
2276 	arch_spin_lock(&global_trace.max_lock);
2277 
2278 	buffer = global_trace.array_buffer.buffer;
2279 	if (buffer)
2280 		ring_buffer_record_enable(buffer);
2281 
2282 #ifdef CONFIG_TRACER_MAX_TRACE
2283 	buffer = global_trace.max_buffer.buffer;
2284 	if (buffer)
2285 		ring_buffer_record_enable(buffer);
2286 #endif
2287 
2288 	arch_spin_unlock(&global_trace.max_lock);
2289 
2290  out:
2291 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2292 }
2293 
2294 static void tracing_start_tr(struct trace_array *tr)
2295 {
2296 	struct trace_buffer *buffer;
2297 	unsigned long flags;
2298 
2299 	if (tracing_disabled)
2300 		return;
2301 
2302 	/* If global, we need to also start the max tracer */
2303 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2304 		return tracing_start();
2305 
2306 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2307 
2308 	if (--tr->stop_count) {
2309 		if (tr->stop_count < 0) {
2310 			/* Someone screwed up their debugging */
2311 			WARN_ON_ONCE(1);
2312 			tr->stop_count = 0;
2313 		}
2314 		goto out;
2315 	}
2316 
2317 	buffer = tr->array_buffer.buffer;
2318 	if (buffer)
2319 		ring_buffer_record_enable(buffer);
2320 
2321  out:
2322 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2323 }
2324 
2325 /**
2326  * tracing_stop - quick stop of the tracer
2327  *
2328  * Light weight way to stop tracing. Use in conjunction with
2329  * tracing_start.
2330  */
2331 void tracing_stop(void)
2332 {
2333 	struct trace_buffer *buffer;
2334 	unsigned long flags;
2335 
2336 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2337 	if (global_trace.stop_count++)
2338 		goto out;
2339 
2340 	/* Prevent the buffers from switching */
2341 	arch_spin_lock(&global_trace.max_lock);
2342 
2343 	buffer = global_trace.array_buffer.buffer;
2344 	if (buffer)
2345 		ring_buffer_record_disable(buffer);
2346 
2347 #ifdef CONFIG_TRACER_MAX_TRACE
2348 	buffer = global_trace.max_buffer.buffer;
2349 	if (buffer)
2350 		ring_buffer_record_disable(buffer);
2351 #endif
2352 
2353 	arch_spin_unlock(&global_trace.max_lock);
2354 
2355  out:
2356 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2357 }
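
/*
 * Illustrative pairing (assumed usage, per the kernel-doc above): code
 * that wants a quiet window in the trace can bracket itself with:
 *
 *	tracing_stop();
 *	... section that should not appear in the trace ...
 *	tracing_start();
 */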
2358 
2359 static void tracing_stop_tr(struct trace_array *tr)
2360 {
2361 	struct trace_buffer *buffer;
2362 	unsigned long flags;
2363 
2364 	/* If global, we need to also stop the max tracer */
2365 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2366 		return tracing_stop();
2367 
2368 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2369 	if (tr->stop_count++)
2370 		goto out;
2371 
2372 	buffer = tr->array_buffer.buffer;
2373 	if (buffer)
2374 		ring_buffer_record_disable(buffer);
2375 
2376  out:
2377 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2378 }
2379 
2380 static int trace_save_cmdline(struct task_struct *tsk)
2381 {
2382 	unsigned tpid, idx;
2383 
2384 	/* treat recording of idle task as a success */
2385 	if (!tsk->pid)
2386 		return 1;
2387 
2388 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2389 
2390 	/*
2391 	 * It's not the end of the world if we don't get
2392 	 * the lock, but we also don't want to spin
2393 	 * nor do we want to disable interrupts,
2394 	 * so if we miss here, then better luck next time.
2395 	 */
2396 	if (!arch_spin_trylock(&trace_cmdline_lock))
2397 		return 0;
2398 
2399 	idx = savedcmd->map_pid_to_cmdline[tpid];
2400 	if (idx == NO_CMDLINE_MAP) {
2401 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2402 
2403 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2404 		savedcmd->cmdline_idx = idx;
2405 	}
2406 
2407 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2408 	set_cmdline(idx, tsk->comm);
2409 
2410 	arch_spin_unlock(&trace_cmdline_lock);
2411 
2412 	return 1;
2413 }
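
/*
 * Mapping sketch (derived from the code above): a pid is first masked to
 * tpid = pid & (PID_MAX_DEFAULT - 1). map_pid_to_cmdline[tpid] then gives
 * a slot index into saved_cmdlines[], and map_cmdline_to_pid[idx] records
 * which pid currently owns that slot, so a stale entry can be detected on
 * lookup (see __trace_find_cmdline() below, which falls back to "<...>").
 */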
2414 
2415 static void __trace_find_cmdline(int pid, char comm[])
2416 {
2417 	unsigned map;
2418 	int tpid;
2419 
2420 	if (!pid) {
2421 		strcpy(comm, "<idle>");
2422 		return;
2423 	}
2424 
2425 	if (WARN_ON_ONCE(pid < 0)) {
2426 		strcpy(comm, "<XXX>");
2427 		return;
2428 	}
2429 
2430 	tpid = pid & (PID_MAX_DEFAULT - 1);
2431 	map = savedcmd->map_pid_to_cmdline[tpid];
2432 	if (map != NO_CMDLINE_MAP) {
2433 		tpid = savedcmd->map_cmdline_to_pid[map];
2434 		if (tpid == pid) {
2435 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2436 			return;
2437 		}
2438 	}
2439 	strcpy(comm, "<...>");
2440 }
2441 
2442 void trace_find_cmdline(int pid, char comm[])
2443 {
2444 	preempt_disable();
2445 	arch_spin_lock(&trace_cmdline_lock);
2446 
2447 	__trace_find_cmdline(pid, comm);
2448 
2449 	arch_spin_unlock(&trace_cmdline_lock);
2450 	preempt_enable();
2451 }
2452 
2453 static int *trace_find_tgid_ptr(int pid)
2454 {
2455 	/*
2456 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2457 	 * if we observe a non-NULL tgid_map then we also observe the correct
2458 	 * tgid_map_max.
2459 	 */
2460 	int *map = smp_load_acquire(&tgid_map);
2461 
2462 	if (unlikely(!map || pid > tgid_map_max))
2463 		return NULL;
2464 
2465 	return &map[pid];
2466 }
2467 
2468 int trace_find_tgid(int pid)
2469 {
2470 	int *ptr = trace_find_tgid_ptr(pid);
2471 
2472 	return ptr ? *ptr : 0;
2473 }
2474 
2475 static int trace_save_tgid(struct task_struct *tsk)
2476 {
2477 	int *ptr;
2478 
2479 	/* treat recording of idle task as a success */
2480 	if (!tsk->pid)
2481 		return 1;
2482 
2483 	ptr = trace_find_tgid_ptr(tsk->pid);
2484 	if (!ptr)
2485 		return 0;
2486 
2487 	*ptr = tsk->tgid;
2488 	return 1;
2489 }
2490 
2491 static bool tracing_record_taskinfo_skip(int flags)
2492 {
2493 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2494 		return true;
2495 	if (!__this_cpu_read(trace_taskinfo_save))
2496 		return true;
2497 	return false;
2498 }
2499 
2500 /**
2501  * tracing_record_taskinfo - record the task info of a task
2502  *
2503  * @task:  task to record
2504  * @flags: TRACE_RECORD_CMDLINE for recording comm
2505  *         TRACE_RECORD_TGID for recording tgid
2506  */
2507 void tracing_record_taskinfo(struct task_struct *task, int flags)
2508 {
2509 	bool done;
2510 
2511 	if (tracing_record_taskinfo_skip(flags))
2512 		return;
2513 
2514 	/*
2515 	 * Record as much task information as possible. If some fail, continue
2516 	 * to try to record the others.
2517 	 */
2518 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2519 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2520 
2521 	/* If recording any information failed, retry soon. */
2522 	if (!done)
2523 		return;
2524 
2525 	__this_cpu_write(trace_taskinfo_save, false);
2526 }
2527 
2528 /**
2529  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2530  *
2531  * @prev: previous task during sched_switch
2532  * @next: next task during sched_switch
2533  * @flags: TRACE_RECORD_CMDLINE for recording comm
2534  *         TRACE_RECORD_TGID for recording tgid
2535  */
2536 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2537 					  struct task_struct *next, int flags)
2538 {
2539 	bool done;
2540 
2541 	if (tracing_record_taskinfo_skip(flags))
2542 		return;
2543 
2544 	/*
2545 	 * Record as much task information as possible. If some fail, continue
2546 	 * to try to record the others.
2547 	 */
2548 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2549 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2550 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2551 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2552 
2553 	/* If recording any information failed, retry soon. */
2554 	if (!done)
2555 		return;
2556 
2557 	__this_cpu_write(trace_taskinfo_save, false);
2558 }
2559 
2560 /* Helpers to record a specific task information */
2561 void tracing_record_cmdline(struct task_struct *task)
2562 {
2563 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2564 }
2565 
2566 void tracing_record_tgid(struct task_struct *task)
2567 {
2568 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2569 }
2570 
2571 /*
2572  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2573  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2574  * simplifies those functions and keeps them in sync.
2575  */
2576 enum print_line_t trace_handle_return(struct trace_seq *s)
2577 {
2578 	return trace_seq_has_overflowed(s) ?
2579 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2580 }
2581 EXPORT_SYMBOL_GPL(trace_handle_return);
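
/*
 * Typical use in an event output callback (illustrative; "field->foo" is
 * a hypothetical event field):
 *
 *	trace_seq_printf(s, "foo=%d\n", field->foo);
 *	return trace_handle_return(s);
 */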
2582 
2583 static unsigned short migration_disable_value(void)
2584 {
2585 #if defined(CONFIG_SMP)
2586 	return current->migration_disabled;
2587 #else
2588 	return 0;
2589 #endif
2590 }
2591 
2592 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2593 {
2594 	unsigned int trace_flags = irqs_status;
2595 	unsigned int pc;
2596 
2597 	pc = preempt_count();
2598 
2599 	if (pc & NMI_MASK)
2600 		trace_flags |= TRACE_FLAG_NMI;
2601 	if (pc & HARDIRQ_MASK)
2602 		trace_flags |= TRACE_FLAG_HARDIRQ;
2603 	if (in_serving_softirq())
2604 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2605 
2606 	if (tif_need_resched())
2607 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2608 	if (test_preempt_need_resched())
2609 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2610 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2611 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2612 }
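
/*
 * Layout of the returned trace_ctx word, as packed above (illustration,
 * not a stable ABI): bits 0-3 carry the preempt count (capped at 15),
 * bits 4-7 the migration-disable depth (capped at 15), and bits 16 and up
 * carry the TRACE_FLAG_* bits. For example, a hard-irq context with a
 * preempt count of 2 and no other flags packs to
 * (TRACE_FLAG_HARDIRQ << 16) | 0x02.
 */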
2613 
2614 struct ring_buffer_event *
2615 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2616 			  int type,
2617 			  unsigned long len,
2618 			  unsigned int trace_ctx)
2619 {
2620 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2621 }
2622 
2623 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2624 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2625 static int trace_buffered_event_ref;
2626 
2627 /**
2628  * trace_buffered_event_enable - enable buffering events
2629  *
2630  * When events are being filtered, it is quicker to use a temporary
2631  * buffer to write the event data into if there's a likely chance
2632  * that it will not be committed. Discarding an event from the ring
2633  * buffer is not as fast as committing one, and is much slower than
2634  * copying into a temporary buffer and committing only on a match.
2635  *
2636  * When an event is to be filtered, allocate per cpu buffers to
2637  * write the event data into, and if the event is filtered and discarded
2638  * it is simply dropped, otherwise, the entire data is to be committed
2639  * in one shot.
2640  */
2641 void trace_buffered_event_enable(void)
2642 {
2643 	struct ring_buffer_event *event;
2644 	struct page *page;
2645 	int cpu;
2646 
2647 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2648 
2649 	if (trace_buffered_event_ref++)
2650 		return;
2651 
2652 	for_each_tracing_cpu(cpu) {
2653 		page = alloc_pages_node(cpu_to_node(cpu),
2654 					GFP_KERNEL | __GFP_NORETRY, 0);
2655 		if (!page)
2656 			goto failed;
2657 
2658 		event = page_address(page);
2659 		memset(event, 0, sizeof(*event));
2660 
2661 		per_cpu(trace_buffered_event, cpu) = event;
2662 
2663 		preempt_disable();
2664 		if (cpu == smp_processor_id() &&
2665 		    __this_cpu_read(trace_buffered_event) !=
2666 		    per_cpu(trace_buffered_event, cpu))
2667 			WARN_ON_ONCE(1);
2668 		preempt_enable();
2669 	}
2670 
2671 	return;
2672  failed:
2673 	trace_buffered_event_disable();
2674 }
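
/*
 * Usage sketch (assumed caller pattern): a caller that starts relying on
 * the temp buffers calls, under event_mutex:
 *
 *	trace_buffered_event_enable();
 *
 * and balances it later, again under event_mutex, with:
 *
 *	trace_buffered_event_disable();
 *
 * The reference count above allows such calls to nest.
 */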
2675 
2676 static void enable_trace_buffered_event(void *data)
2677 {
2678 	/* Probably not needed, but do it anyway */
2679 	smp_rmb();
2680 	this_cpu_dec(trace_buffered_event_cnt);
2681 }
2682 
2683 static void disable_trace_buffered_event(void *data)
2684 {
2685 	this_cpu_inc(trace_buffered_event_cnt);
2686 }
2687 
2688 /**
2689  * trace_buffered_event_disable - disable buffering events
2690  *
2691  * When a filter is removed, it is faster to not use the buffered
2692  * events, and to commit directly into the ring buffer. Free up
2693  * the temp buffers when there are no more users. This requires
2694  * special synchronization with current events.
2695  */
2696 void trace_buffered_event_disable(void)
2697 {
2698 	int cpu;
2699 
2700 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2701 
2702 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2703 		return;
2704 
2705 	if (--trace_buffered_event_ref)
2706 		return;
2707 
2708 	preempt_disable();
2709 	/* For each CPU, set the buffer as used. */
2710 	smp_call_function_many(tracing_buffer_mask,
2711 			       disable_trace_buffered_event, NULL, 1);
2712 	preempt_enable();
2713 
2714 	/* Wait for all current users to finish */
2715 	synchronize_rcu();
2716 
2717 	for_each_tracing_cpu(cpu) {
2718 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2719 		per_cpu(trace_buffered_event, cpu) = NULL;
2720 	}
2721 	/*
2722 	 * Make sure trace_buffered_event is NULL before clearing
2723 	 * trace_buffered_event_cnt.
2724 	 */
2725 	smp_wmb();
2726 
2727 	preempt_disable();
2728 	/* Do the work on each cpu */
2729 	smp_call_function_many(tracing_buffer_mask,
2730 			       enable_trace_buffered_event, NULL, 1);
2731 	preempt_enable();
2732 }
2733 
2734 static struct trace_buffer *temp_buffer;
2735 
2736 struct ring_buffer_event *
2737 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2738 			  struct trace_event_file *trace_file,
2739 			  int type, unsigned long len,
2740 			  unsigned int trace_ctx)
2741 {
2742 	struct ring_buffer_event *entry;
2743 	struct trace_array *tr = trace_file->tr;
2744 	int val;
2745 
2746 	*current_rb = tr->array_buffer.buffer;
2747 
2748 	if (!tr->no_filter_buffering_ref &&
2749 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2750 	    (entry = this_cpu_read(trace_buffered_event))) {
2751 		/*
2752 		 * Filtering is on, so try to use the per cpu buffer first.
2753 		 * This buffer will simulate a ring_buffer_event,
2754 		 * where the type_len is zero and the array[0] will
2755 		 * hold the full length.
2756 		 * (see include/linux/ring_buffer.h for details on
2757 		 *  how the ring_buffer_event is structured).
2758 		 *
2759 		 * Using a temp buffer during filtering and copying it
2760 		 * on a matched filter is quicker than writing directly
2761 		 * into the ring buffer and then discarding it when
2762 		 * it doesn't match. That is because the discard
2763 		 * requires several atomic operations to get right.
2764 		 * Copying on a match and doing nothing on a failed match
2765 		 * is still quicker than skipping the copy on a match but
2766 		 * having to discard out of the ring buffer on a failed match.
2767 		 */
2768 		int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2769 
2770 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2771 
2772 		/*
2773 		 * Preemption is disabled, but interrupts and NMIs
2774 		 * can still come in now. If that happens after
2775 		 * the above increment, then it will have to go
2776 		 * back to the old method of allocating the event
2777 		 * on the ring buffer, and if the filter fails, it
2778 		 * will have to call ring_buffer_discard_commit()
2779 		 * to remove it.
2780 		 *
2781 		 * Need to also check the unlikely case that the
2782 		 * length is bigger than the temp buffer size.
2783 		 * If that happens, then the reserve is pretty much
2784 		 * guaranteed to fail, as the ring buffer currently
2785 		 * only allows events less than a page. But that may
2786 		 * change in the future, so let the ring buffer reserve
2787 		 * handle the failure in that case.
2788 		 */
2789 		if (val == 1 && likely(len <= max_len)) {
2790 			trace_event_setup(entry, type, trace_ctx);
2791 			entry->array[0] = len;
2792 			return entry;
2793 		}
2794 		this_cpu_dec(trace_buffered_event_cnt);
2795 	}
2796 
2797 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2798 					    trace_ctx);
2799 	/*
2800 	 * If tracing is off, but we have triggers enabled,
2801 	 * we still need to look at the event data. Use the temp_buffer
2802 	 * to store the trace event for the trigger to use. It's recursion
2803 	 * safe and will not be recorded anywhere.
2804 	 */
2805 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2806 		*current_rb = temp_buffer;
2807 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2808 						    trace_ctx);
2809 	}
2810 	return entry;
2811 }
2812 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
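
/*
 * Rough caller sketch (mirrors the generated trace_event_raw_event_*()
 * helpers, which reach this function via trace_event_buffer_reserve();
 * "fbuffer" is a struct trace_event_buffer on the caller's stack and
 * "extra" stands in for any dynamic data size):
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry) + extra);
 *	if (!entry)
 *		return;
 *	... fill in the entry fields ...
 *	trace_event_buffer_commit(&fbuffer);
 */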
2813 
2814 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2815 static DEFINE_MUTEX(tracepoint_printk_mutex);
2816 
2817 static void output_printk(struct trace_event_buffer *fbuffer)
2818 {
2819 	struct trace_event_call *event_call;
2820 	struct trace_event_file *file;
2821 	struct trace_event *event;
2822 	unsigned long flags;
2823 	struct trace_iterator *iter = tracepoint_print_iter;
2824 
2825 	/* We should never get here if iter is NULL */
2826 	if (WARN_ON_ONCE(!iter))
2827 		return;
2828 
2829 	event_call = fbuffer->trace_file->event_call;
2830 	if (!event_call || !event_call->event.funcs ||
2831 	    !event_call->event.funcs->trace)
2832 		return;
2833 
2834 	file = fbuffer->trace_file;
2835 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2836 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2837 	     !filter_match_preds(file->filter, fbuffer->entry)))
2838 		return;
2839 
2840 	event = &fbuffer->trace_file->event_call->event;
2841 
2842 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2843 	trace_seq_init(&iter->seq);
2844 	iter->ent = fbuffer->entry;
2845 	event_call->event.funcs->trace(iter, 0, event);
2846 	trace_seq_putc(&iter->seq, 0);
2847 	printk("%s", iter->seq.buffer);
2848 
2849 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2850 }
2851 
2852 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2853 			     void *buffer, size_t *lenp,
2854 			     loff_t *ppos)
2855 {
2856 	int save_tracepoint_printk;
2857 	int ret;
2858 
2859 	mutex_lock(&tracepoint_printk_mutex);
2860 	save_tracepoint_printk = tracepoint_printk;
2861 
2862 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2863 
2864 	/*
2865 	 * This will force exiting early, as tracepoint_printk
2866 	 * is always zero when tracepoint_print_iter is not allocated.
2867 	 */
2868 	if (!tracepoint_print_iter)
2869 		tracepoint_printk = 0;
2870 
2871 	if (save_tracepoint_printk == tracepoint_printk)
2872 		goto out;
2873 
2874 	if (tracepoint_printk)
2875 		static_key_enable(&tracepoint_printk_key.key);
2876 	else
2877 		static_key_disable(&tracepoint_printk_key.key);
2878 
2879  out:
2880 	mutex_unlock(&tracepoint_printk_mutex);
2881 
2882 	return ret;
2883 }
2884 
2885 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2886 {
2887 	enum event_trigger_type tt = ETT_NONE;
2888 	struct trace_event_file *file = fbuffer->trace_file;
2889 
2890 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2891 			fbuffer->entry, &tt))
2892 		goto discard;
2893 
2894 	if (static_key_false(&tracepoint_printk_key.key))
2895 		output_printk(fbuffer);
2896 
2897 	if (static_branch_unlikely(&trace_event_exports_enabled))
2898 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2899 
2900 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2901 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2902 
2903 discard:
2904 	if (tt)
2905 		event_triggers_post_call(file, tt);
2906 
2907 }
2908 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2909 
2910 /*
2911  * Skip 3:
2912  *
2913  *   trace_buffer_unlock_commit_regs()
2914  *   trace_event_buffer_commit()
2915  *   trace_event_raw_event_xxx()
2916  */
2917 # define STACK_SKIP 3
2918 
2919 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2920 				     struct trace_buffer *buffer,
2921 				     struct ring_buffer_event *event,
2922 				     unsigned int trace_ctx,
2923 				     struct pt_regs *regs)
2924 {
2925 	__buffer_unlock_commit(buffer, event);
2926 
2927 	/*
2928 	 * If regs is not set, then skip the necessary functions.
2929 	 * Note, we can still get here via blktrace, wakeup tracer
2930 	 * and mmiotrace, but that's ok if they lose a function or
2931 	 * two. They are not that meaningful.
2932 	 */
2933 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2934 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2935 }
2936 
2937 /*
2938  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2939  */
2940 void
2941 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2942 				   struct ring_buffer_event *event)
2943 {
2944 	__buffer_unlock_commit(buffer, event);
2945 }
2946 
2947 void
2948 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2949 	       parent_ip, unsigned int trace_ctx)
2950 {
2951 	struct trace_event_call *call = &event_function;
2952 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2953 	struct ring_buffer_event *event;
2954 	struct ftrace_entry *entry;
2955 
2956 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2957 					    trace_ctx);
2958 	if (!event)
2959 		return;
2960 	entry	= ring_buffer_event_data(event);
2961 	entry->ip			= ip;
2962 	entry->parent_ip		= parent_ip;
2963 
2964 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2965 		if (static_branch_unlikely(&trace_function_exports_enabled))
2966 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2967 		__buffer_unlock_commit(buffer, event);
2968 	}
2969 }
2970 
2971 #ifdef CONFIG_STACKTRACE
2972 
2973 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2974 #define FTRACE_KSTACK_NESTING	4
2975 
2976 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2977 
2978 struct ftrace_stack {
2979 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2980 };
2981 
2982 
2983 struct ftrace_stacks {
2984 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2985 };
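
/*
 * Size sketch (assuming 4K pages and 64-bit longs): FTRACE_KSTACK_ENTRIES
 * is 4096 / 4 = 1024, so one ftrace_stack is 1024 * 8 = 8K bytes, and the
 * per-CPU ftrace_stacks (all four nesting levels) is 32K bytes per CPU.
 */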
2986 
2987 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2988 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2989 
2990 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2991 				 unsigned int trace_ctx,
2992 				 int skip, struct pt_regs *regs)
2993 {
2994 	struct trace_event_call *call = &event_kernel_stack;
2995 	struct ring_buffer_event *event;
2996 	unsigned int size, nr_entries;
2997 	struct ftrace_stack *fstack;
2998 	struct stack_entry *entry;
2999 	int stackidx;
3000 
3001 	/*
3002 	 * Add one, for this function and the call to stack_trace_save().
3003 	 * If regs is set, then these functions will not be in the way.
3004 	 */
3005 #ifndef CONFIG_UNWINDER_ORC
3006 	if (!regs)
3007 		skip++;
3008 #endif
3009 
3010 	preempt_disable_notrace();
3011 
3012 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3013 
3014 	/* This should never happen. If it does, yell once and skip */
3015 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3016 		goto out;
3017 
3018 	/*
3019 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3020 	 * interrupt will either see the value pre increment or post
3021 	 * increment. If the interrupt happens pre increment it will have
3022 	 * restored the counter when it returns.  We just need a barrier to
3023 	 * keep gcc from moving things around.
3024 	 */
3025 	barrier();
3026 
3027 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3028 	size = ARRAY_SIZE(fstack->calls);
3029 
3030 	if (regs) {
3031 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3032 						   size, skip);
3033 	} else {
3034 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3035 	}
3036 
3037 	size = nr_entries * sizeof(unsigned long);
3038 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3039 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3040 				    trace_ctx);
3041 	if (!event)
3042 		goto out;
3043 	entry = ring_buffer_event_data(event);
3044 
3045 	memcpy(&entry->caller, fstack->calls, size);
3046 	entry->size = nr_entries;
3047 
3048 	if (!call_filter_check_discard(call, entry, buffer, event))
3049 		__buffer_unlock_commit(buffer, event);
3050 
3051  out:
3052 	/* Again, don't let gcc optimize things here */
3053 	barrier();
3054 	__this_cpu_dec(ftrace_stack_reserve);
3055 	preempt_enable_notrace();
3056 
3057 }
3058 
3059 static inline void ftrace_trace_stack(struct trace_array *tr,
3060 				      struct trace_buffer *buffer,
3061 				      unsigned int trace_ctx,
3062 				      int skip, struct pt_regs *regs)
3063 {
3064 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3065 		return;
3066 
3067 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3068 }
3069 
3070 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3071 		   int skip)
3072 {
3073 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3074 
3075 	if (rcu_is_watching()) {
3076 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3077 		return;
3078 	}
3079 
3080 	/*
3081 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3082 	 * but if the above rcu_is_watching() failed, then the NMI
3083 	 * triggered someplace critical, and rcu_irq_enter() should
3084 	 * not be called from NMI.
3085 	 */
3086 	if (unlikely(in_nmi()))
3087 		return;
3088 
3089 	rcu_irq_enter_irqson();
3090 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3091 	rcu_irq_exit_irqson();
3092 }
3093 
3094 /**
3095  * trace_dump_stack - record a stack back trace in the trace buffer
3096  * @skip: Number of functions to skip (helper handlers)
3097  */
3098 void trace_dump_stack(int skip)
3099 {
3100 	if (tracing_disabled || tracing_selftest_running)
3101 		return;
3102 
3103 #ifndef CONFIG_UNWINDER_ORC
3104 	/* Skip 1 to skip this function. */
3105 	skip++;
3106 #endif
3107 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3108 			     tracing_gen_ctx(), skip, NULL);
3109 }
3110 EXPORT_SYMBOL_GPL(trace_dump_stack);
3111 
3112 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3113 static DEFINE_PER_CPU(int, user_stack_count);
3114 
3115 static void
3116 ftrace_trace_userstack(struct trace_array *tr,
3117 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3118 {
3119 	struct trace_event_call *call = &event_user_stack;
3120 	struct ring_buffer_event *event;
3121 	struct userstack_entry *entry;
3122 
3123 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3124 		return;
3125 
3126 	/*
3127 	 * NMIs can not handle page faults, even with fixups.
3128 	 * Saving the user stack can (and often does) fault.
3129 	 */
3130 	if (unlikely(in_nmi()))
3131 		return;
3132 
3133 	/*
3134 	 * prevent recursion, since the user stack tracing may
3135 	 * trigger other kernel events.
3136 	 */
3137 	preempt_disable();
3138 	if (__this_cpu_read(user_stack_count))
3139 		goto out;
3140 
3141 	__this_cpu_inc(user_stack_count);
3142 
3143 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3144 					    sizeof(*entry), trace_ctx);
3145 	if (!event)
3146 		goto out_drop_count;
3147 	entry	= ring_buffer_event_data(event);
3148 
3149 	entry->tgid		= current->tgid;
3150 	memset(&entry->caller, 0, sizeof(entry->caller));
3151 
3152 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3153 	if (!call_filter_check_discard(call, entry, buffer, event))
3154 		__buffer_unlock_commit(buffer, event);
3155 
3156  out_drop_count:
3157 	__this_cpu_dec(user_stack_count);
3158  out:
3159 	preempt_enable();
3160 }
3161 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3162 static void ftrace_trace_userstack(struct trace_array *tr,
3163 				   struct trace_buffer *buffer,
3164 				   unsigned int trace_ctx)
3165 {
3166 }
3167 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3168 
3169 #endif /* CONFIG_STACKTRACE */
3170 
3171 static inline void
3172 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3173 			  unsigned long long delta)
3174 {
3175 	entry->bottom_delta_ts = delta & U32_MAX;
3176 	entry->top_delta_ts = (delta >> 32);
3177 }
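
/*
 * The 64-bit delta is split into two 32-bit halves above; a reader
 * reconstructs it as:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */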
3178 
3179 void trace_last_func_repeats(struct trace_array *tr,
3180 			     struct trace_func_repeats *last_info,
3181 			     unsigned int trace_ctx)
3182 {
3183 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3184 	struct func_repeats_entry *entry;
3185 	struct ring_buffer_event *event;
3186 	u64 delta;
3187 
3188 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3189 					    sizeof(*entry), trace_ctx);
3190 	if (!event)
3191 		return;
3192 
3193 	delta = ring_buffer_event_time_stamp(buffer, event) -
3194 		last_info->ts_last_call;
3195 
3196 	entry = ring_buffer_event_data(event);
3197 	entry->ip = last_info->ip;
3198 	entry->parent_ip = last_info->parent_ip;
3199 	entry->count = last_info->count;
3200 	func_repeats_set_delta_ts(entry, delta);
3201 
3202 	__buffer_unlock_commit(buffer, event);
3203 }
3204 
3205 /* created for use with alloc_percpu */
3206 struct trace_buffer_struct {
3207 	int nesting;
3208 	char buffer[4][TRACE_BUF_SIZE];
3209 };
3210 
3211 static struct trace_buffer_struct *trace_percpu_buffer;
3212 
3213 /*
3214  * This allows for lockless recording.  If we're nested too deeply, then
3215  * this returns NULL.
3216  */
3217 static char *get_trace_buf(void)
3218 {
3219 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3220 
3221 	if (!buffer || buffer->nesting >= 4)
3222 		return NULL;
3223 
3224 	buffer->nesting++;
3225 
3226 	/* Interrupts must see nesting incremented before we use the buffer */
3227 	barrier();
3228 	return &buffer->buffer[buffer->nesting - 1][0];
3229 }
3230 
3231 static void put_trace_buf(void)
3232 {
3233 	/* Don't let the decrement of nesting leak before this */
3234 	barrier();
3235 	this_cpu_dec(trace_percpu_buffer->nesting);
3236 }
3237 
3238 static int alloc_percpu_trace_buffer(void)
3239 {
3240 	struct trace_buffer_struct *buffers;
3241 
3242 	if (trace_percpu_buffer)
3243 		return 0;
3244 
3245 	buffers = alloc_percpu(struct trace_buffer_struct);
3246 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3247 		return -ENOMEM;
3248 
3249 	trace_percpu_buffer = buffers;
3250 	return 0;
3251 }
3252 
3253 static int buffers_allocated;
3254 
3255 void trace_printk_init_buffers(void)
3256 {
3257 	if (buffers_allocated)
3258 		return;
3259 
3260 	if (alloc_percpu_trace_buffer())
3261 		return;
3262 
3263 	/* trace_printk() is for debug use only. Don't use it in production. */
3264 
3265 	pr_warn("\n");
3266 	pr_warn("**********************************************************\n");
3267 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3268 	pr_warn("**                                                      **\n");
3269 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3270 	pr_warn("**                                                      **\n");
3271 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3272 	pr_warn("** unsafe for production use.                           **\n");
3273 	pr_warn("**                                                      **\n");
3274 	pr_warn("** If you see this message and you are not debugging    **\n");
3275 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3276 	pr_warn("**                                                      **\n");
3277 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3278 	pr_warn("**********************************************************\n");
3279 
3280 	/* Expand the buffers to set size */
3281 	tracing_update_buffers();
3282 
3283 	buffers_allocated = 1;
3284 
3285 	/*
3286 	 * trace_printk_init_buffers() can be called by modules.
3287 	 * If that happens, then we need to start cmdline recording
3288 	 * directly here. If the global_trace.buffer is already
3289 	 * allocated here, then this was called by module code.
3290 	 */
3291 	if (global_trace.array_buffer.buffer)
3292 		tracing_start_cmdline_record();
3293 }
3294 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3295 
3296 void trace_printk_start_comm(void)
3297 {
3298 	/* Start tracing comms if trace printk is set */
3299 	if (!buffers_allocated)
3300 		return;
3301 	tracing_start_cmdline_record();
3302 }
3303 
3304 static void trace_printk_start_stop_comm(int enabled)
3305 {
3306 	if (!buffers_allocated)
3307 		return;
3308 
3309 	if (enabled)
3310 		tracing_start_cmdline_record();
3311 	else
3312 		tracing_stop_cmdline_record();
3313 }
3314 
3315 /**
3316  * trace_vbprintk - write binary msg to tracing buffer
3317  * @ip:    The address of the caller
3318  * @fmt:   The string format to write to the buffer
3319  * @args:  Arguments for @fmt
3320  */
3321 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3322 {
3323 	struct trace_event_call *call = &event_bprint;
3324 	struct ring_buffer_event *event;
3325 	struct trace_buffer *buffer;
3326 	struct trace_array *tr = &global_trace;
3327 	struct bprint_entry *entry;
3328 	unsigned int trace_ctx;
3329 	char *tbuffer;
3330 	int len = 0, size;
3331 
3332 	if (unlikely(tracing_selftest_running || tracing_disabled))
3333 		return 0;
3334 
3335 	/* Don't pollute graph traces with trace_vprintk internals */
3336 	pause_graph_tracing();
3337 
3338 	trace_ctx = tracing_gen_ctx();
3339 	preempt_disable_notrace();
3340 
3341 	tbuffer = get_trace_buf();
3342 	if (!tbuffer) {
3343 		len = 0;
3344 		goto out_nobuffer;
3345 	}
3346 
3347 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3348 
3349 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3350 		goto out_put;
3351 
3352 	size = sizeof(*entry) + sizeof(u32) * len;
3353 	buffer = tr->array_buffer.buffer;
3354 	ring_buffer_nest_start(buffer);
3355 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3356 					    trace_ctx);
3357 	if (!event)
3358 		goto out;
3359 	entry = ring_buffer_event_data(event);
3360 	entry->ip			= ip;
3361 	entry->fmt			= fmt;
3362 
3363 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3364 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3365 		__buffer_unlock_commit(buffer, event);
3366 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3367 	}
3368 
3369 out:
3370 	ring_buffer_nest_end(buffer);
3371 out_put:
3372 	put_trace_buf();
3373 
3374 out_nobuffer:
3375 	preempt_enable_notrace();
3376 	unpause_graph_tracing();
3377 
3378 	return len;
3379 }
3380 EXPORT_SYMBOL_GPL(trace_vbprintk);
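
/*
 * Normally reached through the trace_printk() macro, which takes this
 * binary "bprintk" path when the format is a compile-time constant and
 * has arguments, roughly (illustrative):
 *
 *	trace_printk("page=%p order=%d\n", page, order);
 */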
3381 
3382 __printf(3, 0)
3383 static int
3384 __trace_array_vprintk(struct trace_buffer *buffer,
3385 		      unsigned long ip, const char *fmt, va_list args)
3386 {
3387 	struct trace_event_call *call = &event_print;
3388 	struct ring_buffer_event *event;
3389 	int len = 0, size;
3390 	struct print_entry *entry;
3391 	unsigned int trace_ctx;
3392 	char *tbuffer;
3393 
3394 	if (tracing_disabled || tracing_selftest_running)
3395 		return 0;
3396 
3397 	/* Don't pollute graph traces with trace_vprintk internals */
3398 	pause_graph_tracing();
3399 
3400 	trace_ctx = tracing_gen_ctx();
3401 	preempt_disable_notrace();
3402 
3403 
3404 	tbuffer = get_trace_buf();
3405 	if (!tbuffer) {
3406 		len = 0;
3407 		goto out_nobuffer;
3408 	}
3409 
3410 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3411 
3412 	size = sizeof(*entry) + len + 1;
3413 	ring_buffer_nest_start(buffer);
3414 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3415 					    trace_ctx);
3416 	if (!event)
3417 		goto out;
3418 	entry = ring_buffer_event_data(event);
3419 	entry->ip = ip;
3420 
3421 	memcpy(&entry->buf, tbuffer, len + 1);
3422 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3423 		__buffer_unlock_commit(buffer, event);
3424 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3425 	}
3426 
3427 out:
3428 	ring_buffer_nest_end(buffer);
3429 	put_trace_buf();
3430 
3431 out_nobuffer:
3432 	preempt_enable_notrace();
3433 	unpause_graph_tracing();
3434 
3435 	return len;
3436 }
3437 
3438 __printf(3, 0)
3439 int trace_array_vprintk(struct trace_array *tr,
3440 			unsigned long ip, const char *fmt, va_list args)
3441 {
3442 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3443 }
3444 
3445 /**
3446  * trace_array_printk - Print a message to a specific instance
3447  * @tr: The instance trace_array descriptor
3448  * @ip: The instruction pointer that this is called from.
3449  * @fmt: The format to print (printf format)
3450  *
3451  * If a subsystem sets up its own instance, it may printk strings
3452  * into its tracing instance buffer using this function. Note, this
3453  * function will not write into the top level buffer (use
3454  * trace_printk() for that), as the top level buffer should only
3455  * contain events that can be individually disabled.
3456  * trace_printk() is only for debugging a kernel, and should never
3457  * be incorporated into normal use.
3458  *
3459  * trace_array_printk() can be used, as it will not add noise to the
3460  * top level tracing buffer.
3461  *
3462  * Note, trace_array_init_printk() must be called on @tr before this
3463  * can be used.
3464  */
3465 __printf(3, 0)
3466 int trace_array_printk(struct trace_array *tr,
3467 		       unsigned long ip, const char *fmt, ...)
3468 {
3469 	int ret;
3470 	va_list ap;
3471 
3472 	if (!tr)
3473 		return -ENOENT;
3474 
3475 	/* This is only allowed for created instances */
3476 	if (tr == &global_trace)
3477 		return 0;
3478 
3479 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3480 		return 0;
3481 
3482 	va_start(ap, fmt);
3483 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3484 	va_end(ap);
3485 	return ret;
3486 }
3487 EXPORT_SYMBOL_GPL(trace_array_printk);
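
/*
 * Usage sketch (hypothetical instance name and value; assumes the
 * instance was created or looked up with trace_array_get_by_name()):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "val=%d\n", val);
 */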
3488 
3489 /**
3490  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3491  * @tr: The trace array to initialize the buffers for
3492  *
3493  * As trace_array_printk() only writes into instances, such calls are OK
3494  * to have in the kernel (unlike trace_printk()). This needs to be called
3495  * before trace_array_printk() can be used on a trace_array.
3496  */
3497 int trace_array_init_printk(struct trace_array *tr)
3498 {
3499 	if (!tr)
3500 		return -ENOENT;
3501 
3502 	/* This is only allowed for created instances */
3503 	if (tr == &global_trace)
3504 		return -EINVAL;
3505 
3506 	return alloc_percpu_trace_buffer();
3507 }
3508 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3509 
3510 __printf(3, 4)
3511 int trace_array_printk_buf(struct trace_buffer *buffer,
3512 			   unsigned long ip, const char *fmt, ...)
3513 {
3514 	int ret;
3515 	va_list ap;
3516 
3517 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3518 		return 0;
3519 
3520 	va_start(ap, fmt);
3521 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3522 	va_end(ap);
3523 	return ret;
3524 }
3525 
3526 __printf(2, 0)
3527 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3528 {
3529 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3530 }
3531 EXPORT_SYMBOL_GPL(trace_vprintk);
3532 
3533 static void trace_iterator_increment(struct trace_iterator *iter)
3534 {
3535 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3536 
3537 	iter->idx++;
3538 	if (buf_iter)
3539 		ring_buffer_iter_advance(buf_iter);
3540 }
3541 
3542 static struct trace_entry *
3543 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3544 		unsigned long *lost_events)
3545 {
3546 	struct ring_buffer_event *event;
3547 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3548 
3549 	if (buf_iter) {
3550 		event = ring_buffer_iter_peek(buf_iter, ts);
3551 		if (lost_events)
3552 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3553 				(unsigned long)-1 : 0;
3554 	} else {
3555 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3556 					 lost_events);
3557 	}
3558 
3559 	if (event) {
3560 		iter->ent_size = ring_buffer_event_length(event);
3561 		return ring_buffer_event_data(event);
3562 	}
3563 	iter->ent_size = 0;
3564 	return NULL;
3565 }
3566 
3567 static struct trace_entry *
3568 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3569 		  unsigned long *missing_events, u64 *ent_ts)
3570 {
3571 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3572 	struct trace_entry *ent, *next = NULL;
3573 	unsigned long lost_events = 0, next_lost = 0;
3574 	int cpu_file = iter->cpu_file;
3575 	u64 next_ts = 0, ts;
3576 	int next_cpu = -1;
3577 	int next_size = 0;
3578 	int cpu;
3579 
3580 	/*
3581 	 * If we are in a per_cpu trace file, don't bother iterating over
3582 	 * all CPUs; just peek at that CPU directly.
3583 	 */
3584 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3585 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3586 			return NULL;
3587 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3588 		if (ent_cpu)
3589 			*ent_cpu = cpu_file;
3590 
3591 		return ent;
3592 	}
3593 
3594 	for_each_tracing_cpu(cpu) {
3595 
3596 		if (ring_buffer_empty_cpu(buffer, cpu))
3597 			continue;
3598 
3599 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3600 
3601 		/*
3602 		 * Pick the entry with the smallest timestamp:
3603 		 */
3604 		if (ent && (!next || ts < next_ts)) {
3605 			next = ent;
3606 			next_cpu = cpu;
3607 			next_ts = ts;
3608 			next_lost = lost_events;
3609 			next_size = iter->ent_size;
3610 		}
3611 	}
3612 
3613 	iter->ent_size = next_size;
3614 
3615 	if (ent_cpu)
3616 		*ent_cpu = next_cpu;
3617 
3618 	if (ent_ts)
3619 		*ent_ts = next_ts;
3620 
3621 	if (missing_events)
3622 		*missing_events = next_lost;
3623 
3624 	return next;
3625 }
3626 
3627 #define STATIC_FMT_BUF_SIZE	128
3628 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3629 
3630 static char *trace_iter_expand_format(struct trace_iterator *iter)
3631 {
3632 	char *tmp;
3633 
3634 	/*
3635 	 * iter->tr is NULL when used with tp_printk, which means
3636 	 * this can get called where it is not safe to call krealloc().
3637 	 */
3638 	if (!iter->tr || iter->fmt == static_fmt_buf)
3639 		return NULL;
3640 
3641 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3642 		       GFP_KERNEL);
3643 	if (tmp) {
3644 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3645 		iter->fmt = tmp;
3646 	}
3647 
3648 	return tmp;
3649 }
3650 
3651 /* Returns true if the string is safe to dereference from an event */
3652 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3653 {
3654 	unsigned long addr = (unsigned long)str;
3655 	struct trace_event *trace_event;
3656 	struct trace_event_call *event;
3657 
3658 	/* OK if part of the event data */
3659 	if ((addr >= (unsigned long)iter->ent) &&
3660 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3661 		return true;
3662 
3663 	/* OK if part of the temp seq buffer */
3664 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3665 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3666 		return true;
3667 
3668 	/* Core rodata can not be freed */
3669 	if (is_kernel_rodata(addr))
3670 		return true;
3671 
3672 	if (trace_is_tracepoint_string(str))
3673 		return true;
3674 
3675 	/*
3676 	 * Now this could be a module event, referencing core module
3677 	 * data, which is OK.
3678 	 */
3679 	if (!iter->ent)
3680 		return false;
3681 
3682 	trace_event = ftrace_find_event(iter->ent->type);
3683 	if (!trace_event)
3684 		return false;
3685 
3686 	event = container_of(trace_event, struct trace_event_call, event);
3687 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3688 		return false;
3689 
3690 	/* Would rather have rodata, but this will suffice */
3691 	if (within_module_core(addr, event->module))
3692 		return true;
3693 
3694 	return false;
3695 }
3696 
3697 static const char *show_buffer(struct trace_seq *s)
3698 {
3699 	struct seq_buf *seq = &s->seq;
3700 
3701 	seq_buf_terminate(seq);
3702 
3703 	return seq->buffer;
3704 }
3705 
3706 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3707 
3708 static int test_can_verify_check(const char *fmt, ...)
3709 {
3710 	char buf[16];
3711 	va_list ap;
3712 	int ret;
3713 
3714 	/*
3715 	 * The verifier depends on vsnprintf() modifying the va_list
3716 	 * passed to it, where it is sent as a reference. Some architectures
3717 	 * (like x86_32) pass it by value, which means that vsnprintf()
3718 	 * does not modify the va_list passed to it, and the verifier
3719 	 * would then need to be able to understand all the values that
3720 	 * vsnprintf can use. If it is passed by value, then the verifier
3721 	 * is disabled.
3722 	 */
3723 	va_start(ap, fmt);
3724 	vsnprintf(buf, 16, "%d", ap);
3725 	ret = va_arg(ap, int);
3726 	va_end(ap);
3727 
3728 	return ret;
3729 }
3730 
3731 static void test_can_verify(void)
3732 {
3733 	if (!test_can_verify_check("%d %d", 0, 1)) {
3734 		pr_info("trace event string verifier disabled\n");
3735 		static_branch_inc(&trace_no_verify);
3736 	}
3737 }
3738 
3739 /**
3740  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3741  * @iter: The iterator that holds the seq buffer and the event being printed
3742  * @fmt: The format used to print the event
3743  * @ap: The va_list holding the data to print from @fmt.
3744  *
3745  * This writes the data into the @iter->seq buffer using the data from
3746  * @fmt and @ap. If the format has a %s, then the source of the string
3747  * is examined to make sure it is safe to print, otherwise it will
3748  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3749  * pointer.
3750  */
3751 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3752 			 va_list ap)
3753 {
3754 	const char *p = fmt;
3755 	const char *str;
3756 	int i, j;
3757 
3758 	if (WARN_ON_ONCE(!fmt))
3759 		return;
3760 
3761 	if (static_branch_unlikely(&trace_no_verify))
3762 		goto print;
3763 
3764 	/* Don't bother checking when doing a ftrace_dump() */
3765 	if (iter->fmt == static_fmt_buf)
3766 		goto print;
3767 
3768 	while (*p) {
3769 		bool star = false;
3770 		int len = 0;
3771 
3772 		j = 0;
3773 
3774 		/* We only care about %s and variants */
3775 		for (i = 0; p[i]; i++) {
3776 			if (i + 1 >= iter->fmt_size) {
3777 				/*
3778 				 * If we can't expand the copy buffer,
3779 				 * just print it.
3780 				 */
3781 				if (!trace_iter_expand_format(iter))
3782 					goto print;
3783 			}
3784 
3785 			if (p[i] == '\\' && p[i+1]) {
3786 				i++;
3787 				continue;
3788 			}
3789 			if (p[i] == '%') {
3790 				/* Need to test cases like %08.*s */
3791 				for (j = 1; p[i+j]; j++) {
3792 					if (isdigit(p[i+j]) ||
3793 					    p[i+j] == '.')
3794 						continue;
3795 					if (p[i+j] == '*') {
3796 						star = true;
3797 						continue;
3798 					}
3799 					break;
3800 				}
3801 				if (p[i+j] == 's')
3802 					break;
3803 				star = false;
3804 			}
3805 			j = 0;
3806 		}
3807 		/* If no %s found then just print normally */
3808 		if (!p[i])
3809 			break;
3810 
3811 		/* Copy up to the %s, and print that */
3812 		strncpy(iter->fmt, p, i);
3813 		iter->fmt[i] = '\0';
3814 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3815 
3816 		if (star)
3817 			len = va_arg(ap, int);
3818 
3819 		/* The ap now points to the string data of the %s */
3820 		str = va_arg(ap, const char *);
3821 
3822 		/*
3823 		 * If you hit this warning, it is likely that the
3824 		 * trace event in question used %s on a string that
3825 		 * was saved at the time of the event, but may not be
3826 		 * around when the trace is read. Use __string(),
3827 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3828 		 * instead. See samples/trace_events/trace-events-sample.h
3829 		 * for reference.
3830 		 */
3831 		if (WARN_ONCE(!trace_safe_str(iter, str),
3832 			      "fmt: '%s' current_buffer: '%s'",
3833 			      fmt, show_buffer(&iter->seq))) {
3834 			int ret;
3835 
3836 			/* Try to safely read the string */
3837 			if (star) {
3838 				if (len + 1 > iter->fmt_size)
3839 					len = iter->fmt_size - 1;
3840 				if (len < 0)
3841 					len = 0;
3842 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3843 				iter->fmt[len] = 0;
3844 				star = false;
3845 			} else {
3846 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3847 								  iter->fmt_size);
3848 			}
3849 			if (ret < 0)
3850 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3851 			else
3852 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3853 						 str, iter->fmt);
3854 			str = "[UNSAFE-MEMORY]";
3855 			strcpy(iter->fmt, "%s");
3856 		} else {
3857 			strncpy(iter->fmt, p + i, j + 1);
3858 			iter->fmt[j+1] = '\0';
3859 		}
3860 		if (star)
3861 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3862 		else
3863 			trace_seq_printf(&iter->seq, iter->fmt, str);
3864 
3865 		p += i + j + 1;
3866 	}
3867  print:
3868 	if (*p)
3869 		trace_seq_vprintf(&iter->seq, p, ap);
3870 }
3871 
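/*
 * Return the format string to use for an event: if the instance does not
 * have the hash-ptr option set, copy fmt into iter->fmt while replacing
 * each bare "%p" with "%px" so real addresses are printed instead of
 * hashed values. Returns fmt unchanged when no rewrite is needed or the
 * copy buffer cannot be expanded.
 */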
3872 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3873 {
3874 	const char *p, *new_fmt;
3875 	char *q;
3876 
3877 	if (WARN_ON_ONCE(!fmt))
3878 		return fmt;
3879 
3880 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3881 		return fmt;
3882 
3883 	p = fmt;
3884 	new_fmt = q = iter->fmt;
3885 	while (*p) {
3886 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3887 			if (!trace_iter_expand_format(iter))
3888 				return fmt;
3889 
3890 			q += iter->fmt - new_fmt;
3891 			new_fmt = iter->fmt;
3892 		}
3893 
3894 		*q++ = *p++;
3895 
3896 		/* Replace %p with %px */
3897 		if (p[-1] == '%') {
3898 			if (p[0] == '%') {
3899 				*q++ = *p++;
3900 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3901 				*q++ = *p++;
3902 				*q++ = 'x';
3903 			}
3904 		}
3905 	}
3906 	*q = '\0';
3907 
3908 	return new_fmt;
3909 }
3910 
3911 #define STATIC_TEMP_BUF_SIZE	128
3912 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3913 
3914 /* Find the next real entry, without updating the iterator itself */
3915 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3916 					  int *ent_cpu, u64 *ent_ts)
3917 {
3918 	/* __find_next_entry will reset ent_size */
3919 	int ent_size = iter->ent_size;
3920 	struct trace_entry *entry;
3921 
3922 	/*
3923 	 * If called from ftrace_dump(), then the iter->temp buffer
3924 	 * will be the static_temp_buf and not created from kmalloc.
3925 	 * If the entry size is greater than the buffer, we can
3926 	 * not save it. Just return NULL in that case. This is only
3927 	 * used to add markers when two consecutive events' time
3928 	 * stamps have a large delta. See trace_print_lat_context().
3929 	 */
3930 	if (iter->temp == static_temp_buf &&
3931 	    STATIC_TEMP_BUF_SIZE < ent_size)
3932 		return NULL;
3933 
3934 	/*
3935 	 * The __find_next_entry() may call peek_next_entry(), which may
3936 	 * call ring_buffer_peek() that may make the contents of iter->ent
3937 	 * undefined. Need to copy iter->ent now.
3938 	 */
3939 	if (iter->ent && iter->ent != iter->temp) {
3940 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3941 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3942 			void *temp;
3943 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3944 			if (!temp)
3945 				return NULL;
3946 			kfree(iter->temp);
3947 			iter->temp = temp;
3948 			iter->temp_size = iter->ent_size;
3949 		}
3950 		memcpy(iter->temp, iter->ent, iter->ent_size);
3951 		iter->ent = iter->temp;
3952 	}
3953 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3954 	/* Put back the original ent_size */
3955 	iter->ent_size = ent_size;
3956 
3957 	return entry;
3958 }
3959 
3960 /* Find the next real entry, and increment the iterator to the next entry */
3961 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3962 {
3963 	iter->ent = __find_next_entry(iter, &iter->cpu,
3964 				      &iter->lost_events, &iter->ts);
3965 
3966 	if (iter->ent)
3967 		trace_iterator_increment(iter);
3968 
3969 	return iter->ent ? iter : NULL;
3970 }
3971 
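/* Consume (remove) the next entry from the ring buffer for iter->cpu. */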
3972 static void trace_consume(struct trace_iterator *iter)
3973 {
3974 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3975 			    &iter->lost_events);
3976 }
3977 
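/*
 * seq_file ->next() callback for the "trace" file: advance the iterator
 * until it reaches position *pos. The iterator cannot go backwards, so a
 * position behind the current index returns NULL.
 */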
3978 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3979 {
3980 	struct trace_iterator *iter = m->private;
3981 	int i = (int)*pos;
3982 	void *ent;
3983 
3984 	WARN_ON_ONCE(iter->leftover);
3985 
3986 	(*pos)++;
3987 
3988 	/* can't go backwards */
3989 	if (iter->idx > i)
3990 		return NULL;
3991 
3992 	if (iter->idx < 0)
3993 		ent = trace_find_next_entry_inc(iter);
3994 	else
3995 		ent = iter;
3996 
3997 	while (ent && iter->idx < i)
3998 		ent = trace_find_next_entry_inc(iter);
3999 
4000 	iter->pos = *pos;
4001 
4002 	return ent;
4003 }
4004 
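/*
 * Reset the ring buffer iterator for one CPU and skip over any entries
 * recorded before the buffer's time_start (stale data when a max latency
 * tracer never reset this CPU), recording how many were skipped so the
 * entry counts stay accurate.
 */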
4005 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4006 {
4007 	struct ring_buffer_iter *buf_iter;
4008 	unsigned long entries = 0;
4009 	u64 ts;
4010 
4011 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4012 
4013 	buf_iter = trace_buffer_iter(iter, cpu);
4014 	if (!buf_iter)
4015 		return;
4016 
4017 	ring_buffer_iter_reset(buf_iter);
4018 
4019 	/*
4020 	 * We could have the case with the max latency tracers
4021 	 * that a reset never took place on a cpu. This is evident
4022 	 * by the timestamp being before the start of the buffer.
4023 	 */
4024 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4025 		if (ts >= iter->array_buffer->time_start)
4026 			break;
4027 		entries++;
4028 		ring_buffer_iter_advance(buf_iter);
4029 	}
4030 
4031 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4032 }
4033 
4034 /*
4035  * The current tracer is copied to avoid taking a global
4036  * lock all around.
4037  */
4038 static void *s_start(struct seq_file *m, loff_t *pos)
4039 {
4040 	struct trace_iterator *iter = m->private;
4041 	struct trace_array *tr = iter->tr;
4042 	int cpu_file = iter->cpu_file;
4043 	void *p = NULL;
4044 	loff_t l = 0;
4045 	int cpu;
4046 
4047 	/*
4048 	 * copy the tracer to avoid using a global lock all around.
4049 	 * iter->trace is a copy of current_trace; the pointer to the
4050 	 * name may be used instead of a strcmp(), as iter->trace->name
4051 	 * will point to the same string as current_trace->name.
4052 	 */
4053 	mutex_lock(&trace_types_lock);
4054 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4055 		*iter->trace = *tr->current_trace;
4056 	mutex_unlock(&trace_types_lock);
4057 
4058 #ifdef CONFIG_TRACER_MAX_TRACE
4059 	if (iter->snapshot && iter->trace->use_max_tr)
4060 		return ERR_PTR(-EBUSY);
4061 #endif
4062 
4063 	if (*pos != iter->pos) {
4064 		iter->ent = NULL;
4065 		iter->cpu = 0;
4066 		iter->idx = -1;
4067 
4068 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4069 			for_each_tracing_cpu(cpu)
4070 				tracing_iter_reset(iter, cpu);
4071 		} else
4072 			tracing_iter_reset(iter, cpu_file);
4073 
4074 		iter->leftover = 0;
4075 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4076 			;
4077 
4078 	} else {
4079 		/*
4080 		 * If we overflowed the seq_file before, then we want
4081 		 * to just reuse the trace_seq buffer again.
4082 		 */
4083 		if (iter->leftover)
4084 			p = iter;
4085 		else {
4086 			l = *pos - 1;
4087 			p = s_next(m, p, &l);
4088 		}
4089 	}
4090 
4091 	trace_event_read_lock();
4092 	trace_access_lock(cpu_file);
4093 	return p;
4094 }
4095 
4096 static void s_stop(struct seq_file *m, void *p)
4097 {
4098 	struct trace_iterator *iter = m->private;
4099 
4100 #ifdef CONFIG_TRACER_MAX_TRACE
4101 	if (iter->snapshot && iter->trace->use_max_tr)
4102 		return;
4103 #endif
4104 
4105 	trace_access_unlock(iter->cpu_file);
4106 	trace_event_read_unlock();
4107 }
4108 
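/*
 * Return the number of readable entries in one CPU's buffer (@entries)
 * and the total number written including overruns (@total), minus any
 * entries skipped by tracing_iter_reset().
 */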
4109 static void
4110 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4111 		      unsigned long *entries, int cpu)
4112 {
4113 	unsigned long count;
4114 
4115 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4116 	/*
4117 	 * If this buffer has skipped entries, then we hold all
4118 	 * entries for the trace and we need to ignore the
4119 	 * ones before the time stamp.
4120 	 */
4121 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4122 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4123 		/* total is the same as the entries */
4124 		*total = count;
4125 	} else
4126 		*total = count +
4127 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4128 	*entries = count;
4129 }
4130 
4131 static void
4132 get_total_entries(struct array_buffer *buf,
4133 		  unsigned long *total, unsigned long *entries)
4134 {
4135 	unsigned long t, e;
4136 	int cpu;
4137 
4138 	*total = 0;
4139 	*entries = 0;
4140 
4141 	for_each_tracing_cpu(cpu) {
4142 		get_total_entries_cpu(buf, &t, &e, cpu);
4143 		*total += t;
4144 		*entries += e;
4145 	}
4146 }
4147 
4148 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4149 {
4150 	unsigned long total, entries;
4151 
4152 	if (!tr)
4153 		tr = &global_trace;
4154 
4155 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4156 
4157 	return entries;
4158 }
4159 
4160 unsigned long trace_total_entries(struct trace_array *tr)
4161 {
4162 	unsigned long total, entries;
4163 
4164 	if (!tr)
4165 		tr = &global_trace;
4166 
4167 	get_total_entries(&tr->array_buffer, &total, &entries);
4168 
4169 	return entries;
4170 }
4171 
4172 static void print_lat_help_header(struct seq_file *m)
4173 {
4174 	seq_puts(m, "#                    _------=> CPU#            \n"
4175 		    "#                   / _-----=> irqs-off        \n"
4176 		    "#                  | / _----=> need-resched    \n"
4177 		    "#                  || / _---=> hardirq/softirq \n"
4178 		    "#                  ||| / _--=> preempt-depth   \n"
4179 		    "#                  |||| / _-=> migrate-disable \n"
4180 		    "#                  ||||| /     delay           \n"
4181 		    "#  cmd     pid     |||||| time  |   caller     \n"
4182 		    "#     \\   /        ||||||  \\    |    /       \n");
4183 }
4184 
4185 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4186 {
4187 	unsigned long total;
4188 	unsigned long entries;
4189 
4190 	get_total_entries(buf, &total, &entries);
4191 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4192 		   entries, total, num_online_cpus());
4193 	seq_puts(m, "#\n");
4194 }
4195 
4196 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4197 				   unsigned int flags)
4198 {
4199 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4200 
4201 	print_event_info(buf, m);
4202 
4203 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4204 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4205 }
4206 
4207 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4208 				       unsigned int flags)
4209 {
4210 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4211 	const char *space = "            ";
4212 	int prec = tgid ? 12 : 2;
4213 
4214 	print_event_info(buf, m);
4215 
4216 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4217 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4218 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4219 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4220 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4221 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4222 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4223 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4224 }
4225 
4226 void
4227 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4228 {
4229 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4230 	struct array_buffer *buf = iter->array_buffer;
4231 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4232 	struct tracer *type = iter->trace;
4233 	unsigned long entries;
4234 	unsigned long total;
4235 	const char *name = "preemption";
4236 
4237 	name = type->name;
4238 
4239 	get_total_entries(buf, &total, &entries);
4240 
4241 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4242 		   name, UTS_RELEASE);
4243 	seq_puts(m, "# -----------------------------------"
4244 		 "---------------------------------\n");
4245 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4246 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4247 		   nsecs_to_usecs(data->saved_latency),
4248 		   entries,
4249 		   total,
4250 		   buf->cpu,
4251 #if defined(CONFIG_PREEMPT_NONE)
4252 		   "server",
4253 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4254 		   "desktop",
4255 #elif defined(CONFIG_PREEMPT)
4256 		   "preempt",
4257 #elif defined(CONFIG_PREEMPT_RT)
4258 		   "preempt_rt",
4259 #else
4260 		   "unknown",
4261 #endif
4262 		   /* These are reserved for later use */
4263 		   0, 0, 0, 0);
4264 #ifdef CONFIG_SMP
4265 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4266 #else
4267 	seq_puts(m, ")\n");
4268 #endif
4269 	seq_puts(m, "#    -----------------\n");
4270 	seq_printf(m, "#    | task: %.16s-%d "
4271 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4272 		   data->comm, data->pid,
4273 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4274 		   data->policy, data->rt_priority);
4275 	seq_puts(m, "#    -----------------\n");
4276 
4277 	if (data->critical_start) {
4278 		seq_puts(m, "#  => started at: ");
4279 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4280 		trace_print_seq(m, &iter->seq);
4281 		seq_puts(m, "\n#  => ended at:   ");
4282 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4283 		trace_print_seq(m, &iter->seq);
4284 		seq_puts(m, "\n#\n");
4285 	}
4286 
4287 	seq_puts(m, "#\n");
4288 }
4289 
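/*
 * When annotation is enabled and the buffers have overrun, print a
 * "CPU %u buffer started" marker the first time output is produced for a
 * CPU so the reader knows earlier events on that CPU were dropped.
 */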
4290 static void test_cpu_buff_start(struct trace_iterator *iter)
4291 {
4292 	struct trace_seq *s = &iter->seq;
4293 	struct trace_array *tr = iter->tr;
4294 
4295 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4296 		return;
4297 
4298 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4299 		return;
4300 
4301 	if (cpumask_available(iter->started) &&
4302 	    cpumask_test_cpu(iter->cpu, iter->started))
4303 		return;
4304 
4305 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4306 		return;
4307 
4308 	if (cpumask_available(iter->started))
4309 		cpumask_set_cpu(iter->cpu, iter->started);
4310 
4311 	/* Don't print started cpu buffer for the first entry of the trace */
4312 	if (iter->idx > 1)
4313 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4314 				iter->cpu);
4315 }
4316 
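/*
 * Default human readable output: print the context (or latency context)
 * columns and then hand the entry to the trace() callback registered for
 * its event type.
 */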
4317 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4318 {
4319 	struct trace_array *tr = iter->tr;
4320 	struct trace_seq *s = &iter->seq;
4321 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4322 	struct trace_entry *entry;
4323 	struct trace_event *event;
4324 
4325 	entry = iter->ent;
4326 
4327 	test_cpu_buff_start(iter);
4328 
4329 	event = ftrace_find_event(entry->type);
4330 
4331 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4332 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4333 			trace_print_lat_context(iter);
4334 		else
4335 			trace_print_context(iter);
4336 	}
4337 
4338 	if (trace_seq_has_overflowed(s))
4339 		return TRACE_TYPE_PARTIAL_LINE;
4340 
4341 	if (event)
4342 		return event->funcs->trace(iter, sym_flags, event);
4343 
4344 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4345 
4346 	return trace_handle_return(s);
4347 }
4348 
4349 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4350 {
4351 	struct trace_array *tr = iter->tr;
4352 	struct trace_seq *s = &iter->seq;
4353 	struct trace_entry *entry;
4354 	struct trace_event *event;
4355 
4356 	entry = iter->ent;
4357 
4358 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4359 		trace_seq_printf(s, "%d %d %llu ",
4360 				 entry->pid, iter->cpu, iter->ts);
4361 
4362 	if (trace_seq_has_overflowed(s))
4363 		return TRACE_TYPE_PARTIAL_LINE;
4364 
4365 	event = ftrace_find_event(entry->type);
4366 	if (event)
4367 		return event->funcs->raw(iter, 0, event);
4368 
4369 	trace_seq_printf(s, "%d ?\n", entry->type);
4370 
4371 	return trace_handle_return(s);
4372 }
4373 
4374 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4375 {
4376 	struct trace_array *tr = iter->tr;
4377 	struct trace_seq *s = &iter->seq;
4378 	unsigned char newline = '\n';
4379 	struct trace_entry *entry;
4380 	struct trace_event *event;
4381 
4382 	entry = iter->ent;
4383 
4384 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4385 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4386 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4387 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4388 		if (trace_seq_has_overflowed(s))
4389 			return TRACE_TYPE_PARTIAL_LINE;
4390 	}
4391 
4392 	event = ftrace_find_event(entry->type);
4393 	if (event) {
4394 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4395 		if (ret != TRACE_TYPE_HANDLED)
4396 			return ret;
4397 	}
4398 
4399 	SEQ_PUT_FIELD(s, newline);
4400 
4401 	return trace_handle_return(s);
4402 }
4403 
4404 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4405 {
4406 	struct trace_array *tr = iter->tr;
4407 	struct trace_seq *s = &iter->seq;
4408 	struct trace_entry *entry;
4409 	struct trace_event *event;
4410 
4411 	entry = iter->ent;
4412 
4413 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4414 		SEQ_PUT_FIELD(s, entry->pid);
4415 		SEQ_PUT_FIELD(s, iter->cpu);
4416 		SEQ_PUT_FIELD(s, iter->ts);
4417 		if (trace_seq_has_overflowed(s))
4418 			return TRACE_TYPE_PARTIAL_LINE;
4419 	}
4420 
4421 	event = ftrace_find_event(entry->type);
4422 	return event ? event->funcs->binary(iter, 0, event) :
4423 		TRACE_TYPE_HANDLED;
4424 }
4425 
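/*
 * Return 1 if there is nothing left to read, checking either the single
 * CPU selected by iter->cpu_file or all tracing CPUs.
 */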
4426 int trace_empty(struct trace_iterator *iter)
4427 {
4428 	struct ring_buffer_iter *buf_iter;
4429 	int cpu;
4430 
4431 	/* If we are looking at one CPU buffer, only check that one */
4432 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4433 		cpu = iter->cpu_file;
4434 		buf_iter = trace_buffer_iter(iter, cpu);
4435 		if (buf_iter) {
4436 			if (!ring_buffer_iter_empty(buf_iter))
4437 				return 0;
4438 		} else {
4439 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4440 				return 0;
4441 		}
4442 		return 1;
4443 	}
4444 
4445 	for_each_tracing_cpu(cpu) {
4446 		buf_iter = trace_buffer_iter(iter, cpu);
4447 		if (buf_iter) {
4448 			if (!ring_buffer_iter_empty(buf_iter))
4449 				return 0;
4450 		} else {
4451 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4452 				return 0;
4453 		}
4454 	}
4455 
4456 	return 1;
4457 }
4458 
4459 /*  Called with trace_event_read_lock() held. */
4460 enum print_line_t print_trace_line(struct trace_iterator *iter)
4461 {
4462 	struct trace_array *tr = iter->tr;
4463 	unsigned long trace_flags = tr->trace_flags;
4464 	enum print_line_t ret;
4465 
4466 	if (iter->lost_events) {
4467 		if (iter->lost_events == (unsigned long)-1)
4468 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4469 					 iter->cpu);
4470 		else
4471 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4472 					 iter->cpu, iter->lost_events);
4473 		if (trace_seq_has_overflowed(&iter->seq))
4474 			return TRACE_TYPE_PARTIAL_LINE;
4475 	}
4476 
4477 	if (iter->trace && iter->trace->print_line) {
4478 		ret = iter->trace->print_line(iter);
4479 		if (ret != TRACE_TYPE_UNHANDLED)
4480 			return ret;
4481 	}
4482 
4483 	if (iter->ent->type == TRACE_BPUTS &&
4484 			trace_flags & TRACE_ITER_PRINTK &&
4485 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4486 		return trace_print_bputs_msg_only(iter);
4487 
4488 	if (iter->ent->type == TRACE_BPRINT &&
4489 			trace_flags & TRACE_ITER_PRINTK &&
4490 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4491 		return trace_print_bprintk_msg_only(iter);
4492 
4493 	if (iter->ent->type == TRACE_PRINT &&
4494 			trace_flags & TRACE_ITER_PRINTK &&
4495 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4496 		return trace_print_printk_msg_only(iter);
4497 
4498 	if (trace_flags & TRACE_ITER_BIN)
4499 		return print_bin_fmt(iter);
4500 
4501 	if (trace_flags & TRACE_ITER_HEX)
4502 		return print_hex_fmt(iter);
4503 
4504 	if (trace_flags & TRACE_ITER_RAW)
4505 		return print_raw_fmt(iter);
4506 
4507 	return print_trace_fmt(iter);
4508 }
4509 
4510 void trace_latency_header(struct seq_file *m)
4511 {
4512 	struct trace_iterator *iter = m->private;
4513 	struct trace_array *tr = iter->tr;
4514 
4515 	/* print nothing if the buffers are empty */
4516 	if (trace_empty(iter))
4517 		return;
4518 
4519 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4520 		print_trace_header(m, iter);
4521 
4522 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4523 		print_lat_help_header(m);
4524 }
4525 
4526 void trace_default_header(struct seq_file *m)
4527 {
4528 	struct trace_iterator *iter = m->private;
4529 	struct trace_array *tr = iter->tr;
4530 	unsigned long trace_flags = tr->trace_flags;
4531 
4532 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4533 		return;
4534 
4535 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4536 		/* print nothing if the buffers are empty */
4537 		if (trace_empty(iter))
4538 			return;
4539 		print_trace_header(m, iter);
4540 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4541 			print_lat_help_header(m);
4542 	} else {
4543 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4544 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4545 				print_func_help_header_irq(iter->array_buffer,
4546 							   m, trace_flags);
4547 			else
4548 				print_func_help_header(iter->array_buffer, m,
4549 						       trace_flags);
4550 		}
4551 	}
4552 }
4553 
4554 static void test_ftrace_alive(struct seq_file *m)
4555 {
4556 	if (!ftrace_is_dead())
4557 		return;
4558 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4559 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4560 }
4561 
4562 #ifdef CONFIG_TRACER_MAX_TRACE
4563 static void show_snapshot_main_help(struct seq_file *m)
4564 {
4565 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4566 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4567 		    "#                      Takes a snapshot of the main buffer.\n"
4568 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4569 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4570 		    "#                       is not a '0' or '1')\n");
4571 }
4572 
4573 static void show_snapshot_percpu_help(struct seq_file *m)
4574 {
4575 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4576 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4577 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4578 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4579 #else
4580 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4581 		    "#                     Must use main snapshot file to allocate.\n");
4582 #endif
4583 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4584 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4585 		    "#                       is not a '0' or '1')\n");
4586 }
4587 
4588 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4589 {
4590 	if (iter->tr->allocated_snapshot)
4591 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4592 	else
4593 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4594 
4595 	seq_puts(m, "# Snapshot commands:\n");
4596 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4597 		show_snapshot_main_help(m);
4598 	else
4599 		show_snapshot_percpu_help(m);
4600 }
4601 #else
4602 /* Should never be called */
4603 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4604 #endif
4605 
4606 static int s_show(struct seq_file *m, void *v)
4607 {
4608 	struct trace_iterator *iter = v;
4609 	int ret;
4610 
4611 	if (iter->ent == NULL) {
4612 		if (iter->tr) {
4613 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4614 			seq_puts(m, "#\n");
4615 			test_ftrace_alive(m);
4616 		}
4617 		if (iter->snapshot && trace_empty(iter))
4618 			print_snapshot_help(m, iter);
4619 		else if (iter->trace && iter->trace->print_header)
4620 			iter->trace->print_header(m);
4621 		else
4622 			trace_default_header(m);
4623 
4624 	} else if (iter->leftover) {
4625 		/*
4626 		 * If we filled the seq_file buffer earlier, we
4627 		 * want to just show it now.
4628 		 */
4629 		ret = trace_print_seq(m, &iter->seq);
4630 
4631 		/* ret should this time be zero, but you never know */
4632 		iter->leftover = ret;
4633 
4634 	} else {
4635 		print_trace_line(iter);
4636 		ret = trace_print_seq(m, &iter->seq);
4637 		/*
4638 		 * If we overflow the seq_file buffer, then it will
4639 		 * ask us for this data again at start up.
4640 		 * Use that instead.
4641 		 *  ret is 0 if seq_file write succeeded.
4642 		 *        -1 otherwise.
4643 		 */
4644 		iter->leftover = ret;
4645 	}
4646 
4647 	return 0;
4648 }
4649 
4650 /*
4651  * Should be used after trace_array_get(); trace_types_lock
4652  * ensures that i_cdev was already initialized.
4653  */
4654 static inline int tracing_get_cpu(struct inode *inode)
4655 {
4656 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4657 		return (long)inode->i_cdev - 1;
4658 	return RING_BUFFER_ALL_CPUS;
4659 }
4660 
4661 static const struct seq_operations tracer_seq_ops = {
4662 	.start		= s_start,
4663 	.next		= s_next,
4664 	.stop		= s_stop,
4665 	.show		= s_show,
4666 };
4667 
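/*
 * Set up the trace_iterator behind the "trace" file: copy the current
 * tracer, allocate the temp copy buffer and the per-CPU ring buffer
 * iterators, point the iterator at the main or max/snapshot buffer, and
 * optionally stop tracing while the buffer is read (pause-on-trace).
 */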
4668 static struct trace_iterator *
4669 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4670 {
4671 	struct trace_array *tr = inode->i_private;
4672 	struct trace_iterator *iter;
4673 	int cpu;
4674 
4675 	if (tracing_disabled)
4676 		return ERR_PTR(-ENODEV);
4677 
4678 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4679 	if (!iter)
4680 		return ERR_PTR(-ENOMEM);
4681 
4682 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4683 				    GFP_KERNEL);
4684 	if (!iter->buffer_iter)
4685 		goto release;
4686 
4687 	/*
4688 	 * trace_find_next_entry() may need to save off iter->ent.
4689 	 * It will place it into the iter->temp buffer. As most
4690 	 * events are less than 128 bytes, allocate a buffer of that size.
4691 	 * If one is greater, then trace_find_next_entry() will
4692 	 * allocate a new buffer to adjust for the bigger iter->ent.
4693 	 * It's not critical if it fails to get allocated here.
4694 	 */
4695 	iter->temp = kmalloc(128, GFP_KERNEL);
4696 	if (iter->temp)
4697 		iter->temp_size = 128;
4698 
4699 	/*
4700 	 * trace_event_printf() may need to modify given format
4701 	 * string to replace %p with %px so that it shows real address
4702 	 * instead of a hash value. However, that is only needed for event
4703 	 * tracing; other tracers may not need it. Defer the allocation
4704 	 * until it is needed.
4705 	 */
4706 	iter->fmt = NULL;
4707 	iter->fmt_size = 0;
4708 
4709 	/*
4710 	 * We make a copy of the current tracer to avoid concurrent
4711 	 * changes on it while we are reading.
4712 	 */
4713 	mutex_lock(&trace_types_lock);
4714 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4715 	if (!iter->trace)
4716 		goto fail;
4717 
4718 	*iter->trace = *tr->current_trace;
4719 
4720 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4721 		goto fail;
4722 
4723 	iter->tr = tr;
4724 
4725 #ifdef CONFIG_TRACER_MAX_TRACE
4726 	/* Currently only the top directory has a snapshot */
4727 	if (tr->current_trace->print_max || snapshot)
4728 		iter->array_buffer = &tr->max_buffer;
4729 	else
4730 #endif
4731 		iter->array_buffer = &tr->array_buffer;
4732 	iter->snapshot = snapshot;
4733 	iter->pos = -1;
4734 	iter->cpu_file = tracing_get_cpu(inode);
4735 	mutex_init(&iter->mutex);
4736 
4737 	/* Notify the tracer early, before we stop tracing. */
4738 	if (iter->trace->open)
4739 		iter->trace->open(iter);
4740 
4741 	/* Annotate start of buffers if we had overruns */
4742 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4743 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4744 
4745 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4746 	if (trace_clocks[tr->clock_id].in_ns)
4747 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4748 
4749 	/*
4750 	 * If pause-on-trace is enabled, then stop the trace while
4751 	 * dumping, unless this is the "snapshot" file
4752 	 */
4753 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4754 		tracing_stop_tr(tr);
4755 
4756 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4757 		for_each_tracing_cpu(cpu) {
4758 			iter->buffer_iter[cpu] =
4759 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4760 							 cpu, GFP_KERNEL);
4761 		}
4762 		ring_buffer_read_prepare_sync();
4763 		for_each_tracing_cpu(cpu) {
4764 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4765 			tracing_iter_reset(iter, cpu);
4766 		}
4767 	} else {
4768 		cpu = iter->cpu_file;
4769 		iter->buffer_iter[cpu] =
4770 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4771 						 cpu, GFP_KERNEL);
4772 		ring_buffer_read_prepare_sync();
4773 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4774 		tracing_iter_reset(iter, cpu);
4775 	}
4776 
4777 	mutex_unlock(&trace_types_lock);
4778 
4779 	return iter;
4780 
4781  fail:
4782 	mutex_unlock(&trace_types_lock);
4783 	kfree(iter->trace);
4784 	kfree(iter->temp);
4785 	kfree(iter->buffer_iter);
4786 release:
4787 	seq_release_private(inode, file);
4788 	return ERR_PTR(-ENOMEM);
4789 }
4790 
4791 int tracing_open_generic(struct inode *inode, struct file *filp)
4792 {
4793 	int ret;
4794 
4795 	ret = tracing_check_open_get_tr(NULL);
4796 	if (ret)
4797 		return ret;
4798 
4799 	filp->private_data = inode->i_private;
4800 	return 0;
4801 }
4802 
4803 bool tracing_is_disabled(void)
4804 {
4805 	return (tracing_disabled) ? true : false;
4806 }
4807 
4808 /*
4809  * Open and update trace_array ref count.
4810  * Must have the current trace_array passed to it.
4811  */
4812 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4813 {
4814 	struct trace_array *tr = inode->i_private;
4815 	int ret;
4816 
4817 	ret = tracing_check_open_get_tr(tr);
4818 	if (ret)
4819 		return ret;
4820 
4821 	filp->private_data = inode->i_private;
4822 
4823 	return 0;
4824 }
4825 
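/*
 * Tear down everything __tracing_open() set up and restart tracing if it
 * was stopped for the read. Files opened write-only have no iterator and
 * only need to drop the trace_array reference.
 */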
4826 static int tracing_release(struct inode *inode, struct file *file)
4827 {
4828 	struct trace_array *tr = inode->i_private;
4829 	struct seq_file *m = file->private_data;
4830 	struct trace_iterator *iter;
4831 	int cpu;
4832 
4833 	if (!(file->f_mode & FMODE_READ)) {
4834 		trace_array_put(tr);
4835 		return 0;
4836 	}
4837 
4838 	/* Writes do not use seq_file */
4839 	iter = m->private;
4840 	mutex_lock(&trace_types_lock);
4841 
4842 	for_each_tracing_cpu(cpu) {
4843 		if (iter->buffer_iter[cpu])
4844 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4845 	}
4846 
4847 	if (iter->trace && iter->trace->close)
4848 		iter->trace->close(iter);
4849 
4850 	if (!iter->snapshot && tr->stop_count)
4851 		/* reenable tracing if it was previously enabled */
4852 		tracing_start_tr(tr);
4853 
4854 	__trace_array_put(tr);
4855 
4856 	mutex_unlock(&trace_types_lock);
4857 
4858 	mutex_destroy(&iter->mutex);
4859 	free_cpumask_var(iter->started);
4860 	kfree(iter->fmt);
4861 	kfree(iter->temp);
4862 	kfree(iter->trace);
4863 	kfree(iter->buffer_iter);
4864 	seq_release_private(inode, file);
4865 
4866 	return 0;
4867 }
4868 
4869 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4870 {
4871 	struct trace_array *tr = inode->i_private;
4872 
4873 	trace_array_put(tr);
4874 	return 0;
4875 }
4876 
4877 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4878 {
4879 	struct trace_array *tr = inode->i_private;
4880 
4881 	trace_array_put(tr);
4882 
4883 	return single_release(inode, file);
4884 }
4885 
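/*
 * Open the "trace" file: an open for writing with O_TRUNC clears the
 * buffer(s) first; an open for reading builds an iterator with
 * __tracing_open() and honors the latency-format option.
 */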
4886 static int tracing_open(struct inode *inode, struct file *file)
4887 {
4888 	struct trace_array *tr = inode->i_private;
4889 	struct trace_iterator *iter;
4890 	int ret;
4891 
4892 	ret = tracing_check_open_get_tr(tr);
4893 	if (ret)
4894 		return ret;
4895 
4896 	/* If this file was open for write, then erase contents */
4897 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4898 		int cpu = tracing_get_cpu(inode);
4899 		struct array_buffer *trace_buf = &tr->array_buffer;
4900 
4901 #ifdef CONFIG_TRACER_MAX_TRACE
4902 		if (tr->current_trace->print_max)
4903 			trace_buf = &tr->max_buffer;
4904 #endif
4905 
4906 		if (cpu == RING_BUFFER_ALL_CPUS)
4907 			tracing_reset_online_cpus(trace_buf);
4908 		else
4909 			tracing_reset_cpu(trace_buf, cpu);
4910 	}
4911 
4912 	if (file->f_mode & FMODE_READ) {
4913 		iter = __tracing_open(inode, file, false);
4914 		if (IS_ERR(iter))
4915 			ret = PTR_ERR(iter);
4916 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4917 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4918 	}
4919 
4920 	if (ret < 0)
4921 		trace_array_put(tr);
4922 
4923 	return ret;
4924 }
4925 
4926 /*
4927  * Some tracers are not suitable for instance buffers.
4928  * A tracer is always available for the global array (toplevel)
4929  * or if it explicitly states that it is.
4930  */
4931 static bool
4932 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4933 {
4934 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4935 }
4936 
4937 /* Find the next tracer that this trace array may use */
4938 static struct tracer *
4939 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4940 {
4941 	while (t && !trace_ok_for_array(t, tr))
4942 		t = t->next;
4943 
4944 	return t;
4945 }
4946 
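/*
 * seq_file iteration over the registered tracers (show_traces_fops): walk
 * the global trace_types list under trace_types_lock, skipping tracers
 * this instance is not allowed to use.
 */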
4947 static void *
4948 t_next(struct seq_file *m, void *v, loff_t *pos)
4949 {
4950 	struct trace_array *tr = m->private;
4951 	struct tracer *t = v;
4952 
4953 	(*pos)++;
4954 
4955 	if (t)
4956 		t = get_tracer_for_array(tr, t->next);
4957 
4958 	return t;
4959 }
4960 
4961 static void *t_start(struct seq_file *m, loff_t *pos)
4962 {
4963 	struct trace_array *tr = m->private;
4964 	struct tracer *t;
4965 	loff_t l = 0;
4966 
4967 	mutex_lock(&trace_types_lock);
4968 
4969 	t = get_tracer_for_array(tr, trace_types);
4970 	for (; t && l < *pos; t = t_next(m, t, &l))
4971 			;
4972 
4973 	return t;
4974 }
4975 
4976 static void t_stop(struct seq_file *m, void *p)
4977 {
4978 	mutex_unlock(&trace_types_lock);
4979 }
4980 
4981 static int t_show(struct seq_file *m, void *v)
4982 {
4983 	struct tracer *t = v;
4984 
4985 	if (!t)
4986 		return 0;
4987 
4988 	seq_puts(m, t->name);
4989 	if (t->next)
4990 		seq_putc(m, ' ');
4991 	else
4992 		seq_putc(m, '\n');
4993 
4994 	return 0;
4995 }
4996 
4997 static const struct seq_operations show_traces_seq_ops = {
4998 	.start		= t_start,
4999 	.next		= t_next,
5000 	.stop		= t_stop,
5001 	.show		= t_show,
5002 };
5003 
5004 static int show_traces_open(struct inode *inode, struct file *file)
5005 {
5006 	struct trace_array *tr = inode->i_private;
5007 	struct seq_file *m;
5008 	int ret;
5009 
5010 	ret = tracing_check_open_get_tr(tr);
5011 	if (ret)
5012 		return ret;
5013 
5014 	ret = seq_open(file, &show_traces_seq_ops);
5015 	if (ret) {
5016 		trace_array_put(tr);
5017 		return ret;
5018 	}
5019 
5020 	m = file->private_data;
5021 	m->private = tr;
5022 
5023 	return 0;
5024 }
5025 
5026 static int show_traces_release(struct inode *inode, struct file *file)
5027 {
5028 	struct trace_array *tr = inode->i_private;
5029 
5030 	trace_array_put(tr);
5031 	return seq_release(inode, file);
5032 }
5033 
5034 static ssize_t
5035 tracing_write_stub(struct file *filp, const char __user *ubuf,
5036 		   size_t count, loff_t *ppos)
5037 {
5038 	return count;
5039 }
5040 
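/*
 * Seek on a trace file: use seq_lseek() when the file was opened for
 * reading; write-only opens have no seq_file, so just reset the position.
 */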
5041 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5042 {
5043 	int ret;
5044 
5045 	if (file->f_mode & FMODE_READ)
5046 		ret = seq_lseek(file, offset, whence);
5047 	else
5048 		file->f_pos = ret = 0;
5049 
5050 	return ret;
5051 }
5052 
5053 static const struct file_operations tracing_fops = {
5054 	.open		= tracing_open,
5055 	.read		= seq_read,
5056 	.write		= tracing_write_stub,
5057 	.llseek		= tracing_lseek,
5058 	.release	= tracing_release,
5059 };
5060 
5061 static const struct file_operations show_traces_fops = {
5062 	.open		= show_traces_open,
5063 	.read		= seq_read,
5064 	.llseek		= seq_lseek,
5065 	.release	= show_traces_release,
5066 };
5067 
5068 static ssize_t
5069 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5070 		     size_t count, loff_t *ppos)
5071 {
5072 	struct trace_array *tr = file_inode(filp)->i_private;
5073 	char *mask_str;
5074 	int len;
5075 
5076 	len = snprintf(NULL, 0, "%*pb\n",
5077 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5078 	mask_str = kmalloc(len, GFP_KERNEL);
5079 	if (!mask_str)
5080 		return -ENOMEM;
5081 
5082 	len = snprintf(mask_str, len, "%*pb\n",
5083 		       cpumask_pr_args(tr->tracing_cpumask));
5084 	if (len >= count) {
5085 		count = -EINVAL;
5086 		goto out_err;
5087 	}
5088 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5089 
5090 out_err:
5091 	kfree(mask_str);
5092 
5093 	return count;
5094 }
5095 
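/*
 * Apply a new tracing cpumask: for every CPU whose bit changes, adjust the
 * per-CPU disabled counter and enable/disable recording into its ring
 * buffer, all under tr->max_lock with interrupts off.
 */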
5096 int tracing_set_cpumask(struct trace_array *tr,
5097 			cpumask_var_t tracing_cpumask_new)
5098 {
5099 	int cpu;
5100 
5101 	if (!tr)
5102 		return -EINVAL;
5103 
5104 	local_irq_disable();
5105 	arch_spin_lock(&tr->max_lock);
5106 	for_each_tracing_cpu(cpu) {
5107 		/*
5108 		 * Increase/decrease the disabled counter if we are
5109 		 * about to flip a bit in the cpumask:
5110 		 */
5111 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5112 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5113 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5114 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5115 		}
5116 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5117 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5118 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5119 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5120 		}
5121 	}
5122 	arch_spin_unlock(&tr->max_lock);
5123 	local_irq_enable();
5124 
5125 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5126 
5127 	return 0;
5128 }
5129 
5130 static ssize_t
5131 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5132 		      size_t count, loff_t *ppos)
5133 {
5134 	struct trace_array *tr = file_inode(filp)->i_private;
5135 	cpumask_var_t tracing_cpumask_new;
5136 	int err;
5137 
5138 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5139 		return -ENOMEM;
5140 
5141 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5142 	if (err)
5143 		goto err_free;
5144 
5145 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5146 	if (err)
5147 		goto err_free;
5148 
5149 	free_cpumask_var(tracing_cpumask_new);
5150 
5151 	return count;
5152 
5153 err_free:
5154 	free_cpumask_var(tracing_cpumask_new);
5155 
5156 	return err;
5157 }
5158 
5159 static const struct file_operations tracing_cpumask_fops = {
5160 	.open		= tracing_open_generic_tr,
5161 	.read		= tracing_cpumask_read,
5162 	.write		= tracing_cpumask_write,
5163 	.release	= tracing_release_generic_tr,
5164 	.llseek		= generic_file_llseek,
5165 };
5166 
5167 static int tracing_trace_options_show(struct seq_file *m, void *v)
5168 {
5169 	struct tracer_opt *trace_opts;
5170 	struct trace_array *tr = m->private;
5171 	u32 tracer_flags;
5172 	int i;
5173 
5174 	mutex_lock(&trace_types_lock);
5175 	tracer_flags = tr->current_trace->flags->val;
5176 	trace_opts = tr->current_trace->flags->opts;
5177 
5178 	for (i = 0; trace_options[i]; i++) {
5179 		if (tr->trace_flags & (1 << i))
5180 			seq_printf(m, "%s\n", trace_options[i]);
5181 		else
5182 			seq_printf(m, "no%s\n", trace_options[i]);
5183 	}
5184 
5185 	for (i = 0; trace_opts[i].name; i++) {
5186 		if (tracer_flags & trace_opts[i].bit)
5187 			seq_printf(m, "%s\n", trace_opts[i].name);
5188 		else
5189 			seq_printf(m, "no%s\n", trace_opts[i].name);
5190 	}
5191 	mutex_unlock(&trace_types_lock);
5192 
5193 	return 0;
5194 }
5195 
5196 static int __set_tracer_option(struct trace_array *tr,
5197 			       struct tracer_flags *tracer_flags,
5198 			       struct tracer_opt *opts, int neg)
5199 {
5200 	struct tracer *trace = tracer_flags->trace;
5201 	int ret;
5202 
5203 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5204 	if (ret)
5205 		return ret;
5206 
5207 	if (neg)
5208 		tracer_flags->val &= ~opts->bit;
5209 	else
5210 		tracer_flags->val |= opts->bit;
5211 	return 0;
5212 }
5213 
5214 /* Try to assign a tracer specific option */
5215 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5216 {
5217 	struct tracer *trace = tr->current_trace;
5218 	struct tracer_flags *tracer_flags = trace->flags;
5219 	struct tracer_opt *opts = NULL;
5220 	int i;
5221 
5222 	for (i = 0; tracer_flags->opts[i].name; i++) {
5223 		opts = &tracer_flags->opts[i];
5224 
5225 		if (strcmp(cmp, opts->name) == 0)
5226 			return __set_tracer_option(tr, trace->flags, opts, neg);
5227 	}
5228 
5229 	return -EINVAL;
5230 }
5231 
5232 /* Some tracers require overwrite to stay enabled */
5233 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5234 {
5235 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5236 		return -1;
5237 
5238 	return 0;
5239 }
5240 
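/*
 * Set or clear one TRACE_ITER_* flag on a trace instance and apply its
 * side effects (cmdline/tgid recording, fork following, ring buffer
 * overwrite mode, trace_printk). The current tracer may veto the change.
 */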
5241 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5242 {
5243 	int *map;
5244 
5245 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5246 	    (mask == TRACE_ITER_RECORD_CMD))
5247 		lockdep_assert_held(&event_mutex);
5248 
5249 	/* do nothing if flag is already set */
5250 	if (!!(tr->trace_flags & mask) == !!enabled)
5251 		return 0;
5252 
5253 	/* Give the tracer a chance to approve the change */
5254 	if (tr->current_trace->flag_changed)
5255 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5256 			return -EINVAL;
5257 
5258 	if (enabled)
5259 		tr->trace_flags |= mask;
5260 	else
5261 		tr->trace_flags &= ~mask;
5262 
5263 	if (mask == TRACE_ITER_RECORD_CMD)
5264 		trace_event_enable_cmd_record(enabled);
5265 
5266 	if (mask == TRACE_ITER_RECORD_TGID) {
5267 		if (!tgid_map) {
5268 			tgid_map_max = pid_max;
5269 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5270 				       GFP_KERNEL);
5271 
5272 			/*
5273 			 * Pairs with smp_load_acquire() in
5274 			 * trace_find_tgid_ptr() to ensure that if it observes
5275 			 * the tgid_map we just allocated then it also observes
5276 			 * the corresponding tgid_map_max value.
5277 			 */
5278 			smp_store_release(&tgid_map, map);
5279 		}
5280 		if (!tgid_map) {
5281 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5282 			return -ENOMEM;
5283 		}
5284 
5285 		trace_event_enable_tgid_record(enabled);
5286 	}
5287 
5288 	if (mask == TRACE_ITER_EVENT_FORK)
5289 		trace_event_follow_fork(tr, enabled);
5290 
5291 	if (mask == TRACE_ITER_FUNC_FORK)
5292 		ftrace_pid_follow_fork(tr, enabled);
5293 
5294 	if (mask == TRACE_ITER_OVERWRITE) {
5295 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5296 #ifdef CONFIG_TRACER_MAX_TRACE
5297 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5298 #endif
5299 	}
5300 
5301 	if (mask == TRACE_ITER_PRINTK) {
5302 		trace_printk_start_stop_comm(enabled);
5303 		trace_printk_control(enabled);
5304 	}
5305 
5306 	return 0;
5307 }
5308 
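/*
 * Parse a single option token, with an optional "no" prefix to clear it.
 * Core trace options are matched first; anything else is passed to the
 * current tracer's own option handler.
 */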
5309 int trace_set_options(struct trace_array *tr, char *option)
5310 {
5311 	char *cmp;
5312 	int neg = 0;
5313 	int ret;
5314 	size_t orig_len = strlen(option);
5315 	int len;
5316 
5317 	cmp = strstrip(option);
5318 
5319 	len = str_has_prefix(cmp, "no");
5320 	if (len)
5321 		neg = 1;
5322 
5323 	cmp += len;
5324 
5325 	mutex_lock(&event_mutex);
5326 	mutex_lock(&trace_types_lock);
5327 
5328 	ret = match_string(trace_options, -1, cmp);
5329 	/* If no option could be set, test the specific tracer options */
5330 	if (ret < 0)
5331 		ret = set_tracer_option(tr, cmp, neg);
5332 	else
5333 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5334 
5335 	mutex_unlock(&trace_types_lock);
5336 	mutex_unlock(&event_mutex);
5337 
5338 	/*
5339 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5340 	 * turn it back into a space.
5341 	 */
5342 	if (orig_len > strlen(option))
5343 		option[strlen(option)] = ' ';
5344 
5345 	return ret;
5346 }
5347 
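/*
 * Apply each comma-separated option collected from the kernel command line
 * (trace_boot_options_buf) to the global trace array.
 */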
5348 static void __init apply_trace_boot_options(void)
5349 {
5350 	char *buf = trace_boot_options_buf;
5351 	char *option;
5352 
5353 	while (true) {
5354 		option = strsep(&buf, ",");
5355 
5356 		if (!option)
5357 			break;
5358 
5359 		if (*option)
5360 			trace_set_options(&global_trace, option);
5361 
5362 		/* Put back the comma to allow this to be called again */
5363 		if (buf)
5364 			*(buf - 1) = ',';
5365 	}
5366 }
5367 
5368 static ssize_t
5369 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5370 			size_t cnt, loff_t *ppos)
5371 {
5372 	struct seq_file *m = filp->private_data;
5373 	struct trace_array *tr = m->private;
5374 	char buf[64];
5375 	int ret;
5376 
5377 	if (cnt >= sizeof(buf))
5378 		return -EINVAL;
5379 
5380 	if (copy_from_user(buf, ubuf, cnt))
5381 		return -EFAULT;
5382 
5383 	buf[cnt] = 0;
5384 
5385 	ret = trace_set_options(tr, buf);
5386 	if (ret < 0)
5387 		return ret;
5388 
5389 	*ppos += cnt;
5390 
5391 	return cnt;
5392 }
5393 
5394 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5395 {
5396 	struct trace_array *tr = inode->i_private;
5397 	int ret;
5398 
5399 	ret = tracing_check_open_get_tr(tr);
5400 	if (ret)
5401 		return ret;
5402 
5403 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5404 	if (ret < 0)
5405 		trace_array_put(tr);
5406 
5407 	return ret;
5408 }
5409 
5410 static const struct file_operations tracing_iter_fops = {
5411 	.open		= tracing_trace_options_open,
5412 	.read		= seq_read,
5413 	.llseek		= seq_lseek,
5414 	.release	= tracing_single_release_tr,
5415 	.write		= tracing_trace_options_write,
5416 };
5417 
5418 static const char readme_msg[] =
5419 	"tracing mini-HOWTO:\n\n"
5420 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5421 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5422 	" Important files:\n"
5423 	"  trace\t\t\t- The static contents of the buffer\n"
5424 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5425 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5426 	"  current_tracer\t- function and latency tracers\n"
5427 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5428 	"  error_log\t- error log for failed commands (that support it)\n"
5429 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5430 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5431 	"  trace_clock\t\t- change the clock used to order events\n"
5432 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5433 	"      global:   Synced across CPUs but slows tracing down.\n"
5434 	"     counter:   Not a clock, but just an increment\n"
5435 	"      uptime:   Jiffy counter from time of boot\n"
5436 	"        perf:   Same clock that perf events use\n"
5437 #ifdef CONFIG_X86_64
5438 	"     x86-tsc:   TSC cycle counter\n"
5439 #endif
5440 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5441 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5442 	"    absolute:   Absolute (standalone) timestamp\n"
5443 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5444 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5445 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5446 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5447 	"\t\t\t  Remove sub-buffer with rmdir\n"
5448 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5449 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5450 	"\t\t\t  option name\n"
5451 	"  saved_cmdlines_size\t- echo a number in here to set how many comm-pid pairs are saved\n"
5452 #ifdef CONFIG_DYNAMIC_FTRACE
5453 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5454 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5455 	"\t\t\t  functions\n"
5456 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5457 	"\t     modules: Can select a group via module\n"
5458 	"\t      Format: :mod:<module-name>\n"
5459 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5460 	"\t    triggers: a command to perform when function is hit\n"
5461 	"\t      Format: <function>:<trigger>[:count]\n"
5462 	"\t     trigger: traceon, traceoff\n"
5463 	"\t\t      enable_event:<system>:<event>\n"
5464 	"\t\t      disable_event:<system>:<event>\n"
5465 #ifdef CONFIG_STACKTRACE
5466 	"\t\t      stacktrace\n"
5467 #endif
5468 #ifdef CONFIG_TRACER_SNAPSHOT
5469 	"\t\t      snapshot\n"
5470 #endif
5471 	"\t\t      dump\n"
5472 	"\t\t      cpudump\n"
5473 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5474 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5475 	"\t     The first one will disable tracing every time do_fault is hit\n"
5476 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5477 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5478 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5479 	"\t       the counter will not decrement. It only decrements when the\n"
5480 	"\t       trigger did work\n"
5481 	"\t     To remove trigger without count:\n"
5482 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5483 	"\t     To remove trigger with a count:\n"
5484 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5485 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5486 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5487 	"\t    modules: Can select a group via module command :mod:\n"
5488 	"\t    Does not accept triggers\n"
5489 #endif /* CONFIG_DYNAMIC_FTRACE */
5490 #ifdef CONFIG_FUNCTION_TRACER
5491 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5492 	"\t\t    (function)\n"
5493 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5494 	"\t\t    (function)\n"
5495 #endif
5496 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5497 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5498 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5499 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5500 #endif
5501 #ifdef CONFIG_TRACER_SNAPSHOT
5502 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5503 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5504 	"\t\t\t  information\n"
5505 #endif
5506 #ifdef CONFIG_STACK_TRACER
5507 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5508 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5509 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5510 	"\t\t\t  new trace)\n"
5511 #ifdef CONFIG_DYNAMIC_FTRACE
5512 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5513 	"\t\t\t  traces\n"
5514 #endif
5515 #endif /* CONFIG_STACK_TRACER */
5516 #ifdef CONFIG_DYNAMIC_EVENTS
5517 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5518 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5519 #endif
5520 #ifdef CONFIG_KPROBE_EVENTS
5521 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5522 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5523 #endif
5524 #ifdef CONFIG_UPROBE_EVENTS
5525 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5526 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5527 #endif
5528 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5529 	"\t  accepts: event-definitions (one definition per line)\n"
5530 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5531 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5532 #ifdef CONFIG_HIST_TRIGGERS
5533 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5534 #endif
5535 	"\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5536 	"\t           -:[<group>/]<event>\n"
5537 #ifdef CONFIG_KPROBE_EVENTS
5538 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5539 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5540 #endif
5541 #ifdef CONFIG_UPROBE_EVENTS
5542 	"\t    place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5543 #endif
5544 	"\t     args: <name>=fetcharg[:type]\n"
5545 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5546 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5547 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5548 #else
5549 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5550 #endif
5551 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5552 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5553 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5554 	"\t           <type>\\[<array-size>\\]\n"
5555 #ifdef CONFIG_HIST_TRIGGERS
5556 	"\t    field: <stype> <name>;\n"
5557 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5558 	"\t           [unsigned] char/int/long\n"
5559 #endif
5560 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5561 	"\t            of the <attached-group>/<attached-event>.\n"
5562 #endif
5563 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5564 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5565 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5566 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5567 	"\t\t\t  events\n"
5568 	"      filter\t\t- If set, only events passing filter are traced\n"
5569 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5570 	"\t\t\t  <event>:\n"
5571 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5572 	"      filter\t\t- If set, only events passing filter are traced\n"
5573 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5574 	"\t    Format: <trigger>[:count][if <filter>]\n"
5575 	"\t   trigger: traceon, traceoff\n"
5576 	"\t            enable_event:<system>:<event>\n"
5577 	"\t            disable_event:<system>:<event>\n"
5578 #ifdef CONFIG_HIST_TRIGGERS
5579 	"\t            enable_hist:<system>:<event>\n"
5580 	"\t            disable_hist:<system>:<event>\n"
5581 #endif
5582 #ifdef CONFIG_STACKTRACE
5583 	"\t\t    stacktrace\n"
5584 #endif
5585 #ifdef CONFIG_TRACER_SNAPSHOT
5586 	"\t\t    snapshot\n"
5587 #endif
5588 #ifdef CONFIG_HIST_TRIGGERS
5589 	"\t\t    hist (see below)\n"
5590 #endif
5591 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5592 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5593 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5594 	"\t                  events/block/block_unplug/trigger\n"
5595 	"\t   The first disables tracing every time block_unplug is hit.\n"
5596 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5597 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5598 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5599 	"\t   Like function triggers, the counter is only decremented if it\n"
5600 	"\t    enabled or disabled tracing.\n"
5601 	"\t   To remove a trigger without a count:\n"
5602 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5603 	"\t   To remove a trigger with a count:\n"
5604 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5605 	"\t   Filters can be ignored when removing a trigger.\n"
5606 #ifdef CONFIG_HIST_TRIGGERS
5607 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5608 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5609 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5610 	"\t            [:values=<field1[,field2,...]>]\n"
5611 	"\t            [:sort=<field1[,field2,...]>]\n"
5612 	"\t            [:size=#entries]\n"
5613 	"\t            [:pause][:continue][:clear]\n"
5614 	"\t            [:name=histname1]\n"
5615 	"\t            [:<handler>.<action>]\n"
5616 	"\t            [if <filter>]\n\n"
5617 	"\t    Note, special fields can be used as well:\n"
5618 	"\t            common_timestamp - to record current timestamp\n"
5619 	"\t            common_cpu - to record the CPU the event happened on\n"
5620 	"\n"
5621 	"\t    A hist trigger variable can be:\n"
5622 	"\t        - a reference to a field e.g. x=common_timestamp,\n"
5623 	"\t        - a reference to another variable e.g. y=$x,\n"
5624 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5625 	"\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5626 	"\n"
5627 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5628 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5629 	"\t    variable reference, field or numeric literal.\n"
5630 	"\n"
5631 	"\t    When a matching event is hit, an entry is added to a hash\n"
5632 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5633 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5634 	"\t    correspond to fields in the event's format description.  Keys\n"
5635 	"\t    can be any field, or the special string 'stacktrace'.\n"
5636 	"\t    Compound keys consisting of up to two fields can be specified\n"
5637 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5638 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5639 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5640 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5641 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5642 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5643 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5644 	"\t    its histogram data will be shared with other triggers of the\n"
5645 	"\t    same name, and trigger hits will update this common data.\n\n"
5646 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5647 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5648 	"\t    triggers attached to an event, there will be a table for each\n"
5649 	"\t    trigger in the output.  The table displayed for a named\n"
5650 	"\t    trigger will be the same as any other instance having the\n"
5651 	"\t    same name.  The default format used to display a given field\n"
5652 	"\t    can be modified by appending any of the following modifiers\n"
5653 	"\t    to the field name, as applicable:\n\n"
5654 	"\t            .hex        display a number as a hex value\n"
5655 	"\t            .sym        display an address as a symbol\n"
5656 	"\t            .sym-offset display an address as a symbol and offset\n"
5657 	"\t            .execname   display a common_pid as a program name\n"
5658 	"\t            .syscall    display a syscall id as a syscall name\n"
5659 	"\t            .log2       display log2 value rather than raw number\n"
5660 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5661 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5662 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5663 	"\t    trigger or to start a hist trigger but not log any events\n"
5664 	"\t    until told to do so.  'continue' can be used to start or\n"
5665 	"\t    restart a paused hist trigger.\n\n"
5666 	"\t    The 'clear' parameter will clear the contents of a running\n"
5667 	"\t    hist trigger and leave its current paused/active state\n"
5668 	"\t    unchanged.\n\n"
5669 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5670 	"\t    have one event conditionally start and stop another event's\n"
5671 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5672 	"\t    the enable_event and disable_event triggers.\n\n"
5673 	"\t    Hist trigger handlers and actions are executed whenever\n"
5674 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5675 	"\t        <handler>.<action>\n\n"
5676 	"\t    The available handlers are:\n\n"
5677 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5678 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5679 	"\t        onchange(var)            - invoke action if var changes\n\n"
5680 	"\t    The available actions are:\n\n"
5681 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5682 	"\t        save(field,...)                      - save current event fields\n"
5683 #ifdef CONFIG_TRACER_SNAPSHOT
5684 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5685 #endif
5686 #ifdef CONFIG_SYNTH_EVENTS
5687 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5688 	"\t  Write into this file to define/undefine new synthetic events.\n"
5689 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5690 #endif
5691 #endif
5692 ;
5693 
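/*
 * Two illustrative uses of the interfaces documented in readme_msg above
 * (the probed symbol, group/event names and event fields below are only
 * examples; $arg<N> additionally needs CONFIG_HAVE_FUNCTION_ARG_ACCESS_API):
 *
 *	echo 'p:mygroup/myread vfs_read count=$arg3' >> kprobe_events
 *	echo 'r:mygroup/myread_ret vfs_read ret=$retval' >> kprobe_events
 *	echo '-:mygroup/myread' >> kprobe_events
 *
 *	echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
 *		> events/kmem/kmalloc/trigger
 */
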
5694 static ssize_t
5695 tracing_readme_read(struct file *filp, char __user *ubuf,
5696 		       size_t cnt, loff_t *ppos)
5697 {
5698 	return simple_read_from_buffer(ubuf, cnt, ppos,
5699 					readme_msg, strlen(readme_msg));
5700 }
5701 
5702 static const struct file_operations tracing_readme_fops = {
5703 	.open		= tracing_open_generic,
5704 	.read		= tracing_readme_read,
5705 	.llseek		= generic_file_llseek,
5706 };
5707 
5708 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5709 {
5710 	int pid = ++(*pos);
5711 
5712 	return trace_find_tgid_ptr(pid);
5713 }
5714 
5715 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5716 {
5717 	int pid = *pos;
5718 
5719 	return trace_find_tgid_ptr(pid);
5720 }
5721 
5722 static void saved_tgids_stop(struct seq_file *m, void *v)
5723 {
5724 }
5725 
5726 static int saved_tgids_show(struct seq_file *m, void *v)
5727 {
5728 	int *entry = (int *)v;
5729 	int pid = entry - tgid_map;
5730 	int tgid = *entry;
5731 
5732 	if (tgid == 0)
5733 		return SEQ_SKIP;
5734 
5735 	seq_printf(m, "%d %d\n", pid, tgid);
5736 	return 0;
5737 }
5738 
5739 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5740 	.start		= saved_tgids_start,
5741 	.stop		= saved_tgids_stop,
5742 	.next		= saved_tgids_next,
5743 	.show		= saved_tgids_show,
5744 };
5745 
5746 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5747 {
5748 	int ret;
5749 
5750 	ret = tracing_check_open_get_tr(NULL);
5751 	if (ret)
5752 		return ret;
5753 
5754 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5755 }
5756 
5757 
5758 static const struct file_operations tracing_saved_tgids_fops = {
5759 	.open		= tracing_saved_tgids_open,
5760 	.read		= seq_read,
5761 	.llseek		= seq_lseek,
5762 	.release	= seq_release,
5763 };
5764 
5765 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5766 {
5767 	unsigned int *ptr = v;
5768 
5769 	if (*pos || m->count)
5770 		ptr++;
5771 
5772 	(*pos)++;
5773 
5774 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5775 	     ptr++) {
5776 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5777 			continue;
5778 
5779 		return ptr;
5780 	}
5781 
5782 	return NULL;
5783 }
5784 
5785 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5786 {
5787 	void *v;
5788 	loff_t l = 0;
5789 
5790 	preempt_disable();
5791 	arch_spin_lock(&trace_cmdline_lock);
5792 
5793 	v = &savedcmd->map_cmdline_to_pid[0];
5794 	while (l <= *pos) {
5795 		v = saved_cmdlines_next(m, v, &l);
5796 		if (!v)
5797 			return NULL;
5798 	}
5799 
5800 	return v;
5801 }
5802 
5803 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5804 {
5805 	arch_spin_unlock(&trace_cmdline_lock);
5806 	preempt_enable();
5807 }
5808 
5809 static int saved_cmdlines_show(struct seq_file *m, void *v)
5810 {
5811 	char buf[TASK_COMM_LEN];
5812 	unsigned int *pid = v;
5813 
5814 	__trace_find_cmdline(*pid, buf);
5815 	seq_printf(m, "%d %s\n", *pid, buf);
5816 	return 0;
5817 }
5818 
5819 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5820 	.start		= saved_cmdlines_start,
5821 	.next		= saved_cmdlines_next,
5822 	.stop		= saved_cmdlines_stop,
5823 	.show		= saved_cmdlines_show,
5824 };
5825 
5826 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5827 {
5828 	int ret;
5829 
5830 	ret = tracing_check_open_get_tr(NULL);
5831 	if (ret)
5832 		return ret;
5833 
5834 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5835 }
5836 
5837 static const struct file_operations tracing_saved_cmdlines_fops = {
5838 	.open		= tracing_saved_cmdlines_open,
5839 	.read		= seq_read,
5840 	.llseek		= seq_lseek,
5841 	.release	= seq_release,
5842 };
5843 
5844 static ssize_t
5845 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5846 				 size_t cnt, loff_t *ppos)
5847 {
5848 	char buf[64];
5849 	int r;
5850 
5851 	arch_spin_lock(&trace_cmdline_lock);
5852 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5853 	arch_spin_unlock(&trace_cmdline_lock);
5854 
5855 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5856 }
5857 
5858 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5859 {
5860 	kfree(s->saved_cmdlines);
5861 	kfree(s->map_cmdline_to_pid);
5862 	kfree(s);
5863 }
5864 
5865 static int tracing_resize_saved_cmdlines(unsigned int val)
5866 {
5867 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5868 
5869 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5870 	if (!s)
5871 		return -ENOMEM;
5872 
5873 	if (allocate_cmdlines_buffer(val, s) < 0) {
5874 		kfree(s);
5875 		return -ENOMEM;
5876 	}
5877 
5878 	arch_spin_lock(&trace_cmdline_lock);
5879 	savedcmd_temp = savedcmd;
5880 	savedcmd = s;
5881 	arch_spin_unlock(&trace_cmdline_lock);
5882 	free_saved_cmdlines_buffer(savedcmd_temp);
5883 
5884 	return 0;
5885 }
5886 
5887 static ssize_t
5888 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5889 				  size_t cnt, loff_t *ppos)
5890 {
5891 	unsigned long val;
5892 	int ret;
5893 
5894 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5895 	if (ret)
5896 		return ret;
5897 
5898 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5899 	if (!val || val > PID_MAX_DEFAULT)
5900 		return -EINVAL;
5901 
5902 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5903 	if (ret < 0)
5904 		return ret;
5905 
5906 	*ppos += cnt;
5907 
5908 	return cnt;
5909 }
5910 
5911 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5912 	.open		= tracing_open_generic,
5913 	.read		= tracing_saved_cmdlines_size_read,
5914 	.write		= tracing_saved_cmdlines_size_write,
5915 };
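
/*
 * Example (illustrative only; the tracefs file backed by these handlers
 * is created elsewhere in this file as "saved_cmdlines_size"):
 *
 *	echo 4096 > saved_cmdlines_size
 *
 * grows the cmdline cache to 4096 entries; valid sizes are 1 to
 * PID_MAX_DEFAULT.
 */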
5916 
5917 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5918 static union trace_eval_map_item *
5919 update_eval_map(union trace_eval_map_item *ptr)
5920 {
5921 	if (!ptr->map.eval_string) {
5922 		if (ptr->tail.next) {
5923 			ptr = ptr->tail.next;
5924 			/* Set ptr to the next real item (skip head) */
5925 			ptr++;
5926 		} else
5927 			return NULL;
5928 	}
5929 	return ptr;
5930 }
5931 
5932 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5933 {
5934 	union trace_eval_map_item *ptr = v;
5935 
5936 	/*
5937 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5938 	 * This really should never happen.
5939 	 */
5940 	(*pos)++;
5941 	ptr = update_eval_map(ptr);
5942 	if (WARN_ON_ONCE(!ptr))
5943 		return NULL;
5944 
5945 	ptr++;
5946 	ptr = update_eval_map(ptr);
5947 
5948 	return ptr;
5949 }
5950 
5951 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5952 {
5953 	union trace_eval_map_item *v;
5954 	loff_t l = 0;
5955 
5956 	mutex_lock(&trace_eval_mutex);
5957 
5958 	v = trace_eval_maps;
5959 	if (v)
5960 		v++;
5961 
5962 	while (v && l < *pos) {
5963 		v = eval_map_next(m, v, &l);
5964 	}
5965 
5966 	return v;
5967 }
5968 
5969 static void eval_map_stop(struct seq_file *m, void *v)
5970 {
5971 	mutex_unlock(&trace_eval_mutex);
5972 }
5973 
5974 static int eval_map_show(struct seq_file *m, void *v)
5975 {
5976 	union trace_eval_map_item *ptr = v;
5977 
5978 	seq_printf(m, "%s %ld (%s)\n",
5979 		   ptr->map.eval_string, ptr->map.eval_value,
5980 		   ptr->map.system);
5981 
5982 	return 0;
5983 }
5984 
5985 static const struct seq_operations tracing_eval_map_seq_ops = {
5986 	.start		= eval_map_start,
5987 	.next		= eval_map_next,
5988 	.stop		= eval_map_stop,
5989 	.show		= eval_map_show,
5990 };
5991 
5992 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5993 {
5994 	int ret;
5995 
5996 	ret = tracing_check_open_get_tr(NULL);
5997 	if (ret)
5998 		return ret;
5999 
6000 	return seq_open(filp, &tracing_eval_map_seq_ops);
6001 }
6002 
6003 static const struct file_operations tracing_eval_map_fops = {
6004 	.open		= tracing_eval_map_open,
6005 	.read		= seq_read,
6006 	.llseek		= seq_lseek,
6007 	.release	= seq_release,
6008 };
6009 
6010 static inline union trace_eval_map_item *
6011 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6012 {
6013 	/* Return tail of array given the head */
6014 	return ptr + ptr->head.length + 1;
6015 }
6016 
6017 static void
6018 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6019 			   int len)
6020 {
6021 	struct trace_eval_map **stop;
6022 	struct trace_eval_map **map;
6023 	union trace_eval_map_item *map_array;
6024 	union trace_eval_map_item *ptr;
6025 
6026 	stop = start + len;
6027 
6028 	/*
6029 	 * The trace_eval_maps contains the maps plus a head and tail item,
6030 	 * where the head holds the module and the length of the array, and
6031 	 * the tail holds a pointer to the next array in the list.
6032 	 */
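	/*
	 * Illustrative layout of the array allocated below, assuming
	 * len == N:
	 *
	 *	map_array[0]		head (head.mod, head.length = N)
	 *	map_array[1 .. N]	copies of the trace_eval_map entries
	 *	map_array[N + 1]	tail (zeroed; tail.next later chains
	 *				the next module's array, if any)
	 */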
6033 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6034 	if (!map_array) {
6035 		pr_warn("Unable to allocate trace eval mapping\n");
6036 		return;
6037 	}
6038 
6039 	mutex_lock(&trace_eval_mutex);
6040 
6041 	if (!trace_eval_maps)
6042 		trace_eval_maps = map_array;
6043 	else {
6044 		ptr = trace_eval_maps;
6045 		for (;;) {
6046 			ptr = trace_eval_jmp_to_tail(ptr);
6047 			if (!ptr->tail.next)
6048 				break;
6049 			ptr = ptr->tail.next;
6050 
6051 		}
6052 		ptr->tail.next = map_array;
6053 	}
6054 	map_array->head.mod = mod;
6055 	map_array->head.length = len;
6056 	map_array++;
6057 
6058 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6059 		map_array->map = **map;
6060 		map_array++;
6061 	}
6062 	memset(map_array, 0, sizeof(*map_array));
6063 
6064 	mutex_unlock(&trace_eval_mutex);
6065 }
6066 
6067 static void trace_create_eval_file(struct dentry *d_tracer)
6068 {
6069 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6070 			  NULL, &tracing_eval_map_fops);
6071 }
6072 
6073 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6074 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6075 static inline void trace_insert_eval_map_file(struct module *mod,
6076 			      struct trace_eval_map **start, int len) { }
6077 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6078 
6079 static void trace_insert_eval_map(struct module *mod,
6080 				  struct trace_eval_map **start, int len)
6081 {
6082 	struct trace_eval_map **map;
6083 
6084 	if (len <= 0)
6085 		return;
6086 
6087 	map = start;
6088 
6089 	trace_event_eval_update(map, len);
6090 
6091 	trace_insert_eval_map_file(mod, start, len);
6092 }
6093 
6094 static ssize_t
6095 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6096 		       size_t cnt, loff_t *ppos)
6097 {
6098 	struct trace_array *tr = filp->private_data;
6099 	char buf[MAX_TRACER_SIZE+2];
6100 	int r;
6101 
6102 	mutex_lock(&trace_types_lock);
6103 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6104 	mutex_unlock(&trace_types_lock);
6105 
6106 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6107 }
6108 
6109 int tracer_init(struct tracer *t, struct trace_array *tr)
6110 {
6111 	tracing_reset_online_cpus(&tr->array_buffer);
6112 	return t->init(tr);
6113 }
6114 
6115 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6116 {
6117 	int cpu;
6118 
6119 	for_each_tracing_cpu(cpu)
6120 		per_cpu_ptr(buf->data, cpu)->entries = val;
6121 }
6122 
6123 #ifdef CONFIG_TRACER_MAX_TRACE
6124 /* resize @trace_buf's per-CPU buffers to match @size_buf's entry counts */
6125 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6126 					struct array_buffer *size_buf, int cpu_id)
6127 {
6128 	int cpu, ret = 0;
6129 
6130 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6131 		for_each_tracing_cpu(cpu) {
6132 			ret = ring_buffer_resize(trace_buf->buffer,
6133 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6134 			if (ret < 0)
6135 				break;
6136 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6137 				per_cpu_ptr(size_buf->data, cpu)->entries;
6138 		}
6139 	} else {
6140 		ret = ring_buffer_resize(trace_buf->buffer,
6141 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6142 		if (ret == 0)
6143 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6144 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6145 	}
6146 
6147 	return ret;
6148 }
6149 #endif /* CONFIG_TRACER_MAX_TRACE */
6150 
6151 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6152 					unsigned long size, int cpu)
6153 {
6154 	int ret;
6155 
6156 	/*
6157 	 * If kernel or user changes the size of the ring buffer
6158 	 * we use the size that was given, and we can forget about
6159 	 * expanding it later.
6160 	 */
6161 	ring_buffer_expanded = true;
6162 
6163 	/* May be called before buffers are initialized */
6164 	if (!tr->array_buffer.buffer)
6165 		return 0;
6166 
6167 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6168 	if (ret < 0)
6169 		return ret;
6170 
6171 #ifdef CONFIG_TRACER_MAX_TRACE
6172 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6173 	    !tr->current_trace->use_max_tr)
6174 		goto out;
6175 
6176 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6177 	if (ret < 0) {
6178 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6179 						     &tr->array_buffer, cpu);
6180 		if (r < 0) {
6181 			/*
6182 			 * AARGH! We are left with different
6183 			 * size max buffer!!!!
6184 			 * The max buffer is our "snapshot" buffer.
6185 			 * When a tracer needs a snapshot (one of the
6186 			 * latency tracers), it swaps the max buffer
6187 			 * with the saved snapshot. We succeeded in
6188 			 * updating the size of the main buffer, but failed to
6189 			 * update the size of the max buffer. But when we tried
6190 			 * to reset the main buffer to the original size, we
6191 			 * failed there too. This is very unlikely to
6192 			 * happen, but if it does, warn and kill all
6193 			 * tracing.
6194 			 */
6195 			WARN_ON(1);
6196 			tracing_disabled = 1;
6197 		}
6198 		return ret;
6199 	}
6200 
6201 	if (cpu == RING_BUFFER_ALL_CPUS)
6202 		set_buffer_entries(&tr->max_buffer, size);
6203 	else
6204 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6205 
6206  out:
6207 #endif /* CONFIG_TRACER_MAX_TRACE */
6208 
6209 	if (cpu == RING_BUFFER_ALL_CPUS)
6210 		set_buffer_entries(&tr->array_buffer, size);
6211 	else
6212 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6213 
6214 	return ret;
6215 }
6216 
6217 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6218 				  unsigned long size, int cpu_id)
6219 {
6220 	int ret;
6221 
6222 	mutex_lock(&trace_types_lock);
6223 
6224 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6225 		/* make sure this cpu is enabled in the mask */
6226 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6227 			ret = -EINVAL;
6228 			goto out;
6229 		}
6230 	}
6231 
6232 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6233 	if (ret < 0)
6234 		ret = -ENOMEM;
6235 
6236 out:
6237 	mutex_unlock(&trace_types_lock);
6238 
6239 	return ret;
6240 }
6241 
6242 
6243 /**
6244  * tracing_update_buffers - used by tracing facility to expand ring buffers
6245  *
6246  * To save memory when tracing is never used on a system that has it
6247  * configured in, the ring buffers are set to a minimum size.  Once a
6248  * user starts to use the tracing facility, they need to grow to
6249  * their default size.
6250  *
6251  * This function is to be called when a tracer is about to be used.
6252  */
6253 int tracing_update_buffers(void)
6254 {
6255 	int ret = 0;
6256 
6257 	mutex_lock(&trace_types_lock);
6258 	if (!ring_buffer_expanded)
6259 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6260 						RING_BUFFER_ALL_CPUS);
6261 	mutex_unlock(&trace_types_lock);
6262 
6263 	return ret;
6264 }
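
/*
 * A minimal sketch of the intended call pattern (illustrative only):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	... now enable the tracer or event and start writing ...
 *
 * Anything about to start using the buffers expands them first so that
 * tracing runs with the full, not the boot-time minimum, size.
 */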
6265 
6266 struct trace_option_dentry;
6267 
6268 static void
6269 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6270 
6271 /*
6272  * Used to clear out the tracer before deletion of an instance.
6273  * Must have trace_types_lock held.
6274  */
6275 static void tracing_set_nop(struct trace_array *tr)
6276 {
6277 	if (tr->current_trace == &nop_trace)
6278 		return;
6279 
6280 	tr->current_trace->enabled--;
6281 
6282 	if (tr->current_trace->reset)
6283 		tr->current_trace->reset(tr);
6284 
6285 	tr->current_trace = &nop_trace;
6286 }
6287 
6288 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6289 {
6290 	/* Only enable if the directory has been created already. */
6291 	if (!tr->dir)
6292 		return;
6293 
6294 	create_trace_option_files(tr, t);
6295 }
6296 
6297 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6298 {
6299 	struct tracer *t;
6300 #ifdef CONFIG_TRACER_MAX_TRACE
6301 	bool had_max_tr;
6302 #endif
6303 	int ret = 0;
6304 
6305 	mutex_lock(&trace_types_lock);
6306 
6307 	if (!ring_buffer_expanded) {
6308 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6309 						RING_BUFFER_ALL_CPUS);
6310 		if (ret < 0)
6311 			goto out;
6312 		ret = 0;
6313 	}
6314 
6315 	for (t = trace_types; t; t = t->next) {
6316 		if (strcmp(t->name, buf) == 0)
6317 			break;
6318 	}
6319 	if (!t) {
6320 		ret = -EINVAL;
6321 		goto out;
6322 	}
6323 	if (t == tr->current_trace)
6324 		goto out;
6325 
6326 #ifdef CONFIG_TRACER_SNAPSHOT
6327 	if (t->use_max_tr) {
6328 		arch_spin_lock(&tr->max_lock);
6329 		if (tr->cond_snapshot)
6330 			ret = -EBUSY;
6331 		arch_spin_unlock(&tr->max_lock);
6332 		if (ret)
6333 			goto out;
6334 	}
6335 #endif
6336 	/* Some tracers won't work on kernel command line */
6337 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6338 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6339 			t->name);
6340 		goto out;
6341 	}
6342 
6343 	/* Some tracers are only allowed for the top level buffer */
6344 	if (!trace_ok_for_array(t, tr)) {
6345 		ret = -EINVAL;
6346 		goto out;
6347 	}
6348 
6349 	/* If trace pipe files are being read, we can't change the tracer */
6350 	if (tr->trace_ref) {
6351 		ret = -EBUSY;
6352 		goto out;
6353 	}
6354 
6355 	trace_branch_disable();
6356 
6357 	tr->current_trace->enabled--;
6358 
6359 	if (tr->current_trace->reset)
6360 		tr->current_trace->reset(tr);
6361 
6362 	/* Current trace needs to be nop_trace before synchronize_rcu */
6363 	tr->current_trace = &nop_trace;
6364 
6365 #ifdef CONFIG_TRACER_MAX_TRACE
6366 	had_max_tr = tr->allocated_snapshot;
6367 
6368 	if (had_max_tr && !t->use_max_tr) {
6369 		/*
6370 		 * We need to make sure that the update_max_tr sees that
6371 		 * current_trace changed to nop_trace to keep it from
6372 		 * swapping the buffers after we resize it.
6373 		 * The update_max_tr is called with interrupts disabled,
6374 		 * so the synchronize_rcu() below is sufficient.
6375 		 */
6376 		synchronize_rcu();
6377 		free_snapshot(tr);
6378 	}
6379 #endif
6380 
6381 #ifdef CONFIG_TRACER_MAX_TRACE
6382 	if (t->use_max_tr && !had_max_tr) {
6383 		ret = tracing_alloc_snapshot_instance(tr);
6384 		if (ret < 0)
6385 			goto out;
6386 	}
6387 #endif
6388 
6389 	if (t->init) {
6390 		ret = tracer_init(t, tr);
6391 		if (ret)
6392 			goto out;
6393 	}
6394 
6395 	tr->current_trace = t;
6396 	tr->current_trace->enabled++;
6397 	trace_branch_enable(tr);
6398  out:
6399 	mutex_unlock(&trace_types_lock);
6400 
6401 	return ret;
6402 }
6403 
6404 static ssize_t
6405 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6406 			size_t cnt, loff_t *ppos)
6407 {
6408 	struct trace_array *tr = filp->private_data;
6409 	char buf[MAX_TRACER_SIZE+1];
6410 	int i;
6411 	size_t ret;
6412 	int err;
6413 
6414 	ret = cnt;
6415 
6416 	if (cnt > MAX_TRACER_SIZE)
6417 		cnt = MAX_TRACER_SIZE;
6418 
6419 	if (copy_from_user(buf, ubuf, cnt))
6420 		return -EFAULT;
6421 
6422 	buf[cnt] = 0;
6423 
6424 	/* strip ending whitespace. */
6425 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6426 		buf[i] = 0;
6427 
6428 	err = tracing_set_tracer(tr, buf);
6429 	if (err)
6430 		return err;
6431 
6432 	*ppos += ret;
6433 
6434 	return ret;
6435 }
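
/*
 * Illustrative only: these read/write handlers back the "current_tracer"
 * file (created elsewhere in this file), so switching tracers from the
 * shell looks like:
 *
 *	cat current_tracer
 *	echo nop > current_tracer
 *
 * "nop" always exists (nop_trace); other names depend on which tracers
 * are built in.
 */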
6436 
6437 static ssize_t
6438 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6439 		   size_t cnt, loff_t *ppos)
6440 {
6441 	char buf[64];
6442 	int r;
6443 
6444 	r = snprintf(buf, sizeof(buf), "%ld\n",
6445 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6446 	if (r > sizeof(buf))
6447 		r = sizeof(buf);
6448 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6449 }
6450 
6451 static ssize_t
6452 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6453 		    size_t cnt, loff_t *ppos)
6454 {
6455 	unsigned long val;
6456 	int ret;
6457 
6458 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6459 	if (ret)
6460 		return ret;
6461 
6462 	*ptr = val * 1000;
6463 
6464 	return cnt;
6465 }
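
/*
 * These helpers expose values in microseconds while storing them in
 * nanoseconds.  For example (illustrative only), writing "100" to a
 * file served by tracing_nsecs_write() stores 100 * 1000 = 100000 ns,
 * and tracing_nsecs_read() shows it back as 100.
 */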
6466 
6467 static ssize_t
6468 tracing_thresh_read(struct file *filp, char __user *ubuf,
6469 		    size_t cnt, loff_t *ppos)
6470 {
6471 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6472 }
6473 
6474 static ssize_t
6475 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6476 		     size_t cnt, loff_t *ppos)
6477 {
6478 	struct trace_array *tr = filp->private_data;
6479 	int ret;
6480 
6481 	mutex_lock(&trace_types_lock);
6482 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6483 	if (ret < 0)
6484 		goto out;
6485 
6486 	if (tr->current_trace->update_thresh) {
6487 		ret = tr->current_trace->update_thresh(tr);
6488 		if (ret < 0)
6489 			goto out;
6490 	}
6491 
6492 	ret = cnt;
6493 out:
6494 	mutex_unlock(&trace_types_lock);
6495 
6496 	return ret;
6497 }
6498 
6499 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6500 
6501 static ssize_t
6502 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6503 		     size_t cnt, loff_t *ppos)
6504 {
6505 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6506 }
6507 
6508 static ssize_t
6509 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6510 		      size_t cnt, loff_t *ppos)
6511 {
6512 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6513 }
6514 
6515 #endif
6516 
6517 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6518 {
6519 	struct trace_array *tr = inode->i_private;
6520 	struct trace_iterator *iter;
6521 	int ret;
6522 
6523 	ret = tracing_check_open_get_tr(tr);
6524 	if (ret)
6525 		return ret;
6526 
6527 	mutex_lock(&trace_types_lock);
6528 
6529 	/* create a buffer to store the information to pass to userspace */
6530 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6531 	if (!iter) {
6532 		ret = -ENOMEM;
6533 		__trace_array_put(tr);
6534 		goto out;
6535 	}
6536 
6537 	trace_seq_init(&iter->seq);
6538 	iter->trace = tr->current_trace;
6539 
6540 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6541 		ret = -ENOMEM;
6542 		goto fail;
6543 	}
6544 
6545 	/* trace pipe does not show start of buffer */
6546 	cpumask_setall(iter->started);
6547 
6548 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6549 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6550 
6551 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6552 	if (trace_clocks[tr->clock_id].in_ns)
6553 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6554 
6555 	iter->tr = tr;
6556 	iter->array_buffer = &tr->array_buffer;
6557 	iter->cpu_file = tracing_get_cpu(inode);
6558 	mutex_init(&iter->mutex);
6559 	filp->private_data = iter;
6560 
6561 	if (iter->trace->pipe_open)
6562 		iter->trace->pipe_open(iter);
6563 
6564 	nonseekable_open(inode, filp);
6565 
6566 	tr->trace_ref++;
6567 out:
6568 	mutex_unlock(&trace_types_lock);
6569 	return ret;
6570 
6571 fail:
6572 	kfree(iter);
6573 	__trace_array_put(tr);
6574 	mutex_unlock(&trace_types_lock);
6575 	return ret;
6576 }
6577 
6578 static int tracing_release_pipe(struct inode *inode, struct file *file)
6579 {
6580 	struct trace_iterator *iter = file->private_data;
6581 	struct trace_array *tr = inode->i_private;
6582 
6583 	mutex_lock(&trace_types_lock);
6584 
6585 	tr->trace_ref--;
6586 
6587 	if (iter->trace->pipe_close)
6588 		iter->trace->pipe_close(iter);
6589 
6590 	mutex_unlock(&trace_types_lock);
6591 
6592 	free_cpumask_var(iter->started);
6593 	mutex_destroy(&iter->mutex);
6594 	kfree(iter);
6595 
6596 	trace_array_put(tr);
6597 
6598 	return 0;
6599 }
6600 
6601 static __poll_t
6602 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6603 {
6604 	struct trace_array *tr = iter->tr;
6605 
6606 	/* Iterators are static, they should be filled or empty */
6607 	if (trace_buffer_iter(iter, iter->cpu_file))
6608 		return EPOLLIN | EPOLLRDNORM;
6609 
6610 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6611 		/*
6612 		 * Always select as readable when in blocking mode
6613 		 */
6614 		return EPOLLIN | EPOLLRDNORM;
6615 	else
6616 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6617 					     filp, poll_table);
6618 }
6619 
6620 static __poll_t
6621 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6622 {
6623 	struct trace_iterator *iter = filp->private_data;
6624 
6625 	return trace_poll(iter, filp, poll_table);
6626 }
6627 
6628 /* Must be called with iter->mutex held. */
6629 static int tracing_wait_pipe(struct file *filp)
6630 {
6631 	struct trace_iterator *iter = filp->private_data;
6632 	int ret;
6633 
6634 	while (trace_empty(iter)) {
6635 
6636 		if ((filp->f_flags & O_NONBLOCK)) {
6637 			return -EAGAIN;
6638 		}
6639 
6640 		/*
6641 		 * We block until we read something and tracing is disabled.
6642 		 * We still block if tracing is disabled, but we have never
6643 		 * read anything. This allows a user to cat this file, and
6644 		 * then enable tracing. But after we have read something,
6645 		 * we give an EOF when tracing is again disabled.
6646 		 *
6647 		 * iter->pos will be 0 if we haven't read anything.
6648 		 */
6649 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6650 			break;
6651 
6652 		mutex_unlock(&iter->mutex);
6653 
6654 		ret = wait_on_pipe(iter, 0);
6655 
6656 		mutex_lock(&iter->mutex);
6657 
6658 		if (ret)
6659 			return ret;
6660 	}
6661 
6662 	return 1;
6663 }
6664 
6665 /*
6666  * Consumer reader.
6667  */
6668 static ssize_t
6669 tracing_read_pipe(struct file *filp, char __user *ubuf,
6670 		  size_t cnt, loff_t *ppos)
6671 {
6672 	struct trace_iterator *iter = filp->private_data;
6673 	ssize_t sret;
6674 
6675 	/*
6676 	 * Avoid more than one consumer on a single file descriptor.
6677 	 * This is just a matter of trace coherency; the ring buffer itself
6678 	 * is protected.
6679 	 */
6680 	mutex_lock(&iter->mutex);
6681 
6682 	/* return any leftover data */
6683 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6684 	if (sret != -EBUSY)
6685 		goto out;
6686 
6687 	trace_seq_init(&iter->seq);
6688 
6689 	if (iter->trace->read) {
6690 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6691 		if (sret)
6692 			goto out;
6693 	}
6694 
6695 waitagain:
6696 	sret = tracing_wait_pipe(filp);
6697 	if (sret <= 0)
6698 		goto out;
6699 
6700 	/* stop when tracing is finished */
6701 	if (trace_empty(iter)) {
6702 		sret = 0;
6703 		goto out;
6704 	}
6705 
6706 	if (cnt >= PAGE_SIZE)
6707 		cnt = PAGE_SIZE - 1;
6708 
6709 	/* reset all but tr, trace, and overruns */
6710 	memset(&iter->seq, 0,
6711 	       sizeof(struct trace_iterator) -
6712 	       offsetof(struct trace_iterator, seq));
6713 	cpumask_clear(iter->started);
6714 	trace_seq_init(&iter->seq);
6715 	iter->pos = -1;
6716 
6717 	trace_event_read_lock();
6718 	trace_access_lock(iter->cpu_file);
6719 	while (trace_find_next_entry_inc(iter) != NULL) {
6720 		enum print_line_t ret;
6721 		int save_len = iter->seq.seq.len;
6722 
6723 		ret = print_trace_line(iter);
6724 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6725 			/* don't print partial lines */
6726 			iter->seq.seq.len = save_len;
6727 			break;
6728 		}
6729 		if (ret != TRACE_TYPE_NO_CONSUME)
6730 			trace_consume(iter);
6731 
6732 		if (trace_seq_used(&iter->seq) >= cnt)
6733 			break;
6734 
6735 		/*
6736 		 * Setting the full flag means we reached the trace_seq buffer
6737 		 * size and we should have left via the partial output condition
6738 		 * above.  One of the trace_seq_* functions is not being used properly.
6739 		 */
6740 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6741 			  iter->ent->type);
6742 	}
6743 	trace_access_unlock(iter->cpu_file);
6744 	trace_event_read_unlock();
6745 
6746 	/* Now copy what we have to the user */
6747 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6748 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6749 		trace_seq_init(&iter->seq);
6750 
6751 	/*
6752 	 * If there was nothing to send to user, in spite of consuming trace
6753 	 * entries, go back to wait for more entries.
6754 	 */
6755 	if (sret == -EBUSY)
6756 		goto waitagain;
6757 
6758 out:
6759 	mutex_unlock(&iter->mutex);
6760 
6761 	return sret;
6762 }
6763 
6764 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6765 				     unsigned int idx)
6766 {
6767 	__free_page(spd->pages[idx]);
6768 }
6769 
6770 static size_t
6771 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6772 {
6773 	size_t count;
6774 	int save_len;
6775 	int ret;
6776 
6777 	/* Seq buffer is page-sized, exactly what we need. */
6778 	for (;;) {
6779 		save_len = iter->seq.seq.len;
6780 		ret = print_trace_line(iter);
6781 
6782 		if (trace_seq_has_overflowed(&iter->seq)) {
6783 			iter->seq.seq.len = save_len;
6784 			break;
6785 		}
6786 
6787 		/*
6788 		 * This should not be hit, because it should only
6789 		 * be set if the iter->seq overflowed. But check it
6790 		 * anyway to be safe.
6791 		 */
6792 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6793 			iter->seq.seq.len = save_len;
6794 			break;
6795 		}
6796 
6797 		count = trace_seq_used(&iter->seq) - save_len;
6798 		if (rem < count) {
6799 			rem = 0;
6800 			iter->seq.seq.len = save_len;
6801 			break;
6802 		}
6803 
6804 		if (ret != TRACE_TYPE_NO_CONSUME)
6805 			trace_consume(iter);
6806 		rem -= count;
6807 		if (!trace_find_next_entry_inc(iter))	{
6808 			rem = 0;
6809 			iter->ent = NULL;
6810 			break;
6811 		}
6812 	}
6813 
6814 	return rem;
6815 }
6816 
6817 static ssize_t tracing_splice_read_pipe(struct file *filp,
6818 					loff_t *ppos,
6819 					struct pipe_inode_info *pipe,
6820 					size_t len,
6821 					unsigned int flags)
6822 {
6823 	struct page *pages_def[PIPE_DEF_BUFFERS];
6824 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6825 	struct trace_iterator *iter = filp->private_data;
6826 	struct splice_pipe_desc spd = {
6827 		.pages		= pages_def,
6828 		.partial	= partial_def,
6829 		.nr_pages	= 0, /* This gets updated below. */
6830 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6831 		.ops		= &default_pipe_buf_ops,
6832 		.spd_release	= tracing_spd_release_pipe,
6833 	};
6834 	ssize_t ret;
6835 	size_t rem;
6836 	unsigned int i;
6837 
6838 	if (splice_grow_spd(pipe, &spd))
6839 		return -ENOMEM;
6840 
6841 	mutex_lock(&iter->mutex);
6842 
6843 	if (iter->trace->splice_read) {
6844 		ret = iter->trace->splice_read(iter, filp,
6845 					       ppos, pipe, len, flags);
6846 		if (ret)
6847 			goto out_err;
6848 	}
6849 
6850 	ret = tracing_wait_pipe(filp);
6851 	if (ret <= 0)
6852 		goto out_err;
6853 
6854 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6855 		ret = -EFAULT;
6856 		goto out_err;
6857 	}
6858 
6859 	trace_event_read_lock();
6860 	trace_access_lock(iter->cpu_file);
6861 
6862 	/* Fill as many pages as possible. */
6863 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6864 		spd.pages[i] = alloc_page(GFP_KERNEL);
6865 		if (!spd.pages[i])
6866 			break;
6867 
6868 		rem = tracing_fill_pipe_page(rem, iter);
6869 
6870 		/* Copy the data into the page, so we can start over. */
6871 		ret = trace_seq_to_buffer(&iter->seq,
6872 					  page_address(spd.pages[i]),
6873 					  trace_seq_used(&iter->seq));
6874 		if (ret < 0) {
6875 			__free_page(spd.pages[i]);
6876 			break;
6877 		}
6878 		spd.partial[i].offset = 0;
6879 		spd.partial[i].len = trace_seq_used(&iter->seq);
6880 
6881 		trace_seq_init(&iter->seq);
6882 	}
6883 
6884 	trace_access_unlock(iter->cpu_file);
6885 	trace_event_read_unlock();
6886 	mutex_unlock(&iter->mutex);
6887 
6888 	spd.nr_pages = i;
6889 
6890 	if (i)
6891 		ret = splice_to_pipe(pipe, &spd);
6892 	else
6893 		ret = 0;
6894 out:
6895 	splice_shrink_spd(&spd);
6896 	return ret;
6897 
6898 out_err:
6899 	mutex_unlock(&iter->mutex);
6900 	goto out;
6901 }
6902 
6903 static ssize_t
6904 tracing_entries_read(struct file *filp, char __user *ubuf,
6905 		     size_t cnt, loff_t *ppos)
6906 {
6907 	struct inode *inode = file_inode(filp);
6908 	struct trace_array *tr = inode->i_private;
6909 	int cpu = tracing_get_cpu(inode);
6910 	char buf[64];
6911 	int r = 0;
6912 	ssize_t ret;
6913 
6914 	mutex_lock(&trace_types_lock);
6915 
6916 	if (cpu == RING_BUFFER_ALL_CPUS) {
6917 		int cpu, buf_size_same;
6918 		unsigned long size;
6919 
6920 		size = 0;
6921 		buf_size_same = 1;
6922 		/* check if all cpu sizes are same */
6923 		for_each_tracing_cpu(cpu) {
6924 			/* fill in the size from first enabled cpu */
6925 			if (size == 0)
6926 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6927 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6928 				buf_size_same = 0;
6929 				break;
6930 			}
6931 		}
6932 
6933 		if (buf_size_same) {
6934 			if (!ring_buffer_expanded)
6935 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6936 					    size >> 10,
6937 					    trace_buf_size >> 10);
6938 			else
6939 				r = sprintf(buf, "%lu\n", size >> 10);
6940 		} else
6941 			r = sprintf(buf, "X\n");
6942 	} else
6943 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6944 
6945 	mutex_unlock(&trace_types_lock);
6946 
6947 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6948 	return ret;
6949 }
6950 
6951 static ssize_t
6952 tracing_entries_write(struct file *filp, const char __user *ubuf,
6953 		      size_t cnt, loff_t *ppos)
6954 {
6955 	struct inode *inode = file_inode(filp);
6956 	struct trace_array *tr = inode->i_private;
6957 	unsigned long val;
6958 	int ret;
6959 
6960 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6961 	if (ret)
6962 		return ret;
6963 
6964 	/* must have at least 1 entry */
6965 	if (!val)
6966 		return -EINVAL;
6967 
6968 	/* value is in KB */
6969 	val <<= 10;
6970 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6971 	if (ret < 0)
6972 		return ret;
6973 
6974 	*ppos += cnt;
6975 
6976 	return cnt;
6977 }
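
/*
 * Illustrative only: these handlers back the per-instance
 * "buffer_size_kb" file (created elsewhere in this file).  Writing a
 * value resizes every per-CPU buffer to that many KB:
 *
 *	echo 4096 > buffer_size_kb
 *
 * asks for 4 MB per CPU; the per_cpu/cpuN variant resizes only that
 * CPU's buffer.
 */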
6978 
6979 static ssize_t
6980 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6981 				size_t cnt, loff_t *ppos)
6982 {
6983 	struct trace_array *tr = filp->private_data;
6984 	char buf[64];
6985 	int r, cpu;
6986 	unsigned long size = 0, expanded_size = 0;
6987 
6988 	mutex_lock(&trace_types_lock);
6989 	for_each_tracing_cpu(cpu) {
6990 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6991 		if (!ring_buffer_expanded)
6992 			expanded_size += trace_buf_size >> 10;
6993 	}
6994 	if (ring_buffer_expanded)
6995 		r = sprintf(buf, "%lu\n", size);
6996 	else
6997 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6998 	mutex_unlock(&trace_types_lock);
6999 
7000 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7001 }
7002 
7003 static ssize_t
7004 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7005 			  size_t cnt, loff_t *ppos)
7006 {
7007 	/*
7008 	 * There is no need to read what the user has written; this function
7009 	 * is just here to make sure that there is no error when "echo" is used.
7010 	 */
7011 
7012 	*ppos += cnt;
7013 
7014 	return cnt;
7015 }
7016 
7017 static int
7018 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7019 {
7020 	struct trace_array *tr = inode->i_private;
7021 
7022 	/* disable tracing ? */
7023 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7024 		tracer_tracing_off(tr);
7025 	/* resize the ring buffer to 0 */
7026 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7027 
7028 	trace_array_put(tr);
7029 
7030 	return 0;
7031 }
7032 
7033 static ssize_t
7034 tracing_mark_write(struct file *filp, const char __user *ubuf,
7035 					size_t cnt, loff_t *fpos)
7036 {
7037 	struct trace_array *tr = filp->private_data;
7038 	struct ring_buffer_event *event;
7039 	enum event_trigger_type tt = ETT_NONE;
7040 	struct trace_buffer *buffer;
7041 	struct print_entry *entry;
7042 	ssize_t written;
7043 	int size;
7044 	int len;
7045 
7046 /* Used in tracing_mark_raw_write() as well */
7047 #define FAULTED_STR "<faulted>"
7048 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7049 
7050 	if (tracing_disabled)
7051 		return -EINVAL;
7052 
7053 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7054 		return -EINVAL;
7055 
7056 	if (cnt > TRACE_BUF_SIZE)
7057 		cnt = TRACE_BUF_SIZE;
7058 
7059 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7060 
7061 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7062 
7063 	/* If less than "<faulted>", then make sure we can still add that */
7064 	if (cnt < FAULTED_SIZE)
7065 		size += FAULTED_SIZE - cnt;
7066 
7067 	buffer = tr->array_buffer.buffer;
7068 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7069 					    tracing_gen_ctx());
7070 	if (unlikely(!event))
7071 		/* Ring buffer disabled, return as if not open for write */
7072 		return -EBADF;
7073 
7074 	entry = ring_buffer_event_data(event);
7075 	entry->ip = _THIS_IP_;
7076 
7077 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7078 	if (len) {
7079 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7080 		cnt = FAULTED_SIZE;
7081 		written = -EFAULT;
7082 	} else
7083 		written = cnt;
7084 
7085 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7086 		/* do not add \n before testing triggers, but add \0 */
7087 		entry->buf[cnt] = '\0';
7088 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7089 	}
7090 
7091 	if (entry->buf[cnt - 1] != '\n') {
7092 		entry->buf[cnt] = '\n';
7093 		entry->buf[cnt + 1] = '\0';
7094 	} else
7095 		entry->buf[cnt] = '\0';
7096 
7097 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7098 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7099 	__buffer_unlock_commit(buffer, event);
7100 
7101 	if (tt)
7102 		event_triggers_post_call(tr->trace_marker_file, tt);
7103 
7104 	if (written > 0)
7105 		*fpos += written;
7106 
7107 	return written;
7108 }
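
/*
 * Illustrative only: this is the write handler behind the
 * "trace_marker" file.  Userspace can annotate a trace with:
 *
 *	echo "hit the slow path" > trace_marker
 *
 * which shows up in the trace as a print event.  Writes are clamped to
 * TRACE_BUF_SIZE and a trailing newline is added if one is missing.
 */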
7109 
7110 /* Limit it for now to 3K (including tag) */
7111 #define RAW_DATA_MAX_SIZE (1024*3)
7112 
7113 static ssize_t
7114 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7115 					size_t cnt, loff_t *fpos)
7116 {
7117 	struct trace_array *tr = filp->private_data;
7118 	struct ring_buffer_event *event;
7119 	struct trace_buffer *buffer;
7120 	struct raw_data_entry *entry;
7121 	ssize_t written;
7122 	int size;
7123 	int len;
7124 
7125 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7126 
7127 	if (tracing_disabled)
7128 		return -EINVAL;
7129 
7130 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7131 		return -EINVAL;
7132 
7133 	/* The marker must at least have a tag id */
7134 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7135 		return -EINVAL;
7136 
7137 	if (cnt > TRACE_BUF_SIZE)
7138 		cnt = TRACE_BUF_SIZE;
7139 
7140 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7141 
7142 	size = sizeof(*entry) + cnt;
7143 	if (cnt < FAULT_SIZE_ID)
7144 		size += FAULT_SIZE_ID - cnt;
7145 
7146 	buffer = tr->array_buffer.buffer;
7147 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7148 					    tracing_gen_ctx());
7149 	if (!event)
7150 		/* Ring buffer disabled, return as if not open for write */
7151 		return -EBADF;
7152 
7153 	entry = ring_buffer_event_data(event);
7154 
7155 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7156 	if (len) {
7157 		entry->id = -1;
7158 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7159 		written = -EFAULT;
7160 	} else
7161 		written = cnt;
7162 
7163 	__buffer_unlock_commit(buffer, event);
7164 
7165 	if (written > 0)
7166 		*fpos += written;
7167 
7168 	return written;
7169 }
7170 
7171 static int tracing_clock_show(struct seq_file *m, void *v)
7172 {
7173 	struct trace_array *tr = m->private;
7174 	int i;
7175 
7176 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7177 		seq_printf(m,
7178 			"%s%s%s%s", i ? " " : "",
7179 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7180 			i == tr->clock_id ? "]" : "");
7181 	seq_putc(m, '\n');
7182 
7183 	return 0;
7184 }
7185 
7186 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7187 {
7188 	int i;
7189 
7190 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7191 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7192 			break;
7193 	}
7194 	if (i == ARRAY_SIZE(trace_clocks))
7195 		return -EINVAL;
7196 
7197 	mutex_lock(&trace_types_lock);
7198 
7199 	tr->clock_id = i;
7200 
7201 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7202 
7203 	/*
7204 	 * New clock may not be consistent with the previous clock.
7205 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7206 	 */
7207 	tracing_reset_online_cpus(&tr->array_buffer);
7208 
7209 #ifdef CONFIG_TRACER_MAX_TRACE
7210 	if (tr->max_buffer.buffer)
7211 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7212 	tracing_reset_online_cpus(&tr->max_buffer);
7213 #endif
7214 
7215 	mutex_unlock(&trace_types_lock);
7216 
7217 	return 0;
7218 }
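
/*
 * Illustrative only: the "trace_clock" file built on top of this shows
 * the available clocks with the current one in brackets, and writing a
 * clock name switches to it (and resets the buffers):
 *
 *	cat trace_clock
 *	echo global > trace_clock
 *
 * "global" is just an example; the valid names are the entries of
 * trace_clocks[], defined elsewhere in this file.
 */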
7219 
7220 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7221 				   size_t cnt, loff_t *fpos)
7222 {
7223 	struct seq_file *m = filp->private_data;
7224 	struct trace_array *tr = m->private;
7225 	char buf[64];
7226 	const char *clockstr;
7227 	int ret;
7228 
7229 	if (cnt >= sizeof(buf))
7230 		return -EINVAL;
7231 
7232 	if (copy_from_user(buf, ubuf, cnt))
7233 		return -EFAULT;
7234 
7235 	buf[cnt] = 0;
7236 
7237 	clockstr = strstrip(buf);
7238 
7239 	ret = tracing_set_clock(tr, clockstr);
7240 	if (ret)
7241 		return ret;
7242 
7243 	*fpos += cnt;
7244 
7245 	return cnt;
7246 }
7247 
7248 static int tracing_clock_open(struct inode *inode, struct file *file)
7249 {
7250 	struct trace_array *tr = inode->i_private;
7251 	int ret;
7252 
7253 	ret = tracing_check_open_get_tr(tr);
7254 	if (ret)
7255 		return ret;
7256 
7257 	ret = single_open(file, tracing_clock_show, inode->i_private);
7258 	if (ret < 0)
7259 		trace_array_put(tr);
7260 
7261 	return ret;
7262 }
7263 
7264 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7265 {
7266 	struct trace_array *tr = m->private;
7267 
7268 	mutex_lock(&trace_types_lock);
7269 
7270 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7271 		seq_puts(m, "delta [absolute]\n");
7272 	else
7273 		seq_puts(m, "[delta] absolute\n");
7274 
7275 	mutex_unlock(&trace_types_lock);
7276 
7277 	return 0;
7278 }
7279 
7280 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7281 {
7282 	struct trace_array *tr = inode->i_private;
7283 	int ret;
7284 
7285 	ret = tracing_check_open_get_tr(tr);
7286 	if (ret)
7287 		return ret;
7288 
7289 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7290 	if (ret < 0)
7291 		trace_array_put(tr);
7292 
7293 	return ret;
7294 }
7295 
7296 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7297 {
7298 	if (rbe == this_cpu_read(trace_buffered_event))
7299 		return ring_buffer_time_stamp(buffer);
7300 
7301 	return ring_buffer_event_time_stamp(buffer, rbe);
7302 }
7303 
7304 /*
7305  * Set or disable using the per CPU trace_buffered_event when possible.
7306  */
7307 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7308 {
7309 	int ret = 0;
7310 
7311 	mutex_lock(&trace_types_lock);
7312 
7313 	if (set && tr->no_filter_buffering_ref++)
7314 		goto out;
7315 
7316 	if (!set) {
7317 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7318 			ret = -EINVAL;
7319 			goto out;
7320 		}
7321 
7322 		--tr->no_filter_buffering_ref;
7323 	}
7324  out:
7325 	mutex_unlock(&trace_types_lock);
7326 
7327 	return ret;
7328 }
7329 
7330 struct ftrace_buffer_info {
7331 	struct trace_iterator	iter;
7332 	void			*spare;
7333 	unsigned int		spare_cpu;
7334 	unsigned int		read;
7335 };
7336 
7337 #ifdef CONFIG_TRACER_SNAPSHOT
7338 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7339 {
7340 	struct trace_array *tr = inode->i_private;
7341 	struct trace_iterator *iter;
7342 	struct seq_file *m;
7343 	int ret;
7344 
7345 	ret = tracing_check_open_get_tr(tr);
7346 	if (ret)
7347 		return ret;
7348 
7349 	if (file->f_mode & FMODE_READ) {
7350 		iter = __tracing_open(inode, file, true);
7351 		if (IS_ERR(iter))
7352 			ret = PTR_ERR(iter);
7353 	} else {
7354 		/* Writes still need the seq_file to hold the private data */
7355 		ret = -ENOMEM;
7356 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7357 		if (!m)
7358 			goto out;
7359 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7360 		if (!iter) {
7361 			kfree(m);
7362 			goto out;
7363 		}
7364 		ret = 0;
7365 
7366 		iter->tr = tr;
7367 		iter->array_buffer = &tr->max_buffer;
7368 		iter->cpu_file = tracing_get_cpu(inode);
7369 		m->private = iter;
7370 		file->private_data = m;
7371 	}
7372 out:
7373 	if (ret < 0)
7374 		trace_array_put(tr);
7375 
7376 	return ret;
7377 }
7378 
7379 static ssize_t
7380 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7381 		       loff_t *ppos)
7382 {
7383 	struct seq_file *m = filp->private_data;
7384 	struct trace_iterator *iter = m->private;
7385 	struct trace_array *tr = iter->tr;
7386 	unsigned long val;
7387 	int ret;
7388 
7389 	ret = tracing_update_buffers();
7390 	if (ret < 0)
7391 		return ret;
7392 
7393 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7394 	if (ret)
7395 		return ret;
7396 
7397 	mutex_lock(&trace_types_lock);
7398 
7399 	if (tr->current_trace->use_max_tr) {
7400 		ret = -EBUSY;
7401 		goto out;
7402 	}
7403 
7404 	arch_spin_lock(&tr->max_lock);
7405 	if (tr->cond_snapshot)
7406 		ret = -EBUSY;
7407 	arch_spin_unlock(&tr->max_lock);
7408 	if (ret)
7409 		goto out;
7410 
7411 	switch (val) {
7412 	case 0:
7413 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7414 			ret = -EINVAL;
7415 			break;
7416 		}
7417 		if (tr->allocated_snapshot)
7418 			free_snapshot(tr);
7419 		break;
7420 	case 1:
7421 /* Only allow per-cpu swap if the ring buffer supports it */
7422 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7423 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7424 			ret = -EINVAL;
7425 			break;
7426 		}
7427 #endif
7428 		if (tr->allocated_snapshot)
7429 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7430 					&tr->array_buffer, iter->cpu_file);
7431 		else
7432 			ret = tracing_alloc_snapshot_instance(tr);
7433 		if (ret < 0)
7434 			break;
7435 		local_irq_disable();
7436 		/* Now, we're going to swap */
7437 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7438 			update_max_tr(tr, current, smp_processor_id(), NULL);
7439 		else
7440 			update_max_tr_single(tr, current, iter->cpu_file);
7441 		local_irq_enable();
7442 		break;
7443 	default:
7444 		if (tr->allocated_snapshot) {
7445 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7446 				tracing_reset_online_cpus(&tr->max_buffer);
7447 			else
7448 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7449 		}
7450 		break;
7451 	}
7452 
7453 	if (ret >= 0) {
7454 		*ppos += cnt;
7455 		ret = cnt;
7456 	}
7457 out:
7458 	mutex_unlock(&trace_types_lock);
7459 	return ret;
7460 }
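
/*
 * Illustrative only: summary of the values accepted by the "snapshot"
 * file implemented above:
 *
 *	echo 0 > snapshot	free the snapshot buffer (all CPUs only)
 *	echo 1 > snapshot	allocate it if needed and take a snapshot
 *	echo 2 > snapshot	(or any larger value) clear the snapshot
 *				buffer without freeing it
 */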
7461 
7462 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7463 {
7464 	struct seq_file *m = file->private_data;
7465 	int ret;
7466 
7467 	ret = tracing_release(inode, file);
7468 
7469 	if (file->f_mode & FMODE_READ)
7470 		return ret;
7471 
7472 	/* If write only, the seq_file is just a stub */
7473 	if (m)
7474 		kfree(m->private);
7475 	kfree(m);
7476 
7477 	return 0;
7478 }
7479 
7480 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7481 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7482 				    size_t count, loff_t *ppos);
7483 static int tracing_buffers_release(struct inode *inode, struct file *file);
7484 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7485 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7486 
7487 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7488 {
7489 	struct ftrace_buffer_info *info;
7490 	int ret;
7491 
7492 	/* The following checks for tracefs lockdown */
7493 	ret = tracing_buffers_open(inode, filp);
7494 	if (ret < 0)
7495 		return ret;
7496 
7497 	info = filp->private_data;
7498 
7499 	if (info->iter.trace->use_max_tr) {
7500 		tracing_buffers_release(inode, filp);
7501 		return -EBUSY;
7502 	}
7503 
7504 	info->iter.snapshot = true;
7505 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7506 
7507 	return ret;
7508 }
7509 
7510 #endif /* CONFIG_TRACER_SNAPSHOT */
7511 
7512 
7513 static const struct file_operations tracing_thresh_fops = {
7514 	.open		= tracing_open_generic,
7515 	.read		= tracing_thresh_read,
7516 	.write		= tracing_thresh_write,
7517 	.llseek		= generic_file_llseek,
7518 };
7519 
7520 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7521 static const struct file_operations tracing_max_lat_fops = {
7522 	.open		= tracing_open_generic,
7523 	.read		= tracing_max_lat_read,
7524 	.write		= tracing_max_lat_write,
7525 	.llseek		= generic_file_llseek,
7526 };
7527 #endif
7528 
7529 static const struct file_operations set_tracer_fops = {
7530 	.open		= tracing_open_generic,
7531 	.read		= tracing_set_trace_read,
7532 	.write		= tracing_set_trace_write,
7533 	.llseek		= generic_file_llseek,
7534 };
7535 
7536 static const struct file_operations tracing_pipe_fops = {
7537 	.open		= tracing_open_pipe,
7538 	.poll		= tracing_poll_pipe,
7539 	.read		= tracing_read_pipe,
7540 	.splice_read	= tracing_splice_read_pipe,
7541 	.release	= tracing_release_pipe,
7542 	.llseek		= no_llseek,
7543 };
7544 
7545 static const struct file_operations tracing_entries_fops = {
7546 	.open		= tracing_open_generic_tr,
7547 	.read		= tracing_entries_read,
7548 	.write		= tracing_entries_write,
7549 	.llseek		= generic_file_llseek,
7550 	.release	= tracing_release_generic_tr,
7551 };
7552 
7553 static const struct file_operations tracing_total_entries_fops = {
7554 	.open		= tracing_open_generic_tr,
7555 	.read		= tracing_total_entries_read,
7556 	.llseek		= generic_file_llseek,
7557 	.release	= tracing_release_generic_tr,
7558 };
7559 
7560 static const struct file_operations tracing_free_buffer_fops = {
7561 	.open		= tracing_open_generic_tr,
7562 	.write		= tracing_free_buffer_write,
7563 	.release	= tracing_free_buffer_release,
7564 };
7565 
7566 static const struct file_operations tracing_mark_fops = {
7567 	.open		= tracing_open_generic_tr,
7568 	.write		= tracing_mark_write,
7569 	.llseek		= generic_file_llseek,
7570 	.release	= tracing_release_generic_tr,
7571 };
7572 
7573 static const struct file_operations tracing_mark_raw_fops = {
7574 	.open		= tracing_open_generic_tr,
7575 	.write		= tracing_mark_raw_write,
7576 	.llseek		= generic_file_llseek,
7577 	.release	= tracing_release_generic_tr,
7578 };
7579 
7580 static const struct file_operations trace_clock_fops = {
7581 	.open		= tracing_clock_open,
7582 	.read		= seq_read,
7583 	.llseek		= seq_lseek,
7584 	.release	= tracing_single_release_tr,
7585 	.write		= tracing_clock_write,
7586 };
7587 
7588 static const struct file_operations trace_time_stamp_mode_fops = {
7589 	.open		= tracing_time_stamp_mode_open,
7590 	.read		= seq_read,
7591 	.llseek		= seq_lseek,
7592 	.release	= tracing_single_release_tr,
7593 };
7594 
7595 #ifdef CONFIG_TRACER_SNAPSHOT
7596 static const struct file_operations snapshot_fops = {
7597 	.open		= tracing_snapshot_open,
7598 	.read		= seq_read,
7599 	.write		= tracing_snapshot_write,
7600 	.llseek		= tracing_lseek,
7601 	.release	= tracing_snapshot_release,
7602 };
7603 
7604 static const struct file_operations snapshot_raw_fops = {
7605 	.open		= snapshot_raw_open,
7606 	.read		= tracing_buffers_read,
7607 	.release	= tracing_buffers_release,
7608 	.splice_read	= tracing_buffers_splice_read,
7609 	.llseek		= no_llseek,
7610 };
7611 
7612 #endif /* CONFIG_TRACER_SNAPSHOT */
7613 
7614 /*
7615  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7616  * @filp: The active open file structure
7617  * @ubuf: The userspace buffer containing the value to write
7618  * @cnt: The number of bytes to read from the user buffer
7619  * @ppos: The current "file" position
7620  *
7621  * This function implements the write interface for a struct trace_min_max_param.
7622  * The filp->private_data must point to a trace_min_max_param structure that
7623  * defines where to write the value, the min and the max acceptable values,
7624  * and a lock to protect the write.
7625  */
7626 static ssize_t
7627 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7628 {
7629 	struct trace_min_max_param *param = filp->private_data;
7630 	u64 val;
7631 	int err;
7632 
7633 	if (!param)
7634 		return -EFAULT;
7635 
7636 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7637 	if (err)
7638 		return err;
7639 
7640 	if (param->lock)
7641 		mutex_lock(param->lock);
7642 
7643 	if (param->min && val < *param->min)
7644 		err = -EINVAL;
7645 
7646 	if (param->max && val > *param->max)
7647 		err = -EINVAL;
7648 
7649 	if (!err)
7650 		*param->val = val;
7651 
7652 	if (param->lock)
7653 		mutex_unlock(param->lock);
7654 
7655 	if (err)
7656 		return err;
7657 
7658 	return cnt;
7659 }
7660 
7661 /*
7662  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7663  * @filp: The active open file structure
7664  * @ubuf: The userspace provided buffer to read value into
7665  * @cnt: The maximum number of bytes to read
7666  * @ppos: The current "file" position
7667  *
7668  * This function implements the read interface for a struct trace_min_max_param.
7669  * The filp->private_data must point to a trace_min_max_param struct with valid
7670  * data.
7671  */
7672 static ssize_t
7673 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7674 {
7675 	struct trace_min_max_param *param = filp->private_data;
7676 	char buf[U64_STR_SIZE];
7677 	int len;
7678 	u64 val;
7679 
7680 	if (!param)
7681 		return -EFAULT;
7682 
7683 	val = *param->val;
7684 
7685 	if (cnt > sizeof(buf))
7686 		cnt = sizeof(buf);
7687 
7688 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7689 
7690 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7691 }
7692 
7693 const struct file_operations trace_min_max_fops = {
7694 	.open		= tracing_open_generic,
7695 	.read		= trace_min_max_read,
7696 	.write		= trace_min_max_write,
7697 };
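
/*
 * Illustrative sketch of how a caller can hook a u64 up to these helpers
 * (the my_* names below are hypothetical; the field names match the
 * accessors used by trace_min_max_read()/trace_min_max_write() above):
 *
 *	static u64 my_val, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_value", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 *
 * tracing_open_generic() then exposes &my_param to the handlers via
 * filp->private_data.
 */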
7698 
7699 #define TRACING_LOG_ERRS_MAX	8
7700 #define TRACING_LOG_LOC_MAX	128
7701 
7702 #define CMD_PREFIX "  Command: "
7703 
7704 struct err_info {
7705 	const char	**errs;	/* ptr to loc-specific array of err strings */
7706 	u8		type;	/* index into errs -> specific err string */
7707 	u8		pos;	/* caret position; fits in u8 as MAX_FILTER_STR_VAL = 256 */
7708 	u64		ts;
7709 };
7710 
7711 struct tracing_log_err {
7712 	struct list_head	list;
7713 	struct err_info		info;
7714 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7715 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7716 };
7717 
7718 static DEFINE_MUTEX(tracing_err_log_lock);
7719 
7720 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7721 {
7722 	struct tracing_log_err *err;
7723 
7724 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7725 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7726 		if (!err)
7727 			err = ERR_PTR(-ENOMEM);
7728 		tr->n_err_log_entries++;
7729 
7730 		return err;
7731 	}
7732 
7733 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7734 	list_del(&err->list);
7735 
7736 	return err;
7737 }
7738 
7739 /**
7740  * err_pos - find the position of a string within a command for error careting
7741  * @cmd: The tracing command that caused the error
7742  * @str: The string to position the caret at within @cmd
7743  *
7744  * Finds the position of the first occurrence of @str within @cmd.  The
7745  * return value can be passed to tracing_log_err() for caret placement
7746  * within @cmd.
7747  *
7748  * Returns the index within @cmd of the first occurrence of @str or 0
7749  * if @str was not found.
7750  */
7751 unsigned int err_pos(char *cmd, const char *str)
7752 {
7753 	char *found;
7754 
7755 	if (WARN_ON(!strlen(cmd)))
7756 		return 0;
7757 
7758 	found = strstr(cmd, str);
7759 	if (found)
7760 		return found - cmd;
7761 
7762 	return 0;
7763 }
7764 
7765 /**
7766  * tracing_log_err - write an error to the tracing error log
7767  * @tr: The associated trace array for the error (NULL for top level array)
7768  * @loc: A string describing where the error occurred
7769  * @cmd: The tracing command that caused the error
7770  * @errs: The array of loc-specific static error strings
7771  * @type: The index into errs[], which produces the specific static err string
7772  * @pos: The position the caret should be placed in the cmd
7773  *
7774  * Writes an error into tracing/error_log of the form:
7775  *
7776  * <loc>: error: <text>
7777  *   Command: <cmd>
7778  *              ^
7779  *
7780  * tracing/error_log is a small log file containing the last
7781  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7782  * unless there has been a tracing error, and the error log can be
7783  * cleared and have its memory freed by writing the empty string in
7784  * truncation mode to it, i.e. echo > tracing/error_log.
7785  *
7786  * NOTE: the @errs array along with the @type param are used to
7787  * produce a static error string - this string is not copied and saved
7788  * when the error is logged - only a pointer to it is saved.  See
7789  * existing callers for examples of how static strings are typically
7790  * defined for use with tracing_log_err().
7791  */
7792 void tracing_log_err(struct trace_array *tr,
7793 		     const char *loc, const char *cmd,
7794 		     const char **errs, u8 type, u8 pos)
7795 {
7796 	struct tracing_log_err *err;
7797 
7798 	if (!tr)
7799 		tr = &global_trace;
7800 
7801 	mutex_lock(&tracing_err_log_lock);
7802 	err = get_tracing_log_err(tr);
7803 	if (PTR_ERR(err) == -ENOMEM) {
7804 		mutex_unlock(&tracing_err_log_lock);
7805 		return;
7806 	}
7807 
7808 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7809 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7810 
7811 	err->info.errs = errs;
7812 	err->info.type = type;
7813 	err->info.pos = pos;
7814 	err->info.ts = local_clock();
7815 
7816 	list_add_tail(&err->list, &tr->err_log);
7817 	mutex_unlock(&tracing_err_log_lock);
7818 }
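
/*
 * Illustrative caller sketch (hypothetical names, loosely modeled on the
 * existing users mentioned in the NOTE above):
 *
 *	static const char *my_errs[] = {
 *		"Unknown field",
 *		"Too many arguments",
 *	};
 *	enum { ERR_UNKNOWN_FIELD, ERR_TOO_MANY_ARGS };
 *
 *	tracing_log_err(tr, "my_subsys", cmd, my_errs,
 *			ERR_UNKNOWN_FIELD, err_pos(cmd, bad_field));
 *
 * The strings in my_errs[] must be static, since only the pointer is
 * stored with the logged error.
 */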
7819 
7820 static void clear_tracing_err_log(struct trace_array *tr)
7821 {
7822 	struct tracing_log_err *err, *next;
7823 
7824 	mutex_lock(&tracing_err_log_lock);
7825 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7826 		list_del(&err->list);
7827 		kfree(err);
7828 	}
7829 
7830 	tr->n_err_log_entries = 0;
7831 	mutex_unlock(&tracing_err_log_lock);
7832 }
7833 
7834 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7835 {
7836 	struct trace_array *tr = m->private;
7837 
7838 	mutex_lock(&tracing_err_log_lock);
7839 
7840 	return seq_list_start(&tr->err_log, *pos);
7841 }
7842 
7843 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7844 {
7845 	struct trace_array *tr = m->private;
7846 
7847 	return seq_list_next(v, &tr->err_log, pos);
7848 }
7849 
7850 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7851 {
7852 	mutex_unlock(&tracing_err_log_lock);
7853 }
7854 
7855 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7856 {
7857 	u8 i;
7858 
7859 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7860 		seq_putc(m, ' ');
7861 	for (i = 0; i < pos; i++)
7862 		seq_putc(m, ' ');
7863 	seq_puts(m, "^\n");
7864 }
7865 
7866 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7867 {
7868 	struct tracing_log_err *err = v;
7869 
7870 	if (err) {
7871 		const char *err_text = err->info.errs[err->info.type];
7872 		u64 sec = err->info.ts;
7873 		u32 nsec;
7874 
7875 		nsec = do_div(sec, NSEC_PER_SEC);
7876 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7877 			   err->loc, err_text);
7878 		seq_printf(m, "%s", err->cmd);
7879 		tracing_err_log_show_pos(m, err->info.pos);
7880 	}
7881 
7882 	return 0;
7883 }
7884 
7885 static const struct seq_operations tracing_err_log_seq_ops = {
7886 	.start  = tracing_err_log_seq_start,
7887 	.next   = tracing_err_log_seq_next,
7888 	.stop   = tracing_err_log_seq_stop,
7889 	.show   = tracing_err_log_seq_show
7890 };
7891 
7892 static int tracing_err_log_open(struct inode *inode, struct file *file)
7893 {
7894 	struct trace_array *tr = inode->i_private;
7895 	int ret = 0;
7896 
7897 	ret = tracing_check_open_get_tr(tr);
7898 	if (ret)
7899 		return ret;
7900 
7901 	/* If this file was opened for write, then erase contents */
7902 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7903 		clear_tracing_err_log(tr);
7904 
7905 	if (file->f_mode & FMODE_READ) {
7906 		ret = seq_open(file, &tracing_err_log_seq_ops);
7907 		if (!ret) {
7908 			struct seq_file *m = file->private_data;
7909 			m->private = tr;
7910 		} else {
7911 			trace_array_put(tr);
7912 		}
7913 	}
7914 	return ret;
7915 }
7916 
7917 static ssize_t tracing_err_log_write(struct file *file,
7918 				     const char __user *buffer,
7919 				     size_t count, loff_t *ppos)
7920 {
7921 	return count;
7922 }
7923 
7924 static int tracing_err_log_release(struct inode *inode, struct file *file)
7925 {
7926 	struct trace_array *tr = inode->i_private;
7927 
7928 	trace_array_put(tr);
7929 
7930 	if (file->f_mode & FMODE_READ)
7931 		seq_release(inode, file);
7932 
7933 	return 0;
7934 }
7935 
7936 static const struct file_operations tracing_err_log_fops = {
7937 	.open           = tracing_err_log_open,
7938 	.write		= tracing_err_log_write,
7939 	.read           = seq_read,
7940 	.llseek         = seq_lseek,
7941 	.release        = tracing_err_log_release,
7942 };
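
/*
 * Typical interaction with tracing/error_log from user space
 * (illustrative):
 *
 *	cat error_log		- dump the most recent errors, oldest first
 *	echo > error_log	- truncate, clearing the log and freeing it
 *
 * The write handler itself ignores its input; clearing is done by
 * tracing_err_log_open() when the file is opened with O_TRUNC.
 */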
7943 
7944 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7945 {
7946 	struct trace_array *tr = inode->i_private;
7947 	struct ftrace_buffer_info *info;
7948 	int ret;
7949 
7950 	ret = tracing_check_open_get_tr(tr);
7951 	if (ret)
7952 		return ret;
7953 
7954 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7955 	if (!info) {
7956 		trace_array_put(tr);
7957 		return -ENOMEM;
7958 	}
7959 
7960 	mutex_lock(&trace_types_lock);
7961 
7962 	info->iter.tr		= tr;
7963 	info->iter.cpu_file	= tracing_get_cpu(inode);
7964 	info->iter.trace	= tr->current_trace;
7965 	info->iter.array_buffer = &tr->array_buffer;
7966 	info->spare		= NULL;
7967 	/* Force reading ring buffer for first read */
7968 	info->read		= (unsigned int)-1;
7969 
7970 	filp->private_data = info;
7971 
7972 	tr->trace_ref++;
7973 
7974 	mutex_unlock(&trace_types_lock);
7975 
7976 	ret = nonseekable_open(inode, filp);
7977 	if (ret < 0)
7978 		trace_array_put(tr);
7979 
7980 	return ret;
7981 }
7982 
7983 static __poll_t
7984 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7985 {
7986 	struct ftrace_buffer_info *info = filp->private_data;
7987 	struct trace_iterator *iter = &info->iter;
7988 
7989 	return trace_poll(iter, filp, poll_table);
7990 }
7991 
7992 static ssize_t
7993 tracing_buffers_read(struct file *filp, char __user *ubuf,
7994 		     size_t count, loff_t *ppos)
7995 {
7996 	struct ftrace_buffer_info *info = filp->private_data;
7997 	struct trace_iterator *iter = &info->iter;
7998 	ssize_t ret = 0;
7999 	ssize_t size;
8000 
8001 	if (!count)
8002 		return 0;
8003 
8004 #ifdef CONFIG_TRACER_MAX_TRACE
8005 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8006 		return -EBUSY;
8007 #endif
8008 
8009 	if (!info->spare) {
8010 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8011 							  iter->cpu_file);
8012 		if (IS_ERR(info->spare)) {
8013 			ret = PTR_ERR(info->spare);
8014 			info->spare = NULL;
8015 		} else {
8016 			info->spare_cpu = iter->cpu_file;
8017 		}
8018 	}
8019 	if (!info->spare)
8020 		return ret;
8021 
8022 	/* Do we have previous read data to read? */
8023 	if (info->read < PAGE_SIZE)
8024 		goto read;
8025 
8026  again:
8027 	trace_access_lock(iter->cpu_file);
8028 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8029 				    &info->spare,
8030 				    count,
8031 				    iter->cpu_file, 0);
8032 	trace_access_unlock(iter->cpu_file);
8033 
8034 	if (ret < 0) {
8035 		if (trace_empty(iter)) {
8036 			if ((filp->f_flags & O_NONBLOCK))
8037 				return -EAGAIN;
8038 
8039 			ret = wait_on_pipe(iter, 0);
8040 			if (ret)
8041 				return ret;
8042 
8043 			goto again;
8044 		}
8045 		return 0;
8046 	}
8047 
8048 	info->read = 0;
8049  read:
8050 	size = PAGE_SIZE - info->read;
8051 	if (size > count)
8052 		size = count;
8053 
8054 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8055 	if (ret == size)
8056 		return -EFAULT;
8057 
8058 	size -= ret;
8059 
8060 	*ppos += size;
8061 	info->read += size;
8062 
8063 	return size;
8064 }
8065 
8066 static int tracing_buffers_release(struct inode *inode, struct file *file)
8067 {
8068 	struct ftrace_buffer_info *info = file->private_data;
8069 	struct trace_iterator *iter = &info->iter;
8070 
8071 	mutex_lock(&trace_types_lock);
8072 
8073 	iter->tr->trace_ref--;
8074 
8075 	__trace_array_put(iter->tr);
8076 
8077 	if (info->spare)
8078 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8079 					   info->spare_cpu, info->spare);
8080 	kvfree(info);
8081 
8082 	mutex_unlock(&trace_types_lock);
8083 
8084 	return 0;
8085 }
8086 
8087 struct buffer_ref {
8088 	struct trace_buffer	*buffer;
8089 	void			*page;
8090 	int			cpu;
8091 	refcount_t		refcount;
8092 };
8093 
8094 static void buffer_ref_release(struct buffer_ref *ref)
8095 {
8096 	if (!refcount_dec_and_test(&ref->refcount))
8097 		return;
8098 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8099 	kfree(ref);
8100 }
8101 
8102 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8103 				    struct pipe_buffer *buf)
8104 {
8105 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8106 
8107 	buffer_ref_release(ref);
8108 	buf->private = 0;
8109 }
8110 
8111 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8112 				struct pipe_buffer *buf)
8113 {
8114 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8115 
8116 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8117 		return false;
8118 
8119 	refcount_inc(&ref->refcount);
8120 	return true;
8121 }
8122 
8123 /* Pipe buffer operations for a buffer. */
8124 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8125 	.release		= buffer_pipe_buf_release,
8126 	.get			= buffer_pipe_buf_get,
8127 };
8128 
8129 /*
8130  * Callback from splice_to_pipe(); release any pages left in the spd
8131  * if we errored out while filling the pipe.
8132  */
8133 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8134 {
8135 	struct buffer_ref *ref =
8136 		(struct buffer_ref *)spd->partial[i].private;
8137 
8138 	buffer_ref_release(ref);
8139 	spd->partial[i].private = 0;
8140 }
8141 
8142 static ssize_t
8143 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8144 			    struct pipe_inode_info *pipe, size_t len,
8145 			    unsigned int flags)
8146 {
8147 	struct ftrace_buffer_info *info = file->private_data;
8148 	struct trace_iterator *iter = &info->iter;
8149 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8150 	struct page *pages_def[PIPE_DEF_BUFFERS];
8151 	struct splice_pipe_desc spd = {
8152 		.pages		= pages_def,
8153 		.partial	= partial_def,
8154 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8155 		.ops		= &buffer_pipe_buf_ops,
8156 		.spd_release	= buffer_spd_release,
8157 	};
8158 	struct buffer_ref *ref;
8159 	int entries, i;
8160 	ssize_t ret = 0;
8161 
8162 #ifdef CONFIG_TRACER_MAX_TRACE
8163 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8164 		return -EBUSY;
8165 #endif
8166 
8167 	if (*ppos & (PAGE_SIZE - 1))
8168 		return -EINVAL;
8169 
8170 	if (len & (PAGE_SIZE - 1)) {
8171 		if (len < PAGE_SIZE)
8172 			return -EINVAL;
8173 		len &= PAGE_MASK;
8174 	}
8175 
8176 	if (splice_grow_spd(pipe, &spd))
8177 		return -ENOMEM;
8178 
8179  again:
8180 	trace_access_lock(iter->cpu_file);
8181 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8182 
8183 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8184 		struct page *page;
8185 		int r;
8186 
8187 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8188 		if (!ref) {
8189 			ret = -ENOMEM;
8190 			break;
8191 		}
8192 
8193 		refcount_set(&ref->refcount, 1);
8194 		ref->buffer = iter->array_buffer->buffer;
8195 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8196 		if (IS_ERR(ref->page)) {
8197 			ret = PTR_ERR(ref->page);
8198 			ref->page = NULL;
8199 			kfree(ref);
8200 			break;
8201 		}
8202 		ref->cpu = iter->cpu_file;
8203 
8204 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8205 					  len, iter->cpu_file, 1);
8206 		if (r < 0) {
8207 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8208 						   ref->page);
8209 			kfree(ref);
8210 			break;
8211 		}
8212 
8213 		page = virt_to_page(ref->page);
8214 
8215 		spd.pages[i] = page;
8216 		spd.partial[i].len = PAGE_SIZE;
8217 		spd.partial[i].offset = 0;
8218 		spd.partial[i].private = (unsigned long)ref;
8219 		spd.nr_pages++;
8220 		*ppos += PAGE_SIZE;
8221 
8222 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8223 	}
8224 
8225 	trace_access_unlock(iter->cpu_file);
8226 	spd.nr_pages = i;
8227 
8228 	/* did we read anything? */
8229 	if (!spd.nr_pages) {
8230 		if (ret)
8231 			goto out;
8232 
8233 		ret = -EAGAIN;
8234 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8235 			goto out;
8236 
8237 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8238 		if (ret)
8239 			goto out;
8240 
8241 		goto again;
8242 	}
8243 
8244 	ret = splice_to_pipe(pipe, &spd);
8245 out:
8246 	splice_shrink_spd(&spd);
8247 
8248 	return ret;
8249 }
8250 
8251 static const struct file_operations tracing_buffers_fops = {
8252 	.open		= tracing_buffers_open,
8253 	.read		= tracing_buffers_read,
8254 	.poll		= tracing_buffers_poll,
8255 	.release	= tracing_buffers_release,
8256 	.splice_read	= tracing_buffers_splice_read,
8257 	.llseek		= no_llseek,
8258 };
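
/*
 * These operations back the per_cpu/cpuN/trace_pipe_raw files created in
 * tracing_init_tracefs_percpu() below.  A rough user-space sketch of
 * pulling whole pages out with splice(2) (hypothetical, no error
 * handling):
 *
 *	int fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *
 *	for (;;)
 *		splice(fd, NULL, pipe_write_fd, NULL, 4096, SPLICE_F_MOVE);
 */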
8259 
8260 static ssize_t
8261 tracing_stats_read(struct file *filp, char __user *ubuf,
8262 		   size_t count, loff_t *ppos)
8263 {
8264 	struct inode *inode = file_inode(filp);
8265 	struct trace_array *tr = inode->i_private;
8266 	struct array_buffer *trace_buf = &tr->array_buffer;
8267 	int cpu = tracing_get_cpu(inode);
8268 	struct trace_seq *s;
8269 	unsigned long cnt;
8270 	unsigned long long t;
8271 	unsigned long usec_rem;
8272 
8273 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8274 	if (!s)
8275 		return -ENOMEM;
8276 
8277 	trace_seq_init(s);
8278 
8279 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8280 	trace_seq_printf(s, "entries: %ld\n", cnt);
8281 
8282 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8283 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8284 
8285 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8286 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8287 
8288 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8289 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8290 
8291 	if (trace_clocks[tr->clock_id].in_ns) {
8292 		/* local or global for trace_clock */
8293 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8294 		usec_rem = do_div(t, USEC_PER_SEC);
8295 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8296 								t, usec_rem);
8297 
8298 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8299 		usec_rem = do_div(t, USEC_PER_SEC);
8300 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8301 	} else {
8302 		/* counter or tsc mode for trace_clock */
8303 		trace_seq_printf(s, "oldest event ts: %llu\n",
8304 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8305 
8306 		trace_seq_printf(s, "now ts: %llu\n",
8307 				ring_buffer_time_stamp(trace_buf->buffer));
8308 	}
8309 
8310 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8311 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8312 
8313 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8314 	trace_seq_printf(s, "read events: %ld\n", cnt);
8315 
8316 	count = simple_read_from_buffer(ubuf, count, ppos,
8317 					s->buffer, trace_seq_used(s));
8318 
8319 	kfree(s);
8320 
8321 	return count;
8322 }
8323 
8324 static const struct file_operations tracing_stats_fops = {
8325 	.open		= tracing_open_generic_tr,
8326 	.read		= tracing_stats_read,
8327 	.llseek		= generic_file_llseek,
8328 	.release	= tracing_release_generic_tr,
8329 };
8330 
8331 #ifdef CONFIG_DYNAMIC_FTRACE
8332 
8333 static ssize_t
8334 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8335 		  size_t cnt, loff_t *ppos)
8336 {
8337 	ssize_t ret;
8338 	char *buf;
8339 	int r;
8340 
8341 	/* 256 bytes should be plenty to hold the three counts printed below */
8342 	buf = kmalloc(256, GFP_KERNEL);
8343 	if (!buf)
8344 		return -ENOMEM;
8345 
8346 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8347 		      ftrace_update_tot_cnt,
8348 		      ftrace_number_of_pages,
8349 		      ftrace_number_of_groups);
8350 
8351 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8352 	kfree(buf);
8353 	return ret;
8354 }
8355 
8356 static const struct file_operations tracing_dyn_info_fops = {
8357 	.open		= tracing_open_generic,
8358 	.read		= tracing_read_dyn_info,
8359 	.llseek		= generic_file_llseek,
8360 };
8361 #endif /* CONFIG_DYNAMIC_FTRACE */
8362 
8363 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8364 static void
8365 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8366 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8367 		void *data)
8368 {
8369 	tracing_snapshot_instance(tr);
8370 }
8371 
8372 static void
8373 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8374 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8375 		      void *data)
8376 {
8377 	struct ftrace_func_mapper *mapper = data;
8378 	long *count = NULL;
8379 
8380 	if (mapper)
8381 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8382 
8383 	if (count) {
8384 
8385 		if (*count <= 0)
8386 			return;
8387 
8388 		(*count)--;
8389 	}
8390 
8391 	tracing_snapshot_instance(tr);
8392 }
8393 
8394 static int
8395 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8396 		      struct ftrace_probe_ops *ops, void *data)
8397 {
8398 	struct ftrace_func_mapper *mapper = data;
8399 	long *count = NULL;
8400 
8401 	seq_printf(m, "%ps:", (void *)ip);
8402 
8403 	seq_puts(m, "snapshot");
8404 
8405 	if (mapper)
8406 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8407 
8408 	if (count)
8409 		seq_printf(m, ":count=%ld\n", *count);
8410 	else
8411 		seq_puts(m, ":unlimited\n");
8412 
8413 	return 0;
8414 }
8415 
8416 static int
8417 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8418 		     unsigned long ip, void *init_data, void **data)
8419 {
8420 	struct ftrace_func_mapper *mapper = *data;
8421 
8422 	if (!mapper) {
8423 		mapper = allocate_ftrace_func_mapper();
8424 		if (!mapper)
8425 			return -ENOMEM;
8426 		*data = mapper;
8427 	}
8428 
8429 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8430 }
8431 
8432 static void
8433 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8434 		     unsigned long ip, void *data)
8435 {
8436 	struct ftrace_func_mapper *mapper = data;
8437 
8438 	if (!ip) {
8439 		if (!mapper)
8440 			return;
8441 		free_ftrace_func_mapper(mapper, NULL);
8442 		return;
8443 	}
8444 
8445 	ftrace_func_mapper_remove_ip(mapper, ip);
8446 }
8447 
8448 static struct ftrace_probe_ops snapshot_probe_ops = {
8449 	.func			= ftrace_snapshot,
8450 	.print			= ftrace_snapshot_print,
8451 };
8452 
8453 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8454 	.func			= ftrace_count_snapshot,
8455 	.print			= ftrace_snapshot_print,
8456 	.init			= ftrace_snapshot_init,
8457 	.free			= ftrace_snapshot_free,
8458 };
8459 
8460 static int
8461 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8462 			       char *glob, char *cmd, char *param, int enable)
8463 {
8464 	struct ftrace_probe_ops *ops;
8465 	void *count = (void *)-1;
8466 	char *number;
8467 	int ret;
8468 
8469 	if (!tr)
8470 		return -ENODEV;
8471 
8472 	/* hash funcs only work with set_ftrace_filter */
8473 	if (!enable)
8474 		return -EINVAL;
8475 
8476 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8477 
8478 	if (glob[0] == '!')
8479 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8480 
8481 	if (!param)
8482 		goto out_reg;
8483 
8484 	number = strsep(&param, ":");
8485 
8486 	if (!strlen(number))
8487 		goto out_reg;
8488 
8489 	/*
8490 	 * We use the callback data field (which is a pointer)
8491 	 * as our counter.
8492 	 */
8493 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8494 	if (ret)
8495 		return ret;
8496 
8497  out_reg:
8498 	ret = tracing_alloc_snapshot_instance(tr);
8499 	if (ret < 0)
8500 		goto out;
8501 
8502 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8503 
8504  out:
8505 	return ret < 0 ? ret : 0;
8506 }
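
/*
 * The callback above implements the "snapshot" command of
 * set_ftrace_filter.  Illustrative usage (see
 * Documentation/trace/ftrace.rst for the full description):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter	- snapshot on every hit
 *	echo 'schedule:snapshot:5' > set_ftrace_filter	- only the first 5 hits
 *	echo '!schedule:snapshot' > set_ftrace_filter	- remove the probe
 */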
8507 
8508 static struct ftrace_func_command ftrace_snapshot_cmd = {
8509 	.name			= "snapshot",
8510 	.func			= ftrace_trace_snapshot_callback,
8511 };
8512 
8513 static __init int register_snapshot_cmd(void)
8514 {
8515 	return register_ftrace_command(&ftrace_snapshot_cmd);
8516 }
8517 #else
8518 static inline __init int register_snapshot_cmd(void) { return 0; }
8519 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8520 
8521 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8522 {
8523 	if (WARN_ON(!tr->dir))
8524 		return ERR_PTR(-ENODEV);
8525 
8526 	/* Top directory uses NULL as the parent */
8527 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8528 		return NULL;
8529 
8530 	/* All sub buffers have a descriptor */
8531 	return tr->dir;
8532 }
8533 
8534 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8535 {
8536 	struct dentry *d_tracer;
8537 
8538 	if (tr->percpu_dir)
8539 		return tr->percpu_dir;
8540 
8541 	d_tracer = tracing_get_dentry(tr);
8542 	if (IS_ERR(d_tracer))
8543 		return NULL;
8544 
8545 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8546 
8547 	MEM_FAIL(!tr->percpu_dir,
8548 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8549 
8550 	return tr->percpu_dir;
8551 }
8552 
8553 static struct dentry *
8554 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8555 		      void *data, long cpu, const struct file_operations *fops)
8556 {
8557 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8558 
8559 	if (ret) /* See tracing_get_cpu() */
8560 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8561 	return ret;
8562 }
8563 
8564 static void
8565 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8566 {
8567 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8568 	struct dentry *d_cpu;
8569 	char cpu_dir[30]; /* 30 characters should be more than enough */
8570 
8571 	if (!d_percpu)
8572 		return;
8573 
8574 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8575 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8576 	if (!d_cpu) {
8577 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8578 		return;
8579 	}
8580 
8581 	/* per cpu trace_pipe */
8582 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8583 				tr, cpu, &tracing_pipe_fops);
8584 
8585 	/* per cpu trace */
8586 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8587 				tr, cpu, &tracing_fops);
8588 
8589 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8590 				tr, cpu, &tracing_buffers_fops);
8591 
8592 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8593 				tr, cpu, &tracing_stats_fops);
8594 
8595 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8596 				tr, cpu, &tracing_entries_fops);
8597 
8598 #ifdef CONFIG_TRACER_SNAPSHOT
8599 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8600 				tr, cpu, &snapshot_fops);
8601 
8602 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8603 				tr, cpu, &snapshot_raw_fops);
8604 #endif
8605 }
8606 
8607 #ifdef CONFIG_FTRACE_SELFTEST
8608 /* Let selftest have access to static functions in this file */
8609 #include "trace_selftest.c"
8610 #endif
8611 
8612 static ssize_t
8613 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8614 			loff_t *ppos)
8615 {
8616 	struct trace_option_dentry *topt = filp->private_data;
8617 	char *buf;
8618 
8619 	if (topt->flags->val & topt->opt->bit)
8620 		buf = "1\n";
8621 	else
8622 		buf = "0\n";
8623 
8624 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8625 }
8626 
8627 static ssize_t
8628 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8629 			 loff_t *ppos)
8630 {
8631 	struct trace_option_dentry *topt = filp->private_data;
8632 	unsigned long val;
8633 	int ret;
8634 
8635 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8636 	if (ret)
8637 		return ret;
8638 
8639 	if (val != 0 && val != 1)
8640 		return -EINVAL;
8641 
8642 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8643 		mutex_lock(&trace_types_lock);
8644 		ret = __set_tracer_option(topt->tr, topt->flags,
8645 					  topt->opt, !val);
8646 		mutex_unlock(&trace_types_lock);
8647 		if (ret)
8648 			return ret;
8649 	}
8650 
8651 	*ppos += cnt;
8652 
8653 	return cnt;
8654 }
8655 
8656 
8657 static const struct file_operations trace_options_fops = {
8658 	.open = tracing_open_generic,
8659 	.read = trace_options_read,
8660 	.write = trace_options_write,
8661 	.llseek	= generic_file_llseek,
8662 };
8663 
8664 /*
8665  * In order to pass in both the trace_array descriptor as well as the index
8666  * to the flag that the trace option file represents, the trace_array
8667  * has a character array of trace_flags_index[], which holds the index
8668  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8669  * The address of this character array is passed to the flag option file
8670  * read/write callbacks.
8671  *
8672  * In order to extract both the index and the trace_array descriptor,
8673  * get_tr_index() uses the following algorithm.
8674  *
8675  *   idx = *ptr;
8676  *
8677  * The pointer points into the index array, and the value stored there
8678  * is the index itself (remember index[1] == 1).
8679  *
8680  * To get the trace_array descriptor, subtract that index from the
8681  * pointer, which lands back at the start of the index array:
8682  *
8683  *   ptr - idx == &index[0]
8684  *
8685  * Then a simple container_of() from that pointer gets us to the
8686  * trace_array descriptor.
8687  */
8688 static void get_tr_index(void *data, struct trace_array **ptr,
8689 			 unsigned int *pindex)
8690 {
8691 	*pindex = *(unsigned char *)data;
8692 
8693 	*ptr = container_of(data - *pindex, struct trace_array,
8694 			    trace_flags_index);
8695 }
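
/*
 * Worked example (illustrative): the option file for flag bit 5 stores
 * data = &tr->trace_flags_index[5].  Since trace_flags_index[5] == 5,
 * *pindex becomes 5 and data - 5 == &tr->trace_flags_index[0], so the
 * container_of() above lands back on the enclosing trace_array.
 */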
8696 
8697 static ssize_t
8698 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8699 			loff_t *ppos)
8700 {
8701 	void *tr_index = filp->private_data;
8702 	struct trace_array *tr;
8703 	unsigned int index;
8704 	char *buf;
8705 
8706 	get_tr_index(tr_index, &tr, &index);
8707 
8708 	if (tr->trace_flags & (1 << index))
8709 		buf = "1\n";
8710 	else
8711 		buf = "0\n";
8712 
8713 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8714 }
8715 
8716 static ssize_t
8717 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8718 			 loff_t *ppos)
8719 {
8720 	void *tr_index = filp->private_data;
8721 	struct trace_array *tr;
8722 	unsigned int index;
8723 	unsigned long val;
8724 	int ret;
8725 
8726 	get_tr_index(tr_index, &tr, &index);
8727 
8728 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8729 	if (ret)
8730 		return ret;
8731 
8732 	if (val != 0 && val != 1)
8733 		return -EINVAL;
8734 
8735 	mutex_lock(&event_mutex);
8736 	mutex_lock(&trace_types_lock);
8737 	ret = set_tracer_flag(tr, 1 << index, val);
8738 	mutex_unlock(&trace_types_lock);
8739 	mutex_unlock(&event_mutex);
8740 
8741 	if (ret < 0)
8742 		return ret;
8743 
8744 	*ppos += cnt;
8745 
8746 	return cnt;
8747 }
8748 
8749 static const struct file_operations trace_options_core_fops = {
8750 	.open = tracing_open_generic,
8751 	.read = trace_options_core_read,
8752 	.write = trace_options_core_write,
8753 	.llseek = generic_file_llseek,
8754 };
8755 
8756 struct dentry *trace_create_file(const char *name,
8757 				 umode_t mode,
8758 				 struct dentry *parent,
8759 				 void *data,
8760 				 const struct file_operations *fops)
8761 {
8762 	struct dentry *ret;
8763 
8764 	ret = tracefs_create_file(name, mode, parent, data, fops);
8765 	if (!ret)
8766 		pr_warn("Could not create tracefs '%s' entry\n", name);
8767 
8768 	return ret;
8769 }
8770 
8771 
8772 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8773 {
8774 	struct dentry *d_tracer;
8775 
8776 	if (tr->options)
8777 		return tr->options;
8778 
8779 	d_tracer = tracing_get_dentry(tr);
8780 	if (IS_ERR(d_tracer))
8781 		return NULL;
8782 
8783 	tr->options = tracefs_create_dir("options", d_tracer);
8784 	if (!tr->options) {
8785 		pr_warn("Could not create tracefs directory 'options'\n");
8786 		return NULL;
8787 	}
8788 
8789 	return tr->options;
8790 }
8791 
8792 static void
8793 create_trace_option_file(struct trace_array *tr,
8794 			 struct trace_option_dentry *topt,
8795 			 struct tracer_flags *flags,
8796 			 struct tracer_opt *opt)
8797 {
8798 	struct dentry *t_options;
8799 
8800 	t_options = trace_options_init_dentry(tr);
8801 	if (!t_options)
8802 		return;
8803 
8804 	topt->flags = flags;
8805 	topt->opt = opt;
8806 	topt->tr = tr;
8807 
8808 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8809 					t_options, topt, &trace_options_fops);
8810 
8811 }
8812 
8813 static void
8814 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8815 {
8816 	struct trace_option_dentry *topts;
8817 	struct trace_options *tr_topts;
8818 	struct tracer_flags *flags;
8819 	struct tracer_opt *opts;
8820 	int cnt;
8821 	int i;
8822 
8823 	if (!tracer)
8824 		return;
8825 
8826 	flags = tracer->flags;
8827 
8828 	if (!flags || !flags->opts)
8829 		return;
8830 
8831 	/*
8832 	 * If this is an instance, only create flags for tracers
8833 	 * the instance may have.
8834 	 */
8835 	if (!trace_ok_for_array(tracer, tr))
8836 		return;
8837 
8838 	for (i = 0; i < tr->nr_topts; i++) {
8839 		/* Make sure there are no duplicate flags. */
8840 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8841 			return;
8842 	}
8843 
8844 	opts = flags->opts;
8845 
8846 	for (cnt = 0; opts[cnt].name; cnt++)
8847 		;
8848 
8849 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8850 	if (!topts)
8851 		return;
8852 
8853 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8854 			    GFP_KERNEL);
8855 	if (!tr_topts) {
8856 		kfree(topts);
8857 		return;
8858 	}
8859 
8860 	tr->topts = tr_topts;
8861 	tr->topts[tr->nr_topts].tracer = tracer;
8862 	tr->topts[tr->nr_topts].topts = topts;
8863 	tr->nr_topts++;
8864 
8865 	for (cnt = 0; opts[cnt].name; cnt++) {
8866 		create_trace_option_file(tr, &topts[cnt], flags,
8867 					 &opts[cnt]);
8868 		MEM_FAIL(topts[cnt].entry == NULL,
8869 			  "Failed to create trace option: %s",
8870 			  opts[cnt].name);
8871 	}
8872 }
8873 
8874 static struct dentry *
8875 create_trace_option_core_file(struct trace_array *tr,
8876 			      const char *option, long index)
8877 {
8878 	struct dentry *t_options;
8879 
8880 	t_options = trace_options_init_dentry(tr);
8881 	if (!t_options)
8882 		return NULL;
8883 
8884 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8885 				 (void *)&tr->trace_flags_index[index],
8886 				 &trace_options_core_fops);
8887 }
8888 
8889 static void create_trace_options_dir(struct trace_array *tr)
8890 {
8891 	struct dentry *t_options;
8892 	bool top_level = tr == &global_trace;
8893 	int i;
8894 
8895 	t_options = trace_options_init_dentry(tr);
8896 	if (!t_options)
8897 		return;
8898 
8899 	for (i = 0; trace_options[i]; i++) {
8900 		if (top_level ||
8901 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8902 			create_trace_option_core_file(tr, trace_options[i], i);
8903 	}
8904 }
8905 
8906 static ssize_t
8907 rb_simple_read(struct file *filp, char __user *ubuf,
8908 	       size_t cnt, loff_t *ppos)
8909 {
8910 	struct trace_array *tr = filp->private_data;
8911 	char buf[64];
8912 	int r;
8913 
8914 	r = tracer_tracing_is_on(tr);
8915 	r = sprintf(buf, "%d\n", r);
8916 
8917 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8918 }
8919 
8920 static ssize_t
8921 rb_simple_write(struct file *filp, const char __user *ubuf,
8922 		size_t cnt, loff_t *ppos)
8923 {
8924 	struct trace_array *tr = filp->private_data;
8925 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8926 	unsigned long val;
8927 	int ret;
8928 
8929 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8930 	if (ret)
8931 		return ret;
8932 
8933 	if (buffer) {
8934 		mutex_lock(&trace_types_lock);
8935 		if (!!val == tracer_tracing_is_on(tr)) {
8936 			val = 0; /* do nothing */
8937 		} else if (val) {
8938 			tracer_tracing_on(tr);
8939 			if (tr->current_trace->start)
8940 				tr->current_trace->start(tr);
8941 		} else {
8942 			tracer_tracing_off(tr);
8943 			if (tr->current_trace->stop)
8944 				tr->current_trace->stop(tr);
8945 		}
8946 		mutex_unlock(&trace_types_lock);
8947 	}
8948 
8949 	(*ppos)++;
8950 
8951 	return cnt;
8952 }
8953 
8954 static const struct file_operations rb_simple_fops = {
8955 	.open		= tracing_open_generic_tr,
8956 	.read		= rb_simple_read,
8957 	.write		= rb_simple_write,
8958 	.release	= tracing_release_generic_tr,
8959 	.llseek		= default_llseek,
8960 };
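
/*
 * rb_simple_fops backs the "tracing_on" file created in
 * init_tracer_tracefs() below.  Illustrative usage:
 *
 *	echo 0 > tracing_on	- stop writing into the ring buffer,
 *				  calling the tracer's ->stop() if any
 *	echo 1 > tracing_on	- resume, calling the tracer's ->start()
 *	cat tracing_on		- report the current state (0 or 1)
 */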
8961 
8962 static ssize_t
8963 buffer_percent_read(struct file *filp, char __user *ubuf,
8964 		    size_t cnt, loff_t *ppos)
8965 {
8966 	struct trace_array *tr = filp->private_data;
8967 	char buf[64];
8968 	int r;
8969 
8970 	r = tr->buffer_percent;
8971 	r = sprintf(buf, "%d\n", r);
8972 
8973 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8974 }
8975 
8976 static ssize_t
8977 buffer_percent_write(struct file *filp, const char __user *ubuf,
8978 		     size_t cnt, loff_t *ppos)
8979 {
8980 	struct trace_array *tr = filp->private_data;
8981 	unsigned long val;
8982 	int ret;
8983 
8984 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8985 	if (ret)
8986 		return ret;
8987 
8988 	if (val > 100)
8989 		return -EINVAL;
8990 
8991 	if (!val)
8992 		val = 1;
8993 
8994 	tr->buffer_percent = val;
8995 
8996 	(*ppos)++;
8997 
8998 	return cnt;
8999 }
9000 
9001 static const struct file_operations buffer_percent_fops = {
9002 	.open		= tracing_open_generic_tr,
9003 	.read		= buffer_percent_read,
9004 	.write		= buffer_percent_write,
9005 	.release	= tracing_release_generic_tr,
9006 	.llseek		= default_llseek,
9007 };
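
/*
 * tr->buffer_percent is consumed by wait_on_pipe() in
 * tracing_buffers_splice_read() above: a blocked reader is only woken
 * once the ring buffer is at least that percent full.  Per the write
 * handler, a value of 0 is bumped to 1 and values above 100 are rejected.
 */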
9008 
9009 static struct dentry *trace_instance_dir;
9010 
9011 static void
9012 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9013 
9014 static int
9015 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9016 {
9017 	enum ring_buffer_flags rb_flags;
9018 
9019 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9020 
9021 	buf->tr = tr;
9022 
9023 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9024 	if (!buf->buffer)
9025 		return -ENOMEM;
9026 
9027 	buf->data = alloc_percpu(struct trace_array_cpu);
9028 	if (!buf->data) {
9029 		ring_buffer_free(buf->buffer);
9030 		buf->buffer = NULL;
9031 		return -ENOMEM;
9032 	}
9033 
9034 	/* Allocate the first page for all buffers */
9035 	set_buffer_entries(&tr->array_buffer,
9036 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9037 
9038 	return 0;
9039 }
9040 
9041 static int allocate_trace_buffers(struct trace_array *tr, int size)
9042 {
9043 	int ret;
9044 
9045 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9046 	if (ret)
9047 		return ret;
9048 
9049 #ifdef CONFIG_TRACER_MAX_TRACE
9050 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9051 				    allocate_snapshot ? size : 1);
9052 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9053 		ring_buffer_free(tr->array_buffer.buffer);
9054 		tr->array_buffer.buffer = NULL;
9055 		free_percpu(tr->array_buffer.data);
9056 		tr->array_buffer.data = NULL;
9057 		return -ENOMEM;
9058 	}
9059 	tr->allocated_snapshot = allocate_snapshot;
9060 
9061 	/*
9062 	 * Only the top level trace array gets its snapshot allocated
9063 	 * from the kernel command line.
9064 	 */
9065 	allocate_snapshot = false;
9066 #endif
9067 
9068 	return 0;
9069 }
9070 
9071 static void free_trace_buffer(struct array_buffer *buf)
9072 {
9073 	if (buf->buffer) {
9074 		ring_buffer_free(buf->buffer);
9075 		buf->buffer = NULL;
9076 		free_percpu(buf->data);
9077 		buf->data = NULL;
9078 	}
9079 }
9080 
9081 static void free_trace_buffers(struct trace_array *tr)
9082 {
9083 	if (!tr)
9084 		return;
9085 
9086 	free_trace_buffer(&tr->array_buffer);
9087 
9088 #ifdef CONFIG_TRACER_MAX_TRACE
9089 	free_trace_buffer(&tr->max_buffer);
9090 #endif
9091 }
9092 
9093 static void init_trace_flags_index(struct trace_array *tr)
9094 {
9095 	int i;
9096 
9097 	/* Used by the trace options files */
9098 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9099 		tr->trace_flags_index[i] = i;
9100 }
9101 
9102 static void __update_tracer_options(struct trace_array *tr)
9103 {
9104 	struct tracer *t;
9105 
9106 	for (t = trace_types; t; t = t->next)
9107 		add_tracer_options(tr, t);
9108 }
9109 
9110 static void update_tracer_options(struct trace_array *tr)
9111 {
9112 	mutex_lock(&trace_types_lock);
9113 	__update_tracer_options(tr);
9114 	mutex_unlock(&trace_types_lock);
9115 }
9116 
9117 /* Must have trace_types_lock held */
9118 struct trace_array *trace_array_find(const char *instance)
9119 {
9120 	struct trace_array *tr, *found = NULL;
9121 
9122 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9123 		if (tr->name && strcmp(tr->name, instance) == 0) {
9124 			found = tr;
9125 			break;
9126 		}
9127 	}
9128 
9129 	return found;
9130 }
9131 
9132 struct trace_array *trace_array_find_get(const char *instance)
9133 {
9134 	struct trace_array *tr;
9135 
9136 	mutex_lock(&trace_types_lock);
9137 	tr = trace_array_find(instance);
9138 	if (tr)
9139 		tr->ref++;
9140 	mutex_unlock(&trace_types_lock);
9141 
9142 	return tr;
9143 }
9144 
9145 static int trace_array_create_dir(struct trace_array *tr)
9146 {
9147 	int ret;
9148 
9149 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9150 	if (!tr->dir)
9151 		return -EINVAL;
9152 
9153 	ret = event_trace_add_tracer(tr->dir, tr);
9154 	if (ret) {
9155 		tracefs_remove(tr->dir);
9156 		return ret;
9157 	}
9158 
9159 	init_tracer_tracefs(tr, tr->dir);
9160 	__update_tracer_options(tr);
9161 
9162 	return ret;
9163 }
9164 
9165 static struct trace_array *trace_array_create(const char *name)
9166 {
9167 	struct trace_array *tr;
9168 	int ret;
9169 
9170 	ret = -ENOMEM;
9171 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9172 	if (!tr)
9173 		return ERR_PTR(ret);
9174 
9175 	tr->name = kstrdup(name, GFP_KERNEL);
9176 	if (!tr->name)
9177 		goto out_free_tr;
9178 
9179 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9180 		goto out_free_tr;
9181 
9182 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9183 
9184 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9185 
9186 	raw_spin_lock_init(&tr->start_lock);
9187 
9188 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9189 
9190 	tr->current_trace = &nop_trace;
9191 
9192 	INIT_LIST_HEAD(&tr->systems);
9193 	INIT_LIST_HEAD(&tr->events);
9194 	INIT_LIST_HEAD(&tr->hist_vars);
9195 	INIT_LIST_HEAD(&tr->err_log);
9196 
9197 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9198 		goto out_free_tr;
9199 
9200 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9201 		goto out_free_tr;
9202 
9203 	ftrace_init_trace_array(tr);
9204 
9205 	init_trace_flags_index(tr);
9206 
9207 	if (trace_instance_dir) {
9208 		ret = trace_array_create_dir(tr);
9209 		if (ret)
9210 			goto out_free_tr;
9211 	} else
9212 		__trace_early_add_events(tr);
9213 
9214 	list_add(&tr->list, &ftrace_trace_arrays);
9215 
9216 	tr->ref++;
9217 
9218 	return tr;
9219 
9220  out_free_tr:
9221 	ftrace_free_ftrace_ops(tr);
9222 	free_trace_buffers(tr);
9223 	free_cpumask_var(tr->tracing_cpumask);
9224 	kfree(tr->name);
9225 	kfree(tr);
9226 
9227 	return ERR_PTR(ret);
9228 }
9229 
9230 static int instance_mkdir(const char *name)
9231 {
9232 	struct trace_array *tr;
9233 	int ret;
9234 
9235 	mutex_lock(&event_mutex);
9236 	mutex_lock(&trace_types_lock);
9237 
9238 	ret = -EEXIST;
9239 	if (trace_array_find(name))
9240 		goto out_unlock;
9241 
9242 	tr = trace_array_create(name);
9243 
9244 	ret = PTR_ERR_OR_ZERO(tr);
9245 
9246 out_unlock:
9247 	mutex_unlock(&trace_types_lock);
9248 	mutex_unlock(&event_mutex);
9249 	return ret;
9250 }
9251 
9252 /**
9253  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9254  * @name: The name of the trace array to be looked up/created.
9255  *
9256  * Returns a pointer to the trace array with the given name, or NULL
9257  * if it cannot be created.
9258  *
9259  * NOTE: This function increments the reference counter associated with the
9260  * trace array returned. This makes sure it cannot be freed while in use.
9261  * Use trace_array_put() once the trace array is no longer needed.
9262  * If the trace_array is to be freed, trace_array_destroy() needs to
9263  * be called after the trace_array_put(), or simply let user space delete
9264  * it from the tracefs instances directory. But until the
9265  * trace_array_put() is called, user space can not delete it.
9266  *
9267  */
9268 struct trace_array *trace_array_get_by_name(const char *name)
9269 {
9270 	struct trace_array *tr;
9271 
9272 	mutex_lock(&event_mutex);
9273 	mutex_lock(&trace_types_lock);
9274 
9275 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9276 		if (tr->name && strcmp(tr->name, name) == 0)
9277 			goto out_unlock;
9278 	}
9279 
9280 	tr = trace_array_create(name);
9281 
9282 	if (IS_ERR(tr))
9283 		tr = NULL;
9284 out_unlock:
9285 	if (tr)
9286 		tr->ref++;
9287 
9288 	mutex_unlock(&trace_types_lock);
9289 	mutex_unlock(&event_mutex);
9290 	return tr;
9291 }
9292 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
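
/*
 * Illustrative module-side sketch of the lifetime rules described above
 * (hypothetical instance name, error handling trimmed):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);		// drop our reference
 *	trace_array_destroy(tr);	// optionally remove the instance
 */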
9293 
9294 static int __remove_instance(struct trace_array *tr)
9295 {
9296 	int i;
9297 
9298 	/* Reference counter for a newly created trace array = 1. */
9299 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9300 		return -EBUSY;
9301 
9302 	list_del(&tr->list);
9303 
9304 	/* Disable all the flags that were enabled coming in */
9305 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9306 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9307 			set_tracer_flag(tr, 1 << i, 0);
9308 	}
9309 
9310 	tracing_set_nop(tr);
9311 	clear_ftrace_function_probes(tr);
9312 	event_trace_del_tracer(tr);
9313 	ftrace_clear_pids(tr);
9314 	ftrace_destroy_function_files(tr);
9315 	tracefs_remove(tr->dir);
9316 	free_percpu(tr->last_func_repeats);
9317 	free_trace_buffers(tr);
9318 
9319 	for (i = 0; i < tr->nr_topts; i++) {
9320 		kfree(tr->topts[i].topts);
9321 	}
9322 	kfree(tr->topts);
9323 
9324 	free_cpumask_var(tr->tracing_cpumask);
9325 	kfree(tr->name);
9326 	kfree(tr);
9327 
9328 	return 0;
9329 }
9330 
9331 int trace_array_destroy(struct trace_array *this_tr)
9332 {
9333 	struct trace_array *tr;
9334 	int ret;
9335 
9336 	if (!this_tr)
9337 		return -EINVAL;
9338 
9339 	mutex_lock(&event_mutex);
9340 	mutex_lock(&trace_types_lock);
9341 
9342 	ret = -ENODEV;
9343 
9344 	/* Make sure the trace array exists before destroying it. */
9345 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9346 		if (tr == this_tr) {
9347 			ret = __remove_instance(tr);
9348 			break;
9349 		}
9350 	}
9351 
9352 	mutex_unlock(&trace_types_lock);
9353 	mutex_unlock(&event_mutex);
9354 
9355 	return ret;
9356 }
9357 EXPORT_SYMBOL_GPL(trace_array_destroy);
9358 
9359 static int instance_rmdir(const char *name)
9360 {
9361 	struct trace_array *tr;
9362 	int ret;
9363 
9364 	mutex_lock(&event_mutex);
9365 	mutex_lock(&trace_types_lock);
9366 
9367 	ret = -ENODEV;
9368 	tr = trace_array_find(name);
9369 	if (tr)
9370 		ret = __remove_instance(tr);
9371 
9372 	mutex_unlock(&trace_types_lock);
9373 	mutex_unlock(&event_mutex);
9374 
9375 	return ret;
9376 }
9377 
9378 static __init void create_trace_instances(struct dentry *d_tracer)
9379 {
9380 	struct trace_array *tr;
9381 
9382 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9383 							 instance_mkdir,
9384 							 instance_rmdir);
9385 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9386 		return;
9387 
9388 	mutex_lock(&event_mutex);
9389 	mutex_lock(&trace_types_lock);
9390 
9391 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9392 		if (!tr->name)
9393 			continue;
9394 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9395 			     "Failed to create instance directory\n"))
9396 			break;
9397 	}
9398 
9399 	mutex_unlock(&trace_types_lock);
9400 	mutex_unlock(&event_mutex);
9401 }
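
/*
 * From user space the callbacks registered above are reached through the
 * instances directory (illustrative paths):
 *
 *	mkdir /sys/kernel/tracing/instances/foo	-> instance_mkdir("foo")
 *	rmdir /sys/kernel/tracing/instances/foo	-> instance_rmdir("foo")
 */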
9402 
9403 static void
9404 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9405 {
9406 	struct trace_event_file *file;
9407 	int cpu;
9408 
9409 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9410 			tr, &show_traces_fops);
9411 
9412 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9413 			tr, &set_tracer_fops);
9414 
9415 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9416 			  tr, &tracing_cpumask_fops);
9417 
9418 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9419 			  tr, &tracing_iter_fops);
9420 
9421 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9422 			  tr, &tracing_fops);
9423 
9424 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9425 			  tr, &tracing_pipe_fops);
9426 
9427 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9428 			  tr, &tracing_entries_fops);
9429 
9430 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9431 			  tr, &tracing_total_entries_fops);
9432 
9433 	trace_create_file("free_buffer", 0200, d_tracer,
9434 			  tr, &tracing_free_buffer_fops);
9435 
9436 	trace_create_file("trace_marker", 0220, d_tracer,
9437 			  tr, &tracing_mark_fops);
9438 
9439 	file = __find_event_file(tr, "ftrace", "print");
9440 	if (file && file->dir)
9441 		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9442 				  file, &event_trigger_fops);
9443 	tr->trace_marker_file = file;
9444 
9445 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9446 			  tr, &tracing_mark_raw_fops);
9447 
9448 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9449 			  &trace_clock_fops);
9450 
9451 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9452 			  tr, &rb_simple_fops);
9453 
9454 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9455 			  &trace_time_stamp_mode_fops);
9456 
9457 	tr->buffer_percent = 50;
9458 
9459 	trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9460 			tr, &buffer_percent_fops);
9461 
9462 	create_trace_options_dir(tr);
9463 
9464 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9465 	trace_create_maxlat_file(tr, d_tracer);
9466 #endif
9467 
9468 	if (ftrace_create_function_files(tr, d_tracer))
9469 		MEM_FAIL(1, "Could not allocate function filter files");
9470 
9471 #ifdef CONFIG_TRACER_SNAPSHOT
9472 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9473 			  tr, &snapshot_fops);
9474 #endif
9475 
9476 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9477 			  tr, &tracing_err_log_fops);
9478 
9479 	for_each_tracing_cpu(cpu)
9480 		tracing_init_tracefs_percpu(tr, cpu);
9481 
9482 	ftrace_init_tracefs(tr, d_tracer);
9483 }
9484 
9485 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9486 {
9487 	struct vfsmount *mnt;
9488 	struct file_system_type *type;
9489 
9490 	/*
9491 	 * To maintain backward compatibility for tools that mount
9492 	 * debugfs to get to the tracing facility, tracefs is automatically
9493 	 * mounted to the debugfs/tracing directory.
9494 	 */
9495 	type = get_fs_type("tracefs");
9496 	if (!type)
9497 		return NULL;
9498 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9499 	put_filesystem(type);
9500 	if (IS_ERR(mnt))
9501 		return NULL;
9502 	mntget(mnt);
9503 
9504 	return mnt;
9505 }
9506 
9507 /**
9508  * tracing_init_dentry - initialize top level trace array
9509  *
9510  * This is called when creating files or directories in the tracing
9511  * directory. It is called via fs_initcall() by any of the boot up code
9512  * and expects to return the dentry of the top level tracing directory.
9513  */
9514 int tracing_init_dentry(void)
9515 {
9516 	struct trace_array *tr = &global_trace;
9517 
9518 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9519 		pr_warn("Tracing disabled due to lockdown\n");
9520 		return -EPERM;
9521 	}
9522 
9523 	/* The top level trace array uses NULL as parent */
9524 	if (tr->dir)
9525 		return 0;
9526 
9527 	if (WARN_ON(!tracefs_initialized()))
9528 		return -ENODEV;
9529 
9530 	/*
9531 	 * As there may still be users that expect the tracing
9532 	 * files to exist in debugfs/tracing, we must automount
9533 	 * the tracefs file system there, so older tools still
9534 	 * work with the newer kernel.
9535 	 */
9536 	tr->dir = debugfs_create_automount("tracing", NULL,
9537 					   trace_automount, NULL);
9538 
9539 	return 0;
9540 }
9541 
9542 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9543 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9544 
9545 static struct workqueue_struct *eval_map_wq __initdata;
9546 static struct work_struct eval_map_work __initdata;
9547 
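/*
 * Resolve the eval (enum/sizeof) symbols that the built-in trace event
 * print formats reference into their numeric values. With many maps this
 * can take a while, so it is normally deferred to a workqueue and
 * flushed by trace_eval_sync() below.
 */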
9548 static void __init eval_map_work_func(struct work_struct *work)
9549 {
9550 	int len;
9551 
9552 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9553 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9554 }
9555 
9556 static int __init trace_eval_init(void)
9557 {
9558 	INIT_WORK(&eval_map_work, eval_map_work_func);
9559 
9560 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9561 	if (!eval_map_wq) {
9562 		pr_err("Unable to allocate eval_map_wq\n");
9563 		/* Fall back to doing the work synchronously */
9564 		eval_map_work_func(&eval_map_work);
9565 		return -ENOMEM;
9566 	}
9567 
9568 	queue_work(eval_map_wq, &eval_map_work);
9569 	return 0;
9570 }
9571 
9572 static int __init trace_eval_sync(void)
9573 {
9574 	/* Make sure the eval map updates are finished */
9575 	if (eval_map_wq)
9576 		destroy_workqueue(eval_map_wq);
9577 	return 0;
9578 }
9579 
9580 late_initcall_sync(trace_eval_sync);
9581 
9582 
9583 #ifdef CONFIG_MODULES
9584 static void trace_module_add_evals(struct module *mod)
9585 {
9586 	if (!mod->num_trace_evals)
9587 		return;
9588 
9589 	/*
9590 	 * Modules with bad taint do not have events created, so do
9591 	 * not bother with their eval (enum) maps either.
9592 	 */
9593 	if (trace_module_has_bad_taint(mod))
9594 		return;
9595 
9596 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9597 }
9598 
9599 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9600 static void trace_module_remove_evals(struct module *mod)
9601 {
9602 	union trace_eval_map_item *map;
9603 	union trace_eval_map_item **last = &trace_eval_maps;
9604 
9605 	if (!mod->num_trace_evals)
9606 		return;
9607 
9608 	mutex_lock(&trace_eval_mutex);
9609 
9610 	map = trace_eval_maps;
9611 
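	/*
	 * The eval maps are kept in blocks: a head item identifying the
	 * owning module, followed by its map entries, followed by a tail
	 * item linking to the next block. Walk the blocks until we find
	 * the one that belongs to this module.
	 */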
9612 	while (map) {
9613 		if (map->head.mod == mod)
9614 			break;
9615 		map = trace_eval_jmp_to_tail(map);
9616 		last = &map->tail.next;
9617 		map = map->tail.next;
9618 	}
9619 	if (!map)
9620 		goto out;
9621 
9622 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9623 	kfree(map);
9624  out:
9625 	mutex_unlock(&trace_eval_mutex);
9626 }
9627 #else
9628 static inline void trace_module_remove_evals(struct module *mod) { }
9629 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9630 
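/*
 * Add a module's eval maps when it is loaded and (when supported)
 * remove them again when the module is unloaded.
 */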
9631 static int trace_module_notify(struct notifier_block *self,
9632 			       unsigned long val, void *data)
9633 {
9634 	struct module *mod = data;
9635 
9636 	switch (val) {
9637 	case MODULE_STATE_COMING:
9638 		trace_module_add_evals(mod);
9639 		break;
9640 	case MODULE_STATE_GOING:
9641 		trace_module_remove_evals(mod);
9642 		break;
9643 	}
9644 
9645 	return NOTIFY_OK;
9646 }
9647 
9648 static struct notifier_block trace_module_nb = {
9649 	.notifier_call = trace_module_notify,
9650 	.priority = 0,
9651 };
9652 #endif /* CONFIG_MODULES */
9653 
9654 static __init int tracer_init_tracefs(void)
9655 {
9656 	int ret;
9657 
9658 	trace_access_lock_init();
9659 
9660 	ret = tracing_init_dentry();
9661 	if (ret)
9662 		return 0;
9663 
9664 	event_trace_init();
9665 
9666 	init_tracer_tracefs(&global_trace, NULL);
9667 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9668 
9669 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9670 			&global_trace, &tracing_thresh_fops);
9671 
9672 	trace_create_file("README", TRACE_MODE_READ, NULL,
9673 			NULL, &tracing_readme_fops);
9674 
9675 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9676 			NULL, &tracing_saved_cmdlines_fops);
9677 
9678 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9679 			  NULL, &tracing_saved_cmdlines_size_fops);
9680 
9681 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9682 			NULL, &tracing_saved_tgids_fops);
9683 
9684 	trace_eval_init();
9685 
9686 	trace_create_eval_file(NULL);
9687 
9688 #ifdef CONFIG_MODULES
9689 	register_module_notifier(&trace_module_nb);
9690 #endif
9691 
9692 #ifdef CONFIG_DYNAMIC_FTRACE
9693 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9694 			NULL, &tracing_dyn_info_fops);
9695 #endif
9696 
9697 	create_trace_instances(NULL);
9698 
9699 	update_tracer_options(&global_trace);
9700 
9701 	return 0;
9702 }
9703 
9704 fs_initcall(tracer_init_tracefs);
9705 
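/*
 * Dump the ftrace ring buffer to the console on a panic or oops when
 * the user asked for it, e.g. via the "ftrace_dump_on_oops" kernel
 * command line option or the kernel.ftrace_dump_on_oops sysctl.
 */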
9706 static int trace_panic_handler(struct notifier_block *this,
9707 			       unsigned long event, void *unused)
9708 {
9709 	if (ftrace_dump_on_oops)
9710 		ftrace_dump(ftrace_dump_on_oops);
9711 	return NOTIFY_OK;
9712 }
9713 
9714 static struct notifier_block trace_panic_notifier = {
9715 	.notifier_call  = trace_panic_handler,
9716 	.next           = NULL,
9717 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9718 };
9719 
9720 static int trace_die_handler(struct notifier_block *self,
9721 			     unsigned long val,
9722 			     void *data)
9723 {
9724 	switch (val) {
9725 	case DIE_OOPS:
9726 		if (ftrace_dump_on_oops)
9727 			ftrace_dump(ftrace_dump_on_oops);
9728 		break;
9729 	default:
9730 		break;
9731 	}
9732 	return NOTIFY_OK;
9733 }
9734 
9735 static struct notifier_block trace_die_notifier = {
9736 	.notifier_call = trace_die_handler,
9737 	.priority = 200
9738 };
9739 
9740 /*
9741  * printk caps messages at 1024 bytes, and we really don't need it that big.
9742  * Nothing should be printing 1000 characters anyway.
9743  */
9744 #define TRACE_MAX_PRINT		1000
9745 
9746 /*
9747  * Define here KERN_TRACE so that we have one place to modify
9748  * it if we decide to change what log level the ftrace dump
9749  * should be at.
9750  */
9751 #define KERN_TRACE		KERN_EMERG
9752 
9753 void
9754 trace_printk_seq(struct trace_seq *s)
9755 {
9756 	/* Probably should print a warning here. */
9757 	if (s->seq.len >= TRACE_MAX_PRINT)
9758 		s->seq.len = TRACE_MAX_PRINT;
9759 
9760 	/*
9761 	 * More paranoid code. Although the buffer size is set to
9762 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9763 	 * an extra layer of protection.
9764 	 */
9765 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9766 		s->seq.len = s->seq.size - 1;
9767 
9768 	/* should already be NUL-terminated, but we are paranoid. */
9769 	s->buffer[s->seq.len] = 0;
9770 
9771 	printk(KERN_TRACE "%s", s->buffer);
9772 
9773 	trace_seq_init(s);
9774 }
9775 
9776 void trace_init_global_iter(struct trace_iterator *iter)
9777 {
9778 	iter->tr = &global_trace;
9779 	iter->trace = iter->tr->current_trace;
9780 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9781 	iter->array_buffer = &global_trace.array_buffer;
9782 
9783 	if (iter->trace && iter->trace->open)
9784 		iter->trace->open(iter);
9785 
9786 	/* Annotate start of buffers if we had overruns */
9787 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9788 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9789 
9790 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9791 	if (trace_clocks[iter->tr->clock_id].in_ns)
9792 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9793 }
9794 
9795 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9796 {
9797 	/* use static because iter can be a bit big for the stack */
9798 	static struct trace_iterator iter;
9799 	static atomic_t dump_running;
9800 	struct trace_array *tr = &global_trace;
9801 	unsigned int old_userobj;
9802 	unsigned long flags;
9803 	int cnt = 0, cpu;
9804 
9805 	/* Only allow one dump user at a time. */
9806 	if (atomic_inc_return(&dump_running) != 1) {
9807 		atomic_dec(&dump_running);
9808 		return;
9809 	}
9810 
9811 	/*
9812 	 * Always turn off tracing when we dump.
9813 	 * We don't need to show trace output of what happens
9814 	 * between multiple crashes.
9815 	 *
9816 	 * If the user does a sysrq-z, then they can re-enable
9817 	 * tracing with echo 1 > tracing_on.
9818 	 */
9819 	tracing_off();
9820 
9821 	local_irq_save(flags);
9822 
9823 	/* Simulate the iterator */
9824 	trace_init_global_iter(&iter);
9825 	/* Cannot use kmalloc for iter.temp and iter.fmt; use the static buffers */
9826 	iter.temp = static_temp_buf;
9827 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9828 	iter.fmt = static_fmt_buf;
9829 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9830 
9831 	for_each_tracing_cpu(cpu) {
9832 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9833 	}
9834 
9835 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9836 
9837 	/* don't look at user memory in panic mode */
9838 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9839 
9840 	switch (oops_dump_mode) {
9841 	case DUMP_ALL:
9842 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9843 		break;
9844 	case DUMP_ORIG:
9845 		iter.cpu_file = raw_smp_processor_id();
9846 		break;
9847 	case DUMP_NONE:
9848 		goto out_enable;
9849 	default:
9850 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9851 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9852 	}
9853 
9854 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9855 
9856 	/* Did function tracer already get disabled? */
9857 	if (ftrace_is_dead()) {
9858 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9859 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9860 	}
9861 
9862 	/*
9863 	 * We need to stop all tracing on all CPUs to read
9864 	 * the next buffer. This is a bit expensive, but is
9865 	 * not done often. We print everything we can read,
9866 	 * and then release the locks again.
9867 	 */
9868 
9869 	while (!trace_empty(&iter)) {
9870 
9871 		if (!cnt)
9872 			printk(KERN_TRACE "---------------------------------\n");
9873 
9874 		cnt++;
9875 
9876 		trace_iterator_reset(&iter);
9877 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9878 
9879 		if (trace_find_next_entry_inc(&iter) != NULL) {
9880 			int ret;
9881 
9882 			ret = print_trace_line(&iter);
9883 			if (ret != TRACE_TYPE_NO_CONSUME)
9884 				trace_consume(&iter);
9885 		}
9886 		touch_nmi_watchdog();
9887 
9888 		trace_printk_seq(&iter.seq);
9889 	}
9890 
9891 	if (!cnt)
9892 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9893 	else
9894 		printk(KERN_TRACE "---------------------------------\n");
9895 
9896  out_enable:
9897 	tr->trace_flags |= old_userobj;
9898 
9899 	for_each_tracing_cpu(cpu) {
9900 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9901 	}
9902 	atomic_dec(&dump_running);
9903 	local_irq_restore(flags);
9904 }
9905 EXPORT_SYMBOL_GPL(ftrace_dump);
9906 
9907 #define WRITE_BUFSIZE  4096
9908 
9909 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9910 				size_t count, loff_t *ppos,
9911 				int (*createfn)(const char *))
9912 {
9913 	char *kbuf, *buf, *tmp;
9914 	int ret = 0;
9915 	size_t done = 0;
9916 	size_t size;
9917 
9918 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9919 	if (!kbuf)
9920 		return -ENOMEM;
9921 
9922 	while (done < count) {
9923 		size = count - done;
9924 
9925 		if (size >= WRITE_BUFSIZE)
9926 			size = WRITE_BUFSIZE - 1;
9927 
9928 		if (copy_from_user(kbuf, buffer + done, size)) {
9929 			ret = -EFAULT;
9930 			goto out;
9931 		}
9932 		kbuf[size] = '\0';
9933 		buf = kbuf;
9934 		do {
9935 			tmp = strchr(buf, '\n');
9936 			if (tmp) {
9937 				*tmp = '\0';
9938 				size = tmp - buf + 1;
9939 			} else {
9940 				size = strlen(buf);
9941 				if (done + size < count) {
9942 					if (buf != kbuf)
9943 						break;
9944 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9945 					pr_warn("Line length is too long: Should be less than %d\n",
9946 						WRITE_BUFSIZE - 2);
9947 					ret = -EINVAL;
9948 					goto out;
9949 				}
9950 			}
9951 			done += size;
9952 
9953 			/* Remove comments */
9954 			tmp = strchr(buf, '#');
9955 
9956 			if (tmp)
9957 				*tmp = '\0';
9958 
9959 			ret = createfn(buf);
9960 			if (ret)
9961 				goto out;
9962 			buf += size;
9963 
9964 		} while (done < count);
9965 	}
9966 	ret = done;
9967 
9968 out:
9969 	kfree(kbuf);
9970 
9971 	return ret;
9972 }
9973 
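/*
 * Allocate the global trace array's ring buffers and register the nop
 * tracer. Called once from early_trace_init() during early boot.
 */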
9974 __init static int tracer_alloc_buffers(void)
9975 {
9976 	int ring_buf_size;
9977 	int ret = -ENOMEM;
9978 
9979 
9980 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9981 		pr_warn("Tracing disabled due to lockdown\n");
9982 		return -EPERM;
9983 	}
9984 
9985 	/*
9986 	 * Make sure we don't accidentally add more trace options
9987 	 * than we have bits for.
9988 	 */
9989 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9990 
9991 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9992 		goto out;
9993 
9994 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9995 		goto out_free_buffer_mask;
9996 
9997 	/* Only allocate trace_printk buffers if a trace_printk exists */
9998 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9999 		/* Must be called before global_trace.buffer is allocated */
10000 		trace_printk_init_buffers();
10001 
10002 	/* To save memory, keep the ring buffer size to its minimum */
10003 	if (ring_buffer_expanded)
10004 		ring_buf_size = trace_buf_size;
10005 	else
10006 		ring_buf_size = 1;
10007 
10008 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10009 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10010 
10011 	raw_spin_lock_init(&global_trace.start_lock);
10012 
10013 	/*
10014 	 * The prepare callback allocates some memory for the ring buffer. We
10015 	 * don't free the buffer if the CPU goes down. If we were to free
10016 	 * the buffer, then the user would lose any trace that was in the
10017 	 * buffer. The memory will be removed once the "instance" is removed.
10018 	 */
10019 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10020 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10021 				      NULL);
10022 	if (ret < 0)
10023 		goto out_free_cpumask;
10024 	/* Used for event triggers */
10025 	ret = -ENOMEM;
10026 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10027 	if (!temp_buffer)
10028 		goto out_rm_hp_state;
10029 
10030 	if (trace_create_savedcmd() < 0)
10031 		goto out_free_temp_buffer;
10032 
10033 	/* TODO: make the number of buffers hot pluggable with CPUS */
10034 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10035 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10036 		goto out_free_savedcmd;
10037 	}
10038 
10039 	if (global_trace.buffer_disabled)
10040 		tracing_off();
10041 
10042 	if (trace_boot_clock) {
10043 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10044 		if (ret < 0)
10045 			pr_warn("Trace clock %s not defined, going back to default\n",
10046 				trace_boot_clock);
10047 	}
10048 
10049 	/*
10050 	 * register_tracer() might reference current_trace, so it
10051 	 * needs to be set before we register anything. This is
10052 	 * just a bootstrap of current_trace anyway.
10053 	 */
10054 	global_trace.current_trace = &nop_trace;
10055 
10056 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10057 
10058 	ftrace_init_global_array_ops(&global_trace);
10059 
10060 	init_trace_flags_index(&global_trace);
10061 
10062 	register_tracer(&nop_trace);
10063 
10064 	/* Function tracing may start here (via kernel command line) */
10065 	init_function_trace();
10066 
10067 	/* All seems OK, enable tracing */
10068 	tracing_disabled = 0;
10069 
10070 	atomic_notifier_chain_register(&panic_notifier_list,
10071 				       &trace_panic_notifier);
10072 
10073 	register_die_notifier(&trace_die_notifier);
10074 
10075 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10076 
10077 	INIT_LIST_HEAD(&global_trace.systems);
10078 	INIT_LIST_HEAD(&global_trace.events);
10079 	INIT_LIST_HEAD(&global_trace.hist_vars);
10080 	INIT_LIST_HEAD(&global_trace.err_log);
10081 	list_add(&global_trace.list, &ftrace_trace_arrays);
10082 
10083 	apply_trace_boot_options();
10084 
10085 	register_snapshot_cmd();
10086 
10087 	test_can_verify();
10088 
10089 	return 0;
10090 
10091 out_free_savedcmd:
10092 	free_saved_cmdlines_buffer(savedcmd);
10093 out_free_temp_buffer:
10094 	ring_buffer_free(temp_buffer);
10095 out_rm_hp_state:
10096 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10097 out_free_cpumask:
10098 	free_cpumask_var(global_trace.tracing_cpumask);
10099 out_free_buffer_mask:
10100 	free_cpumask_var(tracing_buffer_mask);
10101 out:
10102 	return ret;
10103 }
10104 
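/*
 * Boot ordering: early_trace_init() is called from start_kernel() so
 * that the trace buffers are available as early as possible, while
 * trace_init() runs slightly later to set up the trace event
 * infrastructure.
 */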
10105 void __init early_trace_init(void)
10106 {
10107 	if (tracepoint_printk) {
10108 		tracepoint_print_iter =
10109 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10110 		if (MEM_FAIL(!tracepoint_print_iter,
10111 			     "Failed to allocate trace iterator\n"))
10112 			tracepoint_printk = 0;
10113 		else
10114 			static_key_enable(&tracepoint_printk_key.key);
10115 	}
10116 	tracer_alloc_buffers();
10117 }
10118 
10119 void __init trace_init(void)
10120 {
10121 	trace_event_init();
10122 }
10123 
10124 __init static void clear_boot_tracer(void)
10125 {
10126 	/*
10127 	 * The default bootup tracer name points into an __init section
10128 	 * that will soon be freed. This function is called from a late
10129 	 * initcall; if the boot tracer was never registered, clear the
10130 	 * pointer out so that a later registration does not access
10131 	 * memory that is about to be freed.
10132 	 */
10133 	if (!default_bootup_tracer)
10134 		return;
10135 
10136 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10137 	       default_bootup_tracer);
10138 	default_bootup_tracer = NULL;
10139 }
10140 
10141 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10142 __init static void tracing_set_default_clock(void)
10143 {
10144 	/* sched_clock_stable() is determined in late_initcall */
10145 	if (!trace_boot_clock && !sched_clock_stable()) {
10146 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10147 			pr_warn("Can not set tracing clock due to lockdown\n");
10148 			return;
10149 		}
10150 
10151 		printk(KERN_WARNING
10152 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10153 		       "If you want to keep using the local clock, then add:\n"
10154 		       "  \"trace_clock=local\"\n"
10155 		       "on the kernel command line\n");
10156 		tracing_set_clock(&global_trace, "global");
10157 	}
10158 }
10159 #else
10160 static inline void tracing_set_default_clock(void) { }
10161 #endif
10162 
10163 __init static int late_trace_init(void)
10164 {
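	/*
	 * If the user asked for tracepoint printing only during boot
	 * (tracepoint_printk_stop_on_boot), turn it back off now that
	 * boot is complete.
	 */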
10165 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10166 		static_key_disable(&tracepoint_printk_key.key);
10167 		tracepoint_printk = 0;
10168 	}
10169 
10170 	tracing_set_default_clock();
10171 	clear_boot_tracer();
10172 	return 0;
10173 }
10174 
10175 late_initcall_sync(late_trace_init);
10176