xref: /linux/kernel/trace/trace.c (revision 314f14abdeca78de6b16f97d796a9966ce4b90ae)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occurred
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputing it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" is first and points to NULL as it must be different
154 	 * than "mod" or "eval_string"
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
174 
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 	default_bootup_tracer = bootup_tracer_buf;
193 	/* We are using ftrace early, expand it */
194 	ring_buffer_expanded = true;
195 	return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198 
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 		ftrace_dump_on_oops = DUMP_ALL;
203 		return 1;
204 	}
205 
206 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 		ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210 
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214 
215 static int __init stop_trace_on_warning(char *str)
216 {
217 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 		__disable_trace_on_warning = 1;
219 	return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222 
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 	allocate_snapshot = true;
226 	/* We also need the main ring buffer expanded */
227 	ring_buffer_expanded = true;
228 	return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231 
232 
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234 
235 static int __init set_trace_boot_options(char *str)
236 {
237 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 	return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241 
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244 
245 static int __init set_trace_boot_clock(char *str)
246 {
247 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 	trace_boot_clock = trace_boot_clock_buf;
249 	return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252 
253 static int __init set_tracepoint_printk(char *str)
254 {
255 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256 		tracepoint_printk = 1;
257 	return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260 
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263 	tracepoint_printk_stop_on_boot = true;
264 	return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267 
268 unsigned long long ns2usecs(u64 nsec)
269 {
270 	nsec += 500;
271 	do_div(nsec, 1000);
272 	return nsec;
273 }
274 
275 static void
276 trace_process_export(struct trace_export *export,
277 	       struct ring_buffer_event *event, int flag)
278 {
279 	struct trace_entry *entry;
280 	unsigned int size = 0;
281 
282 	if (export->flags & flag) {
283 		entry = ring_buffer_event_data(event);
284 		size = ring_buffer_event_length(event);
285 		export->write(export, entry, size);
286 	}
287 }
288 
289 static DEFINE_MUTEX(ftrace_export_lock);
290 
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292 
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296 
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299 	if (export->flags & TRACE_EXPORT_FUNCTION)
300 		static_branch_inc(&trace_function_exports_enabled);
301 
302 	if (export->flags & TRACE_EXPORT_EVENT)
303 		static_branch_inc(&trace_event_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_MARKER)
306 		static_branch_inc(&trace_marker_exports_enabled);
307 }
308 
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311 	if (export->flags & TRACE_EXPORT_FUNCTION)
312 		static_branch_dec(&trace_function_exports_enabled);
313 
314 	if (export->flags & TRACE_EXPORT_EVENT)
315 		static_branch_dec(&trace_event_exports_enabled);
316 
317 	if (export->flags & TRACE_EXPORT_MARKER)
318 		static_branch_dec(&trace_marker_exports_enabled);
319 }
320 
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323 	struct trace_export *export;
324 
325 	preempt_disable_notrace();
326 
327 	export = rcu_dereference_raw_check(ftrace_exports_list);
328 	while (export) {
329 		trace_process_export(export, event, flag);
330 		export = rcu_dereference_raw_check(export->next);
331 	}
332 
333 	preempt_enable_notrace();
334 }
335 
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339 	rcu_assign_pointer(export->next, *list);
340 	/*
341 	 * We are entering export into the list but another
342 	 * CPU might be walking that list. We need to make sure
343 	 * the export->next pointer is valid before another CPU sees
344 	 * the export pointer included into the list.
345 	 */
346 	rcu_assign_pointer(*list, export);
347 }
348 
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352 	struct trace_export **p;
353 
354 	for (p = list; *p != NULL; p = &(*p)->next)
355 		if (*p == export)
356 			break;
357 
358 	if (*p != export)
359 		return -1;
360 
361 	rcu_assign_pointer(*p, (*p)->next);
362 
363 	return 0;
364 }
365 
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369 	ftrace_exports_enable(export);
370 
371 	add_trace_export(list, export);
372 }
373 
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377 	int ret;
378 
379 	ret = rm_trace_export(list, export);
380 	ftrace_exports_disable(export);
381 
382 	return ret;
383 }
384 
385 int register_ftrace_export(struct trace_export *export)
386 {
387 	if (WARN_ON_ONCE(!export->write))
388 		return -1;
389 
390 	mutex_lock(&ftrace_export_lock);
391 
392 	add_ftrace_export(&ftrace_exports_list, export);
393 
394 	mutex_unlock(&ftrace_export_lock);
395 
396 	return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399 
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402 	int ret;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	ret = rm_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
413 
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS						\
416 	(FUNCTION_DEFAULT_FLAGS |					\
417 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
418 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
419 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
420 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
421 	 TRACE_ITER_HASH_PTR)
422 
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
425 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426 
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430 
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436 	.trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438 
439 LIST_HEAD(ftrace_trace_arrays);
440 
441 int trace_array_get(struct trace_array *this_tr)
442 {
443 	struct trace_array *tr;
444 	int ret = -ENODEV;
445 
446 	mutex_lock(&trace_types_lock);
447 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448 		if (tr == this_tr) {
449 			tr->ref++;
450 			ret = 0;
451 			break;
452 		}
453 	}
454 	mutex_unlock(&trace_types_lock);
455 
456 	return ret;
457 }
458 
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461 	WARN_ON(!this_tr->ref);
462 	this_tr->ref--;
463 }
464 
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476 	if (!this_tr)
477 		return;
478 
479 	mutex_lock(&trace_types_lock);
480 	__trace_array_put(this_tr);
481 	mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484 
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487 	int ret;
488 
489 	ret = security_locked_down(LOCKDOWN_TRACEFS);
490 	if (ret)
491 		return ret;
492 
493 	if (tracing_disabled)
494 		return -ENODEV;
495 
496 	if (tr && trace_array_get(tr) < 0)
497 		return -ENODEV;
498 
499 	return 0;
500 }
501 
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503 			      struct trace_buffer *buffer,
504 			      struct ring_buffer_event *event)
505 {
506 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507 	    !filter_match_preds(call->filter, rec)) {
508 		__trace_event_discard_commit(buffer, event);
509 		return 1;
510 	}
511 
512 	return 0;
513 }
514 
515 /**
516  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
517  * @filtered_pids: The list of pids to check
518  * @search_pid: The PID to find in @filtered_pids
519  *
520  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
521  */
522 bool
523 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
524 {
525 	return trace_pid_list_is_set(filtered_pids, search_pid);
526 }
527 
528 /**
529  * trace_ignore_this_task - should a task be ignored for tracing
530  * @filtered_pids: The list of pids to check
531  * @filtered_no_pids: The list of pids not to be traced
532  * @task: The task that should be ignored if not filtered
533  *
534  * Checks if @task should be traced or not from @filtered_pids.
535  * Returns true if @task should *NOT* be traced.
536  * Returns false if @task should be traced.
537  */
538 bool
539 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
540 		       struct trace_pid_list *filtered_no_pids,
541 		       struct task_struct *task)
542 {
543 	/*
544 	 * If filtered_no_pids is not empty, and the task's pid is listed
545 	 * in filtered_no_pids, then return true.
546 	 * Otherwise, if filtered_pids is empty, that means we can
547 	 * trace all tasks. If it has content, then only trace pids
548 	 * within filtered_pids.
549 	 */
550 
551 	return (filtered_pids &&
552 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
553 		(filtered_no_pids &&
554 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
555 }
556 
557 /**
558  * trace_filter_add_remove_task - Add or remove a task from a pid_list
559  * @pid_list: The list to modify
560  * @self: The current task for fork or NULL for exit
561  * @task: The task to add or remove
562  *
563  * If adding a task, if @self is defined, the task is only added if @self
564  * is also included in @pid_list. This happens on fork and tasks should
565  * only be added when the parent is listed. If @self is NULL, then the
566  * @task pid will be removed from the list, which would happen on exit
567  * of a task.
568  */
569 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
570 				  struct task_struct *self,
571 				  struct task_struct *task)
572 {
573 	if (!pid_list)
574 		return;
575 
576 	/* For forks, we only add if the forking task is listed */
577 	if (self) {
578 		if (!trace_find_filtered_pid(pid_list, self->pid))
579 			return;
580 	}
581 
582 	/* "self" is set for forks, and NULL for exits */
583 	if (self)
584 		trace_pid_list_set(pid_list, task->pid);
585 	else
586 		trace_pid_list_clear(pid_list, task->pid);
587 }
588 
589 /**
590  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
591  * @pid_list: The pid list to show
592  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
593  * @pos: The position of the file
594  *
595  * This is used by the seq_file "next" operation to iterate the pids
596  * listed in a trace_pid_list structure.
597  *
598  * Returns the pid+1 as we want to display pid of zero, but NULL would
599  * stop the iteration.
600  */
601 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
602 {
603 	long pid = (unsigned long)v;
604 	unsigned int next;
605 
606 	(*pos)++;
607 
608 	/* pid already is +1 of the actual previous bit */
609 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
610 		return NULL;
611 
612 	pid = next;
613 
614 	/* Return pid + 1 to allow zero to be represented */
615 	return (void *)(pid + 1);
616 }
617 
618 /**
619  * trace_pid_start - Used for seq_file to start reading pid lists
620  * @pid_list: The pid list to show
621  * @pos: The position of the file
622  *
623  * This is used by seq_file "start" operation to start the iteration
624  * of listing pids.
625  *
626  * Returns the pid+1 as we want to display pid of zero, but NULL would
627  * stop the iteration.
628  */
629 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
630 {
631 	unsigned long pid;
632 	unsigned int first;
633 	loff_t l = 0;
634 
635 	if (trace_pid_list_first(pid_list, &first) < 0)
636 		return NULL;
637 
638 	pid = first;
639 
640 	/* Return pid + 1 so that zero can be the exit value */
641 	for (pid++; pid && l < *pos;
642 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
643 		;
644 	return (void *)pid;
645 }
646 
647 /**
648  * trace_pid_show - show the current pid in seq_file processing
649  * @m: The seq_file structure to write into
650  * @v: A void pointer of the pid (+1) value to display
651  *
652  * Can be directly used by seq_file operations to display the current
653  * pid value.
654  */
655 int trace_pid_show(struct seq_file *m, void *v)
656 {
657 	unsigned long pid = (unsigned long)v - 1;
658 
659 	seq_printf(m, "%lu\n", pid);
660 	return 0;
661 }
662 
663 /* 128 should be much more than enough */
664 #define PID_BUF_SIZE		127
665 
666 int trace_pid_write(struct trace_pid_list *filtered_pids,
667 		    struct trace_pid_list **new_pid_list,
668 		    const char __user *ubuf, size_t cnt)
669 {
670 	struct trace_pid_list *pid_list;
671 	struct trace_parser parser;
672 	unsigned long val;
673 	int nr_pids = 0;
674 	ssize_t read = 0;
675 	ssize_t ret;
676 	loff_t pos;
677 	pid_t pid;
678 
679 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
680 		return -ENOMEM;
681 
682 	/*
683 	 * Always recreate a new array. The write is an all or nothing
684 	 * operation. Always create a new array when adding new pids by
685 	 * the user. If the operation fails, then the current list is
686 	 * not modified.
687 	 */
688 	pid_list = trace_pid_list_alloc();
689 	if (!pid_list) {
690 		trace_parser_put(&parser);
691 		return -ENOMEM;
692 	}
693 
694 	if (filtered_pids) {
695 		/* copy the current bits to the new max */
696 		ret = trace_pid_list_first(filtered_pids, &pid);
697 		while (!ret) {
698 			trace_pid_list_set(pid_list, pid);
699 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
700 			nr_pids++;
701 		}
702 	}
703 
704 	ret = 0;
705 	while (cnt > 0) {
706 
707 		pos = 0;
708 
709 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
710 		if (ret < 0 || !trace_parser_loaded(&parser))
711 			break;
712 
713 		read += ret;
714 		ubuf += ret;
715 		cnt -= ret;
716 
717 		ret = -EINVAL;
718 		if (kstrtoul(parser.buffer, 0, &val))
719 			break;
720 
721 		pid = (pid_t)val;
722 
723 		if (trace_pid_list_set(pid_list, pid) < 0) {
724 			ret = -1;
725 			break;
726 		}
727 		nr_pids++;
728 
729 		trace_parser_clear(&parser);
730 		ret = 0;
731 	}
732 	trace_parser_put(&parser);
733 
734 	if (ret < 0) {
735 		trace_pid_list_free(pid_list);
736 		return ret;
737 	}
738 
739 	if (!nr_pids) {
740 		/* Cleared the list of pids */
741 		trace_pid_list_free(pid_list);
742 		read = ret;
743 		pid_list = NULL;
744 	}
745 
746 	*new_pid_list = pid_list;
747 
748 	return read;
749 }
750 
751 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
752 {
753 	u64 ts;
754 
755 	/* Early boot up does not have a buffer yet */
756 	if (!buf->buffer)
757 		return trace_clock_local();
758 
759 	ts = ring_buffer_time_stamp(buf->buffer);
760 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
761 
762 	return ts;
763 }
764 
765 u64 ftrace_now(int cpu)
766 {
767 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
768 }
769 
770 /**
771  * tracing_is_enabled - Show if global_trace has been enabled
772  *
773  * Shows if the global trace has been enabled or not. It uses the
774  * mirror flag "buffer_disabled" to be used in fast paths such as for
775  * the irqsoff tracer. But it may be inaccurate due to races. If you
776  * need to know the accurate state, use tracing_is_on() which is a little
777  * slower, but accurate.
778  */
779 int tracing_is_enabled(void)
780 {
781 	/*
782 	 * For quick access (irqsoff uses this in fast path), just
783 	 * return the mirror variable of the state of the ring buffer.
784 	 * It's a little racy, but we don't really care.
785 	 */
786 	smp_rmb();
787 	return !global_trace.buffer_disabled;
788 }
789 
790 /*
791  * trace_buf_size is the size in bytes that is allocated
792  * for a buffer. Note, the number of bytes is always rounded
793  * to page size.
794  *
795  * This number is purposely set to a low number of 16384.
796  * If the dump on oops happens, it will be much appreciated
797  * to not have to wait for all that output. Anyway this can be
798  * boot time and run time configurable.
799  */
800 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
801 
802 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
803 
804 /* trace_types holds a link list of available tracers. */
805 static struct tracer		*trace_types __read_mostly;
806 
807 /*
808  * trace_types_lock is used to protect the trace_types list.
809  */
810 DEFINE_MUTEX(trace_types_lock);
811 
812 /*
813  * serialize the access of the ring buffer
814  *
815  * ring buffer serializes readers, but it is low level protection.
816  * The validity of the events (which returns by ring_buffer_peek() ..etc)
817  * are not protected by ring buffer.
818  *
819  * The content of events may become garbage if we allow other process consumes
820  * these events concurrently:
821  *   A) the page of the consumed events may become a normal page
822  *      (not reader page) in ring buffer, and this page will be rewritten
823  *      by events producer.
824  *   B) The page of the consumed events may become a page for splice_read,
825  *      and this page will be returned to system.
826  *
827  * These primitives allow multi process access to different cpu ring buffer
828  * concurrently.
829  *
830  * These primitives don't distinguish read-only and read-consume access.
831  * Multi read-only access are also serialized.
832  */
833 
834 #ifdef CONFIG_SMP
835 static DECLARE_RWSEM(all_cpu_access_lock);
836 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
837 
838 static inline void trace_access_lock(int cpu)
839 {
840 	if (cpu == RING_BUFFER_ALL_CPUS) {
841 		/* gain it for accessing the whole ring buffer. */
842 		down_write(&all_cpu_access_lock);
843 	} else {
844 		/* gain it for accessing a cpu ring buffer. */
845 
846 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
847 		down_read(&all_cpu_access_lock);
848 
849 		/* Secondly block other access to this @cpu ring buffer. */
850 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
851 	}
852 }
853 
854 static inline void trace_access_unlock(int cpu)
855 {
856 	if (cpu == RING_BUFFER_ALL_CPUS) {
857 		up_write(&all_cpu_access_lock);
858 	} else {
859 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
860 		up_read(&all_cpu_access_lock);
861 	}
862 }
863 
864 static inline void trace_access_lock_init(void)
865 {
866 	int cpu;
867 
868 	for_each_possible_cpu(cpu)
869 		mutex_init(&per_cpu(cpu_access_lock, cpu));
870 }
871 
872 #else
873 
874 static DEFINE_MUTEX(access_lock);
875 
876 static inline void trace_access_lock(int cpu)
877 {
878 	(void)cpu;
879 	mutex_lock(&access_lock);
880 }
881 
882 static inline void trace_access_unlock(int cpu)
883 {
884 	(void)cpu;
885 	mutex_unlock(&access_lock);
886 }
887 
888 static inline void trace_access_lock_init(void)
889 {
890 }
891 
892 #endif
893 
894 #ifdef CONFIG_STACKTRACE
895 static void __ftrace_trace_stack(struct trace_buffer *buffer,
896 				 unsigned int trace_ctx,
897 				 int skip, struct pt_regs *regs);
898 static inline void ftrace_trace_stack(struct trace_array *tr,
899 				      struct trace_buffer *buffer,
900 				      unsigned int trace_ctx,
901 				      int skip, struct pt_regs *regs);
902 
903 #else
904 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
905 					unsigned int trace_ctx,
906 					int skip, struct pt_regs *regs)
907 {
908 }
909 static inline void ftrace_trace_stack(struct trace_array *tr,
910 				      struct trace_buffer *buffer,
911 				      unsigned long trace_ctx,
912 				      int skip, struct pt_regs *regs)
913 {
914 }
915 
916 #endif
917 
918 static __always_inline void
919 trace_event_setup(struct ring_buffer_event *event,
920 		  int type, unsigned int trace_ctx)
921 {
922 	struct trace_entry *ent = ring_buffer_event_data(event);
923 
924 	tracing_generic_entry_update(ent, type, trace_ctx);
925 }
926 
927 static __always_inline struct ring_buffer_event *
928 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
929 			  int type,
930 			  unsigned long len,
931 			  unsigned int trace_ctx)
932 {
933 	struct ring_buffer_event *event;
934 
935 	event = ring_buffer_lock_reserve(buffer, len);
936 	if (event != NULL)
937 		trace_event_setup(event, type, trace_ctx);
938 
939 	return event;
940 }
941 
942 void tracer_tracing_on(struct trace_array *tr)
943 {
944 	if (tr->array_buffer.buffer)
945 		ring_buffer_record_on(tr->array_buffer.buffer);
946 	/*
947 	 * This flag is looked at when buffers haven't been allocated
948 	 * yet, or by some tracers (like irqsoff), that just want to
949 	 * know if the ring buffer has been disabled, but it can handle
950 	 * races of where it gets disabled but we still do a record.
951 	 * As the check is in the fast path of the tracers, it is more
952 	 * important to be fast than accurate.
953 	 */
954 	tr->buffer_disabled = 0;
955 	/* Make the flag seen by readers */
956 	smp_wmb();
957 }
958 
959 /**
960  * tracing_on - enable tracing buffers
961  *
962  * This function enables tracing buffers that may have been
963  * disabled with tracing_off.
964  */
965 void tracing_on(void)
966 {
967 	tracer_tracing_on(&global_trace);
968 }
969 EXPORT_SYMBOL_GPL(tracing_on);
970 
971 
972 static __always_inline void
973 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
974 {
975 	__this_cpu_write(trace_taskinfo_save, true);
976 
977 	/* If this is the temp buffer, we need to commit fully */
978 	if (this_cpu_read(trace_buffered_event) == event) {
979 		/* Length is in event->array[0] */
980 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
981 		/* Release the temp buffer */
982 		this_cpu_dec(trace_buffered_event_cnt);
983 	} else
984 		ring_buffer_unlock_commit(buffer, event);
985 }
986 
987 /**
988  * __trace_puts - write a constant string into the trace buffer.
989  * @ip:	   The address of the caller
990  * @str:   The constant string to write
991  * @size:  The size of the string.
992  */
993 int __trace_puts(unsigned long ip, const char *str, int size)
994 {
995 	struct ring_buffer_event *event;
996 	struct trace_buffer *buffer;
997 	struct print_entry *entry;
998 	unsigned int trace_ctx;
999 	int alloc;
1000 
1001 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1002 		return 0;
1003 
1004 	if (unlikely(tracing_selftest_running || tracing_disabled))
1005 		return 0;
1006 
1007 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1008 
1009 	trace_ctx = tracing_gen_ctx();
1010 	buffer = global_trace.array_buffer.buffer;
1011 	ring_buffer_nest_start(buffer);
1012 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1013 					    trace_ctx);
1014 	if (!event) {
1015 		size = 0;
1016 		goto out;
1017 	}
1018 
1019 	entry = ring_buffer_event_data(event);
1020 	entry->ip = ip;
1021 
1022 	memcpy(&entry->buf, str, size);
1023 
1024 	/* Add a newline if necessary */
1025 	if (entry->buf[size - 1] != '\n') {
1026 		entry->buf[size] = '\n';
1027 		entry->buf[size + 1] = '\0';
1028 	} else
1029 		entry->buf[size] = '\0';
1030 
1031 	__buffer_unlock_commit(buffer, event);
1032 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1033  out:
1034 	ring_buffer_nest_end(buffer);
1035 	return size;
1036 }
1037 EXPORT_SYMBOL_GPL(__trace_puts);
1038 
1039 /**
1040  * __trace_bputs - write the pointer to a constant string into trace buffer
1041  * @ip:	   The address of the caller
1042  * @str:   The constant string to write to the buffer to
1043  */
1044 int __trace_bputs(unsigned long ip, const char *str)
1045 {
1046 	struct ring_buffer_event *event;
1047 	struct trace_buffer *buffer;
1048 	struct bputs_entry *entry;
1049 	unsigned int trace_ctx;
1050 	int size = sizeof(struct bputs_entry);
1051 	int ret = 0;
1052 
1053 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1054 		return 0;
1055 
1056 	if (unlikely(tracing_selftest_running || tracing_disabled))
1057 		return 0;
1058 
1059 	trace_ctx = tracing_gen_ctx();
1060 	buffer = global_trace.array_buffer.buffer;
1061 
1062 	ring_buffer_nest_start(buffer);
1063 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1064 					    trace_ctx);
1065 	if (!event)
1066 		goto out;
1067 
1068 	entry = ring_buffer_event_data(event);
1069 	entry->ip			= ip;
1070 	entry->str			= str;
1071 
1072 	__buffer_unlock_commit(buffer, event);
1073 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1074 
1075 	ret = 1;
1076  out:
1077 	ring_buffer_nest_end(buffer);
1078 	return ret;
1079 }
1080 EXPORT_SYMBOL_GPL(__trace_bputs);
1081 
1082 #ifdef CONFIG_TRACER_SNAPSHOT
1083 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1084 					   void *cond_data)
1085 {
1086 	struct tracer *tracer = tr->current_trace;
1087 	unsigned long flags;
1088 
1089 	if (in_nmi()) {
1090 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1091 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1092 		return;
1093 	}
1094 
1095 	if (!tr->allocated_snapshot) {
1096 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1097 		internal_trace_puts("*** stopping trace here!   ***\n");
1098 		tracing_off();
1099 		return;
1100 	}
1101 
1102 	/* Note, snapshot can not be used when the tracer uses it */
1103 	if (tracer->use_max_tr) {
1104 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1105 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1106 		return;
1107 	}
1108 
1109 	local_irq_save(flags);
1110 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1111 	local_irq_restore(flags);
1112 }
1113 
1114 void tracing_snapshot_instance(struct trace_array *tr)
1115 {
1116 	tracing_snapshot_instance_cond(tr, NULL);
1117 }
1118 
1119 /**
1120  * tracing_snapshot - take a snapshot of the current buffer.
1121  *
1122  * This causes a swap between the snapshot buffer and the current live
1123  * tracing buffer. You can use this to take snapshots of the live
1124  * trace when some condition is triggered, but continue to trace.
1125  *
1126  * Note, make sure to allocate the snapshot with either
1127  * a tracing_snapshot_alloc(), or by doing it manually
1128  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1129  *
1130  * If the snapshot buffer is not allocated, it will stop tracing.
1131  * Basically making a permanent snapshot.
1132  */
1133 void tracing_snapshot(void)
1134 {
1135 	struct trace_array *tr = &global_trace;
1136 
1137 	tracing_snapshot_instance(tr);
1138 }
1139 EXPORT_SYMBOL_GPL(tracing_snapshot);
1140 
1141 /**
1142  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1143  * @tr:		The tracing instance to snapshot
1144  * @cond_data:	The data to be tested conditionally, and possibly saved
1145  *
1146  * This is the same as tracing_snapshot() except that the snapshot is
1147  * conditional - the snapshot will only happen if the
1148  * cond_snapshot.update() implementation receiving the cond_data
1149  * returns true, which means that the trace array's cond_snapshot
1150  * update() operation used the cond_data to determine whether the
1151  * snapshot should be taken, and if it was, presumably saved it along
1152  * with the snapshot.
1153  */
1154 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1155 {
1156 	tracing_snapshot_instance_cond(tr, cond_data);
1157 }
1158 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1159 
1160 /**
1161  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1162  * @tr:		The tracing instance
1163  *
1164  * When the user enables a conditional snapshot using
1165  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1166  * with the snapshot.  This accessor is used to retrieve it.
1167  *
1168  * Should not be called from cond_snapshot.update(), since it takes
1169  * the tr->max_lock lock, which the code calling
1170  * cond_snapshot.update() has already done.
1171  *
1172  * Returns the cond_data associated with the trace array's snapshot.
1173  */
1174 void *tracing_cond_snapshot_data(struct trace_array *tr)
1175 {
1176 	void *cond_data = NULL;
1177 
1178 	arch_spin_lock(&tr->max_lock);
1179 
1180 	if (tr->cond_snapshot)
1181 		cond_data = tr->cond_snapshot->cond_data;
1182 
1183 	arch_spin_unlock(&tr->max_lock);
1184 
1185 	return cond_data;
1186 }
1187 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1188 
1189 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1190 					struct array_buffer *size_buf, int cpu_id);
1191 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1192 
1193 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1194 {
1195 	int ret;
1196 
1197 	if (!tr->allocated_snapshot) {
1198 
1199 		/* allocate spare buffer */
1200 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1201 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1202 		if (ret < 0)
1203 			return ret;
1204 
1205 		tr->allocated_snapshot = true;
1206 	}
1207 
1208 	return 0;
1209 }
1210 
1211 static void free_snapshot(struct trace_array *tr)
1212 {
1213 	/*
1214 	 * We don't free the ring buffer. instead, resize it because
1215 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1216 	 * we want preserve it.
1217 	 */
1218 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1219 	set_buffer_entries(&tr->max_buffer, 1);
1220 	tracing_reset_online_cpus(&tr->max_buffer);
1221 	tr->allocated_snapshot = false;
1222 }
1223 
1224 /**
1225  * tracing_alloc_snapshot - allocate snapshot buffer.
1226  *
1227  * This only allocates the snapshot buffer if it isn't already
1228  * allocated - it doesn't also take a snapshot.
1229  *
1230  * This is meant to be used in cases where the snapshot buffer needs
1231  * to be set up for events that can't sleep but need to be able to
1232  * trigger a snapshot.
1233  */
1234 int tracing_alloc_snapshot(void)
1235 {
1236 	struct trace_array *tr = &global_trace;
1237 	int ret;
1238 
1239 	ret = tracing_alloc_snapshot_instance(tr);
1240 	WARN_ON(ret < 0);
1241 
1242 	return ret;
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1245 
1246 /**
1247  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1248  *
1249  * This is similar to tracing_snapshot(), but it will allocate the
1250  * snapshot buffer if it isn't already allocated. Use this only
1251  * where it is safe to sleep, as the allocation may sleep.
1252  *
1253  * This causes a swap between the snapshot buffer and the current live
1254  * tracing buffer. You can use this to take snapshots of the live
1255  * trace when some condition is triggered, but continue to trace.
1256  */
1257 void tracing_snapshot_alloc(void)
1258 {
1259 	int ret;
1260 
1261 	ret = tracing_alloc_snapshot();
1262 	if (ret < 0)
1263 		return;
1264 
1265 	tracing_snapshot();
1266 }
1267 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1268 
1269 /**
1270  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1271  * @tr:		The tracing instance
1272  * @cond_data:	User data to associate with the snapshot
1273  * @update:	Implementation of the cond_snapshot update function
1274  *
1275  * Check whether the conditional snapshot for the given instance has
1276  * already been enabled, or if the current tracer is already using a
1277  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1278  * save the cond_data and update function inside.
1279  *
1280  * Returns 0 if successful, error otherwise.
1281  */
1282 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1283 				 cond_update_fn_t update)
1284 {
1285 	struct cond_snapshot *cond_snapshot;
1286 	int ret = 0;
1287 
1288 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1289 	if (!cond_snapshot)
1290 		return -ENOMEM;
1291 
1292 	cond_snapshot->cond_data = cond_data;
1293 	cond_snapshot->update = update;
1294 
1295 	mutex_lock(&trace_types_lock);
1296 
1297 	ret = tracing_alloc_snapshot_instance(tr);
1298 	if (ret)
1299 		goto fail_unlock;
1300 
1301 	if (tr->current_trace->use_max_tr) {
1302 		ret = -EBUSY;
1303 		goto fail_unlock;
1304 	}
1305 
1306 	/*
1307 	 * The cond_snapshot can only change to NULL without the
1308 	 * trace_types_lock. We don't care if we race with it going
1309 	 * to NULL, but we want to make sure that it's not set to
1310 	 * something other than NULL when we get here, which we can
1311 	 * do safely with only holding the trace_types_lock and not
1312 	 * having to take the max_lock.
1313 	 */
1314 	if (tr->cond_snapshot) {
1315 		ret = -EBUSY;
1316 		goto fail_unlock;
1317 	}
1318 
1319 	arch_spin_lock(&tr->max_lock);
1320 	tr->cond_snapshot = cond_snapshot;
1321 	arch_spin_unlock(&tr->max_lock);
1322 
1323 	mutex_unlock(&trace_types_lock);
1324 
1325 	return ret;
1326 
1327  fail_unlock:
1328 	mutex_unlock(&trace_types_lock);
1329 	kfree(cond_snapshot);
1330 	return ret;
1331 }
1332 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1333 
1334 /**
1335  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1336  * @tr:		The tracing instance
1337  *
1338  * Check whether the conditional snapshot for the given instance is
1339  * enabled; if so, free the cond_snapshot associated with it,
1340  * otherwise return -EINVAL.
1341  *
1342  * Returns 0 if successful, error otherwise.
1343  */
1344 int tracing_snapshot_cond_disable(struct trace_array *tr)
1345 {
1346 	int ret = 0;
1347 
1348 	arch_spin_lock(&tr->max_lock);
1349 
1350 	if (!tr->cond_snapshot)
1351 		ret = -EINVAL;
1352 	else {
1353 		kfree(tr->cond_snapshot);
1354 		tr->cond_snapshot = NULL;
1355 	}
1356 
1357 	arch_spin_unlock(&tr->max_lock);
1358 
1359 	return ret;
1360 }
1361 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1362 #else
1363 void tracing_snapshot(void)
1364 {
1365 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1366 }
1367 EXPORT_SYMBOL_GPL(tracing_snapshot);
1368 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1369 {
1370 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1371 }
1372 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1373 int tracing_alloc_snapshot(void)
1374 {
1375 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1376 	return -ENODEV;
1377 }
1378 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1379 void tracing_snapshot_alloc(void)
1380 {
1381 	/* Give warning */
1382 	tracing_snapshot();
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1385 void *tracing_cond_snapshot_data(struct trace_array *tr)
1386 {
1387 	return NULL;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1390 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1391 {
1392 	return -ENODEV;
1393 }
1394 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1395 int tracing_snapshot_cond_disable(struct trace_array *tr)
1396 {
1397 	return false;
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1400 #endif /* CONFIG_TRACER_SNAPSHOT */
1401 
1402 void tracer_tracing_off(struct trace_array *tr)
1403 {
1404 	if (tr->array_buffer.buffer)
1405 		ring_buffer_record_off(tr->array_buffer.buffer);
1406 	/*
1407 	 * This flag is looked at when buffers haven't been allocated
1408 	 * yet, or by some tracers (like irqsoff), that just want to
1409 	 * know if the ring buffer has been disabled, but it can handle
1410 	 * races of where it gets disabled but we still do a record.
1411 	 * As the check is in the fast path of the tracers, it is more
1412 	 * important to be fast than accurate.
1413 	 */
1414 	tr->buffer_disabled = 1;
1415 	/* Make the flag seen by readers */
1416 	smp_wmb();
1417 }
1418 
1419 /**
1420  * tracing_off - turn off tracing buffers
1421  *
1422  * This function stops the tracing buffers from recording data.
1423  * It does not disable any overhead the tracers themselves may
1424  * be causing. This function simply causes all recording to
1425  * the ring buffers to fail.
1426  */
1427 void tracing_off(void)
1428 {
1429 	tracer_tracing_off(&global_trace);
1430 }
1431 EXPORT_SYMBOL_GPL(tracing_off);
1432 
1433 void disable_trace_on_warning(void)
1434 {
1435 	if (__disable_trace_on_warning) {
1436 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1437 			"Disabling tracing due to warning\n");
1438 		tracing_off();
1439 	}
1440 }
1441 
1442 /**
1443  * tracer_tracing_is_on - show real state of ring buffer enabled
1444  * @tr : the trace array to know if ring buffer is enabled
1445  *
1446  * Shows real state of the ring buffer if it is enabled or not.
1447  */
1448 bool tracer_tracing_is_on(struct trace_array *tr)
1449 {
1450 	if (tr->array_buffer.buffer)
1451 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1452 	return !tr->buffer_disabled;
1453 }
1454 
1455 /**
1456  * tracing_is_on - show state of ring buffers enabled
1457  */
1458 int tracing_is_on(void)
1459 {
1460 	return tracer_tracing_is_on(&global_trace);
1461 }
1462 EXPORT_SYMBOL_GPL(tracing_is_on);
1463 
1464 static int __init set_buf_size(char *str)
1465 {
1466 	unsigned long buf_size;
1467 
1468 	if (!str)
1469 		return 0;
1470 	buf_size = memparse(str, &str);
1471 	/* nr_entries can not be zero */
1472 	if (buf_size == 0)
1473 		return 0;
1474 	trace_buf_size = buf_size;
1475 	return 1;
1476 }
1477 __setup("trace_buf_size=", set_buf_size);
1478 
1479 static int __init set_tracing_thresh(char *str)
1480 {
1481 	unsigned long threshold;
1482 	int ret;
1483 
1484 	if (!str)
1485 		return 0;
1486 	ret = kstrtoul(str, 0, &threshold);
1487 	if (ret < 0)
1488 		return 0;
1489 	tracing_thresh = threshold * 1000;
1490 	return 1;
1491 }
1492 __setup("tracing_thresh=", set_tracing_thresh);
1493 
1494 unsigned long nsecs_to_usecs(unsigned long nsecs)
1495 {
1496 	return nsecs / 1000;
1497 }
1498 
1499 /*
1500  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1501  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1502  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1503  * of strings in the order that the evals (enum) were defined.
1504  */
1505 #undef C
1506 #define C(a, b) b
1507 
1508 /* These must match the bit positions in trace_iterator_flags */
1509 static const char *trace_options[] = {
1510 	TRACE_FLAGS
1511 	NULL
1512 };
1513 
1514 static struct {
1515 	u64 (*func)(void);
1516 	const char *name;
1517 	int in_ns;		/* is this clock in nanoseconds? */
1518 } trace_clocks[] = {
1519 	{ trace_clock_local,		"local",	1 },
1520 	{ trace_clock_global,		"global",	1 },
1521 	{ trace_clock_counter,		"counter",	0 },
1522 	{ trace_clock_jiffies,		"uptime",	0 },
1523 	{ trace_clock,			"perf",		1 },
1524 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1525 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1526 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1527 	ARCH_TRACE_CLOCKS
1528 };
1529 
1530 bool trace_clock_in_ns(struct trace_array *tr)
1531 {
1532 	if (trace_clocks[tr->clock_id].in_ns)
1533 		return true;
1534 
1535 	return false;
1536 }
1537 
1538 /*
1539  * trace_parser_get_init - gets the buffer for trace parser
1540  */
1541 int trace_parser_get_init(struct trace_parser *parser, int size)
1542 {
1543 	memset(parser, 0, sizeof(*parser));
1544 
1545 	parser->buffer = kmalloc(size, GFP_KERNEL);
1546 	if (!parser->buffer)
1547 		return 1;
1548 
1549 	parser->size = size;
1550 	return 0;
1551 }
1552 
1553 /*
1554  * trace_parser_put - frees the buffer for trace parser
1555  */
1556 void trace_parser_put(struct trace_parser *parser)
1557 {
1558 	kfree(parser->buffer);
1559 	parser->buffer = NULL;
1560 }
1561 
1562 /*
1563  * trace_get_user - reads the user input string separated by  space
1564  * (matched by isspace(ch))
1565  *
1566  * For each string found the 'struct trace_parser' is updated,
1567  * and the function returns.
1568  *
1569  * Returns number of bytes read.
1570  *
1571  * See kernel/trace/trace.h for 'struct trace_parser' details.
1572  */
1573 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1574 	size_t cnt, loff_t *ppos)
1575 {
1576 	char ch;
1577 	size_t read = 0;
1578 	ssize_t ret;
1579 
1580 	if (!*ppos)
1581 		trace_parser_clear(parser);
1582 
1583 	ret = get_user(ch, ubuf++);
1584 	if (ret)
1585 		goto out;
1586 
1587 	read++;
1588 	cnt--;
1589 
1590 	/*
1591 	 * The parser is not finished with the last write,
1592 	 * continue reading the user input without skipping spaces.
1593 	 */
1594 	if (!parser->cont) {
1595 		/* skip white space */
1596 		while (cnt && isspace(ch)) {
1597 			ret = get_user(ch, ubuf++);
1598 			if (ret)
1599 				goto out;
1600 			read++;
1601 			cnt--;
1602 		}
1603 
1604 		parser->idx = 0;
1605 
1606 		/* only spaces were written */
1607 		if (isspace(ch) || !ch) {
1608 			*ppos += read;
1609 			ret = read;
1610 			goto out;
1611 		}
1612 	}
1613 
1614 	/* read the non-space input */
1615 	while (cnt && !isspace(ch) && ch) {
1616 		if (parser->idx < parser->size - 1)
1617 			parser->buffer[parser->idx++] = ch;
1618 		else {
1619 			ret = -EINVAL;
1620 			goto out;
1621 		}
1622 		ret = get_user(ch, ubuf++);
1623 		if (ret)
1624 			goto out;
1625 		read++;
1626 		cnt--;
1627 	}
1628 
1629 	/* We either got finished input or we have to wait for another call. */
1630 	if (isspace(ch) || !ch) {
1631 		parser->buffer[parser->idx] = 0;
1632 		parser->cont = false;
1633 	} else if (parser->idx < parser->size - 1) {
1634 		parser->cont = true;
1635 		parser->buffer[parser->idx++] = ch;
1636 		/* Make sure the parsed string always terminates with '\0'. */
1637 		parser->buffer[parser->idx] = 0;
1638 	} else {
1639 		ret = -EINVAL;
1640 		goto out;
1641 	}
1642 
1643 	*ppos += read;
1644 	ret = read;
1645 
1646 out:
1647 	return ret;
1648 }
1649 
1650 /* TODO add a seq_buf_to_buffer() */
1651 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1652 {
1653 	int len;
1654 
1655 	if (trace_seq_used(s) <= s->seq.readpos)
1656 		return -EBUSY;
1657 
1658 	len = trace_seq_used(s) - s->seq.readpos;
1659 	if (cnt > len)
1660 		cnt = len;
1661 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1662 
1663 	s->seq.readpos += cnt;
1664 	return cnt;
1665 }
1666 
1667 unsigned long __read_mostly	tracing_thresh;
1668 static const struct file_operations tracing_max_lat_fops;
1669 
1670 #ifdef LATENCY_FS_NOTIFY
1671 
1672 static struct workqueue_struct *fsnotify_wq;
1673 
1674 static void latency_fsnotify_workfn(struct work_struct *work)
1675 {
1676 	struct trace_array *tr = container_of(work, struct trace_array,
1677 					      fsnotify_work);
1678 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1679 }
1680 
1681 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1682 {
1683 	struct trace_array *tr = container_of(iwork, struct trace_array,
1684 					      fsnotify_irqwork);
1685 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1686 }
1687 
1688 static void trace_create_maxlat_file(struct trace_array *tr,
1689 				     struct dentry *d_tracer)
1690 {
1691 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1692 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1693 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1694 					      TRACE_MODE_WRITE,
1695 					      d_tracer, &tr->max_latency,
1696 					      &tracing_max_lat_fops);
1697 }
1698 
1699 __init static int latency_fsnotify_init(void)
1700 {
1701 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1702 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1703 	if (!fsnotify_wq) {
1704 		pr_err("Unable to allocate tr_max_lat_wq\n");
1705 		return -ENOMEM;
1706 	}
1707 	return 0;
1708 }
1709 
1710 late_initcall_sync(latency_fsnotify_init);
1711 
1712 void latency_fsnotify(struct trace_array *tr)
1713 {
1714 	if (!fsnotify_wq)
1715 		return;
1716 	/*
1717 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1718 	 * possible that we are called from __schedule() or do_idle(), which
1719 	 * could cause a deadlock.
1720 	 */
1721 	irq_work_queue(&tr->fsnotify_irqwork);
1722 }
1723 
1724 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)	\
1725 	|| defined(CONFIG_OSNOISE_TRACER)
1726 
1727 #define trace_create_maxlat_file(tr, d_tracer)				\
1728 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1729 			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1730 
1731 #else
1732 #define trace_create_maxlat_file(tr, d_tracer)	 do { } while (0)
1733 #endif
1734 
1735 #ifdef CONFIG_TRACER_MAX_TRACE
1736 /*
1737  * Copy the new maximum trace into the separate maximum-trace
1738  * structure. (this way the maximum trace is permanently saved,
1739  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1740  */
1741 static void
1742 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1743 {
1744 	struct array_buffer *trace_buf = &tr->array_buffer;
1745 	struct array_buffer *max_buf = &tr->max_buffer;
1746 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1747 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1748 
1749 	max_buf->cpu = cpu;
1750 	max_buf->time_start = data->preempt_timestamp;
1751 
1752 	max_data->saved_latency = tr->max_latency;
1753 	max_data->critical_start = data->critical_start;
1754 	max_data->critical_end = data->critical_end;
1755 
1756 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1757 	max_data->pid = tsk->pid;
1758 	/*
1759 	 * If tsk == current, then use current_uid(), as that does not use
1760 	 * RCU. The irq tracer can be called out of RCU scope.
1761 	 */
1762 	if (tsk == current)
1763 		max_data->uid = current_uid();
1764 	else
1765 		max_data->uid = task_uid(tsk);
1766 
1767 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1768 	max_data->policy = tsk->policy;
1769 	max_data->rt_priority = tsk->rt_priority;
1770 
1771 	/* record this tasks comm */
1772 	tracing_record_cmdline(tsk);
1773 	latency_fsnotify(tr);
1774 }
1775 
1776 /**
1777  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1778  * @tr: tracer
1779  * @tsk: the task with the latency
1780  * @cpu: The cpu that initiated the trace.
1781  * @cond_data: User data associated with a conditional snapshot
1782  *
1783  * Flip the buffers between the @tr and the max_tr and record information
1784  * about which task was the cause of this latency.
1785  */
1786 void
1787 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1788 	      void *cond_data)
1789 {
1790 	if (tr->stop_count)
1791 		return;
1792 
1793 	WARN_ON_ONCE(!irqs_disabled());
1794 
1795 	if (!tr->allocated_snapshot) {
1796 		/* Only the nop tracer should hit this when disabling */
1797 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1798 		return;
1799 	}
1800 
1801 	arch_spin_lock(&tr->max_lock);
1802 
1803 	/* Inherit the recordable setting from array_buffer */
1804 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1805 		ring_buffer_record_on(tr->max_buffer.buffer);
1806 	else
1807 		ring_buffer_record_off(tr->max_buffer.buffer);
1808 
1809 #ifdef CONFIG_TRACER_SNAPSHOT
1810 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1811 		goto out_unlock;
1812 #endif
1813 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1814 
1815 	__update_max_tr(tr, tsk, cpu);
1816 
1817  out_unlock:
1818 	arch_spin_unlock(&tr->max_lock);
1819 }
1820 
1821 /**
1822  * update_max_tr_single - only copy one trace over, and reset the rest
1823  * @tr: tracer
1824  * @tsk: task with the latency
1825  * @cpu: the cpu of the buffer to copy.
1826  *
1827  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1828  */
1829 void
1830 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1831 {
1832 	int ret;
1833 
1834 	if (tr->stop_count)
1835 		return;
1836 
1837 	WARN_ON_ONCE(!irqs_disabled());
1838 	if (!tr->allocated_snapshot) {
1839 		/* Only the nop tracer should hit this when disabling */
1840 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1841 		return;
1842 	}
1843 
1844 	arch_spin_lock(&tr->max_lock);
1845 
1846 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1847 
1848 	if (ret == -EBUSY) {
1849 		/*
1850 		 * We failed to swap the buffer due to a commit taking
1851 		 * place on this CPU. We fail to record, but we reset
1852 		 * the max trace buffer (no one writes directly to it)
1853 		 * and flag that it failed.
1854 		 */
1855 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1856 			"Failed to swap buffers due to commit in progress\n");
1857 	}
1858 
1859 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1860 
1861 	__update_max_tr(tr, tsk, cpu);
1862 	arch_spin_unlock(&tr->max_lock);
1863 }
1864 #endif /* CONFIG_TRACER_MAX_TRACE */
1865 
1866 static int wait_on_pipe(struct trace_iterator *iter, int full)
1867 {
1868 	/* Iterators are static, they should be filled or empty */
1869 	if (trace_buffer_iter(iter, iter->cpu_file))
1870 		return 0;
1871 
1872 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1873 				full);
1874 }
1875 
1876 #ifdef CONFIG_FTRACE_STARTUP_TEST
1877 static bool selftests_can_run;
1878 
1879 struct trace_selftests {
1880 	struct list_head		list;
1881 	struct tracer			*type;
1882 };
1883 
1884 static LIST_HEAD(postponed_selftests);
1885 
1886 static int save_selftest(struct tracer *type)
1887 {
1888 	struct trace_selftests *selftest;
1889 
1890 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1891 	if (!selftest)
1892 		return -ENOMEM;
1893 
1894 	selftest->type = type;
1895 	list_add(&selftest->list, &postponed_selftests);
1896 	return 0;
1897 }
1898 
1899 static int run_tracer_selftest(struct tracer *type)
1900 {
1901 	struct trace_array *tr = &global_trace;
1902 	struct tracer *saved_tracer = tr->current_trace;
1903 	int ret;
1904 
1905 	if (!type->selftest || tracing_selftest_disabled)
1906 		return 0;
1907 
1908 	/*
1909 	 * If a tracer registers early in boot up (before scheduling is
1910 	 * initialized and such), then do not run its selftests yet.
1911 	 * Instead, run it a little later in the boot process.
1912 	 */
1913 	if (!selftests_can_run)
1914 		return save_selftest(type);
1915 
1916 	if (!tracing_is_on()) {
1917 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1918 			type->name);
1919 		return 0;
1920 	}
1921 
1922 	/*
1923 	 * Run a selftest on this tracer.
1924 	 * Here we reset the trace buffer, and set the current
1925 	 * tracer to be this tracer. The tracer can then run some
1926 	 * internal tracing to verify that everything is in order.
1927 	 * If we fail, we do not register this tracer.
1928 	 */
1929 	tracing_reset_online_cpus(&tr->array_buffer);
1930 
1931 	tr->current_trace = type;
1932 
1933 #ifdef CONFIG_TRACER_MAX_TRACE
1934 	if (type->use_max_tr) {
1935 		/* If we expanded the buffers, make sure the max is expanded too */
1936 		if (ring_buffer_expanded)
1937 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1938 					   RING_BUFFER_ALL_CPUS);
1939 		tr->allocated_snapshot = true;
1940 	}
1941 #endif
1942 
1943 	/* the test is responsible for initializing and enabling */
1944 	pr_info("Testing tracer %s: ", type->name);
1945 	ret = type->selftest(type, tr);
1946 	/* the test is responsible for resetting too */
1947 	tr->current_trace = saved_tracer;
1948 	if (ret) {
1949 		printk(KERN_CONT "FAILED!\n");
1950 		/* Add the warning after printing 'FAILED' */
1951 		WARN_ON(1);
1952 		return -1;
1953 	}
1954 	/* Only reset on passing, to avoid touching corrupted buffers */
1955 	tracing_reset_online_cpus(&tr->array_buffer);
1956 
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958 	if (type->use_max_tr) {
1959 		tr->allocated_snapshot = false;
1960 
1961 		/* Shrink the max buffer again */
1962 		if (ring_buffer_expanded)
1963 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1964 					   RING_BUFFER_ALL_CPUS);
1965 	}
1966 #endif
1967 
1968 	printk(KERN_CONT "PASSED\n");
1969 	return 0;
1970 }
1971 
1972 static __init int init_trace_selftests(void)
1973 {
1974 	struct trace_selftests *p, *n;
1975 	struct tracer *t, **last;
1976 	int ret;
1977 
1978 	selftests_can_run = true;
1979 
1980 	mutex_lock(&trace_types_lock);
1981 
1982 	if (list_empty(&postponed_selftests))
1983 		goto out;
1984 
1985 	pr_info("Running postponed tracer tests:\n");
1986 
1987 	tracing_selftest_running = true;
1988 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1989 		/* This loop can take minutes when sanitizers are enabled, so
1990 		 * lets make sure we allow RCU processing.
1991 		 */
1992 		cond_resched();
1993 		ret = run_tracer_selftest(p->type);
1994 		/* If the test fails, then warn and remove from available_tracers */
1995 		if (ret < 0) {
1996 			WARN(1, "tracer: %s failed selftest, disabling\n",
1997 			     p->type->name);
1998 			last = &trace_types;
1999 			for (t = trace_types; t; t = t->next) {
2000 				if (t == p->type) {
2001 					*last = t->next;
2002 					break;
2003 				}
2004 				last = &t->next;
2005 			}
2006 		}
2007 		list_del(&p->list);
2008 		kfree(p);
2009 	}
2010 	tracing_selftest_running = false;
2011 
2012  out:
2013 	mutex_unlock(&trace_types_lock);
2014 
2015 	return 0;
2016 }
2017 core_initcall(init_trace_selftests);
2018 #else
2019 static inline int run_tracer_selftest(struct tracer *type)
2020 {
2021 	return 0;
2022 }
2023 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2024 
2025 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2026 
2027 static void __init apply_trace_boot_options(void);
2028 
2029 /**
2030  * register_tracer - register a tracer with the ftrace system.
2031  * @type: the plugin for the tracer
2032  *
2033  * Register a new plugin tracer.
2034  */
2035 int __init register_tracer(struct tracer *type)
2036 {
2037 	struct tracer *t;
2038 	int ret = 0;
2039 
2040 	if (!type->name) {
2041 		pr_info("Tracer must have a name\n");
2042 		return -1;
2043 	}
2044 
2045 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2046 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2047 		return -1;
2048 	}
2049 
2050 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2051 		pr_warn("Can not register tracer %s due to lockdown\n",
2052 			   type->name);
2053 		return -EPERM;
2054 	}
2055 
2056 	mutex_lock(&trace_types_lock);
2057 
2058 	tracing_selftest_running = true;
2059 
2060 	for (t = trace_types; t; t = t->next) {
2061 		if (strcmp(type->name, t->name) == 0) {
2062 			/* already found */
2063 			pr_info("Tracer %s already registered\n",
2064 				type->name);
2065 			ret = -1;
2066 			goto out;
2067 		}
2068 	}
2069 
2070 	if (!type->set_flag)
2071 		type->set_flag = &dummy_set_flag;
2072 	if (!type->flags) {
2073 		/*allocate a dummy tracer_flags*/
2074 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2075 		if (!type->flags) {
2076 			ret = -ENOMEM;
2077 			goto out;
2078 		}
2079 		type->flags->val = 0;
2080 		type->flags->opts = dummy_tracer_opt;
2081 	} else
2082 		if (!type->flags->opts)
2083 			type->flags->opts = dummy_tracer_opt;
2084 
2085 	/* store the tracer for __set_tracer_option */
2086 	type->flags->trace = type;
2087 
2088 	ret = run_tracer_selftest(type);
2089 	if (ret < 0)
2090 		goto out;
2091 
2092 	type->next = trace_types;
2093 	trace_types = type;
2094 	add_tracer_options(&global_trace, type);
2095 
2096  out:
2097 	tracing_selftest_running = false;
2098 	mutex_unlock(&trace_types_lock);
2099 
2100 	if (ret || !default_bootup_tracer)
2101 		goto out_unlock;
2102 
2103 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2104 		goto out_unlock;
2105 
2106 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2107 	/* Do we want this tracer to start on bootup? */
2108 	tracing_set_tracer(&global_trace, type->name);
2109 	default_bootup_tracer = NULL;
2110 
2111 	apply_trace_boot_options();
2112 
2113 	/* disable other selftests, since this will break it. */
2114 	disable_tracing_selftest("running a tracer");
2115 
2116  out_unlock:
2117 	return ret;
2118 }
2119 
2120 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2121 {
2122 	struct trace_buffer *buffer = buf->buffer;
2123 
2124 	if (!buffer)
2125 		return;
2126 
2127 	ring_buffer_record_disable(buffer);
2128 
2129 	/* Make sure all commits have finished */
2130 	synchronize_rcu();
2131 	ring_buffer_reset_cpu(buffer, cpu);
2132 
2133 	ring_buffer_record_enable(buffer);
2134 }
2135 
2136 void tracing_reset_online_cpus(struct array_buffer *buf)
2137 {
2138 	struct trace_buffer *buffer = buf->buffer;
2139 
2140 	if (!buffer)
2141 		return;
2142 
2143 	ring_buffer_record_disable(buffer);
2144 
2145 	/* Make sure all commits have finished */
2146 	synchronize_rcu();
2147 
2148 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2149 
2150 	ring_buffer_reset_online_cpus(buffer);
2151 
2152 	ring_buffer_record_enable(buffer);
2153 }
2154 
2155 /* Must have trace_types_lock held */
2156 void tracing_reset_all_online_cpus(void)
2157 {
2158 	struct trace_array *tr;
2159 
2160 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2161 		if (!tr->clear_trace)
2162 			continue;
2163 		tr->clear_trace = false;
2164 		tracing_reset_online_cpus(&tr->array_buffer);
2165 #ifdef CONFIG_TRACER_MAX_TRACE
2166 		tracing_reset_online_cpus(&tr->max_buffer);
2167 #endif
2168 	}
2169 }
2170 
2171 /*
2172  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2173  * is the tgid last observed corresponding to pid=i.
2174  */
2175 static int *tgid_map;
2176 
2177 /* The maximum valid index into tgid_map. */
2178 static size_t tgid_map_max;
2179 
2180 #define SAVED_CMDLINES_DEFAULT 128
2181 #define NO_CMDLINE_MAP UINT_MAX
2182 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2183 struct saved_cmdlines_buffer {
2184 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2185 	unsigned *map_cmdline_to_pid;
2186 	unsigned cmdline_num;
2187 	int cmdline_idx;
2188 	char *saved_cmdlines;
2189 };
2190 static struct saved_cmdlines_buffer *savedcmd;
2191 
2192 static inline char *get_saved_cmdlines(int idx)
2193 {
2194 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2195 }
2196 
2197 static inline void set_cmdline(int idx, const char *cmdline)
2198 {
2199 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2200 }
2201 
2202 static int allocate_cmdlines_buffer(unsigned int val,
2203 				    struct saved_cmdlines_buffer *s)
2204 {
2205 	s->map_cmdline_to_pid = kmalloc_array(val,
2206 					      sizeof(*s->map_cmdline_to_pid),
2207 					      GFP_KERNEL);
2208 	if (!s->map_cmdline_to_pid)
2209 		return -ENOMEM;
2210 
2211 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2212 	if (!s->saved_cmdlines) {
2213 		kfree(s->map_cmdline_to_pid);
2214 		return -ENOMEM;
2215 	}
2216 
2217 	s->cmdline_idx = 0;
2218 	s->cmdline_num = val;
2219 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2220 	       sizeof(s->map_pid_to_cmdline));
2221 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2222 	       val * sizeof(*s->map_cmdline_to_pid));
2223 
2224 	return 0;
2225 }
2226 
2227 static int trace_create_savedcmd(void)
2228 {
2229 	int ret;
2230 
2231 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2232 	if (!savedcmd)
2233 		return -ENOMEM;
2234 
2235 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2236 	if (ret < 0) {
2237 		kfree(savedcmd);
2238 		savedcmd = NULL;
2239 		return -ENOMEM;
2240 	}
2241 
2242 	return 0;
2243 }
2244 
2245 int is_tracing_stopped(void)
2246 {
2247 	return global_trace.stop_count;
2248 }
2249 
2250 /**
2251  * tracing_start - quick start of the tracer
2252  *
2253  * If tracing is enabled but was stopped by tracing_stop,
2254  * this will start the tracer back up.
2255  */
2256 void tracing_start(void)
2257 {
2258 	struct trace_buffer *buffer;
2259 	unsigned long flags;
2260 
2261 	if (tracing_disabled)
2262 		return;
2263 
2264 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2265 	if (--global_trace.stop_count) {
2266 		if (global_trace.stop_count < 0) {
2267 			/* Someone screwed up their debugging */
2268 			WARN_ON_ONCE(1);
2269 			global_trace.stop_count = 0;
2270 		}
2271 		goto out;
2272 	}
2273 
2274 	/* Prevent the buffers from switching */
2275 	arch_spin_lock(&global_trace.max_lock);
2276 
2277 	buffer = global_trace.array_buffer.buffer;
2278 	if (buffer)
2279 		ring_buffer_record_enable(buffer);
2280 
2281 #ifdef CONFIG_TRACER_MAX_TRACE
2282 	buffer = global_trace.max_buffer.buffer;
2283 	if (buffer)
2284 		ring_buffer_record_enable(buffer);
2285 #endif
2286 
2287 	arch_spin_unlock(&global_trace.max_lock);
2288 
2289  out:
2290 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2291 }
2292 
2293 static void tracing_start_tr(struct trace_array *tr)
2294 {
2295 	struct trace_buffer *buffer;
2296 	unsigned long flags;
2297 
2298 	if (tracing_disabled)
2299 		return;
2300 
2301 	/* If global, we need to also start the max tracer */
2302 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2303 		return tracing_start();
2304 
2305 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2306 
2307 	if (--tr->stop_count) {
2308 		if (tr->stop_count < 0) {
2309 			/* Someone screwed up their debugging */
2310 			WARN_ON_ONCE(1);
2311 			tr->stop_count = 0;
2312 		}
2313 		goto out;
2314 	}
2315 
2316 	buffer = tr->array_buffer.buffer;
2317 	if (buffer)
2318 		ring_buffer_record_enable(buffer);
2319 
2320  out:
2321 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2322 }
2323 
2324 /**
2325  * tracing_stop - quick stop of the tracer
2326  *
2327  * Light weight way to stop tracing. Use in conjunction with
2328  * tracing_start.
2329  */
2330 void tracing_stop(void)
2331 {
2332 	struct trace_buffer *buffer;
2333 	unsigned long flags;
2334 
2335 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2336 	if (global_trace.stop_count++)
2337 		goto out;
2338 
2339 	/* Prevent the buffers from switching */
2340 	arch_spin_lock(&global_trace.max_lock);
2341 
2342 	buffer = global_trace.array_buffer.buffer;
2343 	if (buffer)
2344 		ring_buffer_record_disable(buffer);
2345 
2346 #ifdef CONFIG_TRACER_MAX_TRACE
2347 	buffer = global_trace.max_buffer.buffer;
2348 	if (buffer)
2349 		ring_buffer_record_disable(buffer);
2350 #endif
2351 
2352 	arch_spin_unlock(&global_trace.max_lock);
2353 
2354  out:
2355 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2356 }
2357 
2358 static void tracing_stop_tr(struct trace_array *tr)
2359 {
2360 	struct trace_buffer *buffer;
2361 	unsigned long flags;
2362 
2363 	/* If global, we need to also stop the max tracer */
2364 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2365 		return tracing_stop();
2366 
2367 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2368 	if (tr->stop_count++)
2369 		goto out;
2370 
2371 	buffer = tr->array_buffer.buffer;
2372 	if (buffer)
2373 		ring_buffer_record_disable(buffer);
2374 
2375  out:
2376 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2377 }
2378 
2379 static int trace_save_cmdline(struct task_struct *tsk)
2380 {
2381 	unsigned tpid, idx;
2382 
2383 	/* treat recording of idle task as a success */
2384 	if (!tsk->pid)
2385 		return 1;
2386 
2387 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2388 
2389 	/*
2390 	 * It's not the end of the world if we don't get
2391 	 * the lock, but we also don't want to spin
2392 	 * nor do we want to disable interrupts,
2393 	 * so if we miss here, then better luck next time.
2394 	 */
2395 	if (!arch_spin_trylock(&trace_cmdline_lock))
2396 		return 0;
2397 
2398 	idx = savedcmd->map_pid_to_cmdline[tpid];
2399 	if (idx == NO_CMDLINE_MAP) {
2400 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2401 
2402 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2403 		savedcmd->cmdline_idx = idx;
2404 	}
2405 
2406 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2407 	set_cmdline(idx, tsk->comm);
2408 
2409 	arch_spin_unlock(&trace_cmdline_lock);
2410 
2411 	return 1;
2412 }
2413 
2414 static void __trace_find_cmdline(int pid, char comm[])
2415 {
2416 	unsigned map;
2417 	int tpid;
2418 
2419 	if (!pid) {
2420 		strcpy(comm, "<idle>");
2421 		return;
2422 	}
2423 
2424 	if (WARN_ON_ONCE(pid < 0)) {
2425 		strcpy(comm, "<XXX>");
2426 		return;
2427 	}
2428 
2429 	tpid = pid & (PID_MAX_DEFAULT - 1);
2430 	map = savedcmd->map_pid_to_cmdline[tpid];
2431 	if (map != NO_CMDLINE_MAP) {
2432 		tpid = savedcmd->map_cmdline_to_pid[map];
2433 		if (tpid == pid) {
2434 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2435 			return;
2436 		}
2437 	}
2438 	strcpy(comm, "<...>");
2439 }
2440 
2441 void trace_find_cmdline(int pid, char comm[])
2442 {
2443 	preempt_disable();
2444 	arch_spin_lock(&trace_cmdline_lock);
2445 
2446 	__trace_find_cmdline(pid, comm);
2447 
2448 	arch_spin_unlock(&trace_cmdline_lock);
2449 	preempt_enable();
2450 }
2451 
2452 static int *trace_find_tgid_ptr(int pid)
2453 {
2454 	/*
2455 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2456 	 * if we observe a non-NULL tgid_map then we also observe the correct
2457 	 * tgid_map_max.
2458 	 */
2459 	int *map = smp_load_acquire(&tgid_map);
2460 
2461 	if (unlikely(!map || pid > tgid_map_max))
2462 		return NULL;
2463 
2464 	return &map[pid];
2465 }
2466 
2467 int trace_find_tgid(int pid)
2468 {
2469 	int *ptr = trace_find_tgid_ptr(pid);
2470 
2471 	return ptr ? *ptr : 0;
2472 }
2473 
2474 static int trace_save_tgid(struct task_struct *tsk)
2475 {
2476 	int *ptr;
2477 
2478 	/* treat recording of idle task as a success */
2479 	if (!tsk->pid)
2480 		return 1;
2481 
2482 	ptr = trace_find_tgid_ptr(tsk->pid);
2483 	if (!ptr)
2484 		return 0;
2485 
2486 	*ptr = tsk->tgid;
2487 	return 1;
2488 }
2489 
2490 static bool tracing_record_taskinfo_skip(int flags)
2491 {
2492 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2493 		return true;
2494 	if (!__this_cpu_read(trace_taskinfo_save))
2495 		return true;
2496 	return false;
2497 }
2498 
2499 /**
2500  * tracing_record_taskinfo - record the task info of a task
2501  *
2502  * @task:  task to record
2503  * @flags: TRACE_RECORD_CMDLINE for recording comm
2504  *         TRACE_RECORD_TGID for recording tgid
2505  */
2506 void tracing_record_taskinfo(struct task_struct *task, int flags)
2507 {
2508 	bool done;
2509 
2510 	if (tracing_record_taskinfo_skip(flags))
2511 		return;
2512 
2513 	/*
2514 	 * Record as much task information as possible. If some fail, continue
2515 	 * to try to record the others.
2516 	 */
2517 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2518 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2519 
2520 	/* If recording any information failed, retry again soon. */
2521 	if (!done)
2522 		return;
2523 
2524 	__this_cpu_write(trace_taskinfo_save, false);
2525 }
2526 
2527 /**
2528  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2529  *
2530  * @prev: previous task during sched_switch
2531  * @next: next task during sched_switch
2532  * @flags: TRACE_RECORD_CMDLINE for recording comm
2533  *         TRACE_RECORD_TGID for recording tgid
2534  */
2535 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2536 					  struct task_struct *next, int flags)
2537 {
2538 	bool done;
2539 
2540 	if (tracing_record_taskinfo_skip(flags))
2541 		return;
2542 
2543 	/*
2544 	 * Record as much task information as possible. If some fail, continue
2545 	 * to try to record the others.
2546 	 */
2547 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2548 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2549 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2550 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2551 
2552 	/* If recording any information failed, retry again soon. */
2553 	if (!done)
2554 		return;
2555 
2556 	__this_cpu_write(trace_taskinfo_save, false);
2557 }
2558 
2559 /* Helpers to record a specific task information */
2560 void tracing_record_cmdline(struct task_struct *task)
2561 {
2562 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2563 }
2564 
2565 void tracing_record_tgid(struct task_struct *task)
2566 {
2567 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2568 }
2569 
2570 /*
2571  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2572  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2573  * simplifies those functions and keeps them in sync.
2574  */
2575 enum print_line_t trace_handle_return(struct trace_seq *s)
2576 {
2577 	return trace_seq_has_overflowed(s) ?
2578 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2579 }
2580 EXPORT_SYMBOL_GPL(trace_handle_return);
2581 
2582 static unsigned short migration_disable_value(void)
2583 {
2584 #if defined(CONFIG_SMP)
2585 	return current->migration_disabled;
2586 #else
2587 	return 0;
2588 #endif
2589 }
2590 
2591 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2592 {
2593 	unsigned int trace_flags = irqs_status;
2594 	unsigned int pc;
2595 
2596 	pc = preempt_count();
2597 
2598 	if (pc & NMI_MASK)
2599 		trace_flags |= TRACE_FLAG_NMI;
2600 	if (pc & HARDIRQ_MASK)
2601 		trace_flags |= TRACE_FLAG_HARDIRQ;
2602 	if (in_serving_softirq())
2603 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2604 
2605 	if (tif_need_resched())
2606 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2607 	if (test_preempt_need_resched())
2608 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2609 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2610 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2611 }
2612 
2613 struct ring_buffer_event *
2614 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2615 			  int type,
2616 			  unsigned long len,
2617 			  unsigned int trace_ctx)
2618 {
2619 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2620 }
2621 
2622 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2623 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2624 static int trace_buffered_event_ref;
2625 
2626 /**
2627  * trace_buffered_event_enable - enable buffering events
2628  *
2629  * When events are being filtered, it is quicker to use a temporary
2630  * buffer to write the event data into if there's a likely chance
2631  * that it will not be committed. The discard of the ring buffer
2632  * is not as fast as committing, and is much slower than copying
2633  * a commit.
2634  *
2635  * When an event is to be filtered, allocate per cpu buffers to
2636  * write the event data into, and if the event is filtered and discarded
2637  * it is simply dropped, otherwise, the entire data is to be committed
2638  * in one shot.
2639  */
2640 void trace_buffered_event_enable(void)
2641 {
2642 	struct ring_buffer_event *event;
2643 	struct page *page;
2644 	int cpu;
2645 
2646 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2647 
2648 	if (trace_buffered_event_ref++)
2649 		return;
2650 
2651 	for_each_tracing_cpu(cpu) {
2652 		page = alloc_pages_node(cpu_to_node(cpu),
2653 					GFP_KERNEL | __GFP_NORETRY, 0);
2654 		if (!page)
2655 			goto failed;
2656 
2657 		event = page_address(page);
2658 		memset(event, 0, sizeof(*event));
2659 
2660 		per_cpu(trace_buffered_event, cpu) = event;
2661 
2662 		preempt_disable();
2663 		if (cpu == smp_processor_id() &&
2664 		    __this_cpu_read(trace_buffered_event) !=
2665 		    per_cpu(trace_buffered_event, cpu))
2666 			WARN_ON_ONCE(1);
2667 		preempt_enable();
2668 	}
2669 
2670 	return;
2671  failed:
2672 	trace_buffered_event_disable();
2673 }
2674 
2675 static void enable_trace_buffered_event(void *data)
2676 {
2677 	/* Probably not needed, but do it anyway */
2678 	smp_rmb();
2679 	this_cpu_dec(trace_buffered_event_cnt);
2680 }
2681 
2682 static void disable_trace_buffered_event(void *data)
2683 {
2684 	this_cpu_inc(trace_buffered_event_cnt);
2685 }
2686 
2687 /**
2688  * trace_buffered_event_disable - disable buffering events
2689  *
2690  * When a filter is removed, it is faster to not use the buffered
2691  * events, and to commit directly into the ring buffer. Free up
2692  * the temp buffers when there are no more users. This requires
2693  * special synchronization with current events.
2694  */
2695 void trace_buffered_event_disable(void)
2696 {
2697 	int cpu;
2698 
2699 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2700 
2701 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2702 		return;
2703 
2704 	if (--trace_buffered_event_ref)
2705 		return;
2706 
2707 	preempt_disable();
2708 	/* For each CPU, set the buffer as used. */
2709 	smp_call_function_many(tracing_buffer_mask,
2710 			       disable_trace_buffered_event, NULL, 1);
2711 	preempt_enable();
2712 
2713 	/* Wait for all current users to finish */
2714 	synchronize_rcu();
2715 
2716 	for_each_tracing_cpu(cpu) {
2717 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2718 		per_cpu(trace_buffered_event, cpu) = NULL;
2719 	}
2720 	/*
2721 	 * Make sure trace_buffered_event is NULL before clearing
2722 	 * trace_buffered_event_cnt.
2723 	 */
2724 	smp_wmb();
2725 
2726 	preempt_disable();
2727 	/* Do the work on each cpu */
2728 	smp_call_function_many(tracing_buffer_mask,
2729 			       enable_trace_buffered_event, NULL, 1);
2730 	preempt_enable();
2731 }
2732 
2733 static struct trace_buffer *temp_buffer;
2734 
2735 struct ring_buffer_event *
2736 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2737 			  struct trace_event_file *trace_file,
2738 			  int type, unsigned long len,
2739 			  unsigned int trace_ctx)
2740 {
2741 	struct ring_buffer_event *entry;
2742 	struct trace_array *tr = trace_file->tr;
2743 	int val;
2744 
2745 	*current_rb = tr->array_buffer.buffer;
2746 
2747 	if (!tr->no_filter_buffering_ref &&
2748 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2749 	    (entry = this_cpu_read(trace_buffered_event))) {
2750 		/*
2751 		 * Filtering is on, so try to use the per cpu buffer first.
2752 		 * This buffer will simulate a ring_buffer_event,
2753 		 * where the type_len is zero and the array[0] will
2754 		 * hold the full length.
2755 		 * (see include/linux/ring-buffer.h for details on
2756 		 *  how the ring_buffer_event is structured).
2757 		 *
2758 		 * Using a temp buffer during filtering and copying it
2759 		 * on a matched filter is quicker than writing directly
2760 		 * into the ring buffer and then discarding it when
2761 		 * it doesn't match. That is because the discard
2762 		 * requires several atomic operations to get right.
2763 		 * Copying on match and doing nothing on a failed match
2764 		 * is still quicker than no copy on match, but having
2765 		 * to discard out of the ring buffer on a failed match.
2766 		 */
2767 		int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2768 
2769 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2770 
2771 		/*
2772 		 * Preemption is disabled, but interrupts and NMIs
2773 		 * can still come in now. If that happens after
2774 		 * the above increment, then it will have to go
2775 		 * back to the old method of allocating the event
2776 		 * on the ring buffer, and if the filter fails, it
2777 		 * will have to call ring_buffer_discard_commit()
2778 		 * to remove it.
2779 		 *
2780 		 * Need to also check the unlikely case that the
2781 		 * length is bigger than the temp buffer size.
2782 		 * If that happens, then the reserve is pretty much
2783 		 * guaranteed to fail, as the ring buffer currently
2784 		 * only allows events less than a page. But that may
2785 		 * change in the future, so let the ring buffer reserve
2786 		 * handle the failure in that case.
2787 		 */
2788 		if (val == 1 && likely(len <= max_len)) {
2789 			trace_event_setup(entry, type, trace_ctx);
2790 			entry->array[0] = len;
2791 			return entry;
2792 		}
2793 		this_cpu_dec(trace_buffered_event_cnt);
2794 	}
2795 
2796 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2797 					    trace_ctx);
2798 	/*
2799 	 * If tracing is off, but we have triggers enabled
2800 	 * we still need to look at the event data. Use the temp_buffer
2801 	 * to store the trace event for the trigger to use. It's recursive
2802 	 * safe and will not be recorded anywhere.
2803 	 */
2804 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2805 		*current_rb = temp_buffer;
2806 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2807 						    trace_ctx);
2808 	}
2809 	return entry;
2810 }
2811 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2812 
2813 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2814 static DEFINE_MUTEX(tracepoint_printk_mutex);
2815 
2816 static void output_printk(struct trace_event_buffer *fbuffer)
2817 {
2818 	struct trace_event_call *event_call;
2819 	struct trace_event_file *file;
2820 	struct trace_event *event;
2821 	unsigned long flags;
2822 	struct trace_iterator *iter = tracepoint_print_iter;
2823 
2824 	/* We should never get here if iter is NULL */
2825 	if (WARN_ON_ONCE(!iter))
2826 		return;
2827 
2828 	event_call = fbuffer->trace_file->event_call;
2829 	if (!event_call || !event_call->event.funcs ||
2830 	    !event_call->event.funcs->trace)
2831 		return;
2832 
2833 	file = fbuffer->trace_file;
2834 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2835 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2836 	     !filter_match_preds(file->filter, fbuffer->entry)))
2837 		return;
2838 
2839 	event = &fbuffer->trace_file->event_call->event;
2840 
2841 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2842 	trace_seq_init(&iter->seq);
2843 	iter->ent = fbuffer->entry;
2844 	event_call->event.funcs->trace(iter, 0, event);
2845 	trace_seq_putc(&iter->seq, 0);
2846 	printk("%s", iter->seq.buffer);
2847 
2848 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2849 }
2850 
2851 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2852 			     void *buffer, size_t *lenp,
2853 			     loff_t *ppos)
2854 {
2855 	int save_tracepoint_printk;
2856 	int ret;
2857 
2858 	mutex_lock(&tracepoint_printk_mutex);
2859 	save_tracepoint_printk = tracepoint_printk;
2860 
2861 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2862 
2863 	/*
2864 	 * This will force exiting early, as tracepoint_printk
2865 	 * is always zero when tracepoint_printk_iter is not allocated
2866 	 */
2867 	if (!tracepoint_print_iter)
2868 		tracepoint_printk = 0;
2869 
2870 	if (save_tracepoint_printk == tracepoint_printk)
2871 		goto out;
2872 
2873 	if (tracepoint_printk)
2874 		static_key_enable(&tracepoint_printk_key.key);
2875 	else
2876 		static_key_disable(&tracepoint_printk_key.key);
2877 
2878  out:
2879 	mutex_unlock(&tracepoint_printk_mutex);
2880 
2881 	return ret;
2882 }
2883 
2884 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2885 {
2886 	enum event_trigger_type tt = ETT_NONE;
2887 	struct trace_event_file *file = fbuffer->trace_file;
2888 
2889 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2890 			fbuffer->entry, &tt))
2891 		goto discard;
2892 
2893 	if (static_key_false(&tracepoint_printk_key.key))
2894 		output_printk(fbuffer);
2895 
2896 	if (static_branch_unlikely(&trace_event_exports_enabled))
2897 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2898 
2899 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2900 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2901 
2902 discard:
2903 	if (tt)
2904 		event_triggers_post_call(file, tt);
2905 
2906 }
2907 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2908 
2909 /*
2910  * Skip 3:
2911  *
2912  *   trace_buffer_unlock_commit_regs()
2913  *   trace_event_buffer_commit()
2914  *   trace_event_raw_event_xxx()
2915  */
2916 # define STACK_SKIP 3
2917 
2918 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2919 				     struct trace_buffer *buffer,
2920 				     struct ring_buffer_event *event,
2921 				     unsigned int trace_ctx,
2922 				     struct pt_regs *regs)
2923 {
2924 	__buffer_unlock_commit(buffer, event);
2925 
2926 	/*
2927 	 * If regs is not set, then skip the necessary functions.
2928 	 * Note, we can still get here via blktrace, wakeup tracer
2929 	 * and mmiotrace, but that's ok if they lose a function or
2930 	 * two. They are not that meaningful.
2931 	 */
2932 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2933 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2934 }
2935 
2936 /*
2937  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2938  */
2939 void
2940 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2941 				   struct ring_buffer_event *event)
2942 {
2943 	__buffer_unlock_commit(buffer, event);
2944 }
2945 
2946 void
2947 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2948 	       parent_ip, unsigned int trace_ctx)
2949 {
2950 	struct trace_event_call *call = &event_function;
2951 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2952 	struct ring_buffer_event *event;
2953 	struct ftrace_entry *entry;
2954 
2955 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2956 					    trace_ctx);
2957 	if (!event)
2958 		return;
2959 	entry	= ring_buffer_event_data(event);
2960 	entry->ip			= ip;
2961 	entry->parent_ip		= parent_ip;
2962 
2963 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2964 		if (static_branch_unlikely(&trace_function_exports_enabled))
2965 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2966 		__buffer_unlock_commit(buffer, event);
2967 	}
2968 }
2969 
2970 #ifdef CONFIG_STACKTRACE
2971 
2972 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2973 #define FTRACE_KSTACK_NESTING	4
2974 
2975 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2976 
2977 struct ftrace_stack {
2978 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2979 };
2980 
2981 
2982 struct ftrace_stacks {
2983 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2984 };
2985 
2986 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2987 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2988 
2989 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2990 				 unsigned int trace_ctx,
2991 				 int skip, struct pt_regs *regs)
2992 {
2993 	struct trace_event_call *call = &event_kernel_stack;
2994 	struct ring_buffer_event *event;
2995 	unsigned int size, nr_entries;
2996 	struct ftrace_stack *fstack;
2997 	struct stack_entry *entry;
2998 	int stackidx;
2999 
3000 	/*
3001 	 * Add one, for this function and the call to save_stack_trace()
3002 	 * If regs is set, then these functions will not be in the way.
3003 	 */
3004 #ifndef CONFIG_UNWINDER_ORC
3005 	if (!regs)
3006 		skip++;
3007 #endif
3008 
3009 	preempt_disable_notrace();
3010 
3011 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3012 
3013 	/* This should never happen. If it does, yell once and skip */
3014 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3015 		goto out;
3016 
3017 	/*
3018 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3019 	 * interrupt will either see the value pre increment or post
3020 	 * increment. If the interrupt happens pre increment it will have
3021 	 * restored the counter when it returns.  We just need a barrier to
3022 	 * keep gcc from moving things around.
3023 	 */
3024 	barrier();
3025 
3026 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3027 	size = ARRAY_SIZE(fstack->calls);
3028 
3029 	if (regs) {
3030 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3031 						   size, skip);
3032 	} else {
3033 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3034 	}
3035 
3036 	size = nr_entries * sizeof(unsigned long);
3037 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3038 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3039 				    trace_ctx);
3040 	if (!event)
3041 		goto out;
3042 	entry = ring_buffer_event_data(event);
3043 
3044 	memcpy(&entry->caller, fstack->calls, size);
3045 	entry->size = nr_entries;
3046 
3047 	if (!call_filter_check_discard(call, entry, buffer, event))
3048 		__buffer_unlock_commit(buffer, event);
3049 
3050  out:
3051 	/* Again, don't let gcc optimize things here */
3052 	barrier();
3053 	__this_cpu_dec(ftrace_stack_reserve);
3054 	preempt_enable_notrace();
3055 
3056 }
3057 
3058 static inline void ftrace_trace_stack(struct trace_array *tr,
3059 				      struct trace_buffer *buffer,
3060 				      unsigned int trace_ctx,
3061 				      int skip, struct pt_regs *regs)
3062 {
3063 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3064 		return;
3065 
3066 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3067 }
3068 
3069 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3070 		   int skip)
3071 {
3072 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3073 
3074 	if (rcu_is_watching()) {
3075 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3076 		return;
3077 	}
3078 
3079 	/*
3080 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3081 	 * but if the above rcu_is_watching() failed, then the NMI
3082 	 * triggered someplace critical, and rcu_irq_enter() should
3083 	 * not be called from NMI.
3084 	 */
3085 	if (unlikely(in_nmi()))
3086 		return;
3087 
3088 	rcu_irq_enter_irqson();
3089 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3090 	rcu_irq_exit_irqson();
3091 }
3092 
3093 /**
3094  * trace_dump_stack - record a stack back trace in the trace buffer
3095  * @skip: Number of functions to skip (helper handlers)
3096  */
3097 void trace_dump_stack(int skip)
3098 {
3099 	if (tracing_disabled || tracing_selftest_running)
3100 		return;
3101 
3102 #ifndef CONFIG_UNWINDER_ORC
3103 	/* Skip 1 to skip this function. */
3104 	skip++;
3105 #endif
3106 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3107 			     tracing_gen_ctx(), skip, NULL);
3108 }
3109 EXPORT_SYMBOL_GPL(trace_dump_stack);
3110 
3111 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3112 static DEFINE_PER_CPU(int, user_stack_count);
3113 
3114 static void
3115 ftrace_trace_userstack(struct trace_array *tr,
3116 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3117 {
3118 	struct trace_event_call *call = &event_user_stack;
3119 	struct ring_buffer_event *event;
3120 	struct userstack_entry *entry;
3121 
3122 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3123 		return;
3124 
3125 	/*
3126 	 * NMIs can not handle page faults, even with fix ups.
3127 	 * The save user stack can (and often does) fault.
3128 	 */
3129 	if (unlikely(in_nmi()))
3130 		return;
3131 
3132 	/*
3133 	 * prevent recursion, since the user stack tracing may
3134 	 * trigger other kernel events.
3135 	 */
3136 	preempt_disable();
3137 	if (__this_cpu_read(user_stack_count))
3138 		goto out;
3139 
3140 	__this_cpu_inc(user_stack_count);
3141 
3142 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3143 					    sizeof(*entry), trace_ctx);
3144 	if (!event)
3145 		goto out_drop_count;
3146 	entry	= ring_buffer_event_data(event);
3147 
3148 	entry->tgid		= current->tgid;
3149 	memset(&entry->caller, 0, sizeof(entry->caller));
3150 
3151 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3152 	if (!call_filter_check_discard(call, entry, buffer, event))
3153 		__buffer_unlock_commit(buffer, event);
3154 
3155  out_drop_count:
3156 	__this_cpu_dec(user_stack_count);
3157  out:
3158 	preempt_enable();
3159 }
3160 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3161 static void ftrace_trace_userstack(struct trace_array *tr,
3162 				   struct trace_buffer *buffer,
3163 				   unsigned int trace_ctx)
3164 {
3165 }
3166 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3167 
3168 #endif /* CONFIG_STACKTRACE */
3169 
3170 static inline void
3171 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3172 			  unsigned long long delta)
3173 {
3174 	entry->bottom_delta_ts = delta & U32_MAX;
3175 	entry->top_delta_ts = (delta >> 32);
3176 }
3177 
3178 void trace_last_func_repeats(struct trace_array *tr,
3179 			     struct trace_func_repeats *last_info,
3180 			     unsigned int trace_ctx)
3181 {
3182 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3183 	struct func_repeats_entry *entry;
3184 	struct ring_buffer_event *event;
3185 	u64 delta;
3186 
3187 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3188 					    sizeof(*entry), trace_ctx);
3189 	if (!event)
3190 		return;
3191 
3192 	delta = ring_buffer_event_time_stamp(buffer, event) -
3193 		last_info->ts_last_call;
3194 
3195 	entry = ring_buffer_event_data(event);
3196 	entry->ip = last_info->ip;
3197 	entry->parent_ip = last_info->parent_ip;
3198 	entry->count = last_info->count;
3199 	func_repeats_set_delta_ts(entry, delta);
3200 
3201 	__buffer_unlock_commit(buffer, event);
3202 }
3203 
3204 /* created for use with alloc_percpu */
3205 struct trace_buffer_struct {
3206 	int nesting;
3207 	char buffer[4][TRACE_BUF_SIZE];
3208 };
3209 
3210 static struct trace_buffer_struct *trace_percpu_buffer;
3211 
3212 /*
3213  * This allows for lockless recording.  If we're nested too deeply, then
3214  * this returns NULL.
3215  */
3216 static char *get_trace_buf(void)
3217 {
3218 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3219 
3220 	if (!buffer || buffer->nesting >= 4)
3221 		return NULL;
3222 
3223 	buffer->nesting++;
3224 
3225 	/* Interrupts must see nesting incremented before we use the buffer */
3226 	barrier();
3227 	return &buffer->buffer[buffer->nesting - 1][0];
3228 }
3229 
3230 static void put_trace_buf(void)
3231 {
3232 	/* Don't let the decrement of nesting leak before this */
3233 	barrier();
3234 	this_cpu_dec(trace_percpu_buffer->nesting);
3235 }
3236 
3237 static int alloc_percpu_trace_buffer(void)
3238 {
3239 	struct trace_buffer_struct *buffers;
3240 
3241 	if (trace_percpu_buffer)
3242 		return 0;
3243 
3244 	buffers = alloc_percpu(struct trace_buffer_struct);
3245 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3246 		return -ENOMEM;
3247 
3248 	trace_percpu_buffer = buffers;
3249 	return 0;
3250 }
3251 
3252 static int buffers_allocated;
3253 
3254 void trace_printk_init_buffers(void)
3255 {
3256 	if (buffers_allocated)
3257 		return;
3258 
3259 	if (alloc_percpu_trace_buffer())
3260 		return;
3261 
3262 	/* trace_printk() is for debug use only. Don't use it in production. */
3263 
3264 	pr_warn("\n");
3265 	pr_warn("**********************************************************\n");
3266 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3267 	pr_warn("**                                                      **\n");
3268 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3269 	pr_warn("**                                                      **\n");
3270 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3271 	pr_warn("** unsafe for production use.                           **\n");
3272 	pr_warn("**                                                      **\n");
3273 	pr_warn("** If you see this message and you are not debugging    **\n");
3274 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3275 	pr_warn("**                                                      **\n");
3276 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3277 	pr_warn("**********************************************************\n");
3278 
3279 	/* Expand the buffers to set size */
3280 	tracing_update_buffers();
3281 
3282 	buffers_allocated = 1;
3283 
3284 	/*
3285 	 * trace_printk_init_buffers() can be called by modules.
3286 	 * If that happens, then we need to start cmdline recording
3287 	 * directly here. If the global_trace.buffer is already
3288 	 * allocated here, then this was called by module code.
3289 	 */
3290 	if (global_trace.array_buffer.buffer)
3291 		tracing_start_cmdline_record();
3292 }
3293 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3294 
3295 void trace_printk_start_comm(void)
3296 {
3297 	/* Start tracing comms if trace printk is set */
3298 	if (!buffers_allocated)
3299 		return;
3300 	tracing_start_cmdline_record();
3301 }
3302 
3303 static void trace_printk_start_stop_comm(int enabled)
3304 {
3305 	if (!buffers_allocated)
3306 		return;
3307 
3308 	if (enabled)
3309 		tracing_start_cmdline_record();
3310 	else
3311 		tracing_stop_cmdline_record();
3312 }
3313 
3314 /**
3315  * trace_vbprintk - write binary msg to tracing buffer
3316  * @ip:    The address of the caller
3317  * @fmt:   The string format to write to the buffer
3318  * @args:  Arguments for @fmt
3319  */
3320 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3321 {
3322 	struct trace_event_call *call = &event_bprint;
3323 	struct ring_buffer_event *event;
3324 	struct trace_buffer *buffer;
3325 	struct trace_array *tr = &global_trace;
3326 	struct bprint_entry *entry;
3327 	unsigned int trace_ctx;
3328 	char *tbuffer;
3329 	int len = 0, size;
3330 
3331 	if (unlikely(tracing_selftest_running || tracing_disabled))
3332 		return 0;
3333 
3334 	/* Don't pollute graph traces with trace_vprintk internals */
3335 	pause_graph_tracing();
3336 
3337 	trace_ctx = tracing_gen_ctx();
3338 	preempt_disable_notrace();
3339 
3340 	tbuffer = get_trace_buf();
3341 	if (!tbuffer) {
3342 		len = 0;
3343 		goto out_nobuffer;
3344 	}
3345 
3346 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3347 
3348 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3349 		goto out_put;
3350 
3351 	size = sizeof(*entry) + sizeof(u32) * len;
3352 	buffer = tr->array_buffer.buffer;
3353 	ring_buffer_nest_start(buffer);
3354 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3355 					    trace_ctx);
3356 	if (!event)
3357 		goto out;
3358 	entry = ring_buffer_event_data(event);
3359 	entry->ip			= ip;
3360 	entry->fmt			= fmt;
3361 
3362 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3363 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3364 		__buffer_unlock_commit(buffer, event);
3365 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3366 	}
3367 
3368 out:
3369 	ring_buffer_nest_end(buffer);
3370 out_put:
3371 	put_trace_buf();
3372 
3373 out_nobuffer:
3374 	preempt_enable_notrace();
3375 	unpause_graph_tracing();
3376 
3377 	return len;
3378 }
3379 EXPORT_SYMBOL_GPL(trace_vbprintk);
3380 
3381 __printf(3, 0)
3382 static int
3383 __trace_array_vprintk(struct trace_buffer *buffer,
3384 		      unsigned long ip, const char *fmt, va_list args)
3385 {
3386 	struct trace_event_call *call = &event_print;
3387 	struct ring_buffer_event *event;
3388 	int len = 0, size;
3389 	struct print_entry *entry;
3390 	unsigned int trace_ctx;
3391 	char *tbuffer;
3392 
3393 	if (tracing_disabled || tracing_selftest_running)
3394 		return 0;
3395 
3396 	/* Don't pollute graph traces with trace_vprintk internals */
3397 	pause_graph_tracing();
3398 
3399 	trace_ctx = tracing_gen_ctx();
3400 	preempt_disable_notrace();
3401 
3402 
3403 	tbuffer = get_trace_buf();
3404 	if (!tbuffer) {
3405 		len = 0;
3406 		goto out_nobuffer;
3407 	}
3408 
3409 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3410 
3411 	size = sizeof(*entry) + len + 1;
3412 	ring_buffer_nest_start(buffer);
3413 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3414 					    trace_ctx);
3415 	if (!event)
3416 		goto out;
3417 	entry = ring_buffer_event_data(event);
3418 	entry->ip = ip;
3419 
3420 	memcpy(&entry->buf, tbuffer, len + 1);
3421 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3422 		__buffer_unlock_commit(buffer, event);
3423 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3424 	}
3425 
3426 out:
3427 	ring_buffer_nest_end(buffer);
3428 	put_trace_buf();
3429 
3430 out_nobuffer:
3431 	preempt_enable_notrace();
3432 	unpause_graph_tracing();
3433 
3434 	return len;
3435 }
3436 
3437 __printf(3, 0)
3438 int trace_array_vprintk(struct trace_array *tr,
3439 			unsigned long ip, const char *fmt, va_list args)
3440 {
3441 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3442 }
3443 
3444 /**
3445  * trace_array_printk - Print a message to a specific instance
3446  * @tr: The instance trace_array descriptor
3447  * @ip: The instruction pointer that this is called from.
3448  * @fmt: The format to print (printf format)
3449  *
3450  * If a subsystem sets up its own instance, they have the right to
3451  * printk strings into their tracing instance buffer using this
3452  * function. Note, this function will not write into the top level
3453  * buffer (use trace_printk() for that), as writing into the top level
3454  * buffer should only have events that can be individually disabled.
3455  * trace_printk() is only used for debugging a kernel, and should not
3456  * be ever incorporated in normal use.
3457  *
3458  * trace_array_printk() can be used, as it will not add noise to the
3459  * top level tracing buffer.
3460  *
3461  * Note, trace_array_init_printk() must be called on @tr before this
3462  * can be used.
3463  */
3464 __printf(3, 0)
3465 int trace_array_printk(struct trace_array *tr,
3466 		       unsigned long ip, const char *fmt, ...)
3467 {
3468 	int ret;
3469 	va_list ap;
3470 
3471 	if (!tr)
3472 		return -ENOENT;
3473 
3474 	/* This is only allowed for created instances */
3475 	if (tr == &global_trace)
3476 		return 0;
3477 
3478 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3479 		return 0;
3480 
3481 	va_start(ap, fmt);
3482 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3483 	va_end(ap);
3484 	return ret;
3485 }
3486 EXPORT_SYMBOL_GPL(trace_array_printk);
3487 
3488 /**
3489  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3490  * @tr: The trace array to initialize the buffers for
3491  *
3492  * As trace_array_printk() only writes into instances, they are OK to
3493  * have in the kernel (unlike trace_printk()). This needs to be called
3494  * before trace_array_printk() can be used on a trace_array.
3495  */
3496 int trace_array_init_printk(struct trace_array *tr)
3497 {
3498 	if (!tr)
3499 		return -ENOENT;
3500 
3501 	/* This is only allowed for created instances */
3502 	if (tr == &global_trace)
3503 		return -EINVAL;
3504 
3505 	return alloc_percpu_trace_buffer();
3506 }
3507 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3508 
3509 __printf(3, 4)
3510 int trace_array_printk_buf(struct trace_buffer *buffer,
3511 			   unsigned long ip, const char *fmt, ...)
3512 {
3513 	int ret;
3514 	va_list ap;
3515 
3516 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3517 		return 0;
3518 
3519 	va_start(ap, fmt);
3520 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3521 	va_end(ap);
3522 	return ret;
3523 }
3524 
3525 __printf(2, 0)
3526 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3527 {
3528 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3529 }
3530 EXPORT_SYMBOL_GPL(trace_vprintk);
3531 
3532 static void trace_iterator_increment(struct trace_iterator *iter)
3533 {
3534 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3535 
3536 	iter->idx++;
3537 	if (buf_iter)
3538 		ring_buffer_iter_advance(buf_iter);
3539 }
3540 
3541 static struct trace_entry *
3542 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3543 		unsigned long *lost_events)
3544 {
3545 	struct ring_buffer_event *event;
3546 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3547 
3548 	if (buf_iter) {
3549 		event = ring_buffer_iter_peek(buf_iter, ts);
3550 		if (lost_events)
3551 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3552 				(unsigned long)-1 : 0;
3553 	} else {
3554 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3555 					 lost_events);
3556 	}
3557 
3558 	if (event) {
3559 		iter->ent_size = ring_buffer_event_length(event);
3560 		return ring_buffer_event_data(event);
3561 	}
3562 	iter->ent_size = 0;
3563 	return NULL;
3564 }
3565 
3566 static struct trace_entry *
3567 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3568 		  unsigned long *missing_events, u64 *ent_ts)
3569 {
3570 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3571 	struct trace_entry *ent, *next = NULL;
3572 	unsigned long lost_events = 0, next_lost = 0;
3573 	int cpu_file = iter->cpu_file;
3574 	u64 next_ts = 0, ts;
3575 	int next_cpu = -1;
3576 	int next_size = 0;
3577 	int cpu;
3578 
3579 	/*
3580 	 * If we are in a per_cpu trace file, don't bother by iterating over
3581 	 * all cpu and peek directly.
3582 	 */
3583 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3584 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3585 			return NULL;
3586 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3587 		if (ent_cpu)
3588 			*ent_cpu = cpu_file;
3589 
3590 		return ent;
3591 	}
3592 
3593 	for_each_tracing_cpu(cpu) {
3594 
3595 		if (ring_buffer_empty_cpu(buffer, cpu))
3596 			continue;
3597 
3598 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3599 
3600 		/*
3601 		 * Pick the entry with the smallest timestamp:
3602 		 */
3603 		if (ent && (!next || ts < next_ts)) {
3604 			next = ent;
3605 			next_cpu = cpu;
3606 			next_ts = ts;
3607 			next_lost = lost_events;
3608 			next_size = iter->ent_size;
3609 		}
3610 	}
3611 
3612 	iter->ent_size = next_size;
3613 
3614 	if (ent_cpu)
3615 		*ent_cpu = next_cpu;
3616 
3617 	if (ent_ts)
3618 		*ent_ts = next_ts;
3619 
3620 	if (missing_events)
3621 		*missing_events = next_lost;
3622 
3623 	return next;
3624 }
3625 
3626 #define STATIC_FMT_BUF_SIZE	128
3627 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3628 
3629 static char *trace_iter_expand_format(struct trace_iterator *iter)
3630 {
3631 	char *tmp;
3632 
3633 	/*
3634 	 * iter->tr is NULL when used with tp_printk, which makes
3635 	 * this get called where it is not safe to call krealloc().
3636 	 */
3637 	if (!iter->tr || iter->fmt == static_fmt_buf)
3638 		return NULL;
3639 
3640 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3641 		       GFP_KERNEL);
3642 	if (tmp) {
3643 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3644 		iter->fmt = tmp;
3645 	}
3646 
3647 	return tmp;
3648 }
3649 
3650 /* Returns true if the string is safe to dereference from an event */
3651 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3652 {
3653 	unsigned long addr = (unsigned long)str;
3654 	struct trace_event *trace_event;
3655 	struct trace_event_call *event;
3656 
3657 	/* OK if part of the event data */
3658 	if ((addr >= (unsigned long)iter->ent) &&
3659 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3660 		return true;
3661 
3662 	/* OK if part of the temp seq buffer */
3663 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3664 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3665 		return true;
3666 
3667 	/* Core rodata can not be freed */
3668 	if (is_kernel_rodata(addr))
3669 		return true;
3670 
3671 	if (trace_is_tracepoint_string(str))
3672 		return true;
3673 
3674 	/*
3675 	 * Now this could be a module event, referencing core module
3676 	 * data, which is OK.
3677 	 */
3678 	if (!iter->ent)
3679 		return false;
3680 
3681 	trace_event = ftrace_find_event(iter->ent->type);
3682 	if (!trace_event)
3683 		return false;
3684 
3685 	event = container_of(trace_event, struct trace_event_call, event);
3686 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3687 		return false;
3688 
3689 	/* Would rather have rodata, but this will suffice */
3690 	if (within_module_core(addr, event->module))
3691 		return true;
3692 
3693 	return false;
3694 }
3695 
3696 static const char *show_buffer(struct trace_seq *s)
3697 {
3698 	struct seq_buf *seq = &s->seq;
3699 
3700 	seq_buf_terminate(seq);
3701 
3702 	return seq->buffer;
3703 }
3704 
3705 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3706 
3707 static int test_can_verify_check(const char *fmt, ...)
3708 {
3709 	char buf[16];
3710 	va_list ap;
3711 	int ret;
3712 
3713 	/*
3714 	 * The verifier is dependent on vsnprintf() modifies the va_list
3715 	 * passed to it, where it is sent as a reference. Some architectures
3716 	 * (like x86_32) passes it by value, which means that vsnprintf()
3717 	 * does not modify the va_list passed to it, and the verifier
3718 	 * would then need to be able to understand all the values that
3719 	 * vsnprintf can use. If it is passed by value, then the verifier
3720 	 * is disabled.
3721 	 */
3722 	va_start(ap, fmt);
3723 	vsnprintf(buf, 16, "%d", ap);
3724 	ret = va_arg(ap, int);
3725 	va_end(ap);
3726 
3727 	return ret;
3728 }
3729 
3730 static void test_can_verify(void)
3731 {
3732 	if (!test_can_verify_check("%d %d", 0, 1)) {
3733 		pr_info("trace event string verifier disabled\n");
3734 		static_branch_inc(&trace_no_verify);
3735 	}
3736 }
3737 
3738 /**
3739  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3740  * @iter: The iterator that holds the seq buffer and the event being printed
3741  * @fmt: The format used to print the event
3742  * @ap: The va_list holding the data to print from @fmt.
3743  *
3744  * This writes the data into the @iter->seq buffer using the data from
3745  * @fmt and @ap. If the format has a %s, then the source of the string
3746  * is examined to make sure it is safe to print, otherwise it will
3747  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3748  * pointer.
3749  */
3750 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3751 			 va_list ap)
3752 {
3753 	const char *p = fmt;
3754 	const char *str;
3755 	int i, j;
3756 
3757 	if (WARN_ON_ONCE(!fmt))
3758 		return;
3759 
3760 	if (static_branch_unlikely(&trace_no_verify))
3761 		goto print;
3762 
3763 	/* Don't bother checking when doing a ftrace_dump() */
3764 	if (iter->fmt == static_fmt_buf)
3765 		goto print;
3766 
3767 	while (*p) {
3768 		bool star = false;
3769 		int len = 0;
3770 
3771 		j = 0;
3772 
3773 		/* We only care about %s and variants */
3774 		for (i = 0; p[i]; i++) {
3775 			if (i + 1 >= iter->fmt_size) {
3776 				/*
3777 				 * If we can't expand the copy buffer,
3778 				 * just print it.
3779 				 */
3780 				if (!trace_iter_expand_format(iter))
3781 					goto print;
3782 			}
3783 
3784 			if (p[i] == '\\' && p[i+1]) {
3785 				i++;
3786 				continue;
3787 			}
3788 			if (p[i] == '%') {
3789 				/* Need to test cases like %08.*s */
3790 				for (j = 1; p[i+j]; j++) {
3791 					if (isdigit(p[i+j]) ||
3792 					    p[i+j] == '.')
3793 						continue;
3794 					if (p[i+j] == '*') {
3795 						star = true;
3796 						continue;
3797 					}
3798 					break;
3799 				}
3800 				if (p[i+j] == 's')
3801 					break;
3802 				star = false;
3803 			}
3804 			j = 0;
3805 		}
3806 		/* If no %s found then just print normally */
3807 		if (!p[i])
3808 			break;
3809 
3810 		/* Copy up to the %s, and print that */
3811 		strncpy(iter->fmt, p, i);
3812 		iter->fmt[i] = '\0';
3813 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3814 
3815 		if (star)
3816 			len = va_arg(ap, int);
3817 
3818 		/* The ap now points to the string data of the %s */
3819 		str = va_arg(ap, const char *);
3820 
3821 		/*
3822 		 * If you hit this warning, it is likely that the
3823 		 * trace event in question used %s on a string that
3824 		 * was saved at the time of the event, but may not be
3825 		 * around when the trace is read. Use __string(),
3826 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3827 		 * instead. See samples/trace_events/trace-events-sample.h
3828 		 * for reference.
3829 		 */
3830 		if (WARN_ONCE(!trace_safe_str(iter, str),
3831 			      "fmt: '%s' current_buffer: '%s'",
3832 			      fmt, show_buffer(&iter->seq))) {
3833 			int ret;
3834 
3835 			/* Try to safely read the string */
3836 			if (star) {
3837 				if (len + 1 > iter->fmt_size)
3838 					len = iter->fmt_size - 1;
3839 				if (len < 0)
3840 					len = 0;
3841 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3842 				iter->fmt[len] = 0;
3843 				star = false;
3844 			} else {
3845 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3846 								  iter->fmt_size);
3847 			}
3848 			if (ret < 0)
3849 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3850 			else
3851 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3852 						 str, iter->fmt);
3853 			str = "[UNSAFE-MEMORY]";
3854 			strcpy(iter->fmt, "%s");
3855 		} else {
3856 			strncpy(iter->fmt, p + i, j + 1);
3857 			iter->fmt[j+1] = '\0';
3858 		}
3859 		if (star)
3860 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3861 		else
3862 			trace_seq_printf(&iter->seq, iter->fmt, str);
3863 
3864 		p += i + j + 1;
3865 	}
3866  print:
3867 	if (*p)
3868 		trace_seq_vprintf(&iter->seq, p, ap);
3869 }
3870 
3871 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3872 {
3873 	const char *p, *new_fmt;
3874 	char *q;
3875 
3876 	if (WARN_ON_ONCE(!fmt))
3877 		return fmt;
3878 
3879 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3880 		return fmt;
3881 
3882 	p = fmt;
3883 	new_fmt = q = iter->fmt;
3884 	while (*p) {
3885 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3886 			if (!trace_iter_expand_format(iter))
3887 				return fmt;
3888 
3889 			q += iter->fmt - new_fmt;
3890 			new_fmt = iter->fmt;
3891 		}
3892 
3893 		*q++ = *p++;
3894 
3895 		/* Replace %p with %px */
3896 		if (p[-1] == '%') {
3897 			if (p[0] == '%') {
3898 				*q++ = *p++;
3899 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3900 				*q++ = *p++;
3901 				*q++ = 'x';
3902 			}
3903 		}
3904 	}
3905 	*q = '\0';
3906 
3907 	return new_fmt;
3908 }
3909 
3910 #define STATIC_TEMP_BUF_SIZE	128
3911 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3912 
3913 /* Find the next real entry, without updating the iterator itself */
3914 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3915 					  int *ent_cpu, u64 *ent_ts)
3916 {
3917 	/* __find_next_entry will reset ent_size */
3918 	int ent_size = iter->ent_size;
3919 	struct trace_entry *entry;
3920 
3921 	/*
3922 	 * If called from ftrace_dump(), then the iter->temp buffer
3923 	 * will be the static_temp_buf and not created from kmalloc.
3924 	 * If the entry size is greater than the buffer, we can
3925 	 * not save it. Just return NULL in that case. This is only
3926 	 * used to add markers when two consecutive events' time
3927 	 * stamps have a large delta. See trace_print_lat_context()
3928 	 */
3929 	if (iter->temp == static_temp_buf &&
3930 	    STATIC_TEMP_BUF_SIZE < ent_size)
3931 		return NULL;
3932 
3933 	/*
3934 	 * The __find_next_entry() may call peek_next_entry(), which may
3935 	 * call ring_buffer_peek() that may make the contents of iter->ent
3936 	 * undefined. Need to copy iter->ent now.
3937 	 */
3938 	if (iter->ent && iter->ent != iter->temp) {
3939 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3940 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3941 			void *temp;
3942 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3943 			if (!temp)
3944 				return NULL;
3945 			kfree(iter->temp);
3946 			iter->temp = temp;
3947 			iter->temp_size = iter->ent_size;
3948 		}
3949 		memcpy(iter->temp, iter->ent, iter->ent_size);
3950 		iter->ent = iter->temp;
3951 	}
3952 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3953 	/* Put back the original ent_size */
3954 	iter->ent_size = ent_size;
3955 
3956 	return entry;
3957 }
3958 
3959 /* Find the next real entry, and increment the iterator to the next entry */
3960 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3961 {
3962 	iter->ent = __find_next_entry(iter, &iter->cpu,
3963 				      &iter->lost_events, &iter->ts);
3964 
3965 	if (iter->ent)
3966 		trace_iterator_increment(iter);
3967 
3968 	return iter->ent ? iter : NULL;
3969 }
3970 
3971 static void trace_consume(struct trace_iterator *iter)
3972 {
3973 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3974 			    &iter->lost_events);
3975 }
3976 
3977 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3978 {
3979 	struct trace_iterator *iter = m->private;
3980 	int i = (int)*pos;
3981 	void *ent;
3982 
3983 	WARN_ON_ONCE(iter->leftover);
3984 
3985 	(*pos)++;
3986 
3987 	/* can't go backwards */
3988 	if (iter->idx > i)
3989 		return NULL;
3990 
3991 	if (iter->idx < 0)
3992 		ent = trace_find_next_entry_inc(iter);
3993 	else
3994 		ent = iter;
3995 
3996 	while (ent && iter->idx < i)
3997 		ent = trace_find_next_entry_inc(iter);
3998 
3999 	iter->pos = *pos;
4000 
4001 	return ent;
4002 }
4003 
4004 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4005 {
4006 	struct ring_buffer_iter *buf_iter;
4007 	unsigned long entries = 0;
4008 	u64 ts;
4009 
4010 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4011 
4012 	buf_iter = trace_buffer_iter(iter, cpu);
4013 	if (!buf_iter)
4014 		return;
4015 
4016 	ring_buffer_iter_reset(buf_iter);
4017 
4018 	/*
4019 	 * We could have the case with the max latency tracers
4020 	 * that a reset never took place on a cpu. This is evident
4021 	 * by the timestamp being before the start of the buffer.
4022 	 */
4023 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4024 		if (ts >= iter->array_buffer->time_start)
4025 			break;
4026 		entries++;
4027 		ring_buffer_iter_advance(buf_iter);
4028 	}
4029 
4030 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4031 }
4032 
4033 /*
4034  * The current tracer is copied to avoid a global locking
4035  * all around.
4036  */
4037 static void *s_start(struct seq_file *m, loff_t *pos)
4038 {
4039 	struct trace_iterator *iter = m->private;
4040 	struct trace_array *tr = iter->tr;
4041 	int cpu_file = iter->cpu_file;
4042 	void *p = NULL;
4043 	loff_t l = 0;
4044 	int cpu;
4045 
4046 	/*
4047 	 * copy the tracer to avoid using a global lock all around.
4048 	 * iter->trace is a copy of current_trace, the pointer to the
4049 	 * name may be used instead of a strcmp(), as iter->trace->name
4050 	 * will point to the same string as current_trace->name.
4051 	 */
4052 	mutex_lock(&trace_types_lock);
4053 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4054 		*iter->trace = *tr->current_trace;
4055 	mutex_unlock(&trace_types_lock);
4056 
4057 #ifdef CONFIG_TRACER_MAX_TRACE
4058 	if (iter->snapshot && iter->trace->use_max_tr)
4059 		return ERR_PTR(-EBUSY);
4060 #endif
4061 
4062 	if (*pos != iter->pos) {
4063 		iter->ent = NULL;
4064 		iter->cpu = 0;
4065 		iter->idx = -1;
4066 
4067 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4068 			for_each_tracing_cpu(cpu)
4069 				tracing_iter_reset(iter, cpu);
4070 		} else
4071 			tracing_iter_reset(iter, cpu_file);
4072 
4073 		iter->leftover = 0;
4074 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4075 			;
4076 
4077 	} else {
4078 		/*
4079 		 * If we overflowed the seq_file before, then we want
4080 		 * to just reuse the trace_seq buffer again.
4081 		 */
4082 		if (iter->leftover)
4083 			p = iter;
4084 		else {
4085 			l = *pos - 1;
4086 			p = s_next(m, p, &l);
4087 		}
4088 	}
4089 
4090 	trace_event_read_lock();
4091 	trace_access_lock(cpu_file);
4092 	return p;
4093 }
4094 
4095 static void s_stop(struct seq_file *m, void *p)
4096 {
4097 	struct trace_iterator *iter = m->private;
4098 
4099 #ifdef CONFIG_TRACER_MAX_TRACE
4100 	if (iter->snapshot && iter->trace->use_max_tr)
4101 		return;
4102 #endif
4103 
4104 	trace_access_unlock(iter->cpu_file);
4105 	trace_event_read_unlock();
4106 }
4107 
4108 static void
4109 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4110 		      unsigned long *entries, int cpu)
4111 {
4112 	unsigned long count;
4113 
4114 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4115 	/*
4116 	 * If this buffer has skipped entries, then we hold all
4117 	 * entries for the trace and we need to ignore the
4118 	 * ones before the time stamp.
4119 	 */
4120 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4121 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4122 		/* total is the same as the entries */
4123 		*total = count;
4124 	} else
4125 		*total = count +
4126 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4127 	*entries = count;
4128 }
4129 
4130 static void
4131 get_total_entries(struct array_buffer *buf,
4132 		  unsigned long *total, unsigned long *entries)
4133 {
4134 	unsigned long t, e;
4135 	int cpu;
4136 
4137 	*total = 0;
4138 	*entries = 0;
4139 
4140 	for_each_tracing_cpu(cpu) {
4141 		get_total_entries_cpu(buf, &t, &e, cpu);
4142 		*total += t;
4143 		*entries += e;
4144 	}
4145 }
4146 
4147 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4148 {
4149 	unsigned long total, entries;
4150 
4151 	if (!tr)
4152 		tr = &global_trace;
4153 
4154 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4155 
4156 	return entries;
4157 }
4158 
4159 unsigned long trace_total_entries(struct trace_array *tr)
4160 {
4161 	unsigned long total, entries;
4162 
4163 	if (!tr)
4164 		tr = &global_trace;
4165 
4166 	get_total_entries(&tr->array_buffer, &total, &entries);
4167 
4168 	return entries;
4169 }
4170 
4171 static void print_lat_help_header(struct seq_file *m)
4172 {
4173 	seq_puts(m, "#                    _------=> CPU#            \n"
4174 		    "#                   / _-----=> irqs-off        \n"
4175 		    "#                  | / _----=> need-resched    \n"
4176 		    "#                  || / _---=> hardirq/softirq \n"
4177 		    "#                  ||| / _--=> preempt-depth   \n"
4178 		    "#                  |||| / _-=> migrate-disable \n"
4179 		    "#                  ||||| /     delay           \n"
4180 		    "#  cmd     pid     |||||| time  |   caller     \n"
4181 		    "#     \\   /        ||||||  \\    |    /       \n");
4182 }
4183 
4184 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4185 {
4186 	unsigned long total;
4187 	unsigned long entries;
4188 
4189 	get_total_entries(buf, &total, &entries);
4190 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4191 		   entries, total, num_online_cpus());
4192 	seq_puts(m, "#\n");
4193 }
4194 
4195 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4196 				   unsigned int flags)
4197 {
4198 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4199 
4200 	print_event_info(buf, m);
4201 
4202 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4203 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4204 }
4205 
4206 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4207 				       unsigned int flags)
4208 {
4209 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4210 	const char *space = "            ";
4211 	int prec = tgid ? 12 : 2;
4212 
4213 	print_event_info(buf, m);
4214 
4215 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4216 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4217 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4218 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4219 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4220 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4221 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4222 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4223 }
4224 
4225 void
4226 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4227 {
4228 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4229 	struct array_buffer *buf = iter->array_buffer;
4230 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4231 	struct tracer *type = iter->trace;
4232 	unsigned long entries;
4233 	unsigned long total;
4234 	const char *name = "preemption";
4235 
4236 	name = type->name;
4237 
4238 	get_total_entries(buf, &total, &entries);
4239 
4240 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4241 		   name, UTS_RELEASE);
4242 	seq_puts(m, "# -----------------------------------"
4243 		 "---------------------------------\n");
4244 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4245 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4246 		   nsecs_to_usecs(data->saved_latency),
4247 		   entries,
4248 		   total,
4249 		   buf->cpu,
4250 #if defined(CONFIG_PREEMPT_NONE)
4251 		   "server",
4252 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4253 		   "desktop",
4254 #elif defined(CONFIG_PREEMPT)
4255 		   "preempt",
4256 #elif defined(CONFIG_PREEMPT_RT)
4257 		   "preempt_rt",
4258 #else
4259 		   "unknown",
4260 #endif
4261 		   /* These are reserved for later use */
4262 		   0, 0, 0, 0);
4263 #ifdef CONFIG_SMP
4264 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4265 #else
4266 	seq_puts(m, ")\n");
4267 #endif
4268 	seq_puts(m, "#    -----------------\n");
4269 	seq_printf(m, "#    | task: %.16s-%d "
4270 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4271 		   data->comm, data->pid,
4272 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4273 		   data->policy, data->rt_priority);
4274 	seq_puts(m, "#    -----------------\n");
4275 
4276 	if (data->critical_start) {
4277 		seq_puts(m, "#  => started at: ");
4278 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4279 		trace_print_seq(m, &iter->seq);
4280 		seq_puts(m, "\n#  => ended at:   ");
4281 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4282 		trace_print_seq(m, &iter->seq);
4283 		seq_puts(m, "\n#\n");
4284 	}
4285 
4286 	seq_puts(m, "#\n");
4287 }
4288 
4289 static void test_cpu_buff_start(struct trace_iterator *iter)
4290 {
4291 	struct trace_seq *s = &iter->seq;
4292 	struct trace_array *tr = iter->tr;
4293 
4294 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4295 		return;
4296 
4297 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4298 		return;
4299 
4300 	if (cpumask_available(iter->started) &&
4301 	    cpumask_test_cpu(iter->cpu, iter->started))
4302 		return;
4303 
4304 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4305 		return;
4306 
4307 	if (cpumask_available(iter->started))
4308 		cpumask_set_cpu(iter->cpu, iter->started);
4309 
4310 	/* Don't print started cpu buffer for the first entry of the trace */
4311 	if (iter->idx > 1)
4312 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4313 				iter->cpu);
4314 }
4315 
4316 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4317 {
4318 	struct trace_array *tr = iter->tr;
4319 	struct trace_seq *s = &iter->seq;
4320 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4321 	struct trace_entry *entry;
4322 	struct trace_event *event;
4323 
4324 	entry = iter->ent;
4325 
4326 	test_cpu_buff_start(iter);
4327 
4328 	event = ftrace_find_event(entry->type);
4329 
4330 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4331 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4332 			trace_print_lat_context(iter);
4333 		else
4334 			trace_print_context(iter);
4335 	}
4336 
4337 	if (trace_seq_has_overflowed(s))
4338 		return TRACE_TYPE_PARTIAL_LINE;
4339 
4340 	if (event)
4341 		return event->funcs->trace(iter, sym_flags, event);
4342 
4343 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4344 
4345 	return trace_handle_return(s);
4346 }
4347 
4348 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4349 {
4350 	struct trace_array *tr = iter->tr;
4351 	struct trace_seq *s = &iter->seq;
4352 	struct trace_entry *entry;
4353 	struct trace_event *event;
4354 
4355 	entry = iter->ent;
4356 
4357 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4358 		trace_seq_printf(s, "%d %d %llu ",
4359 				 entry->pid, iter->cpu, iter->ts);
4360 
4361 	if (trace_seq_has_overflowed(s))
4362 		return TRACE_TYPE_PARTIAL_LINE;
4363 
4364 	event = ftrace_find_event(entry->type);
4365 	if (event)
4366 		return event->funcs->raw(iter, 0, event);
4367 
4368 	trace_seq_printf(s, "%d ?\n", entry->type);
4369 
4370 	return trace_handle_return(s);
4371 }
4372 
4373 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4374 {
4375 	struct trace_array *tr = iter->tr;
4376 	struct trace_seq *s = &iter->seq;
4377 	unsigned char newline = '\n';
4378 	struct trace_entry *entry;
4379 	struct trace_event *event;
4380 
4381 	entry = iter->ent;
4382 
4383 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4384 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4385 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4386 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4387 		if (trace_seq_has_overflowed(s))
4388 			return TRACE_TYPE_PARTIAL_LINE;
4389 	}
4390 
4391 	event = ftrace_find_event(entry->type);
4392 	if (event) {
4393 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4394 		if (ret != TRACE_TYPE_HANDLED)
4395 			return ret;
4396 	}
4397 
4398 	SEQ_PUT_FIELD(s, newline);
4399 
4400 	return trace_handle_return(s);
4401 }
4402 
4403 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4404 {
4405 	struct trace_array *tr = iter->tr;
4406 	struct trace_seq *s = &iter->seq;
4407 	struct trace_entry *entry;
4408 	struct trace_event *event;
4409 
4410 	entry = iter->ent;
4411 
4412 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4413 		SEQ_PUT_FIELD(s, entry->pid);
4414 		SEQ_PUT_FIELD(s, iter->cpu);
4415 		SEQ_PUT_FIELD(s, iter->ts);
4416 		if (trace_seq_has_overflowed(s))
4417 			return TRACE_TYPE_PARTIAL_LINE;
4418 	}
4419 
4420 	event = ftrace_find_event(entry->type);
4421 	return event ? event->funcs->binary(iter, 0, event) :
4422 		TRACE_TYPE_HANDLED;
4423 }
4424 
4425 int trace_empty(struct trace_iterator *iter)
4426 {
4427 	struct ring_buffer_iter *buf_iter;
4428 	int cpu;
4429 
4430 	/* If we are looking at one CPU buffer, only check that one */
4431 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4432 		cpu = iter->cpu_file;
4433 		buf_iter = trace_buffer_iter(iter, cpu);
4434 		if (buf_iter) {
4435 			if (!ring_buffer_iter_empty(buf_iter))
4436 				return 0;
4437 		} else {
4438 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4439 				return 0;
4440 		}
4441 		return 1;
4442 	}
4443 
4444 	for_each_tracing_cpu(cpu) {
4445 		buf_iter = trace_buffer_iter(iter, cpu);
4446 		if (buf_iter) {
4447 			if (!ring_buffer_iter_empty(buf_iter))
4448 				return 0;
4449 		} else {
4450 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451 				return 0;
4452 		}
4453 	}
4454 
4455 	return 1;
4456 }
4457 
4458 /*  Called with trace_event_read_lock() held. */
4459 enum print_line_t print_trace_line(struct trace_iterator *iter)
4460 {
4461 	struct trace_array *tr = iter->tr;
4462 	unsigned long trace_flags = tr->trace_flags;
4463 	enum print_line_t ret;
4464 
4465 	if (iter->lost_events) {
4466 		if (iter->lost_events == (unsigned long)-1)
4467 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4468 					 iter->cpu);
4469 		else
4470 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4471 					 iter->cpu, iter->lost_events);
4472 		if (trace_seq_has_overflowed(&iter->seq))
4473 			return TRACE_TYPE_PARTIAL_LINE;
4474 	}
4475 
4476 	if (iter->trace && iter->trace->print_line) {
4477 		ret = iter->trace->print_line(iter);
4478 		if (ret != TRACE_TYPE_UNHANDLED)
4479 			return ret;
4480 	}
4481 
4482 	if (iter->ent->type == TRACE_BPUTS &&
4483 			trace_flags & TRACE_ITER_PRINTK &&
4484 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4485 		return trace_print_bputs_msg_only(iter);
4486 
4487 	if (iter->ent->type == TRACE_BPRINT &&
4488 			trace_flags & TRACE_ITER_PRINTK &&
4489 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4490 		return trace_print_bprintk_msg_only(iter);
4491 
4492 	if (iter->ent->type == TRACE_PRINT &&
4493 			trace_flags & TRACE_ITER_PRINTK &&
4494 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4495 		return trace_print_printk_msg_only(iter);
4496 
4497 	if (trace_flags & TRACE_ITER_BIN)
4498 		return print_bin_fmt(iter);
4499 
4500 	if (trace_flags & TRACE_ITER_HEX)
4501 		return print_hex_fmt(iter);
4502 
4503 	if (trace_flags & TRACE_ITER_RAW)
4504 		return print_raw_fmt(iter);
4505 
4506 	return print_trace_fmt(iter);
4507 }
4508 
4509 void trace_latency_header(struct seq_file *m)
4510 {
4511 	struct trace_iterator *iter = m->private;
4512 	struct trace_array *tr = iter->tr;
4513 
4514 	/* print nothing if the buffers are empty */
4515 	if (trace_empty(iter))
4516 		return;
4517 
4518 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4519 		print_trace_header(m, iter);
4520 
4521 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4522 		print_lat_help_header(m);
4523 }
4524 
4525 void trace_default_header(struct seq_file *m)
4526 {
4527 	struct trace_iterator *iter = m->private;
4528 	struct trace_array *tr = iter->tr;
4529 	unsigned long trace_flags = tr->trace_flags;
4530 
4531 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4532 		return;
4533 
4534 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4535 		/* print nothing if the buffers are empty */
4536 		if (trace_empty(iter))
4537 			return;
4538 		print_trace_header(m, iter);
4539 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4540 			print_lat_help_header(m);
4541 	} else {
4542 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4543 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4544 				print_func_help_header_irq(iter->array_buffer,
4545 							   m, trace_flags);
4546 			else
4547 				print_func_help_header(iter->array_buffer, m,
4548 						       trace_flags);
4549 		}
4550 	}
4551 }
4552 
4553 static void test_ftrace_alive(struct seq_file *m)
4554 {
4555 	if (!ftrace_is_dead())
4556 		return;
4557 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4558 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4559 }
4560 
4561 #ifdef CONFIG_TRACER_MAX_TRACE
4562 static void show_snapshot_main_help(struct seq_file *m)
4563 {
4564 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4565 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4566 		    "#                      Takes a snapshot of the main buffer.\n"
4567 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4568 		    "#                      (Doesn't have to be '2' works with any number that\n"
4569 		    "#                       is not a '0' or '1')\n");
4570 }
4571 
4572 static void show_snapshot_percpu_help(struct seq_file *m)
4573 {
4574 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4575 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4576 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4577 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4578 #else
4579 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4580 		    "#                     Must use main snapshot file to allocate.\n");
4581 #endif
4582 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4583 		    "#                      (Doesn't have to be '2' works with any number that\n"
4584 		    "#                       is not a '0' or '1')\n");
4585 }
4586 
4587 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4588 {
4589 	if (iter->tr->allocated_snapshot)
4590 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4591 	else
4592 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4593 
4594 	seq_puts(m, "# Snapshot commands:\n");
4595 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4596 		show_snapshot_main_help(m);
4597 	else
4598 		show_snapshot_percpu_help(m);
4599 }
4600 #else
4601 /* Should never be called */
4602 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4603 #endif
4604 
4605 static int s_show(struct seq_file *m, void *v)
4606 {
4607 	struct trace_iterator *iter = v;
4608 	int ret;
4609 
4610 	if (iter->ent == NULL) {
4611 		if (iter->tr) {
4612 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4613 			seq_puts(m, "#\n");
4614 			test_ftrace_alive(m);
4615 		}
4616 		if (iter->snapshot && trace_empty(iter))
4617 			print_snapshot_help(m, iter);
4618 		else if (iter->trace && iter->trace->print_header)
4619 			iter->trace->print_header(m);
4620 		else
4621 			trace_default_header(m);
4622 
4623 	} else if (iter->leftover) {
4624 		/*
4625 		 * If we filled the seq_file buffer earlier, we
4626 		 * want to just show it now.
4627 		 */
4628 		ret = trace_print_seq(m, &iter->seq);
4629 
4630 		/* ret should this time be zero, but you never know */
4631 		iter->leftover = ret;
4632 
4633 	} else {
4634 		print_trace_line(iter);
4635 		ret = trace_print_seq(m, &iter->seq);
4636 		/*
4637 		 * If we overflow the seq_file buffer, then it will
4638 		 * ask us for this data again at start up.
4639 		 * Use that instead.
4640 		 *  ret is 0 if seq_file write succeeded.
4641 		 *        -1 otherwise.
4642 		 */
4643 		iter->leftover = ret;
4644 	}
4645 
4646 	return 0;
4647 }
4648 
4649 /*
4650  * Should be used after trace_array_get(), trace_types_lock
4651  * ensures that i_cdev was already initialized.
4652  */
4653 static inline int tracing_get_cpu(struct inode *inode)
4654 {
4655 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4656 		return (long)inode->i_cdev - 1;
4657 	return RING_BUFFER_ALL_CPUS;
4658 }
4659 
4660 static const struct seq_operations tracer_seq_ops = {
4661 	.start		= s_start,
4662 	.next		= s_next,
4663 	.stop		= s_stop,
4664 	.show		= s_show,
4665 };
4666 
4667 static struct trace_iterator *
4668 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4669 {
4670 	struct trace_array *tr = inode->i_private;
4671 	struct trace_iterator *iter;
4672 	int cpu;
4673 
4674 	if (tracing_disabled)
4675 		return ERR_PTR(-ENODEV);
4676 
4677 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4678 	if (!iter)
4679 		return ERR_PTR(-ENOMEM);
4680 
4681 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4682 				    GFP_KERNEL);
4683 	if (!iter->buffer_iter)
4684 		goto release;
4685 
4686 	/*
4687 	 * trace_find_next_entry() may need to save off iter->ent.
4688 	 * It will place it into the iter->temp buffer. As most
4689 	 * events are less than 128, allocate a buffer of that size.
4690 	 * If one is greater, then trace_find_next_entry() will
4691 	 * allocate a new buffer to adjust for the bigger iter->ent.
4692 	 * It's not critical if it fails to get allocated here.
4693 	 */
4694 	iter->temp = kmalloc(128, GFP_KERNEL);
4695 	if (iter->temp)
4696 		iter->temp_size = 128;
4697 
4698 	/*
4699 	 * trace_event_printf() may need to modify given format
4700 	 * string to replace %p with %px so that it shows real address
4701 	 * instead of hash value. However, that is only for the event
4702 	 * tracing, other tracer may not need. Defer the allocation
4703 	 * until it is needed.
4704 	 */
4705 	iter->fmt = NULL;
4706 	iter->fmt_size = 0;
4707 
4708 	/*
4709 	 * We make a copy of the current tracer to avoid concurrent
4710 	 * changes on it while we are reading.
4711 	 */
4712 	mutex_lock(&trace_types_lock);
4713 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4714 	if (!iter->trace)
4715 		goto fail;
4716 
4717 	*iter->trace = *tr->current_trace;
4718 
4719 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4720 		goto fail;
4721 
4722 	iter->tr = tr;
4723 
4724 #ifdef CONFIG_TRACER_MAX_TRACE
4725 	/* Currently only the top directory has a snapshot */
4726 	if (tr->current_trace->print_max || snapshot)
4727 		iter->array_buffer = &tr->max_buffer;
4728 	else
4729 #endif
4730 		iter->array_buffer = &tr->array_buffer;
4731 	iter->snapshot = snapshot;
4732 	iter->pos = -1;
4733 	iter->cpu_file = tracing_get_cpu(inode);
4734 	mutex_init(&iter->mutex);
4735 
4736 	/* Notify the tracer early; before we stop tracing. */
4737 	if (iter->trace->open)
4738 		iter->trace->open(iter);
4739 
4740 	/* Annotate start of buffers if we had overruns */
4741 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4742 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4743 
4744 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4745 	if (trace_clocks[tr->clock_id].in_ns)
4746 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4747 
4748 	/*
4749 	 * If pause-on-trace is enabled, then stop the trace while
4750 	 * dumping, unless this is the "snapshot" file
4751 	 */
4752 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4753 		tracing_stop_tr(tr);
4754 
4755 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4756 		for_each_tracing_cpu(cpu) {
4757 			iter->buffer_iter[cpu] =
4758 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4759 							 cpu, GFP_KERNEL);
4760 		}
4761 		ring_buffer_read_prepare_sync();
4762 		for_each_tracing_cpu(cpu) {
4763 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4764 			tracing_iter_reset(iter, cpu);
4765 		}
4766 	} else {
4767 		cpu = iter->cpu_file;
4768 		iter->buffer_iter[cpu] =
4769 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4770 						 cpu, GFP_KERNEL);
4771 		ring_buffer_read_prepare_sync();
4772 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4773 		tracing_iter_reset(iter, cpu);
4774 	}
4775 
4776 	mutex_unlock(&trace_types_lock);
4777 
4778 	return iter;
4779 
4780  fail:
4781 	mutex_unlock(&trace_types_lock);
4782 	kfree(iter->trace);
4783 	kfree(iter->temp);
4784 	kfree(iter->buffer_iter);
4785 release:
4786 	seq_release_private(inode, file);
4787 	return ERR_PTR(-ENOMEM);
4788 }
4789 
4790 int tracing_open_generic(struct inode *inode, struct file *filp)
4791 {
4792 	int ret;
4793 
4794 	ret = tracing_check_open_get_tr(NULL);
4795 	if (ret)
4796 		return ret;
4797 
4798 	filp->private_data = inode->i_private;
4799 	return 0;
4800 }
4801 
4802 bool tracing_is_disabled(void)
4803 {
4804 	return (tracing_disabled) ? true: false;
4805 }
4806 
4807 /*
4808  * Open and update trace_array ref count.
4809  * Must have the current trace_array passed to it.
4810  */
4811 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4812 {
4813 	struct trace_array *tr = inode->i_private;
4814 	int ret;
4815 
4816 	ret = tracing_check_open_get_tr(tr);
4817 	if (ret)
4818 		return ret;
4819 
4820 	filp->private_data = inode->i_private;
4821 
4822 	return 0;
4823 }
4824 
4825 static int tracing_release(struct inode *inode, struct file *file)
4826 {
4827 	struct trace_array *tr = inode->i_private;
4828 	struct seq_file *m = file->private_data;
4829 	struct trace_iterator *iter;
4830 	int cpu;
4831 
4832 	if (!(file->f_mode & FMODE_READ)) {
4833 		trace_array_put(tr);
4834 		return 0;
4835 	}
4836 
4837 	/* Writes do not use seq_file */
4838 	iter = m->private;
4839 	mutex_lock(&trace_types_lock);
4840 
4841 	for_each_tracing_cpu(cpu) {
4842 		if (iter->buffer_iter[cpu])
4843 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4844 	}
4845 
4846 	if (iter->trace && iter->trace->close)
4847 		iter->trace->close(iter);
4848 
4849 	if (!iter->snapshot && tr->stop_count)
4850 		/* reenable tracing if it was previously enabled */
4851 		tracing_start_tr(tr);
4852 
4853 	__trace_array_put(tr);
4854 
4855 	mutex_unlock(&trace_types_lock);
4856 
4857 	mutex_destroy(&iter->mutex);
4858 	free_cpumask_var(iter->started);
4859 	kfree(iter->fmt);
4860 	kfree(iter->temp);
4861 	kfree(iter->trace);
4862 	kfree(iter->buffer_iter);
4863 	seq_release_private(inode, file);
4864 
4865 	return 0;
4866 }
4867 
4868 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4869 {
4870 	struct trace_array *tr = inode->i_private;
4871 
4872 	trace_array_put(tr);
4873 	return 0;
4874 }
4875 
4876 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4877 {
4878 	struct trace_array *tr = inode->i_private;
4879 
4880 	trace_array_put(tr);
4881 
4882 	return single_release(inode, file);
4883 }
4884 
4885 static int tracing_open(struct inode *inode, struct file *file)
4886 {
4887 	struct trace_array *tr = inode->i_private;
4888 	struct trace_iterator *iter;
4889 	int ret;
4890 
4891 	ret = tracing_check_open_get_tr(tr);
4892 	if (ret)
4893 		return ret;
4894 
4895 	/* If this file was open for write, then erase contents */
4896 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4897 		int cpu = tracing_get_cpu(inode);
4898 		struct array_buffer *trace_buf = &tr->array_buffer;
4899 
4900 #ifdef CONFIG_TRACER_MAX_TRACE
4901 		if (tr->current_trace->print_max)
4902 			trace_buf = &tr->max_buffer;
4903 #endif
4904 
4905 		if (cpu == RING_BUFFER_ALL_CPUS)
4906 			tracing_reset_online_cpus(trace_buf);
4907 		else
4908 			tracing_reset_cpu(trace_buf, cpu);
4909 	}
4910 
4911 	if (file->f_mode & FMODE_READ) {
4912 		iter = __tracing_open(inode, file, false);
4913 		if (IS_ERR(iter))
4914 			ret = PTR_ERR(iter);
4915 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4916 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4917 	}
4918 
4919 	if (ret < 0)
4920 		trace_array_put(tr);
4921 
4922 	return ret;
4923 }
4924 
4925 /*
4926  * Some tracers are not suitable for instance buffers.
4927  * A tracer is always available for the global array (toplevel)
4928  * or if it explicitly states that it is.
4929  */
4930 static bool
4931 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4932 {
4933 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4934 }
4935 
4936 /* Find the next tracer that this trace array may use */
4937 static struct tracer *
4938 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4939 {
4940 	while (t && !trace_ok_for_array(t, tr))
4941 		t = t->next;
4942 
4943 	return t;
4944 }
4945 
4946 static void *
4947 t_next(struct seq_file *m, void *v, loff_t *pos)
4948 {
4949 	struct trace_array *tr = m->private;
4950 	struct tracer *t = v;
4951 
4952 	(*pos)++;
4953 
4954 	if (t)
4955 		t = get_tracer_for_array(tr, t->next);
4956 
4957 	return t;
4958 }
4959 
4960 static void *t_start(struct seq_file *m, loff_t *pos)
4961 {
4962 	struct trace_array *tr = m->private;
4963 	struct tracer *t;
4964 	loff_t l = 0;
4965 
4966 	mutex_lock(&trace_types_lock);
4967 
4968 	t = get_tracer_for_array(tr, trace_types);
4969 	for (; t && l < *pos; t = t_next(m, t, &l))
4970 			;
4971 
4972 	return t;
4973 }
4974 
4975 static void t_stop(struct seq_file *m, void *p)
4976 {
4977 	mutex_unlock(&trace_types_lock);
4978 }
4979 
4980 static int t_show(struct seq_file *m, void *v)
4981 {
4982 	struct tracer *t = v;
4983 
4984 	if (!t)
4985 		return 0;
4986 
4987 	seq_puts(m, t->name);
4988 	if (t->next)
4989 		seq_putc(m, ' ');
4990 	else
4991 		seq_putc(m, '\n');
4992 
4993 	return 0;
4994 }
4995 
4996 static const struct seq_operations show_traces_seq_ops = {
4997 	.start		= t_start,
4998 	.next		= t_next,
4999 	.stop		= t_stop,
5000 	.show		= t_show,
5001 };
5002 
5003 static int show_traces_open(struct inode *inode, struct file *file)
5004 {
5005 	struct trace_array *tr = inode->i_private;
5006 	struct seq_file *m;
5007 	int ret;
5008 
5009 	ret = tracing_check_open_get_tr(tr);
5010 	if (ret)
5011 		return ret;
5012 
5013 	ret = seq_open(file, &show_traces_seq_ops);
5014 	if (ret) {
5015 		trace_array_put(tr);
5016 		return ret;
5017 	}
5018 
5019 	m = file->private_data;
5020 	m->private = tr;
5021 
5022 	return 0;
5023 }
5024 
5025 static int show_traces_release(struct inode *inode, struct file *file)
5026 {
5027 	struct trace_array *tr = inode->i_private;
5028 
5029 	trace_array_put(tr);
5030 	return seq_release(inode, file);
5031 }
5032 
5033 static ssize_t
5034 tracing_write_stub(struct file *filp, const char __user *ubuf,
5035 		   size_t count, loff_t *ppos)
5036 {
5037 	return count;
5038 }
5039 
5040 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5041 {
5042 	int ret;
5043 
5044 	if (file->f_mode & FMODE_READ)
5045 		ret = seq_lseek(file, offset, whence);
5046 	else
5047 		file->f_pos = ret = 0;
5048 
5049 	return ret;
5050 }
5051 
5052 static const struct file_operations tracing_fops = {
5053 	.open		= tracing_open,
5054 	.read		= seq_read,
5055 	.write		= tracing_write_stub,
5056 	.llseek		= tracing_lseek,
5057 	.release	= tracing_release,
5058 };
5059 
5060 static const struct file_operations show_traces_fops = {
5061 	.open		= show_traces_open,
5062 	.read		= seq_read,
5063 	.llseek		= seq_lseek,
5064 	.release	= show_traces_release,
5065 };
5066 
5067 static ssize_t
5068 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5069 		     size_t count, loff_t *ppos)
5070 {
5071 	struct trace_array *tr = file_inode(filp)->i_private;
5072 	char *mask_str;
5073 	int len;
5074 
5075 	len = snprintf(NULL, 0, "%*pb\n",
5076 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5077 	mask_str = kmalloc(len, GFP_KERNEL);
5078 	if (!mask_str)
5079 		return -ENOMEM;
5080 
5081 	len = snprintf(mask_str, len, "%*pb\n",
5082 		       cpumask_pr_args(tr->tracing_cpumask));
5083 	if (len >= count) {
5084 		count = -EINVAL;
5085 		goto out_err;
5086 	}
5087 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5088 
5089 out_err:
5090 	kfree(mask_str);
5091 
5092 	return count;
5093 }
5094 
5095 int tracing_set_cpumask(struct trace_array *tr,
5096 			cpumask_var_t tracing_cpumask_new)
5097 {
5098 	int cpu;
5099 
5100 	if (!tr)
5101 		return -EINVAL;
5102 
5103 	local_irq_disable();
5104 	arch_spin_lock(&tr->max_lock);
5105 	for_each_tracing_cpu(cpu) {
5106 		/*
5107 		 * Increase/decrease the disabled counter if we are
5108 		 * about to flip a bit in the cpumask:
5109 		 */
5110 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5111 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5112 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5113 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5114 		}
5115 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5116 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5117 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5118 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5119 		}
5120 	}
5121 	arch_spin_unlock(&tr->max_lock);
5122 	local_irq_enable();
5123 
5124 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5125 
5126 	return 0;
5127 }
5128 
5129 static ssize_t
5130 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5131 		      size_t count, loff_t *ppos)
5132 {
5133 	struct trace_array *tr = file_inode(filp)->i_private;
5134 	cpumask_var_t tracing_cpumask_new;
5135 	int err;
5136 
5137 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5138 		return -ENOMEM;
5139 
5140 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5141 	if (err)
5142 		goto err_free;
5143 
5144 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5145 	if (err)
5146 		goto err_free;
5147 
5148 	free_cpumask_var(tracing_cpumask_new);
5149 
5150 	return count;
5151 
5152 err_free:
5153 	free_cpumask_var(tracing_cpumask_new);
5154 
5155 	return err;
5156 }
5157 
5158 static const struct file_operations tracing_cpumask_fops = {
5159 	.open		= tracing_open_generic_tr,
5160 	.read		= tracing_cpumask_read,
5161 	.write		= tracing_cpumask_write,
5162 	.release	= tracing_release_generic_tr,
5163 	.llseek		= generic_file_llseek,
5164 };
5165 
5166 static int tracing_trace_options_show(struct seq_file *m, void *v)
5167 {
5168 	struct tracer_opt *trace_opts;
5169 	struct trace_array *tr = m->private;
5170 	u32 tracer_flags;
5171 	int i;
5172 
5173 	mutex_lock(&trace_types_lock);
5174 	tracer_flags = tr->current_trace->flags->val;
5175 	trace_opts = tr->current_trace->flags->opts;
5176 
5177 	for (i = 0; trace_options[i]; i++) {
5178 		if (tr->trace_flags & (1 << i))
5179 			seq_printf(m, "%s\n", trace_options[i]);
5180 		else
5181 			seq_printf(m, "no%s\n", trace_options[i]);
5182 	}
5183 
5184 	for (i = 0; trace_opts[i].name; i++) {
5185 		if (tracer_flags & trace_opts[i].bit)
5186 			seq_printf(m, "%s\n", trace_opts[i].name);
5187 		else
5188 			seq_printf(m, "no%s\n", trace_opts[i].name);
5189 	}
5190 	mutex_unlock(&trace_types_lock);
5191 
5192 	return 0;
5193 }
5194 
5195 static int __set_tracer_option(struct trace_array *tr,
5196 			       struct tracer_flags *tracer_flags,
5197 			       struct tracer_opt *opts, int neg)
5198 {
5199 	struct tracer *trace = tracer_flags->trace;
5200 	int ret;
5201 
5202 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5203 	if (ret)
5204 		return ret;
5205 
5206 	if (neg)
5207 		tracer_flags->val &= ~opts->bit;
5208 	else
5209 		tracer_flags->val |= opts->bit;
5210 	return 0;
5211 }
5212 
5213 /* Try to assign a tracer specific option */
5214 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5215 {
5216 	struct tracer *trace = tr->current_trace;
5217 	struct tracer_flags *tracer_flags = trace->flags;
5218 	struct tracer_opt *opts = NULL;
5219 	int i;
5220 
5221 	for (i = 0; tracer_flags->opts[i].name; i++) {
5222 		opts = &tracer_flags->opts[i];
5223 
5224 		if (strcmp(cmp, opts->name) == 0)
5225 			return __set_tracer_option(tr, trace->flags, opts, neg);
5226 	}
5227 
5228 	return -EINVAL;
5229 }
5230 
5231 /* Some tracers require overwrite to stay enabled */
5232 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5233 {
5234 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5235 		return -1;
5236 
5237 	return 0;
5238 }
5239 
5240 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5241 {
5242 	int *map;
5243 
5244 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5245 	    (mask == TRACE_ITER_RECORD_CMD))
5246 		lockdep_assert_held(&event_mutex);
5247 
5248 	/* do nothing if flag is already set */
5249 	if (!!(tr->trace_flags & mask) == !!enabled)
5250 		return 0;
5251 
5252 	/* Give the tracer a chance to approve the change */
5253 	if (tr->current_trace->flag_changed)
5254 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5255 			return -EINVAL;
5256 
5257 	if (enabled)
5258 		tr->trace_flags |= mask;
5259 	else
5260 		tr->trace_flags &= ~mask;
5261 
5262 	if (mask == TRACE_ITER_RECORD_CMD)
5263 		trace_event_enable_cmd_record(enabled);
5264 
5265 	if (mask == TRACE_ITER_RECORD_TGID) {
5266 		if (!tgid_map) {
5267 			tgid_map_max = pid_max;
5268 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5269 				       GFP_KERNEL);
5270 
5271 			/*
5272 			 * Pairs with smp_load_acquire() in
5273 			 * trace_find_tgid_ptr() to ensure that if it observes
5274 			 * the tgid_map we just allocated then it also observes
5275 			 * the corresponding tgid_map_max value.
5276 			 */
5277 			smp_store_release(&tgid_map, map);
5278 		}
5279 		if (!tgid_map) {
5280 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5281 			return -ENOMEM;
5282 		}
5283 
5284 		trace_event_enable_tgid_record(enabled);
5285 	}
5286 
5287 	if (mask == TRACE_ITER_EVENT_FORK)
5288 		trace_event_follow_fork(tr, enabled);
5289 
5290 	if (mask == TRACE_ITER_FUNC_FORK)
5291 		ftrace_pid_follow_fork(tr, enabled);
5292 
5293 	if (mask == TRACE_ITER_OVERWRITE) {
5294 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5295 #ifdef CONFIG_TRACER_MAX_TRACE
5296 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5297 #endif
5298 	}
5299 
5300 	if (mask == TRACE_ITER_PRINTK) {
5301 		trace_printk_start_stop_comm(enabled);
5302 		trace_printk_control(enabled);
5303 	}
5304 
5305 	return 0;
5306 }
5307 
5308 int trace_set_options(struct trace_array *tr, char *option)
5309 {
5310 	char *cmp;
5311 	int neg = 0;
5312 	int ret;
5313 	size_t orig_len = strlen(option);
5314 	int len;
5315 
5316 	cmp = strstrip(option);
5317 
5318 	len = str_has_prefix(cmp, "no");
5319 	if (len)
5320 		neg = 1;
5321 
5322 	cmp += len;
5323 
5324 	mutex_lock(&event_mutex);
5325 	mutex_lock(&trace_types_lock);
5326 
5327 	ret = match_string(trace_options, -1, cmp);
5328 	/* If no option could be set, test the specific tracer options */
5329 	if (ret < 0)
5330 		ret = set_tracer_option(tr, cmp, neg);
5331 	else
5332 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5333 
5334 	mutex_unlock(&trace_types_lock);
5335 	mutex_unlock(&event_mutex);
5336 
5337 	/*
5338 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5339 	 * turn it back into a space.
5340 	 */
5341 	if (orig_len > strlen(option))
5342 		option[strlen(option)] = ' ';
5343 
5344 	return ret;
5345 }
5346 
5347 static void __init apply_trace_boot_options(void)
5348 {
5349 	char *buf = trace_boot_options_buf;
5350 	char *option;
5351 
5352 	while (true) {
5353 		option = strsep(&buf, ",");
5354 
5355 		if (!option)
5356 			break;
5357 
5358 		if (*option)
5359 			trace_set_options(&global_trace, option);
5360 
5361 		/* Put back the comma to allow this to be called again */
5362 		if (buf)
5363 			*(buf - 1) = ',';
5364 	}
5365 }
5366 
5367 static ssize_t
5368 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5369 			size_t cnt, loff_t *ppos)
5370 {
5371 	struct seq_file *m = filp->private_data;
5372 	struct trace_array *tr = m->private;
5373 	char buf[64];
5374 	int ret;
5375 
5376 	if (cnt >= sizeof(buf))
5377 		return -EINVAL;
5378 
5379 	if (copy_from_user(buf, ubuf, cnt))
5380 		return -EFAULT;
5381 
5382 	buf[cnt] = 0;
5383 
5384 	ret = trace_set_options(tr, buf);
5385 	if (ret < 0)
5386 		return ret;
5387 
5388 	*ppos += cnt;
5389 
5390 	return cnt;
5391 }
5392 
5393 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5394 {
5395 	struct trace_array *tr = inode->i_private;
5396 	int ret;
5397 
5398 	ret = tracing_check_open_get_tr(tr);
5399 	if (ret)
5400 		return ret;
5401 
5402 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5403 	if (ret < 0)
5404 		trace_array_put(tr);
5405 
5406 	return ret;
5407 }
5408 
5409 static const struct file_operations tracing_iter_fops = {
5410 	.open		= tracing_trace_options_open,
5411 	.read		= seq_read,
5412 	.llseek		= seq_lseek,
5413 	.release	= tracing_single_release_tr,
5414 	.write		= tracing_trace_options_write,
5415 };
5416 
5417 static const char readme_msg[] =
5418 	"tracing mini-HOWTO:\n\n"
5419 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5420 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5421 	" Important files:\n"
5422 	"  trace\t\t\t- The static contents of the buffer\n"
5423 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5424 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5425 	"  current_tracer\t- function and latency tracers\n"
5426 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5427 	"  error_log\t- error log for failed commands (that support it)\n"
5428 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5429 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5430 	"  trace_clock\t\t-change the clock used to order events\n"
5431 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5432 	"      global:   Synced across CPUs but slows tracing down.\n"
5433 	"     counter:   Not a clock, but just an increment\n"
5434 	"      uptime:   Jiffy counter from time of boot\n"
5435 	"        perf:   Same clock that perf events use\n"
5436 #ifdef CONFIG_X86_64
5437 	"     x86-tsc:   TSC cycle counter\n"
5438 #endif
5439 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
5440 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5441 	"    absolute:   Absolute (standalone) timestamp\n"
5442 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5443 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5444 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5445 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5446 	"\t\t\t  Remove sub-buffer with rmdir\n"
5447 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5448 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5449 	"\t\t\t  option name\n"
5450 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5451 #ifdef CONFIG_DYNAMIC_FTRACE
5452 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5453 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5454 	"\t\t\t  functions\n"
5455 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5456 	"\t     modules: Can select a group via module\n"
5457 	"\t      Format: :mod:<module-name>\n"
5458 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5459 	"\t    triggers: a command to perform when function is hit\n"
5460 	"\t      Format: <function>:<trigger>[:count]\n"
5461 	"\t     trigger: traceon, traceoff\n"
5462 	"\t\t      enable_event:<system>:<event>\n"
5463 	"\t\t      disable_event:<system>:<event>\n"
5464 #ifdef CONFIG_STACKTRACE
5465 	"\t\t      stacktrace\n"
5466 #endif
5467 #ifdef CONFIG_TRACER_SNAPSHOT
5468 	"\t\t      snapshot\n"
5469 #endif
5470 	"\t\t      dump\n"
5471 	"\t\t      cpudump\n"
5472 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5473 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5474 	"\t     The first one will disable tracing every time do_fault is hit\n"
5475 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5476 	"\t       The first time do trap is hit and it disables tracing, the\n"
5477 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5478 	"\t       the counter will not decrement. It only decrements when the\n"
5479 	"\t       trigger did work\n"
5480 	"\t     To remove trigger without count:\n"
5481 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5482 	"\t     To remove trigger with a count:\n"
5483 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5484 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5485 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5486 	"\t    modules: Can select a group via module command :mod:\n"
5487 	"\t    Does not accept triggers\n"
5488 #endif /* CONFIG_DYNAMIC_FTRACE */
5489 #ifdef CONFIG_FUNCTION_TRACER
5490 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5491 	"\t\t    (function)\n"
5492 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5493 	"\t\t    (function)\n"
5494 #endif
5495 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5496 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5497 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5498 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5499 #endif
5500 #ifdef CONFIG_TRACER_SNAPSHOT
5501 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5502 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5503 	"\t\t\t  information\n"
5504 #endif
5505 #ifdef CONFIG_STACK_TRACER
5506 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5507 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5508 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5509 	"\t\t\t  new trace)\n"
5510 #ifdef CONFIG_DYNAMIC_FTRACE
5511 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5512 	"\t\t\t  traces\n"
5513 #endif
5514 #endif /* CONFIG_STACK_TRACER */
5515 #ifdef CONFIG_DYNAMIC_EVENTS
5516 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5517 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5518 #endif
5519 #ifdef CONFIG_KPROBE_EVENTS
5520 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5521 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5522 #endif
5523 #ifdef CONFIG_UPROBE_EVENTS
5524 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5525 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5526 #endif
5527 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5528 	"\t  accepts: event-definitions (one definition per line)\n"
5529 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5530 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5531 #ifdef CONFIG_HIST_TRIGGERS
5532 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5533 #endif
5534 	"\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5535 	"\t           -:[<group>/]<event>\n"
5536 #ifdef CONFIG_KPROBE_EVENTS
5537 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5538   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5539 #endif
5540 #ifdef CONFIG_UPROBE_EVENTS
5541   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5542 #endif
5543 	"\t     args: <name>=fetcharg[:type]\n"
5544 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5545 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5546 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5547 #else
5548 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5549 #endif
5550 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5551 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5552 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5553 	"\t           <type>\\[<array-size>\\]\n"
5554 #ifdef CONFIG_HIST_TRIGGERS
5555 	"\t    field: <stype> <name>;\n"
5556 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5557 	"\t           [unsigned] char/int/long\n"
5558 #endif
5559 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5560 	"\t            of the <attached-group>/<attached-event>.\n"
5561 #endif
5562 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5563 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5564 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5565 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5566 	"\t\t\t  events\n"
5567 	"      filter\t\t- If set, only events passing filter are traced\n"
5568 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5569 	"\t\t\t  <event>:\n"
5570 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5571 	"      filter\t\t- If set, only events passing filter are traced\n"
5572 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5573 	"\t    Format: <trigger>[:count][if <filter>]\n"
5574 	"\t   trigger: traceon, traceoff\n"
5575 	"\t            enable_event:<system>:<event>\n"
5576 	"\t            disable_event:<system>:<event>\n"
5577 #ifdef CONFIG_HIST_TRIGGERS
5578 	"\t            enable_hist:<system>:<event>\n"
5579 	"\t            disable_hist:<system>:<event>\n"
5580 #endif
5581 #ifdef CONFIG_STACKTRACE
5582 	"\t\t    stacktrace\n"
5583 #endif
5584 #ifdef CONFIG_TRACER_SNAPSHOT
5585 	"\t\t    snapshot\n"
5586 #endif
5587 #ifdef CONFIG_HIST_TRIGGERS
5588 	"\t\t    hist (see below)\n"
5589 #endif
5590 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5591 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5592 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5593 	"\t                  events/block/block_unplug/trigger\n"
5594 	"\t   The first disables tracing every time block_unplug is hit.\n"
5595 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5596 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5597 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5598 	"\t   Like function triggers, the counter is only decremented if it\n"
5599 	"\t    enabled or disabled tracing.\n"
5600 	"\t   To remove a trigger without a count:\n"
5601 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5602 	"\t   To remove a trigger with a count:\n"
5603 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5604 	"\t   Filters can be ignored when removing a trigger.\n"
5605 #ifdef CONFIG_HIST_TRIGGERS
5606 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5607 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5608 	"\t            [:values=<field1[,field2,...]>]\n"
5609 	"\t            [:sort=<field1[,field2,...]>]\n"
5610 	"\t            [:size=#entries]\n"
5611 	"\t            [:pause][:continue][:clear]\n"
5612 	"\t            [:name=histname1]\n"
5613 	"\t            [:<handler>.<action>]\n"
5614 	"\t            [if <filter>]\n\n"
5615 	"\t    Note, special fields can be used as well:\n"
5616 	"\t            common_timestamp - to record current timestamp\n"
5617 	"\t            common_cpu - to record the CPU the event happened on\n"
5618 	"\n"
5619 	"\t    When a matching event is hit, an entry is added to a hash\n"
5620 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5621 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5622 	"\t    correspond to fields in the event's format description.  Keys\n"
5623 	"\t    can be any field, or the special string 'stacktrace'.\n"
5624 	"\t    Compound keys consisting of up to two fields can be specified\n"
5625 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5626 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5627 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5628 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5629 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5630 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5631 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5632 	"\t    its histogram data will be shared with other triggers of the\n"
5633 	"\t    same name, and trigger hits will update this common data.\n\n"
5634 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5635 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5636 	"\t    triggers attached to an event, there will be a table for each\n"
5637 	"\t    trigger in the output.  The table displayed for a named\n"
5638 	"\t    trigger will be the same as any other instance having the\n"
5639 	"\t    same name.  The default format used to display a given field\n"
5640 	"\t    can be modified by appending any of the following modifiers\n"
5641 	"\t    to the field name, as applicable:\n\n"
5642 	"\t            .hex        display a number as a hex value\n"
5643 	"\t            .sym        display an address as a symbol\n"
5644 	"\t            .sym-offset display an address as a symbol and offset\n"
5645 	"\t            .execname   display a common_pid as a program name\n"
5646 	"\t            .syscall    display a syscall id as a syscall name\n"
5647 	"\t            .log2       display log2 value rather than raw number\n"
5648 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5649 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5650 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5651 	"\t    trigger or to start a hist trigger but not log any events\n"
5652 	"\t    until told to do so.  'continue' can be used to start or\n"
5653 	"\t    restart a paused hist trigger.\n\n"
5654 	"\t    The 'clear' parameter will clear the contents of a running\n"
5655 	"\t    hist trigger and leave its current paused/active state\n"
5656 	"\t    unchanged.\n\n"
5657 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5658 	"\t    have one event conditionally start and stop another event's\n"
5659 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5660 	"\t    the enable_event and disable_event triggers.\n\n"
5661 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5662 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5663 	"\t        <handler>.<action>\n\n"
5664 	"\t    The available handlers are:\n\n"
5665 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5666 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5667 	"\t        onchange(var)            - invoke action if var changes\n\n"
5668 	"\t    The available actions are:\n\n"
5669 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5670 	"\t        save(field,...)                      - save current event fields\n"
5671 #ifdef CONFIG_TRACER_SNAPSHOT
5672 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5673 #endif
5674 #ifdef CONFIG_SYNTH_EVENTS
5675 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5676 	"\t  Write into this file to define/undefine new synthetic events.\n"
5677 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5678 #endif
5679 #endif
5680 ;
5681 
5682 static ssize_t
5683 tracing_readme_read(struct file *filp, char __user *ubuf,
5684 		       size_t cnt, loff_t *ppos)
5685 {
5686 	return simple_read_from_buffer(ubuf, cnt, ppos,
5687 					readme_msg, strlen(readme_msg));
5688 }
5689 
5690 static const struct file_operations tracing_readme_fops = {
5691 	.open		= tracing_open_generic,
5692 	.read		= tracing_readme_read,
5693 	.llseek		= generic_file_llseek,
5694 };
5695 
5696 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5697 {
5698 	int pid = ++(*pos);
5699 
5700 	return trace_find_tgid_ptr(pid);
5701 }
5702 
5703 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5704 {
5705 	int pid = *pos;
5706 
5707 	return trace_find_tgid_ptr(pid);
5708 }
5709 
5710 static void saved_tgids_stop(struct seq_file *m, void *v)
5711 {
5712 }
5713 
5714 static int saved_tgids_show(struct seq_file *m, void *v)
5715 {
5716 	int *entry = (int *)v;
5717 	int pid = entry - tgid_map;
5718 	int tgid = *entry;
5719 
5720 	if (tgid == 0)
5721 		return SEQ_SKIP;
5722 
5723 	seq_printf(m, "%d %d\n", pid, tgid);
5724 	return 0;
5725 }
5726 
5727 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5728 	.start		= saved_tgids_start,
5729 	.stop		= saved_tgids_stop,
5730 	.next		= saved_tgids_next,
5731 	.show		= saved_tgids_show,
5732 };
5733 
5734 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5735 {
5736 	int ret;
5737 
5738 	ret = tracing_check_open_get_tr(NULL);
5739 	if (ret)
5740 		return ret;
5741 
5742 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5743 }
5744 
5745 
5746 static const struct file_operations tracing_saved_tgids_fops = {
5747 	.open		= tracing_saved_tgids_open,
5748 	.read		= seq_read,
5749 	.llseek		= seq_lseek,
5750 	.release	= seq_release,
5751 };
5752 
5753 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5754 {
5755 	unsigned int *ptr = v;
5756 
5757 	if (*pos || m->count)
5758 		ptr++;
5759 
5760 	(*pos)++;
5761 
5762 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5763 	     ptr++) {
5764 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5765 			continue;
5766 
5767 		return ptr;
5768 	}
5769 
5770 	return NULL;
5771 }
5772 
5773 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5774 {
5775 	void *v;
5776 	loff_t l = 0;
5777 
5778 	preempt_disable();
5779 	arch_spin_lock(&trace_cmdline_lock);
5780 
5781 	v = &savedcmd->map_cmdline_to_pid[0];
5782 	while (l <= *pos) {
5783 		v = saved_cmdlines_next(m, v, &l);
5784 		if (!v)
5785 			return NULL;
5786 	}
5787 
5788 	return v;
5789 }
5790 
5791 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5792 {
5793 	arch_spin_unlock(&trace_cmdline_lock);
5794 	preempt_enable();
5795 }
5796 
5797 static int saved_cmdlines_show(struct seq_file *m, void *v)
5798 {
5799 	char buf[TASK_COMM_LEN];
5800 	unsigned int *pid = v;
5801 
5802 	__trace_find_cmdline(*pid, buf);
5803 	seq_printf(m, "%d %s\n", *pid, buf);
5804 	return 0;
5805 }
5806 
5807 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5808 	.start		= saved_cmdlines_start,
5809 	.next		= saved_cmdlines_next,
5810 	.stop		= saved_cmdlines_stop,
5811 	.show		= saved_cmdlines_show,
5812 };
5813 
5814 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5815 {
5816 	int ret;
5817 
5818 	ret = tracing_check_open_get_tr(NULL);
5819 	if (ret)
5820 		return ret;
5821 
5822 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5823 }
5824 
5825 static const struct file_operations tracing_saved_cmdlines_fops = {
5826 	.open		= tracing_saved_cmdlines_open,
5827 	.read		= seq_read,
5828 	.llseek		= seq_lseek,
5829 	.release	= seq_release,
5830 };
5831 
5832 static ssize_t
5833 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5834 				 size_t cnt, loff_t *ppos)
5835 {
5836 	char buf[64];
5837 	int r;
5838 
5839 	arch_spin_lock(&trace_cmdline_lock);
5840 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5841 	arch_spin_unlock(&trace_cmdline_lock);
5842 
5843 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5844 }
5845 
5846 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5847 {
5848 	kfree(s->saved_cmdlines);
5849 	kfree(s->map_cmdline_to_pid);
5850 	kfree(s);
5851 }
5852 
5853 static int tracing_resize_saved_cmdlines(unsigned int val)
5854 {
5855 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5856 
5857 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5858 	if (!s)
5859 		return -ENOMEM;
5860 
5861 	if (allocate_cmdlines_buffer(val, s) < 0) {
5862 		kfree(s);
5863 		return -ENOMEM;
5864 	}
5865 
5866 	arch_spin_lock(&trace_cmdline_lock);
5867 	savedcmd_temp = savedcmd;
5868 	savedcmd = s;
5869 	arch_spin_unlock(&trace_cmdline_lock);
5870 	free_saved_cmdlines_buffer(savedcmd_temp);
5871 
5872 	return 0;
5873 }
5874 
5875 static ssize_t
5876 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5877 				  size_t cnt, loff_t *ppos)
5878 {
5879 	unsigned long val;
5880 	int ret;
5881 
5882 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5883 	if (ret)
5884 		return ret;
5885 
5886 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5887 	if (!val || val > PID_MAX_DEFAULT)
5888 		return -EINVAL;
5889 
5890 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5891 	if (ret < 0)
5892 		return ret;
5893 
5894 	*ppos += cnt;
5895 
5896 	return cnt;
5897 }
5898 
5899 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5900 	.open		= tracing_open_generic,
5901 	.read		= tracing_saved_cmdlines_size_read,
5902 	.write		= tracing_saved_cmdlines_size_write,
5903 };
5904 
5905 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5906 static union trace_eval_map_item *
5907 update_eval_map(union trace_eval_map_item *ptr)
5908 {
5909 	if (!ptr->map.eval_string) {
5910 		if (ptr->tail.next) {
5911 			ptr = ptr->tail.next;
5912 			/* Set ptr to the next real item (skip head) */
5913 			ptr++;
5914 		} else
5915 			return NULL;
5916 	}
5917 	return ptr;
5918 }
5919 
5920 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5921 {
5922 	union trace_eval_map_item *ptr = v;
5923 
5924 	/*
5925 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5926 	 * This really should never happen.
5927 	 */
5928 	(*pos)++;
5929 	ptr = update_eval_map(ptr);
5930 	if (WARN_ON_ONCE(!ptr))
5931 		return NULL;
5932 
5933 	ptr++;
5934 	ptr = update_eval_map(ptr);
5935 
5936 	return ptr;
5937 }
5938 
5939 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5940 {
5941 	union trace_eval_map_item *v;
5942 	loff_t l = 0;
5943 
5944 	mutex_lock(&trace_eval_mutex);
5945 
5946 	v = trace_eval_maps;
5947 	if (v)
5948 		v++;
5949 
5950 	while (v && l < *pos) {
5951 		v = eval_map_next(m, v, &l);
5952 	}
5953 
5954 	return v;
5955 }
5956 
5957 static void eval_map_stop(struct seq_file *m, void *v)
5958 {
5959 	mutex_unlock(&trace_eval_mutex);
5960 }
5961 
5962 static int eval_map_show(struct seq_file *m, void *v)
5963 {
5964 	union trace_eval_map_item *ptr = v;
5965 
5966 	seq_printf(m, "%s %ld (%s)\n",
5967 		   ptr->map.eval_string, ptr->map.eval_value,
5968 		   ptr->map.system);
5969 
5970 	return 0;
5971 }
5972 
5973 static const struct seq_operations tracing_eval_map_seq_ops = {
5974 	.start		= eval_map_start,
5975 	.next		= eval_map_next,
5976 	.stop		= eval_map_stop,
5977 	.show		= eval_map_show,
5978 };
5979 
5980 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5981 {
5982 	int ret;
5983 
5984 	ret = tracing_check_open_get_tr(NULL);
5985 	if (ret)
5986 		return ret;
5987 
5988 	return seq_open(filp, &tracing_eval_map_seq_ops);
5989 }
5990 
5991 static const struct file_operations tracing_eval_map_fops = {
5992 	.open		= tracing_eval_map_open,
5993 	.read		= seq_read,
5994 	.llseek		= seq_lseek,
5995 	.release	= seq_release,
5996 };
5997 
5998 static inline union trace_eval_map_item *
5999 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6000 {
6001 	/* Return tail of array given the head */
6002 	return ptr + ptr->head.length + 1;
6003 }
6004 
6005 static void
6006 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6007 			   int len)
6008 {
6009 	struct trace_eval_map **stop;
6010 	struct trace_eval_map **map;
6011 	union trace_eval_map_item *map_array;
6012 	union trace_eval_map_item *ptr;
6013 
6014 	stop = start + len;
6015 
6016 	/*
6017 	 * The trace_eval_maps contains the map plus a head and tail item,
6018 	 * where the head holds the module and length of array, and the
6019 	 * tail holds a pointer to the next list.
6020 	 */
6021 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6022 	if (!map_array) {
6023 		pr_warn("Unable to allocate trace eval mapping\n");
6024 		return;
6025 	}
6026 
6027 	mutex_lock(&trace_eval_mutex);
6028 
6029 	if (!trace_eval_maps)
6030 		trace_eval_maps = map_array;
6031 	else {
6032 		ptr = trace_eval_maps;
6033 		for (;;) {
6034 			ptr = trace_eval_jmp_to_tail(ptr);
6035 			if (!ptr->tail.next)
6036 				break;
6037 			ptr = ptr->tail.next;
6038 
6039 		}
6040 		ptr->tail.next = map_array;
6041 	}
6042 	map_array->head.mod = mod;
6043 	map_array->head.length = len;
6044 	map_array++;
6045 
6046 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6047 		map_array->map = **map;
6048 		map_array++;
6049 	}
6050 	memset(map_array, 0, sizeof(*map_array));
6051 
6052 	mutex_unlock(&trace_eval_mutex);
6053 }
6054 
6055 static void trace_create_eval_file(struct dentry *d_tracer)
6056 {
6057 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6058 			  NULL, &tracing_eval_map_fops);
6059 }
6060 
6061 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6062 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6063 static inline void trace_insert_eval_map_file(struct module *mod,
6064 			      struct trace_eval_map **start, int len) { }
6065 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6066 
6067 static void trace_insert_eval_map(struct module *mod,
6068 				  struct trace_eval_map **start, int len)
6069 {
6070 	struct trace_eval_map **map;
6071 
6072 	if (len <= 0)
6073 		return;
6074 
6075 	map = start;
6076 
6077 	trace_event_eval_update(map, len);
6078 
6079 	trace_insert_eval_map_file(mod, start, len);
6080 }
6081 
6082 static ssize_t
6083 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6084 		       size_t cnt, loff_t *ppos)
6085 {
6086 	struct trace_array *tr = filp->private_data;
6087 	char buf[MAX_TRACER_SIZE+2];
6088 	int r;
6089 
6090 	mutex_lock(&trace_types_lock);
6091 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6092 	mutex_unlock(&trace_types_lock);
6093 
6094 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6095 }
6096 
6097 int tracer_init(struct tracer *t, struct trace_array *tr)
6098 {
6099 	tracing_reset_online_cpus(&tr->array_buffer);
6100 	return t->init(tr);
6101 }
6102 
6103 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6104 {
6105 	int cpu;
6106 
6107 	for_each_tracing_cpu(cpu)
6108 		per_cpu_ptr(buf->data, cpu)->entries = val;
6109 }
6110 
6111 #ifdef CONFIG_TRACER_MAX_TRACE
6112 /* resize @tr's buffer to the size of @size_tr's entries */
6113 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6114 					struct array_buffer *size_buf, int cpu_id)
6115 {
6116 	int cpu, ret = 0;
6117 
6118 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6119 		for_each_tracing_cpu(cpu) {
6120 			ret = ring_buffer_resize(trace_buf->buffer,
6121 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6122 			if (ret < 0)
6123 				break;
6124 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6125 				per_cpu_ptr(size_buf->data, cpu)->entries;
6126 		}
6127 	} else {
6128 		ret = ring_buffer_resize(trace_buf->buffer,
6129 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6130 		if (ret == 0)
6131 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6132 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6133 	}
6134 
6135 	return ret;
6136 }
6137 #endif /* CONFIG_TRACER_MAX_TRACE */
6138 
6139 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6140 					unsigned long size, int cpu)
6141 {
6142 	int ret;
6143 
6144 	/*
6145 	 * If kernel or user changes the size of the ring buffer
6146 	 * we use the size that was given, and we can forget about
6147 	 * expanding it later.
6148 	 */
6149 	ring_buffer_expanded = true;
6150 
6151 	/* May be called before buffers are initialized */
6152 	if (!tr->array_buffer.buffer)
6153 		return 0;
6154 
6155 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6156 	if (ret < 0)
6157 		return ret;
6158 
6159 #ifdef CONFIG_TRACER_MAX_TRACE
6160 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6161 	    !tr->current_trace->use_max_tr)
6162 		goto out;
6163 
6164 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6165 	if (ret < 0) {
6166 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6167 						     &tr->array_buffer, cpu);
6168 		if (r < 0) {
6169 			/*
6170 			 * AARGH! We are left with different
6171 			 * size max buffer!!!!
6172 			 * The max buffer is our "snapshot" buffer.
6173 			 * When a tracer needs a snapshot (one of the
6174 			 * latency tracers), it swaps the max buffer
6175 			 * with the saved snap shot. We succeeded to
6176 			 * update the size of the main buffer, but failed to
6177 			 * update the size of the max buffer. But when we tried
6178 			 * to reset the main buffer to the original size, we
6179 			 * failed there too. This is very unlikely to
6180 			 * happen, but if it does, warn and kill all
6181 			 * tracing.
6182 			 */
6183 			WARN_ON(1);
6184 			tracing_disabled = 1;
6185 		}
6186 		return ret;
6187 	}
6188 
6189 	if (cpu == RING_BUFFER_ALL_CPUS)
6190 		set_buffer_entries(&tr->max_buffer, size);
6191 	else
6192 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6193 
6194  out:
6195 #endif /* CONFIG_TRACER_MAX_TRACE */
6196 
6197 	if (cpu == RING_BUFFER_ALL_CPUS)
6198 		set_buffer_entries(&tr->array_buffer, size);
6199 	else
6200 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6201 
6202 	return ret;
6203 }
6204 
6205 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6206 				  unsigned long size, int cpu_id)
6207 {
6208 	int ret;
6209 
6210 	mutex_lock(&trace_types_lock);
6211 
6212 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6213 		/* make sure, this cpu is enabled in the mask */
6214 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6215 			ret = -EINVAL;
6216 			goto out;
6217 		}
6218 	}
6219 
6220 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6221 	if (ret < 0)
6222 		ret = -ENOMEM;
6223 
6224 out:
6225 	mutex_unlock(&trace_types_lock);
6226 
6227 	return ret;
6228 }
6229 
6230 
6231 /**
6232  * tracing_update_buffers - used by tracing facility to expand ring buffers
6233  *
6234  * To save on memory when the tracing is never used on a system with it
6235  * configured in. The ring buffers are set to a minimum size. But once
6236  * a user starts to use the tracing facility, then they need to grow
6237  * to their default size.
6238  *
6239  * This function is to be called when a tracer is about to be used.
6240  */
6241 int tracing_update_buffers(void)
6242 {
6243 	int ret = 0;
6244 
6245 	mutex_lock(&trace_types_lock);
6246 	if (!ring_buffer_expanded)
6247 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6248 						RING_BUFFER_ALL_CPUS);
6249 	mutex_unlock(&trace_types_lock);
6250 
6251 	return ret;
6252 }
6253 
6254 struct trace_option_dentry;
6255 
6256 static void
6257 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6258 
6259 /*
6260  * Used to clear out the tracer before deletion of an instance.
6261  * Must have trace_types_lock held.
6262  */
6263 static void tracing_set_nop(struct trace_array *tr)
6264 {
6265 	if (tr->current_trace == &nop_trace)
6266 		return;
6267 
6268 	tr->current_trace->enabled--;
6269 
6270 	if (tr->current_trace->reset)
6271 		tr->current_trace->reset(tr);
6272 
6273 	tr->current_trace = &nop_trace;
6274 }
6275 
6276 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6277 {
6278 	/* Only enable if the directory has been created already. */
6279 	if (!tr->dir)
6280 		return;
6281 
6282 	create_trace_option_files(tr, t);
6283 }
6284 
6285 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6286 {
6287 	struct tracer *t;
6288 #ifdef CONFIG_TRACER_MAX_TRACE
6289 	bool had_max_tr;
6290 #endif
6291 	int ret = 0;
6292 
6293 	mutex_lock(&trace_types_lock);
6294 
6295 	if (!ring_buffer_expanded) {
6296 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6297 						RING_BUFFER_ALL_CPUS);
6298 		if (ret < 0)
6299 			goto out;
6300 		ret = 0;
6301 	}
6302 
6303 	for (t = trace_types; t; t = t->next) {
6304 		if (strcmp(t->name, buf) == 0)
6305 			break;
6306 	}
6307 	if (!t) {
6308 		ret = -EINVAL;
6309 		goto out;
6310 	}
6311 	if (t == tr->current_trace)
6312 		goto out;
6313 
6314 #ifdef CONFIG_TRACER_SNAPSHOT
6315 	if (t->use_max_tr) {
6316 		arch_spin_lock(&tr->max_lock);
6317 		if (tr->cond_snapshot)
6318 			ret = -EBUSY;
6319 		arch_spin_unlock(&tr->max_lock);
6320 		if (ret)
6321 			goto out;
6322 	}
6323 #endif
6324 	/* Some tracers won't work on kernel command line */
6325 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6326 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6327 			t->name);
6328 		goto out;
6329 	}
6330 
6331 	/* Some tracers are only allowed for the top level buffer */
6332 	if (!trace_ok_for_array(t, tr)) {
6333 		ret = -EINVAL;
6334 		goto out;
6335 	}
6336 
6337 	/* If trace pipe files are being read, we can't change the tracer */
6338 	if (tr->trace_ref) {
6339 		ret = -EBUSY;
6340 		goto out;
6341 	}
6342 
6343 	trace_branch_disable();
6344 
6345 	tr->current_trace->enabled--;
6346 
6347 	if (tr->current_trace->reset)
6348 		tr->current_trace->reset(tr);
6349 
6350 	/* Current trace needs to be nop_trace before synchronize_rcu */
6351 	tr->current_trace = &nop_trace;
6352 
6353 #ifdef CONFIG_TRACER_MAX_TRACE
6354 	had_max_tr = tr->allocated_snapshot;
6355 
6356 	if (had_max_tr && !t->use_max_tr) {
6357 		/*
6358 		 * We need to make sure that the update_max_tr sees that
6359 		 * current_trace changed to nop_trace to keep it from
6360 		 * swapping the buffers after we resize it.
6361 		 * The update_max_tr is called from interrupts disabled
6362 		 * so a synchronized_sched() is sufficient.
6363 		 */
6364 		synchronize_rcu();
6365 		free_snapshot(tr);
6366 	}
6367 #endif
6368 
6369 #ifdef CONFIG_TRACER_MAX_TRACE
6370 	if (t->use_max_tr && !had_max_tr) {
6371 		ret = tracing_alloc_snapshot_instance(tr);
6372 		if (ret < 0)
6373 			goto out;
6374 	}
6375 #endif
6376 
6377 	if (t->init) {
6378 		ret = tracer_init(t, tr);
6379 		if (ret)
6380 			goto out;
6381 	}
6382 
6383 	tr->current_trace = t;
6384 	tr->current_trace->enabled++;
6385 	trace_branch_enable(tr);
6386  out:
6387 	mutex_unlock(&trace_types_lock);
6388 
6389 	return ret;
6390 }
6391 
6392 static ssize_t
6393 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6394 			size_t cnt, loff_t *ppos)
6395 {
6396 	struct trace_array *tr = filp->private_data;
6397 	char buf[MAX_TRACER_SIZE+1];
6398 	int i;
6399 	size_t ret;
6400 	int err;
6401 
6402 	ret = cnt;
6403 
6404 	if (cnt > MAX_TRACER_SIZE)
6405 		cnt = MAX_TRACER_SIZE;
6406 
6407 	if (copy_from_user(buf, ubuf, cnt))
6408 		return -EFAULT;
6409 
6410 	buf[cnt] = 0;
6411 
6412 	/* strip ending whitespace. */
6413 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6414 		buf[i] = 0;
6415 
6416 	err = tracing_set_tracer(tr, buf);
6417 	if (err)
6418 		return err;
6419 
6420 	*ppos += ret;
6421 
6422 	return ret;
6423 }
6424 
6425 static ssize_t
6426 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6427 		   size_t cnt, loff_t *ppos)
6428 {
6429 	char buf[64];
6430 	int r;
6431 
6432 	r = snprintf(buf, sizeof(buf), "%ld\n",
6433 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6434 	if (r > sizeof(buf))
6435 		r = sizeof(buf);
6436 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6437 }
6438 
6439 static ssize_t
6440 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6441 		    size_t cnt, loff_t *ppos)
6442 {
6443 	unsigned long val;
6444 	int ret;
6445 
6446 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6447 	if (ret)
6448 		return ret;
6449 
6450 	*ptr = val * 1000;
6451 
6452 	return cnt;
6453 }
6454 
6455 static ssize_t
6456 tracing_thresh_read(struct file *filp, char __user *ubuf,
6457 		    size_t cnt, loff_t *ppos)
6458 {
6459 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6460 }
6461 
6462 static ssize_t
6463 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6464 		     size_t cnt, loff_t *ppos)
6465 {
6466 	struct trace_array *tr = filp->private_data;
6467 	int ret;
6468 
6469 	mutex_lock(&trace_types_lock);
6470 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6471 	if (ret < 0)
6472 		goto out;
6473 
6474 	if (tr->current_trace->update_thresh) {
6475 		ret = tr->current_trace->update_thresh(tr);
6476 		if (ret < 0)
6477 			goto out;
6478 	}
6479 
6480 	ret = cnt;
6481 out:
6482 	mutex_unlock(&trace_types_lock);
6483 
6484 	return ret;
6485 }
6486 
6487 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6488 
6489 static ssize_t
6490 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6491 		     size_t cnt, loff_t *ppos)
6492 {
6493 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6494 }
6495 
6496 static ssize_t
6497 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6498 		      size_t cnt, loff_t *ppos)
6499 {
6500 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6501 }
6502 
6503 #endif
6504 
6505 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6506 {
6507 	struct trace_array *tr = inode->i_private;
6508 	struct trace_iterator *iter;
6509 	int ret;
6510 
6511 	ret = tracing_check_open_get_tr(tr);
6512 	if (ret)
6513 		return ret;
6514 
6515 	mutex_lock(&trace_types_lock);
6516 
6517 	/* create a buffer to store the information to pass to userspace */
6518 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6519 	if (!iter) {
6520 		ret = -ENOMEM;
6521 		__trace_array_put(tr);
6522 		goto out;
6523 	}
6524 
6525 	trace_seq_init(&iter->seq);
6526 	iter->trace = tr->current_trace;
6527 
6528 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6529 		ret = -ENOMEM;
6530 		goto fail;
6531 	}
6532 
6533 	/* trace pipe does not show start of buffer */
6534 	cpumask_setall(iter->started);
6535 
6536 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6537 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6538 
6539 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6540 	if (trace_clocks[tr->clock_id].in_ns)
6541 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6542 
6543 	iter->tr = tr;
6544 	iter->array_buffer = &tr->array_buffer;
6545 	iter->cpu_file = tracing_get_cpu(inode);
6546 	mutex_init(&iter->mutex);
6547 	filp->private_data = iter;
6548 
6549 	if (iter->trace->pipe_open)
6550 		iter->trace->pipe_open(iter);
6551 
6552 	nonseekable_open(inode, filp);
6553 
6554 	tr->trace_ref++;
6555 out:
6556 	mutex_unlock(&trace_types_lock);
6557 	return ret;
6558 
6559 fail:
6560 	kfree(iter);
6561 	__trace_array_put(tr);
6562 	mutex_unlock(&trace_types_lock);
6563 	return ret;
6564 }
6565 
6566 static int tracing_release_pipe(struct inode *inode, struct file *file)
6567 {
6568 	struct trace_iterator *iter = file->private_data;
6569 	struct trace_array *tr = inode->i_private;
6570 
6571 	mutex_lock(&trace_types_lock);
6572 
6573 	tr->trace_ref--;
6574 
6575 	if (iter->trace->pipe_close)
6576 		iter->trace->pipe_close(iter);
6577 
6578 	mutex_unlock(&trace_types_lock);
6579 
6580 	free_cpumask_var(iter->started);
6581 	mutex_destroy(&iter->mutex);
6582 	kfree(iter);
6583 
6584 	trace_array_put(tr);
6585 
6586 	return 0;
6587 }
6588 
6589 static __poll_t
6590 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6591 {
6592 	struct trace_array *tr = iter->tr;
6593 
6594 	/* Iterators are static, they should be filled or empty */
6595 	if (trace_buffer_iter(iter, iter->cpu_file))
6596 		return EPOLLIN | EPOLLRDNORM;
6597 
6598 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6599 		/*
6600 		 * Always select as readable when in blocking mode
6601 		 */
6602 		return EPOLLIN | EPOLLRDNORM;
6603 	else
6604 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6605 					     filp, poll_table);
6606 }
6607 
6608 static __poll_t
6609 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6610 {
6611 	struct trace_iterator *iter = filp->private_data;
6612 
6613 	return trace_poll(iter, filp, poll_table);
6614 }
6615 
6616 /* Must be called with iter->mutex held. */
6617 static int tracing_wait_pipe(struct file *filp)
6618 {
6619 	struct trace_iterator *iter = filp->private_data;
6620 	int ret;
6621 
6622 	while (trace_empty(iter)) {
6623 
6624 		if ((filp->f_flags & O_NONBLOCK)) {
6625 			return -EAGAIN;
6626 		}
6627 
6628 		/*
6629 		 * We block until we read something and tracing is disabled.
6630 		 * We still block if tracing is disabled, but we have never
6631 		 * read anything. This allows a user to cat this file, and
6632 		 * then enable tracing. But after we have read something,
6633 		 * we give an EOF when tracing is again disabled.
6634 		 *
6635 		 * iter->pos will be 0 if we haven't read anything.
6636 		 */
6637 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6638 			break;
6639 
6640 		mutex_unlock(&iter->mutex);
6641 
6642 		ret = wait_on_pipe(iter, 0);
6643 
6644 		mutex_lock(&iter->mutex);
6645 
6646 		if (ret)
6647 			return ret;
6648 	}
6649 
6650 	return 1;
6651 }
6652 
6653 /*
6654  * Consumer reader.
6655  */
6656 static ssize_t
6657 tracing_read_pipe(struct file *filp, char __user *ubuf,
6658 		  size_t cnt, loff_t *ppos)
6659 {
6660 	struct trace_iterator *iter = filp->private_data;
6661 	ssize_t sret;
6662 
6663 	/*
6664 	 * Avoid more than one consumer on a single file descriptor
6665 	 * This is just a matter of traces coherency, the ring buffer itself
6666 	 * is protected.
6667 	 */
6668 	mutex_lock(&iter->mutex);
6669 
6670 	/* return any leftover data */
6671 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6672 	if (sret != -EBUSY)
6673 		goto out;
6674 
6675 	trace_seq_init(&iter->seq);
6676 
6677 	if (iter->trace->read) {
6678 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6679 		if (sret)
6680 			goto out;
6681 	}
6682 
6683 waitagain:
6684 	sret = tracing_wait_pipe(filp);
6685 	if (sret <= 0)
6686 		goto out;
6687 
6688 	/* stop when tracing is finished */
6689 	if (trace_empty(iter)) {
6690 		sret = 0;
6691 		goto out;
6692 	}
6693 
6694 	if (cnt >= PAGE_SIZE)
6695 		cnt = PAGE_SIZE - 1;
6696 
6697 	/* reset all but tr, trace, and overruns */
6698 	memset(&iter->seq, 0,
6699 	       sizeof(struct trace_iterator) -
6700 	       offsetof(struct trace_iterator, seq));
6701 	cpumask_clear(iter->started);
6702 	trace_seq_init(&iter->seq);
6703 	iter->pos = -1;
6704 
6705 	trace_event_read_lock();
6706 	trace_access_lock(iter->cpu_file);
6707 	while (trace_find_next_entry_inc(iter) != NULL) {
6708 		enum print_line_t ret;
6709 		int save_len = iter->seq.seq.len;
6710 
6711 		ret = print_trace_line(iter);
6712 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6713 			/* don't print partial lines */
6714 			iter->seq.seq.len = save_len;
6715 			break;
6716 		}
6717 		if (ret != TRACE_TYPE_NO_CONSUME)
6718 			trace_consume(iter);
6719 
6720 		if (trace_seq_used(&iter->seq) >= cnt)
6721 			break;
6722 
6723 		/*
6724 		 * Setting the full flag means we reached the trace_seq buffer
6725 		 * size and we should leave by partial output condition above.
6726 		 * One of the trace_seq_* functions is not used properly.
6727 		 */
6728 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6729 			  iter->ent->type);
6730 	}
6731 	trace_access_unlock(iter->cpu_file);
6732 	trace_event_read_unlock();
6733 
6734 	/* Now copy what we have to the user */
6735 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6736 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6737 		trace_seq_init(&iter->seq);
6738 
6739 	/*
6740 	 * If there was nothing to send to user, in spite of consuming trace
6741 	 * entries, go back to wait for more entries.
6742 	 */
6743 	if (sret == -EBUSY)
6744 		goto waitagain;
6745 
6746 out:
6747 	mutex_unlock(&iter->mutex);
6748 
6749 	return sret;
6750 }
6751 
6752 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6753 				     unsigned int idx)
6754 {
6755 	__free_page(spd->pages[idx]);
6756 }
6757 
6758 static size_t
6759 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6760 {
6761 	size_t count;
6762 	int save_len;
6763 	int ret;
6764 
6765 	/* Seq buffer is page-sized, exactly what we need. */
6766 	for (;;) {
6767 		save_len = iter->seq.seq.len;
6768 		ret = print_trace_line(iter);
6769 
6770 		if (trace_seq_has_overflowed(&iter->seq)) {
6771 			iter->seq.seq.len = save_len;
6772 			break;
6773 		}
6774 
6775 		/*
6776 		 * This should not be hit, because it should only
6777 		 * be set if the iter->seq overflowed. But check it
6778 		 * anyway to be safe.
6779 		 */
6780 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6781 			iter->seq.seq.len = save_len;
6782 			break;
6783 		}
6784 
6785 		count = trace_seq_used(&iter->seq) - save_len;
6786 		if (rem < count) {
6787 			rem = 0;
6788 			iter->seq.seq.len = save_len;
6789 			break;
6790 		}
6791 
6792 		if (ret != TRACE_TYPE_NO_CONSUME)
6793 			trace_consume(iter);
6794 		rem -= count;
6795 		if (!trace_find_next_entry_inc(iter))	{
6796 			rem = 0;
6797 			iter->ent = NULL;
6798 			break;
6799 		}
6800 	}
6801 
6802 	return rem;
6803 }
6804 
6805 static ssize_t tracing_splice_read_pipe(struct file *filp,
6806 					loff_t *ppos,
6807 					struct pipe_inode_info *pipe,
6808 					size_t len,
6809 					unsigned int flags)
6810 {
6811 	struct page *pages_def[PIPE_DEF_BUFFERS];
6812 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6813 	struct trace_iterator *iter = filp->private_data;
6814 	struct splice_pipe_desc spd = {
6815 		.pages		= pages_def,
6816 		.partial	= partial_def,
6817 		.nr_pages	= 0, /* This gets updated below. */
6818 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6819 		.ops		= &default_pipe_buf_ops,
6820 		.spd_release	= tracing_spd_release_pipe,
6821 	};
6822 	ssize_t ret;
6823 	size_t rem;
6824 	unsigned int i;
6825 
6826 	if (splice_grow_spd(pipe, &spd))
6827 		return -ENOMEM;
6828 
6829 	mutex_lock(&iter->mutex);
6830 
6831 	if (iter->trace->splice_read) {
6832 		ret = iter->trace->splice_read(iter, filp,
6833 					       ppos, pipe, len, flags);
6834 		if (ret)
6835 			goto out_err;
6836 	}
6837 
6838 	ret = tracing_wait_pipe(filp);
6839 	if (ret <= 0)
6840 		goto out_err;
6841 
6842 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6843 		ret = -EFAULT;
6844 		goto out_err;
6845 	}
6846 
6847 	trace_event_read_lock();
6848 	trace_access_lock(iter->cpu_file);
6849 
6850 	/* Fill as many pages as possible. */
6851 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6852 		spd.pages[i] = alloc_page(GFP_KERNEL);
6853 		if (!spd.pages[i])
6854 			break;
6855 
6856 		rem = tracing_fill_pipe_page(rem, iter);
6857 
6858 		/* Copy the data into the page, so we can start over. */
6859 		ret = trace_seq_to_buffer(&iter->seq,
6860 					  page_address(spd.pages[i]),
6861 					  trace_seq_used(&iter->seq));
6862 		if (ret < 0) {
6863 			__free_page(spd.pages[i]);
6864 			break;
6865 		}
6866 		spd.partial[i].offset = 0;
6867 		spd.partial[i].len = trace_seq_used(&iter->seq);
6868 
6869 		trace_seq_init(&iter->seq);
6870 	}
6871 
6872 	trace_access_unlock(iter->cpu_file);
6873 	trace_event_read_unlock();
6874 	mutex_unlock(&iter->mutex);
6875 
6876 	spd.nr_pages = i;
6877 
6878 	if (i)
6879 		ret = splice_to_pipe(pipe, &spd);
6880 	else
6881 		ret = 0;
6882 out:
6883 	splice_shrink_spd(&spd);
6884 	return ret;
6885 
6886 out_err:
6887 	mutex_unlock(&iter->mutex);
6888 	goto out;
6889 }
6890 
6891 static ssize_t
6892 tracing_entries_read(struct file *filp, char __user *ubuf,
6893 		     size_t cnt, loff_t *ppos)
6894 {
6895 	struct inode *inode = file_inode(filp);
6896 	struct trace_array *tr = inode->i_private;
6897 	int cpu = tracing_get_cpu(inode);
6898 	char buf[64];
6899 	int r = 0;
6900 	ssize_t ret;
6901 
6902 	mutex_lock(&trace_types_lock);
6903 
6904 	if (cpu == RING_BUFFER_ALL_CPUS) {
6905 		int cpu, buf_size_same;
6906 		unsigned long size;
6907 
6908 		size = 0;
6909 		buf_size_same = 1;
6910 		/* check if all cpu sizes are same */
6911 		for_each_tracing_cpu(cpu) {
6912 			/* fill in the size from first enabled cpu */
6913 			if (size == 0)
6914 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6915 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6916 				buf_size_same = 0;
6917 				break;
6918 			}
6919 		}
6920 
6921 		if (buf_size_same) {
6922 			if (!ring_buffer_expanded)
6923 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6924 					    size >> 10,
6925 					    trace_buf_size >> 10);
6926 			else
6927 				r = sprintf(buf, "%lu\n", size >> 10);
6928 		} else
6929 			r = sprintf(buf, "X\n");
6930 	} else
6931 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6932 
6933 	mutex_unlock(&trace_types_lock);
6934 
6935 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6936 	return ret;
6937 }
6938 
6939 static ssize_t
6940 tracing_entries_write(struct file *filp, const char __user *ubuf,
6941 		      size_t cnt, loff_t *ppos)
6942 {
6943 	struct inode *inode = file_inode(filp);
6944 	struct trace_array *tr = inode->i_private;
6945 	unsigned long val;
6946 	int ret;
6947 
6948 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6949 	if (ret)
6950 		return ret;
6951 
6952 	/* must have at least 1 entry */
6953 	if (!val)
6954 		return -EINVAL;
6955 
6956 	/* value is in KB */
6957 	val <<= 10;
6958 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6959 	if (ret < 0)
6960 		return ret;
6961 
6962 	*ppos += cnt;
6963 
6964 	return cnt;
6965 }
6966 
6967 static ssize_t
6968 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6969 				size_t cnt, loff_t *ppos)
6970 {
6971 	struct trace_array *tr = filp->private_data;
6972 	char buf[64];
6973 	int r, cpu;
6974 	unsigned long size = 0, expanded_size = 0;
6975 
6976 	mutex_lock(&trace_types_lock);
6977 	for_each_tracing_cpu(cpu) {
6978 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6979 		if (!ring_buffer_expanded)
6980 			expanded_size += trace_buf_size >> 10;
6981 	}
6982 	if (ring_buffer_expanded)
6983 		r = sprintf(buf, "%lu\n", size);
6984 	else
6985 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6986 	mutex_unlock(&trace_types_lock);
6987 
6988 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6989 }
6990 
6991 static ssize_t
6992 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6993 			  size_t cnt, loff_t *ppos)
6994 {
6995 	/*
6996 	 * There is no need to read what the user has written, this function
6997 	 * is just to make sure that there is no error when "echo" is used
6998 	 */
6999 
7000 	*ppos += cnt;
7001 
7002 	return cnt;
7003 }
7004 
7005 static int
7006 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7007 {
7008 	struct trace_array *tr = inode->i_private;
7009 
7010 	/* disable tracing ? */
7011 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7012 		tracer_tracing_off(tr);
7013 	/* resize the ring buffer to 0 */
7014 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7015 
7016 	trace_array_put(tr);
7017 
7018 	return 0;
7019 }
7020 
7021 static ssize_t
7022 tracing_mark_write(struct file *filp, const char __user *ubuf,
7023 					size_t cnt, loff_t *fpos)
7024 {
7025 	struct trace_array *tr = filp->private_data;
7026 	struct ring_buffer_event *event;
7027 	enum event_trigger_type tt = ETT_NONE;
7028 	struct trace_buffer *buffer;
7029 	struct print_entry *entry;
7030 	ssize_t written;
7031 	int size;
7032 	int len;
7033 
7034 /* Used in tracing_mark_raw_write() as well */
7035 #define FAULTED_STR "<faulted>"
7036 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7037 
7038 	if (tracing_disabled)
7039 		return -EINVAL;
7040 
7041 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7042 		return -EINVAL;
7043 
7044 	if (cnt > TRACE_BUF_SIZE)
7045 		cnt = TRACE_BUF_SIZE;
7046 
7047 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7048 
7049 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7050 
7051 	/* If less than "<faulted>", then make sure we can still add that */
7052 	if (cnt < FAULTED_SIZE)
7053 		size += FAULTED_SIZE - cnt;
7054 
7055 	buffer = tr->array_buffer.buffer;
7056 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7057 					    tracing_gen_ctx());
7058 	if (unlikely(!event))
7059 		/* Ring buffer disabled, return as if not open for write */
7060 		return -EBADF;
7061 
7062 	entry = ring_buffer_event_data(event);
7063 	entry->ip = _THIS_IP_;
7064 
7065 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7066 	if (len) {
7067 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7068 		cnt = FAULTED_SIZE;
7069 		written = -EFAULT;
7070 	} else
7071 		written = cnt;
7072 
7073 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7074 		/* do not add \n before testing triggers, but add \0 */
7075 		entry->buf[cnt] = '\0';
7076 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7077 	}
7078 
7079 	if (entry->buf[cnt - 1] != '\n') {
7080 		entry->buf[cnt] = '\n';
7081 		entry->buf[cnt + 1] = '\0';
7082 	} else
7083 		entry->buf[cnt] = '\0';
7084 
7085 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7086 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7087 	__buffer_unlock_commit(buffer, event);
7088 
7089 	if (tt)
7090 		event_triggers_post_call(tr->trace_marker_file, tt);
7091 
7092 	if (written > 0)
7093 		*fpos += written;
7094 
7095 	return written;
7096 }
7097 
7098 /* Limit it for now to 3K (including tag) */
7099 #define RAW_DATA_MAX_SIZE (1024*3)
7100 
7101 static ssize_t
7102 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7103 					size_t cnt, loff_t *fpos)
7104 {
7105 	struct trace_array *tr = filp->private_data;
7106 	struct ring_buffer_event *event;
7107 	struct trace_buffer *buffer;
7108 	struct raw_data_entry *entry;
7109 	ssize_t written;
7110 	int size;
7111 	int len;
7112 
7113 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7114 
7115 	if (tracing_disabled)
7116 		return -EINVAL;
7117 
7118 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7119 		return -EINVAL;
7120 
7121 	/* The marker must at least have a tag id */
7122 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7123 		return -EINVAL;
7124 
7125 	if (cnt > TRACE_BUF_SIZE)
7126 		cnt = TRACE_BUF_SIZE;
7127 
7128 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7129 
7130 	size = sizeof(*entry) + cnt;
7131 	if (cnt < FAULT_SIZE_ID)
7132 		size += FAULT_SIZE_ID - cnt;
7133 
7134 	buffer = tr->array_buffer.buffer;
7135 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7136 					    tracing_gen_ctx());
7137 	if (!event)
7138 		/* Ring buffer disabled, return as if not open for write */
7139 		return -EBADF;
7140 
7141 	entry = ring_buffer_event_data(event);
7142 
7143 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7144 	if (len) {
7145 		entry->id = -1;
7146 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7147 		written = -EFAULT;
7148 	} else
7149 		written = cnt;
7150 
7151 	__buffer_unlock_commit(buffer, event);
7152 
7153 	if (written > 0)
7154 		*fpos += written;
7155 
7156 	return written;
7157 }
7158 
7159 static int tracing_clock_show(struct seq_file *m, void *v)
7160 {
7161 	struct trace_array *tr = m->private;
7162 	int i;
7163 
7164 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7165 		seq_printf(m,
7166 			"%s%s%s%s", i ? " " : "",
7167 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7168 			i == tr->clock_id ? "]" : "");
7169 	seq_putc(m, '\n');
7170 
7171 	return 0;
7172 }
7173 
7174 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7175 {
7176 	int i;
7177 
7178 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7179 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7180 			break;
7181 	}
7182 	if (i == ARRAY_SIZE(trace_clocks))
7183 		return -EINVAL;
7184 
7185 	mutex_lock(&trace_types_lock);
7186 
7187 	tr->clock_id = i;
7188 
7189 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7190 
7191 	/*
7192 	 * New clock may not be consistent with the previous clock.
7193 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7194 	 */
7195 	tracing_reset_online_cpus(&tr->array_buffer);
7196 
7197 #ifdef CONFIG_TRACER_MAX_TRACE
7198 	if (tr->max_buffer.buffer)
7199 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7200 	tracing_reset_online_cpus(&tr->max_buffer);
7201 #endif
7202 
7203 	mutex_unlock(&trace_types_lock);
7204 
7205 	return 0;
7206 }
7207 
7208 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7209 				   size_t cnt, loff_t *fpos)
7210 {
7211 	struct seq_file *m = filp->private_data;
7212 	struct trace_array *tr = m->private;
7213 	char buf[64];
7214 	const char *clockstr;
7215 	int ret;
7216 
7217 	if (cnt >= sizeof(buf))
7218 		return -EINVAL;
7219 
7220 	if (copy_from_user(buf, ubuf, cnt))
7221 		return -EFAULT;
7222 
7223 	buf[cnt] = 0;
7224 
7225 	clockstr = strstrip(buf);
7226 
7227 	ret = tracing_set_clock(tr, clockstr);
7228 	if (ret)
7229 		return ret;
7230 
7231 	*fpos += cnt;
7232 
7233 	return cnt;
7234 }
7235 
7236 static int tracing_clock_open(struct inode *inode, struct file *file)
7237 {
7238 	struct trace_array *tr = inode->i_private;
7239 	int ret;
7240 
7241 	ret = tracing_check_open_get_tr(tr);
7242 	if (ret)
7243 		return ret;
7244 
7245 	ret = single_open(file, tracing_clock_show, inode->i_private);
7246 	if (ret < 0)
7247 		trace_array_put(tr);
7248 
7249 	return ret;
7250 }
7251 
7252 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7253 {
7254 	struct trace_array *tr = m->private;
7255 
7256 	mutex_lock(&trace_types_lock);
7257 
7258 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7259 		seq_puts(m, "delta [absolute]\n");
7260 	else
7261 		seq_puts(m, "[delta] absolute\n");
7262 
7263 	mutex_unlock(&trace_types_lock);
7264 
7265 	return 0;
7266 }
7267 
7268 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7269 {
7270 	struct trace_array *tr = inode->i_private;
7271 	int ret;
7272 
7273 	ret = tracing_check_open_get_tr(tr);
7274 	if (ret)
7275 		return ret;
7276 
7277 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7278 	if (ret < 0)
7279 		trace_array_put(tr);
7280 
7281 	return ret;
7282 }
7283 
7284 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7285 {
7286 	if (rbe == this_cpu_read(trace_buffered_event))
7287 		return ring_buffer_time_stamp(buffer);
7288 
7289 	return ring_buffer_event_time_stamp(buffer, rbe);
7290 }
7291 
7292 /*
7293  * Set or disable using the per CPU trace_buffer_event when possible.
7294  */
7295 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7296 {
7297 	int ret = 0;
7298 
7299 	mutex_lock(&trace_types_lock);
7300 
7301 	if (set && tr->no_filter_buffering_ref++)
7302 		goto out;
7303 
7304 	if (!set) {
7305 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7306 			ret = -EINVAL;
7307 			goto out;
7308 		}
7309 
7310 		--tr->no_filter_buffering_ref;
7311 	}
7312  out:
7313 	mutex_unlock(&trace_types_lock);
7314 
7315 	return ret;
7316 }
7317 
7318 struct ftrace_buffer_info {
7319 	struct trace_iterator	iter;
7320 	void			*spare;
7321 	unsigned int		spare_cpu;
7322 	unsigned int		read;
7323 };
7324 
7325 #ifdef CONFIG_TRACER_SNAPSHOT
7326 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7327 {
7328 	struct trace_array *tr = inode->i_private;
7329 	struct trace_iterator *iter;
7330 	struct seq_file *m;
7331 	int ret;
7332 
7333 	ret = tracing_check_open_get_tr(tr);
7334 	if (ret)
7335 		return ret;
7336 
7337 	if (file->f_mode & FMODE_READ) {
7338 		iter = __tracing_open(inode, file, true);
7339 		if (IS_ERR(iter))
7340 			ret = PTR_ERR(iter);
7341 	} else {
7342 		/* Writes still need the seq_file to hold the private data */
7343 		ret = -ENOMEM;
7344 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7345 		if (!m)
7346 			goto out;
7347 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7348 		if (!iter) {
7349 			kfree(m);
7350 			goto out;
7351 		}
7352 		ret = 0;
7353 
7354 		iter->tr = tr;
7355 		iter->array_buffer = &tr->max_buffer;
7356 		iter->cpu_file = tracing_get_cpu(inode);
7357 		m->private = iter;
7358 		file->private_data = m;
7359 	}
7360 out:
7361 	if (ret < 0)
7362 		trace_array_put(tr);
7363 
7364 	return ret;
7365 }
7366 
7367 static ssize_t
7368 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7369 		       loff_t *ppos)
7370 {
7371 	struct seq_file *m = filp->private_data;
7372 	struct trace_iterator *iter = m->private;
7373 	struct trace_array *tr = iter->tr;
7374 	unsigned long val;
7375 	int ret;
7376 
7377 	ret = tracing_update_buffers();
7378 	if (ret < 0)
7379 		return ret;
7380 
7381 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7382 	if (ret)
7383 		return ret;
7384 
7385 	mutex_lock(&trace_types_lock);
7386 
7387 	if (tr->current_trace->use_max_tr) {
7388 		ret = -EBUSY;
7389 		goto out;
7390 	}
7391 
7392 	arch_spin_lock(&tr->max_lock);
7393 	if (tr->cond_snapshot)
7394 		ret = -EBUSY;
7395 	arch_spin_unlock(&tr->max_lock);
7396 	if (ret)
7397 		goto out;
7398 
7399 	switch (val) {
7400 	case 0:
7401 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7402 			ret = -EINVAL;
7403 			break;
7404 		}
7405 		if (tr->allocated_snapshot)
7406 			free_snapshot(tr);
7407 		break;
7408 	case 1:
7409 /* Only allow per-cpu swap if the ring buffer supports it */
7410 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7411 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7412 			ret = -EINVAL;
7413 			break;
7414 		}
7415 #endif
7416 		if (tr->allocated_snapshot)
7417 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7418 					&tr->array_buffer, iter->cpu_file);
7419 		else
7420 			ret = tracing_alloc_snapshot_instance(tr);
7421 		if (ret < 0)
7422 			break;
7423 		local_irq_disable();
7424 		/* Now, we're going to swap */
7425 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7426 			update_max_tr(tr, current, smp_processor_id(), NULL);
7427 		else
7428 			update_max_tr_single(tr, current, iter->cpu_file);
7429 		local_irq_enable();
7430 		break;
7431 	default:
7432 		if (tr->allocated_snapshot) {
7433 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7434 				tracing_reset_online_cpus(&tr->max_buffer);
7435 			else
7436 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7437 		}
7438 		break;
7439 	}
7440 
7441 	if (ret >= 0) {
7442 		*ppos += cnt;
7443 		ret = cnt;
7444 	}
7445 out:
7446 	mutex_unlock(&trace_types_lock);
7447 	return ret;
7448 }
7449 
7450 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7451 {
7452 	struct seq_file *m = file->private_data;
7453 	int ret;
7454 
7455 	ret = tracing_release(inode, file);
7456 
7457 	if (file->f_mode & FMODE_READ)
7458 		return ret;
7459 
7460 	/* If write only, the seq_file is just a stub */
7461 	if (m)
7462 		kfree(m->private);
7463 	kfree(m);
7464 
7465 	return 0;
7466 }
7467 
7468 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7469 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7470 				    size_t count, loff_t *ppos);
7471 static int tracing_buffers_release(struct inode *inode, struct file *file);
7472 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7473 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7474 
7475 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7476 {
7477 	struct ftrace_buffer_info *info;
7478 	int ret;
7479 
7480 	/* The following checks for tracefs lockdown */
7481 	ret = tracing_buffers_open(inode, filp);
7482 	if (ret < 0)
7483 		return ret;
7484 
7485 	info = filp->private_data;
7486 
7487 	if (info->iter.trace->use_max_tr) {
7488 		tracing_buffers_release(inode, filp);
7489 		return -EBUSY;
7490 	}
7491 
7492 	info->iter.snapshot = true;
7493 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7494 
7495 	return ret;
7496 }
7497 
7498 #endif /* CONFIG_TRACER_SNAPSHOT */
7499 
7500 
7501 static const struct file_operations tracing_thresh_fops = {
7502 	.open		= tracing_open_generic,
7503 	.read		= tracing_thresh_read,
7504 	.write		= tracing_thresh_write,
7505 	.llseek		= generic_file_llseek,
7506 };
7507 
7508 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7509 static const struct file_operations tracing_max_lat_fops = {
7510 	.open		= tracing_open_generic,
7511 	.read		= tracing_max_lat_read,
7512 	.write		= tracing_max_lat_write,
7513 	.llseek		= generic_file_llseek,
7514 };
7515 #endif
7516 
7517 static const struct file_operations set_tracer_fops = {
7518 	.open		= tracing_open_generic,
7519 	.read		= tracing_set_trace_read,
7520 	.write		= tracing_set_trace_write,
7521 	.llseek		= generic_file_llseek,
7522 };
7523 
7524 static const struct file_operations tracing_pipe_fops = {
7525 	.open		= tracing_open_pipe,
7526 	.poll		= tracing_poll_pipe,
7527 	.read		= tracing_read_pipe,
7528 	.splice_read	= tracing_splice_read_pipe,
7529 	.release	= tracing_release_pipe,
7530 	.llseek		= no_llseek,
7531 };
7532 
7533 static const struct file_operations tracing_entries_fops = {
7534 	.open		= tracing_open_generic_tr,
7535 	.read		= tracing_entries_read,
7536 	.write		= tracing_entries_write,
7537 	.llseek		= generic_file_llseek,
7538 	.release	= tracing_release_generic_tr,
7539 };
7540 
7541 static const struct file_operations tracing_total_entries_fops = {
7542 	.open		= tracing_open_generic_tr,
7543 	.read		= tracing_total_entries_read,
7544 	.llseek		= generic_file_llseek,
7545 	.release	= tracing_release_generic_tr,
7546 };
7547 
7548 static const struct file_operations tracing_free_buffer_fops = {
7549 	.open		= tracing_open_generic_tr,
7550 	.write		= tracing_free_buffer_write,
7551 	.release	= tracing_free_buffer_release,
7552 };
7553 
7554 static const struct file_operations tracing_mark_fops = {
7555 	.open		= tracing_open_generic_tr,
7556 	.write		= tracing_mark_write,
7557 	.llseek		= generic_file_llseek,
7558 	.release	= tracing_release_generic_tr,
7559 };
7560 
7561 static const struct file_operations tracing_mark_raw_fops = {
7562 	.open		= tracing_open_generic_tr,
7563 	.write		= tracing_mark_raw_write,
7564 	.llseek		= generic_file_llseek,
7565 	.release	= tracing_release_generic_tr,
7566 };
7567 
7568 static const struct file_operations trace_clock_fops = {
7569 	.open		= tracing_clock_open,
7570 	.read		= seq_read,
7571 	.llseek		= seq_lseek,
7572 	.release	= tracing_single_release_tr,
7573 	.write		= tracing_clock_write,
7574 };
7575 
7576 static const struct file_operations trace_time_stamp_mode_fops = {
7577 	.open		= tracing_time_stamp_mode_open,
7578 	.read		= seq_read,
7579 	.llseek		= seq_lseek,
7580 	.release	= tracing_single_release_tr,
7581 };
7582 
7583 #ifdef CONFIG_TRACER_SNAPSHOT
7584 static const struct file_operations snapshot_fops = {
7585 	.open		= tracing_snapshot_open,
7586 	.read		= seq_read,
7587 	.write		= tracing_snapshot_write,
7588 	.llseek		= tracing_lseek,
7589 	.release	= tracing_snapshot_release,
7590 };
7591 
7592 static const struct file_operations snapshot_raw_fops = {
7593 	.open		= snapshot_raw_open,
7594 	.read		= tracing_buffers_read,
7595 	.release	= tracing_buffers_release,
7596 	.splice_read	= tracing_buffers_splice_read,
7597 	.llseek		= no_llseek,
7598 };
7599 
7600 #endif /* CONFIG_TRACER_SNAPSHOT */
7601 
7602 /*
7603  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7604  * @filp: The active open file structure
7605  * @ubuf: The userspace provided buffer to read value into
7606  * @cnt: The maximum number of bytes to read
7607  * @ppos: The current "file" position
7608  *
7609  * This function implements the write interface for a struct trace_min_max_param.
7610  * The filp->private_data must point to a trace_min_max_param structure that
7611  * defines where to write the value, the min and the max acceptable values,
7612  * and a lock to protect the write.
7613  */
7614 static ssize_t
7615 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7616 {
7617 	struct trace_min_max_param *param = filp->private_data;
7618 	u64 val;
7619 	int err;
7620 
7621 	if (!param)
7622 		return -EFAULT;
7623 
7624 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7625 	if (err)
7626 		return err;
7627 
7628 	if (param->lock)
7629 		mutex_lock(param->lock);
7630 
7631 	if (param->min && val < *param->min)
7632 		err = -EINVAL;
7633 
7634 	if (param->max && val > *param->max)
7635 		err = -EINVAL;
7636 
7637 	if (!err)
7638 		*param->val = val;
7639 
7640 	if (param->lock)
7641 		mutex_unlock(param->lock);
7642 
7643 	if (err)
7644 		return err;
7645 
7646 	return cnt;
7647 }
7648 
7649 /*
7650  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7651  * @filp: The active open file structure
7652  * @ubuf: The userspace provided buffer to read value into
7653  * @cnt: The maximum number of bytes to read
7654  * @ppos: The current "file" position
7655  *
7656  * This function implements the read interface for a struct trace_min_max_param.
7657  * The filp->private_data must point to a trace_min_max_param struct with valid
7658  * data.
7659  */
7660 static ssize_t
7661 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7662 {
7663 	struct trace_min_max_param *param = filp->private_data;
7664 	char buf[U64_STR_SIZE];
7665 	int len;
7666 	u64 val;
7667 
7668 	if (!param)
7669 		return -EFAULT;
7670 
7671 	val = *param->val;
7672 
7673 	if (cnt > sizeof(buf))
7674 		cnt = sizeof(buf);
7675 
7676 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7677 
7678 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7679 }
7680 
7681 const struct file_operations trace_min_max_fops = {
7682 	.open		= tracing_open_generic,
7683 	.read		= trace_min_max_read,
7684 	.write		= trace_min_max_write,
7685 };
7686 
7687 #define TRACING_LOG_ERRS_MAX	8
7688 #define TRACING_LOG_LOC_MAX	128
7689 
7690 #define CMD_PREFIX "  Command: "
7691 
7692 struct err_info {
7693 	const char	**errs;	/* ptr to loc-specific array of err strings */
7694 	u8		type;	/* index into errs -> specific err string */
7695 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7696 	u64		ts;
7697 };
7698 
7699 struct tracing_log_err {
7700 	struct list_head	list;
7701 	struct err_info		info;
7702 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7703 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7704 };
7705 
7706 static DEFINE_MUTEX(tracing_err_log_lock);
7707 
7708 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7709 {
7710 	struct tracing_log_err *err;
7711 
7712 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7713 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7714 		if (!err)
7715 			err = ERR_PTR(-ENOMEM);
7716 		tr->n_err_log_entries++;
7717 
7718 		return err;
7719 	}
7720 
7721 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7722 	list_del(&err->list);
7723 
7724 	return err;
7725 }
7726 
7727 /**
7728  * err_pos - find the position of a string within a command for error careting
7729  * @cmd: The tracing command that caused the error
7730  * @str: The string to position the caret at within @cmd
7731  *
7732  * Finds the position of the first occurrence of @str within @cmd.  The
7733  * return value can be passed to tracing_log_err() for caret placement
7734  * within @cmd.
7735  *
7736  * Returns the index within @cmd of the first occurrence of @str or 0
7737  * if @str was not found.
7738  */
7739 unsigned int err_pos(char *cmd, const char *str)
7740 {
7741 	char *found;
7742 
7743 	if (WARN_ON(!strlen(cmd)))
7744 		return 0;
7745 
7746 	found = strstr(cmd, str);
7747 	if (found)
7748 		return found - cmd;
7749 
7750 	return 0;
7751 }
7752 
7753 /**
7754  * tracing_log_err - write an error to the tracing error log
7755  * @tr: The associated trace array for the error (NULL for top level array)
7756  * @loc: A string describing where the error occurred
7757  * @cmd: The tracing command that caused the error
7758  * @errs: The array of loc-specific static error strings
7759  * @type: The index into errs[], which produces the specific static err string
7760  * @pos: The position the caret should be placed in the cmd
7761  *
7762  * Writes an error into tracing/error_log of the form:
7763  *
7764  * <loc>: error: <text>
7765  *   Command: <cmd>
7766  *              ^
7767  *
7768  * tracing/error_log is a small log file containing the last
7769  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7770  * unless there has been a tracing error, and the error log can be
7771  * cleared and have its memory freed by writing the empty string in
7772  * truncation mode to it i.e. echo > tracing/error_log.
7773  *
7774  * NOTE: the @errs array along with the @type param are used to
7775  * produce a static error string - this string is not copied and saved
7776  * when the error is logged - only a pointer to it is saved.  See
7777  * existing callers for examples of how static strings are typically
7778  * defined for use with tracing_log_err().
7779  */
7780 void tracing_log_err(struct trace_array *tr,
7781 		     const char *loc, const char *cmd,
7782 		     const char **errs, u8 type, u8 pos)
7783 {
7784 	struct tracing_log_err *err;
7785 
7786 	if (!tr)
7787 		tr = &global_trace;
7788 
7789 	mutex_lock(&tracing_err_log_lock);
7790 	err = get_tracing_log_err(tr);
7791 	if (PTR_ERR(err) == -ENOMEM) {
7792 		mutex_unlock(&tracing_err_log_lock);
7793 		return;
7794 	}
7795 
7796 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7797 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7798 
7799 	err->info.errs = errs;
7800 	err->info.type = type;
7801 	err->info.pos = pos;
7802 	err->info.ts = local_clock();
7803 
7804 	list_add_tail(&err->list, &tr->err_log);
7805 	mutex_unlock(&tracing_err_log_lock);
7806 }
7807 
7808 static void clear_tracing_err_log(struct trace_array *tr)
7809 {
7810 	struct tracing_log_err *err, *next;
7811 
7812 	mutex_lock(&tracing_err_log_lock);
7813 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7814 		list_del(&err->list);
7815 		kfree(err);
7816 	}
7817 
7818 	tr->n_err_log_entries = 0;
7819 	mutex_unlock(&tracing_err_log_lock);
7820 }
7821 
7822 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7823 {
7824 	struct trace_array *tr = m->private;
7825 
7826 	mutex_lock(&tracing_err_log_lock);
7827 
7828 	return seq_list_start(&tr->err_log, *pos);
7829 }
7830 
7831 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7832 {
7833 	struct trace_array *tr = m->private;
7834 
7835 	return seq_list_next(v, &tr->err_log, pos);
7836 }
7837 
7838 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7839 {
7840 	mutex_unlock(&tracing_err_log_lock);
7841 }
7842 
7843 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7844 {
7845 	u8 i;
7846 
7847 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7848 		seq_putc(m, ' ');
7849 	for (i = 0; i < pos; i++)
7850 		seq_putc(m, ' ');
7851 	seq_puts(m, "^\n");
7852 }
7853 
7854 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7855 {
7856 	struct tracing_log_err *err = v;
7857 
7858 	if (err) {
7859 		const char *err_text = err->info.errs[err->info.type];
7860 		u64 sec = err->info.ts;
7861 		u32 nsec;
7862 
7863 		nsec = do_div(sec, NSEC_PER_SEC);
7864 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7865 			   err->loc, err_text);
7866 		seq_printf(m, "%s", err->cmd);
7867 		tracing_err_log_show_pos(m, err->info.pos);
7868 	}
7869 
7870 	return 0;
7871 }
7872 
7873 static const struct seq_operations tracing_err_log_seq_ops = {
7874 	.start  = tracing_err_log_seq_start,
7875 	.next   = tracing_err_log_seq_next,
7876 	.stop   = tracing_err_log_seq_stop,
7877 	.show   = tracing_err_log_seq_show
7878 };
7879 
7880 static int tracing_err_log_open(struct inode *inode, struct file *file)
7881 {
7882 	struct trace_array *tr = inode->i_private;
7883 	int ret = 0;
7884 
7885 	ret = tracing_check_open_get_tr(tr);
7886 	if (ret)
7887 		return ret;
7888 
7889 	/* If this file was opened for write, then erase contents */
7890 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7891 		clear_tracing_err_log(tr);
7892 
7893 	if (file->f_mode & FMODE_READ) {
7894 		ret = seq_open(file, &tracing_err_log_seq_ops);
7895 		if (!ret) {
7896 			struct seq_file *m = file->private_data;
7897 			m->private = tr;
7898 		} else {
7899 			trace_array_put(tr);
7900 		}
7901 	}
7902 	return ret;
7903 }
7904 
7905 static ssize_t tracing_err_log_write(struct file *file,
7906 				     const char __user *buffer,
7907 				     size_t count, loff_t *ppos)
7908 {
7909 	return count;
7910 }
7911 
7912 static int tracing_err_log_release(struct inode *inode, struct file *file)
7913 {
7914 	struct trace_array *tr = inode->i_private;
7915 
7916 	trace_array_put(tr);
7917 
7918 	if (file->f_mode & FMODE_READ)
7919 		seq_release(inode, file);
7920 
7921 	return 0;
7922 }
7923 
7924 static const struct file_operations tracing_err_log_fops = {
7925 	.open           = tracing_err_log_open,
7926 	.write		= tracing_err_log_write,
7927 	.read           = seq_read,
7928 	.llseek         = seq_lseek,
7929 	.release        = tracing_err_log_release,
7930 };
7931 
7932 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7933 {
7934 	struct trace_array *tr = inode->i_private;
7935 	struct ftrace_buffer_info *info;
7936 	int ret;
7937 
7938 	ret = tracing_check_open_get_tr(tr);
7939 	if (ret)
7940 		return ret;
7941 
7942 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7943 	if (!info) {
7944 		trace_array_put(tr);
7945 		return -ENOMEM;
7946 	}
7947 
7948 	mutex_lock(&trace_types_lock);
7949 
7950 	info->iter.tr		= tr;
7951 	info->iter.cpu_file	= tracing_get_cpu(inode);
7952 	info->iter.trace	= tr->current_trace;
7953 	info->iter.array_buffer = &tr->array_buffer;
7954 	info->spare		= NULL;
7955 	/* Force reading ring buffer for first read */
7956 	info->read		= (unsigned int)-1;
7957 
7958 	filp->private_data = info;
7959 
7960 	tr->trace_ref++;
7961 
7962 	mutex_unlock(&trace_types_lock);
7963 
7964 	ret = nonseekable_open(inode, filp);
7965 	if (ret < 0)
7966 		trace_array_put(tr);
7967 
7968 	return ret;
7969 }
7970 
7971 static __poll_t
7972 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7973 {
7974 	struct ftrace_buffer_info *info = filp->private_data;
7975 	struct trace_iterator *iter = &info->iter;
7976 
7977 	return trace_poll(iter, filp, poll_table);
7978 }
7979 
7980 static ssize_t
7981 tracing_buffers_read(struct file *filp, char __user *ubuf,
7982 		     size_t count, loff_t *ppos)
7983 {
7984 	struct ftrace_buffer_info *info = filp->private_data;
7985 	struct trace_iterator *iter = &info->iter;
7986 	ssize_t ret = 0;
7987 	ssize_t size;
7988 
7989 	if (!count)
7990 		return 0;
7991 
7992 #ifdef CONFIG_TRACER_MAX_TRACE
7993 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7994 		return -EBUSY;
7995 #endif
7996 
7997 	if (!info->spare) {
7998 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7999 							  iter->cpu_file);
8000 		if (IS_ERR(info->spare)) {
8001 			ret = PTR_ERR(info->spare);
8002 			info->spare = NULL;
8003 		} else {
8004 			info->spare_cpu = iter->cpu_file;
8005 		}
8006 	}
8007 	if (!info->spare)
8008 		return ret;
8009 
8010 	/* Do we have previous read data to read? */
8011 	if (info->read < PAGE_SIZE)
8012 		goto read;
8013 
8014  again:
8015 	trace_access_lock(iter->cpu_file);
8016 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8017 				    &info->spare,
8018 				    count,
8019 				    iter->cpu_file, 0);
8020 	trace_access_unlock(iter->cpu_file);
8021 
8022 	if (ret < 0) {
8023 		if (trace_empty(iter)) {
8024 			if ((filp->f_flags & O_NONBLOCK))
8025 				return -EAGAIN;
8026 
8027 			ret = wait_on_pipe(iter, 0);
8028 			if (ret)
8029 				return ret;
8030 
8031 			goto again;
8032 		}
8033 		return 0;
8034 	}
8035 
8036 	info->read = 0;
8037  read:
8038 	size = PAGE_SIZE - info->read;
8039 	if (size > count)
8040 		size = count;
8041 
8042 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8043 	if (ret == size)
8044 		return -EFAULT;
8045 
8046 	size -= ret;
8047 
8048 	*ppos += size;
8049 	info->read += size;
8050 
8051 	return size;
8052 }
8053 
8054 static int tracing_buffers_release(struct inode *inode, struct file *file)
8055 {
8056 	struct ftrace_buffer_info *info = file->private_data;
8057 	struct trace_iterator *iter = &info->iter;
8058 
8059 	mutex_lock(&trace_types_lock);
8060 
8061 	iter->tr->trace_ref--;
8062 
8063 	__trace_array_put(iter->tr);
8064 
8065 	if (info->spare)
8066 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8067 					   info->spare_cpu, info->spare);
8068 	kvfree(info);
8069 
8070 	mutex_unlock(&trace_types_lock);
8071 
8072 	return 0;
8073 }
8074 
8075 struct buffer_ref {
8076 	struct trace_buffer	*buffer;
8077 	void			*page;
8078 	int			cpu;
8079 	refcount_t		refcount;
8080 };
8081 
8082 static void buffer_ref_release(struct buffer_ref *ref)
8083 {
8084 	if (!refcount_dec_and_test(&ref->refcount))
8085 		return;
8086 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8087 	kfree(ref);
8088 }
8089 
8090 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8091 				    struct pipe_buffer *buf)
8092 {
8093 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8094 
8095 	buffer_ref_release(ref);
8096 	buf->private = 0;
8097 }
8098 
8099 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8100 				struct pipe_buffer *buf)
8101 {
8102 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8103 
8104 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8105 		return false;
8106 
8107 	refcount_inc(&ref->refcount);
8108 	return true;
8109 }
8110 
8111 /* Pipe buffer operations for a buffer. */
8112 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8113 	.release		= buffer_pipe_buf_release,
8114 	.get			= buffer_pipe_buf_get,
8115 };
8116 
8117 /*
8118  * Callback from splice_to_pipe(), if we need to release some pages
8119  * at the end of the spd in case we error'ed out in filling the pipe.
8120  */
8121 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8122 {
8123 	struct buffer_ref *ref =
8124 		(struct buffer_ref *)spd->partial[i].private;
8125 
8126 	buffer_ref_release(ref);
8127 	spd->partial[i].private = 0;
8128 }
8129 
8130 static ssize_t
8131 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8132 			    struct pipe_inode_info *pipe, size_t len,
8133 			    unsigned int flags)
8134 {
8135 	struct ftrace_buffer_info *info = file->private_data;
8136 	struct trace_iterator *iter = &info->iter;
8137 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8138 	struct page *pages_def[PIPE_DEF_BUFFERS];
8139 	struct splice_pipe_desc spd = {
8140 		.pages		= pages_def,
8141 		.partial	= partial_def,
8142 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8143 		.ops		= &buffer_pipe_buf_ops,
8144 		.spd_release	= buffer_spd_release,
8145 	};
8146 	struct buffer_ref *ref;
8147 	int entries, i;
8148 	ssize_t ret = 0;
8149 
8150 #ifdef CONFIG_TRACER_MAX_TRACE
8151 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8152 		return -EBUSY;
8153 #endif
8154 
8155 	if (*ppos & (PAGE_SIZE - 1))
8156 		return -EINVAL;
8157 
8158 	if (len & (PAGE_SIZE - 1)) {
8159 		if (len < PAGE_SIZE)
8160 			return -EINVAL;
8161 		len &= PAGE_MASK;
8162 	}
8163 
8164 	if (splice_grow_spd(pipe, &spd))
8165 		return -ENOMEM;
8166 
8167  again:
8168 	trace_access_lock(iter->cpu_file);
8169 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8170 
8171 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8172 		struct page *page;
8173 		int r;
8174 
8175 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8176 		if (!ref) {
8177 			ret = -ENOMEM;
8178 			break;
8179 		}
8180 
8181 		refcount_set(&ref->refcount, 1);
8182 		ref->buffer = iter->array_buffer->buffer;
8183 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8184 		if (IS_ERR(ref->page)) {
8185 			ret = PTR_ERR(ref->page);
8186 			ref->page = NULL;
8187 			kfree(ref);
8188 			break;
8189 		}
8190 		ref->cpu = iter->cpu_file;
8191 
8192 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8193 					  len, iter->cpu_file, 1);
8194 		if (r < 0) {
8195 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8196 						   ref->page);
8197 			kfree(ref);
8198 			break;
8199 		}
8200 
8201 		page = virt_to_page(ref->page);
8202 
8203 		spd.pages[i] = page;
8204 		spd.partial[i].len = PAGE_SIZE;
8205 		spd.partial[i].offset = 0;
8206 		spd.partial[i].private = (unsigned long)ref;
8207 		spd.nr_pages++;
8208 		*ppos += PAGE_SIZE;
8209 
8210 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8211 	}
8212 
8213 	trace_access_unlock(iter->cpu_file);
8214 	spd.nr_pages = i;
8215 
8216 	/* did we read anything? */
8217 	if (!spd.nr_pages) {
8218 		if (ret)
8219 			goto out;
8220 
8221 		ret = -EAGAIN;
8222 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8223 			goto out;
8224 
8225 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8226 		if (ret)
8227 			goto out;
8228 
8229 		goto again;
8230 	}
8231 
8232 	ret = splice_to_pipe(pipe, &spd);
8233 out:
8234 	splice_shrink_spd(&spd);
8235 
8236 	return ret;
8237 }
8238 
8239 static const struct file_operations tracing_buffers_fops = {
8240 	.open		= tracing_buffers_open,
8241 	.read		= tracing_buffers_read,
8242 	.poll		= tracing_buffers_poll,
8243 	.release	= tracing_buffers_release,
8244 	.splice_read	= tracing_buffers_splice_read,
8245 	.llseek		= no_llseek,
8246 };
8247 
8248 static ssize_t
8249 tracing_stats_read(struct file *filp, char __user *ubuf,
8250 		   size_t count, loff_t *ppos)
8251 {
8252 	struct inode *inode = file_inode(filp);
8253 	struct trace_array *tr = inode->i_private;
8254 	struct array_buffer *trace_buf = &tr->array_buffer;
8255 	int cpu = tracing_get_cpu(inode);
8256 	struct trace_seq *s;
8257 	unsigned long cnt;
8258 	unsigned long long t;
8259 	unsigned long usec_rem;
8260 
8261 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8262 	if (!s)
8263 		return -ENOMEM;
8264 
8265 	trace_seq_init(s);
8266 
8267 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8268 	trace_seq_printf(s, "entries: %ld\n", cnt);
8269 
8270 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8271 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8272 
8273 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8274 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8275 
8276 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8277 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8278 
8279 	if (trace_clocks[tr->clock_id].in_ns) {
8280 		/* local or global for trace_clock */
8281 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8282 		usec_rem = do_div(t, USEC_PER_SEC);
8283 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8284 								t, usec_rem);
8285 
8286 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8287 		usec_rem = do_div(t, USEC_PER_SEC);
8288 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8289 	} else {
8290 		/* counter or tsc mode for trace_clock */
8291 		trace_seq_printf(s, "oldest event ts: %llu\n",
8292 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8293 
8294 		trace_seq_printf(s, "now ts: %llu\n",
8295 				ring_buffer_time_stamp(trace_buf->buffer));
8296 	}
8297 
8298 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8299 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8300 
8301 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8302 	trace_seq_printf(s, "read events: %ld\n", cnt);
8303 
8304 	count = simple_read_from_buffer(ubuf, count, ppos,
8305 					s->buffer, trace_seq_used(s));
8306 
8307 	kfree(s);
8308 
8309 	return count;
8310 }
8311 
8312 static const struct file_operations tracing_stats_fops = {
8313 	.open		= tracing_open_generic_tr,
8314 	.read		= tracing_stats_read,
8315 	.llseek		= generic_file_llseek,
8316 	.release	= tracing_release_generic_tr,
8317 };
8318 
8319 #ifdef CONFIG_DYNAMIC_FTRACE
8320 
8321 static ssize_t
8322 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8323 		  size_t cnt, loff_t *ppos)
8324 {
8325 	ssize_t ret;
8326 	char *buf;
8327 	int r;
8328 
8329 	/* 256 should be plenty to hold the amount needed */
8330 	buf = kmalloc(256, GFP_KERNEL);
8331 	if (!buf)
8332 		return -ENOMEM;
8333 
8334 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8335 		      ftrace_update_tot_cnt,
8336 		      ftrace_number_of_pages,
8337 		      ftrace_number_of_groups);
8338 
8339 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8340 	kfree(buf);
8341 	return ret;
8342 }
8343 
8344 static const struct file_operations tracing_dyn_info_fops = {
8345 	.open		= tracing_open_generic,
8346 	.read		= tracing_read_dyn_info,
8347 	.llseek		= generic_file_llseek,
8348 };
8349 #endif /* CONFIG_DYNAMIC_FTRACE */
8350 
8351 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8352 static void
8353 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8354 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8355 		void *data)
8356 {
8357 	tracing_snapshot_instance(tr);
8358 }
8359 
8360 static void
8361 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8362 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8363 		      void *data)
8364 {
8365 	struct ftrace_func_mapper *mapper = data;
8366 	long *count = NULL;
8367 
8368 	if (mapper)
8369 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8370 
8371 	if (count) {
8372 
8373 		if (*count <= 0)
8374 			return;
8375 
8376 		(*count)--;
8377 	}
8378 
8379 	tracing_snapshot_instance(tr);
8380 }
8381 
8382 static int
8383 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8384 		      struct ftrace_probe_ops *ops, void *data)
8385 {
8386 	struct ftrace_func_mapper *mapper = data;
8387 	long *count = NULL;
8388 
8389 	seq_printf(m, "%ps:", (void *)ip);
8390 
8391 	seq_puts(m, "snapshot");
8392 
8393 	if (mapper)
8394 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8395 
8396 	if (count)
8397 		seq_printf(m, ":count=%ld\n", *count);
8398 	else
8399 		seq_puts(m, ":unlimited\n");
8400 
8401 	return 0;
8402 }
8403 
8404 static int
8405 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8406 		     unsigned long ip, void *init_data, void **data)
8407 {
8408 	struct ftrace_func_mapper *mapper = *data;
8409 
8410 	if (!mapper) {
8411 		mapper = allocate_ftrace_func_mapper();
8412 		if (!mapper)
8413 			return -ENOMEM;
8414 		*data = mapper;
8415 	}
8416 
8417 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8418 }
8419 
8420 static void
8421 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8422 		     unsigned long ip, void *data)
8423 {
8424 	struct ftrace_func_mapper *mapper = data;
8425 
8426 	if (!ip) {
8427 		if (!mapper)
8428 			return;
8429 		free_ftrace_func_mapper(mapper, NULL);
8430 		return;
8431 	}
8432 
8433 	ftrace_func_mapper_remove_ip(mapper, ip);
8434 }
8435 
8436 static struct ftrace_probe_ops snapshot_probe_ops = {
8437 	.func			= ftrace_snapshot,
8438 	.print			= ftrace_snapshot_print,
8439 };
8440 
8441 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8442 	.func			= ftrace_count_snapshot,
8443 	.print			= ftrace_snapshot_print,
8444 	.init			= ftrace_snapshot_init,
8445 	.free			= ftrace_snapshot_free,
8446 };
8447 
8448 static int
8449 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8450 			       char *glob, char *cmd, char *param, int enable)
8451 {
8452 	struct ftrace_probe_ops *ops;
8453 	void *count = (void *)-1;
8454 	char *number;
8455 	int ret;
8456 
8457 	if (!tr)
8458 		return -ENODEV;
8459 
8460 	/* hash funcs only work with set_ftrace_filter */
8461 	if (!enable)
8462 		return -EINVAL;
8463 
8464 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8465 
8466 	if (glob[0] == '!')
8467 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8468 
8469 	if (!param)
8470 		goto out_reg;
8471 
8472 	number = strsep(&param, ":");
8473 
8474 	if (!strlen(number))
8475 		goto out_reg;
8476 
8477 	/*
8478 	 * We use the callback data field (which is a pointer)
8479 	 * as our counter.
8480 	 */
8481 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8482 	if (ret)
8483 		return ret;
8484 
8485  out_reg:
8486 	ret = tracing_alloc_snapshot_instance(tr);
8487 	if (ret < 0)
8488 		goto out;
8489 
8490 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8491 
8492  out:
8493 	return ret < 0 ? ret : 0;
8494 }
8495 
8496 static struct ftrace_func_command ftrace_snapshot_cmd = {
8497 	.name			= "snapshot",
8498 	.func			= ftrace_trace_snapshot_callback,
8499 };
8500 
8501 static __init int register_snapshot_cmd(void)
8502 {
8503 	return register_ftrace_command(&ftrace_snapshot_cmd);
8504 }
8505 #else
8506 static inline __init int register_snapshot_cmd(void) { return 0; }
8507 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8508 
8509 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8510 {
8511 	if (WARN_ON(!tr->dir))
8512 		return ERR_PTR(-ENODEV);
8513 
8514 	/* Top directory uses NULL as the parent */
8515 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8516 		return NULL;
8517 
8518 	/* All sub buffers have a descriptor */
8519 	return tr->dir;
8520 }
8521 
8522 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8523 {
8524 	struct dentry *d_tracer;
8525 
8526 	if (tr->percpu_dir)
8527 		return tr->percpu_dir;
8528 
8529 	d_tracer = tracing_get_dentry(tr);
8530 	if (IS_ERR(d_tracer))
8531 		return NULL;
8532 
8533 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8534 
8535 	MEM_FAIL(!tr->percpu_dir,
8536 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8537 
8538 	return tr->percpu_dir;
8539 }
8540 
8541 static struct dentry *
8542 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8543 		      void *data, long cpu, const struct file_operations *fops)
8544 {
8545 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8546 
8547 	if (ret) /* See tracing_get_cpu() */
8548 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8549 	return ret;
8550 }
8551 
8552 static void
8553 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8554 {
8555 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8556 	struct dentry *d_cpu;
8557 	char cpu_dir[30]; /* 30 characters should be more than enough */
8558 
8559 	if (!d_percpu)
8560 		return;
8561 
8562 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8563 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8564 	if (!d_cpu) {
8565 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8566 		return;
8567 	}
8568 
8569 	/* per cpu trace_pipe */
8570 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8571 				tr, cpu, &tracing_pipe_fops);
8572 
8573 	/* per cpu trace */
8574 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8575 				tr, cpu, &tracing_fops);
8576 
8577 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8578 				tr, cpu, &tracing_buffers_fops);
8579 
8580 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8581 				tr, cpu, &tracing_stats_fops);
8582 
8583 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8584 				tr, cpu, &tracing_entries_fops);
8585 
8586 #ifdef CONFIG_TRACER_SNAPSHOT
8587 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8588 				tr, cpu, &snapshot_fops);
8589 
8590 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8591 				tr, cpu, &snapshot_raw_fops);
8592 #endif
8593 }
8594 
8595 #ifdef CONFIG_FTRACE_SELFTEST
8596 /* Let selftest have access to static functions in this file */
8597 #include "trace_selftest.c"
8598 #endif
8599 
8600 static ssize_t
8601 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8602 			loff_t *ppos)
8603 {
8604 	struct trace_option_dentry *topt = filp->private_data;
8605 	char *buf;
8606 
8607 	if (topt->flags->val & topt->opt->bit)
8608 		buf = "1\n";
8609 	else
8610 		buf = "0\n";
8611 
8612 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8613 }
8614 
8615 static ssize_t
8616 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8617 			 loff_t *ppos)
8618 {
8619 	struct trace_option_dentry *topt = filp->private_data;
8620 	unsigned long val;
8621 	int ret;
8622 
8623 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8624 	if (ret)
8625 		return ret;
8626 
8627 	if (val != 0 && val != 1)
8628 		return -EINVAL;
8629 
8630 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8631 		mutex_lock(&trace_types_lock);
8632 		ret = __set_tracer_option(topt->tr, topt->flags,
8633 					  topt->opt, !val);
8634 		mutex_unlock(&trace_types_lock);
8635 		if (ret)
8636 			return ret;
8637 	}
8638 
8639 	*ppos += cnt;
8640 
8641 	return cnt;
8642 }
8643 
8644 
8645 static const struct file_operations trace_options_fops = {
8646 	.open = tracing_open_generic,
8647 	.read = trace_options_read,
8648 	.write = trace_options_write,
8649 	.llseek	= generic_file_llseek,
8650 };
8651 
8652 /*
8653  * In order to pass in both the trace_array descriptor as well as the index
8654  * to the flag that the trace option file represents, the trace_array
8655  * has a character array of trace_flags_index[], which holds the index
8656  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8657  * The address of this character array is passed to the flag option file
8658  * read/write callbacks.
8659  *
8660  * In order to extract both the index and the trace_array descriptor,
8661  * get_tr_index() uses the following algorithm.
8662  *
8663  *   idx = *ptr;
8664  *
8665  * As the pointer itself contains the address of the index (remember
8666  * index[1] == 1).
8667  *
8668  * Then to get the trace_array descriptor, by subtracting that index
8669  * from the ptr, we get to the start of the index itself.
8670  *
8671  *   ptr - idx == &index[0]
8672  *
8673  * Then a simple container_of() from that pointer gets us to the
8674  * trace_array descriptor.
8675  */
8676 static void get_tr_index(void *data, struct trace_array **ptr,
8677 			 unsigned int *pindex)
8678 {
8679 	*pindex = *(unsigned char *)data;
8680 
8681 	*ptr = container_of(data - *pindex, struct trace_array,
8682 			    trace_flags_index);
8683 }
8684 
8685 static ssize_t
8686 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8687 			loff_t *ppos)
8688 {
8689 	void *tr_index = filp->private_data;
8690 	struct trace_array *tr;
8691 	unsigned int index;
8692 	char *buf;
8693 
8694 	get_tr_index(tr_index, &tr, &index);
8695 
8696 	if (tr->trace_flags & (1 << index))
8697 		buf = "1\n";
8698 	else
8699 		buf = "0\n";
8700 
8701 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8702 }
8703 
8704 static ssize_t
8705 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8706 			 loff_t *ppos)
8707 {
8708 	void *tr_index = filp->private_data;
8709 	struct trace_array *tr;
8710 	unsigned int index;
8711 	unsigned long val;
8712 	int ret;
8713 
8714 	get_tr_index(tr_index, &tr, &index);
8715 
8716 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8717 	if (ret)
8718 		return ret;
8719 
8720 	if (val != 0 && val != 1)
8721 		return -EINVAL;
8722 
8723 	mutex_lock(&event_mutex);
8724 	mutex_lock(&trace_types_lock);
8725 	ret = set_tracer_flag(tr, 1 << index, val);
8726 	mutex_unlock(&trace_types_lock);
8727 	mutex_unlock(&event_mutex);
8728 
8729 	if (ret < 0)
8730 		return ret;
8731 
8732 	*ppos += cnt;
8733 
8734 	return cnt;
8735 }
8736 
8737 static const struct file_operations trace_options_core_fops = {
8738 	.open = tracing_open_generic,
8739 	.read = trace_options_core_read,
8740 	.write = trace_options_core_write,
8741 	.llseek = generic_file_llseek,
8742 };
8743 
8744 struct dentry *trace_create_file(const char *name,
8745 				 umode_t mode,
8746 				 struct dentry *parent,
8747 				 void *data,
8748 				 const struct file_operations *fops)
8749 {
8750 	struct dentry *ret;
8751 
8752 	ret = tracefs_create_file(name, mode, parent, data, fops);
8753 	if (!ret)
8754 		pr_warn("Could not create tracefs '%s' entry\n", name);
8755 
8756 	return ret;
8757 }
8758 
8759 
8760 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8761 {
8762 	struct dentry *d_tracer;
8763 
8764 	if (tr->options)
8765 		return tr->options;
8766 
8767 	d_tracer = tracing_get_dentry(tr);
8768 	if (IS_ERR(d_tracer))
8769 		return NULL;
8770 
8771 	tr->options = tracefs_create_dir("options", d_tracer);
8772 	if (!tr->options) {
8773 		pr_warn("Could not create tracefs directory 'options'\n");
8774 		return NULL;
8775 	}
8776 
8777 	return tr->options;
8778 }
8779 
8780 static void
8781 create_trace_option_file(struct trace_array *tr,
8782 			 struct trace_option_dentry *topt,
8783 			 struct tracer_flags *flags,
8784 			 struct tracer_opt *opt)
8785 {
8786 	struct dentry *t_options;
8787 
8788 	t_options = trace_options_init_dentry(tr);
8789 	if (!t_options)
8790 		return;
8791 
8792 	topt->flags = flags;
8793 	topt->opt = opt;
8794 	topt->tr = tr;
8795 
8796 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8797 					t_options, topt, &trace_options_fops);
8798 
8799 }
8800 
8801 static void
8802 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8803 {
8804 	struct trace_option_dentry *topts;
8805 	struct trace_options *tr_topts;
8806 	struct tracer_flags *flags;
8807 	struct tracer_opt *opts;
8808 	int cnt;
8809 	int i;
8810 
8811 	if (!tracer)
8812 		return;
8813 
8814 	flags = tracer->flags;
8815 
8816 	if (!flags || !flags->opts)
8817 		return;
8818 
8819 	/*
8820 	 * If this is an instance, only create flags for tracers
8821 	 * the instance may have.
8822 	 */
8823 	if (!trace_ok_for_array(tracer, tr))
8824 		return;
8825 
8826 	for (i = 0; i < tr->nr_topts; i++) {
8827 		/* Make sure there's no duplicate flags. */
8828 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8829 			return;
8830 	}
8831 
8832 	opts = flags->opts;
8833 
8834 	for (cnt = 0; opts[cnt].name; cnt++)
8835 		;
8836 
8837 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8838 	if (!topts)
8839 		return;
8840 
8841 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8842 			    GFP_KERNEL);
8843 	if (!tr_topts) {
8844 		kfree(topts);
8845 		return;
8846 	}
8847 
8848 	tr->topts = tr_topts;
8849 	tr->topts[tr->nr_topts].tracer = tracer;
8850 	tr->topts[tr->nr_topts].topts = topts;
8851 	tr->nr_topts++;
8852 
8853 	for (cnt = 0; opts[cnt].name; cnt++) {
8854 		create_trace_option_file(tr, &topts[cnt], flags,
8855 					 &opts[cnt]);
8856 		MEM_FAIL(topts[cnt].entry == NULL,
8857 			  "Failed to create trace option: %s",
8858 			  opts[cnt].name);
8859 	}
8860 }
8861 
8862 static struct dentry *
8863 create_trace_option_core_file(struct trace_array *tr,
8864 			      const char *option, long index)
8865 {
8866 	struct dentry *t_options;
8867 
8868 	t_options = trace_options_init_dentry(tr);
8869 	if (!t_options)
8870 		return NULL;
8871 
8872 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8873 				 (void *)&tr->trace_flags_index[index],
8874 				 &trace_options_core_fops);
8875 }
8876 
8877 static void create_trace_options_dir(struct trace_array *tr)
8878 {
8879 	struct dentry *t_options;
8880 	bool top_level = tr == &global_trace;
8881 	int i;
8882 
8883 	t_options = trace_options_init_dentry(tr);
8884 	if (!t_options)
8885 		return;
8886 
8887 	for (i = 0; trace_options[i]; i++) {
8888 		if (top_level ||
8889 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8890 			create_trace_option_core_file(tr, trace_options[i], i);
8891 	}
8892 }
8893 
8894 static ssize_t
8895 rb_simple_read(struct file *filp, char __user *ubuf,
8896 	       size_t cnt, loff_t *ppos)
8897 {
8898 	struct trace_array *tr = filp->private_data;
8899 	char buf[64];
8900 	int r;
8901 
8902 	r = tracer_tracing_is_on(tr);
8903 	r = sprintf(buf, "%d\n", r);
8904 
8905 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8906 }
8907 
8908 static ssize_t
8909 rb_simple_write(struct file *filp, const char __user *ubuf,
8910 		size_t cnt, loff_t *ppos)
8911 {
8912 	struct trace_array *tr = filp->private_data;
8913 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8914 	unsigned long val;
8915 	int ret;
8916 
8917 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8918 	if (ret)
8919 		return ret;
8920 
8921 	if (buffer) {
8922 		mutex_lock(&trace_types_lock);
8923 		if (!!val == tracer_tracing_is_on(tr)) {
8924 			val = 0; /* do nothing */
8925 		} else if (val) {
8926 			tracer_tracing_on(tr);
8927 			if (tr->current_trace->start)
8928 				tr->current_trace->start(tr);
8929 		} else {
8930 			tracer_tracing_off(tr);
8931 			if (tr->current_trace->stop)
8932 				tr->current_trace->stop(tr);
8933 		}
8934 		mutex_unlock(&trace_types_lock);
8935 	}
8936 
8937 	(*ppos)++;
8938 
8939 	return cnt;
8940 }
8941 
8942 static const struct file_operations rb_simple_fops = {
8943 	.open		= tracing_open_generic_tr,
8944 	.read		= rb_simple_read,
8945 	.write		= rb_simple_write,
8946 	.release	= tracing_release_generic_tr,
8947 	.llseek		= default_llseek,
8948 };
8949 
8950 static ssize_t
8951 buffer_percent_read(struct file *filp, char __user *ubuf,
8952 		    size_t cnt, loff_t *ppos)
8953 {
8954 	struct trace_array *tr = filp->private_data;
8955 	char buf[64];
8956 	int r;
8957 
8958 	r = tr->buffer_percent;
8959 	r = sprintf(buf, "%d\n", r);
8960 
8961 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8962 }
8963 
8964 static ssize_t
8965 buffer_percent_write(struct file *filp, const char __user *ubuf,
8966 		     size_t cnt, loff_t *ppos)
8967 {
8968 	struct trace_array *tr = filp->private_data;
8969 	unsigned long val;
8970 	int ret;
8971 
8972 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8973 	if (ret)
8974 		return ret;
8975 
8976 	if (val > 100)
8977 		return -EINVAL;
8978 
8979 	if (!val)
8980 		val = 1;
8981 
8982 	tr->buffer_percent = val;
8983 
8984 	(*ppos)++;
8985 
8986 	return cnt;
8987 }
8988 
8989 static const struct file_operations buffer_percent_fops = {
8990 	.open		= tracing_open_generic_tr,
8991 	.read		= buffer_percent_read,
8992 	.write		= buffer_percent_write,
8993 	.release	= tracing_release_generic_tr,
8994 	.llseek		= default_llseek,
8995 };
8996 
8997 static struct dentry *trace_instance_dir;
8998 
8999 static void
9000 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9001 
9002 static int
9003 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9004 {
9005 	enum ring_buffer_flags rb_flags;
9006 
9007 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9008 
9009 	buf->tr = tr;
9010 
9011 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9012 	if (!buf->buffer)
9013 		return -ENOMEM;
9014 
9015 	buf->data = alloc_percpu(struct trace_array_cpu);
9016 	if (!buf->data) {
9017 		ring_buffer_free(buf->buffer);
9018 		buf->buffer = NULL;
9019 		return -ENOMEM;
9020 	}
9021 
9022 	/* Allocate the first page for all buffers */
9023 	set_buffer_entries(&tr->array_buffer,
9024 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9025 
9026 	return 0;
9027 }
9028 
9029 static int allocate_trace_buffers(struct trace_array *tr, int size)
9030 {
9031 	int ret;
9032 
9033 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9034 	if (ret)
9035 		return ret;
9036 
9037 #ifdef CONFIG_TRACER_MAX_TRACE
9038 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9039 				    allocate_snapshot ? size : 1);
9040 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9041 		ring_buffer_free(tr->array_buffer.buffer);
9042 		tr->array_buffer.buffer = NULL;
9043 		free_percpu(tr->array_buffer.data);
9044 		tr->array_buffer.data = NULL;
9045 		return -ENOMEM;
9046 	}
9047 	tr->allocated_snapshot = allocate_snapshot;
9048 
9049 	/*
9050 	 * Only the top level trace array gets its snapshot allocated
9051 	 * from the kernel command line.
9052 	 */
9053 	allocate_snapshot = false;
9054 #endif
9055 
9056 	return 0;
9057 }
9058 
9059 static void free_trace_buffer(struct array_buffer *buf)
9060 {
9061 	if (buf->buffer) {
9062 		ring_buffer_free(buf->buffer);
9063 		buf->buffer = NULL;
9064 		free_percpu(buf->data);
9065 		buf->data = NULL;
9066 	}
9067 }
9068 
9069 static void free_trace_buffers(struct trace_array *tr)
9070 {
9071 	if (!tr)
9072 		return;
9073 
9074 	free_trace_buffer(&tr->array_buffer);
9075 
9076 #ifdef CONFIG_TRACER_MAX_TRACE
9077 	free_trace_buffer(&tr->max_buffer);
9078 #endif
9079 }
9080 
9081 static void init_trace_flags_index(struct trace_array *tr)
9082 {
9083 	int i;
9084 
9085 	/* Used by the trace options files */
9086 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9087 		tr->trace_flags_index[i] = i;
9088 }
9089 
9090 static void __update_tracer_options(struct trace_array *tr)
9091 {
9092 	struct tracer *t;
9093 
9094 	for (t = trace_types; t; t = t->next)
9095 		add_tracer_options(tr, t);
9096 }
9097 
9098 static void update_tracer_options(struct trace_array *tr)
9099 {
9100 	mutex_lock(&trace_types_lock);
9101 	__update_tracer_options(tr);
9102 	mutex_unlock(&trace_types_lock);
9103 }
9104 
9105 /* Must have trace_types_lock held */
9106 struct trace_array *trace_array_find(const char *instance)
9107 {
9108 	struct trace_array *tr, *found = NULL;
9109 
9110 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9111 		if (tr->name && strcmp(tr->name, instance) == 0) {
9112 			found = tr;
9113 			break;
9114 		}
9115 	}
9116 
9117 	return found;
9118 }
9119 
9120 struct trace_array *trace_array_find_get(const char *instance)
9121 {
9122 	struct trace_array *tr;
9123 
9124 	mutex_lock(&trace_types_lock);
9125 	tr = trace_array_find(instance);
9126 	if (tr)
9127 		tr->ref++;
9128 	mutex_unlock(&trace_types_lock);
9129 
9130 	return tr;
9131 }
9132 
9133 static int trace_array_create_dir(struct trace_array *tr)
9134 {
9135 	int ret;
9136 
9137 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9138 	if (!tr->dir)
9139 		return -EINVAL;
9140 
9141 	ret = event_trace_add_tracer(tr->dir, tr);
9142 	if (ret) {
9143 		tracefs_remove(tr->dir);
9144 		return ret;
9145 	}
9146 
9147 	init_tracer_tracefs(tr, tr->dir);
9148 	__update_tracer_options(tr);
9149 
9150 	return ret;
9151 }
9152 
9153 static struct trace_array *trace_array_create(const char *name)
9154 {
9155 	struct trace_array *tr;
9156 	int ret;
9157 
9158 	ret = -ENOMEM;
9159 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9160 	if (!tr)
9161 		return ERR_PTR(ret);
9162 
9163 	tr->name = kstrdup(name, GFP_KERNEL);
9164 	if (!tr->name)
9165 		goto out_free_tr;
9166 
9167 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9168 		goto out_free_tr;
9169 
9170 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9171 
9172 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9173 
9174 	raw_spin_lock_init(&tr->start_lock);
9175 
9176 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9177 
9178 	tr->current_trace = &nop_trace;
9179 
9180 	INIT_LIST_HEAD(&tr->systems);
9181 	INIT_LIST_HEAD(&tr->events);
9182 	INIT_LIST_HEAD(&tr->hist_vars);
9183 	INIT_LIST_HEAD(&tr->err_log);
9184 
9185 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9186 		goto out_free_tr;
9187 
9188 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9189 		goto out_free_tr;
9190 
9191 	ftrace_init_trace_array(tr);
9192 
9193 	init_trace_flags_index(tr);
9194 
9195 	if (trace_instance_dir) {
9196 		ret = trace_array_create_dir(tr);
9197 		if (ret)
9198 			goto out_free_tr;
9199 	} else
9200 		__trace_early_add_events(tr);
9201 
9202 	list_add(&tr->list, &ftrace_trace_arrays);
9203 
9204 	tr->ref++;
9205 
9206 	return tr;
9207 
9208  out_free_tr:
9209 	ftrace_free_ftrace_ops(tr);
9210 	free_trace_buffers(tr);
9211 	free_cpumask_var(tr->tracing_cpumask);
9212 	kfree(tr->name);
9213 	kfree(tr);
9214 
9215 	return ERR_PTR(ret);
9216 }
9217 
9218 static int instance_mkdir(const char *name)
9219 {
9220 	struct trace_array *tr;
9221 	int ret;
9222 
9223 	mutex_lock(&event_mutex);
9224 	mutex_lock(&trace_types_lock);
9225 
9226 	ret = -EEXIST;
9227 	if (trace_array_find(name))
9228 		goto out_unlock;
9229 
9230 	tr = trace_array_create(name);
9231 
9232 	ret = PTR_ERR_OR_ZERO(tr);
9233 
9234 out_unlock:
9235 	mutex_unlock(&trace_types_lock);
9236 	mutex_unlock(&event_mutex);
9237 	return ret;
9238 }
9239 
9240 /**
9241  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9242  * @name: The name of the trace array to be looked up/created.
9243  *
9244  * Returns pointer to trace array with given name.
9245  * NULL, if it cannot be created.
9246  *
9247  * NOTE: This function increments the reference counter associated with the
9248  * trace array returned. This makes sure it cannot be freed while in use.
9249  * Use trace_array_put() once the trace array is no longer needed.
9250  * If the trace_array is to be freed, trace_array_destroy() needs to
9251  * be called after the trace_array_put(), or simply let user space delete
9252  * it from the tracefs instances directory. But until the
9253  * trace_array_put() is called, user space can not delete it.
9254  *
9255  */
9256 struct trace_array *trace_array_get_by_name(const char *name)
9257 {
9258 	struct trace_array *tr;
9259 
9260 	mutex_lock(&event_mutex);
9261 	mutex_lock(&trace_types_lock);
9262 
9263 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9264 		if (tr->name && strcmp(tr->name, name) == 0)
9265 			goto out_unlock;
9266 	}
9267 
9268 	tr = trace_array_create(name);
9269 
9270 	if (IS_ERR(tr))
9271 		tr = NULL;
9272 out_unlock:
9273 	if (tr)
9274 		tr->ref++;
9275 
9276 	mutex_unlock(&trace_types_lock);
9277 	mutex_unlock(&event_mutex);
9278 	return tr;
9279 }
9280 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9281 
9282 static int __remove_instance(struct trace_array *tr)
9283 {
9284 	int i;
9285 
9286 	/* Reference counter for a newly created trace array = 1. */
9287 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9288 		return -EBUSY;
9289 
9290 	list_del(&tr->list);
9291 
9292 	/* Disable all the flags that were enabled coming in */
9293 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9294 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9295 			set_tracer_flag(tr, 1 << i, 0);
9296 	}
9297 
9298 	tracing_set_nop(tr);
9299 	clear_ftrace_function_probes(tr);
9300 	event_trace_del_tracer(tr);
9301 	ftrace_clear_pids(tr);
9302 	ftrace_destroy_function_files(tr);
9303 	tracefs_remove(tr->dir);
9304 	free_percpu(tr->last_func_repeats);
9305 	free_trace_buffers(tr);
9306 
9307 	for (i = 0; i < tr->nr_topts; i++) {
9308 		kfree(tr->topts[i].topts);
9309 	}
9310 	kfree(tr->topts);
9311 
9312 	free_cpumask_var(tr->tracing_cpumask);
9313 	kfree(tr->name);
9314 	kfree(tr);
9315 
9316 	return 0;
9317 }
9318 
9319 int trace_array_destroy(struct trace_array *this_tr)
9320 {
9321 	struct trace_array *tr;
9322 	int ret;
9323 
9324 	if (!this_tr)
9325 		return -EINVAL;
9326 
9327 	mutex_lock(&event_mutex);
9328 	mutex_lock(&trace_types_lock);
9329 
9330 	ret = -ENODEV;
9331 
9332 	/* Making sure trace array exists before destroying it. */
9333 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9334 		if (tr == this_tr) {
9335 			ret = __remove_instance(tr);
9336 			break;
9337 		}
9338 	}
9339 
9340 	mutex_unlock(&trace_types_lock);
9341 	mutex_unlock(&event_mutex);
9342 
9343 	return ret;
9344 }
9345 EXPORT_SYMBOL_GPL(trace_array_destroy);
9346 
9347 static int instance_rmdir(const char *name)
9348 {
9349 	struct trace_array *tr;
9350 	int ret;
9351 
9352 	mutex_lock(&event_mutex);
9353 	mutex_lock(&trace_types_lock);
9354 
9355 	ret = -ENODEV;
9356 	tr = trace_array_find(name);
9357 	if (tr)
9358 		ret = __remove_instance(tr);
9359 
9360 	mutex_unlock(&trace_types_lock);
9361 	mutex_unlock(&event_mutex);
9362 
9363 	return ret;
9364 }
9365 
9366 static __init void create_trace_instances(struct dentry *d_tracer)
9367 {
9368 	struct trace_array *tr;
9369 
9370 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9371 							 instance_mkdir,
9372 							 instance_rmdir);
9373 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9374 		return;
9375 
9376 	mutex_lock(&event_mutex);
9377 	mutex_lock(&trace_types_lock);
9378 
9379 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9380 		if (!tr->name)
9381 			continue;
9382 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9383 			     "Failed to create instance directory\n"))
9384 			break;
9385 	}
9386 
9387 	mutex_unlock(&trace_types_lock);
9388 	mutex_unlock(&event_mutex);
9389 }
9390 
9391 static void
9392 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9393 {
9394 	struct trace_event_file *file;
9395 	int cpu;
9396 
9397 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9398 			tr, &show_traces_fops);
9399 
9400 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9401 			tr, &set_tracer_fops);
9402 
9403 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9404 			  tr, &tracing_cpumask_fops);
9405 
9406 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9407 			  tr, &tracing_iter_fops);
9408 
9409 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9410 			  tr, &tracing_fops);
9411 
9412 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9413 			  tr, &tracing_pipe_fops);
9414 
9415 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9416 			  tr, &tracing_entries_fops);
9417 
9418 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9419 			  tr, &tracing_total_entries_fops);
9420 
9421 	trace_create_file("free_buffer", 0200, d_tracer,
9422 			  tr, &tracing_free_buffer_fops);
9423 
9424 	trace_create_file("trace_marker", 0220, d_tracer,
9425 			  tr, &tracing_mark_fops);
9426 
9427 	file = __find_event_file(tr, "ftrace", "print");
9428 	if (file && file->dir)
9429 		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9430 				  file, &event_trigger_fops);
9431 	tr->trace_marker_file = file;
9432 
9433 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9434 			  tr, &tracing_mark_raw_fops);
9435 
9436 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9437 			  &trace_clock_fops);
9438 
9439 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9440 			  tr, &rb_simple_fops);
9441 
9442 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9443 			  &trace_time_stamp_mode_fops);
9444 
9445 	tr->buffer_percent = 50;
9446 
9447 	trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9448 			tr, &buffer_percent_fops);
9449 
9450 	create_trace_options_dir(tr);
9451 
9452 	trace_create_maxlat_file(tr, d_tracer);
9453 
9454 	if (ftrace_create_function_files(tr, d_tracer))
9455 		MEM_FAIL(1, "Could not allocate function filter files");
9456 
9457 #ifdef CONFIG_TRACER_SNAPSHOT
9458 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9459 			  tr, &snapshot_fops);
9460 #endif
9461 
9462 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9463 			  tr, &tracing_err_log_fops);
9464 
9465 	for_each_tracing_cpu(cpu)
9466 		tracing_init_tracefs_percpu(tr, cpu);
9467 
9468 	ftrace_init_tracefs(tr, d_tracer);
9469 }
9470 
9471 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9472 {
9473 	struct vfsmount *mnt;
9474 	struct file_system_type *type;
9475 
9476 	/*
9477 	 * To maintain backward compatibility for tools that mount
9478 	 * debugfs to get to the tracing facility, tracefs is automatically
9479 	 * mounted to the debugfs/tracing directory.
9480 	 */
9481 	type = get_fs_type("tracefs");
9482 	if (!type)
9483 		return NULL;
9484 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9485 	put_filesystem(type);
9486 	if (IS_ERR(mnt))
9487 		return NULL;
9488 	mntget(mnt);
9489 
9490 	return mnt;
9491 }
9492 
9493 /**
9494  * tracing_init_dentry - initialize top level trace array
9495  *
9496  * This is called when creating files or directories in the tracing
9497  * directory. It is called via fs_initcall() by any of the boot up code
9498  * and expects to return the dentry of the top level tracing directory.
9499  */
9500 int tracing_init_dentry(void)
9501 {
9502 	struct trace_array *tr = &global_trace;
9503 
9504 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9505 		pr_warn("Tracing disabled due to lockdown\n");
9506 		return -EPERM;
9507 	}
9508 
9509 	/* The top level trace array uses  NULL as parent */
9510 	if (tr->dir)
9511 		return 0;
9512 
9513 	if (WARN_ON(!tracefs_initialized()))
9514 		return -ENODEV;
9515 
9516 	/*
9517 	 * As there may still be users that expect the tracing
9518 	 * files to exist in debugfs/tracing, we must automount
9519 	 * the tracefs file system there, so older tools still
9520 	 * work with the newer kernel.
9521 	 */
9522 	tr->dir = debugfs_create_automount("tracing", NULL,
9523 					   trace_automount, NULL);
9524 
9525 	return 0;
9526 }
9527 
9528 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9529 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9530 
9531 static struct workqueue_struct *eval_map_wq __initdata;
9532 static struct work_struct eval_map_work __initdata;
9533 
9534 static void __init eval_map_work_func(struct work_struct *work)
9535 {
9536 	int len;
9537 
9538 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9539 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9540 }
9541 
9542 static int __init trace_eval_init(void)
9543 {
9544 	INIT_WORK(&eval_map_work, eval_map_work_func);
9545 
9546 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9547 	if (!eval_map_wq) {
9548 		pr_err("Unable to allocate eval_map_wq\n");
9549 		/* Do work here */
9550 		eval_map_work_func(&eval_map_work);
9551 		return -ENOMEM;
9552 	}
9553 
9554 	queue_work(eval_map_wq, &eval_map_work);
9555 	return 0;
9556 }
9557 
9558 static int __init trace_eval_sync(void)
9559 {
9560 	/* Make sure the eval map updates are finished */
9561 	if (eval_map_wq)
9562 		destroy_workqueue(eval_map_wq);
9563 	return 0;
9564 }
9565 
9566 late_initcall_sync(trace_eval_sync);
9567 
9568 
9569 #ifdef CONFIG_MODULES
9570 static void trace_module_add_evals(struct module *mod)
9571 {
9572 	if (!mod->num_trace_evals)
9573 		return;
9574 
9575 	/*
9576 	 * Modules with bad taint do not have events created, do
9577 	 * not bother with enums either.
9578 	 */
9579 	if (trace_module_has_bad_taint(mod))
9580 		return;
9581 
9582 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9583 }
9584 
9585 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9586 static void trace_module_remove_evals(struct module *mod)
9587 {
9588 	union trace_eval_map_item *map;
9589 	union trace_eval_map_item **last = &trace_eval_maps;
9590 
9591 	if (!mod->num_trace_evals)
9592 		return;
9593 
9594 	mutex_lock(&trace_eval_mutex);
9595 
9596 	map = trace_eval_maps;
9597 
9598 	while (map) {
9599 		if (map->head.mod == mod)
9600 			break;
9601 		map = trace_eval_jmp_to_tail(map);
9602 		last = &map->tail.next;
9603 		map = map->tail.next;
9604 	}
9605 	if (!map)
9606 		goto out;
9607 
9608 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9609 	kfree(map);
9610  out:
9611 	mutex_unlock(&trace_eval_mutex);
9612 }
9613 #else
9614 static inline void trace_module_remove_evals(struct module *mod) { }
9615 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9616 
9617 static int trace_module_notify(struct notifier_block *self,
9618 			       unsigned long val, void *data)
9619 {
9620 	struct module *mod = data;
9621 
9622 	switch (val) {
9623 	case MODULE_STATE_COMING:
9624 		trace_module_add_evals(mod);
9625 		break;
9626 	case MODULE_STATE_GOING:
9627 		trace_module_remove_evals(mod);
9628 		break;
9629 	}
9630 
9631 	return NOTIFY_OK;
9632 }
9633 
9634 static struct notifier_block trace_module_nb = {
9635 	.notifier_call = trace_module_notify,
9636 	.priority = 0,
9637 };
9638 #endif /* CONFIG_MODULES */
9639 
9640 static __init int tracer_init_tracefs(void)
9641 {
9642 	int ret;
9643 
9644 	trace_access_lock_init();
9645 
9646 	ret = tracing_init_dentry();
9647 	if (ret)
9648 		return 0;
9649 
9650 	event_trace_init();
9651 
9652 	init_tracer_tracefs(&global_trace, NULL);
9653 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9654 
9655 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9656 			&global_trace, &tracing_thresh_fops);
9657 
9658 	trace_create_file("README", TRACE_MODE_READ, NULL,
9659 			NULL, &tracing_readme_fops);
9660 
9661 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9662 			NULL, &tracing_saved_cmdlines_fops);
9663 
9664 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9665 			  NULL, &tracing_saved_cmdlines_size_fops);
9666 
9667 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9668 			NULL, &tracing_saved_tgids_fops);
9669 
9670 	trace_eval_init();
9671 
9672 	trace_create_eval_file(NULL);
9673 
9674 #ifdef CONFIG_MODULES
9675 	register_module_notifier(&trace_module_nb);
9676 #endif
9677 
9678 #ifdef CONFIG_DYNAMIC_FTRACE
9679 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9680 			NULL, &tracing_dyn_info_fops);
9681 #endif
9682 
9683 	create_trace_instances(NULL);
9684 
9685 	update_tracer_options(&global_trace);
9686 
9687 	return 0;
9688 }
9689 
9690 fs_initcall(tracer_init_tracefs);
9691 
9692 static int trace_panic_handler(struct notifier_block *this,
9693 			       unsigned long event, void *unused)
9694 {
9695 	if (ftrace_dump_on_oops)
9696 		ftrace_dump(ftrace_dump_on_oops);
9697 	return NOTIFY_OK;
9698 }
9699 
9700 static struct notifier_block trace_panic_notifier = {
9701 	.notifier_call  = trace_panic_handler,
9702 	.next           = NULL,
9703 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9704 };
9705 
9706 static int trace_die_handler(struct notifier_block *self,
9707 			     unsigned long val,
9708 			     void *data)
9709 {
9710 	switch (val) {
9711 	case DIE_OOPS:
9712 		if (ftrace_dump_on_oops)
9713 			ftrace_dump(ftrace_dump_on_oops);
9714 		break;
9715 	default:
9716 		break;
9717 	}
9718 	return NOTIFY_OK;
9719 }
9720 
9721 static struct notifier_block trace_die_notifier = {
9722 	.notifier_call = trace_die_handler,
9723 	.priority = 200
9724 };
9725 
9726 /*
9727  * printk is set to max of 1024, we really don't need it that big.
9728  * Nothing should be printing 1000 characters anyway.
9729  */
9730 #define TRACE_MAX_PRINT		1000
9731 
9732 /*
9733  * Define here KERN_TRACE so that we have one place to modify
9734  * it if we decide to change what log level the ftrace dump
9735  * should be at.
9736  */
9737 #define KERN_TRACE		KERN_EMERG
9738 
9739 void
9740 trace_printk_seq(struct trace_seq *s)
9741 {
9742 	/* Probably should print a warning here. */
9743 	if (s->seq.len >= TRACE_MAX_PRINT)
9744 		s->seq.len = TRACE_MAX_PRINT;
9745 
9746 	/*
9747 	 * More paranoid code. Although the buffer size is set to
9748 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9749 	 * an extra layer of protection.
9750 	 */
9751 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9752 		s->seq.len = s->seq.size - 1;
9753 
9754 	/* should be zero ended, but we are paranoid. */
9755 	s->buffer[s->seq.len] = 0;
9756 
9757 	printk(KERN_TRACE "%s", s->buffer);
9758 
9759 	trace_seq_init(s);
9760 }
9761 
9762 void trace_init_global_iter(struct trace_iterator *iter)
9763 {
9764 	iter->tr = &global_trace;
9765 	iter->trace = iter->tr->current_trace;
9766 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9767 	iter->array_buffer = &global_trace.array_buffer;
9768 
9769 	if (iter->trace && iter->trace->open)
9770 		iter->trace->open(iter);
9771 
9772 	/* Annotate start of buffers if we had overruns */
9773 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9774 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9775 
9776 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9777 	if (trace_clocks[iter->tr->clock_id].in_ns)
9778 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9779 }
9780 
9781 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9782 {
9783 	/* use static because iter can be a bit big for the stack */
9784 	static struct trace_iterator iter;
9785 	static atomic_t dump_running;
9786 	struct trace_array *tr = &global_trace;
9787 	unsigned int old_userobj;
9788 	unsigned long flags;
9789 	int cnt = 0, cpu;
9790 
9791 	/* Only allow one dump user at a time. */
9792 	if (atomic_inc_return(&dump_running) != 1) {
9793 		atomic_dec(&dump_running);
9794 		return;
9795 	}
9796 
9797 	/*
9798 	 * Always turn off tracing when we dump.
9799 	 * We don't need to show trace output of what happens
9800 	 * between multiple crashes.
9801 	 *
9802 	 * If the user does a sysrq-z, then they can re-enable
9803 	 * tracing with echo 1 > tracing_on.
9804 	 */
9805 	tracing_off();
9806 
9807 	local_irq_save(flags);
9808 
9809 	/* Simulate the iterator */
9810 	trace_init_global_iter(&iter);
9811 	/* Can not use kmalloc for iter.temp and iter.fmt */
9812 	iter.temp = static_temp_buf;
9813 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9814 	iter.fmt = static_fmt_buf;
9815 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9816 
9817 	for_each_tracing_cpu(cpu) {
9818 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9819 	}
9820 
9821 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9822 
9823 	/* don't look at user memory in panic mode */
9824 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9825 
9826 	switch (oops_dump_mode) {
9827 	case DUMP_ALL:
9828 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9829 		break;
9830 	case DUMP_ORIG:
9831 		iter.cpu_file = raw_smp_processor_id();
9832 		break;
9833 	case DUMP_NONE:
9834 		goto out_enable;
9835 	default:
9836 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9837 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9838 	}
9839 
9840 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9841 
9842 	/* Did function tracer already get disabled? */
9843 	if (ftrace_is_dead()) {
9844 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9845 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9846 	}
9847 
9848 	/*
9849 	 * We need to stop all tracing on all CPUS to read
9850 	 * the next buffer. This is a bit expensive, but is
9851 	 * not done often. We fill all what we can read,
9852 	 * and then release the locks again.
9853 	 */
9854 
9855 	while (!trace_empty(&iter)) {
9856 
9857 		if (!cnt)
9858 			printk(KERN_TRACE "---------------------------------\n");
9859 
9860 		cnt++;
9861 
9862 		trace_iterator_reset(&iter);
9863 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9864 
9865 		if (trace_find_next_entry_inc(&iter) != NULL) {
9866 			int ret;
9867 
9868 			ret = print_trace_line(&iter);
9869 			if (ret != TRACE_TYPE_NO_CONSUME)
9870 				trace_consume(&iter);
9871 		}
9872 		touch_nmi_watchdog();
9873 
9874 		trace_printk_seq(&iter.seq);
9875 	}
9876 
9877 	if (!cnt)
9878 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9879 	else
9880 		printk(KERN_TRACE "---------------------------------\n");
9881 
9882  out_enable:
9883 	tr->trace_flags |= old_userobj;
9884 
9885 	for_each_tracing_cpu(cpu) {
9886 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9887 	}
9888 	atomic_dec(&dump_running);
9889 	local_irq_restore(flags);
9890 }
9891 EXPORT_SYMBOL_GPL(ftrace_dump);
9892 
9893 #define WRITE_BUFSIZE  4096
9894 
9895 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9896 				size_t count, loff_t *ppos,
9897 				int (*createfn)(const char *))
9898 {
9899 	char *kbuf, *buf, *tmp;
9900 	int ret = 0;
9901 	size_t done = 0;
9902 	size_t size;
9903 
9904 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9905 	if (!kbuf)
9906 		return -ENOMEM;
9907 
9908 	while (done < count) {
9909 		size = count - done;
9910 
9911 		if (size >= WRITE_BUFSIZE)
9912 			size = WRITE_BUFSIZE - 1;
9913 
9914 		if (copy_from_user(kbuf, buffer + done, size)) {
9915 			ret = -EFAULT;
9916 			goto out;
9917 		}
9918 		kbuf[size] = '\0';
9919 		buf = kbuf;
9920 		do {
9921 			tmp = strchr(buf, '\n');
9922 			if (tmp) {
9923 				*tmp = '\0';
9924 				size = tmp - buf + 1;
9925 			} else {
9926 				size = strlen(buf);
9927 				if (done + size < count) {
9928 					if (buf != kbuf)
9929 						break;
9930 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9931 					pr_warn("Line length is too long: Should be less than %d\n",
9932 						WRITE_BUFSIZE - 2);
9933 					ret = -EINVAL;
9934 					goto out;
9935 				}
9936 			}
9937 			done += size;
9938 
9939 			/* Remove comments */
9940 			tmp = strchr(buf, '#');
9941 
9942 			if (tmp)
9943 				*tmp = '\0';
9944 
9945 			ret = createfn(buf);
9946 			if (ret)
9947 				goto out;
9948 			buf += size;
9949 
9950 		} while (done < count);
9951 	}
9952 	ret = done;
9953 
9954 out:
9955 	kfree(kbuf);
9956 
9957 	return ret;
9958 }
9959 
9960 __init static int tracer_alloc_buffers(void)
9961 {
9962 	int ring_buf_size;
9963 	int ret = -ENOMEM;
9964 
9965 
9966 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9967 		pr_warn("Tracing disabled due to lockdown\n");
9968 		return -EPERM;
9969 	}
9970 
9971 	/*
9972 	 * Make sure we don't accidentally add more trace options
9973 	 * than we have bits for.
9974 	 */
9975 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9976 
9977 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9978 		goto out;
9979 
9980 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9981 		goto out_free_buffer_mask;
9982 
9983 	/* Only allocate trace_printk buffers if a trace_printk exists */
9984 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9985 		/* Must be called before global_trace.buffer is allocated */
9986 		trace_printk_init_buffers();
9987 
9988 	/* To save memory, keep the ring buffer size to its minimum */
9989 	if (ring_buffer_expanded)
9990 		ring_buf_size = trace_buf_size;
9991 	else
9992 		ring_buf_size = 1;
9993 
9994 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9995 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9996 
9997 	raw_spin_lock_init(&global_trace.start_lock);
9998 
9999 	/*
10000 	 * The prepare callbacks allocates some memory for the ring buffer. We
10001 	 * don't free the buffer if the CPU goes down. If we were to free
10002 	 * the buffer, then the user would lose any trace that was in the
10003 	 * buffer. The memory will be removed once the "instance" is removed.
10004 	 */
10005 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10006 				      "trace/RB:preapre", trace_rb_cpu_prepare,
10007 				      NULL);
10008 	if (ret < 0)
10009 		goto out_free_cpumask;
10010 	/* Used for event triggers */
10011 	ret = -ENOMEM;
10012 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10013 	if (!temp_buffer)
10014 		goto out_rm_hp_state;
10015 
10016 	if (trace_create_savedcmd() < 0)
10017 		goto out_free_temp_buffer;
10018 
10019 	/* TODO: make the number of buffers hot pluggable with CPUS */
10020 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10021 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10022 		goto out_free_savedcmd;
10023 	}
10024 
10025 	if (global_trace.buffer_disabled)
10026 		tracing_off();
10027 
10028 	if (trace_boot_clock) {
10029 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10030 		if (ret < 0)
10031 			pr_warn("Trace clock %s not defined, going back to default\n",
10032 				trace_boot_clock);
10033 	}
10034 
10035 	/*
10036 	 * register_tracer() might reference current_trace, so it
10037 	 * needs to be set before we register anything. This is
10038 	 * just a bootstrap of current_trace anyway.
10039 	 */
10040 	global_trace.current_trace = &nop_trace;
10041 
10042 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10043 
10044 	ftrace_init_global_array_ops(&global_trace);
10045 
10046 	init_trace_flags_index(&global_trace);
10047 
10048 	register_tracer(&nop_trace);
10049 
10050 	/* Function tracing may start here (via kernel command line) */
10051 	init_function_trace();
10052 
10053 	/* All seems OK, enable tracing */
10054 	tracing_disabled = 0;
10055 
10056 	atomic_notifier_chain_register(&panic_notifier_list,
10057 				       &trace_panic_notifier);
10058 
10059 	register_die_notifier(&trace_die_notifier);
10060 
10061 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10062 
10063 	INIT_LIST_HEAD(&global_trace.systems);
10064 	INIT_LIST_HEAD(&global_trace.events);
10065 	INIT_LIST_HEAD(&global_trace.hist_vars);
10066 	INIT_LIST_HEAD(&global_trace.err_log);
10067 	list_add(&global_trace.list, &ftrace_trace_arrays);
10068 
10069 	apply_trace_boot_options();
10070 
10071 	register_snapshot_cmd();
10072 
10073 	test_can_verify();
10074 
10075 	return 0;
10076 
10077 out_free_savedcmd:
10078 	free_saved_cmdlines_buffer(savedcmd);
10079 out_free_temp_buffer:
10080 	ring_buffer_free(temp_buffer);
10081 out_rm_hp_state:
10082 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10083 out_free_cpumask:
10084 	free_cpumask_var(global_trace.tracing_cpumask);
10085 out_free_buffer_mask:
10086 	free_cpumask_var(tracing_buffer_mask);
10087 out:
10088 	return ret;
10089 }
10090 
10091 void __init early_trace_init(void)
10092 {
10093 	if (tracepoint_printk) {
10094 		tracepoint_print_iter =
10095 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10096 		if (MEM_FAIL(!tracepoint_print_iter,
10097 			     "Failed to allocate trace iterator\n"))
10098 			tracepoint_printk = 0;
10099 		else
10100 			static_key_enable(&tracepoint_printk_key.key);
10101 	}
10102 	tracer_alloc_buffers();
10103 }
10104 
10105 void __init trace_init(void)
10106 {
10107 	trace_event_init();
10108 }
10109 
10110 __init static void clear_boot_tracer(void)
10111 {
10112 	/*
10113 	 * The default tracer at boot buffer is an init section.
10114 	 * This function is called in lateinit. If we did not
10115 	 * find the boot tracer, then clear it out, to prevent
10116 	 * later registration from accessing the buffer that is
10117 	 * about to be freed.
10118 	 */
10119 	if (!default_bootup_tracer)
10120 		return;
10121 
10122 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10123 	       default_bootup_tracer);
10124 	default_bootup_tracer = NULL;
10125 }
10126 
10127 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10128 __init static void tracing_set_default_clock(void)
10129 {
10130 	/* sched_clock_stable() is determined in late_initcall */
10131 	if (!trace_boot_clock && !sched_clock_stable()) {
10132 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10133 			pr_warn("Can not set tracing clock due to lockdown\n");
10134 			return;
10135 		}
10136 
10137 		printk(KERN_WARNING
10138 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10139 		       "If you want to keep using the local clock, then add:\n"
10140 		       "  \"trace_clock=local\"\n"
10141 		       "on the kernel command line\n");
10142 		tracing_set_clock(&global_trace, "global");
10143 	}
10144 }
10145 #else
10146 static inline void tracing_set_default_clock(void) { }
10147 #endif
10148 
10149 __init static int late_trace_init(void)
10150 {
10151 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10152 		static_key_disable(&tracepoint_printk_key.key);
10153 		tracepoint_printk = 0;
10154 	}
10155 
10156 	tracing_set_default_clock();
10157 	clear_boot_tracer();
10158 	return 0;
10159 }
10160 
10161 late_initcall_sync(late_trace_init);
10162