xref: /linux/kernel/trace/trace.c (revision fa73514d5e620c2bb48b63e1dd509ce0942c39cf)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75 
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #endif
85 
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91 
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94 	{ }
95 };
96 
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100 	return 0;
101 }
102 
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109 
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 and is set to zero only if the initialization
113  * of the tracer is successful; that is the only place that sets
114  * it back to zero.
115  */
116 static int tracing_disabled = 1;
117 
118 cpumask_var_t __read_mostly	tracing_buffer_mask;
119 
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputting them to a
127  * serial console.
128  *
129  * It is off by default, but you can enable it either by specifying
130  * "ftrace_dump_on_oops" on the kernel command line, or by setting
131  * /proc/sys/kernel/ftrace_dump_on_oops.
132  * Set it to 1 to dump the buffers of all CPUs.
133  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
134  */
135 
136 enum ftrace_dump_mode ftrace_dump_on_oops;
137 
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140 
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144 	struct module			*mod;
145 	unsigned long			length;
146 };
147 
148 union trace_eval_map_item;
149 
150 struct trace_eval_map_tail {
151 	/*
152 	 * "end" points to NULL, as it must be different
153 	 * from "mod" or "eval_string"
154 	 */
155 	union trace_eval_map_item	*next;
156 	const char			*end;	/* points to NULL */
157 };
158 
159 static DEFINE_MUTEX(trace_eval_mutex);
160 
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169 	struct trace_eval_map		map;
170 	struct trace_eval_map_head	head;
171 	struct trace_eval_map_tail	tail;
172 };
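
/*
 * Illustrative layout (a sketch of the description above): for N saved
 * maps, trace_eval_maps points at an array of N + 2 items:
 *
 *	[0]	head	(head.mod, head.length = N)
 *	[1..N]	map	(the saved trace_eval_map entries)
 *	[N+1]	tail	(tail.next -> the next saved array, or NULL;
 *			 tail.end == NULL)
 */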
173 
174 static union trace_eval_map_item *trace_eval_maps;
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176 
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179 				   struct trace_buffer *buffer,
180 				   unsigned int trace_ctx);
181 
182 #define MAX_TRACER_SIZE		100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185 
186 static bool allocate_snapshot;
187 
188 static int __init set_cmdline_ftrace(char *str)
189 {
190 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
191 	default_bootup_tracer = bootup_tracer_buf;
192 	/* We are using ftrace early, expand it */
193 	ring_buffer_expanded = true;
194 	return 1;
195 }
196 __setup("ftrace=", set_cmdline_ftrace);
197 
198 static int __init set_ftrace_dump_on_oops(char *str)
199 {
200 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
201 		ftrace_dump_on_oops = DUMP_ALL;
202 		return 1;
203 	}
204 
205 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
206 		ftrace_dump_on_oops = DUMP_ORIG;
207 		return 1;
208 	}
209 
210 	return 0;
211 }
212 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
213 
214 static int __init stop_trace_on_warning(char *str)
215 {
216 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
217 		__disable_trace_on_warning = 1;
218 	return 1;
219 }
220 __setup("traceoff_on_warning", stop_trace_on_warning);
221 
222 static int __init boot_alloc_snapshot(char *str)
223 {
224 	allocate_snapshot = true;
225 	/* We also need the main ring buffer expanded */
226 	ring_buffer_expanded = true;
227 	return 1;
228 }
229 __setup("alloc_snapshot", boot_alloc_snapshot);
230 
231 
232 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
233 
234 static int __init set_trace_boot_options(char *str)
235 {
236 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
237 	return 0;
238 }
239 __setup("trace_options=", set_trace_boot_options);
240 
241 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
242 static char *trace_boot_clock __initdata;
243 
244 static int __init set_trace_boot_clock(char *str)
245 {
246 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
247 	trace_boot_clock = trace_boot_clock_buf;
248 	return 0;
249 }
250 __setup("trace_clock=", set_trace_boot_clock);
251 
252 static int __init set_tracepoint_printk(char *str)
253 {
254 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
255 		tracepoint_printk = 1;
256 	return 1;
257 }
258 __setup("tp_printk", set_tracepoint_printk);
259 
260 static int __init set_tracepoint_printk_stop(char *str)
261 {
262 	tracepoint_printk_stop_on_boot = true;
263 	return 1;
264 }
265 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
266 
267 unsigned long long ns2usecs(u64 nsec)
268 {
269 	nsec += 500;
270 	do_div(nsec, 1000);
271 	return nsec;
272 }
273 
274 static void
275 trace_process_export(struct trace_export *export,
276 	       struct ring_buffer_event *event, int flag)
277 {
278 	struct trace_entry *entry;
279 	unsigned int size = 0;
280 
281 	if (export->flags & flag) {
282 		entry = ring_buffer_event_data(event);
283 		size = ring_buffer_event_length(event);
284 		export->write(export, entry, size);
285 	}
286 }
287 
288 static DEFINE_MUTEX(ftrace_export_lock);
289 
290 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
291 
292 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
293 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
295 
296 static inline void ftrace_exports_enable(struct trace_export *export)
297 {
298 	if (export->flags & TRACE_EXPORT_FUNCTION)
299 		static_branch_inc(&trace_function_exports_enabled);
300 
301 	if (export->flags & TRACE_EXPORT_EVENT)
302 		static_branch_inc(&trace_event_exports_enabled);
303 
304 	if (export->flags & TRACE_EXPORT_MARKER)
305 		static_branch_inc(&trace_marker_exports_enabled);
306 }
307 
308 static inline void ftrace_exports_disable(struct trace_export *export)
309 {
310 	if (export->flags & TRACE_EXPORT_FUNCTION)
311 		static_branch_dec(&trace_function_exports_enabled);
312 
313 	if (export->flags & TRACE_EXPORT_EVENT)
314 		static_branch_dec(&trace_event_exports_enabled);
315 
316 	if (export->flags & TRACE_EXPORT_MARKER)
317 		static_branch_dec(&trace_marker_exports_enabled);
318 }
319 
320 static void ftrace_exports(struct ring_buffer_event *event, int flag)
321 {
322 	struct trace_export *export;
323 
324 	preempt_disable_notrace();
325 
326 	export = rcu_dereference_raw_check(ftrace_exports_list);
327 	while (export) {
328 		trace_process_export(export, event, flag);
329 		export = rcu_dereference_raw_check(export->next);
330 	}
331 
332 	preempt_enable_notrace();
333 }
334 
335 static inline void
336 add_trace_export(struct trace_export **list, struct trace_export *export)
337 {
338 	rcu_assign_pointer(export->next, *list);
339 	/*
340 	 * We are adding the export to the list, but another
341 	 * CPU might be walking that list. We need to make sure
342 	 * the export->next pointer is valid before another CPU sees
343 	 * the export pointer included in the list.
344 	 */
345 	rcu_assign_pointer(*list, export);
346 }
347 
348 static inline int
349 rm_trace_export(struct trace_export **list, struct trace_export *export)
350 {
351 	struct trace_export **p;
352 
353 	for (p = list; *p != NULL; p = &(*p)->next)
354 		if (*p == export)
355 			break;
356 
357 	if (*p != export)
358 		return -1;
359 
360 	rcu_assign_pointer(*p, (*p)->next);
361 
362 	return 0;
363 }
364 
365 static inline void
366 add_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368 	ftrace_exports_enable(export);
369 
370 	add_trace_export(list, export);
371 }
372 
373 static inline int
374 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
375 {
376 	int ret;
377 
378 	ret = rm_trace_export(list, export);
379 	ftrace_exports_disable(export);
380 
381 	return ret;
382 }
383 
384 int register_ftrace_export(struct trace_export *export)
385 {
386 	if (WARN_ON_ONCE(!export->write))
387 		return -1;
388 
389 	mutex_lock(&ftrace_export_lock);
390 
391 	add_ftrace_export(&ftrace_exports_list, export);
392 
393 	mutex_unlock(&ftrace_export_lock);
394 
395 	return 0;
396 }
397 EXPORT_SYMBOL_GPL(register_ftrace_export);
398 
399 int unregister_ftrace_export(struct trace_export *export)
400 {
401 	int ret;
402 
403 	mutex_lock(&ftrace_export_lock);
404 
405 	ret = rm_ftrace_export(&ftrace_exports_list, export);
406 
407 	mutex_unlock(&ftrace_export_lock);
408 
409 	return ret;
410 }
411 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
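
/*
 * Illustrative sketch of a trace_export user (hypothetical names, not part
 * of this file). The write() callback receives the raw entry and its size,
 * as passed by trace_process_export() above; see struct trace_export in
 * <linux/trace.h>.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward @size bytes at @entry to some external sink
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */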
412 
413 /* trace_flags holds trace_options default values */
414 #define TRACE_DEFAULT_FLAGS						\
415 	(FUNCTION_DEFAULT_FLAGS |					\
416 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
417 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
418 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
419 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
420 	 TRACE_ITER_HASH_PTR)
421 
422 /* trace_options that are only supported by global_trace */
423 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
424 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
425 
426 /* trace_flags that are default zero for instances */
427 #define ZEROED_TRACE_FLAGS \
428 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
429 
430 /*
431  * The global_trace is the descriptor that holds the top-level tracing
432  * buffers for the live tracing.
433  */
434 static struct trace_array global_trace = {
435 	.trace_flags = TRACE_DEFAULT_FLAGS,
436 };
437 
438 LIST_HEAD(ftrace_trace_arrays);
439 
440 int trace_array_get(struct trace_array *this_tr)
441 {
442 	struct trace_array *tr;
443 	int ret = -ENODEV;
444 
445 	mutex_lock(&trace_types_lock);
446 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
447 		if (tr == this_tr) {
448 			tr->ref++;
449 			ret = 0;
450 			break;
451 		}
452 	}
453 	mutex_unlock(&trace_types_lock);
454 
455 	return ret;
456 }
457 
458 static void __trace_array_put(struct trace_array *this_tr)
459 {
460 	WARN_ON(!this_tr->ref);
461 	this_tr->ref--;
462 }
463 
464 /**
465  * trace_array_put - Decrement the reference counter for this trace array.
466  * @this_tr : pointer to the trace array
467  *
468  * NOTE: Use this when we no longer need the trace array returned by
469  * trace_array_get_by_name(). This ensures the trace array can be later
470  * destroyed.
471  *
472  */
473 void trace_array_put(struct trace_array *this_tr)
474 {
475 	if (!this_tr)
476 		return;
477 
478 	mutex_lock(&trace_types_lock);
479 	__trace_array_put(this_tr);
480 	mutex_unlock(&trace_types_lock);
481 }
482 EXPORT_SYMBOL_GPL(trace_array_put);
483 
484 int tracing_check_open_get_tr(struct trace_array *tr)
485 {
486 	int ret;
487 
488 	ret = security_locked_down(LOCKDOWN_TRACEFS);
489 	if (ret)
490 		return ret;
491 
492 	if (tracing_disabled)
493 		return -ENODEV;
494 
495 	if (tr && trace_array_get(tr) < 0)
496 		return -ENODEV;
497 
498 	return 0;
499 }
500 
501 int call_filter_check_discard(struct trace_event_call *call, void *rec,
502 			      struct trace_buffer *buffer,
503 			      struct ring_buffer_event *event)
504 {
505 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
506 	    !filter_match_preds(call->filter, rec)) {
507 		__trace_event_discard_commit(buffer, event);
508 		return 1;
509 	}
510 
511 	return 0;
512 }
513 
514 void trace_free_pid_list(struct trace_pid_list *pid_list)
515 {
516 	vfree(pid_list->pids);
517 	kfree(pid_list);
518 }
519 
520 /**
521  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
522  * @filtered_pids: The list of pids to check
523  * @search_pid: The PID to find in @filtered_pids
524  *
525  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
526  */
527 bool
528 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
529 {
530 	/*
531 	 * If pid_max changed after filtered_pids was created, we
532 	 * by default ignore all pids greater than the previous pid_max.
533 	 */
534 	if (search_pid >= filtered_pids->pid_max)
535 		return false;
536 
537 	return test_bit(search_pid, filtered_pids->pids);
538 }
539 
540 /**
541  * trace_ignore_this_task - should a task be ignored for tracing
542  * @filtered_pids: The list of pids to check
543  * @filtered_no_pids: The list of pids not to be traced
544  * @task: The task that should be ignored if not filtered
545  *
546  * Checks if @task should be traced or not from @filtered_pids.
547  * Returns true if @task should *NOT* be traced.
548  * Returns false if @task should be traced.
549  */
550 bool
551 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
552 		       struct trace_pid_list *filtered_no_pids,
553 		       struct task_struct *task)
554 {
555 	/*
556 	 * If filtered_no_pids is not empty, and the task's pid is listed
557 	 * in filtered_no_pids, then return true.
558 	 * Otherwise, if filtered_pids is empty, that means we can
559 	 * trace all tasks. If it has content, then only trace pids
560 	 * within filtered_pids.
561 	 */
562 
563 	return (filtered_pids &&
564 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
565 		(filtered_no_pids &&
566 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
567 }
568 
569 /**
570  * trace_filter_add_remove_task - Add or remove a task from a pid_list
571  * @pid_list: The list to modify
572  * @self: The current task for fork or NULL for exit
573  * @task: The task to add or remove
574  *
575  * If adding a task, if @self is defined, the task is only added if @self
576  * is also included in @pid_list. This happens on fork and tasks should
577  * only be added when the parent is listed. If @self is NULL, then the
578  * @task pid will be removed from the list, which would happen on exit
579  * of a task.
580  */
581 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
582 				  struct task_struct *self,
583 				  struct task_struct *task)
584 {
585 	if (!pid_list)
586 		return;
587 
588 	/* For forks, we only add if the forking task is listed */
589 	if (self) {
590 		if (!trace_find_filtered_pid(pid_list, self->pid))
591 			return;
592 	}
593 
594 	/* Sorry, but we don't support pid_max changing after setting */
595 	if (task->pid >= pid_list->pid_max)
596 		return;
597 
598 	/* "self" is set for forks, and NULL for exits */
599 	if (self)
600 		set_bit(task->pid, pid_list->pids);
601 	else
602 		clear_bit(task->pid, pid_list->pids);
603 }
604 
605 /**
606  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
607  * @pid_list: The pid list to show
608  * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
609  * @pos: The position of the file
610  *
611  * This is used by the seq_file "next" operation to iterate the pids
612  * listed in a trace_pid_list structure.
613  *
614  * Returns the pid+1 as we want to display pid of zero, but NULL would
615  * stop the iteration.
616  */
617 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
618 {
619 	unsigned long pid = (unsigned long)v;
620 
621 	(*pos)++;
622 
623 	/* pid already is +1 of the actual previous bit */
624 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
625 
626 	/* Return pid + 1 to allow zero to be represented */
627 	if (pid < pid_list->pid_max)
628 		return (void *)(pid + 1);
629 
630 	return NULL;
631 }
632 
633 /**
634  * trace_pid_start - Used for seq_file to start reading pid lists
635  * @pid_list: The pid list to show
636  * @pos: The position of the file
637  *
638  * This is used by seq_file "start" operation to start the iteration
639  * of listing pids.
640  *
641  * Returns the pid+1 as we want to display pid of zero, but NULL would
642  * stop the iteration.
643  */
644 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
645 {
646 	unsigned long pid;
647 	loff_t l = 0;
648 
649 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
650 	if (pid >= pid_list->pid_max)
651 		return NULL;
652 
653 	/* Return pid + 1 so that zero can be the exit value */
654 	for (pid++; pid && l < *pos;
655 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
656 		;
657 	return (void *)pid;
658 }
659 
660 /**
661  * trace_pid_show - show the current pid in seq_file processing
662  * @m: The seq_file structure to write into
663  * @v: A void pointer of the pid (+1) value to display
664  *
665  * Can be directly used by seq_file operations to display the current
666  * pid value.
667  */
668 int trace_pid_show(struct seq_file *m, void *v)
669 {
670 	unsigned long pid = (unsigned long)v - 1;
671 
672 	seq_printf(m, "%lu\n", pid);
673 	return 0;
674 }
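
/*
 * Sketch of how these three helpers can back a seq_file (hypothetical
 * my_pid_list and p_stop; this mirrors how the tracing pid files are
 * wired up):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */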
675 
676 /* 127 chars plus the terminating NUL: should be much more than enough */
677 #define PID_BUF_SIZE		127
678 
679 int trace_pid_write(struct trace_pid_list *filtered_pids,
680 		    struct trace_pid_list **new_pid_list,
681 		    const char __user *ubuf, size_t cnt)
682 {
683 	struct trace_pid_list *pid_list;
684 	struct trace_parser parser;
685 	unsigned long val;
686 	int nr_pids = 0;
687 	ssize_t read = 0;
688 	ssize_t ret = 0;
689 	loff_t pos;
690 	pid_t pid;
691 
692 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
693 		return -ENOMEM;
694 
695 	/*
696 	 * Always recreate a new array. The write is an all or nothing
697 	 * operation. Always create a new array when adding new pids by
698 	 * the user. If the operation fails, then the current list is
699 	 * not modified.
700 	 */
701 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
702 	if (!pid_list) {
703 		trace_parser_put(&parser);
704 		return -ENOMEM;
705 	}
706 
707 	pid_list->pid_max = READ_ONCE(pid_max);
708 
709 	/* Only truncating will shrink pid_max */
710 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
711 		pid_list->pid_max = filtered_pids->pid_max;
712 
713 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
714 	if (!pid_list->pids) {
715 		trace_parser_put(&parser);
716 		kfree(pid_list);
717 		return -ENOMEM;
718 	}
719 
720 	if (filtered_pids) {
721 		/* copy the current bits to the new max */
722 		for_each_set_bit(pid, filtered_pids->pids,
723 				 filtered_pids->pid_max) {
724 			set_bit(pid, pid_list->pids);
725 			nr_pids++;
726 		}
727 	}
728 
729 	while (cnt > 0) {
730 
731 		pos = 0;
732 
733 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
734 		if (ret < 0 || !trace_parser_loaded(&parser))
735 			break;
736 
737 		read += ret;
738 		ubuf += ret;
739 		cnt -= ret;
740 
741 		ret = -EINVAL;
742 		if (kstrtoul(parser.buffer, 0, &val))
743 			break;
744 		if (val >= pid_list->pid_max)
745 			break;
746 
747 		pid = (pid_t)val;
748 
749 		set_bit(pid, pid_list->pids);
750 		nr_pids++;
751 
752 		trace_parser_clear(&parser);
753 		ret = 0;
754 	}
755 	trace_parser_put(&parser);
756 
757 	if (ret < 0) {
758 		trace_free_pid_list(pid_list);
759 		return ret;
760 	}
761 
762 	if (!nr_pids) {
763 		/* Cleared the list of pids */
764 		trace_free_pid_list(pid_list);
765 		read = ret;
766 		pid_list = NULL;
767 	}
768 
769 	*new_pid_list = pid_list;
770 
771 	return read;
772 }
773 
774 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
775 {
776 	u64 ts;
777 
778 	/* Early boot up does not have a buffer yet */
779 	if (!buf->buffer)
780 		return trace_clock_local();
781 
782 	ts = ring_buffer_time_stamp(buf->buffer);
783 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
784 
785 	return ts;
786 }
787 
788 u64 ftrace_now(int cpu)
789 {
790 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
791 }
792 
793 /**
794  * tracing_is_enabled - Show if global_trace has been enabled
795  *
796  * Shows if the global trace has been enabled or not. It uses the
797  * mirror flag "buffer_disabled" to be used in fast paths such as for
798  * the irqsoff tracer. But it may be inaccurate due to races. If you
799  * need to know the accurate state, use tracing_is_on() which is a little
800  * slower, but accurate.
801  */
802 int tracing_is_enabled(void)
803 {
804 	/*
805 	 * For quick access (irqsoff uses this in fast path), just
806 	 * return the mirror variable of the state of the ring buffer.
807 	 * It's a little racy, but we don't really care.
808 	 */
809 	smp_rmb();
810 	return !global_trace.buffer_disabled;
811 }
812 
813 /*
814  * trace_buf_size is the size in bytes that is allocated
815  * for a buffer. Note, the number of bytes is always rounded
816  * to page size.
817  *
818  * This number is purposely set to a low number of 16384.
819  * If a dump on oops happens, it is much appreciated not to
820  * have to wait for all that output. In any case, this can be
821  * configured both at boot time and at run time.
822  */
823 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
824 
825 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
826 
827 /* trace_types holds a link list of available tracers. */
828 static struct tracer		*trace_types __read_mostly;
829 
830 /*
831  * trace_types_lock is used to protect the trace_types list.
832  */
833 DEFINE_MUTEX(trace_types_lock);
834 
835 /*
836  * serialize the access of the ring buffer
837  *
838  * ring buffer serializes readers, but it is low level protection.
839  * The validity of the events (which returns by ring_buffer_peek() ..etc)
840  * are not protected by ring buffer.
841  *
842  * The content of events may become garbage if we allow other process consumes
843  * these events concurrently:
844  *   A) the page of the consumed events may become a normal page
845  *      (not reader page) in ring buffer, and this page will be rewritten
846  *      by events producer.
847  *   B) The page of the consumed events may become a page for splice_read,
848  *      and this page will be returned to system.
849  *
850  * These primitives allow multi process access to different cpu ring buffer
851  * concurrently.
852  *
853  * These primitives don't distinguish read-only and read-consume access.
854  * Multi read-only access are also serialized.
855  */
856 
857 #ifdef CONFIG_SMP
858 static DECLARE_RWSEM(all_cpu_access_lock);
859 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
860 
861 static inline void trace_access_lock(int cpu)
862 {
863 	if (cpu == RING_BUFFER_ALL_CPUS) {
864 		/* gain it for accessing the whole ring buffer. */
865 		down_write(&all_cpu_access_lock);
866 	} else {
867 		/* gain it for accessing a cpu ring buffer. */
868 
869 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
870 		down_read(&all_cpu_access_lock);
871 
872 		/* Secondly block other access to this @cpu ring buffer. */
873 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
874 	}
875 }
876 
877 static inline void trace_access_unlock(int cpu)
878 {
879 	if (cpu == RING_BUFFER_ALL_CPUS) {
880 		up_write(&all_cpu_access_lock);
881 	} else {
882 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
883 		up_read(&all_cpu_access_lock);
884 	}
885 }
886 
887 static inline void trace_access_lock_init(void)
888 {
889 	int cpu;
890 
891 	for_each_possible_cpu(cpu)
892 		mutex_init(&per_cpu(cpu_access_lock, cpu));
893 }
894 
895 #else
896 
897 static DEFINE_MUTEX(access_lock);
898 
899 static inline void trace_access_lock(int cpu)
900 {
901 	(void)cpu;
902 	mutex_lock(&access_lock);
903 }
904 
905 static inline void trace_access_unlock(int cpu)
906 {
907 	(void)cpu;
908 	mutex_unlock(&access_lock);
909 }
910 
911 static inline void trace_access_lock_init(void)
912 {
913 }
914 
915 #endif
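
/*
 * Typical usage (a sketch; the read paths further down follow this
 * pattern when consuming events from a single CPU buffer):
 *
 *	trace_access_lock(iter->cpu_file);
 *	// peek at or consume events from that CPU's ring buffer
 *	trace_access_unlock(iter->cpu_file);
 */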
916 
917 #ifdef CONFIG_STACKTRACE
918 static void __ftrace_trace_stack(struct trace_buffer *buffer,
919 				 unsigned int trace_ctx,
920 				 int skip, struct pt_regs *regs);
921 static inline void ftrace_trace_stack(struct trace_array *tr,
922 				      struct trace_buffer *buffer,
923 				      unsigned int trace_ctx,
924 				      int skip, struct pt_regs *regs);
925 
926 #else
927 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
928 					unsigned int trace_ctx,
929 					int skip, struct pt_regs *regs)
930 {
931 }
932 static inline void ftrace_trace_stack(struct trace_array *tr,
933 				      struct trace_buffer *buffer,
934 				      unsigned int trace_ctx,
935 				      int skip, struct pt_regs *regs)
936 {
937 }
938 
939 #endif
940 
941 static __always_inline void
942 trace_event_setup(struct ring_buffer_event *event,
943 		  int type, unsigned int trace_ctx)
944 {
945 	struct trace_entry *ent = ring_buffer_event_data(event);
946 
947 	tracing_generic_entry_update(ent, type, trace_ctx);
948 }
949 
950 static __always_inline struct ring_buffer_event *
951 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
952 			  int type,
953 			  unsigned long len,
954 			  unsigned int trace_ctx)
955 {
956 	struct ring_buffer_event *event;
957 
958 	event = ring_buffer_lock_reserve(buffer, len);
959 	if (event != NULL)
960 		trace_event_setup(event, type, trace_ctx);
961 
962 	return event;
963 }
964 
965 void tracer_tracing_on(struct trace_array *tr)
966 {
967 	if (tr->array_buffer.buffer)
968 		ring_buffer_record_on(tr->array_buffer.buffer);
969 	/*
970 	 * This flag is looked at when buffers haven't been allocated
971 	 * yet, or by some tracers (like irqsoff), that just want to
972 	 * know if the ring buffer has been disabled, but it can handle
973 	 * races of where it gets disabled but we still do a record.
974 	 * As the check is in the fast path of the tracers, it is more
975 	 * important to be fast than accurate.
976 	 */
977 	tr->buffer_disabled = 0;
978 	/* Make the flag seen by readers */
979 	smp_wmb();
980 }
981 
982 /**
983  * tracing_on - enable tracing buffers
984  *
985  * This function enables tracing buffers that may have been
986  * disabled with tracing_off.
987  */
988 void tracing_on(void)
989 {
990 	tracer_tracing_on(&global_trace);
991 }
992 EXPORT_SYMBOL_GPL(tracing_on);
993 
994 
995 static __always_inline void
996 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
997 {
998 	__this_cpu_write(trace_taskinfo_save, true);
999 
1000 	/* If this is the temp buffer, we need to commit fully */
1001 	if (this_cpu_read(trace_buffered_event) == event) {
1002 		/* Length is in event->array[0] */
1003 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1004 		/* Release the temp buffer */
1005 		this_cpu_dec(trace_buffered_event_cnt);
1006 	} else
1007 		ring_buffer_unlock_commit(buffer, event);
1008 }
1009 
1010 /**
1011  * __trace_puts - write a constant string into the trace buffer.
1012  * @ip:	   The address of the caller
1013  * @str:   The constant string to write
1014  * @size:  The size of the string.
1015  */
1016 int __trace_puts(unsigned long ip, const char *str, int size)
1017 {
1018 	struct ring_buffer_event *event;
1019 	struct trace_buffer *buffer;
1020 	struct print_entry *entry;
1021 	unsigned int trace_ctx;
1022 	int alloc;
1023 
1024 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1025 		return 0;
1026 
1027 	if (unlikely(tracing_selftest_running || tracing_disabled))
1028 		return 0;
1029 
1030 	alloc = sizeof(*entry) + size + 2; /* possible '\n' added, plus '\0' */
1031 
1032 	trace_ctx = tracing_gen_ctx();
1033 	buffer = global_trace.array_buffer.buffer;
1034 	ring_buffer_nest_start(buffer);
1035 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1036 					    trace_ctx);
1037 	if (!event) {
1038 		size = 0;
1039 		goto out;
1040 	}
1041 
1042 	entry = ring_buffer_event_data(event);
1043 	entry->ip = ip;
1044 
1045 	memcpy(&entry->buf, str, size);
1046 
1047 	/* Add a newline if necessary */
1048 	if (entry->buf[size - 1] != '\n') {
1049 		entry->buf[size] = '\n';
1050 		entry->buf[size + 1] = '\0';
1051 	} else
1052 		entry->buf[size] = '\0';
1053 
1054 	__buffer_unlock_commit(buffer, event);
1055 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1056  out:
1057 	ring_buffer_nest_end(buffer);
1058 	return size;
1059 }
1060 EXPORT_SYMBOL_GPL(__trace_puts);
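
/*
 * Note: callers normally use the trace_puts() macro rather than calling
 * __trace_puts() directly, e.g.
 *
 *	trace_puts("reached the slow path\n");
 *
 * The macro chooses between __trace_bputs() (for compile-time constant
 * strings) and __trace_puts(); see the trace_puts() definition in the
 * tracing headers.
 */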
1061 
1062 /**
1063  * __trace_bputs - write the pointer to a constant string into trace buffer
1064  * @ip:	   The address of the caller
1065  * @str:   The constant string whose address is written to the buffer
1066  */
1067 int __trace_bputs(unsigned long ip, const char *str)
1068 {
1069 	struct ring_buffer_event *event;
1070 	struct trace_buffer *buffer;
1071 	struct bputs_entry *entry;
1072 	unsigned int trace_ctx;
1073 	int size = sizeof(struct bputs_entry);
1074 	int ret = 0;
1075 
1076 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1077 		return 0;
1078 
1079 	if (unlikely(tracing_selftest_running || tracing_disabled))
1080 		return 0;
1081 
1082 	trace_ctx = tracing_gen_ctx();
1083 	buffer = global_trace.array_buffer.buffer;
1084 
1085 	ring_buffer_nest_start(buffer);
1086 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1087 					    trace_ctx);
1088 	if (!event)
1089 		goto out;
1090 
1091 	entry = ring_buffer_event_data(event);
1092 	entry->ip			= ip;
1093 	entry->str			= str;
1094 
1095 	__buffer_unlock_commit(buffer, event);
1096 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1097 
1098 	ret = 1;
1099  out:
1100 	ring_buffer_nest_end(buffer);
1101 	return ret;
1102 }
1103 EXPORT_SYMBOL_GPL(__trace_bputs);
1104 
1105 #ifdef CONFIG_TRACER_SNAPSHOT
1106 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1107 					   void *cond_data)
1108 {
1109 	struct tracer *tracer = tr->current_trace;
1110 	unsigned long flags;
1111 
1112 	if (in_nmi()) {
1113 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1114 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1115 		return;
1116 	}
1117 
1118 	if (!tr->allocated_snapshot) {
1119 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1120 		internal_trace_puts("*** stopping trace here!   ***\n");
1121 		tracing_off();
1122 		return;
1123 	}
1124 
1125 	/* Note, snapshot cannot be used when the current tracer uses it */
1126 	if (tracer->use_max_tr) {
1127 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1128 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1129 		return;
1130 	}
1131 
1132 	local_irq_save(flags);
1133 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1134 	local_irq_restore(flags);
1135 }
1136 
1137 void tracing_snapshot_instance(struct trace_array *tr)
1138 {
1139 	tracing_snapshot_instance_cond(tr, NULL);
1140 }
1141 
1142 /**
1143  * tracing_snapshot - take a snapshot of the current buffer.
1144  *
1145  * This causes a swap between the snapshot buffer and the current live
1146  * tracing buffer. You can use this to take snapshots of the live
1147  * trace when some condition is triggered, but continue to trace.
1148  *
1149  * Note, make sure to allocate the snapshot either with
1150  * tracing_snapshot_alloc(), or manually with:
1151  * echo 1 > /sys/kernel/debug/tracing/snapshot
1152  *
1153  * If the snapshot buffer is not allocated, this will stop tracing,
1154  * basically making a permanent snapshot.
1155  */
1156 void tracing_snapshot(void)
1157 {
1158 	struct trace_array *tr = &global_trace;
1159 
1160 	tracing_snapshot_instance(tr);
1161 }
1162 EXPORT_SYMBOL_GPL(tracing_snapshot);
1163 
1164 /**
1165  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1166  * @tr:		The tracing instance to snapshot
1167  * @cond_data:	The data to be tested conditionally, and possibly saved
1168  *
1169  * This is the same as tracing_snapshot(), except that the snapshot is
1170  * conditional: the snapshot only happens if the
1171  * cond_snapshot.update() implementation that receives the cond_data
1172  * returns true. In that case the trace array's cond_snapshot update()
1173  * operation used the cond_data to determine whether the snapshot
1174  * should be taken and, if it was, presumably saved the data along
1175  * with the snapshot.
1176  */
1177 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1178 {
1179 	tracing_snapshot_instance_cond(tr, cond_data);
1180 }
1181 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1182 
1183 /**
1184  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1185  * @tr:		The tracing instance
1186  *
1187  * When the user enables a conditional snapshot using
1188  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1189  * with the snapshot.  This accessor is used to retrieve it.
1190  *
1191  * Should not be called from cond_snapshot.update(), since it takes
1192  * the tr->max_lock lock, which the code calling
1193  * cond_snapshot.update() has already done.
1194  *
1195  * Returns the cond_data associated with the trace array's snapshot.
1196  */
1197 void *tracing_cond_snapshot_data(struct trace_array *tr)
1198 {
1199 	void *cond_data = NULL;
1200 
1201 	arch_spin_lock(&tr->max_lock);
1202 
1203 	if (tr->cond_snapshot)
1204 		cond_data = tr->cond_snapshot->cond_data;
1205 
1206 	arch_spin_unlock(&tr->max_lock);
1207 
1208 	return cond_data;
1209 }
1210 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1211 
1212 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1213 					struct array_buffer *size_buf, int cpu_id);
1214 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1215 
1216 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1217 {
1218 	int ret;
1219 
1220 	if (!tr->allocated_snapshot) {
1221 
1222 		/* allocate spare buffer */
1223 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1224 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1225 		if (ret < 0)
1226 			return ret;
1227 
1228 		tr->allocated_snapshot = true;
1229 	}
1230 
1231 	return 0;
1232 }
1233 
1234 static void free_snapshot(struct trace_array *tr)
1235 {
1236 	/*
1237 	 * We don't free the ring buffer; instead, we resize it because
1238 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1239 	 * we want to preserve it.
1240 	 */
1241 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1242 	set_buffer_entries(&tr->max_buffer, 1);
1243 	tracing_reset_online_cpus(&tr->max_buffer);
1244 	tr->allocated_snapshot = false;
1245 }
1246 
1247 /**
1248  * tracing_alloc_snapshot - allocate snapshot buffer.
1249  *
1250  * This only allocates the snapshot buffer if it isn't already
1251  * allocated - it doesn't also take a snapshot.
1252  *
1253  * This is meant to be used in cases where the snapshot buffer needs
1254  * to be set up for events that can't sleep but need to be able to
1255  * trigger a snapshot.
1256  */
1257 int tracing_alloc_snapshot(void)
1258 {
1259 	struct trace_array *tr = &global_trace;
1260 	int ret;
1261 
1262 	ret = tracing_alloc_snapshot_instance(tr);
1263 	WARN_ON(ret < 0);
1264 
1265 	return ret;
1266 }
1267 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1268 
1269 /**
1270  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1271  *
1272  * This is similar to tracing_snapshot(), but it will allocate the
1273  * snapshot buffer if it isn't already allocated. Use this only
1274  * where it is safe to sleep, as the allocation may sleep.
1275  *
1276  * This causes a swap between the snapshot buffer and the current live
1277  * tracing buffer. You can use this to take snapshots of the live
1278  * trace when some condition is triggered, but continue to trace.
1279  */
1280 void tracing_snapshot_alloc(void)
1281 {
1282 	int ret;
1283 
1284 	ret = tracing_alloc_snapshot();
1285 	if (ret < 0)
1286 		return;
1287 
1288 	tracing_snapshot();
1289 }
1290 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
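
/*
 * Illustrative usage (not from this file): capture the trace around a
 * rare error path.
 *
 *	tracing_snapshot_alloc();	// may sleep; call from process context
 *	...
 *	if (unlikely(hit_error))
 *		tracing_snapshot();	// safe once the buffer is allocated
 */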
1291 
1292 /**
1293  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1294  * @tr:		The tracing instance
1295  * @cond_data:	User data to associate with the snapshot
1296  * @update:	Implementation of the cond_snapshot update function
1297  *
1298  * Check whether the conditional snapshot for the given instance has
1299  * already been enabled, or if the current tracer is already using a
1300  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1301  * save the cond_data and update function inside.
1302  *
1303  * Returns 0 if successful, error otherwise.
1304  */
1305 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1306 				 cond_update_fn_t update)
1307 {
1308 	struct cond_snapshot *cond_snapshot;
1309 	int ret = 0;
1310 
1311 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1312 	if (!cond_snapshot)
1313 		return -ENOMEM;
1314 
1315 	cond_snapshot->cond_data = cond_data;
1316 	cond_snapshot->update = update;
1317 
1318 	mutex_lock(&trace_types_lock);
1319 
1320 	ret = tracing_alloc_snapshot_instance(tr);
1321 	if (ret)
1322 		goto fail_unlock;
1323 
1324 	if (tr->current_trace->use_max_tr) {
1325 		ret = -EBUSY;
1326 		goto fail_unlock;
1327 	}
1328 
1329 	/*
1330 	 * The cond_snapshot can only change to NULL without the
1331 	 * trace_types_lock. We don't care if we race with it going
1332 	 * to NULL, but we want to make sure that it's not set to
1333 	 * something other than NULL when we get here, which we can
1334 	 * do safely with only holding the trace_types_lock and not
1335 	 * having to take the max_lock.
1336 	 */
1337 	if (tr->cond_snapshot) {
1338 		ret = -EBUSY;
1339 		goto fail_unlock;
1340 	}
1341 
1342 	arch_spin_lock(&tr->max_lock);
1343 	tr->cond_snapshot = cond_snapshot;
1344 	arch_spin_unlock(&tr->max_lock);
1345 
1346 	mutex_unlock(&trace_types_lock);
1347 
1348 	return ret;
1349 
1350  fail_unlock:
1351 	mutex_unlock(&trace_types_lock);
1352 	kfree(cond_snapshot);
1353 	return ret;
1354 }
1355 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
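
/*
 * Sketch of a conditional-snapshot user (hypothetical names; the update
 * callback follows the cond_update_fn_t signature):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s->errors > s->threshold;	// snapshot only then
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);		// calls my_update()
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */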
1356 
1357 /**
1358  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1359  * @tr:		The tracing instance
1360  *
1361  * Check whether the conditional snapshot for the given instance is
1362  * enabled; if so, free the cond_snapshot associated with it,
1363  * otherwise return -EINVAL.
1364  *
1365  * Returns 0 if successful, error otherwise.
1366  */
1367 int tracing_snapshot_cond_disable(struct trace_array *tr)
1368 {
1369 	int ret = 0;
1370 
1371 	arch_spin_lock(&tr->max_lock);
1372 
1373 	if (!tr->cond_snapshot)
1374 		ret = -EINVAL;
1375 	else {
1376 		kfree(tr->cond_snapshot);
1377 		tr->cond_snapshot = NULL;
1378 	}
1379 
1380 	arch_spin_unlock(&tr->max_lock);
1381 
1382 	return ret;
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1385 #else
1386 void tracing_snapshot(void)
1387 {
1388 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot);
1391 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1392 {
1393 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1396 int tracing_alloc_snapshot(void)
1397 {
1398 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1399 	return -ENODEV;
1400 }
1401 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1402 void tracing_snapshot_alloc(void)
1403 {
1404 	/* Give warning */
1405 	tracing_snapshot();
1406 }
1407 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1408 void *tracing_cond_snapshot_data(struct trace_array *tr)
1409 {
1410 	return NULL;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1413 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1414 {
1415 	return -ENODEV;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1418 int tracing_snapshot_cond_disable(struct trace_array *tr)
1419 {
1420 	return false;
1421 }
1422 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1423 #endif /* CONFIG_TRACER_SNAPSHOT */
1424 
1425 void tracer_tracing_off(struct trace_array *tr)
1426 {
1427 	if (tr->array_buffer.buffer)
1428 		ring_buffer_record_off(tr->array_buffer.buffer);
1429 	/*
1430 	 * This flag is looked at when buffers haven't been allocated
1431 	 * yet, or by some tracers (like irqsoff), that just want to
1432 	 * know if the ring buffer has been disabled, but it can handle
1433 	 * races of where it gets disabled but we still do a record.
1434 	 * As the check is in the fast path of the tracers, it is more
1435 	 * important to be fast than accurate.
1436 	 */
1437 	tr->buffer_disabled = 1;
1438 	/* Make the flag seen by readers */
1439 	smp_wmb();
1440 }
1441 
1442 /**
1443  * tracing_off - turn off tracing buffers
1444  *
1445  * This function stops the tracing buffers from recording data.
1446  * It does not disable any overhead the tracers themselves may
1447  * be causing. This function simply causes all recording to
1448  * the ring buffers to fail.
1449  */
1450 void tracing_off(void)
1451 {
1452 	tracer_tracing_off(&global_trace);
1453 }
1454 EXPORT_SYMBOL_GPL(tracing_off);
1455 
1456 void disable_trace_on_warning(void)
1457 {
1458 	if (__disable_trace_on_warning) {
1459 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1460 			"Disabling tracing due to warning\n");
1461 		tracing_off();
1462 	}
1463 }
1464 
1465 /**
1466  * tracer_tracing_is_on - show real state of ring buffer enabled
1467  * @tr : the trace array to know if ring buffer is enabled
1468  *
1469  * Shows real state of the ring buffer if it is enabled or not.
1470  */
1471 bool tracer_tracing_is_on(struct trace_array *tr)
1472 {
1473 	if (tr->array_buffer.buffer)
1474 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1475 	return !tr->buffer_disabled;
1476 }
1477 
1478 /**
1479  * tracing_is_on - show state of ring buffers enabled
1480  */
1481 int tracing_is_on(void)
1482 {
1483 	return tracer_tracing_is_on(&global_trace);
1484 }
1485 EXPORT_SYMBOL_GPL(tracing_is_on);
1486 
1487 static int __init set_buf_size(char *str)
1488 {
1489 	unsigned long buf_size;
1490 
1491 	if (!str)
1492 		return 0;
1493 	buf_size = memparse(str, &str);
1494 	/* nr_entries can not be zero */
1495 	if (buf_size == 0)
1496 		return 0;
1497 	trace_buf_size = buf_size;
1498 	return 1;
1499 }
1500 __setup("trace_buf_size=", set_buf_size);
1501 
1502 static int __init set_tracing_thresh(char *str)
1503 {
1504 	unsigned long threshold;
1505 	int ret;
1506 
1507 	if (!str)
1508 		return 0;
1509 	ret = kstrtoul(str, 0, &threshold);
1510 	if (ret < 0)
1511 		return 0;
1512 	tracing_thresh = threshold * 1000;
1513 	return 1;
1514 }
1515 __setup("tracing_thresh=", set_tracing_thresh);
1516 
1517 unsigned long nsecs_to_usecs(unsigned long nsecs)
1518 {
1519 	return nsecs / 1000;
1520 }
1521 
1522 /*
1523  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1524  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1525  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1526  * of strings in the order that the evals (enum) were defined.
1527  */
1528 #undef C
1529 #define C(a, b) b
1530 
1531 /* These must match the bit positions in trace_iterator_flags */
1532 static const char *trace_options[] = {
1533 	TRACE_FLAGS
1534 	NULL
1535 };
1536 
1537 static struct {
1538 	u64 (*func)(void);
1539 	const char *name;
1540 	int in_ns;		/* is this clock in nanoseconds? */
1541 } trace_clocks[] = {
1542 	{ trace_clock_local,		"local",	1 },
1543 	{ trace_clock_global,		"global",	1 },
1544 	{ trace_clock_counter,		"counter",	0 },
1545 	{ trace_clock_jiffies,		"uptime",	0 },
1546 	{ trace_clock,			"perf",		1 },
1547 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1548 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1549 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1550 	ARCH_TRACE_CLOCKS
1551 };
1552 
1553 bool trace_clock_in_ns(struct trace_array *tr)
1554 {
1555 	if (trace_clocks[tr->clock_id].in_ns)
1556 		return true;
1557 
1558 	return false;
1559 }
1560 
1561 /*
1562  * trace_parser_get_init - gets the buffer for trace parser
1563  */
1564 int trace_parser_get_init(struct trace_parser *parser, int size)
1565 {
1566 	memset(parser, 0, sizeof(*parser));
1567 
1568 	parser->buffer = kmalloc(size, GFP_KERNEL);
1569 	if (!parser->buffer)
1570 		return 1;
1571 
1572 	parser->size = size;
1573 	return 0;
1574 }
1575 
1576 /*
1577  * trace_parser_put - frees the buffer for trace parser
1578  */
1579 void trace_parser_put(struct trace_parser *parser)
1580 {
1581 	kfree(parser->buffer);
1582 	parser->buffer = NULL;
1583 }
1584 
1585 /*
1586  * trace_get_user - reads the user input string separated by space
1587  * (matched by isspace(ch))
1588  *
1589  * For each string found the 'struct trace_parser' is updated,
1590  * and the function returns.
1591  *
1592  * Returns number of bytes read.
1593  *
1594  * See kernel/trace/trace.h for 'struct trace_parser' details.
1595  */
1596 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1597 	size_t cnt, loff_t *ppos)
1598 {
1599 	char ch;
1600 	size_t read = 0;
1601 	ssize_t ret;
1602 
1603 	if (!*ppos)
1604 		trace_parser_clear(parser);
1605 
1606 	ret = get_user(ch, ubuf++);
1607 	if (ret)
1608 		goto out;
1609 
1610 	read++;
1611 	cnt--;
1612 
1613 	/*
1614 	 * The parser is not finished with the last write,
1615 	 * continue reading the user input without skipping spaces.
1616 	 */
1617 	if (!parser->cont) {
1618 		/* skip white space */
1619 		while (cnt && isspace(ch)) {
1620 			ret = get_user(ch, ubuf++);
1621 			if (ret)
1622 				goto out;
1623 			read++;
1624 			cnt--;
1625 		}
1626 
1627 		parser->idx = 0;
1628 
1629 		/* only spaces were written */
1630 		if (isspace(ch) || !ch) {
1631 			*ppos += read;
1632 			ret = read;
1633 			goto out;
1634 		}
1635 	}
1636 
1637 	/* read the non-space input */
1638 	while (cnt && !isspace(ch) && ch) {
1639 		if (parser->idx < parser->size - 1)
1640 			parser->buffer[parser->idx++] = ch;
1641 		else {
1642 			ret = -EINVAL;
1643 			goto out;
1644 		}
1645 		ret = get_user(ch, ubuf++);
1646 		if (ret)
1647 			goto out;
1648 		read++;
1649 		cnt--;
1650 	}
1651 
1652 	/* We either got finished input or we have to wait for another call. */
1653 	if (isspace(ch) || !ch) {
1654 		parser->buffer[parser->idx] = 0;
1655 		parser->cont = false;
1656 	} else if (parser->idx < parser->size - 1) {
1657 		parser->cont = true;
1658 		parser->buffer[parser->idx++] = ch;
1659 		/* Make sure the parsed string always terminates with '\0'. */
1660 		parser->buffer[parser->idx] = 0;
1661 	} else {
1662 		ret = -EINVAL;
1663 		goto out;
1664 	}
1665 
1666 	*ppos += read;
1667 	ret = read;
1668 
1669 out:
1670 	return ret;
1671 }
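
/*
 * For a typical caller of this parser (trace_parser_get_init(), a loop of
 * trace_get_user() calls, then trace_parser_put()), see trace_pid_write()
 * above.
 */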
1672 
1673 /* TODO add a seq_buf_to_buffer() */
1674 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1675 {
1676 	int len;
1677 
1678 	if (trace_seq_used(s) <= s->seq.readpos)
1679 		return -EBUSY;
1680 
1681 	len = trace_seq_used(s) - s->seq.readpos;
1682 	if (cnt > len)
1683 		cnt = len;
1684 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1685 
1686 	s->seq.readpos += cnt;
1687 	return cnt;
1688 }
1689 
1690 unsigned long __read_mostly	tracing_thresh;
1691 static const struct file_operations tracing_max_lat_fops;
1692 
1693 #ifdef LATENCY_FS_NOTIFY
1694 
1695 static struct workqueue_struct *fsnotify_wq;
1696 
1697 static void latency_fsnotify_workfn(struct work_struct *work)
1698 {
1699 	struct trace_array *tr = container_of(work, struct trace_array,
1700 					      fsnotify_work);
1701 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1702 }
1703 
1704 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1705 {
1706 	struct trace_array *tr = container_of(iwork, struct trace_array,
1707 					      fsnotify_irqwork);
1708 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1709 }
1710 
1711 static void trace_create_maxlat_file(struct trace_array *tr,
1712 				     struct dentry *d_tracer)
1713 {
1714 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1715 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1716 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1717 					      d_tracer, &tr->max_latency,
1718 					      &tracing_max_lat_fops);
1719 }
1720 
1721 __init static int latency_fsnotify_init(void)
1722 {
1723 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1724 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1725 	if (!fsnotify_wq) {
1726 		pr_err("Unable to allocate tr_max_lat_wq\n");
1727 		return -ENOMEM;
1728 	}
1729 	return 0;
1730 }
1731 
1732 late_initcall_sync(latency_fsnotify_init);
1733 
1734 void latency_fsnotify(struct trace_array *tr)
1735 {
1736 	if (!fsnotify_wq)
1737 		return;
1738 	/*
1739 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1740 	 * possible that we are called from __schedule() or do_idle(), which
1741 	 * could cause a deadlock.
1742 	 */
1743 	irq_work_queue(&tr->fsnotify_irqwork);
1744 }
1745 
1746 /*
1747  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1748  *  defined(CONFIG_FSNOTIFY)
1749  */
1750 #else
1751 
1752 #define trace_create_maxlat_file(tr, d_tracer)				\
1753 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1754 			  &tr->max_latency, &tracing_max_lat_fops)
1755 
1756 #endif
1757 
1758 #ifdef CONFIG_TRACER_MAX_TRACE
1759 /*
1760  * Copy the new maximum trace into the separate maximum-trace
1761  * structure. (This way the maximum trace is permanently saved,
1762  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1763  */
1764 static void
1765 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1766 {
1767 	struct array_buffer *trace_buf = &tr->array_buffer;
1768 	struct array_buffer *max_buf = &tr->max_buffer;
1769 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1770 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1771 
1772 	max_buf->cpu = cpu;
1773 	max_buf->time_start = data->preempt_timestamp;
1774 
1775 	max_data->saved_latency = tr->max_latency;
1776 	max_data->critical_start = data->critical_start;
1777 	max_data->critical_end = data->critical_end;
1778 
1779 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1780 	max_data->pid = tsk->pid;
1781 	/*
1782 	 * If tsk == current, then use current_uid(), as that does not use
1783 	 * RCU. The irq tracer can be called out of RCU scope.
1784 	 */
1785 	if (tsk == current)
1786 		max_data->uid = current_uid();
1787 	else
1788 		max_data->uid = task_uid(tsk);
1789 
1790 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1791 	max_data->policy = tsk->policy;
1792 	max_data->rt_priority = tsk->rt_priority;
1793 
1794 	/* record this tasks comm */
1795 	tracing_record_cmdline(tsk);
1796 	latency_fsnotify(tr);
1797 }
1798 
1799 /**
1800  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1801  * @tr: tracer
1802  * @tsk: the task with the latency
1803  * @cpu: The cpu that initiated the trace.
1804  * @cond_data: User data associated with a conditional snapshot
1805  *
1806  * Flip the buffers between the @tr and the max_tr and record information
1807  * about which task was the cause of this latency.
1808  */
1809 void
1810 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1811 	      void *cond_data)
1812 {
1813 	if (tr->stop_count)
1814 		return;
1815 
1816 	WARN_ON_ONCE(!irqs_disabled());
1817 
1818 	if (!tr->allocated_snapshot) {
1819 		/* Only the nop tracer should hit this when disabling */
1820 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1821 		return;
1822 	}
1823 
1824 	arch_spin_lock(&tr->max_lock);
1825 
1826 	/* Inherit the recordable setting from array_buffer */
1827 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1828 		ring_buffer_record_on(tr->max_buffer.buffer);
1829 	else
1830 		ring_buffer_record_off(tr->max_buffer.buffer);
1831 
1832 #ifdef CONFIG_TRACER_SNAPSHOT
1833 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1834 		goto out_unlock;
1835 #endif
1836 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1837 
1838 	__update_max_tr(tr, tsk, cpu);
1839 
1840  out_unlock:
1841 	arch_spin_unlock(&tr->max_lock);
1842 }
1843 
1844 /**
1845  * update_max_tr_single - only copy one trace over, and reset the rest
1846  * @tr: tracer
1847  * @tsk: task with the latency
1848  * @cpu: the cpu of the buffer to copy.
1849  *
1850  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1851  */
1852 void
1853 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1854 {
1855 	int ret;
1856 
1857 	if (tr->stop_count)
1858 		return;
1859 
1860 	WARN_ON_ONCE(!irqs_disabled());
1861 	if (!tr->allocated_snapshot) {
1862 		/* Only the nop tracer should hit this when disabling */
1863 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1864 		return;
1865 	}
1866 
1867 	arch_spin_lock(&tr->max_lock);
1868 
1869 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1870 
1871 	if (ret == -EBUSY) {
1872 		/*
1873 		 * We failed to swap the buffer due to a commit taking
1874 		 * place on this CPU. We fail to record, but we write a
1875 		 * note into the max trace buffer (no one writes directly
1876 		 * to it) to flag that it failed.
1877 		 */
1878 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1879 			"Failed to swap buffers due to commit in progress\n");
1880 	}
1881 
1882 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1883 
1884 	__update_max_tr(tr, tsk, cpu);
1885 	arch_spin_unlock(&tr->max_lock);
1886 }
1887 #endif /* CONFIG_TRACER_MAX_TRACE */
1888 
1889 static int wait_on_pipe(struct trace_iterator *iter, int full)
1890 {
1891 	/* Iterators are static, they should be filled or empty */
1892 	if (trace_buffer_iter(iter, iter->cpu_file))
1893 		return 0;
1894 
1895 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1896 				full);
1897 }
1898 
1899 #ifdef CONFIG_FTRACE_STARTUP_TEST
1900 static bool selftests_can_run;
1901 
1902 struct trace_selftests {
1903 	struct list_head		list;
1904 	struct tracer			*type;
1905 };
1906 
1907 static LIST_HEAD(postponed_selftests);
1908 
1909 static int save_selftest(struct tracer *type)
1910 {
1911 	struct trace_selftests *selftest;
1912 
1913 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1914 	if (!selftest)
1915 		return -ENOMEM;
1916 
1917 	selftest->type = type;
1918 	list_add(&selftest->list, &postponed_selftests);
1919 	return 0;
1920 }
1921 
1922 static int run_tracer_selftest(struct tracer *type)
1923 {
1924 	struct trace_array *tr = &global_trace;
1925 	struct tracer *saved_tracer = tr->current_trace;
1926 	int ret;
1927 
1928 	if (!type->selftest || tracing_selftest_disabled)
1929 		return 0;
1930 
1931 	/*
1932 	 * If a tracer registers early in boot up (before scheduling is
1933 	 * initialized and such), then do not run its selftests yet.
1934 	 * Instead, run it a little later in the boot process.
1935 	 */
1936 	if (!selftests_can_run)
1937 		return save_selftest(type);
1938 
1939 	if (!tracing_is_on()) {
1940 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1941 			type->name);
1942 		return 0;
1943 	}
1944 
1945 	/*
1946 	 * Run a selftest on this tracer.
1947 	 * Here we reset the trace buffer, and set the current
1948 	 * tracer to be this tracer. The tracer can then run some
1949 	 * internal tracing to verify that everything is in order.
1950 	 * If we fail, we do not register this tracer.
1951 	 */
1952 	tracing_reset_online_cpus(&tr->array_buffer);
1953 
1954 	tr->current_trace = type;
1955 
1956 #ifdef CONFIG_TRACER_MAX_TRACE
1957 	if (type->use_max_tr) {
1958 		/* If we expanded the buffers, make sure the max is expanded too */
1959 		if (ring_buffer_expanded)
1960 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1961 					   RING_BUFFER_ALL_CPUS);
1962 		tr->allocated_snapshot = true;
1963 	}
1964 #endif
1965 
1966 	/* the test is responsible for initializing and enabling */
1967 	pr_info("Testing tracer %s: ", type->name);
1968 	ret = type->selftest(type, tr);
1969 	/* the test is responsible for resetting too */
1970 	tr->current_trace = saved_tracer;
1971 	if (ret) {
1972 		printk(KERN_CONT "FAILED!\n");
1973 		/* Add the warning after printing 'FAILED' */
1974 		WARN_ON(1);
1975 		return -1;
1976 	}
1977 	/* Only reset on passing, to avoid touching corrupted buffers */
1978 	tracing_reset_online_cpus(&tr->array_buffer);
1979 
1980 #ifdef CONFIG_TRACER_MAX_TRACE
1981 	if (type->use_max_tr) {
1982 		tr->allocated_snapshot = false;
1983 
1984 		/* Shrink the max buffer again */
1985 		if (ring_buffer_expanded)
1986 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1987 					   RING_BUFFER_ALL_CPUS);
1988 	}
1989 #endif
1990 
1991 	printk(KERN_CONT "PASSED\n");
1992 	return 0;
1993 }
1994 
1995 static __init int init_trace_selftests(void)
1996 {
1997 	struct trace_selftests *p, *n;
1998 	struct tracer *t, **last;
1999 	int ret;
2000 
2001 	selftests_can_run = true;
2002 
2003 	mutex_lock(&trace_types_lock);
2004 
2005 	if (list_empty(&postponed_selftests))
2006 		goto out;
2007 
2008 	pr_info("Running postponed tracer tests:\n");
2009 
2010 	tracing_selftest_running = true;
2011 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2012 		/* This loop can take minutes when sanitizers are enabled, so
2013 		 * let's make sure we allow RCU processing.
2014 		 */
2015 		cond_resched();
2016 		ret = run_tracer_selftest(p->type);
2017 		/* If the test fails, then warn and remove from available_tracers */
2018 		if (ret < 0) {
2019 			WARN(1, "tracer: %s failed selftest, disabling\n",
2020 			     p->type->name);
2021 			last = &trace_types;
2022 			for (t = trace_types; t; t = t->next) {
2023 				if (t == p->type) {
2024 					*last = t->next;
2025 					break;
2026 				}
2027 				last = &t->next;
2028 			}
2029 		}
2030 		list_del(&p->list);
2031 		kfree(p);
2032 	}
2033 	tracing_selftest_running = false;
2034 
2035  out:
2036 	mutex_unlock(&trace_types_lock);
2037 
2038 	return 0;
2039 }
2040 core_initcall(init_trace_selftests);
2041 #else
2042 static inline int run_tracer_selftest(struct tracer *type)
2043 {
2044 	return 0;
2045 }
2046 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2047 
2048 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2049 
2050 static void __init apply_trace_boot_options(void);
2051 
2052 /**
2053  * register_tracer - register a tracer with the ftrace system.
2054  * @type: the plugin for the tracer
2055  *
2056  * Register a new plugin tracer.
2057  */
2058 int __init register_tracer(struct tracer *type)
2059 {
2060 	struct tracer *t;
2061 	int ret = 0;
2062 
2063 	if (!type->name) {
2064 		pr_info("Tracer must have a name\n");
2065 		return -1;
2066 	}
2067 
2068 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2069 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2070 		return -1;
2071 	}
2072 
2073 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2074 		pr_warn("Can not register tracer %s due to lockdown\n",
2075 			   type->name);
2076 		return -EPERM;
2077 	}
2078 
2079 	mutex_lock(&trace_types_lock);
2080 
2081 	tracing_selftest_running = true;
2082 
2083 	for (t = trace_types; t; t = t->next) {
2084 		if (strcmp(type->name, t->name) == 0) {
2085 			/* already found */
2086 			pr_info("Tracer %s already registered\n",
2087 				type->name);
2088 			ret = -1;
2089 			goto out;
2090 		}
2091 	}
2092 
2093 	if (!type->set_flag)
2094 		type->set_flag = &dummy_set_flag;
2095 	if (!type->flags) {
2096 		/* allocate a dummy tracer_flags */
2097 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2098 		if (!type->flags) {
2099 			ret = -ENOMEM;
2100 			goto out;
2101 		}
2102 		type->flags->val = 0;
2103 		type->flags->opts = dummy_tracer_opt;
2104 	} else if (!type->flags->opts) {
2105 		type->flags->opts = dummy_tracer_opt;
2106 	}
2107 
2108 	/* store the tracer for __set_tracer_option */
2109 	type->flags->trace = type;
2110 
2111 	ret = run_tracer_selftest(type);
2112 	if (ret < 0)
2113 		goto out;
2114 
2115 	type->next = trace_types;
2116 	trace_types = type;
2117 	add_tracer_options(&global_trace, type);
2118 
2119  out:
2120 	tracing_selftest_running = false;
2121 	mutex_unlock(&trace_types_lock);
2122 
2123 	if (ret || !default_bootup_tracer)
2124 		goto out_unlock;
2125 
2126 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2127 		goto out_unlock;
2128 
2129 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2130 	/* Do we want this tracer to start on bootup? */
2131 	tracing_set_tracer(&global_trace, type->name);
2132 	default_bootup_tracer = NULL;
2133 
2134 	apply_trace_boot_options();
2135 
2136 	/* Disable other selftests, since they would disturb the tracer just started. */
2137 	disable_tracing_selftest("running a tracer");
2138 
2139  out_unlock:
2140 	return ret;
2141 }
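
/*
 * Example (hypothetical sketch; the "nop_like" names are made up for
 * illustration): a minimal tracer needs little more than a name and an
 * init callback before being handed to register_tracer() from an
 * __init path:
 *
 *	static int nop_like_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static struct tracer nop_like_tracer __read_mostly = {
 *		.name	= "nop_like",
 *		.init	= nop_like_init,
 *	};
 *
 *	static __init int nop_like_setup(void)
 *	{
 *		return register_tracer(&nop_like_tracer);
 *	}
 */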
2142 
2143 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2144 {
2145 	struct trace_buffer *buffer = buf->buffer;
2146 
2147 	if (!buffer)
2148 		return;
2149 
2150 	ring_buffer_record_disable(buffer);
2151 
2152 	/* Make sure all commits have finished */
2153 	synchronize_rcu();
2154 	ring_buffer_reset_cpu(buffer, cpu);
2155 
2156 	ring_buffer_record_enable(buffer);
2157 }
2158 
2159 void tracing_reset_online_cpus(struct array_buffer *buf)
2160 {
2161 	struct trace_buffer *buffer = buf->buffer;
2162 
2163 	if (!buffer)
2164 		return;
2165 
2166 	ring_buffer_record_disable(buffer);
2167 
2168 	/* Make sure all commits have finished */
2169 	synchronize_rcu();
2170 
2171 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2172 
2173 	ring_buffer_reset_online_cpus(buffer);
2174 
2175 	ring_buffer_record_enable(buffer);
2176 }
2177 
2178 /* Must have trace_types_lock held */
2179 void tracing_reset_all_online_cpus(void)
2180 {
2181 	struct trace_array *tr;
2182 
2183 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2184 		if (!tr->clear_trace)
2185 			continue;
2186 		tr->clear_trace = false;
2187 		tracing_reset_online_cpus(&tr->array_buffer);
2188 #ifdef CONFIG_TRACER_MAX_TRACE
2189 		tracing_reset_online_cpus(&tr->max_buffer);
2190 #endif
2191 	}
2192 }
2193 
2194 /*
2195  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2196  * is the tgid last observed corresponding to pid=i.
2197  */
2198 static int *tgid_map;
2199 
2200 /* The maximum valid index into tgid_map. */
2201 static size_t tgid_map_max;
2202 
2203 #define SAVED_CMDLINES_DEFAULT 128
2204 #define NO_CMDLINE_MAP UINT_MAX
2205 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2206 struct saved_cmdlines_buffer {
2207 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2208 	unsigned *map_cmdline_to_pid;
2209 	unsigned cmdline_num;
2210 	int cmdline_idx;
2211 	char *saved_cmdlines;
2212 };
2213 static struct saved_cmdlines_buffer *savedcmd;
2214 
2215 /* temporarily disable recording */
2216 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2217 
2218 static inline char *get_saved_cmdlines(int idx)
2219 {
2220 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2221 }
2222 
2223 static inline void set_cmdline(int idx, const char *cmdline)
2224 {
2225 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2226 }
2227 
2228 static int allocate_cmdlines_buffer(unsigned int val,
2229 				    struct saved_cmdlines_buffer *s)
2230 {
2231 	s->map_cmdline_to_pid = kmalloc_array(val,
2232 					      sizeof(*s->map_cmdline_to_pid),
2233 					      GFP_KERNEL);
2234 	if (!s->map_cmdline_to_pid)
2235 		return -ENOMEM;
2236 
2237 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2238 	if (!s->saved_cmdlines) {
2239 		kfree(s->map_cmdline_to_pid);
2240 		return -ENOMEM;
2241 	}
2242 
2243 	s->cmdline_idx = 0;
2244 	s->cmdline_num = val;
2245 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2246 	       sizeof(s->map_pid_to_cmdline));
2247 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2248 	       val * sizeof(*s->map_cmdline_to_pid));
2249 
2250 	return 0;
2251 }
2252 
2253 static int trace_create_savedcmd(void)
2254 {
2255 	int ret;
2256 
2257 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2258 	if (!savedcmd)
2259 		return -ENOMEM;
2260 
2261 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2262 	if (ret < 0) {
2263 		kfree(savedcmd);
2264 		savedcmd = NULL;
2265 		return -ENOMEM;
2266 	}
2267 
2268 	return 0;
2269 }
2270 
2271 int is_tracing_stopped(void)
2272 {
2273 	return global_trace.stop_count;
2274 }
2275 
2276 /**
2277  * tracing_start - quick start of the tracer
2278  *
2279  * If tracing is enabled but was stopped by tracing_stop,
2280  * this will start the tracer back up.
2281  */
2282 void tracing_start(void)
2283 {
2284 	struct trace_buffer *buffer;
2285 	unsigned long flags;
2286 
2287 	if (tracing_disabled)
2288 		return;
2289 
2290 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2291 	if (--global_trace.stop_count) {
2292 		if (global_trace.stop_count < 0) {
2293 			/* Someone screwed up their debugging */
2294 			WARN_ON_ONCE(1);
2295 			global_trace.stop_count = 0;
2296 		}
2297 		goto out;
2298 	}
2299 
2300 	/* Prevent the buffers from switching */
2301 	arch_spin_lock(&global_trace.max_lock);
2302 
2303 	buffer = global_trace.array_buffer.buffer;
2304 	if (buffer)
2305 		ring_buffer_record_enable(buffer);
2306 
2307 #ifdef CONFIG_TRACER_MAX_TRACE
2308 	buffer = global_trace.max_buffer.buffer;
2309 	if (buffer)
2310 		ring_buffer_record_enable(buffer);
2311 #endif
2312 
2313 	arch_spin_unlock(&global_trace.max_lock);
2314 
2315  out:
2316 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2317 }
2318 
2319 static void tracing_start_tr(struct trace_array *tr)
2320 {
2321 	struct trace_buffer *buffer;
2322 	unsigned long flags;
2323 
2324 	if (tracing_disabled)
2325 		return;
2326 
2327 	/* If global, we need to also start the max tracer */
2328 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2329 		return tracing_start();
2330 
2331 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2332 
2333 	if (--tr->stop_count) {
2334 		if (tr->stop_count < 0) {
2335 			/* Someone screwed up their debugging */
2336 			WARN_ON_ONCE(1);
2337 			tr->stop_count = 0;
2338 		}
2339 		goto out;
2340 	}
2341 
2342 	buffer = tr->array_buffer.buffer;
2343 	if (buffer)
2344 		ring_buffer_record_enable(buffer);
2345 
2346  out:
2347 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2348 }
2349 
2350 /**
2351  * tracing_stop - quick stop of the tracer
2352  *
2353  * Light weight way to stop tracing. Use in conjunction with
2354  * tracing_start.
2355  */
2356 void tracing_stop(void)
2357 {
2358 	struct trace_buffer *buffer;
2359 	unsigned long flags;
2360 
2361 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2362 	if (global_trace.stop_count++)
2363 		goto out;
2364 
2365 	/* Prevent the buffers from switching */
2366 	arch_spin_lock(&global_trace.max_lock);
2367 
2368 	buffer = global_trace.array_buffer.buffer;
2369 	if (buffer)
2370 		ring_buffer_record_disable(buffer);
2371 
2372 #ifdef CONFIG_TRACER_MAX_TRACE
2373 	buffer = global_trace.max_buffer.buffer;
2374 	if (buffer)
2375 		ring_buffer_record_disable(buffer);
2376 #endif
2377 
2378 	arch_spin_unlock(&global_trace.max_lock);
2379 
2380  out:
2381 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2382 }
2383 
2384 static void tracing_stop_tr(struct trace_array *tr)
2385 {
2386 	struct trace_buffer *buffer;
2387 	unsigned long flags;
2388 
2389 	/* If global, we need to also stop the max tracer */
2390 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2391 		return tracing_stop();
2392 
2393 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2394 	if (tr->stop_count++)
2395 		goto out;
2396 
2397 	buffer = tr->array_buffer.buffer;
2398 	if (buffer)
2399 		ring_buffer_record_disable(buffer);
2400 
2401  out:
2402 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2403 }
2404 
2405 static int trace_save_cmdline(struct task_struct *tsk)
2406 {
2407 	unsigned tpid, idx;
2408 
2409 	/* treat recording of idle task as a success */
2410 	if (!tsk->pid)
2411 		return 1;
2412 
2413 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2414 
2415 	/*
2416 	 * It's not the end of the world if we don't get
2417 	 * the lock, but we also don't want to spin
2418 	 * nor do we want to disable interrupts,
2419 	 * so if we miss here, then better luck next time.
2420 	 */
2421 	if (!arch_spin_trylock(&trace_cmdline_lock))
2422 		return 0;
2423 
2424 	idx = savedcmd->map_pid_to_cmdline[tpid];
2425 	if (idx == NO_CMDLINE_MAP) {
2426 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2427 
2428 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2429 		savedcmd->cmdline_idx = idx;
2430 	}
2431 
2432 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2433 	set_cmdline(idx, tsk->comm);
2434 
2435 	arch_spin_unlock(&trace_cmdline_lock);
2436 
2437 	return 1;
2438 }
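
/*
 * Example (editor's sketch of the mapping above, with made-up numbers
 * and assuming the default PID_MAX_DEFAULT of 0x8000): because the pid
 * is masked with PID_MAX_DEFAULT - 1, pid 5 and pid 32773 (0x8000 + 5)
 * land in the same slot, and the last one recorded wins:
 *
 *	tpid = 32773 & (PID_MAX_DEFAULT - 1);	// == 5
 *	idx  = savedcmd->map_pid_to_cmdline[tpid];
 *	// the comm saved at idx belongs to whichever pid was saved last
 *
 * __trace_find_cmdline() below detects the collision by checking
 * map_cmdline_to_pid[] and falls back to "<...>" when the pids differ.
 */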
2439 
2440 static void __trace_find_cmdline(int pid, char comm[])
2441 {
2442 	unsigned map;
2443 	int tpid;
2444 
2445 	if (!pid) {
2446 		strcpy(comm, "<idle>");
2447 		return;
2448 	}
2449 
2450 	if (WARN_ON_ONCE(pid < 0)) {
2451 		strcpy(comm, "<XXX>");
2452 		return;
2453 	}
2454 
2455 	tpid = pid & (PID_MAX_DEFAULT - 1);
2456 	map = savedcmd->map_pid_to_cmdline[tpid];
2457 	if (map != NO_CMDLINE_MAP) {
2458 		tpid = savedcmd->map_cmdline_to_pid[map];
2459 		if (tpid == pid) {
2460 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2461 			return;
2462 		}
2463 	}
2464 	strcpy(comm, "<...>");
2465 }
2466 
2467 void trace_find_cmdline(int pid, char comm[])
2468 {
2469 	preempt_disable();
2470 	arch_spin_lock(&trace_cmdline_lock);
2471 
2472 	__trace_find_cmdline(pid, comm);
2473 
2474 	arch_spin_unlock(&trace_cmdline_lock);
2475 	preempt_enable();
2476 }
2477 
2478 static int *trace_find_tgid_ptr(int pid)
2479 {
2480 	/*
2481 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2482 	 * if we observe a non-NULL tgid_map then we also observe the correct
2483 	 * tgid_map_max.
2484 	 */
2485 	int *map = smp_load_acquire(&tgid_map);
2486 
2487 	if (unlikely(!map || pid > tgid_map_max))
2488 		return NULL;
2489 
2490 	return &map[pid];
2491 }
2492 
2493 int trace_find_tgid(int pid)
2494 {
2495 	int *ptr = trace_find_tgid_ptr(pid);
2496 
2497 	return ptr ? *ptr : 0;
2498 }
2499 
2500 static int trace_save_tgid(struct task_struct *tsk)
2501 {
2502 	int *ptr;
2503 
2504 	/* treat recording of idle task as a success */
2505 	if (!tsk->pid)
2506 		return 1;
2507 
2508 	ptr = trace_find_tgid_ptr(tsk->pid);
2509 	if (!ptr)
2510 		return 0;
2511 
2512 	*ptr = tsk->tgid;
2513 	return 1;
2514 }
2515 
2516 static bool tracing_record_taskinfo_skip(int flags)
2517 {
2518 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2519 		return true;
2520 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2521 		return true;
2522 	if (!__this_cpu_read(trace_taskinfo_save))
2523 		return true;
2524 	return false;
2525 }
2526 
2527 /**
2528  * tracing_record_taskinfo - record the task info of a task
2529  *
2530  * @task:  task to record
2531  * @flags: TRACE_RECORD_CMDLINE for recording comm
2532  *         TRACE_RECORD_TGID for recording tgid
2533  */
2534 void tracing_record_taskinfo(struct task_struct *task, int flags)
2535 {
2536 	bool done;
2537 
2538 	if (tracing_record_taskinfo_skip(flags))
2539 		return;
2540 
2541 	/*
2542 	 * Record as much task information as possible. If some fail, continue
2543 	 * to try to record the others.
2544 	 */
2545 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2546 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2547 
2548 	/* If recording any information failed, retry soon. */
2549 	if (!done)
2550 		return;
2551 
2552 	__this_cpu_write(trace_taskinfo_save, false);
2553 }
2554 
2555 /**
2556  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2557  *
2558  * @prev: previous task during sched_switch
2559  * @next: next task during sched_switch
2560  * @flags: TRACE_RECORD_CMDLINE for recording comm
2561  *         TRACE_RECORD_TGID for recording tgid
2562  */
2563 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2564 					  struct task_struct *next, int flags)
2565 {
2566 	bool done;
2567 
2568 	if (tracing_record_taskinfo_skip(flags))
2569 		return;
2570 
2571 	/*
2572 	 * Record as much task information as possible. If some fail, continue
2573 	 * to try to record the others.
2574 	 */
2575 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2576 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2577 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2578 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2579 
2580 	/* If recording any information failed, retry soon. */
2581 	if (!done)
2582 		return;
2583 
2584 	__this_cpu_write(trace_taskinfo_save, false);
2585 }
2586 
2587 /* Helpers to record a specific task information */
2588 void tracing_record_cmdline(struct task_struct *task)
2589 {
2590 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2591 }
2592 
2593 void tracing_record_tgid(struct task_struct *task)
2594 {
2595 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2596 }
2597 
2598 /*
2599  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2600  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2601  * simplifies those functions and keeps them in sync.
2602  */
2603 enum print_line_t trace_handle_return(struct trace_seq *s)
2604 {
2605 	return trace_seq_has_overflowed(s) ?
2606 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2607 }
2608 EXPORT_SYMBOL_GPL(trace_handle_return);
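
/*
 * Example (sketch of the common calling pattern; "trace_foo_print" is a
 * hypothetical output handler): event print callbacks usually end with
 * trace_handle_return() so an overflowed trace_seq is reported as a
 * partial line:
 *
 *	static enum print_line_t
 *	trace_foo_print(struct trace_iterator *iter, int flags,
 *			struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */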
2609 
2610 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2611 {
2612 	unsigned int trace_flags = irqs_status;
2613 	unsigned int pc;
2614 
2615 	pc = preempt_count();
2616 
2617 	if (pc & NMI_MASK)
2618 		trace_flags |= TRACE_FLAG_NMI;
2619 	if (pc & HARDIRQ_MASK)
2620 		trace_flags |= TRACE_FLAG_HARDIRQ;
2621 	if (in_serving_softirq())
2622 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2623 
2624 	if (tif_need_resched())
2625 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2626 	if (test_preempt_need_resched())
2627 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2628 	return (trace_flags << 16) | (pc & 0xff);
2629 }
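
/*
 * Example (decoding sketch; the field names follow struct trace_entry):
 * the packed value returned above is split back apart when an event
 * header is filled in, roughly:
 *
 *	entry->flags         = trace_ctx >> 16;   // TRACE_FLAG_* bits
 *	entry->preempt_count = trace_ctx & 0xff;  // preempt_count() snapshot
 */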
2630 
2631 struct ring_buffer_event *
2632 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2633 			  int type,
2634 			  unsigned long len,
2635 			  unsigned int trace_ctx)
2636 {
2637 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2638 }
2639 
2640 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2641 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2642 static int trace_buffered_event_ref;
2643 
2644 /**
2645  * trace_buffered_event_enable - enable buffering events
2646  *
2647  * When events are being filtered, it is quicker to use a temporary
2648  * buffer to write the event data into if there's a likely chance
2649  * that it will not be committed. The discard of the ring buffer
2650  * that it will not be committed. Discarding from the ring buffer
2651  * is not as fast as committing, and is much slower than copying
2652  * the data and then committing it.
2653  * When an event is to be filtered, allocate per cpu buffers to
2654  * write the event data into, and if the event is filtered and discarded
2655  * it is simply dropped, otherwise, the entire data is to be committed
2656  * in one shot.
2657  */
2658 void trace_buffered_event_enable(void)
2659 {
2660 	struct ring_buffer_event *event;
2661 	struct page *page;
2662 	int cpu;
2663 
2664 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2665 
2666 	if (trace_buffered_event_ref++)
2667 		return;
2668 
2669 	for_each_tracing_cpu(cpu) {
2670 		page = alloc_pages_node(cpu_to_node(cpu),
2671 					GFP_KERNEL | __GFP_NORETRY, 0);
2672 		if (!page)
2673 			goto failed;
2674 
2675 		event = page_address(page);
2676 		memset(event, 0, sizeof(*event));
2677 
2678 		per_cpu(trace_buffered_event, cpu) = event;
2679 
2680 		preempt_disable();
2681 		if (cpu == smp_processor_id() &&
2682 		    __this_cpu_read(trace_buffered_event) !=
2683 		    per_cpu(trace_buffered_event, cpu))
2684 			WARN_ON_ONCE(1);
2685 		preempt_enable();
2686 	}
2687 
2688 	return;
2689  failed:
2690 	trace_buffered_event_disable();
2691 }
2692 
2693 static void enable_trace_buffered_event(void *data)
2694 {
2695 	/* Probably not needed, but do it anyway */
2696 	smp_rmb();
2697 	this_cpu_dec(trace_buffered_event_cnt);
2698 }
2699 
2700 static void disable_trace_buffered_event(void *data)
2701 {
2702 	this_cpu_inc(trace_buffered_event_cnt);
2703 }
2704 
2705 /**
2706  * trace_buffered_event_disable - disable buffering events
2707  *
2708  * When a filter is removed, it is faster to not use the buffered
2709  * events, and to commit directly into the ring buffer. Free up
2710  * the temp buffers when there are no more users. This requires
2711  * special synchronization with current events.
2712  */
2713 void trace_buffered_event_disable(void)
2714 {
2715 	int cpu;
2716 
2717 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2718 
2719 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2720 		return;
2721 
2722 	if (--trace_buffered_event_ref)
2723 		return;
2724 
2725 	preempt_disable();
2726 	/* For each CPU, set the buffer as used. */
2727 	smp_call_function_many(tracing_buffer_mask,
2728 			       disable_trace_buffered_event, NULL, 1);
2729 	preempt_enable();
2730 
2731 	/* Wait for all current users to finish */
2732 	synchronize_rcu();
2733 
2734 	for_each_tracing_cpu(cpu) {
2735 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2736 		per_cpu(trace_buffered_event, cpu) = NULL;
2737 	}
2738 	/*
2739 	 * Make sure trace_buffered_event is NULL before clearing
2740 	 * trace_buffered_event_cnt.
2741 	 */
2742 	smp_wmb();
2743 
2744 	preempt_disable();
2745 	/* Do the work on each cpu */
2746 	smp_call_function_many(tracing_buffer_mask,
2747 			       enable_trace_buffered_event, NULL, 1);
2748 	preempt_enable();
2749 }
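
/*
 * Example (usage sketch; the filter handling itself is elided): the
 * enable/disable pair is reference counted and must be called with
 * event_mutex held, bracketing the lifetime of a filter:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	// ... install the filter ...
 *	mutex_unlock(&event_mutex);
 *
 *	// later, when the filter is removed, again under event_mutex:
 *	trace_buffered_event_disable();
 */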
2750 
2751 static struct trace_buffer *temp_buffer;
2752 
2753 struct ring_buffer_event *
2754 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2755 			  struct trace_event_file *trace_file,
2756 			  int type, unsigned long len,
2757 			  unsigned int trace_ctx)
2758 {
2759 	struct ring_buffer_event *entry;
2760 	struct trace_array *tr = trace_file->tr;
2761 	int val;
2762 
2763 	*current_rb = tr->array_buffer.buffer;
2764 
2765 	if (!tr->no_filter_buffering_ref &&
2766 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2767 	    (entry = this_cpu_read(trace_buffered_event))) {
2768 		/*
2769 		 * Filtering is on, so try to use the per cpu buffer first.
2770 		 * This buffer will simulate a ring_buffer_event,
2771 		 * where the type_len is zero and the array[0] will
2772 		 * hold the full length.
2773 		 * (see include/linux/ring_buffer.h for details on
2774 		 *  how the ring_buffer_event is structured).
2775 		 *
2776 		 * Using a temp buffer during filtering and copying it
2777 		 * on a matched filter is quicker than writing directly
2778 		 * into the ring buffer and then discarding it when
2779 		 * it doesn't match. That is because the discard
2780 		 * requires several atomic operations to get right.
2781 		 * Copying on a match and doing nothing on a failed match
2782 		 * is still quicker than skipping the copy but having to
2783 		 * discard the event from the ring buffer on a failed match.
2784 		 */
2785 		int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2786 
2787 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2788 
2789 		/*
2790 		 * Preemption is disabled, but interrupts and NMIs
2791 		 * can still come in now. If that happens after
2792 		 * the above increment, then it will have to go
2793 		 * back to the old method of allocating the event
2794 		 * on the ring buffer, and if the filter fails, it
2795 		 * will have to call ring_buffer_discard_commit()
2796 		 * to remove it.
2797 		 *
2798 		 * Need to also check the unlikely case that the
2799 		 * length is bigger than the temp buffer size.
2800 		 * If that happens, then the reserve is pretty much
2801 		 * guaranteed to fail, as the ring buffer currently
2802 		 * only allows events less than a page. But that may
2803 		 * change in the future, so let the ring buffer reserve
2804 		 * handle the failure in that case.
2805 		 */
2806 		if (val == 1 && likely(len <= max_len)) {
2807 			trace_event_setup(entry, type, trace_ctx);
2808 			entry->array[0] = len;
2809 			return entry;
2810 		}
2811 		this_cpu_dec(trace_buffered_event_cnt);
2812 	}
2813 
2814 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2815 					    trace_ctx);
2816 	/*
2817 	 * If tracing is off, but we have triggers enabled
2818 	 * we still need to look at the event data. Use the temp_buffer
2819 	 * to store the trace event for the trigger to use. It's recursion
2820 	 * safe and will not be recorded anywhere.
2821 	 */
2822 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2823 		*current_rb = temp_buffer;
2824 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2825 						    trace_ctx);
2826 	}
2827 	return entry;
2828 }
2829 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
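
/*
 * Example (sketch of the reserve/fill/commit pattern used by generated
 * event code; "my_entry" is a made-up payload type): callers reserve,
 * fill in the returned event data, and then commit through the event
 * buffer helpers (see trace_event_buffer_commit() below):
 *
 *	event = trace_event_buffer_lock_reserve(&buffer, trace_file, type,
 *						sizeof(*my_entry), trace_ctx);
 *	if (!event)
 *		return;
 *	my_entry = ring_buffer_event_data(event);
 *	my_entry->value = 42;
 *	// ... commit via the event buffer helpers ...
 */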
2830 
2831 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2832 static DEFINE_MUTEX(tracepoint_printk_mutex);
2833 
2834 static void output_printk(struct trace_event_buffer *fbuffer)
2835 {
2836 	struct trace_event_call *event_call;
2837 	struct trace_event_file *file;
2838 	struct trace_event *event;
2839 	unsigned long flags;
2840 	struct trace_iterator *iter = tracepoint_print_iter;
2841 
2842 	/* We should never get here if iter is NULL */
2843 	if (WARN_ON_ONCE(!iter))
2844 		return;
2845 
2846 	event_call = fbuffer->trace_file->event_call;
2847 	if (!event_call || !event_call->event.funcs ||
2848 	    !event_call->event.funcs->trace)
2849 		return;
2850 
2851 	file = fbuffer->trace_file;
2852 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2853 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2854 	     !filter_match_preds(file->filter, fbuffer->entry)))
2855 		return;
2856 
2857 	event = &fbuffer->trace_file->event_call->event;
2858 
2859 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2860 	trace_seq_init(&iter->seq);
2861 	iter->ent = fbuffer->entry;
2862 	event_call->event.funcs->trace(iter, 0, event);
2863 	trace_seq_putc(&iter->seq, 0);
2864 	printk("%s", iter->seq.buffer);
2865 
2866 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2867 }
2868 
2869 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2870 			     void *buffer, size_t *lenp,
2871 			     loff_t *ppos)
2872 {
2873 	int save_tracepoint_printk;
2874 	int ret;
2875 
2876 	mutex_lock(&tracepoint_printk_mutex);
2877 	save_tracepoint_printk = tracepoint_printk;
2878 
2879 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2880 
2881 	/*
2882 	 * This will force exiting early, as tracepoint_printk
2883 	 * is always zero when tracepoint_print_iter is not allocated.
2884 	 */
2885 	if (!tracepoint_print_iter)
2886 		tracepoint_printk = 0;
2887 
2888 	if (save_tracepoint_printk == tracepoint_printk)
2889 		goto out;
2890 
2891 	if (tracepoint_printk)
2892 		static_key_enable(&tracepoint_printk_key.key);
2893 	else
2894 		static_key_disable(&tracepoint_printk_key.key);
2895 
2896  out:
2897 	mutex_unlock(&tracepoint_printk_mutex);
2898 
2899 	return ret;
2900 }
2901 
2902 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2903 {
2904 	if (static_key_false(&tracepoint_printk_key.key))
2905 		output_printk(fbuffer);
2906 
2907 	if (static_branch_unlikely(&trace_event_exports_enabled))
2908 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2909 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2910 				    fbuffer->event, fbuffer->entry,
2911 				    fbuffer->trace_ctx, fbuffer->regs);
2912 }
2913 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2914 
2915 /*
2916  * Skip 3:
2917  *
2918  *   trace_buffer_unlock_commit_regs()
2919  *   trace_event_buffer_commit()
2920  *   trace_event_raw_event_xxx()
2921  */
2922 # define STACK_SKIP 3
2923 
2924 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2925 				     struct trace_buffer *buffer,
2926 				     struct ring_buffer_event *event,
2927 				     unsigned int trace_ctx,
2928 				     struct pt_regs *regs)
2929 {
2930 	__buffer_unlock_commit(buffer, event);
2931 
2932 	/*
2933 	 * If regs is not set, then skip the STACK_SKIP wrapper functions.
2934 	 * Note, we can still get here via blktrace, wakeup tracer
2935 	 * and mmiotrace, but that's ok if they lose a function or
2936 	 * two. They are not that meaningful.
2937 	 */
2938 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2939 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2940 }
2941 
2942 /*
2943  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2944  */
2945 void
2946 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2947 				   struct ring_buffer_event *event)
2948 {
2949 	__buffer_unlock_commit(buffer, event);
2950 }
2951 
2952 void
2953 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2954 	       parent_ip, unsigned int trace_ctx)
2955 {
2956 	struct trace_event_call *call = &event_function;
2957 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2958 	struct ring_buffer_event *event;
2959 	struct ftrace_entry *entry;
2960 
2961 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2962 					    trace_ctx);
2963 	if (!event)
2964 		return;
2965 	entry	= ring_buffer_event_data(event);
2966 	entry->ip			= ip;
2967 	entry->parent_ip		= parent_ip;
2968 
2969 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2970 		if (static_branch_unlikely(&trace_function_exports_enabled))
2971 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2972 		__buffer_unlock_commit(buffer, event);
2973 	}
2974 }
2975 
2976 #ifdef CONFIG_STACKTRACE
2977 
2978 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2979 #define FTRACE_KSTACK_NESTING	4
2980 
2981 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2982 
2983 struct ftrace_stack {
2984 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2985 };
2986 
2987 
2988 struct ftrace_stacks {
2989 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2990 };
2991 
2992 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2993 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2994 
2995 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2996 				 unsigned int trace_ctx,
2997 				 int skip, struct pt_regs *regs)
2998 {
2999 	struct trace_event_call *call = &event_kernel_stack;
3000 	struct ring_buffer_event *event;
3001 	unsigned int size, nr_entries;
3002 	struct ftrace_stack *fstack;
3003 	struct stack_entry *entry;
3004 	int stackidx;
3005 
3006 	/*
3007 	 * Add one, for this function and the call to stack_trace_save().
3008 	 * If regs is set, then these functions will not be in the way.
3009 	 */
3010 #ifndef CONFIG_UNWINDER_ORC
3011 	if (!regs)
3012 		skip++;
3013 #endif
3014 
3015 	preempt_disable_notrace();
3016 
3017 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3018 
3019 	/* This should never happen. If it does, yell once and skip */
3020 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3021 		goto out;
3022 
3023 	/*
3024 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3025 	 * interrupt will either see the value pre increment or post
3026 	 * increment. If the interrupt happens pre increment it will have
3027 	 * restored the counter when it returns.  We just need a barrier to
3028 	 * keep gcc from moving things around.
3029 	 */
3030 	barrier();
3031 
3032 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3033 	size = ARRAY_SIZE(fstack->calls);
3034 
3035 	if (regs) {
3036 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3037 						   size, skip);
3038 	} else {
3039 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3040 	}
3041 
3042 	size = nr_entries * sizeof(unsigned long);
3043 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3044 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3045 				    trace_ctx);
3046 	if (!event)
3047 		goto out;
3048 	entry = ring_buffer_event_data(event);
3049 
3050 	memcpy(&entry->caller, fstack->calls, size);
3051 	entry->size = nr_entries;
3052 
3053 	if (!call_filter_check_discard(call, entry, buffer, event))
3054 		__buffer_unlock_commit(buffer, event);
3055 
3056  out:
3057 	/* Again, don't let gcc optimize things here */
3058 	barrier();
3059 	__this_cpu_dec(ftrace_stack_reserve);
3060 	preempt_enable_notrace();
3061 
3062 }
3063 
3064 static inline void ftrace_trace_stack(struct trace_array *tr,
3065 				      struct trace_buffer *buffer,
3066 				      unsigned int trace_ctx,
3067 				      int skip, struct pt_regs *regs)
3068 {
3069 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3070 		return;
3071 
3072 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3073 }
3074 
3075 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3076 		   int skip)
3077 {
3078 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3079 
3080 	if (rcu_is_watching()) {
3081 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3082 		return;
3083 	}
3084 
3085 	/*
3086 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3087 	 * but if the above rcu_is_watching() failed, then the NMI
3088 	 * triggered someplace critical, and rcu_irq_enter() should
3089 	 * not be called from NMI.
3090 	 */
3091 	if (unlikely(in_nmi()))
3092 		return;
3093 
3094 	rcu_irq_enter_irqson();
3095 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3096 	rcu_irq_exit_irqson();
3097 }
3098 
3099 /**
3100  * trace_dump_stack - record a stack back trace in the trace buffer
3101  * @skip: Number of functions to skip (helper handlers)
3102  */
3103 void trace_dump_stack(int skip)
3104 {
3105 	if (tracing_disabled || tracing_selftest_running)
3106 		return;
3107 
3108 #ifndef CONFIG_UNWINDER_ORC
3109 	/* Skip 1 to skip this function. */
3110 	skip++;
3111 #endif
3112 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3113 			     tracing_gen_ctx(), skip, NULL);
3114 }
3115 EXPORT_SYMBOL_GPL(trace_dump_stack);
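
/*
 * Example (trivial usage sketch): a developer debugging a code path can
 * drop the current kernel stack into the trace buffer with:
 *
 *	trace_dump_stack(0);
 *
 * where 0 means no extra caller frames are skipped beyond this helper.
 */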
3116 
3117 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3118 static DEFINE_PER_CPU(int, user_stack_count);
3119 
3120 static void
3121 ftrace_trace_userstack(struct trace_array *tr,
3122 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3123 {
3124 	struct trace_event_call *call = &event_user_stack;
3125 	struct ring_buffer_event *event;
3126 	struct userstack_entry *entry;
3127 
3128 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3129 		return;
3130 
3131 	/*
3132 	 * NMIs can not handle page faults, even with fixups.
3133 	 * Saving the user stack can (and often does) fault.
3134 	 */
3135 	if (unlikely(in_nmi()))
3136 		return;
3137 
3138 	/*
3139 	 * prevent recursion, since the user stack tracing may
3140 	 * trigger other kernel events.
3141 	 */
3142 	preempt_disable();
3143 	if (__this_cpu_read(user_stack_count))
3144 		goto out;
3145 
3146 	__this_cpu_inc(user_stack_count);
3147 
3148 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3149 					    sizeof(*entry), trace_ctx);
3150 	if (!event)
3151 		goto out_drop_count;
3152 	entry	= ring_buffer_event_data(event);
3153 
3154 	entry->tgid		= current->tgid;
3155 	memset(&entry->caller, 0, sizeof(entry->caller));
3156 
3157 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3158 	if (!call_filter_check_discard(call, entry, buffer, event))
3159 		__buffer_unlock_commit(buffer, event);
3160 
3161  out_drop_count:
3162 	__this_cpu_dec(user_stack_count);
3163  out:
3164 	preempt_enable();
3165 }
3166 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3167 static void ftrace_trace_userstack(struct trace_array *tr,
3168 				   struct trace_buffer *buffer,
3169 				   unsigned int trace_ctx)
3170 {
3171 }
3172 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3173 
3174 #endif /* CONFIG_STACKTRACE */
3175 
3176 static inline void
3177 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3178 			  unsigned long long delta)
3179 {
3180 	entry->bottom_delta_ts = delta & U32_MAX;
3181 	entry->top_delta_ts = (delta >> 32);
3182 }
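
/*
 * Example (inverse of the split above): a reader reconstructs the
 * 64-bit delta from the two 32-bit halves with:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */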
3183 
3184 void trace_last_func_repeats(struct trace_array *tr,
3185 			     struct trace_func_repeats *last_info,
3186 			     unsigned int trace_ctx)
3187 {
3188 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3189 	struct func_repeats_entry *entry;
3190 	struct ring_buffer_event *event;
3191 	u64 delta;
3192 
3193 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3194 					    sizeof(*entry), trace_ctx);
3195 	if (!event)
3196 		return;
3197 
3198 	delta = ring_buffer_event_time_stamp(buffer, event) -
3199 		last_info->ts_last_call;
3200 
3201 	entry = ring_buffer_event_data(event);
3202 	entry->ip = last_info->ip;
3203 	entry->parent_ip = last_info->parent_ip;
3204 	entry->count = last_info->count;
3205 	func_repeats_set_delta_ts(entry, delta);
3206 
3207 	__buffer_unlock_commit(buffer, event);
3208 }
3209 
3210 /* created for use with alloc_percpu */
3211 struct trace_buffer_struct {
3212 	int nesting;
3213 	char buffer[4][TRACE_BUF_SIZE];
3214 };
3215 
3216 static struct trace_buffer_struct *trace_percpu_buffer;
3217 
3218 /*
3219  * This allows for lockless recording.  If we're nested too deeply, then
3220  * this returns NULL.
3221  */
3222 static char *get_trace_buf(void)
3223 {
3224 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3225 
3226 	if (!buffer || buffer->nesting >= 4)
3227 		return NULL;
3228 
3229 	buffer->nesting++;
3230 
3231 	/* Interrupts must see nesting incremented before we use the buffer */
3232 	barrier();
3233 	return &buffer->buffer[buffer->nesting - 1][0];
3234 }
3235 
3236 static void put_trace_buf(void)
3237 {
3238 	/* Don't let the decrement of nesting leak before this */
3239 	barrier();
3240 	this_cpu_dec(trace_percpu_buffer->nesting);
3241 }
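
/*
 * Example (usage sketch mirroring trace_vbprintk() below): every
 * successful get_trace_buf() must be paired with a put_trace_buf() on
 * the same CPU, with preemption disabled across the pair:
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		// ... format into tbuffer ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */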
3242 
3243 static int alloc_percpu_trace_buffer(void)
3244 {
3245 	struct trace_buffer_struct *buffers;
3246 
3247 	if (trace_percpu_buffer)
3248 		return 0;
3249 
3250 	buffers = alloc_percpu(struct trace_buffer_struct);
3251 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3252 		return -ENOMEM;
3253 
3254 	trace_percpu_buffer = buffers;
3255 	return 0;
3256 }
3257 
3258 static int buffers_allocated;
3259 
3260 void trace_printk_init_buffers(void)
3261 {
3262 	if (buffers_allocated)
3263 		return;
3264 
3265 	if (alloc_percpu_trace_buffer())
3266 		return;
3267 
3268 	/* trace_printk() is for debug use only. Don't use it in production. */
3269 
3270 	pr_warn("\n");
3271 	pr_warn("**********************************************************\n");
3272 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3273 	pr_warn("**                                                      **\n");
3274 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3275 	pr_warn("**                                                      **\n");
3276 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3277 	pr_warn("** unsafe for production use.                           **\n");
3278 	pr_warn("**                                                      **\n");
3279 	pr_warn("** If you see this message and you are not debugging    **\n");
3280 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3281 	pr_warn("**                                                      **\n");
3282 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3283 	pr_warn("**********************************************************\n");
3284 
3285 	/* Expand the buffers to the set size */
3286 	tracing_update_buffers();
3287 
3288 	buffers_allocated = 1;
3289 
3290 	/*
3291 	 * trace_printk_init_buffers() can be called by modules.
3292 	 * If that happens, then we need to start cmdline recording
3293 	 * directly here. If the global_trace.buffer is already
3294 	 * allocated here, then this was called by module code.
3295 	 */
3296 	if (global_trace.array_buffer.buffer)
3297 		tracing_start_cmdline_record();
3298 }
3299 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3300 
3301 void trace_printk_start_comm(void)
3302 {
3303 	/* Start tracing comms if trace printk is set */
3304 	if (!buffers_allocated)
3305 		return;
3306 	tracing_start_cmdline_record();
3307 }
3308 
3309 static void trace_printk_start_stop_comm(int enabled)
3310 {
3311 	if (!buffers_allocated)
3312 		return;
3313 
3314 	if (enabled)
3315 		tracing_start_cmdline_record();
3316 	else
3317 		tracing_stop_cmdline_record();
3318 }
3319 
3320 /**
3321  * trace_vbprintk - write binary msg to tracing buffer
3322  * @ip:    The address of the caller
3323  * @fmt:   The string format to write to the buffer
3324  * @args:  Arguments for @fmt
3325  */
3326 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3327 {
3328 	struct trace_event_call *call = &event_bprint;
3329 	struct ring_buffer_event *event;
3330 	struct trace_buffer *buffer;
3331 	struct trace_array *tr = &global_trace;
3332 	struct bprint_entry *entry;
3333 	unsigned int trace_ctx;
3334 	char *tbuffer;
3335 	int len = 0, size;
3336 
3337 	if (unlikely(tracing_selftest_running || tracing_disabled))
3338 		return 0;
3339 
3340 	/* Don't pollute graph traces with trace_vprintk internals */
3341 	pause_graph_tracing();
3342 
3343 	trace_ctx = tracing_gen_ctx();
3344 	preempt_disable_notrace();
3345 
3346 	tbuffer = get_trace_buf();
3347 	if (!tbuffer) {
3348 		len = 0;
3349 		goto out_nobuffer;
3350 	}
3351 
3352 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3353 
3354 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3355 		goto out_put;
3356 
3357 	size = sizeof(*entry) + sizeof(u32) * len;
3358 	buffer = tr->array_buffer.buffer;
3359 	ring_buffer_nest_start(buffer);
3360 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3361 					    trace_ctx);
3362 	if (!event)
3363 		goto out;
3364 	entry = ring_buffer_event_data(event);
3365 	entry->ip			= ip;
3366 	entry->fmt			= fmt;
3367 
3368 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3369 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3370 		__buffer_unlock_commit(buffer, event);
3371 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3372 	}
3373 
3374 out:
3375 	ring_buffer_nest_end(buffer);
3376 out_put:
3377 	put_trace_buf();
3378 
3379 out_nobuffer:
3380 	preempt_enable_notrace();
3381 	unpause_graph_tracing();
3382 
3383 	return len;
3384 }
3385 EXPORT_SYMBOL_GPL(trace_vbprintk);
3386 
3387 __printf(3, 0)
3388 static int
3389 __trace_array_vprintk(struct trace_buffer *buffer,
3390 		      unsigned long ip, const char *fmt, va_list args)
3391 {
3392 	struct trace_event_call *call = &event_print;
3393 	struct ring_buffer_event *event;
3394 	int len = 0, size;
3395 	struct print_entry *entry;
3396 	unsigned int trace_ctx;
3397 	char *tbuffer;
3398 
3399 	if (tracing_disabled || tracing_selftest_running)
3400 		return 0;
3401 
3402 	/* Don't pollute graph traces with trace_vprintk internals */
3403 	pause_graph_tracing();
3404 
3405 	trace_ctx = tracing_gen_ctx();
3406 	preempt_disable_notrace();
3407 
3408 
3409 	tbuffer = get_trace_buf();
3410 	if (!tbuffer) {
3411 		len = 0;
3412 		goto out_nobuffer;
3413 	}
3414 
3415 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3416 
3417 	size = sizeof(*entry) + len + 1;
3418 	ring_buffer_nest_start(buffer);
3419 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3420 					    trace_ctx);
3421 	if (!event)
3422 		goto out;
3423 	entry = ring_buffer_event_data(event);
3424 	entry->ip = ip;
3425 
3426 	memcpy(&entry->buf, tbuffer, len + 1);
3427 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3428 		__buffer_unlock_commit(buffer, event);
3429 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3430 	}
3431 
3432 out:
3433 	ring_buffer_nest_end(buffer);
3434 	put_trace_buf();
3435 
3436 out_nobuffer:
3437 	preempt_enable_notrace();
3438 	unpause_graph_tracing();
3439 
3440 	return len;
3441 }
3442 
3443 __printf(3, 0)
3444 int trace_array_vprintk(struct trace_array *tr,
3445 			unsigned long ip, const char *fmt, va_list args)
3446 {
3447 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3448 }
3449 
3450 /**
3451  * trace_array_printk - Print a message to a specific instance
3452  * @tr: The instance trace_array descriptor
3453  * @ip: The instruction pointer that this is called from.
3454  * @fmt: The format to print (printf format)
3455  *
3456  * If a subsystem sets up its own instance, it may printk strings
3457  * into its tracing instance buffer using this function. Note, this
3458  * function will not write into the top level buffer (use
3459  * trace_printk() for that), as the top level buffer should only
3460  * contain events that can be individually disabled. trace_printk()
3461  * is only for debugging a kernel, and should never be used in
3462  * normal (production) code.
3463  *
3464  * trace_array_printk() can be used, as it will not add noise to the
3465  * top level tracing buffer.
3466  *
3467  * Note, trace_array_init_printk() must be called on @tr before this
3468  * can be used.
3469  */
3470 __printf(3, 0)
3471 int trace_array_printk(struct trace_array *tr,
3472 		       unsigned long ip, const char *fmt, ...)
3473 {
3474 	int ret;
3475 	va_list ap;
3476 
3477 	if (!tr)
3478 		return -ENOENT;
3479 
3480 	/* This is only allowed for created instances */
3481 	if (tr == &global_trace)
3482 		return 0;
3483 
3484 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3485 		return 0;
3486 
3487 	va_start(ap, fmt);
3488 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3489 	va_end(ap);
3490 	return ret;
3491 }
3492 EXPORT_SYMBOL_GPL(trace_array_printk);
3493 
3494 /**
3495  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3496  * @tr: The trace array to initialize the buffers for
3497  *
3498  * As trace_array_printk() only writes into instances, they are OK to
3499  * have in the kernel (unlike trace_printk()). This needs to be called
3500  * before trace_array_printk() can be used on a trace_array.
3501  */
3502 int trace_array_init_printk(struct trace_array *tr)
3503 {
3504 	if (!tr)
3505 		return -ENOENT;
3506 
3507 	/* This is only allowed for created instances */
3508 	if (tr == &global_trace)
3509 		return -EINVAL;
3510 
3511 	return alloc_percpu_trace_buffer();
3512 }
3513 EXPORT_SYMBOL_GPL(trace_array_init_printk);
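
/*
 * Example (sketch for a subsystem-owned instance; "myinst" is a
 * hypothetical instance name and trace_array_get_by_name() is assumed
 * to be available to the caller):
 *
 *	struct trace_array *tr = trace_array_get_by_name("myinst");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 */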
3514 
3515 __printf(3, 4)
3516 int trace_array_printk_buf(struct trace_buffer *buffer,
3517 			   unsigned long ip, const char *fmt, ...)
3518 {
3519 	int ret;
3520 	va_list ap;
3521 
3522 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3523 		return 0;
3524 
3525 	va_start(ap, fmt);
3526 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3527 	va_end(ap);
3528 	return ret;
3529 }
3530 
3531 __printf(2, 0)
3532 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3533 {
3534 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3535 }
3536 EXPORT_SYMBOL_GPL(trace_vprintk);
3537 
3538 static void trace_iterator_increment(struct trace_iterator *iter)
3539 {
3540 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3541 
3542 	iter->idx++;
3543 	if (buf_iter)
3544 		ring_buffer_iter_advance(buf_iter);
3545 }
3546 
3547 static struct trace_entry *
3548 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3549 		unsigned long *lost_events)
3550 {
3551 	struct ring_buffer_event *event;
3552 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3553 
3554 	if (buf_iter) {
3555 		event = ring_buffer_iter_peek(buf_iter, ts);
3556 		if (lost_events)
3557 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3558 				(unsigned long)-1 : 0;
3559 	} else {
3560 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3561 					 lost_events);
3562 	}
3563 
3564 	if (event) {
3565 		iter->ent_size = ring_buffer_event_length(event);
3566 		return ring_buffer_event_data(event);
3567 	}
3568 	iter->ent_size = 0;
3569 	return NULL;
3570 }
3571 
3572 static struct trace_entry *
3573 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3574 		  unsigned long *missing_events, u64 *ent_ts)
3575 {
3576 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3577 	struct trace_entry *ent, *next = NULL;
3578 	unsigned long lost_events = 0, next_lost = 0;
3579 	int cpu_file = iter->cpu_file;
3580 	u64 next_ts = 0, ts;
3581 	int next_cpu = -1;
3582 	int next_size = 0;
3583 	int cpu;
3584 
3585 	/*
3586 	 * If we are in a per_cpu trace file, don't bother iterating over
3587 	 * all CPUs; peek at that CPU directly.
3588 	 */
3589 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3590 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3591 			return NULL;
3592 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3593 		if (ent_cpu)
3594 			*ent_cpu = cpu_file;
3595 
3596 		return ent;
3597 	}
3598 
3599 	for_each_tracing_cpu(cpu) {
3600 
3601 		if (ring_buffer_empty_cpu(buffer, cpu))
3602 			continue;
3603 
3604 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3605 
3606 		/*
3607 		 * Pick the entry with the smallest timestamp:
3608 		 */
3609 		if (ent && (!next || ts < next_ts)) {
3610 			next = ent;
3611 			next_cpu = cpu;
3612 			next_ts = ts;
3613 			next_lost = lost_events;
3614 			next_size = iter->ent_size;
3615 		}
3616 	}
3617 
3618 	iter->ent_size = next_size;
3619 
3620 	if (ent_cpu)
3621 		*ent_cpu = next_cpu;
3622 
3623 	if (ent_ts)
3624 		*ent_ts = next_ts;
3625 
3626 	if (missing_events)
3627 		*missing_events = next_lost;
3628 
3629 	return next;
3630 }
3631 
3632 #define STATIC_FMT_BUF_SIZE	128
3633 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3634 
3635 static char *trace_iter_expand_format(struct trace_iterator *iter)
3636 {
3637 	char *tmp;
3638 
3639 	/*
3640 	 * iter->tr is NULL when used with tp_printk, which means this
3641 	 * can be called where it is not safe to call krealloc().
3642 	 */
3643 	if (!iter->tr || iter->fmt == static_fmt_buf)
3644 		return NULL;
3645 
3646 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3647 		       GFP_KERNEL);
3648 	if (tmp) {
3649 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3650 		iter->fmt = tmp;
3651 	}
3652 
3653 	return tmp;
3654 }
3655 
3656 /* Returns true if the string is safe to dereference from an event */
3657 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3658 {
3659 	unsigned long addr = (unsigned long)str;
3660 	struct trace_event *trace_event;
3661 	struct trace_event_call *event;
3662 
3663 	/* OK if part of the event data */
3664 	if ((addr >= (unsigned long)iter->ent) &&
3665 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3666 		return true;
3667 
3668 	/* OK if part of the temp seq buffer */
3669 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3670 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3671 		return true;
3672 
3673 	/* Core rodata can not be freed */
3674 	if (is_kernel_rodata(addr))
3675 		return true;
3676 
3677 	if (trace_is_tracepoint_string(str))
3678 		return true;
3679 
3680 	/*
3681 	 * Now this could be a module event, referencing core module
3682 	 * data, which is OK.
3683 	 */
3684 	if (!iter->ent)
3685 		return false;
3686 
3687 	trace_event = ftrace_find_event(iter->ent->type);
3688 	if (!trace_event)
3689 		return false;
3690 
3691 	event = container_of(trace_event, struct trace_event_call, event);
3692 	if (!event->mod)
3693 		return false;
3694 
3695 	/* Would rather have rodata, but this will suffice */
3696 	if (within_module_core(addr, event->mod))
3697 		return true;
3698 
3699 	return false;
3700 }
3701 
3702 static const char *show_buffer(struct trace_seq *s)
3703 {
3704 	struct seq_buf *seq = &s->seq;
3705 
3706 	seq_buf_terminate(seq);
3707 
3708 	return seq->buffer;
3709 }
3710 
3711 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3712 
3713 static int test_can_verify_check(const char *fmt, ...)
3714 {
3715 	char buf[16];
3716 	va_list ap;
3717 	int ret;
3718 
3719 	/*
3720 	 * The verifier depends on vsnprintf() modifying the va_list
3721 	 * passed to it, i.e. on the va_list being passed by reference.
3722 	 * Some architectures (like x86_32) pass it by value, which means
3723 	 * that vsnprintf() does not modify the caller's va_list, and the
3724 	 * verifier would then need to understand every value that
3725 	 * vsnprintf() can consume. If the va_list is passed by value,
3726 	 * the verifier is disabled.
3727 	 */
3728 	va_start(ap, fmt);
3729 	vsnprintf(buf, 16, "%d", ap);
3730 	ret = va_arg(ap, int);
3731 	va_end(ap);
3732 
3733 	return ret;
3734 }
3735 
3736 static void test_can_verify(void)
3737 {
3738 	if (!test_can_verify_check("%d %d", 0, 1)) {
3739 		pr_info("trace event string verifier disabled\n");
3740 		static_branch_inc(&trace_no_verify);
3741 	}
3742 }
3743 
3744 /**
3745  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3746  * @iter: The iterator that holds the seq buffer and the event being printed
3747  * @fmt: The format used to print the event
3748  * @ap: The va_list holding the data to print from @fmt.
3749  *
3750  * This writes the data into the @iter->seq buffer using the data from
3751  * @fmt and @ap. If the format has a %s, then the source of the string
3752  * is examined to make sure it is safe to print, otherwise it will
3753  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3754  * pointer.
3755  */
3756 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3757 			 va_list ap)
3758 {
3759 	const char *p = fmt;
3760 	const char *str;
3761 	int i, j;
3762 
3763 	if (WARN_ON_ONCE(!fmt))
3764 		return;
3765 
3766 	if (static_branch_unlikely(&trace_no_verify))
3767 		goto print;
3768 
3769 	/* Don't bother checking when doing a ftrace_dump() */
3770 	if (iter->fmt == static_fmt_buf)
3771 		goto print;
3772 
3773 	while (*p) {
3774 		bool star = false;
3775 		int len = 0;
3776 
3777 		j = 0;
3778 
3779 		/* We only care about %s and variants */
3780 		for (i = 0; p[i]; i++) {
3781 			if (i + 1 >= iter->fmt_size) {
3782 				/*
3783 				 * If we can't expand the copy buffer,
3784 				 * just print it.
3785 				 */
3786 				if (!trace_iter_expand_format(iter))
3787 					goto print;
3788 			}
3789 
3790 			if (p[i] == '\\' && p[i+1]) {
3791 				i++;
3792 				continue;
3793 			}
3794 			if (p[i] == '%') {
3795 				/* Need to test cases like %08.*s */
3796 				for (j = 1; p[i+j]; j++) {
3797 					if (isdigit(p[i+j]) ||
3798 					    p[i+j] == '.')
3799 						continue;
3800 					if (p[i+j] == '*') {
3801 						star = true;
3802 						continue;
3803 					}
3804 					break;
3805 				}
3806 				if (p[i+j] == 's')
3807 					break;
3808 				star = false;
3809 			}
3810 			j = 0;
3811 		}
3812 		/* If no %s found then just print normally */
3813 		if (!p[i])
3814 			break;
3815 
3816 		/* Copy up to the %s, and print that */
3817 		strncpy(iter->fmt, p, i);
3818 		iter->fmt[i] = '\0';
3819 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3820 
3821 		if (star)
3822 			len = va_arg(ap, int);
3823 
3824 		/* The ap now points to the string data of the %s */
3825 		str = va_arg(ap, const char *);
3826 
3827 		/*
3828 		 * If you hit this warning, it is likely that the
3829 		 * trace event in question used %s on a string that
3830 		 * was saved at the time of the event, but may not be
3831 		 * around when the trace is read. Use __string(),
3832 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3833 		 * instead. See samples/trace_events/trace-events-sample.h
3834 		 * for reference.
3835 		 */
3836 		if (WARN_ONCE(!trace_safe_str(iter, str),
3837 			      "fmt: '%s' current_buffer: '%s'",
3838 			      fmt, show_buffer(&iter->seq))) {
3839 			int ret;
3840 
3841 			/* Try to safely read the string */
3842 			if (star) {
3843 				if (len + 1 > iter->fmt_size)
3844 					len = iter->fmt_size - 1;
3845 				if (len < 0)
3846 					len = 0;
3847 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3848 				iter->fmt[len] = 0;
3849 				star = false;
3850 			} else {
3851 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3852 								  iter->fmt_size);
3853 			}
3854 			if (ret < 0)
3855 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3856 			else
3857 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3858 						 str, iter->fmt);
3859 			str = "[UNSAFE-MEMORY]";
3860 			strcpy(iter->fmt, "%s");
3861 		} else {
3862 			strncpy(iter->fmt, p + i, j + 1);
3863 			iter->fmt[j+1] = '\0';
3864 		}
3865 		if (star)
3866 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3867 		else
3868 			trace_seq_printf(&iter->seq, iter->fmt, str);
3869 
3870 		p += i + j + 1;
3871 	}
3872  print:
3873 	if (*p)
3874 		trace_seq_vprintf(&iter->seq, p, ap);
3875 }
3876 
3877 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3878 {
3879 	const char *p, *new_fmt;
3880 	char *q;
3881 
3882 	if (WARN_ON_ONCE(!fmt))
3883 		return fmt;
3884 
3885 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3886 		return fmt;
3887 
3888 	p = fmt;
3889 	new_fmt = q = iter->fmt;
3890 	while (*p) {
3891 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3892 			if (!trace_iter_expand_format(iter))
3893 				return fmt;
3894 
3895 			q += iter->fmt - new_fmt;
3896 			new_fmt = iter->fmt;
3897 		}
3898 
3899 		*q++ = *p++;
3900 
3901 		/* Replace %p with %px */
3902 		if (p[-1] == '%') {
3903 			if (p[0] == '%') {
3904 				*q++ = *p++;
3905 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3906 				*q++ = *p++;
3907 				*q++ = 'x';
3908 			}
3909 		}
3910 	}
3911 	*q = '\0';
3912 
3913 	return new_fmt;
3914 }
3915 
3916 #define STATIC_TEMP_BUF_SIZE	128
3917 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3918 
3919 /* Find the next real entry, without updating the iterator itself */
3920 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3921 					  int *ent_cpu, u64 *ent_ts)
3922 {
3923 	/* __find_next_entry will reset ent_size */
3924 	int ent_size = iter->ent_size;
3925 	struct trace_entry *entry;
3926 
3927 	/*
3928 	 * If called from ftrace_dump(), then the iter->temp buffer
3929 	 * will be the static_temp_buf and not created from kmalloc.
3930 	 * If the entry size is greater than the buffer, we cannot
3931 	 * save it. Just return NULL in that case. This is only
3932 	 * used to add markers when two consecutive events' time
3933 	 * stamps have a large delta. See trace_print_lat_context().
3934 	 */
3935 	if (iter->temp == static_temp_buf &&
3936 	    STATIC_TEMP_BUF_SIZE < ent_size)
3937 		return NULL;
3938 
3939 	/*
3940 	 * __find_next_entry() may call peek_next_entry(), which may
3941 	 * call ring_buffer_peek(), which can make the contents of
3942 	 * iter->ent undefined. Need to copy iter->ent now.
3943 	 */
3944 	if (iter->ent && iter->ent != iter->temp) {
3945 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3946 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3947 			void *temp;
3948 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3949 			if (!temp)
3950 				return NULL;
3951 			kfree(iter->temp);
3952 			iter->temp = temp;
3953 			iter->temp_size = iter->ent_size;
3954 		}
3955 		memcpy(iter->temp, iter->ent, iter->ent_size);
3956 		iter->ent = iter->temp;
3957 	}
3958 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3959 	/* Put back the original ent_size */
3960 	iter->ent_size = ent_size;
3961 
3962 	return entry;
3963 }
3964 
3965 /* Find the next real entry, and increment the iterator to the next entry */
3966 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3967 {
3968 	iter->ent = __find_next_entry(iter, &iter->cpu,
3969 				      &iter->lost_events, &iter->ts);
3970 
3971 	if (iter->ent)
3972 		trace_iterator_increment(iter);
3973 
3974 	return iter->ent ? iter : NULL;
3975 }
3976 
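/* Read the next entry on iter->cpu and remove it from the ring buffer */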
3977 static void trace_consume(struct trace_iterator *iter)
3978 {
3979 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3980 			    &iter->lost_events);
3981 }
3982 
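/* seq_file ->next(): advance the trace iterator until it reaches position *pos */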
3983 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3984 {
3985 	struct trace_iterator *iter = m->private;
3986 	int i = (int)*pos;
3987 	void *ent;
3988 
3989 	WARN_ON_ONCE(iter->leftover);
3990 
3991 	(*pos)++;
3992 
3993 	/* can't go backwards */
3994 	if (iter->idx > i)
3995 		return NULL;
3996 
3997 	if (iter->idx < 0)
3998 		ent = trace_find_next_entry_inc(iter);
3999 	else
4000 		ent = iter;
4001 
4002 	while (ent && iter->idx < i)
4003 		ent = trace_find_next_entry_inc(iter);
4004 
4005 	iter->pos = *pos;
4006 
4007 	return ent;
4008 }
4009 
4010 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4011 {
4012 	struct ring_buffer_iter *buf_iter;
4013 	unsigned long entries = 0;
4014 	u64 ts;
4015 
4016 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4017 
4018 	buf_iter = trace_buffer_iter(iter, cpu);
4019 	if (!buf_iter)
4020 		return;
4021 
4022 	ring_buffer_iter_reset(buf_iter);
4023 
4024 	/*
4025 	 * We could have the case with the max latency tracers
4026 	 * that a reset never took place on a cpu. This is evident
4027 	 * by the timestamp being before the start of the buffer.
4028 	 */
4029 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4030 		if (ts >= iter->array_buffer->time_start)
4031 			break;
4032 		entries++;
4033 		ring_buffer_iter_advance(buf_iter);
4034 	}
4035 
4036 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4037 }
4038 
4039 /*
4040  * The current tracer is copied to avoid taking a global
4041  * lock all around.
4042  */
4043 static void *s_start(struct seq_file *m, loff_t *pos)
4044 {
4045 	struct trace_iterator *iter = m->private;
4046 	struct trace_array *tr = iter->tr;
4047 	int cpu_file = iter->cpu_file;
4048 	void *p = NULL;
4049 	loff_t l = 0;
4050 	int cpu;
4051 
4052 	/*
4053 	 * Copy the tracer to avoid using a global lock all around.
4054 	 * iter->trace is a copy of current_trace; the pointer to the
4055 	 * name may be used instead of a strcmp(), as iter->trace->name
4056 	 * will point to the same string as current_trace->name.
4057 	 */
4058 	mutex_lock(&trace_types_lock);
4059 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4060 		*iter->trace = *tr->current_trace;
4061 	mutex_unlock(&trace_types_lock);
4062 
4063 #ifdef CONFIG_TRACER_MAX_TRACE
4064 	if (iter->snapshot && iter->trace->use_max_tr)
4065 		return ERR_PTR(-EBUSY);
4066 #endif
4067 
4068 	if (!iter->snapshot)
4069 		atomic_inc(&trace_record_taskinfo_disabled);
4070 
4071 	if (*pos != iter->pos) {
4072 		iter->ent = NULL;
4073 		iter->cpu = 0;
4074 		iter->idx = -1;
4075 
4076 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4077 			for_each_tracing_cpu(cpu)
4078 				tracing_iter_reset(iter, cpu);
4079 		} else
4080 			tracing_iter_reset(iter, cpu_file);
4081 
4082 		iter->leftover = 0;
4083 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4084 			;
4085 
4086 	} else {
4087 		/*
4088 		 * If we overflowed the seq_file before, then we want
4089 		 * to just reuse the trace_seq buffer again.
4090 		 */
4091 		if (iter->leftover)
4092 			p = iter;
4093 		else {
4094 			l = *pos - 1;
4095 			p = s_next(m, p, &l);
4096 		}
4097 	}
4098 
4099 	trace_event_read_lock();
4100 	trace_access_lock(cpu_file);
4101 	return p;
4102 }
4103 
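/* seq_file ->stop(): release the locks taken in s_start() and re-enable taskinfo recording */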
4104 static void s_stop(struct seq_file *m, void *p)
4105 {
4106 	struct trace_iterator *iter = m->private;
4107 
4108 #ifdef CONFIG_TRACER_MAX_TRACE
4109 	if (iter->snapshot && iter->trace->use_max_tr)
4110 		return;
4111 #endif
4112 
4113 	if (!iter->snapshot)
4114 		atomic_dec(&trace_record_taskinfo_disabled);
4115 
4116 	trace_access_unlock(iter->cpu_file);
4117 	trace_event_read_unlock();
4118 }
4119 
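/*
 * @entries gets the number of entries still in this CPU's buffer; @total
 * also counts entries lost to overruns, unless this CPU had skipped entries.
 */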
4120 static void
4121 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4122 		      unsigned long *entries, int cpu)
4123 {
4124 	unsigned long count;
4125 
4126 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4127 	/*
4128 	 * If this buffer has skipped entries, then we hold all
4129 	 * entries for the trace and we need to ignore the
4130 	 * ones before the time stamp.
4131 	 */
4132 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4133 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4134 		/* total is the same as the entries */
4135 		*total = count;
4136 	} else
4137 		*total = count +
4138 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4139 	*entries = count;
4140 }
4141 
4142 static void
4143 get_total_entries(struct array_buffer *buf,
4144 		  unsigned long *total, unsigned long *entries)
4145 {
4146 	unsigned long t, e;
4147 	int cpu;
4148 
4149 	*total = 0;
4150 	*entries = 0;
4151 
4152 	for_each_tracing_cpu(cpu) {
4153 		get_total_entries_cpu(buf, &t, &e, cpu);
4154 		*total += t;
4155 		*entries += e;
4156 	}
4157 }
4158 
4159 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4160 {
4161 	unsigned long total, entries;
4162 
4163 	if (!tr)
4164 		tr = &global_trace;
4165 
4166 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4167 
4168 	return entries;
4169 }
4170 
4171 unsigned long trace_total_entries(struct trace_array *tr)
4172 {
4173 	unsigned long total, entries;
4174 
4175 	if (!tr)
4176 		tr = &global_trace;
4177 
4178 	get_total_entries(&tr->array_buffer, &total, &entries);
4179 
4180 	return entries;
4181 }
4182 
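/* Column legend printed at the top of the latency trace output */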
4183 static void print_lat_help_header(struct seq_file *m)
4184 {
4185 	seq_puts(m, "#                    _------=> CPU#            \n"
4186 		    "#                   / _-----=> irqs-off        \n"
4187 		    "#                  | / _----=> need-resched    \n"
4188 		    "#                  || / _---=> hardirq/softirq \n"
4189 		    "#                  ||| / _--=> preempt-depth   \n"
4190 		    "#                  |||| /     delay            \n"
4191 		    "#  cmd     pid     ||||| time  |   caller      \n"
4192 		    "#     \\   /        |||||  \\    |   /         \n");
4193 }
4194 
4195 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4196 {
4197 	unsigned long total;
4198 	unsigned long entries;
4199 
4200 	get_total_entries(buf, &total, &entries);
4201 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4202 		   entries, total, num_online_cpus());
4203 	seq_puts(m, "#\n");
4204 }
4205 
4206 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4207 				   unsigned int flags)
4208 {
4209 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4210 
4211 	print_event_info(buf, m);
4212 
4213 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4214 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4215 }
4216 
4217 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4218 				       unsigned int flags)
4219 {
4220 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4221 	const char *space = "            ";
4222 	int prec = tgid ? 12 : 2;
4223 
4224 	print_event_info(buf, m);
4225 
4226 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4227 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4228 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4229 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4230 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4231 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4232 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4233 }
4234 
4235 void
4236 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4237 {
4238 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4239 	struct array_buffer *buf = iter->array_buffer;
4240 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4241 	struct tracer *type = iter->trace;
4242 	unsigned long entries;
4243 	unsigned long total;
4244 	const char *name = "preemption";
4245 
4246 	name = type->name;
4247 
4248 	get_total_entries(buf, &total, &entries);
4249 
4250 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4251 		   name, UTS_RELEASE);
4252 	seq_puts(m, "# -----------------------------------"
4253 		 "---------------------------------\n");
4254 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4255 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4256 		   nsecs_to_usecs(data->saved_latency),
4257 		   entries,
4258 		   total,
4259 		   buf->cpu,
4260 #if defined(CONFIG_PREEMPT_NONE)
4261 		   "server",
4262 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4263 		   "desktop",
4264 #elif defined(CONFIG_PREEMPT)
4265 		   "preempt",
4266 #elif defined(CONFIG_PREEMPT_RT)
4267 		   "preempt_rt",
4268 #else
4269 		   "unknown",
4270 #endif
4271 		   /* These are reserved for later use */
4272 		   0, 0, 0, 0);
4273 #ifdef CONFIG_SMP
4274 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4275 #else
4276 	seq_puts(m, ")\n");
4277 #endif
4278 	seq_puts(m, "#    -----------------\n");
4279 	seq_printf(m, "#    | task: %.16s-%d "
4280 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4281 		   data->comm, data->pid,
4282 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4283 		   data->policy, data->rt_priority);
4284 	seq_puts(m, "#    -----------------\n");
4285 
4286 	if (data->critical_start) {
4287 		seq_puts(m, "#  => started at: ");
4288 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4289 		trace_print_seq(m, &iter->seq);
4290 		seq_puts(m, "\n#  => ended at:   ");
4291 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4292 		trace_print_seq(m, &iter->seq);
4293 		seq_puts(m, "\n#\n");
4294 	}
4295 
4296 	seq_puts(m, "#\n");
4297 }
4298 
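/*
 * When buffer annotations are enabled, note the first time output comes
 * from a CPU buffer that has not been seen yet (unless that CPU's early
 * entries were skipped).
 */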
4299 static void test_cpu_buff_start(struct trace_iterator *iter)
4300 {
4301 	struct trace_seq *s = &iter->seq;
4302 	struct trace_array *tr = iter->tr;
4303 
4304 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4305 		return;
4306 
4307 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4308 		return;
4309 
4310 	if (cpumask_available(iter->started) &&
4311 	    cpumask_test_cpu(iter->cpu, iter->started))
4312 		return;
4313 
4314 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4315 		return;
4316 
4317 	if (cpumask_available(iter->started))
4318 		cpumask_set_cpu(iter->cpu, iter->started);
4319 
4320 	/* Don't print started cpu buffer for the first entry of the trace */
4321 	if (iter->idx > 1)
4322 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4323 				iter->cpu);
4324 }
4325 
4326 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4327 {
4328 	struct trace_array *tr = iter->tr;
4329 	struct trace_seq *s = &iter->seq;
4330 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4331 	struct trace_entry *entry;
4332 	struct trace_event *event;
4333 
4334 	entry = iter->ent;
4335 
4336 	test_cpu_buff_start(iter);
4337 
4338 	event = ftrace_find_event(entry->type);
4339 
4340 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4341 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4342 			trace_print_lat_context(iter);
4343 		else
4344 			trace_print_context(iter);
4345 	}
4346 
4347 	if (trace_seq_has_overflowed(s))
4348 		return TRACE_TYPE_PARTIAL_LINE;
4349 
4350 	if (event)
4351 		return event->funcs->trace(iter, sym_flags, event);
4352 
4353 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4354 
4355 	return trace_handle_return(s);
4356 }
4357 
4358 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4359 {
4360 	struct trace_array *tr = iter->tr;
4361 	struct trace_seq *s = &iter->seq;
4362 	struct trace_entry *entry;
4363 	struct trace_event *event;
4364 
4365 	entry = iter->ent;
4366 
4367 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4368 		trace_seq_printf(s, "%d %d %llu ",
4369 				 entry->pid, iter->cpu, iter->ts);
4370 
4371 	if (trace_seq_has_overflowed(s))
4372 		return TRACE_TYPE_PARTIAL_LINE;
4373 
4374 	event = ftrace_find_event(entry->type);
4375 	if (event)
4376 		return event->funcs->raw(iter, 0, event);
4377 
4378 	trace_seq_printf(s, "%d ?\n", entry->type);
4379 
4380 	return trace_handle_return(s);
4381 }
4382 
4383 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4384 {
4385 	struct trace_array *tr = iter->tr;
4386 	struct trace_seq *s = &iter->seq;
4387 	unsigned char newline = '\n';
4388 	struct trace_entry *entry;
4389 	struct trace_event *event;
4390 
4391 	entry = iter->ent;
4392 
4393 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4394 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4395 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4396 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4397 		if (trace_seq_has_overflowed(s))
4398 			return TRACE_TYPE_PARTIAL_LINE;
4399 	}
4400 
4401 	event = ftrace_find_event(entry->type);
4402 	if (event) {
4403 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4404 		if (ret != TRACE_TYPE_HANDLED)
4405 			return ret;
4406 	}
4407 
4408 	SEQ_PUT_FIELD(s, newline);
4409 
4410 	return trace_handle_return(s);
4411 }
4412 
4413 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4414 {
4415 	struct trace_array *tr = iter->tr;
4416 	struct trace_seq *s = &iter->seq;
4417 	struct trace_entry *entry;
4418 	struct trace_event *event;
4419 
4420 	entry = iter->ent;
4421 
4422 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4423 		SEQ_PUT_FIELD(s, entry->pid);
4424 		SEQ_PUT_FIELD(s, iter->cpu);
4425 		SEQ_PUT_FIELD(s, iter->ts);
4426 		if (trace_seq_has_overflowed(s))
4427 			return TRACE_TYPE_PARTIAL_LINE;
4428 	}
4429 
4430 	event = ftrace_find_event(entry->type);
4431 	return event ? event->funcs->binary(iter, 0, event) :
4432 		TRACE_TYPE_HANDLED;
4433 }
4434 
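/* Return 1 if there is nothing left to read in the selected CPU buffer(s) */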
4435 int trace_empty(struct trace_iterator *iter)
4436 {
4437 	struct ring_buffer_iter *buf_iter;
4438 	int cpu;
4439 
4440 	/* If we are looking at one CPU buffer, only check that one */
4441 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4442 		cpu = iter->cpu_file;
4443 		buf_iter = trace_buffer_iter(iter, cpu);
4444 		if (buf_iter) {
4445 			if (!ring_buffer_iter_empty(buf_iter))
4446 				return 0;
4447 		} else {
4448 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4449 				return 0;
4450 		}
4451 		return 1;
4452 	}
4453 
4454 	for_each_tracing_cpu(cpu) {
4455 		buf_iter = trace_buffer_iter(iter, cpu);
4456 		if (buf_iter) {
4457 			if (!ring_buffer_iter_empty(buf_iter))
4458 				return 0;
4459 		} else {
4460 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4461 				return 0;
4462 		}
4463 	}
4464 
4465 	return 1;
4466 }
4467 
4468 /*  Called with trace_event_read_lock() held. */
4469 enum print_line_t print_trace_line(struct trace_iterator *iter)
4470 {
4471 	struct trace_array *tr = iter->tr;
4472 	unsigned long trace_flags = tr->trace_flags;
4473 	enum print_line_t ret;
4474 
4475 	if (iter->lost_events) {
4476 		if (iter->lost_events == (unsigned long)-1)
4477 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4478 					 iter->cpu);
4479 		else
4480 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4481 					 iter->cpu, iter->lost_events);
4482 		if (trace_seq_has_overflowed(&iter->seq))
4483 			return TRACE_TYPE_PARTIAL_LINE;
4484 	}
4485 
4486 	if (iter->trace && iter->trace->print_line) {
4487 		ret = iter->trace->print_line(iter);
4488 		if (ret != TRACE_TYPE_UNHANDLED)
4489 			return ret;
4490 	}
4491 
4492 	if (iter->ent->type == TRACE_BPUTS &&
4493 			trace_flags & TRACE_ITER_PRINTK &&
4494 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4495 		return trace_print_bputs_msg_only(iter);
4496 
4497 	if (iter->ent->type == TRACE_BPRINT &&
4498 			trace_flags & TRACE_ITER_PRINTK &&
4499 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4500 		return trace_print_bprintk_msg_only(iter);
4501 
4502 	if (iter->ent->type == TRACE_PRINT &&
4503 			trace_flags & TRACE_ITER_PRINTK &&
4504 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4505 		return trace_print_printk_msg_only(iter);
4506 
4507 	if (trace_flags & TRACE_ITER_BIN)
4508 		return print_bin_fmt(iter);
4509 
4510 	if (trace_flags & TRACE_ITER_HEX)
4511 		return print_hex_fmt(iter);
4512 
4513 	if (trace_flags & TRACE_ITER_RAW)
4514 		return print_raw_fmt(iter);
4515 
4516 	return print_trace_fmt(iter);
4517 }
4518 
4519 void trace_latency_header(struct seq_file *m)
4520 {
4521 	struct trace_iterator *iter = m->private;
4522 	struct trace_array *tr = iter->tr;
4523 
4524 	/* print nothing if the buffers are empty */
4525 	if (trace_empty(iter))
4526 		return;
4527 
4528 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4529 		print_trace_header(m, iter);
4530 
4531 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4532 		print_lat_help_header(m);
4533 }
4534 
4535 void trace_default_header(struct seq_file *m)
4536 {
4537 	struct trace_iterator *iter = m->private;
4538 	struct trace_array *tr = iter->tr;
4539 	unsigned long trace_flags = tr->trace_flags;
4540 
4541 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4542 		return;
4543 
4544 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4545 		/* print nothing if the buffers are empty */
4546 		if (trace_empty(iter))
4547 			return;
4548 		print_trace_header(m, iter);
4549 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4550 			print_lat_help_header(m);
4551 	} else {
4552 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4553 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4554 				print_func_help_header_irq(iter->array_buffer,
4555 							   m, trace_flags);
4556 			else
4557 				print_func_help_header(iter->array_buffer, m,
4558 						       trace_flags);
4559 		}
4560 	}
4561 }
4562 
4563 static void test_ftrace_alive(struct seq_file *m)
4564 {
4565 	if (!ftrace_is_dead())
4566 		return;
4567 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4568 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4569 }
4570 
4571 #ifdef CONFIG_TRACER_MAX_TRACE
4572 static void show_snapshot_main_help(struct seq_file *m)
4573 {
4574 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4575 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4576 		    "#                      Takes a snapshot of the main buffer.\n"
4577 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4578 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4579 		    "#                       is not a '0' or '1')\n");
4580 }
4581 
4582 static void show_snapshot_percpu_help(struct seq_file *m)
4583 {
4584 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4585 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4586 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4587 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4588 #else
4589 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4590 		    "#                     Must use main snapshot file to allocate.\n");
4591 #endif
4592 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4593 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4594 		    "#                       is not a '0' or '1')\n");
4595 }
4596 
4597 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4598 {
4599 	if (iter->tr->allocated_snapshot)
4600 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4601 	else
4602 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4603 
4604 	seq_puts(m, "# Snapshot commands:\n");
4605 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4606 		show_snapshot_main_help(m);
4607 	else
4608 		show_snapshot_percpu_help(m);
4609 }
4610 #else
4611 /* Should never be called */
4612 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4613 #endif
4614 
4615 static int s_show(struct seq_file *m, void *v)
4616 {
4617 	struct trace_iterator *iter = v;
4618 	int ret;
4619 
4620 	if (iter->ent == NULL) {
4621 		if (iter->tr) {
4622 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4623 			seq_puts(m, "#\n");
4624 			test_ftrace_alive(m);
4625 		}
4626 		if (iter->snapshot && trace_empty(iter))
4627 			print_snapshot_help(m, iter);
4628 		else if (iter->trace && iter->trace->print_header)
4629 			iter->trace->print_header(m);
4630 		else
4631 			trace_default_header(m);
4632 
4633 	} else if (iter->leftover) {
4634 		/*
4635 		 * If we filled the seq_file buffer earlier, we
4636 		 * want to just show it now.
4637 		 */
4638 		ret = trace_print_seq(m, &iter->seq);
4639 
4640 		/* ret should this time be zero, but you never know */
4641 		iter->leftover = ret;
4642 
4643 	} else {
4644 		print_trace_line(iter);
4645 		ret = trace_print_seq(m, &iter->seq);
4646 		/*
4647 		 * If we overflow the seq_file buffer, then it will
4648 		 * ask us for this data again at start up.
4649 		 * Use that instead.
4650 		 *  ret is 0 if seq_file write succeeded.
4651 		 *        -1 otherwise.
4652 		 */
4653 		iter->leftover = ret;
4654 	}
4655 
4656 	return 0;
4657 }
4658 
4659 /*
4660  * Should be used after trace_array_get(), trace_types_lock
4661  * ensures that i_cdev was already initialized.
4662  */
4663 static inline int tracing_get_cpu(struct inode *inode)
4664 {
4665 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4666 		return (long)inode->i_cdev - 1;
4667 	return RING_BUFFER_ALL_CPUS;
4668 }
4669 
4670 static const struct seq_operations tracer_seq_ops = {
4671 	.start		= s_start,
4672 	.next		= s_next,
4673 	.stop		= s_stop,
4674 	.show		= s_show,
4675 };
4676 
4677 static struct trace_iterator *
4678 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4679 {
4680 	struct trace_array *tr = inode->i_private;
4681 	struct trace_iterator *iter;
4682 	int cpu;
4683 
4684 	if (tracing_disabled)
4685 		return ERR_PTR(-ENODEV);
4686 
4687 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4688 	if (!iter)
4689 		return ERR_PTR(-ENOMEM);
4690 
4691 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4692 				    GFP_KERNEL);
4693 	if (!iter->buffer_iter)
4694 		goto release;
4695 
4696 	/*
4697 	 * trace_find_next_entry() may need to save off iter->ent.
4698 	 * It will place it into the iter->temp buffer. As most
4699 	 * events are less than 128, allocate a buffer of that size.
4700 	 * If one is greater, then trace_find_next_entry() will
4701 	 * allocate a new buffer to adjust for the bigger iter->ent.
4702 	 * It's not critical if it fails to get allocated here.
4703 	 */
4704 	iter->temp = kmalloc(128, GFP_KERNEL);
4705 	if (iter->temp)
4706 		iter->temp_size = 128;
4707 
4708 	/*
4709 	 * trace_event_printf() may need to modify the given format
4710 	 * string to replace %p with %px so that it shows the real
4711 	 * address instead of a hashed value. However, that is only
4712 	 * needed for event tracing; other tracers may not need it.
4713 	 * Defer the allocation until it is needed.
4714 	 */
4715 	iter->fmt = NULL;
4716 	iter->fmt_size = 0;
4717 
4718 	/*
4719 	 * We make a copy of the current tracer to avoid concurrent
4720 	 * changes on it while we are reading.
4721 	 */
4722 	mutex_lock(&trace_types_lock);
4723 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4724 	if (!iter->trace)
4725 		goto fail;
4726 
4727 	*iter->trace = *tr->current_trace;
4728 
4729 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4730 		goto fail;
4731 
4732 	iter->tr = tr;
4733 
4734 #ifdef CONFIG_TRACER_MAX_TRACE
4735 	/* Currently only the top directory has a snapshot */
4736 	if (tr->current_trace->print_max || snapshot)
4737 		iter->array_buffer = &tr->max_buffer;
4738 	else
4739 #endif
4740 		iter->array_buffer = &tr->array_buffer;
4741 	iter->snapshot = snapshot;
4742 	iter->pos = -1;
4743 	iter->cpu_file = tracing_get_cpu(inode);
4744 	mutex_init(&iter->mutex);
4745 
4746 	/* Notify the tracer early; before we stop tracing. */
4747 	if (iter->trace->open)
4748 		iter->trace->open(iter);
4749 
4750 	/* Annotate start of buffers if we had overruns */
4751 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4752 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4753 
4754 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4755 	if (trace_clocks[tr->clock_id].in_ns)
4756 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4757 
4758 	/*
4759 	 * If pause-on-trace is enabled, then stop the trace while
4760 	 * dumping, unless this is the "snapshot" file
4761 	 */
4762 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4763 		tracing_stop_tr(tr);
4764 
4765 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4766 		for_each_tracing_cpu(cpu) {
4767 			iter->buffer_iter[cpu] =
4768 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4769 							 cpu, GFP_KERNEL);
4770 		}
4771 		ring_buffer_read_prepare_sync();
4772 		for_each_tracing_cpu(cpu) {
4773 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4774 			tracing_iter_reset(iter, cpu);
4775 		}
4776 	} else {
4777 		cpu = iter->cpu_file;
4778 		iter->buffer_iter[cpu] =
4779 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4780 						 cpu, GFP_KERNEL);
4781 		ring_buffer_read_prepare_sync();
4782 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4783 		tracing_iter_reset(iter, cpu);
4784 	}
4785 
4786 	mutex_unlock(&trace_types_lock);
4787 
4788 	return iter;
4789 
4790  fail:
4791 	mutex_unlock(&trace_types_lock);
4792 	kfree(iter->trace);
4793 	kfree(iter->temp);
4794 	kfree(iter->buffer_iter);
4795 release:
4796 	seq_release_private(inode, file);
4797 	return ERR_PTR(-ENOMEM);
4798 }
4799 
4800 int tracing_open_generic(struct inode *inode, struct file *filp)
4801 {
4802 	int ret;
4803 
4804 	ret = tracing_check_open_get_tr(NULL);
4805 	if (ret)
4806 		return ret;
4807 
4808 	filp->private_data = inode->i_private;
4809 	return 0;
4810 }
4811 
4812 bool tracing_is_disabled(void)
4813 {
4814 	return tracing_disabled ? true : false;
4815 }
4816 
4817 /*
4818  * Open and update trace_array ref count.
4819  * Must have the current trace_array passed to it.
4820  */
4821 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4822 {
4823 	struct trace_array *tr = inode->i_private;
4824 	int ret;
4825 
4826 	ret = tracing_check_open_get_tr(tr);
4827 	if (ret)
4828 		return ret;
4829 
4830 	filp->private_data = inode->i_private;
4831 
4832 	return 0;
4833 }
4834 
4835 static int tracing_release(struct inode *inode, struct file *file)
4836 {
4837 	struct trace_array *tr = inode->i_private;
4838 	struct seq_file *m = file->private_data;
4839 	struct trace_iterator *iter;
4840 	int cpu;
4841 
4842 	if (!(file->f_mode & FMODE_READ)) {
4843 		trace_array_put(tr);
4844 		return 0;
4845 	}
4846 
4847 	/* Writes do not use seq_file */
4848 	iter = m->private;
4849 	mutex_lock(&trace_types_lock);
4850 
4851 	for_each_tracing_cpu(cpu) {
4852 		if (iter->buffer_iter[cpu])
4853 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4854 	}
4855 
4856 	if (iter->trace && iter->trace->close)
4857 		iter->trace->close(iter);
4858 
4859 	if (!iter->snapshot && tr->stop_count)
4860 		/* reenable tracing if it was previously enabled */
4861 		tracing_start_tr(tr);
4862 
4863 	__trace_array_put(tr);
4864 
4865 	mutex_unlock(&trace_types_lock);
4866 
4867 	mutex_destroy(&iter->mutex);
4868 	free_cpumask_var(iter->started);
4869 	kfree(iter->fmt);
4870 	kfree(iter->temp);
4871 	kfree(iter->trace);
4872 	kfree(iter->buffer_iter);
4873 	seq_release_private(inode, file);
4874 
4875 	return 0;
4876 }
4877 
4878 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4879 {
4880 	struct trace_array *tr = inode->i_private;
4881 
4882 	trace_array_put(tr);
4883 	return 0;
4884 }
4885 
4886 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4887 {
4888 	struct trace_array *tr = inode->i_private;
4889 
4890 	trace_array_put(tr);
4891 
4892 	return single_release(inode, file);
4893 }
4894 
4895 static int tracing_open(struct inode *inode, struct file *file)
4896 {
4897 	struct trace_array *tr = inode->i_private;
4898 	struct trace_iterator *iter;
4899 	int ret;
4900 
4901 	ret = tracing_check_open_get_tr(tr);
4902 	if (ret)
4903 		return ret;
4904 
4905 	/* If this file was open for write, then erase contents */
4906 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4907 		int cpu = tracing_get_cpu(inode);
4908 		struct array_buffer *trace_buf = &tr->array_buffer;
4909 
4910 #ifdef CONFIG_TRACER_MAX_TRACE
4911 		if (tr->current_trace->print_max)
4912 			trace_buf = &tr->max_buffer;
4913 #endif
4914 
4915 		if (cpu == RING_BUFFER_ALL_CPUS)
4916 			tracing_reset_online_cpus(trace_buf);
4917 		else
4918 			tracing_reset_cpu(trace_buf, cpu);
4919 	}
4920 
4921 	if (file->f_mode & FMODE_READ) {
4922 		iter = __tracing_open(inode, file, false);
4923 		if (IS_ERR(iter))
4924 			ret = PTR_ERR(iter);
4925 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4926 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4927 	}
4928 
4929 	if (ret < 0)
4930 		trace_array_put(tr);
4931 
4932 	return ret;
4933 }
4934 
4935 /*
4936  * Some tracers are not suitable for instance buffers.
4937  * A tracer is always available for the global array (toplevel)
4938  * or if it explicitly states that it is.
4939  */
4940 static bool
4941 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4942 {
4943 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4944 }
4945 
4946 /* Find the next tracer that this trace array may use */
4947 static struct tracer *
4948 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4949 {
4950 	while (t && !trace_ok_for_array(t, tr))
4951 		t = t->next;
4952 
4953 	return t;
4954 }
4955 
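/* seq_file iterator used by show_traces_fops: walks the registered tracers */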
4956 static void *
4957 t_next(struct seq_file *m, void *v, loff_t *pos)
4958 {
4959 	struct trace_array *tr = m->private;
4960 	struct tracer *t = v;
4961 
4962 	(*pos)++;
4963 
4964 	if (t)
4965 		t = get_tracer_for_array(tr, t->next);
4966 
4967 	return t;
4968 }
4969 
4970 static void *t_start(struct seq_file *m, loff_t *pos)
4971 {
4972 	struct trace_array *tr = m->private;
4973 	struct tracer *t;
4974 	loff_t l = 0;
4975 
4976 	mutex_lock(&trace_types_lock);
4977 
4978 	t = get_tracer_for_array(tr, trace_types);
4979 	for (; t && l < *pos; t = t_next(m, t, &l))
4980 			;
4981 
4982 	return t;
4983 }
4984 
4985 static void t_stop(struct seq_file *m, void *p)
4986 {
4987 	mutex_unlock(&trace_types_lock);
4988 }
4989 
4990 static int t_show(struct seq_file *m, void *v)
4991 {
4992 	struct tracer *t = v;
4993 
4994 	if (!t)
4995 		return 0;
4996 
4997 	seq_puts(m, t->name);
4998 	if (t->next)
4999 		seq_putc(m, ' ');
5000 	else
5001 		seq_putc(m, '\n');
5002 
5003 	return 0;
5004 }
5005 
5006 static const struct seq_operations show_traces_seq_ops = {
5007 	.start		= t_start,
5008 	.next		= t_next,
5009 	.stop		= t_stop,
5010 	.show		= t_show,
5011 };
5012 
5013 static int show_traces_open(struct inode *inode, struct file *file)
5014 {
5015 	struct trace_array *tr = inode->i_private;
5016 	struct seq_file *m;
5017 	int ret;
5018 
5019 	ret = tracing_check_open_get_tr(tr);
5020 	if (ret)
5021 		return ret;
5022 
5023 	ret = seq_open(file, &show_traces_seq_ops);
5024 	if (ret) {
5025 		trace_array_put(tr);
5026 		return ret;
5027 	}
5028 
5029 	m = file->private_data;
5030 	m->private = tr;
5031 
5032 	return 0;
5033 }
5034 
5035 static int show_traces_release(struct inode *inode, struct file *file)
5036 {
5037 	struct trace_array *tr = inode->i_private;
5038 
5039 	trace_array_put(tr);
5040 	return seq_release(inode, file);
5041 }
5042 
5043 static ssize_t
5044 tracing_write_stub(struct file *filp, const char __user *ubuf,
5045 		   size_t count, loff_t *ppos)
5046 {
5047 	return count;
5048 }
5049 
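/*
 * lseek for trace files: readable files are seq_files and can seek
 * normally; write-only opens simply have their position reset to 0.
 */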
5050 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5051 {
5052 	int ret;
5053 
5054 	if (file->f_mode & FMODE_READ)
5055 		ret = seq_lseek(file, offset, whence);
5056 	else
5057 		file->f_pos = ret = 0;
5058 
5059 	return ret;
5060 }
5061 
5062 static const struct file_operations tracing_fops = {
5063 	.open		= tracing_open,
5064 	.read		= seq_read,
5065 	.write		= tracing_write_stub,
5066 	.llseek		= tracing_lseek,
5067 	.release	= tracing_release,
5068 };
5069 
5070 static const struct file_operations show_traces_fops = {
5071 	.open		= show_traces_open,
5072 	.read		= seq_read,
5073 	.llseek		= seq_lseek,
5074 	.release	= show_traces_release,
5075 };
5076 
5077 static ssize_t
5078 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5079 		     size_t count, loff_t *ppos)
5080 {
5081 	struct trace_array *tr = file_inode(filp)->i_private;
5082 	char *mask_str;
5083 	int len;
5084 
5085 	len = snprintf(NULL, 0, "%*pb\n",
5086 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5087 	mask_str = kmalloc(len, GFP_KERNEL);
5088 	if (!mask_str)
5089 		return -ENOMEM;
5090 
5091 	len = snprintf(mask_str, len, "%*pb\n",
5092 		       cpumask_pr_args(tr->tracing_cpumask));
5093 	if (len >= count) {
5094 		count = -EINVAL;
5095 		goto out_err;
5096 	}
5097 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5098 
5099 out_err:
5100 	kfree(mask_str);
5101 
5102 	return count;
5103 }
5104 
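/*
 * Apply a new tracing cpumask: stop recording on CPUs leaving the mask,
 * resume it on CPUs entering the mask, then store the new mask.
 */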
5105 int tracing_set_cpumask(struct trace_array *tr,
5106 			cpumask_var_t tracing_cpumask_new)
5107 {
5108 	int cpu;
5109 
5110 	if (!tr)
5111 		return -EINVAL;
5112 
5113 	local_irq_disable();
5114 	arch_spin_lock(&tr->max_lock);
5115 	for_each_tracing_cpu(cpu) {
5116 		/*
5117 		 * Increase/decrease the disabled counter if we are
5118 		 * about to flip a bit in the cpumask:
5119 		 */
5120 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5121 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5122 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5123 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5124 		}
5125 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5126 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5127 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5128 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5129 		}
5130 	}
5131 	arch_spin_unlock(&tr->max_lock);
5132 	local_irq_enable();
5133 
5134 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5135 
5136 	return 0;
5137 }
5138 
5139 static ssize_t
5140 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5141 		      size_t count, loff_t *ppos)
5142 {
5143 	struct trace_array *tr = file_inode(filp)->i_private;
5144 	cpumask_var_t tracing_cpumask_new;
5145 	int err;
5146 
5147 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5148 		return -ENOMEM;
5149 
5150 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5151 	if (err)
5152 		goto err_free;
5153 
5154 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5155 	if (err)
5156 		goto err_free;
5157 
5158 	free_cpumask_var(tracing_cpumask_new);
5159 
5160 	return count;
5161 
5162 err_free:
5163 	free_cpumask_var(tracing_cpumask_new);
5164 
5165 	return err;
5166 }
5167 
5168 static const struct file_operations tracing_cpumask_fops = {
5169 	.open		= tracing_open_generic_tr,
5170 	.read		= tracing_cpumask_read,
5171 	.write		= tracing_cpumask_write,
5172 	.release	= tracing_release_generic_tr,
5173 	.llseek		= generic_file_llseek,
5174 };
5175 
5176 static int tracing_trace_options_show(struct seq_file *m, void *v)
5177 {
5178 	struct tracer_opt *trace_opts;
5179 	struct trace_array *tr = m->private;
5180 	u32 tracer_flags;
5181 	int i;
5182 
5183 	mutex_lock(&trace_types_lock);
5184 	tracer_flags = tr->current_trace->flags->val;
5185 	trace_opts = tr->current_trace->flags->opts;
5186 
5187 	for (i = 0; trace_options[i]; i++) {
5188 		if (tr->trace_flags & (1 << i))
5189 			seq_printf(m, "%s\n", trace_options[i]);
5190 		else
5191 			seq_printf(m, "no%s\n", trace_options[i]);
5192 	}
5193 
5194 	for (i = 0; trace_opts[i].name; i++) {
5195 		if (tracer_flags & trace_opts[i].bit)
5196 			seq_printf(m, "%s\n", trace_opts[i].name);
5197 		else
5198 			seq_printf(m, "no%s\n", trace_opts[i].name);
5199 	}
5200 	mutex_unlock(&trace_types_lock);
5201 
5202 	return 0;
5203 }
5204 
5205 static int __set_tracer_option(struct trace_array *tr,
5206 			       struct tracer_flags *tracer_flags,
5207 			       struct tracer_opt *opts, int neg)
5208 {
5209 	struct tracer *trace = tracer_flags->trace;
5210 	int ret;
5211 
5212 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5213 	if (ret)
5214 		return ret;
5215 
5216 	if (neg)
5217 		tracer_flags->val &= ~opts->bit;
5218 	else
5219 		tracer_flags->val |= opts->bit;
5220 	return 0;
5221 }
5222 
5223 /* Try to assign a tracer specific option */
5224 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5225 {
5226 	struct tracer *trace = tr->current_trace;
5227 	struct tracer_flags *tracer_flags = trace->flags;
5228 	struct tracer_opt *opts = NULL;
5229 	int i;
5230 
5231 	for (i = 0; tracer_flags->opts[i].name; i++) {
5232 		opts = &tracer_flags->opts[i];
5233 
5234 		if (strcmp(cmp, opts->name) == 0)
5235 			return __set_tracer_option(tr, trace->flags, opts, neg);
5236 	}
5237 
5238 	return -EINVAL;
5239 }
5240 
5241 /* Some tracers require overwrite to stay enabled */
5242 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5243 {
5244 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5245 		return -1;
5246 
5247 	return 0;
5248 }
5249 
5250 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5251 {
5252 	int *map;
5253 
5254 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5255 	    (mask == TRACE_ITER_RECORD_CMD))
5256 		lockdep_assert_held(&event_mutex);
5257 
5258 	/* do nothing if flag is already set */
5259 	if (!!(tr->trace_flags & mask) == !!enabled)
5260 		return 0;
5261 
5262 	/* Give the tracer a chance to approve the change */
5263 	if (tr->current_trace->flag_changed)
5264 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5265 			return -EINVAL;
5266 
5267 	if (enabled)
5268 		tr->trace_flags |= mask;
5269 	else
5270 		tr->trace_flags &= ~mask;
5271 
5272 	if (mask == TRACE_ITER_RECORD_CMD)
5273 		trace_event_enable_cmd_record(enabled);
5274 
5275 	if (mask == TRACE_ITER_RECORD_TGID) {
5276 		if (!tgid_map) {
5277 			tgid_map_max = pid_max;
5278 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5279 				       GFP_KERNEL);
5280 
5281 			/*
5282 			 * Pairs with smp_load_acquire() in
5283 			 * trace_find_tgid_ptr() to ensure that if it observes
5284 			 * the tgid_map we just allocated then it also observes
5285 			 * the corresponding tgid_map_max value.
5286 			 */
5287 			smp_store_release(&tgid_map, map);
5288 		}
5289 		if (!tgid_map) {
5290 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5291 			return -ENOMEM;
5292 		}
5293 
5294 		trace_event_enable_tgid_record(enabled);
5295 	}
5296 
5297 	if (mask == TRACE_ITER_EVENT_FORK)
5298 		trace_event_follow_fork(tr, enabled);
5299 
5300 	if (mask == TRACE_ITER_FUNC_FORK)
5301 		ftrace_pid_follow_fork(tr, enabled);
5302 
5303 	if (mask == TRACE_ITER_OVERWRITE) {
5304 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5305 #ifdef CONFIG_TRACER_MAX_TRACE
5306 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5307 #endif
5308 	}
5309 
5310 	if (mask == TRACE_ITER_PRINTK) {
5311 		trace_printk_start_stop_comm(enabled);
5312 		trace_printk_control(enabled);
5313 	}
5314 
5315 	return 0;
5316 }
5317 
5318 int trace_set_options(struct trace_array *tr, char *option)
5319 {
5320 	char *cmp;
5321 	int neg = 0;
5322 	int ret;
5323 	size_t orig_len = strlen(option);
5324 	int len;
5325 
5326 	cmp = strstrip(option);
5327 
5328 	len = str_has_prefix(cmp, "no");
5329 	if (len)
5330 		neg = 1;
5331 
5332 	cmp += len;
5333 
5334 	mutex_lock(&event_mutex);
5335 	mutex_lock(&trace_types_lock);
5336 
5337 	ret = match_string(trace_options, -1, cmp);
5338 	/* If no option could be set, test the specific tracer options */
5339 	if (ret < 0)
5340 		ret = set_tracer_option(tr, cmp, neg);
5341 	else
5342 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5343 
5344 	mutex_unlock(&trace_types_lock);
5345 	mutex_unlock(&event_mutex);
5346 
5347 	/*
5348 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5349 	 * turn it back into a space.
5350 	 */
5351 	if (orig_len > strlen(option))
5352 		option[strlen(option)] = ' ';
5353 
5354 	return ret;
5355 }
5356 
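/* Apply each comma-separated option from the boot-time option string */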
5357 static void __init apply_trace_boot_options(void)
5358 {
5359 	char *buf = trace_boot_options_buf;
5360 	char *option;
5361 
5362 	while (true) {
5363 		option = strsep(&buf, ",");
5364 
5365 		if (!option)
5366 			break;
5367 
5368 		if (*option)
5369 			trace_set_options(&global_trace, option);
5370 
5371 		/* Put back the comma to allow this to be called again */
5372 		if (buf)
5373 			*(buf - 1) = ',';
5374 	}
5375 }
5376 
5377 static ssize_t
5378 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5379 			size_t cnt, loff_t *ppos)
5380 {
5381 	struct seq_file *m = filp->private_data;
5382 	struct trace_array *tr = m->private;
5383 	char buf[64];
5384 	int ret;
5385 
5386 	if (cnt >= sizeof(buf))
5387 		return -EINVAL;
5388 
5389 	if (copy_from_user(buf, ubuf, cnt))
5390 		return -EFAULT;
5391 
5392 	buf[cnt] = 0;
5393 
5394 	ret = trace_set_options(tr, buf);
5395 	if (ret < 0)
5396 		return ret;
5397 
5398 	*ppos += cnt;
5399 
5400 	return cnt;
5401 }
5402 
5403 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5404 {
5405 	struct trace_array *tr = inode->i_private;
5406 	int ret;
5407 
5408 	ret = tracing_check_open_get_tr(tr);
5409 	if (ret)
5410 		return ret;
5411 
5412 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5413 	if (ret < 0)
5414 		trace_array_put(tr);
5415 
5416 	return ret;
5417 }
5418 
5419 static const struct file_operations tracing_iter_fops = {
5420 	.open		= tracing_trace_options_open,
5421 	.read		= seq_read,
5422 	.llseek		= seq_lseek,
5423 	.release	= tracing_single_release_tr,
5424 	.write		= tracing_trace_options_write,
5425 };
5426 
5427 static const char readme_msg[] =
5428 	"tracing mini-HOWTO:\n\n"
5429 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5430 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5431 	" Important files:\n"
5432 	"  trace\t\t\t- The static contents of the buffer\n"
5433 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5434 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5435 	"  current_tracer\t- function and latency tracers\n"
5436 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5437 	"  error_log\t- error log for failed commands (that support it)\n"
5438 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5439 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5440 	"  trace_clock\t\t- change the clock used to order events\n"
5441 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5442 	"      global:   Synced across CPUs but slows tracing down.\n"
5443 	"     counter:   Not a clock, but just an increment\n"
5444 	"      uptime:   Jiffy counter from time of boot\n"
5445 	"        perf:   Same clock that perf events use\n"
5446 #ifdef CONFIG_X86_64
5447 	"     x86-tsc:   TSC cycle counter\n"
5448 #endif
5449 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5450 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5451 	"    absolute:   Absolute (standalone) timestamp\n"
5452 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5453 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5454 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5455 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5456 	"\t\t\t  Remove sub-buffer with rmdir\n"
5457 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5458 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5459 	"\t\t\t  option name\n"
5460 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5461 #ifdef CONFIG_DYNAMIC_FTRACE
5462 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5463 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5464 	"\t\t\t  functions\n"
5465 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5466 	"\t     modules: Can select a group via module\n"
5467 	"\t      Format: :mod:<module-name>\n"
5468 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5469 	"\t    triggers: a command to perform when function is hit\n"
5470 	"\t      Format: <function>:<trigger>[:count]\n"
5471 	"\t     trigger: traceon, traceoff\n"
5472 	"\t\t      enable_event:<system>:<event>\n"
5473 	"\t\t      disable_event:<system>:<event>\n"
5474 #ifdef CONFIG_STACKTRACE
5475 	"\t\t      stacktrace\n"
5476 #endif
5477 #ifdef CONFIG_TRACER_SNAPSHOT
5478 	"\t\t      snapshot\n"
5479 #endif
5480 	"\t\t      dump\n"
5481 	"\t\t      cpudump\n"
5482 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5483 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5484 	"\t     The first one will disable tracing every time do_fault is hit\n"
5485 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5486 	"\t       The first time do trap is hit and it disables tracing, the\n"
5487 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5488 	"\t       the counter will not decrement. It only decrements when the\n"
5489 	"\t       trigger did work\n"
5490 	"\t     To remove trigger without count:\n"
5491 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5492 	"\t     To remove trigger with a count:\n"
5493 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5494 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5495 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5496 	"\t    modules: Can select a group via module command :mod:\n"
5497 	"\t    Does not accept triggers\n"
5498 #endif /* CONFIG_DYNAMIC_FTRACE */
5499 #ifdef CONFIG_FUNCTION_TRACER
5500 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5501 	"\t\t    (function)\n"
5502 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5503 	"\t\t    (function)\n"
5504 #endif
5505 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5506 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5507 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5508 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5509 #endif
5510 #ifdef CONFIG_TRACER_SNAPSHOT
5511 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5512 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5513 	"\t\t\t  information\n"
5514 #endif
5515 #ifdef CONFIG_STACK_TRACER
5516 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5517 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5518 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5519 	"\t\t\t  new trace)\n"
5520 #ifdef CONFIG_DYNAMIC_FTRACE
5521 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5522 	"\t\t\t  traces\n"
5523 #endif
5524 #endif /* CONFIG_STACK_TRACER */
5525 #ifdef CONFIG_DYNAMIC_EVENTS
5526 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5527 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5528 #endif
5529 #ifdef CONFIG_KPROBE_EVENTS
5530 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5531 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5532 #endif
5533 #ifdef CONFIG_UPROBE_EVENTS
5534 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5535 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5536 #endif
5537 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5538 	"\t  accepts: event-definitions (one definition per line)\n"
5539 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5540 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5541 #ifdef CONFIG_HIST_TRIGGERS
5542 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5543 #endif
5544 	"\t           -:[<group>/]<event>\n"
5545 #ifdef CONFIG_KPROBE_EVENTS
5546 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5547   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5548 #endif
5549 #ifdef CONFIG_UPROBE_EVENTS
5550   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5551 #endif
5552 	"\t     args: <name>=fetcharg[:type]\n"
5553 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5554 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5555 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5556 #else
5557 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5558 #endif
5559 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5560 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5561 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5562 	"\t           <type>\\[<array-size>\\]\n"
5563 #ifdef CONFIG_HIST_TRIGGERS
5564 	"\t    field: <stype> <name>;\n"
5565 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5566 	"\t           [unsigned] char/int/long\n"
5567 #endif
5568 #endif
5569 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5570 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5571 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5572 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5573 	"\t\t\t  events\n"
5574 	"      filter\t\t- If set, only events passing filter are traced\n"
5575 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5576 	"\t\t\t  <event>:\n"
5577 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5578 	"      filter\t\t- If set, only events passing filter are traced\n"
5579 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5580 	"\t    Format: <trigger>[:count][if <filter>]\n"
5581 	"\t   trigger: traceon, traceoff\n"
5582 	"\t            enable_event:<system>:<event>\n"
5583 	"\t            disable_event:<system>:<event>\n"
5584 #ifdef CONFIG_HIST_TRIGGERS
5585 	"\t            enable_hist:<system>:<event>\n"
5586 	"\t            disable_hist:<system>:<event>\n"
5587 #endif
5588 #ifdef CONFIG_STACKTRACE
5589 	"\t\t    stacktrace\n"
5590 #endif
5591 #ifdef CONFIG_TRACER_SNAPSHOT
5592 	"\t\t    snapshot\n"
5593 #endif
5594 #ifdef CONFIG_HIST_TRIGGERS
5595 	"\t\t    hist (see below)\n"
5596 #endif
5597 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5598 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5599 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5600 	"\t                  events/block/block_unplug/trigger\n"
5601 	"\t   The first disables tracing every time block_unplug is hit.\n"
5602 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5603 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5604 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5605 	"\t   Like function triggers, the counter is only decremented if it\n"
5606 	"\t    enabled or disabled tracing.\n"
5607 	"\t   To remove a trigger without a count:\n"
5608 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5609 	"\t   To remove a trigger with a count:\n"
5610 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5611 	"\t   Filters can be ignored when removing a trigger.\n"
5612 #ifdef CONFIG_HIST_TRIGGERS
5613 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5614 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5615 	"\t            [:values=<field1[,field2,...]>]\n"
5616 	"\t            [:sort=<field1[,field2,...]>]\n"
5617 	"\t            [:size=#entries]\n"
5618 	"\t            [:pause][:continue][:clear]\n"
5619 	"\t            [:name=histname1]\n"
5620 	"\t            [:<handler>.<action>]\n"
5621 	"\t            [if <filter>]\n\n"
5622 	"\t    When a matching event is hit, an entry is added to a hash\n"
5623 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5624 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5625 	"\t    correspond to fields in the event's format description.  Keys\n"
5626 	"\t    can be any field, or the special string 'stacktrace'.\n"
5627 	"\t    Compound keys consisting of up to two fields can be specified\n"
5628 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5629 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5630 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5631 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5632 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5633 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5634 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5635 	"\t    its histogram data will be shared with other triggers of the\n"
5636 	"\t    same name, and trigger hits will update this common data.\n\n"
5637 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5638 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5639 	"\t    triggers attached to an event, there will be a table for each\n"
5640 	"\t    trigger in the output.  The table displayed for a named\n"
5641 	"\t    trigger will be the same as any other instance having the\n"
5642 	"\t    same name.  The default format used to display a given field\n"
5643 	"\t    can be modified by appending any of the following modifiers\n"
5644 	"\t    to the field name, as applicable:\n\n"
5645 	"\t            .hex        display a number as a hex value\n"
5646 	"\t            .sym        display an address as a symbol\n"
5647 	"\t            .sym-offset display an address as a symbol and offset\n"
5648 	"\t            .execname   display a common_pid as a program name\n"
5649 	"\t            .syscall    display a syscall id as a syscall name\n"
5650 	"\t            .log2       display log2 value rather than raw number\n"
5651 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5652 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5653 	"\t    trigger or to start a hist trigger but not log any events\n"
5654 	"\t    until told to do so.  'continue' can be used to start or\n"
5655 	"\t    restart a paused hist trigger.\n\n"
5656 	"\t    The 'clear' parameter will clear the contents of a running\n"
5657 	"\t    hist trigger and leave its current paused/active state\n"
5658 	"\t    unchanged.\n\n"
5659 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5660 	"\t    have one event conditionally start and stop another event's\n"
5661 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5662 	"\t    the enable_event and disable_event triggers.\n\n"
5663 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5664 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5665 	"\t        <handler>.<action>\n\n"
5666 	"\t    The available handlers are:\n\n"
5667 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5668 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5669 	"\t        onchange(var)            - invoke action if var changes\n\n"
5670 	"\t    The available actions are:\n\n"
5671 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5672 	"\t        save(field,...)                      - save current event fields\n"
5673 #ifdef CONFIG_TRACER_SNAPSHOT
5674 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5675 #endif
5676 #ifdef CONFIG_SYNTH_EVENTS
5677 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5678 	"\t  Write into this file to define/undefine new synthetic events.\n"
5679 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5680 #endif
5681 #endif
5682 ;
5683 
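/*
 * The help text above is exposed verbatim through the "README" file in
 * the tracefs directory. Illustrative usage (assuming tracefs is mounted
 * at /sys/kernel/tracing, its usual location):
 *
 *   cat /sys/kernel/tracing/README
 */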
5684 static ssize_t
5685 tracing_readme_read(struct file *filp, char __user *ubuf,
5686 		       size_t cnt, loff_t *ppos)
5687 {
5688 	return simple_read_from_buffer(ubuf, cnt, ppos,
5689 					readme_msg, strlen(readme_msg));
5690 }
5691 
5692 static const struct file_operations tracing_readme_fops = {
5693 	.open		= tracing_open_generic,
5694 	.read		= tracing_readme_read,
5695 	.llseek		= generic_file_llseek,
5696 };
5697 
5698 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5699 {
5700 	int pid = ++(*pos);
5701 
5702 	return trace_find_tgid_ptr(pid);
5703 }
5704 
5705 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5706 {
5707 	int pid = *pos;
5708 
5709 	return trace_find_tgid_ptr(pid);
5710 }
5711 
5712 static void saved_tgids_stop(struct seq_file *m, void *v)
5713 {
5714 }
5715 
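/*
 * Each record shown by the "saved_tgids" file has the form "<pid> <tgid>".
 * Illustrative output (hypothetical values; entries are only present while
 * tgid recording is enabled, e.g. via the record-tgid trace option):
 *
 *   1023 1019
 *   1024 1019
 */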
5716 static int saved_tgids_show(struct seq_file *m, void *v)
5717 {
5718 	int *entry = (int *)v;
5719 	int pid = entry - tgid_map;
5720 	int tgid = *entry;
5721 
5722 	if (tgid == 0)
5723 		return SEQ_SKIP;
5724 
5725 	seq_printf(m, "%d %d\n", pid, tgid);
5726 	return 0;
5727 }
5728 
5729 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5730 	.start		= saved_tgids_start,
5731 	.stop		= saved_tgids_stop,
5732 	.next		= saved_tgids_next,
5733 	.show		= saved_tgids_show,
5734 };
5735 
5736 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5737 {
5738 	int ret;
5739 
5740 	ret = tracing_check_open_get_tr(NULL);
5741 	if (ret)
5742 		return ret;
5743 
5744 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5745 }
5746 
5747 
5748 static const struct file_operations tracing_saved_tgids_fops = {
5749 	.open		= tracing_saved_tgids_open,
5750 	.read		= seq_read,
5751 	.llseek		= seq_lseek,
5752 	.release	= seq_release,
5753 };
5754 
5755 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5756 {
5757 	unsigned int *ptr = v;
5758 
5759 	if (*pos || m->count)
5760 		ptr++;
5761 
5762 	(*pos)++;
5763 
5764 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5765 	     ptr++) {
5766 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5767 			continue;
5768 
5769 		return ptr;
5770 	}
5771 
5772 	return NULL;
5773 }
5774 
5775 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5776 {
5777 	void *v;
5778 	loff_t l = 0;
5779 
5780 	preempt_disable();
5781 	arch_spin_lock(&trace_cmdline_lock);
5782 
5783 	v = &savedcmd->map_cmdline_to_pid[0];
5784 	while (l <= *pos) {
5785 		v = saved_cmdlines_next(m, v, &l);
5786 		if (!v)
5787 			return NULL;
5788 	}
5789 
5790 	return v;
5791 }
5792 
5793 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5794 {
5795 	arch_spin_unlock(&trace_cmdline_lock);
5796 	preempt_enable();
5797 }
5798 
5799 static int saved_cmdlines_show(struct seq_file *m, void *v)
5800 {
5801 	char buf[TASK_COMM_LEN];
5802 	unsigned int *pid = v;
5803 
5804 	__trace_find_cmdline(*pid, buf);
5805 	seq_printf(m, "%d %s\n", *pid, buf);
5806 	return 0;
5807 }
5808 
5809 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5810 	.start		= saved_cmdlines_start,
5811 	.next		= saved_cmdlines_next,
5812 	.stop		= saved_cmdlines_stop,
5813 	.show		= saved_cmdlines_show,
5814 };
5815 
5816 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5817 {
5818 	int ret;
5819 
5820 	ret = tracing_check_open_get_tr(NULL);
5821 	if (ret)
5822 		return ret;
5823 
5824 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5825 }
5826 
5827 static const struct file_operations tracing_saved_cmdlines_fops = {
5828 	.open		= tracing_saved_cmdlines_open,
5829 	.read		= seq_read,
5830 	.llseek		= seq_lseek,
5831 	.release	= seq_release,
5832 };
5833 
5834 static ssize_t
5835 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5836 				 size_t cnt, loff_t *ppos)
5837 {
5838 	char buf[64];
5839 	int r;
5840 
5841 	arch_spin_lock(&trace_cmdline_lock);
5842 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5843 	arch_spin_unlock(&trace_cmdline_lock);
5844 
5845 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5846 }
5847 
5848 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5849 {
5850 	kfree(s->saved_cmdlines);
5851 	kfree(s->map_cmdline_to_pid);
5852 	kfree(s);
5853 }
5854 
5855 static int tracing_resize_saved_cmdlines(unsigned int val)
5856 {
5857 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5858 
5859 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5860 	if (!s)
5861 		return -ENOMEM;
5862 
5863 	if (allocate_cmdlines_buffer(val, s) < 0) {
5864 		kfree(s);
5865 		return -ENOMEM;
5866 	}
5867 
5868 	arch_spin_lock(&trace_cmdline_lock);
5869 	savedcmd_temp = savedcmd;
5870 	savedcmd = s;
5871 	arch_spin_unlock(&trace_cmdline_lock);
5872 	free_saved_cmdlines_buffer(savedcmd_temp);
5873 
5874 	return 0;
5875 }
5876 
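/*
 * Writing a count to the "saved_cmdlines_size" file resizes the pid ->
 * comm map used to resolve task names in the trace output. Illustrative
 * usage (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */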
5877 static ssize_t
5878 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5879 				  size_t cnt, loff_t *ppos)
5880 {
5881 	unsigned long val;
5882 	int ret;
5883 
5884 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5885 	if (ret)
5886 		return ret;
5887 
5888 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5889 	if (!val || val > PID_MAX_DEFAULT)
5890 		return -EINVAL;
5891 
5892 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5893 	if (ret < 0)
5894 		return ret;
5895 
5896 	*ppos += cnt;
5897 
5898 	return cnt;
5899 }
5900 
5901 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5902 	.open		= tracing_open_generic,
5903 	.read		= tracing_saved_cmdlines_size_read,
5904 	.write		= tracing_saved_cmdlines_size_write,
5905 };
5906 
5907 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5908 static union trace_eval_map_item *
5909 update_eval_map(union trace_eval_map_item *ptr)
5910 {
5911 	if (!ptr->map.eval_string) {
5912 		if (ptr->tail.next) {
5913 			ptr = ptr->tail.next;
5914 			/* Set ptr to the next real item (skip head) */
5915 			ptr++;
5916 		} else
5917 			return NULL;
5918 	}
5919 	return ptr;
5920 }
5921 
5922 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5923 {
5924 	union trace_eval_map_item *ptr = v;
5925 
5926 	/*
5927 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5928 	 * This really should never happen.
5929 	 */
5930 	(*pos)++;
5931 	ptr = update_eval_map(ptr);
5932 	if (WARN_ON_ONCE(!ptr))
5933 		return NULL;
5934 
5935 	ptr++;
5936 	ptr = update_eval_map(ptr);
5937 
5938 	return ptr;
5939 }
5940 
5941 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5942 {
5943 	union trace_eval_map_item *v;
5944 	loff_t l = 0;
5945 
5946 	mutex_lock(&trace_eval_mutex);
5947 
5948 	v = trace_eval_maps;
5949 	if (v)
5950 		v++;
5951 
5952 	while (v && l < *pos) {
5953 		v = eval_map_next(m, v, &l);
5954 	}
5955 
5956 	return v;
5957 }
5958 
5959 static void eval_map_stop(struct seq_file *m, void *v)
5960 {
5961 	mutex_unlock(&trace_eval_mutex);
5962 }
5963 
5964 static int eval_map_show(struct seq_file *m, void *v)
5965 {
5966 	union trace_eval_map_item *ptr = v;
5967 
5968 	seq_printf(m, "%s %ld (%s)\n",
5969 		   ptr->map.eval_string, ptr->map.eval_value,
5970 		   ptr->map.system);
5971 
5972 	return 0;
5973 }
5974 
5975 static const struct seq_operations tracing_eval_map_seq_ops = {
5976 	.start		= eval_map_start,
5977 	.next		= eval_map_next,
5978 	.stop		= eval_map_stop,
5979 	.show		= eval_map_show,
5980 };
5981 
5982 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5983 {
5984 	int ret;
5985 
5986 	ret = tracing_check_open_get_tr(NULL);
5987 	if (ret)
5988 		return ret;
5989 
5990 	return seq_open(filp, &tracing_eval_map_seq_ops);
5991 }
5992 
5993 static const struct file_operations tracing_eval_map_fops = {
5994 	.open		= tracing_eval_map_open,
5995 	.read		= seq_read,
5996 	.llseek		= seq_lseek,
5997 	.release	= seq_release,
5998 };
5999 
6000 static inline union trace_eval_map_item *
6001 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6002 {
6003 	/* Return tail of array given the head */
6004 	return ptr + ptr->head.length + 1;
6005 }
6006 
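/*
 * Illustrative layout of one chunk appended by trace_insert_eval_map_file()
 * below: len map entries book-ended by a head and a tail item, with the
 * tail linking to the next chunk (if any):
 *
 *   [ head: mod, length = len ][ map[0] ] ... [ map[len - 1] ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() above skips from the head to the tail item.
 */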
6007 static void
6008 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6009 			   int len)
6010 {
6011 	struct trace_eval_map **stop;
6012 	struct trace_eval_map **map;
6013 	union trace_eval_map_item *map_array;
6014 	union trace_eval_map_item *ptr;
6015 
6016 	stop = start + len;
6017 
6018 	/*
6019 	 * The trace_eval_maps contains the map plus a head and tail item,
6020 	 * where the head holds the module and length of array, and the
6021 	 * tail holds a pointer to the next list.
6022 	 */
6023 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6024 	if (!map_array) {
6025 		pr_warn("Unable to allocate trace eval mapping\n");
6026 		return;
6027 	}
6028 
6029 	mutex_lock(&trace_eval_mutex);
6030 
6031 	if (!trace_eval_maps)
6032 		trace_eval_maps = map_array;
6033 	else {
6034 		ptr = trace_eval_maps;
6035 		for (;;) {
6036 			ptr = trace_eval_jmp_to_tail(ptr);
6037 			if (!ptr->tail.next)
6038 				break;
6039 			ptr = ptr->tail.next;
6040 
6041 		}
6042 		ptr->tail.next = map_array;
6043 	}
6044 	map_array->head.mod = mod;
6045 	map_array->head.length = len;
6046 	map_array++;
6047 
6048 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6049 		map_array->map = **map;
6050 		map_array++;
6051 	}
6052 	memset(map_array, 0, sizeof(*map_array));
6053 
6054 	mutex_unlock(&trace_eval_mutex);
6055 }
6056 
6057 static void trace_create_eval_file(struct dentry *d_tracer)
6058 {
6059 	trace_create_file("eval_map", 0444, d_tracer,
6060 			  NULL, &tracing_eval_map_fops);
6061 }
6062 
6063 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6064 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6065 static inline void trace_insert_eval_map_file(struct module *mod,
6066 			      struct trace_eval_map **start, int len) { }
6067 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6068 
6069 static void trace_insert_eval_map(struct module *mod,
6070 				  struct trace_eval_map **start, int len)
6071 {
6072 	struct trace_eval_map **map;
6073 
6074 	if (len <= 0)
6075 		return;
6076 
6077 	map = start;
6078 
6079 	trace_event_eval_update(map, len);
6080 
6081 	trace_insert_eval_map_file(mod, start, len);
6082 }
6083 
6084 static ssize_t
6085 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6086 		       size_t cnt, loff_t *ppos)
6087 {
6088 	struct trace_array *tr = filp->private_data;
6089 	char buf[MAX_TRACER_SIZE+2];
6090 	int r;
6091 
6092 	mutex_lock(&trace_types_lock);
6093 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6094 	mutex_unlock(&trace_types_lock);
6095 
6096 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6097 }
6098 
6099 int tracer_init(struct tracer *t, struct trace_array *tr)
6100 {
6101 	tracing_reset_online_cpus(&tr->array_buffer);
6102 	return t->init(tr);
6103 }
6104 
6105 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6106 {
6107 	int cpu;
6108 
6109 	for_each_tracing_cpu(cpu)
6110 		per_cpu_ptr(buf->data, cpu)->entries = val;
6111 }
6112 
6113 #ifdef CONFIG_TRACER_MAX_TRACE
6114 /* resize @trace_buf to the per-CPU entry counts recorded in @size_buf */
6115 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6116 					struct array_buffer *size_buf, int cpu_id)
6117 {
6118 	int cpu, ret = 0;
6119 
6120 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6121 		for_each_tracing_cpu(cpu) {
6122 			ret = ring_buffer_resize(trace_buf->buffer,
6123 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6124 			if (ret < 0)
6125 				break;
6126 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6127 				per_cpu_ptr(size_buf->data, cpu)->entries;
6128 		}
6129 	} else {
6130 		ret = ring_buffer_resize(trace_buf->buffer,
6131 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6132 		if (ret == 0)
6133 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6134 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6135 	}
6136 
6137 	return ret;
6138 }
6139 #endif /* CONFIG_TRACER_MAX_TRACE */
6140 
6141 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6142 					unsigned long size, int cpu)
6143 {
6144 	int ret;
6145 
6146 	/*
6147 	 * If kernel or user changes the size of the ring buffer
6148 	 * we use the size that was given, and we can forget about
6149 	 * expanding it later.
6150 	 */
6151 	ring_buffer_expanded = true;
6152 
6153 	/* May be called before buffers are initialized */
6154 	if (!tr->array_buffer.buffer)
6155 		return 0;
6156 
6157 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6158 	if (ret < 0)
6159 		return ret;
6160 
6161 #ifdef CONFIG_TRACER_MAX_TRACE
6162 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6163 	    !tr->current_trace->use_max_tr)
6164 		goto out;
6165 
6166 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6167 	if (ret < 0) {
6168 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6169 						     &tr->array_buffer, cpu);
6170 		if (r < 0) {
6171 			/*
6172 			 * AARGH! We are left with different sized
6173 			 * main and max buffers!
6174 			 * The max buffer is our "snapshot" buffer.
6175 			 * When a tracer needs a snapshot (one of the
6176 			 * latency tracers), it swaps the max buffer
6177 			 * with the saved snapshot. We succeeded in
6178 			 * updating the size of the main buffer, but failed
6179 			 * to update the size of the max buffer. And when we
6180 			 * tried to reset the main buffer to its original
6181 			 * size, that failed as well. This is very unlikely
6182 			 * to happen, but if it does, warn and kill all
6183 			 * tracing.
6184 			 */
6185 			WARN_ON(1);
6186 			tracing_disabled = 1;
6187 		}
6188 		return ret;
6189 	}
6190 
6191 	if (cpu == RING_BUFFER_ALL_CPUS)
6192 		set_buffer_entries(&tr->max_buffer, size);
6193 	else
6194 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6195 
6196  out:
6197 #endif /* CONFIG_TRACER_MAX_TRACE */
6198 
6199 	if (cpu == RING_BUFFER_ALL_CPUS)
6200 		set_buffer_entries(&tr->array_buffer, size);
6201 	else
6202 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6203 
6204 	return ret;
6205 }
6206 
6207 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6208 				  unsigned long size, int cpu_id)
6209 {
6210 	int ret;
6211 
6212 	mutex_lock(&trace_types_lock);
6213 
6214 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6215 		/* make sure this cpu is enabled in the mask */
6216 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6217 			ret = -EINVAL;
6218 			goto out;
6219 		}
6220 	}
6221 
6222 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6223 	if (ret < 0)
6224 		ret = -ENOMEM;
6225 
6226 out:
6227 	mutex_unlock(&trace_types_lock);
6228 
6229 	return ret;
6230 }
6231 
6232 
6233 /**
6234  * tracing_update_buffers - used by tracing facility to expand ring buffers
6235  *
6236  * To save memory on systems where tracing is configured in but never
6237  * used, the ring buffers are initially set to a minimum size. Once a
6238  * user starts to use the tracing facility, they need to grow to their
6239  * default size.
6240  *
6241  * This function is to be called when a tracer is about to be used.
6242  */
6243 int tracing_update_buffers(void)
6244 {
6245 	int ret = 0;
6246 
6247 	mutex_lock(&trace_types_lock);
6248 	if (!ring_buffer_expanded)
6249 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6250 						RING_BUFFER_ALL_CPUS);
6251 	mutex_unlock(&trace_types_lock);
6252 
6253 	return ret;
6254 }
6255 
6256 struct trace_option_dentry;
6257 
6258 static void
6259 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6260 
6261 /*
6262  * Used to clear out the tracer before deletion of an instance.
6263  * Must have trace_types_lock held.
6264  */
6265 static void tracing_set_nop(struct trace_array *tr)
6266 {
6267 	if (tr->current_trace == &nop_trace)
6268 		return;
6269 
6270 	tr->current_trace->enabled--;
6271 
6272 	if (tr->current_trace->reset)
6273 		tr->current_trace->reset(tr);
6274 
6275 	tr->current_trace = &nop_trace;
6276 }
6277 
6278 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6279 {
6280 	/* Only enable if the directory has been created already. */
6281 	if (!tr->dir)
6282 		return;
6283 
6284 	create_trace_option_files(tr, t);
6285 }
6286 
6287 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6288 {
6289 	struct tracer *t;
6290 #ifdef CONFIG_TRACER_MAX_TRACE
6291 	bool had_max_tr;
6292 #endif
6293 	int ret = 0;
6294 
6295 	mutex_lock(&trace_types_lock);
6296 
6297 	if (!ring_buffer_expanded) {
6298 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6299 						RING_BUFFER_ALL_CPUS);
6300 		if (ret < 0)
6301 			goto out;
6302 		ret = 0;
6303 	}
6304 
6305 	for (t = trace_types; t; t = t->next) {
6306 		if (strcmp(t->name, buf) == 0)
6307 			break;
6308 	}
6309 	if (!t) {
6310 		ret = -EINVAL;
6311 		goto out;
6312 	}
6313 	if (t == tr->current_trace)
6314 		goto out;
6315 
6316 #ifdef CONFIG_TRACER_SNAPSHOT
6317 	if (t->use_max_tr) {
6318 		arch_spin_lock(&tr->max_lock);
6319 		if (tr->cond_snapshot)
6320 			ret = -EBUSY;
6321 		arch_spin_unlock(&tr->max_lock);
6322 		if (ret)
6323 			goto out;
6324 	}
6325 #endif
6326 	/* Some tracers won't work on kernel command line */
6327 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6328 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6329 			t->name);
6330 		goto out;
6331 	}
6332 
6333 	/* Some tracers are only allowed for the top level buffer */
6334 	if (!trace_ok_for_array(t, tr)) {
6335 		ret = -EINVAL;
6336 		goto out;
6337 	}
6338 
6339 	/* If trace pipe files are being read, we can't change the tracer */
6340 	if (tr->trace_ref) {
6341 		ret = -EBUSY;
6342 		goto out;
6343 	}
6344 
6345 	trace_branch_disable();
6346 
6347 	tr->current_trace->enabled--;
6348 
6349 	if (tr->current_trace->reset)
6350 		tr->current_trace->reset(tr);
6351 
6352 	/* Current trace needs to be nop_trace before synchronize_rcu */
6353 	tr->current_trace = &nop_trace;
6354 
6355 #ifdef CONFIG_TRACER_MAX_TRACE
6356 	had_max_tr = tr->allocated_snapshot;
6357 
6358 	if (had_max_tr && !t->use_max_tr) {
6359 		/*
6360 		 * We need to make sure that the update_max_tr sees that
6361 		 * current_trace changed to nop_trace to keep it from
6362 		 * swapping the buffers after we resize it.
6363 		 * update_max_tr() is called with interrupts disabled, so a
6364 		 * synchronize_rcu() is sufficient here.
6365 		 */
6366 		synchronize_rcu();
6367 		free_snapshot(tr);
6368 	}
6369 #endif
6370 
6371 #ifdef CONFIG_TRACER_MAX_TRACE
6372 	if (t->use_max_tr && !had_max_tr) {
6373 		ret = tracing_alloc_snapshot_instance(tr);
6374 		if (ret < 0)
6375 			goto out;
6376 	}
6377 #endif
6378 
6379 	if (t->init) {
6380 		ret = tracer_init(t, tr);
6381 		if (ret)
6382 			goto out;
6383 	}
6384 
6385 	tr->current_trace = t;
6386 	tr->current_trace->enabled++;
6387 	trace_branch_enable(tr);
6388  out:
6389 	mutex_unlock(&trace_types_lock);
6390 
6391 	return ret;
6392 }
6393 
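/*
 * Write handler for the "current_tracer" file. Illustrative usage
 * (assuming tracefs is mounted at /sys/kernel/tracing and the named
 * tracers are built in):
 *
 *   echo function_graph > /sys/kernel/tracing/current_tracer
 *   echo nop > /sys/kernel/tracing/current_tracer
 */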
6394 static ssize_t
6395 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6396 			size_t cnt, loff_t *ppos)
6397 {
6398 	struct trace_array *tr = filp->private_data;
6399 	char buf[MAX_TRACER_SIZE+1];
6400 	int i;
6401 	size_t ret;
6402 	int err;
6403 
6404 	ret = cnt;
6405 
6406 	if (cnt > MAX_TRACER_SIZE)
6407 		cnt = MAX_TRACER_SIZE;
6408 
6409 	if (copy_from_user(buf, ubuf, cnt))
6410 		return -EFAULT;
6411 
6412 	buf[cnt] = 0;
6413 
6414 	/* strip trailing whitespace. */
6415 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6416 		buf[i] = 0;
6417 
6418 	err = tracing_set_tracer(tr, buf);
6419 	if (err)
6420 		return err;
6421 
6422 	*ppos += ret;
6423 
6424 	return ret;
6425 }
6426 
6427 static ssize_t
6428 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6429 		   size_t cnt, loff_t *ppos)
6430 {
6431 	char buf[64];
6432 	int r;
6433 
6434 	r = snprintf(buf, sizeof(buf), "%ld\n",
6435 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6436 	if (r > sizeof(buf))
6437 		r = sizeof(buf);
6438 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6439 }
6440 
6441 static ssize_t
6442 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6443 		    size_t cnt, loff_t *ppos)
6444 {
6445 	unsigned long val;
6446 	int ret;
6447 
6448 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6449 	if (ret)
6450 		return ret;
6451 
6452 	*ptr = val * 1000;
6453 
6454 	return cnt;
6455 }
6456 
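/*
 * The "tracing_thresh" file is read and written in microseconds, while
 * the value is stored internally in nanoseconds (hence the * 1000 above).
 * Illustrative usage (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo 100 > /sys/kernel/tracing/tracing_thresh
 */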
6457 static ssize_t
6458 tracing_thresh_read(struct file *filp, char __user *ubuf,
6459 		    size_t cnt, loff_t *ppos)
6460 {
6461 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6462 }
6463 
6464 static ssize_t
6465 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6466 		     size_t cnt, loff_t *ppos)
6467 {
6468 	struct trace_array *tr = filp->private_data;
6469 	int ret;
6470 
6471 	mutex_lock(&trace_types_lock);
6472 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6473 	if (ret < 0)
6474 		goto out;
6475 
6476 	if (tr->current_trace->update_thresh) {
6477 		ret = tr->current_trace->update_thresh(tr);
6478 		if (ret < 0)
6479 			goto out;
6480 	}
6481 
6482 	ret = cnt;
6483 out:
6484 	mutex_unlock(&trace_types_lock);
6485 
6486 	return ret;
6487 }
6488 
6489 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6490 
6491 static ssize_t
6492 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6493 		     size_t cnt, loff_t *ppos)
6494 {
6495 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6496 }
6497 
6498 static ssize_t
6499 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6500 		      size_t cnt, loff_t *ppos)
6501 {
6502 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6503 }
6504 
6505 #endif
6506 
6507 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6508 {
6509 	struct trace_array *tr = inode->i_private;
6510 	struct trace_iterator *iter;
6511 	int ret;
6512 
6513 	ret = tracing_check_open_get_tr(tr);
6514 	if (ret)
6515 		return ret;
6516 
6517 	mutex_lock(&trace_types_lock);
6518 
6519 	/* create a buffer to store the information to pass to userspace */
6520 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6521 	if (!iter) {
6522 		ret = -ENOMEM;
6523 		__trace_array_put(tr);
6524 		goto out;
6525 	}
6526 
6527 	trace_seq_init(&iter->seq);
6528 	iter->trace = tr->current_trace;
6529 
6530 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6531 		ret = -ENOMEM;
6532 		goto fail;
6533 	}
6534 
6535 	/* trace pipe does not show start of buffer */
6536 	cpumask_setall(iter->started);
6537 
6538 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6539 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6540 
6541 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6542 	if (trace_clocks[tr->clock_id].in_ns)
6543 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6544 
6545 	iter->tr = tr;
6546 	iter->array_buffer = &tr->array_buffer;
6547 	iter->cpu_file = tracing_get_cpu(inode);
6548 	mutex_init(&iter->mutex);
6549 	filp->private_data = iter;
6550 
6551 	if (iter->trace->pipe_open)
6552 		iter->trace->pipe_open(iter);
6553 
6554 	nonseekable_open(inode, filp);
6555 
6556 	tr->trace_ref++;
6557 out:
6558 	mutex_unlock(&trace_types_lock);
6559 	return ret;
6560 
6561 fail:
6562 	kfree(iter);
6563 	__trace_array_put(tr);
6564 	mutex_unlock(&trace_types_lock);
6565 	return ret;
6566 }
6567 
6568 static int tracing_release_pipe(struct inode *inode, struct file *file)
6569 {
6570 	struct trace_iterator *iter = file->private_data;
6571 	struct trace_array *tr = inode->i_private;
6572 
6573 	mutex_lock(&trace_types_lock);
6574 
6575 	tr->trace_ref--;
6576 
6577 	if (iter->trace->pipe_close)
6578 		iter->trace->pipe_close(iter);
6579 
6580 	mutex_unlock(&trace_types_lock);
6581 
6582 	free_cpumask_var(iter->started);
6583 	mutex_destroy(&iter->mutex);
6584 	kfree(iter);
6585 
6586 	trace_array_put(tr);
6587 
6588 	return 0;
6589 }
6590 
6591 static __poll_t
6592 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6593 {
6594 	struct trace_array *tr = iter->tr;
6595 
6596 	/* Iterators are static, they should be filled or empty */
6597 	if (trace_buffer_iter(iter, iter->cpu_file))
6598 		return EPOLLIN | EPOLLRDNORM;
6599 
6600 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6601 		/*
6602 		 * Always select as readable when in blocking mode
6603 		 */
6604 		return EPOLLIN | EPOLLRDNORM;
6605 	else
6606 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6607 					     filp, poll_table);
6608 }
6609 
6610 static __poll_t
6611 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6612 {
6613 	struct trace_iterator *iter = filp->private_data;
6614 
6615 	return trace_poll(iter, filp, poll_table);
6616 }
6617 
6618 /* Must be called with iter->mutex held. */
6619 static int tracing_wait_pipe(struct file *filp)
6620 {
6621 	struct trace_iterator *iter = filp->private_data;
6622 	int ret;
6623 
6624 	while (trace_empty(iter)) {
6625 
6626 		if ((filp->f_flags & O_NONBLOCK)) {
6627 			return -EAGAIN;
6628 		}
6629 
6630 		/*
6631 		 * We only return EOF once something has been read and
6632 		 * tracing has then been disabled. If tracing is disabled
6633 		 * but nothing has been read yet, keep blocking: this lets
6634 		 * a user cat this file and then enable tracing. Once data
6635 		 * has been read, a later disable of tracing gives an EOF.
6636 		 *
6637 		 * iter->pos will be 0 if we haven't read anything.
6638 		 */
6639 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6640 			break;
6641 
6642 		mutex_unlock(&iter->mutex);
6643 
6644 		ret = wait_on_pipe(iter, 0);
6645 
6646 		mutex_lock(&iter->mutex);
6647 
6648 		if (ret)
6649 			return ret;
6650 	}
6651 
6652 	return 1;
6653 }
6654 
6655 /*
6656  * Consumer reader: entries are consumed as they are copied to user space.
6657  */
6658 static ssize_t
6659 tracing_read_pipe(struct file *filp, char __user *ubuf,
6660 		  size_t cnt, loff_t *ppos)
6661 {
6662 	struct trace_iterator *iter = filp->private_data;
6663 	ssize_t sret;
6664 
6665 	/*
6666 	 * Avoid more than one consumer on a single file descriptor.
6667 	 * This is just a matter of trace coherency; the ring buffer itself
6668 	 * is protected.
6669 	 */
6670 	mutex_lock(&iter->mutex);
6671 
6672 	/* return any leftover data */
6673 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6674 	if (sret != -EBUSY)
6675 		goto out;
6676 
6677 	trace_seq_init(&iter->seq);
6678 
6679 	if (iter->trace->read) {
6680 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6681 		if (sret)
6682 			goto out;
6683 	}
6684 
6685 waitagain:
6686 	sret = tracing_wait_pipe(filp);
6687 	if (sret <= 0)
6688 		goto out;
6689 
6690 	/* stop when tracing is finished */
6691 	if (trace_empty(iter)) {
6692 		sret = 0;
6693 		goto out;
6694 	}
6695 
6696 	if (cnt >= PAGE_SIZE)
6697 		cnt = PAGE_SIZE - 1;
6698 
6699 	/* reset all but tr, trace, and overruns */
6700 	memset(&iter->seq, 0,
6701 	       sizeof(struct trace_iterator) -
6702 	       offsetof(struct trace_iterator, seq));
6703 	cpumask_clear(iter->started);
6704 	trace_seq_init(&iter->seq);
6705 	iter->pos = -1;
6706 
6707 	trace_event_read_lock();
6708 	trace_access_lock(iter->cpu_file);
6709 	while (trace_find_next_entry_inc(iter) != NULL) {
6710 		enum print_line_t ret;
6711 		int save_len = iter->seq.seq.len;
6712 
6713 		ret = print_trace_line(iter);
6714 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6715 			/* don't print partial lines */
6716 			iter->seq.seq.len = save_len;
6717 			break;
6718 		}
6719 		if (ret != TRACE_TYPE_NO_CONSUME)
6720 			trace_consume(iter);
6721 
6722 		if (trace_seq_used(&iter->seq) >= cnt)
6723 			break;
6724 
6725 		/*
6726 		 * If the full flag is set, we have hit the trace_seq buffer
6727 		 * size and should have left via the partial-line check above;
6728 		 * one of the trace_seq_* functions is not being used properly.
6729 		 */
6730 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6731 			  iter->ent->type);
6732 	}
6733 	trace_access_unlock(iter->cpu_file);
6734 	trace_event_read_unlock();
6735 
6736 	/* Now copy what we have to the user */
6737 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6738 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6739 		trace_seq_init(&iter->seq);
6740 
6741 	/*
6742 	 * If there was nothing to send to user, in spite of consuming trace
6743 	 * entries, go back to wait for more entries.
6744 	 */
6745 	if (sret == -EBUSY)
6746 		goto waitagain;
6747 
6748 out:
6749 	mutex_unlock(&iter->mutex);
6750 
6751 	return sret;
6752 }
6753 
6754 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6755 				     unsigned int idx)
6756 {
6757 	__free_page(spd->pages[idx]);
6758 }
6759 
6760 static size_t
6761 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6762 {
6763 	size_t count;
6764 	int save_len;
6765 	int ret;
6766 
6767 	/* Seq buffer is page-sized, exactly what we need. */
6768 	for (;;) {
6769 		save_len = iter->seq.seq.len;
6770 		ret = print_trace_line(iter);
6771 
6772 		if (trace_seq_has_overflowed(&iter->seq)) {
6773 			iter->seq.seq.len = save_len;
6774 			break;
6775 		}
6776 
6777 		/*
6778 		 * This should not be hit, because it should only
6779 		 * be set if the iter->seq overflowed. But check it
6780 		 * anyway to be safe.
6781 		 */
6782 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6783 			iter->seq.seq.len = save_len;
6784 			break;
6785 		}
6786 
6787 		count = trace_seq_used(&iter->seq) - save_len;
6788 		if (rem < count) {
6789 			rem = 0;
6790 			iter->seq.seq.len = save_len;
6791 			break;
6792 		}
6793 
6794 		if (ret != TRACE_TYPE_NO_CONSUME)
6795 			trace_consume(iter);
6796 		rem -= count;
6797 		if (!trace_find_next_entry_inc(iter))	{
6798 			rem = 0;
6799 			iter->ent = NULL;
6800 			break;
6801 		}
6802 	}
6803 
6804 	return rem;
6805 }
6806 
6807 static ssize_t tracing_splice_read_pipe(struct file *filp,
6808 					loff_t *ppos,
6809 					struct pipe_inode_info *pipe,
6810 					size_t len,
6811 					unsigned int flags)
6812 {
6813 	struct page *pages_def[PIPE_DEF_BUFFERS];
6814 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6815 	struct trace_iterator *iter = filp->private_data;
6816 	struct splice_pipe_desc spd = {
6817 		.pages		= pages_def,
6818 		.partial	= partial_def,
6819 		.nr_pages	= 0, /* This gets updated below. */
6820 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6821 		.ops		= &default_pipe_buf_ops,
6822 		.spd_release	= tracing_spd_release_pipe,
6823 	};
6824 	ssize_t ret;
6825 	size_t rem;
6826 	unsigned int i;
6827 
6828 	if (splice_grow_spd(pipe, &spd))
6829 		return -ENOMEM;
6830 
6831 	mutex_lock(&iter->mutex);
6832 
6833 	if (iter->trace->splice_read) {
6834 		ret = iter->trace->splice_read(iter, filp,
6835 					       ppos, pipe, len, flags);
6836 		if (ret)
6837 			goto out_err;
6838 	}
6839 
6840 	ret = tracing_wait_pipe(filp);
6841 	if (ret <= 0)
6842 		goto out_err;
6843 
6844 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6845 		ret = -EFAULT;
6846 		goto out_err;
6847 	}
6848 
6849 	trace_event_read_lock();
6850 	trace_access_lock(iter->cpu_file);
6851 
6852 	/* Fill as many pages as possible. */
6853 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6854 		spd.pages[i] = alloc_page(GFP_KERNEL);
6855 		if (!spd.pages[i])
6856 			break;
6857 
6858 		rem = tracing_fill_pipe_page(rem, iter);
6859 
6860 		/* Copy the data into the page, so we can start over. */
6861 		ret = trace_seq_to_buffer(&iter->seq,
6862 					  page_address(spd.pages[i]),
6863 					  trace_seq_used(&iter->seq));
6864 		if (ret < 0) {
6865 			__free_page(spd.pages[i]);
6866 			break;
6867 		}
6868 		spd.partial[i].offset = 0;
6869 		spd.partial[i].len = trace_seq_used(&iter->seq);
6870 
6871 		trace_seq_init(&iter->seq);
6872 	}
6873 
6874 	trace_access_unlock(iter->cpu_file);
6875 	trace_event_read_unlock();
6876 	mutex_unlock(&iter->mutex);
6877 
6878 	spd.nr_pages = i;
6879 
6880 	if (i)
6881 		ret = splice_to_pipe(pipe, &spd);
6882 	else
6883 		ret = 0;
6884 out:
6885 	splice_shrink_spd(&spd);
6886 	return ret;
6887 
6888 out_err:
6889 	mutex_unlock(&iter->mutex);
6890 	goto out;
6891 }
6892 
6893 static ssize_t
6894 tracing_entries_read(struct file *filp, char __user *ubuf,
6895 		     size_t cnt, loff_t *ppos)
6896 {
6897 	struct inode *inode = file_inode(filp);
6898 	struct trace_array *tr = inode->i_private;
6899 	int cpu = tracing_get_cpu(inode);
6900 	char buf[64];
6901 	int r = 0;
6902 	ssize_t ret;
6903 
6904 	mutex_lock(&trace_types_lock);
6905 
6906 	if (cpu == RING_BUFFER_ALL_CPUS) {
6907 		int cpu, buf_size_same;
6908 		unsigned long size;
6909 
6910 		size = 0;
6911 		buf_size_same = 1;
6912 		/* check if all cpu sizes are same */
6913 		for_each_tracing_cpu(cpu) {
6914 			/* fill in the size from first enabled cpu */
6915 			if (size == 0)
6916 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6917 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6918 				buf_size_same = 0;
6919 				break;
6920 			}
6921 		}
6922 
6923 		if (buf_size_same) {
6924 			if (!ring_buffer_expanded)
6925 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6926 					    size >> 10,
6927 					    trace_buf_size >> 10);
6928 			else
6929 				r = sprintf(buf, "%lu\n", size >> 10);
6930 		} else
6931 			r = sprintf(buf, "X\n");
6932 	} else
6933 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6934 
6935 	mutex_unlock(&trace_types_lock);
6936 
6937 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6938 	return ret;
6939 }
6940 
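/*
 * Write handler for the "buffer_size_kb" file (also used for the per-CPU
 * per_cpu/cpuN/buffer_size_kb files). The value is interpreted in KiB per
 * CPU. Illustrative usage:
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb
 */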
6941 static ssize_t
6942 tracing_entries_write(struct file *filp, const char __user *ubuf,
6943 		      size_t cnt, loff_t *ppos)
6944 {
6945 	struct inode *inode = file_inode(filp);
6946 	struct trace_array *tr = inode->i_private;
6947 	unsigned long val;
6948 	int ret;
6949 
6950 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6951 	if (ret)
6952 		return ret;
6953 
6954 	/* must have at least 1 entry */
6955 	if (!val)
6956 		return -EINVAL;
6957 
6958 	/* value is in KB */
6959 	val <<= 10;
6960 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6961 	if (ret < 0)
6962 		return ret;
6963 
6964 	*ppos += cnt;
6965 
6966 	return cnt;
6967 }
6968 
6969 static ssize_t
6970 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6971 				size_t cnt, loff_t *ppos)
6972 {
6973 	struct trace_array *tr = filp->private_data;
6974 	char buf[64];
6975 	int r, cpu;
6976 	unsigned long size = 0, expanded_size = 0;
6977 
6978 	mutex_lock(&trace_types_lock);
6979 	for_each_tracing_cpu(cpu) {
6980 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6981 		if (!ring_buffer_expanded)
6982 			expanded_size += trace_buf_size >> 10;
6983 	}
6984 	if (ring_buffer_expanded)
6985 		r = sprintf(buf, "%lu\n", size);
6986 	else
6987 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6988 	mutex_unlock(&trace_types_lock);
6989 
6990 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6991 }
6992 
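/*
 * Write handler for the "free_buffer" file. The data written is ignored;
 * the real work happens in the release callback below, which shrinks the
 * ring buffer to zero (and turns tracing off first if the
 * TRACE_ITER_STOP_ON_FREE flag is set). Illustrative usage:
 *
 *   echo 1 > /sys/kernel/tracing/free_buffer
 */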
6993 static ssize_t
6994 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6995 			  size_t cnt, loff_t *ppos)
6996 {
6997 	/*
6998 	 * There is no need to read what the user has written; this function
6999 	 * only exists so that writing (e.g. with "echo") does not fail.
7000 	 */
7001 
7002 	*ppos += cnt;
7003 
7004 	return cnt;
7005 }
7006 
7007 static int
7008 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7009 {
7010 	struct trace_array *tr = inode->i_private;
7011 
7012 	/* disable tracing ? */
7013 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7014 		tracer_tracing_off(tr);
7015 	/* resize the ring buffer to 0 */
7016 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7017 
7018 	trace_array_put(tr);
7019 
7020 	return 0;
7021 }
7022 
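/*
 * Write handler for the "trace_marker" file. Each write is recorded as a
 * TRACE_PRINT entry in the ring buffer and is interleaved with the rest
 * of the trace output. Illustrative usage:
 *
 *   echo "hit the slow path" > /sys/kernel/tracing/trace_marker
 */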
7023 static ssize_t
7024 tracing_mark_write(struct file *filp, const char __user *ubuf,
7025 					size_t cnt, loff_t *fpos)
7026 {
7027 	struct trace_array *tr = filp->private_data;
7028 	struct ring_buffer_event *event;
7029 	enum event_trigger_type tt = ETT_NONE;
7030 	struct trace_buffer *buffer;
7031 	struct print_entry *entry;
7032 	ssize_t written;
7033 	int size;
7034 	int len;
7035 
7036 /* Used in tracing_mark_raw_write() as well */
7037 #define FAULTED_STR "<faulted>"
7038 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7039 
7040 	if (tracing_disabled)
7041 		return -EINVAL;
7042 
7043 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7044 		return -EINVAL;
7045 
7046 	if (cnt > TRACE_BUF_SIZE)
7047 		cnt = TRACE_BUF_SIZE;
7048 
7049 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7050 
7051 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7052 
7053 	/* If less than "<faulted>", then make sure we can still add that */
7054 	if (cnt < FAULTED_SIZE)
7055 		size += FAULTED_SIZE - cnt;
7056 
7057 	buffer = tr->array_buffer.buffer;
7058 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7059 					    tracing_gen_ctx());
7060 	if (unlikely(!event))
7061 		/* Ring buffer disabled, return as if not open for write */
7062 		return -EBADF;
7063 
7064 	entry = ring_buffer_event_data(event);
7065 	entry->ip = _THIS_IP_;
7066 
7067 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7068 	if (len) {
7069 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7070 		cnt = FAULTED_SIZE;
7071 		written = -EFAULT;
7072 	} else
7073 		written = cnt;
7074 
7075 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7076 		/* do not add \n before testing triggers, but add \0 */
7077 		entry->buf[cnt] = '\0';
7078 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7079 	}
7080 
7081 	if (entry->buf[cnt - 1] != '\n') {
7082 		entry->buf[cnt] = '\n';
7083 		entry->buf[cnt + 1] = '\0';
7084 	} else
7085 		entry->buf[cnt] = '\0';
7086 
7087 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7088 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7089 	__buffer_unlock_commit(buffer, event);
7090 
7091 	if (tt)
7092 		event_triggers_post_call(tr->trace_marker_file, tt);
7093 
7094 	if (written > 0)
7095 		*fpos += written;
7096 
7097 	return written;
7098 }
7099 
7100 /* Limit it for now to 3K (including tag) */
7101 #define RAW_DATA_MAX_SIZE (1024*3)
7102 
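/*
 * Write handler for the "trace_marker_raw" file. The payload is binary:
 * the first sizeof(int) bytes are interpreted as the tag id, the rest is
 * opaque data. Illustrative user space sketch (hypothetical fd name):
 *
 *   struct { int id; char data[8]; } rec = { 42, "payload" };
 *   write(marker_raw_fd, &rec, sizeof(rec));
 */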
7103 static ssize_t
7104 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7105 					size_t cnt, loff_t *fpos)
7106 {
7107 	struct trace_array *tr = filp->private_data;
7108 	struct ring_buffer_event *event;
7109 	struct trace_buffer *buffer;
7110 	struct raw_data_entry *entry;
7111 	ssize_t written;
7112 	int size;
7113 	int len;
7114 
7115 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7116 
7117 	if (tracing_disabled)
7118 		return -EINVAL;
7119 
7120 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7121 		return -EINVAL;
7122 
7123 	/* The marker must at least have a tag id */
7124 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7125 		return -EINVAL;
7126 
7127 	if (cnt > TRACE_BUF_SIZE)
7128 		cnt = TRACE_BUF_SIZE;
7129 
7130 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7131 
7132 	size = sizeof(*entry) + cnt;
7133 	if (cnt < FAULT_SIZE_ID)
7134 		size += FAULT_SIZE_ID - cnt;
7135 
7136 	buffer = tr->array_buffer.buffer;
7137 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7138 					    tracing_gen_ctx());
7139 	if (!event)
7140 		/* Ring buffer disabled, return as if not open for write */
7141 		return -EBADF;
7142 
7143 	entry = ring_buffer_event_data(event);
7144 
7145 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7146 	if (len) {
7147 		entry->id = -1;
7148 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7149 		written = -EFAULT;
7150 	} else
7151 		written = cnt;
7152 
7153 	__buffer_unlock_commit(buffer, event);
7154 
7155 	if (written > 0)
7156 		*fpos += written;
7157 
7158 	return written;
7159 }
7160 
7161 static int tracing_clock_show(struct seq_file *m, void *v)
7162 {
7163 	struct trace_array *tr = m->private;
7164 	int i;
7165 
7166 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7167 		seq_printf(m,
7168 			"%s%s%s%s", i ? " " : "",
7169 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7170 			i == tr->clock_id ? "]" : "");
7171 	seq_putc(m, '\n');
7172 
7173 	return 0;
7174 }
7175 
7176 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7177 {
7178 	int i;
7179 
7180 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7181 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7182 			break;
7183 	}
7184 	if (i == ARRAY_SIZE(trace_clocks))
7185 		return -EINVAL;
7186 
7187 	mutex_lock(&trace_types_lock);
7188 
7189 	tr->clock_id = i;
7190 
7191 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7192 
7193 	/*
7194 	 * New clock may not be consistent with the previous clock.
7195 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7196 	 */
7197 	tracing_reset_online_cpus(&tr->array_buffer);
7198 
7199 #ifdef CONFIG_TRACER_MAX_TRACE
7200 	if (tr->max_buffer.buffer)
7201 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7202 	tracing_reset_online_cpus(&tr->max_buffer);
7203 #endif
7204 
7205 	mutex_unlock(&trace_types_lock);
7206 
7207 	return 0;
7208 }
7209 
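/*
 * Write handler for the "trace_clock" file. Reading the file lists the
 * available clocks with the current one in brackets; writing a clock name
 * switches to it and resets the buffers (see tracing_set_clock() above).
 * Illustrative usage:
 *
 *   cat /sys/kernel/tracing/trace_clock
 *   echo mono > /sys/kernel/tracing/trace_clock
 */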
7210 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7211 				   size_t cnt, loff_t *fpos)
7212 {
7213 	struct seq_file *m = filp->private_data;
7214 	struct trace_array *tr = m->private;
7215 	char buf[64];
7216 	const char *clockstr;
7217 	int ret;
7218 
7219 	if (cnt >= sizeof(buf))
7220 		return -EINVAL;
7221 
7222 	if (copy_from_user(buf, ubuf, cnt))
7223 		return -EFAULT;
7224 
7225 	buf[cnt] = 0;
7226 
7227 	clockstr = strstrip(buf);
7228 
7229 	ret = tracing_set_clock(tr, clockstr);
7230 	if (ret)
7231 		return ret;
7232 
7233 	*fpos += cnt;
7234 
7235 	return cnt;
7236 }
7237 
7238 static int tracing_clock_open(struct inode *inode, struct file *file)
7239 {
7240 	struct trace_array *tr = inode->i_private;
7241 	int ret;
7242 
7243 	ret = tracing_check_open_get_tr(tr);
7244 	if (ret)
7245 		return ret;
7246 
7247 	ret = single_open(file, tracing_clock_show, inode->i_private);
7248 	if (ret < 0)
7249 		trace_array_put(tr);
7250 
7251 	return ret;
7252 }
7253 
7254 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7255 {
7256 	struct trace_array *tr = m->private;
7257 
7258 	mutex_lock(&trace_types_lock);
7259 
7260 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7261 		seq_puts(m, "delta [absolute]\n");
7262 	else
7263 		seq_puts(m, "[delta] absolute\n");
7264 
7265 	mutex_unlock(&trace_types_lock);
7266 
7267 	return 0;
7268 }
7269 
7270 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7271 {
7272 	struct trace_array *tr = inode->i_private;
7273 	int ret;
7274 
7275 	ret = tracing_check_open_get_tr(tr);
7276 	if (ret)
7277 		return ret;
7278 
7279 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7280 	if (ret < 0)
7281 		trace_array_put(tr);
7282 
7283 	return ret;
7284 }
7285 
7286 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7287 {
7288 	if (rbe == this_cpu_read(trace_buffered_event))
7289 		return ring_buffer_time_stamp(buffer);
7290 
7291 	return ring_buffer_event_time_stamp(buffer, rbe);
7292 }
7293 
7294 /*
7295  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7296  */
7297 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7298 {
7299 	int ret = 0;
7300 
7301 	mutex_lock(&trace_types_lock);
7302 
7303 	if (set && tr->no_filter_buffering_ref++)
7304 		goto out;
7305 
7306 	if (!set) {
7307 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7308 			ret = -EINVAL;
7309 			goto out;
7310 		}
7311 
7312 		--tr->no_filter_buffering_ref;
7313 	}
7314  out:
7315 	mutex_unlock(&trace_types_lock);
7316 
7317 	return ret;
7318 }
7319 
7320 struct ftrace_buffer_info {
7321 	struct trace_iterator	iter;
7322 	void			*spare;
7323 	unsigned int		spare_cpu;
7324 	unsigned int		read;
7325 };
7326 
7327 #ifdef CONFIG_TRACER_SNAPSHOT
7328 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7329 {
7330 	struct trace_array *tr = inode->i_private;
7331 	struct trace_iterator *iter;
7332 	struct seq_file *m;
7333 	int ret;
7334 
7335 	ret = tracing_check_open_get_tr(tr);
7336 	if (ret)
7337 		return ret;
7338 
7339 	if (file->f_mode & FMODE_READ) {
7340 		iter = __tracing_open(inode, file, true);
7341 		if (IS_ERR(iter))
7342 			ret = PTR_ERR(iter);
7343 	} else {
7344 		/* Writes still need the seq_file to hold the private data */
7345 		ret = -ENOMEM;
7346 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7347 		if (!m)
7348 			goto out;
7349 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7350 		if (!iter) {
7351 			kfree(m);
7352 			goto out;
7353 		}
7354 		ret = 0;
7355 
7356 		iter->tr = tr;
7357 		iter->array_buffer = &tr->max_buffer;
7358 		iter->cpu_file = tracing_get_cpu(inode);
7359 		m->private = iter;
7360 		file->private_data = m;
7361 	}
7362 out:
7363 	if (ret < 0)
7364 		trace_array_put(tr);
7365 
7366 	return ret;
7367 }
7368 
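/*
 * Write handler for the "snapshot" file. Illustrative usage (assuming
 * tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo 1 > /sys/kernel/tracing/snapshot   # allocate if needed and take a snapshot
 *   echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 *   echo 2 > /sys/kernel/tracing/snapshot   # clear the snapshot contents
 */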
7369 static ssize_t
7370 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7371 		       loff_t *ppos)
7372 {
7373 	struct seq_file *m = filp->private_data;
7374 	struct trace_iterator *iter = m->private;
7375 	struct trace_array *tr = iter->tr;
7376 	unsigned long val;
7377 	int ret;
7378 
7379 	ret = tracing_update_buffers();
7380 	if (ret < 0)
7381 		return ret;
7382 
7383 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7384 	if (ret)
7385 		return ret;
7386 
7387 	mutex_lock(&trace_types_lock);
7388 
7389 	if (tr->current_trace->use_max_tr) {
7390 		ret = -EBUSY;
7391 		goto out;
7392 	}
7393 
7394 	arch_spin_lock(&tr->max_lock);
7395 	if (tr->cond_snapshot)
7396 		ret = -EBUSY;
7397 	arch_spin_unlock(&tr->max_lock);
7398 	if (ret)
7399 		goto out;
7400 
7401 	switch (val) {
7402 	case 0:
7403 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7404 			ret = -EINVAL;
7405 			break;
7406 		}
7407 		if (tr->allocated_snapshot)
7408 			free_snapshot(tr);
7409 		break;
7410 	case 1:
7411 /* Only allow per-cpu swap if the ring buffer supports it */
7412 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7413 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7414 			ret = -EINVAL;
7415 			break;
7416 		}
7417 #endif
7418 		if (tr->allocated_snapshot)
7419 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7420 					&tr->array_buffer, iter->cpu_file);
7421 		else
7422 			ret = tracing_alloc_snapshot_instance(tr);
7423 		if (ret < 0)
7424 			break;
7425 		local_irq_disable();
7426 		/* Now, we're going to swap */
7427 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7428 			update_max_tr(tr, current, smp_processor_id(), NULL);
7429 		else
7430 			update_max_tr_single(tr, current, iter->cpu_file);
7431 		local_irq_enable();
7432 		break;
7433 	default:
7434 		if (tr->allocated_snapshot) {
7435 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7436 				tracing_reset_online_cpus(&tr->max_buffer);
7437 			else
7438 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7439 		}
7440 		break;
7441 	}
7442 
7443 	if (ret >= 0) {
7444 		*ppos += cnt;
7445 		ret = cnt;
7446 	}
7447 out:
7448 	mutex_unlock(&trace_types_lock);
7449 	return ret;
7450 }
7451 
7452 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7453 {
7454 	struct seq_file *m = file->private_data;
7455 	int ret;
7456 
7457 	ret = tracing_release(inode, file);
7458 
7459 	if (file->f_mode & FMODE_READ)
7460 		return ret;
7461 
7462 	/* If write only, the seq_file is just a stub */
7463 	if (m)
7464 		kfree(m->private);
7465 	kfree(m);
7466 
7467 	return 0;
7468 }
7469 
7470 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7471 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7472 				    size_t count, loff_t *ppos);
7473 static int tracing_buffers_release(struct inode *inode, struct file *file);
7474 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7475 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7476 
7477 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7478 {
7479 	struct ftrace_buffer_info *info;
7480 	int ret;
7481 
7482 	/* The following checks for tracefs lockdown */
7483 	ret = tracing_buffers_open(inode, filp);
7484 	if (ret < 0)
7485 		return ret;
7486 
7487 	info = filp->private_data;
7488 
7489 	if (info->iter.trace->use_max_tr) {
7490 		tracing_buffers_release(inode, filp);
7491 		return -EBUSY;
7492 	}
7493 
7494 	info->iter.snapshot = true;
7495 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7496 
7497 	return ret;
7498 }
7499 
7500 #endif /* CONFIG_TRACER_SNAPSHOT */
7501 
7502 
7503 static const struct file_operations tracing_thresh_fops = {
7504 	.open		= tracing_open_generic,
7505 	.read		= tracing_thresh_read,
7506 	.write		= tracing_thresh_write,
7507 	.llseek		= generic_file_llseek,
7508 };
7509 
7510 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7511 static const struct file_operations tracing_max_lat_fops = {
7512 	.open		= tracing_open_generic,
7513 	.read		= tracing_max_lat_read,
7514 	.write		= tracing_max_lat_write,
7515 	.llseek		= generic_file_llseek,
7516 };
7517 #endif
7518 
7519 static const struct file_operations set_tracer_fops = {
7520 	.open		= tracing_open_generic,
7521 	.read		= tracing_set_trace_read,
7522 	.write		= tracing_set_trace_write,
7523 	.llseek		= generic_file_llseek,
7524 };
7525 
7526 static const struct file_operations tracing_pipe_fops = {
7527 	.open		= tracing_open_pipe,
7528 	.poll		= tracing_poll_pipe,
7529 	.read		= tracing_read_pipe,
7530 	.splice_read	= tracing_splice_read_pipe,
7531 	.release	= tracing_release_pipe,
7532 	.llseek		= no_llseek,
7533 };
7534 
7535 static const struct file_operations tracing_entries_fops = {
7536 	.open		= tracing_open_generic_tr,
7537 	.read		= tracing_entries_read,
7538 	.write		= tracing_entries_write,
7539 	.llseek		= generic_file_llseek,
7540 	.release	= tracing_release_generic_tr,
7541 };
7542 
7543 static const struct file_operations tracing_total_entries_fops = {
7544 	.open		= tracing_open_generic_tr,
7545 	.read		= tracing_total_entries_read,
7546 	.llseek		= generic_file_llseek,
7547 	.release	= tracing_release_generic_tr,
7548 };
7549 
7550 static const struct file_operations tracing_free_buffer_fops = {
7551 	.open		= tracing_open_generic_tr,
7552 	.write		= tracing_free_buffer_write,
7553 	.release	= tracing_free_buffer_release,
7554 };
7555 
7556 static const struct file_operations tracing_mark_fops = {
7557 	.open		= tracing_open_generic_tr,
7558 	.write		= tracing_mark_write,
7559 	.llseek		= generic_file_llseek,
7560 	.release	= tracing_release_generic_tr,
7561 };
7562 
7563 static const struct file_operations tracing_mark_raw_fops = {
7564 	.open		= tracing_open_generic_tr,
7565 	.write		= tracing_mark_raw_write,
7566 	.llseek		= generic_file_llseek,
7567 	.release	= tracing_release_generic_tr,
7568 };
7569 
7570 static const struct file_operations trace_clock_fops = {
7571 	.open		= tracing_clock_open,
7572 	.read		= seq_read,
7573 	.llseek		= seq_lseek,
7574 	.release	= tracing_single_release_tr,
7575 	.write		= tracing_clock_write,
7576 };
7577 
7578 static const struct file_operations trace_time_stamp_mode_fops = {
7579 	.open		= tracing_time_stamp_mode_open,
7580 	.read		= seq_read,
7581 	.llseek		= seq_lseek,
7582 	.release	= tracing_single_release_tr,
7583 };
7584 
7585 #ifdef CONFIG_TRACER_SNAPSHOT
7586 static const struct file_operations snapshot_fops = {
7587 	.open		= tracing_snapshot_open,
7588 	.read		= seq_read,
7589 	.write		= tracing_snapshot_write,
7590 	.llseek		= tracing_lseek,
7591 	.release	= tracing_snapshot_release,
7592 };
7593 
7594 static const struct file_operations snapshot_raw_fops = {
7595 	.open		= snapshot_raw_open,
7596 	.read		= tracing_buffers_read,
7597 	.release	= tracing_buffers_release,
7598 	.splice_read	= tracing_buffers_splice_read,
7599 	.llseek		= no_llseek,
7600 };
7601 
7602 #endif /* CONFIG_TRACER_SNAPSHOT */
7603 
7604 /*
7605  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7606  * @filp: The active open file structure
7607  * @ubuf: The userspace provided buffer holding the value to write
7608  * @cnt: The maximum number of bytes to read from @ubuf
7609  * @ppos: The current "file" position
7610  *
7611  * This function implements the write interface for a struct trace_min_max_param.
7612  * The filp->private_data must point to a trace_min_max_param structure that
7613  * defines where to write the value, the min and the max acceptable values,
7614  * and a lock to protect the write.
7615  */
7616 static ssize_t
7617 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7618 {
7619 	struct trace_min_max_param *param = filp->private_data;
7620 	u64 val;
7621 	int err;
7622 
7623 	if (!param)
7624 		return -EFAULT;
7625 
7626 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7627 	if (err)
7628 		return err;
7629 
7630 	if (param->lock)
7631 		mutex_lock(param->lock);
7632 
7633 	if (param->min && val < *param->min)
7634 		err = -EINVAL;
7635 
7636 	if (param->max && val > *param->max)
7637 		err = -EINVAL;
7638 
7639 	if (!err)
7640 		*param->val = val;
7641 
7642 	if (param->lock)
7643 		mutex_unlock(param->lock);
7644 
7645 	if (err)
7646 		return err;
7647 
7648 	return cnt;
7649 }
7650 
7651 /*
7652  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7653  * @filp: The active open file structure
7654  * @ubuf: The userspace provided buffer to read value into
7655  * @cnt: The maximum number of bytes to read
7656  * @ppos: The current "file" position
7657  *
7658  * This function implements the read interface for a struct trace_min_max_param.
7659  * The filp->private_data must point to a trace_min_max_param struct with valid
7660  * data.
7661  */
7662 static ssize_t
7663 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7664 {
7665 	struct trace_min_max_param *param = filp->private_data;
7666 	char buf[U64_STR_SIZE];
7667 	int len;
7668 	u64 val;
7669 
7670 	if (!param)
7671 		return -EFAULT;
7672 
7673 	val = *param->val;
7674 
7675 	if (cnt > sizeof(buf))
7676 		cnt = sizeof(buf);
7677 
7678 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7679 
7680 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7681 }
7682 
7683 const struct file_operations trace_min_max_fops = {
7684 	.open		= tracing_open_generic,
7685 	.read		= trace_min_max_read,
7686 	.write		= trace_min_max_write,
7687 };
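/*
 * A purely illustrative (hypothetical) user of trace_min_max_fops, based
 * only on the fields referenced above:
 *
 *	static DEFINE_MUTEX(my_lock);
 *	static u64 my_val, my_min = 1, my_max = 100;
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 * &my_param is then passed as the data pointer when creating the file.
 */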
7688 
7689 #define TRACING_LOG_ERRS_MAX	8
7690 #define TRACING_LOG_LOC_MAX	128
7691 
7692 #define CMD_PREFIX "  Command: "
7693 
7694 struct err_info {
7695 	const char	**errs;	/* ptr to loc-specific array of err strings */
7696 	u8		type;	/* index into errs -> specific err string */
7697 	u8		pos;	/* caret position in cmd; u8 suffices as MAX_FILTER_STR_VAL = 256 */
7698 	u64		ts;
7699 };
7700 
7701 struct tracing_log_err {
7702 	struct list_head	list;
7703 	struct err_info		info;
7704 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7705 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7706 };
7707 
7708 static DEFINE_MUTEX(tracing_err_log_lock);
7709 
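/*
 * Return a tracing_log_err slot to fill in.  New entries are allocated
 * until TRACING_LOG_ERRS_MAX is reached; after that the oldest entry is
 * removed from the list and recycled.  The caller must hold
 * tracing_err_log_lock.
 */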
7710 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7711 {
7712 	struct tracing_log_err *err;
7713 
7714 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7715 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7716 		if (!err)
7717 			err = ERR_PTR(-ENOMEM);
7718 		tr->n_err_log_entries++;
7719 
7720 		return err;
7721 	}
7722 
7723 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7724 	list_del(&err->list);
7725 
7726 	return err;
7727 }
7728 
7729 /**
7730  * err_pos - find the position of a string within a command for error careting
7731  * @cmd: The tracing command that caused the error
7732  * @str: The string to position the caret at within @cmd
7733  *
7734  * Finds the position of the first occurrence of @str within @cmd.  The
7735  * return value can be passed to tracing_log_err() for caret placement
7736  * within @cmd.
7737  *
7738  * Returns the index within @cmd of the first occurrence of @str or 0
7739  * if @str was not found.
7740  */
7741 unsigned int err_pos(char *cmd, const char *str)
7742 {
7743 	char *found;
7744 
7745 	if (WARN_ON(!strlen(cmd)))
7746 		return 0;
7747 
7748 	found = strstr(cmd, str);
7749 	if (found)
7750 		return found - cmd;
7751 
7752 	return 0;
7753 }
7754 
7755 /**
7756  * tracing_log_err - write an error to the tracing error log
7757  * @tr: The associated trace array for the error (NULL for top level array)
7758  * @loc: A string describing where the error occurred
7759  * @cmd: The tracing command that caused the error
7760  * @errs: The array of loc-specific static error strings
7761  * @type: The index into errs[], which produces the specific static err string
7762  * @pos: The position the caret should be placed in the cmd
7763  *
7764  * Writes an error into tracing/error_log of the form:
7765  *
7766  * <loc>: error: <text>
7767  *   Command: <cmd>
7768  *              ^
7769  *
7770  * tracing/error_log is a small log file containing the last
7771  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7772  * unless there has been a tracing error, and the error log can be
7773  * cleared and have its memory freed by writing the empty string in
7774  * truncation mode to it, i.e. echo > tracing/error_log.
7775  *
7776  * NOTE: the @errs array along with the @type param are used to
7777  * produce a static error string - this string is not copied and saved
7778  * when the error is logged - only a pointer to it is saved.  See
7779  * existing callers for examples of how static strings are typically
7780  * defined for use with tracing_log_err().
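 *
 * A purely illustrative call (all names below are hypothetical) could
 * look like:
 *
 *	static const char *foo_errs[] = { "Invalid argument", "Missing field" };
 *
 *	tracing_log_err(tr, "foo_parser", cmd, foo_errs,
 *			FOO_ERR_INVAL, err_pos(cmd, bad_token));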
7781  */
7782 void tracing_log_err(struct trace_array *tr,
7783 		     const char *loc, const char *cmd,
7784 		     const char **errs, u8 type, u8 pos)
7785 {
7786 	struct tracing_log_err *err;
7787 
7788 	if (!tr)
7789 		tr = &global_trace;
7790 
7791 	mutex_lock(&tracing_err_log_lock);
7792 	err = get_tracing_log_err(tr);
7793 	if (PTR_ERR(err) == -ENOMEM) {
7794 		mutex_unlock(&tracing_err_log_lock);
7795 		return;
7796 	}
7797 
7798 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7799 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7800 
7801 	err->info.errs = errs;
7802 	err->info.type = type;
7803 	err->info.pos = pos;
7804 	err->info.ts = local_clock();
7805 
7806 	list_add_tail(&err->list, &tr->err_log);
7807 	mutex_unlock(&tracing_err_log_lock);
7808 }
7809 
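/* Free all logged errors for this trace array and reset the count. */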
7810 static void clear_tracing_err_log(struct trace_array *tr)
7811 {
7812 	struct tracing_log_err *err, *next;
7813 
7814 	mutex_lock(&tracing_err_log_lock);
7815 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7816 		list_del(&err->list);
7817 		kfree(err);
7818 	}
7819 
7820 	tr->n_err_log_entries = 0;
7821 	mutex_unlock(&tracing_err_log_lock);
7822 }
7823 
7824 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7825 {
7826 	struct trace_array *tr = m->private;
7827 
7828 	mutex_lock(&tracing_err_log_lock);
7829 
7830 	return seq_list_start(&tr->err_log, *pos);
7831 }
7832 
7833 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7834 {
7835 	struct trace_array *tr = m->private;
7836 
7837 	return seq_list_next(v, &tr->err_log, pos);
7838 }
7839 
7840 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7841 {
7842 	mutex_unlock(&tracing_err_log_lock);
7843 }
7844 
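/*
 * Print the spaces needed to place a '^' caret under position @pos of
 * the command printed on the preceding CMD_PREFIX line.
 */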
7845 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7846 {
7847 	u8 i;
7848 
7849 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7850 		seq_putc(m, ' ');
7851 	for (i = 0; i < pos; i++)
7852 		seq_putc(m, ' ');
7853 	seq_puts(m, "^\n");
7854 }
7855 
7856 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7857 {
7858 	struct tracing_log_err *err = v;
7859 
7860 	if (err) {
7861 		const char *err_text = err->info.errs[err->info.type];
7862 		u64 sec = err->info.ts;
7863 		u32 nsec;
7864 
7865 		nsec = do_div(sec, NSEC_PER_SEC);
7866 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7867 			   err->loc, err_text);
7868 		seq_printf(m, "%s", err->cmd);
7869 		tracing_err_log_show_pos(m, err->info.pos);
7870 	}
7871 
7872 	return 0;
7873 }
7874 
7875 static const struct seq_operations tracing_err_log_seq_ops = {
7876 	.start  = tracing_err_log_seq_start,
7877 	.next   = tracing_err_log_seq_next,
7878 	.stop   = tracing_err_log_seq_stop,
7879 	.show   = tracing_err_log_seq_show
7880 };
7881 
7882 static int tracing_err_log_open(struct inode *inode, struct file *file)
7883 {
7884 	struct trace_array *tr = inode->i_private;
7885 	int ret = 0;
7886 
7887 	ret = tracing_check_open_get_tr(tr);
7888 	if (ret)
7889 		return ret;
7890 
7891 	/* If this file was opened for write, then erase contents */
7892 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7893 		clear_tracing_err_log(tr);
7894 
7895 	if (file->f_mode & FMODE_READ) {
7896 		ret = seq_open(file, &tracing_err_log_seq_ops);
7897 		if (!ret) {
7898 			struct seq_file *m = file->private_data;
7899 			m->private = tr;
7900 		} else {
7901 			trace_array_put(tr);
7902 		}
7903 	}
7904 	return ret;
7905 }
7906 
7907 static ssize_t tracing_err_log_write(struct file *file,
7908 				     const char __user *buffer,
7909 				     size_t count, loff_t *ppos)
7910 {
7911 	return count;
7912 }
7913 
7914 static int tracing_err_log_release(struct inode *inode, struct file *file)
7915 {
7916 	struct trace_array *tr = inode->i_private;
7917 
7918 	trace_array_put(tr);
7919 
7920 	if (file->f_mode & FMODE_READ)
7921 		seq_release(inode, file);
7922 
7923 	return 0;
7924 }
7925 
7926 static const struct file_operations tracing_err_log_fops = {
7927 	.open           = tracing_err_log_open,
7928 	.write		= tracing_err_log_write,
7929 	.read           = seq_read,
7930 	.llseek         = seq_lseek,
7931 	.release        = tracing_err_log_release,
7932 };
7933 
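/*
 * Open handler for the per-cpu trace_pipe_raw files.  Allocates the
 * ftrace_buffer_info that carries the iterator and the spare page used
 * for reading whole ring buffer pages.
 */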
7934 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7935 {
7936 	struct trace_array *tr = inode->i_private;
7937 	struct ftrace_buffer_info *info;
7938 	int ret;
7939 
7940 	ret = tracing_check_open_get_tr(tr);
7941 	if (ret)
7942 		return ret;
7943 
7944 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7945 	if (!info) {
7946 		trace_array_put(tr);
7947 		return -ENOMEM;
7948 	}
7949 
7950 	mutex_lock(&trace_types_lock);
7951 
7952 	info->iter.tr		= tr;
7953 	info->iter.cpu_file	= tracing_get_cpu(inode);
7954 	info->iter.trace	= tr->current_trace;
7955 	info->iter.array_buffer = &tr->array_buffer;
7956 	info->spare		= NULL;
7957 	/* Force reading ring buffer for first read */
7958 	info->read		= (unsigned int)-1;
7959 
7960 	filp->private_data = info;
7961 
7962 	tr->trace_ref++;
7963 
7964 	mutex_unlock(&trace_types_lock);
7965 
7966 	ret = nonseekable_open(inode, filp);
7967 	if (ret < 0)
7968 		trace_array_put(tr);
7969 
7970 	return ret;
7971 }
7972 
7973 static __poll_t
7974 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7975 {
7976 	struct ftrace_buffer_info *info = filp->private_data;
7977 	struct trace_iterator *iter = &info->iter;
7978 
7979 	return trace_poll(iter, filp, poll_table);
7980 }
7981 
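/*
 * Read handler for trace_pipe_raw: copies raw ring buffer pages to user
 * space via the spare page, blocking (unless O_NONBLOCK) while the
 * buffer is empty.
 */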
7982 static ssize_t
7983 tracing_buffers_read(struct file *filp, char __user *ubuf,
7984 		     size_t count, loff_t *ppos)
7985 {
7986 	struct ftrace_buffer_info *info = filp->private_data;
7987 	struct trace_iterator *iter = &info->iter;
7988 	ssize_t ret = 0;
7989 	ssize_t size;
7990 
7991 	if (!count)
7992 		return 0;
7993 
7994 #ifdef CONFIG_TRACER_MAX_TRACE
7995 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7996 		return -EBUSY;
7997 #endif
7998 
7999 	if (!info->spare) {
8000 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8001 							  iter->cpu_file);
8002 		if (IS_ERR(info->spare)) {
8003 			ret = PTR_ERR(info->spare);
8004 			info->spare = NULL;
8005 		} else {
8006 			info->spare_cpu = iter->cpu_file;
8007 		}
8008 	}
8009 	if (!info->spare)
8010 		return ret;
8011 
8012 	/* Do we have previous read data to read? */
8013 	if (info->read < PAGE_SIZE)
8014 		goto read;
8015 
8016  again:
8017 	trace_access_lock(iter->cpu_file);
8018 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8019 				    &info->spare,
8020 				    count,
8021 				    iter->cpu_file, 0);
8022 	trace_access_unlock(iter->cpu_file);
8023 
8024 	if (ret < 0) {
8025 		if (trace_empty(iter)) {
8026 			if ((filp->f_flags & O_NONBLOCK))
8027 				return -EAGAIN;
8028 
8029 			ret = wait_on_pipe(iter, 0);
8030 			if (ret)
8031 				return ret;
8032 
8033 			goto again;
8034 		}
8035 		return 0;
8036 	}
8037 
8038 	info->read = 0;
8039  read:
8040 	size = PAGE_SIZE - info->read;
8041 	if (size > count)
8042 		size = count;
8043 
8044 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8045 	if (ret == size)
8046 		return -EFAULT;
8047 
8048 	size -= ret;
8049 
8050 	*ppos += size;
8051 	info->read += size;
8052 
8053 	return size;
8054 }
8055 
8056 static int tracing_buffers_release(struct inode *inode, struct file *file)
8057 {
8058 	struct ftrace_buffer_info *info = file->private_data;
8059 	struct trace_iterator *iter = &info->iter;
8060 
8061 	mutex_lock(&trace_types_lock);
8062 
8063 	iter->tr->trace_ref--;
8064 
8065 	__trace_array_put(iter->tr);
8066 
8067 	if (info->spare)
8068 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8069 					   info->spare_cpu, info->spare);
8070 	kvfree(info);
8071 
8072 	mutex_unlock(&trace_types_lock);
8073 
8074 	return 0;
8075 }
8076 
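/*
 * A buffer_ref pins one ring buffer page that has been handed to a pipe
 * via splice; the page is returned to the ring buffer when the last
 * reference is dropped.
 */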
8077 struct buffer_ref {
8078 	struct trace_buffer	*buffer;
8079 	void			*page;
8080 	int			cpu;
8081 	refcount_t		refcount;
8082 };
8083 
8084 static void buffer_ref_release(struct buffer_ref *ref)
8085 {
8086 	if (!refcount_dec_and_test(&ref->refcount))
8087 		return;
8088 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8089 	kfree(ref);
8090 }
8091 
8092 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8093 				    struct pipe_buffer *buf)
8094 {
8095 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8096 
8097 	buffer_ref_release(ref);
8098 	buf->private = 0;
8099 }
8100 
8101 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8102 				struct pipe_buffer *buf)
8103 {
8104 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8105 
8106 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8107 		return false;
8108 
8109 	refcount_inc(&ref->refcount);
8110 	return true;
8111 }
8112 
8113 /* Pipe buffer operations for a buffer. */
8114 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8115 	.release		= buffer_pipe_buf_release,
8116 	.get			= buffer_pipe_buf_get,
8117 };
8118 
8119 /*
8120  * Callback from splice_to_pipe(), used to release any pages left in
8121  * the spd in case we errored out while filling the pipe.
8122  */
8123 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8124 {
8125 	struct buffer_ref *ref =
8126 		(struct buffer_ref *)spd->partial[i].private;
8127 
8128 	buffer_ref_release(ref);
8129 	spd->partial[i].private = 0;
8130 }
8131 
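/*
 * Splice whole ring buffer pages into a pipe without copying the data to
 * user space: each page is read out of the ring buffer, wrapped in a
 * buffer_ref and handed to the pipe via splice_to_pipe().
 */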
8132 static ssize_t
8133 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8134 			    struct pipe_inode_info *pipe, size_t len,
8135 			    unsigned int flags)
8136 {
8137 	struct ftrace_buffer_info *info = file->private_data;
8138 	struct trace_iterator *iter = &info->iter;
8139 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8140 	struct page *pages_def[PIPE_DEF_BUFFERS];
8141 	struct splice_pipe_desc spd = {
8142 		.pages		= pages_def,
8143 		.partial	= partial_def,
8144 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8145 		.ops		= &buffer_pipe_buf_ops,
8146 		.spd_release	= buffer_spd_release,
8147 	};
8148 	struct buffer_ref *ref;
8149 	int entries, i;
8150 	ssize_t ret = 0;
8151 
8152 #ifdef CONFIG_TRACER_MAX_TRACE
8153 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8154 		return -EBUSY;
8155 #endif
8156 
8157 	if (*ppos & (PAGE_SIZE - 1))
8158 		return -EINVAL;
8159 
8160 	if (len & (PAGE_SIZE - 1)) {
8161 		if (len < PAGE_SIZE)
8162 			return -EINVAL;
8163 		len &= PAGE_MASK;
8164 	}
8165 
8166 	if (splice_grow_spd(pipe, &spd))
8167 		return -ENOMEM;
8168 
8169  again:
8170 	trace_access_lock(iter->cpu_file);
8171 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8172 
8173 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8174 		struct page *page;
8175 		int r;
8176 
8177 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8178 		if (!ref) {
8179 			ret = -ENOMEM;
8180 			break;
8181 		}
8182 
8183 		refcount_set(&ref->refcount, 1);
8184 		ref->buffer = iter->array_buffer->buffer;
8185 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8186 		if (IS_ERR(ref->page)) {
8187 			ret = PTR_ERR(ref->page);
8188 			ref->page = NULL;
8189 			kfree(ref);
8190 			break;
8191 		}
8192 		ref->cpu = iter->cpu_file;
8193 
8194 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8195 					  len, iter->cpu_file, 1);
8196 		if (r < 0) {
8197 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8198 						   ref->page);
8199 			kfree(ref);
8200 			break;
8201 		}
8202 
8203 		page = virt_to_page(ref->page);
8204 
8205 		spd.pages[i] = page;
8206 		spd.partial[i].len = PAGE_SIZE;
8207 		spd.partial[i].offset = 0;
8208 		spd.partial[i].private = (unsigned long)ref;
8209 		spd.nr_pages++;
8210 		*ppos += PAGE_SIZE;
8211 
8212 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8213 	}
8214 
8215 	trace_access_unlock(iter->cpu_file);
8216 	spd.nr_pages = i;
8217 
8218 	/* did we read anything? */
8219 	if (!spd.nr_pages) {
8220 		if (ret)
8221 			goto out;
8222 
8223 		ret = -EAGAIN;
8224 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8225 			goto out;
8226 
8227 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8228 		if (ret)
8229 			goto out;
8230 
8231 		goto again;
8232 	}
8233 
8234 	ret = splice_to_pipe(pipe, &spd);
8235 out:
8236 	splice_shrink_spd(&spd);
8237 
8238 	return ret;
8239 }
8240 
8241 static const struct file_operations tracing_buffers_fops = {
8242 	.open		= tracing_buffers_open,
8243 	.read		= tracing_buffers_read,
8244 	.poll		= tracing_buffers_poll,
8245 	.release	= tracing_buffers_release,
8246 	.splice_read	= tracing_buffers_splice_read,
8247 	.llseek		= no_llseek,
8248 };
8249 
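/*
 * Report per-cpu ring buffer statistics (entries, overruns, bytes,
 * timestamps, dropped and read events) for the per-cpu "stats" file.
 */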
8250 static ssize_t
8251 tracing_stats_read(struct file *filp, char __user *ubuf,
8252 		   size_t count, loff_t *ppos)
8253 {
8254 	struct inode *inode = file_inode(filp);
8255 	struct trace_array *tr = inode->i_private;
8256 	struct array_buffer *trace_buf = &tr->array_buffer;
8257 	int cpu = tracing_get_cpu(inode);
8258 	struct trace_seq *s;
8259 	unsigned long cnt;
8260 	unsigned long long t;
8261 	unsigned long usec_rem;
8262 
8263 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8264 	if (!s)
8265 		return -ENOMEM;
8266 
8267 	trace_seq_init(s);
8268 
8269 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8270 	trace_seq_printf(s, "entries: %ld\n", cnt);
8271 
8272 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8273 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8274 
8275 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8276 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8277 
8278 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8279 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8280 
8281 	if (trace_clocks[tr->clock_id].in_ns) {
8282 		/* local or global for trace_clock */
8283 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8284 		usec_rem = do_div(t, USEC_PER_SEC);
8285 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8286 								t, usec_rem);
8287 
8288 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8289 		usec_rem = do_div(t, USEC_PER_SEC);
8290 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8291 	} else {
8292 		/* counter or tsc mode for trace_clock */
8293 		trace_seq_printf(s, "oldest event ts: %llu\n",
8294 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8295 
8296 		trace_seq_printf(s, "now ts: %llu\n",
8297 				ring_buffer_time_stamp(trace_buf->buffer));
8298 	}
8299 
8300 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8301 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8302 
8303 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8304 	trace_seq_printf(s, "read events: %ld\n", cnt);
8305 
8306 	count = simple_read_from_buffer(ubuf, count, ppos,
8307 					s->buffer, trace_seq_used(s));
8308 
8309 	kfree(s);
8310 
8311 	return count;
8312 }
8313 
8314 static const struct file_operations tracing_stats_fops = {
8315 	.open		= tracing_open_generic_tr,
8316 	.read		= tracing_stats_read,
8317 	.llseek		= generic_file_llseek,
8318 	.release	= tracing_release_generic_tr,
8319 };
8320 
8321 #ifdef CONFIG_DYNAMIC_FTRACE
8322 
8323 static ssize_t
8324 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8325 		  size_t cnt, loff_t *ppos)
8326 {
8327 	ssize_t ret;
8328 	char *buf;
8329 	int r;
8330 
8331 	/* 256 should be plenty to hold the amount needed */
8332 	buf = kmalloc(256, GFP_KERNEL);
8333 	if (!buf)
8334 		return -ENOMEM;
8335 
8336 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8337 		      ftrace_update_tot_cnt,
8338 		      ftrace_number_of_pages,
8339 		      ftrace_number_of_groups);
8340 
8341 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8342 	kfree(buf);
8343 	return ret;
8344 }
8345 
8346 static const struct file_operations tracing_dyn_info_fops = {
8347 	.open		= tracing_open_generic,
8348 	.read		= tracing_read_dyn_info,
8349 	.llseek		= generic_file_llseek,
8350 };
8351 #endif /* CONFIG_DYNAMIC_FTRACE */
8352 
8353 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
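/* Function probe callbacks for the "snapshot" command. */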
8354 static void
8355 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8356 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8357 		void *data)
8358 {
8359 	tracing_snapshot_instance(tr);
8360 }
8361 
8362 static void
8363 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8364 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8365 		      void *data)
8366 {
8367 	struct ftrace_func_mapper *mapper = data;
8368 	long *count = NULL;
8369 
8370 	if (mapper)
8371 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8372 
8373 	if (count) {
8374 
8375 		if (*count <= 0)
8376 			return;
8377 
8378 		(*count)--;
8379 	}
8380 
8381 	tracing_snapshot_instance(tr);
8382 }
8383 
8384 static int
8385 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8386 		      struct ftrace_probe_ops *ops, void *data)
8387 {
8388 	struct ftrace_func_mapper *mapper = data;
8389 	long *count = NULL;
8390 
8391 	seq_printf(m, "%ps:", (void *)ip);
8392 
8393 	seq_puts(m, "snapshot");
8394 
8395 	if (mapper)
8396 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8397 
8398 	if (count)
8399 		seq_printf(m, ":count=%ld\n", *count);
8400 	else
8401 		seq_puts(m, ":unlimited\n");
8402 
8403 	return 0;
8404 }
8405 
8406 static int
8407 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8408 		     unsigned long ip, void *init_data, void **data)
8409 {
8410 	struct ftrace_func_mapper *mapper = *data;
8411 
8412 	if (!mapper) {
8413 		mapper = allocate_ftrace_func_mapper();
8414 		if (!mapper)
8415 			return -ENOMEM;
8416 		*data = mapper;
8417 	}
8418 
8419 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8420 }
8421 
8422 static void
8423 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8424 		     unsigned long ip, void *data)
8425 {
8426 	struct ftrace_func_mapper *mapper = data;
8427 
8428 	if (!ip) {
8429 		if (!mapper)
8430 			return;
8431 		free_ftrace_func_mapper(mapper, NULL);
8432 		return;
8433 	}
8434 
8435 	ftrace_func_mapper_remove_ip(mapper, ip);
8436 }
8437 
8438 static struct ftrace_probe_ops snapshot_probe_ops = {
8439 	.func			= ftrace_snapshot,
8440 	.print			= ftrace_snapshot_print,
8441 };
8442 
8443 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8444 	.func			= ftrace_count_snapshot,
8445 	.print			= ftrace_snapshot_print,
8446 	.init			= ftrace_snapshot_init,
8447 	.free			= ftrace_snapshot_free,
8448 };
8449 
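/*
 * Parse and register the "snapshot" function command written to
 * set_ftrace_filter, e.g. "<function>:snapshot[:count]".  A leading '!'
 * unregisters the probe instead.
 */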
8450 static int
8451 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8452 			       char *glob, char *cmd, char *param, int enable)
8453 {
8454 	struct ftrace_probe_ops *ops;
8455 	void *count = (void *)-1;
8456 	char *number;
8457 	int ret;
8458 
8459 	if (!tr)
8460 		return -ENODEV;
8461 
8462 	/* hash funcs only work with set_ftrace_filter */
8463 	if (!enable)
8464 		return -EINVAL;
8465 
8466 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8467 
8468 	if (glob[0] == '!')
8469 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8470 
8471 	if (!param)
8472 		goto out_reg;
8473 
8474 	number = strsep(&param, ":");
8475 
8476 	if (!strlen(number))
8477 		goto out_reg;
8478 
8479 	/*
8480 	 * We use the callback data field (which is a pointer)
8481 	 * as our counter.
8482 	 */
8483 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8484 	if (ret)
8485 		return ret;
8486 
8487  out_reg:
8488 	ret = tracing_alloc_snapshot_instance(tr);
8489 	if (ret < 0)
8490 		goto out;
8491 
8492 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8493 
8494  out:
8495 	return ret < 0 ? ret : 0;
8496 }
8497 
8498 static struct ftrace_func_command ftrace_snapshot_cmd = {
8499 	.name			= "snapshot",
8500 	.func			= ftrace_trace_snapshot_callback,
8501 };
8502 
8503 static __init int register_snapshot_cmd(void)
8504 {
8505 	return register_ftrace_command(&ftrace_snapshot_cmd);
8506 }
8507 #else
8508 static inline __init int register_snapshot_cmd(void) { return 0; }
8509 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8510 
8511 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8512 {
8513 	if (WARN_ON(!tr->dir))
8514 		return ERR_PTR(-ENODEV);
8515 
8516 	/* Top directory uses NULL as the parent */
8517 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8518 		return NULL;
8519 
8520 	/* All sub buffers have a descriptor */
8521 	return tr->dir;
8522 }
8523 
8524 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8525 {
8526 	struct dentry *d_tracer;
8527 
8528 	if (tr->percpu_dir)
8529 		return tr->percpu_dir;
8530 
8531 	d_tracer = tracing_get_dentry(tr);
8532 	if (IS_ERR(d_tracer))
8533 		return NULL;
8534 
8535 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8536 
8537 	MEM_FAIL(!tr->percpu_dir,
8538 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8539 
8540 	return tr->percpu_dir;
8541 }
8542 
8543 static struct dentry *
8544 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8545 		      void *data, long cpu, const struct file_operations *fops)
8546 {
8547 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8548 
8549 	if (ret) /* See tracing_get_cpu() */
8550 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8551 	return ret;
8552 }
8553 
8554 static void
8555 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8556 {
8557 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8558 	struct dentry *d_cpu;
8559 	char cpu_dir[30]; /* 30 characters should be more than enough */
8560 
8561 	if (!d_percpu)
8562 		return;
8563 
8564 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8565 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8566 	if (!d_cpu) {
8567 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8568 		return;
8569 	}
8570 
8571 	/* per cpu trace_pipe */
8572 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8573 				tr, cpu, &tracing_pipe_fops);
8574 
8575 	/* per cpu trace */
8576 	trace_create_cpu_file("trace", 0644, d_cpu,
8577 				tr, cpu, &tracing_fops);
8578 
8579 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8580 				tr, cpu, &tracing_buffers_fops);
8581 
8582 	trace_create_cpu_file("stats", 0444, d_cpu,
8583 				tr, cpu, &tracing_stats_fops);
8584 
8585 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8586 				tr, cpu, &tracing_entries_fops);
8587 
8588 #ifdef CONFIG_TRACER_SNAPSHOT
8589 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8590 				tr, cpu, &snapshot_fops);
8591 
8592 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8593 				tr, cpu, &snapshot_raw_fops);
8594 #endif
8595 }
8596 
8597 #ifdef CONFIG_FTRACE_SELFTEST
8598 /* Let selftest have access to static functions in this file */
8599 #include "trace_selftest.c"
8600 #endif
8601 
8602 static ssize_t
8603 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8604 			loff_t *ppos)
8605 {
8606 	struct trace_option_dentry *topt = filp->private_data;
8607 	char *buf;
8608 
8609 	if (topt->flags->val & topt->opt->bit)
8610 		buf = "1\n";
8611 	else
8612 		buf = "0\n";
8613 
8614 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8615 }
8616 
8617 static ssize_t
8618 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8619 			 loff_t *ppos)
8620 {
8621 	struct trace_option_dentry *topt = filp->private_data;
8622 	unsigned long val;
8623 	int ret;
8624 
8625 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8626 	if (ret)
8627 		return ret;
8628 
8629 	if (val != 0 && val != 1)
8630 		return -EINVAL;
8631 
8632 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8633 		mutex_lock(&trace_types_lock);
8634 		ret = __set_tracer_option(topt->tr, topt->flags,
8635 					  topt->opt, !val);
8636 		mutex_unlock(&trace_types_lock);
8637 		if (ret)
8638 			return ret;
8639 	}
8640 
8641 	*ppos += cnt;
8642 
8643 	return cnt;
8644 }
8645 
8646 
8647 static const struct file_operations trace_options_fops = {
8648 	.open = tracing_open_generic,
8649 	.read = trace_options_read,
8650 	.write = trace_options_write,
8651 	.llseek	= generic_file_llseek,
8652 };
8653 
8654 /*
8655  * In order to pass in both the trace_array descriptor as well as the index
8656  * to the flag that the trace option file represents, the trace_array
8657  * has a character array of trace_flags_index[], which holds the index
8658  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8659  * The address of this character array is passed to the flag option file
8660  * read/write callbacks.
8661  *
8662  * In order to extract both the index and the trace_array descriptor,
8663  * get_tr_index() uses the following algorithm.
8664  *
8665  *   idx = *ptr;
8666  *
8667  * The pointer itself is the address of one index entry, and the value
8668  * stored there is that entry's own position (remember index[1] == 1).
8669  *
8670  * Then, to get the trace_array descriptor, subtracting that index
8671  * from the pointer gives the start of the index array itself.
8672  *
8673  *   ptr - idx == &index[0]
8674  *
8675  * Then a simple container_of() from that pointer gets us to the
8676  * trace_array descriptor.
8677  */
8678 static void get_tr_index(void *data, struct trace_array **ptr,
8679 			 unsigned int *pindex)
8680 {
8681 	*pindex = *(unsigned char *)data;
8682 
8683 	*ptr = container_of(data - *pindex, struct trace_array,
8684 			    trace_flags_index);
8685 }
8686 
8687 static ssize_t
8688 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8689 			loff_t *ppos)
8690 {
8691 	void *tr_index = filp->private_data;
8692 	struct trace_array *tr;
8693 	unsigned int index;
8694 	char *buf;
8695 
8696 	get_tr_index(tr_index, &tr, &index);
8697 
8698 	if (tr->trace_flags & (1 << index))
8699 		buf = "1\n";
8700 	else
8701 		buf = "0\n";
8702 
8703 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8704 }
8705 
8706 static ssize_t
8707 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8708 			 loff_t *ppos)
8709 {
8710 	void *tr_index = filp->private_data;
8711 	struct trace_array *tr;
8712 	unsigned int index;
8713 	unsigned long val;
8714 	int ret;
8715 
8716 	get_tr_index(tr_index, &tr, &index);
8717 
8718 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8719 	if (ret)
8720 		return ret;
8721 
8722 	if (val != 0 && val != 1)
8723 		return -EINVAL;
8724 
8725 	mutex_lock(&event_mutex);
8726 	mutex_lock(&trace_types_lock);
8727 	ret = set_tracer_flag(tr, 1 << index, val);
8728 	mutex_unlock(&trace_types_lock);
8729 	mutex_unlock(&event_mutex);
8730 
8731 	if (ret < 0)
8732 		return ret;
8733 
8734 	*ppos += cnt;
8735 
8736 	return cnt;
8737 }
8738 
8739 static const struct file_operations trace_options_core_fops = {
8740 	.open = tracing_open_generic,
8741 	.read = trace_options_core_read,
8742 	.write = trace_options_core_write,
8743 	.llseek = generic_file_llseek,
8744 };
8745 
8746 struct dentry *trace_create_file(const char *name,
8747 				 umode_t mode,
8748 				 struct dentry *parent,
8749 				 void *data,
8750 				 const struct file_operations *fops)
8751 {
8752 	struct dentry *ret;
8753 
8754 	ret = tracefs_create_file(name, mode, parent, data, fops);
8755 	if (!ret)
8756 		pr_warn("Could not create tracefs '%s' entry\n", name);
8757 
8758 	return ret;
8759 }
8760 
8761 
8762 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8763 {
8764 	struct dentry *d_tracer;
8765 
8766 	if (tr->options)
8767 		return tr->options;
8768 
8769 	d_tracer = tracing_get_dentry(tr);
8770 	if (IS_ERR(d_tracer))
8771 		return NULL;
8772 
8773 	tr->options = tracefs_create_dir("options", d_tracer);
8774 	if (!tr->options) {
8775 		pr_warn("Could not create tracefs directory 'options'\n");
8776 		return NULL;
8777 	}
8778 
8779 	return tr->options;
8780 }
8781 
8782 static void
8783 create_trace_option_file(struct trace_array *tr,
8784 			 struct trace_option_dentry *topt,
8785 			 struct tracer_flags *flags,
8786 			 struct tracer_opt *opt)
8787 {
8788 	struct dentry *t_options;
8789 
8790 	t_options = trace_options_init_dentry(tr);
8791 	if (!t_options)
8792 		return;
8793 
8794 	topt->flags = flags;
8795 	topt->opt = opt;
8796 	topt->tr = tr;
8797 
8798 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8799 				    &trace_options_fops);
8800 
8801 }
8802 
8803 static void
8804 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8805 {
8806 	struct trace_option_dentry *topts;
8807 	struct trace_options *tr_topts;
8808 	struct tracer_flags *flags;
8809 	struct tracer_opt *opts;
8810 	int cnt;
8811 	int i;
8812 
8813 	if (!tracer)
8814 		return;
8815 
8816 	flags = tracer->flags;
8817 
8818 	if (!flags || !flags->opts)
8819 		return;
8820 
8821 	/*
8822 	 * If this is an instance, only create flags for tracers
8823 	 * the instance may have.
8824 	 */
8825 	if (!trace_ok_for_array(tracer, tr))
8826 		return;
8827 
8828 	for (i = 0; i < tr->nr_topts; i++) {
8829 		/* Make sure there are no duplicate flags. */
8830 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8831 			return;
8832 	}
8833 
8834 	opts = flags->opts;
8835 
8836 	for (cnt = 0; opts[cnt].name; cnt++)
8837 		;
8838 
8839 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8840 	if (!topts)
8841 		return;
8842 
8843 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8844 			    GFP_KERNEL);
8845 	if (!tr_topts) {
8846 		kfree(topts);
8847 		return;
8848 	}
8849 
8850 	tr->topts = tr_topts;
8851 	tr->topts[tr->nr_topts].tracer = tracer;
8852 	tr->topts[tr->nr_topts].topts = topts;
8853 	tr->nr_topts++;
8854 
8855 	for (cnt = 0; opts[cnt].name; cnt++) {
8856 		create_trace_option_file(tr, &topts[cnt], flags,
8857 					 &opts[cnt]);
8858 		MEM_FAIL(topts[cnt].entry == NULL,
8859 			  "Failed to create trace option: %s",
8860 			  opts[cnt].name);
8861 	}
8862 }
8863 
8864 static struct dentry *
8865 create_trace_option_core_file(struct trace_array *tr,
8866 			      const char *option, long index)
8867 {
8868 	struct dentry *t_options;
8869 
8870 	t_options = trace_options_init_dentry(tr);
8871 	if (!t_options)
8872 		return NULL;
8873 
8874 	return trace_create_file(option, 0644, t_options,
8875 				 (void *)&tr->trace_flags_index[index],
8876 				 &trace_options_core_fops);
8877 }
8878 
8879 static void create_trace_options_dir(struct trace_array *tr)
8880 {
8881 	struct dentry *t_options;
8882 	bool top_level = tr == &global_trace;
8883 	int i;
8884 
8885 	t_options = trace_options_init_dentry(tr);
8886 	if (!t_options)
8887 		return;
8888 
8889 	for (i = 0; trace_options[i]; i++) {
8890 		if (top_level ||
8891 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8892 			create_trace_option_core_file(tr, trace_options[i], i);
8893 	}
8894 }
8895 
8896 static ssize_t
8897 rb_simple_read(struct file *filp, char __user *ubuf,
8898 	       size_t cnt, loff_t *ppos)
8899 {
8900 	struct trace_array *tr = filp->private_data;
8901 	char buf[64];
8902 	int r;
8903 
8904 	r = tracer_tracing_is_on(tr);
8905 	r = sprintf(buf, "%d\n", r);
8906 
8907 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8908 }
8909 
8910 static ssize_t
8911 rb_simple_write(struct file *filp, const char __user *ubuf,
8912 		size_t cnt, loff_t *ppos)
8913 {
8914 	struct trace_array *tr = filp->private_data;
8915 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8916 	unsigned long val;
8917 	int ret;
8918 
8919 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8920 	if (ret)
8921 		return ret;
8922 
8923 	if (buffer) {
8924 		mutex_lock(&trace_types_lock);
8925 		if (!!val == tracer_tracing_is_on(tr)) {
8926 			val = 0; /* do nothing */
8927 		} else if (val) {
8928 			tracer_tracing_on(tr);
8929 			if (tr->current_trace->start)
8930 				tr->current_trace->start(tr);
8931 		} else {
8932 			tracer_tracing_off(tr);
8933 			if (tr->current_trace->stop)
8934 				tr->current_trace->stop(tr);
8935 		}
8936 		mutex_unlock(&trace_types_lock);
8937 	}
8938 
8939 	(*ppos)++;
8940 
8941 	return cnt;
8942 }
8943 
8944 static const struct file_operations rb_simple_fops = {
8945 	.open		= tracing_open_generic_tr,
8946 	.read		= rb_simple_read,
8947 	.write		= rb_simple_write,
8948 	.release	= tracing_release_generic_tr,
8949 	.llseek		= default_llseek,
8950 };
8951 
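/*
 * The "buffer_percent" file controls how full the ring buffer must be
 * before a blocked reader (e.g. a splice of trace_pipe_raw) is woken up.
 */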
8952 static ssize_t
8953 buffer_percent_read(struct file *filp, char __user *ubuf,
8954 		    size_t cnt, loff_t *ppos)
8955 {
8956 	struct trace_array *tr = filp->private_data;
8957 	char buf[64];
8958 	int r;
8959 
8960 	r = tr->buffer_percent;
8961 	r = sprintf(buf, "%d\n", r);
8962 
8963 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8964 }
8965 
8966 static ssize_t
8967 buffer_percent_write(struct file *filp, const char __user *ubuf,
8968 		     size_t cnt, loff_t *ppos)
8969 {
8970 	struct trace_array *tr = filp->private_data;
8971 	unsigned long val;
8972 	int ret;
8973 
8974 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8975 	if (ret)
8976 		return ret;
8977 
8978 	if (val > 100)
8979 		return -EINVAL;
8980 
8981 	if (!val)
8982 		val = 1;
8983 
8984 	tr->buffer_percent = val;
8985 
8986 	(*ppos)++;
8987 
8988 	return cnt;
8989 }
8990 
8991 static const struct file_operations buffer_percent_fops = {
8992 	.open		= tracing_open_generic_tr,
8993 	.read		= buffer_percent_read,
8994 	.write		= buffer_percent_write,
8995 	.release	= tracing_release_generic_tr,
8996 	.llseek		= default_llseek,
8997 };
8998 
8999 static struct dentry *trace_instance_dir;
9000 
9001 static void
9002 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9003 
9004 static int
9005 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9006 {
9007 	enum ring_buffer_flags rb_flags;
9008 
9009 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9010 
9011 	buf->tr = tr;
9012 
9013 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9014 	if (!buf->buffer)
9015 		return -ENOMEM;
9016 
9017 	buf->data = alloc_percpu(struct trace_array_cpu);
9018 	if (!buf->data) {
9019 		ring_buffer_free(buf->buffer);
9020 		buf->buffer = NULL;
9021 		return -ENOMEM;
9022 	}
9023 
9024 	/* Allocate the first page for all buffers */
9025 	set_buffer_entries(&tr->array_buffer,
9026 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9027 
9028 	return 0;
9029 }
9030 
9031 static int allocate_trace_buffers(struct trace_array *tr, int size)
9032 {
9033 	int ret;
9034 
9035 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9036 	if (ret)
9037 		return ret;
9038 
9039 #ifdef CONFIG_TRACER_MAX_TRACE
9040 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9041 				    allocate_snapshot ? size : 1);
9042 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9043 		ring_buffer_free(tr->array_buffer.buffer);
9044 		tr->array_buffer.buffer = NULL;
9045 		free_percpu(tr->array_buffer.data);
9046 		tr->array_buffer.data = NULL;
9047 		return -ENOMEM;
9048 	}
9049 	tr->allocated_snapshot = allocate_snapshot;
9050 
9051 	/*
9052 	 * Only the top level trace array gets its snapshot allocated
9053 	 * from the kernel command line.
9054 	 */
9055 	allocate_snapshot = false;
9056 #endif
9057 
9058 	return 0;
9059 }
9060 
9061 static void free_trace_buffer(struct array_buffer *buf)
9062 {
9063 	if (buf->buffer) {
9064 		ring_buffer_free(buf->buffer);
9065 		buf->buffer = NULL;
9066 		free_percpu(buf->data);
9067 		buf->data = NULL;
9068 	}
9069 }
9070 
9071 static void free_trace_buffers(struct trace_array *tr)
9072 {
9073 	if (!tr)
9074 		return;
9075 
9076 	free_trace_buffer(&tr->array_buffer);
9077 
9078 #ifdef CONFIG_TRACER_MAX_TRACE
9079 	free_trace_buffer(&tr->max_buffer);
9080 #endif
9081 }
9082 
9083 static void init_trace_flags_index(struct trace_array *tr)
9084 {
9085 	int i;
9086 
9087 	/* Used by the trace options files */
9088 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9089 		tr->trace_flags_index[i] = i;
9090 }
9091 
9092 static void __update_tracer_options(struct trace_array *tr)
9093 {
9094 	struct tracer *t;
9095 
9096 	for (t = trace_types; t; t = t->next)
9097 		add_tracer_options(tr, t);
9098 }
9099 
9100 static void update_tracer_options(struct trace_array *tr)
9101 {
9102 	mutex_lock(&trace_types_lock);
9103 	__update_tracer_options(tr);
9104 	mutex_unlock(&trace_types_lock);
9105 }
9106 
9107 /* Must have trace_types_lock held */
9108 struct trace_array *trace_array_find(const char *instance)
9109 {
9110 	struct trace_array *tr, *found = NULL;
9111 
9112 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9113 		if (tr->name && strcmp(tr->name, instance) == 0) {
9114 			found = tr;
9115 			break;
9116 		}
9117 	}
9118 
9119 	return found;
9120 }
9121 
9122 struct trace_array *trace_array_find_get(const char *instance)
9123 {
9124 	struct trace_array *tr;
9125 
9126 	mutex_lock(&trace_types_lock);
9127 	tr = trace_array_find(instance);
9128 	if (tr)
9129 		tr->ref++;
9130 	mutex_unlock(&trace_types_lock);
9131 
9132 	return tr;
9133 }
9134 
9135 static int trace_array_create_dir(struct trace_array *tr)
9136 {
9137 	int ret;
9138 
9139 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9140 	if (!tr->dir)
9141 		return -EINVAL;
9142 
9143 	ret = event_trace_add_tracer(tr->dir, tr);
9144 	if (ret)
9145 		tracefs_remove(tr->dir);
9146 
9147 	init_tracer_tracefs(tr, tr->dir);
9148 	__update_tracer_options(tr);
9149 
9150 	return ret;
9151 }
9152 
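/*
 * Allocate and initialize a new trace instance.  Called with
 * trace_types_lock and event_mutex held.  On success the instance is
 * added to ftrace_trace_arrays with a reference count of one.
 */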
9153 static struct trace_array *trace_array_create(const char *name)
9154 {
9155 	struct trace_array *tr;
9156 	int ret;
9157 
9158 	ret = -ENOMEM;
9159 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9160 	if (!tr)
9161 		return ERR_PTR(ret);
9162 
9163 	tr->name = kstrdup(name, GFP_KERNEL);
9164 	if (!tr->name)
9165 		goto out_free_tr;
9166 
9167 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9168 		goto out_free_tr;
9169 
9170 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9171 
9172 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9173 
9174 	raw_spin_lock_init(&tr->start_lock);
9175 
9176 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9177 
9178 	tr->current_trace = &nop_trace;
9179 
9180 	INIT_LIST_HEAD(&tr->systems);
9181 	INIT_LIST_HEAD(&tr->events);
9182 	INIT_LIST_HEAD(&tr->hist_vars);
9183 	INIT_LIST_HEAD(&tr->err_log);
9184 
9185 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9186 		goto out_free_tr;
9187 
9188 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9189 		goto out_free_tr;
9190 
9191 	ftrace_init_trace_array(tr);
9192 
9193 	init_trace_flags_index(tr);
9194 
9195 	if (trace_instance_dir) {
9196 		ret = trace_array_create_dir(tr);
9197 		if (ret)
9198 			goto out_free_tr;
9199 	} else
9200 		__trace_early_add_events(tr);
9201 
9202 	list_add(&tr->list, &ftrace_trace_arrays);
9203 
9204 	tr->ref++;
9205 
9206 	return tr;
9207 
9208  out_free_tr:
9209 	ftrace_free_ftrace_ops(tr);
9210 	free_trace_buffers(tr);
9211 	free_cpumask_var(tr->tracing_cpumask);
9212 	kfree(tr->name);
9213 	kfree(tr);
9214 
9215 	return ERR_PTR(ret);
9216 }
9217 
9218 static int instance_mkdir(const char *name)
9219 {
9220 	struct trace_array *tr;
9221 	int ret;
9222 
9223 	mutex_lock(&event_mutex);
9224 	mutex_lock(&trace_types_lock);
9225 
9226 	ret = -EEXIST;
9227 	if (trace_array_find(name))
9228 		goto out_unlock;
9229 
9230 	tr = trace_array_create(name);
9231 
9232 	ret = PTR_ERR_OR_ZERO(tr);
9233 
9234 out_unlock:
9235 	mutex_unlock(&trace_types_lock);
9236 	mutex_unlock(&event_mutex);
9237 	return ret;
9238 }
9239 
9240 /**
9241  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9242  * @name: The name of the trace array to be looked up/created.
9243  *
9244  * Returns a pointer to the trace array with the given name, or NULL
9245  * if it cannot be created.
9246  *
9247  * NOTE: This function increments the reference counter associated with the
9248  * trace array returned. This makes sure it cannot be freed while in use.
9249  * Use trace_array_put() once the trace array is no longer needed.
9250  * If the trace_array is to be freed, trace_array_destroy() needs to
9251  * be called after the trace_array_put(), or simply let user space delete
9252  * it from the tracefs instances directory. But until the
9253  * trace_array_put() is called, user space can not delete it.
9254  *
9255  */
9256 struct trace_array *trace_array_get_by_name(const char *name)
9257 {
9258 	struct trace_array *tr;
9259 
9260 	mutex_lock(&event_mutex);
9261 	mutex_lock(&trace_types_lock);
9262 
9263 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9264 		if (tr->name && strcmp(tr->name, name) == 0)
9265 			goto out_unlock;
9266 	}
9267 
9268 	tr = trace_array_create(name);
9269 
9270 	if (IS_ERR(tr))
9271 		tr = NULL;
9272 out_unlock:
9273 	if (tr)
9274 		tr->ref++;
9275 
9276 	mutex_unlock(&trace_types_lock);
9277 	mutex_unlock(&event_mutex);
9278 	return tr;
9279 }
9280 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9281 
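/*
 * Tear down a trace instance: fails with -EBUSY if the instance still
 * has users, otherwise unregisters its events, probes and files and
 * frees its buffers.  Called with event_mutex and trace_types_lock held.
 */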
9282 static int __remove_instance(struct trace_array *tr)
9283 {
9284 	int i;
9285 
9286 	/* Reference counter for a newly created trace array = 1. */
9287 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9288 		return -EBUSY;
9289 
9290 	list_del(&tr->list);
9291 
9292 	/* Disable all the flags that were enabled coming in */
9293 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9294 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9295 			set_tracer_flag(tr, 1 << i, 0);
9296 	}
9297 
9298 	tracing_set_nop(tr);
9299 	clear_ftrace_function_probes(tr);
9300 	event_trace_del_tracer(tr);
9301 	ftrace_clear_pids(tr);
9302 	ftrace_destroy_function_files(tr);
9303 	tracefs_remove(tr->dir);
9304 	free_percpu(tr->last_func_repeats);
9305 	free_trace_buffers(tr);
9306 
9307 	for (i = 0; i < tr->nr_topts; i++) {
9308 		kfree(tr->topts[i].topts);
9309 	}
9310 	kfree(tr->topts);
9311 
9312 	free_cpumask_var(tr->tracing_cpumask);
9313 	kfree(tr->name);
9314 	kfree(tr);
9315 
9316 	return 0;
9317 }
9318 
9319 int trace_array_destroy(struct trace_array *this_tr)
9320 {
9321 	struct trace_array *tr;
9322 	int ret;
9323 
9324 	if (!this_tr)
9325 		return -EINVAL;
9326 
9327 	mutex_lock(&event_mutex);
9328 	mutex_lock(&trace_types_lock);
9329 
9330 	ret = -ENODEV;
9331 
9332 	/* Make sure the trace array exists before destroying it. */
9333 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9334 		if (tr == this_tr) {
9335 			ret = __remove_instance(tr);
9336 			break;
9337 		}
9338 	}
9339 
9340 	mutex_unlock(&trace_types_lock);
9341 	mutex_unlock(&event_mutex);
9342 
9343 	return ret;
9344 }
9345 EXPORT_SYMBOL_GPL(trace_array_destroy);
9346 
9347 static int instance_rmdir(const char *name)
9348 {
9349 	struct trace_array *tr;
9350 	int ret;
9351 
9352 	mutex_lock(&event_mutex);
9353 	mutex_lock(&trace_types_lock);
9354 
9355 	ret = -ENODEV;
9356 	tr = trace_array_find(name);
9357 	if (tr)
9358 		ret = __remove_instance(tr);
9359 
9360 	mutex_unlock(&trace_types_lock);
9361 	mutex_unlock(&event_mutex);
9362 
9363 	return ret;
9364 }
9365 
9366 static __init void create_trace_instances(struct dentry *d_tracer)
9367 {
9368 	struct trace_array *tr;
9369 
9370 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9371 							 instance_mkdir,
9372 							 instance_rmdir);
9373 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9374 		return;
9375 
9376 	mutex_lock(&event_mutex);
9377 	mutex_lock(&trace_types_lock);
9378 
9379 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9380 		if (!tr->name)
9381 			continue;
9382 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9383 			     "Failed to create instance directory\n"))
9384 			break;
9385 	}
9386 
9387 	mutex_unlock(&trace_types_lock);
9388 	mutex_unlock(&event_mutex);
9389 }
9390 
9391 static void
9392 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9393 {
9394 	struct trace_event_file *file;
9395 	int cpu;
9396 
9397 	trace_create_file("available_tracers", 0444, d_tracer,
9398 			tr, &show_traces_fops);
9399 
9400 	trace_create_file("current_tracer", 0644, d_tracer,
9401 			tr, &set_tracer_fops);
9402 
9403 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9404 			  tr, &tracing_cpumask_fops);
9405 
9406 	trace_create_file("trace_options", 0644, d_tracer,
9407 			  tr, &tracing_iter_fops);
9408 
9409 	trace_create_file("trace", 0644, d_tracer,
9410 			  tr, &tracing_fops);
9411 
9412 	trace_create_file("trace_pipe", 0444, d_tracer,
9413 			  tr, &tracing_pipe_fops);
9414 
9415 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9416 			  tr, &tracing_entries_fops);
9417 
9418 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9419 			  tr, &tracing_total_entries_fops);
9420 
9421 	trace_create_file("free_buffer", 0200, d_tracer,
9422 			  tr, &tracing_free_buffer_fops);
9423 
9424 	trace_create_file("trace_marker", 0220, d_tracer,
9425 			  tr, &tracing_mark_fops);
9426 
9427 	file = __find_event_file(tr, "ftrace", "print");
9428 	if (file && file->dir)
9429 		trace_create_file("trigger", 0644, file->dir, file,
9430 				  &event_trigger_fops);
9431 	tr->trace_marker_file = file;
9432 
9433 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9434 			  tr, &tracing_mark_raw_fops);
9435 
9436 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9437 			  &trace_clock_fops);
9438 
9439 	trace_create_file("tracing_on", 0644, d_tracer,
9440 			  tr, &rb_simple_fops);
9441 
9442 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9443 			  &trace_time_stamp_mode_fops);
9444 
9445 	tr->buffer_percent = 50;
9446 
9447 	trace_create_file("buffer_percent", 0444, d_tracer,
9448 			tr, &buffer_percent_fops);
9449 
9450 	create_trace_options_dir(tr);
9451 
9452 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9453 	trace_create_maxlat_file(tr, d_tracer);
9454 #endif
9455 
9456 	if (ftrace_create_function_files(tr, d_tracer))
9457 		MEM_FAIL(1, "Could not allocate function filter files");
9458 
9459 #ifdef CONFIG_TRACER_SNAPSHOT
9460 	trace_create_file("snapshot", 0644, d_tracer,
9461 			  tr, &snapshot_fops);
9462 #endif
9463 
9464 	trace_create_file("error_log", 0644, d_tracer,
9465 			  tr, &tracing_err_log_fops);
9466 
9467 	for_each_tracing_cpu(cpu)
9468 		tracing_init_tracefs_percpu(tr, cpu);
9469 
9470 	ftrace_init_tracefs(tr, d_tracer);
9471 }
9472 
9473 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9474 {
9475 	struct vfsmount *mnt;
9476 	struct file_system_type *type;
9477 
9478 	/*
9479 	 * To maintain backward compatibility for tools that mount
9480 	 * debugfs to get to the tracing facility, tracefs is automatically
9481 	 * mounted to the debugfs/tracing directory.
9482 	 */
9483 	type = get_fs_type("tracefs");
9484 	if (!type)
9485 		return NULL;
9486 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9487 	put_filesystem(type);
9488 	if (IS_ERR(mnt))
9489 		return NULL;
9490 	mntget(mnt);
9491 
9492 	return mnt;
9493 }
9494 
9495 /**
9496  * tracing_init_dentry - initialize top level trace array
9497  *
9498  * This is called when creating files or directories in the tracing
9499  * directory. It is called via fs_initcall() by any of the boot up code
9500  * and returns 0 once the top level tracing directory has been set up
9501  * (or a negative error code if that is not possible).
9501  */
9502 int tracing_init_dentry(void)
9503 {
9504 	struct trace_array *tr = &global_trace;
9505 
9506 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9507 		pr_warn("Tracing disabled due to lockdown\n");
9508 		return -EPERM;
9509 	}
9510 
9511 	/* The top level trace array uses NULL as parent */
9512 	if (tr->dir)
9513 		return 0;
9514 
9515 	if (WARN_ON(!tracefs_initialized()))
9516 		return -ENODEV;
9517 
9518 	/*
9519 	 * As there may still be users that expect the tracing
9520 	 * files to exist in debugfs/tracing, we must automount
9521 	 * the tracefs file system there, so older tools still
9522 	 * work with the newer kernel.
9523 	 */
9524 	tr->dir = debugfs_create_automount("tracing", NULL,
9525 					   trace_automount, NULL);
9526 
9527 	return 0;
9528 }
9529 
9530 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9531 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9532 
9533 static struct workqueue_struct *eval_map_wq __initdata;
9534 static struct work_struct eval_map_work __initdata;
9535 
9536 static void __init eval_map_work_func(struct work_struct *work)
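/*
 * Inserting the compiled-in eval (enum/sizeof) maps can take a while, so
 * it is pushed off to a workqueue at boot; trace_eval_init() falls back
 * to doing it inline if the workqueue cannot be allocated.
 */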
9537 {
9538 	int len;
9539 
9540 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9541 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9542 }
9543 
9544 static int __init trace_eval_init(void)
9545 {
9546 	INIT_WORK(&eval_map_work, eval_map_work_func);
9547 
9548 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9549 	if (!eval_map_wq) {
9550 		pr_err("Unable to allocate eval_map_wq\n");
9551 		/* Fall back to doing the work synchronously */
9552 		eval_map_work_func(&eval_map_work);
9553 		return -ENOMEM;
9554 	}
9555 
9556 	queue_work(eval_map_wq, &eval_map_work);
9557 	return 0;
9558 }
9559 
9560 static int __init trace_eval_sync(void)
9561 {
9562 	/* Make sure the eval map updates are finished */
9563 	if (eval_map_wq)
9564 		destroy_workqueue(eval_map_wq);
9565 	return 0;
9566 }
9567 
9568 late_initcall_sync(trace_eval_sync);
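/*
 * Ordering note: trace_eval_init() queues the eval map insertion on
 * eval_map_wq from tracer_init_tracefs() (fs_initcall), and
 * destroy_workqueue() above drains any work still pending, so the core
 * kernel eval maps are guaranteed to be in place by the time
 * trace_eval_sync() returns.
 */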
9569 
9570 
9571 #ifdef CONFIG_MODULES
9572 static void trace_module_add_evals(struct module *mod)
9573 {
9574 	if (!mod->num_trace_evals)
9575 		return;
9576 
9577 	/*
9578 	 * Modules with a bad taint do not have events created; do
9579 	 * not bother with their eval (enum) maps either.
9580 	 */
9581 	if (trace_module_has_bad_taint(mod))
9582 		return;
9583 
9584 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9585 }
9586 
9587 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9588 static void trace_module_remove_evals(struct module *mod)
9589 {
9590 	union trace_eval_map_item *map;
9591 	union trace_eval_map_item **last = &trace_eval_maps;
9592 
9593 	if (!mod->num_trace_evals)
9594 		return;
9595 
9596 	mutex_lock(&trace_eval_mutex);
9597 
9598 	map = trace_eval_maps;
9599 
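	/*
	 * Walk the chain of saved eval maps looking for the block that
	 * belongs to @mod; @last trails one link behind so the matching
	 * block can be unlinked from the list below once it is found.
	 */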
9600 	while (map) {
9601 		if (map->head.mod == mod)
9602 			break;
9603 		map = trace_eval_jmp_to_tail(map);
9604 		last = &map->tail.next;
9605 		map = map->tail.next;
9606 	}
9607 	if (!map)
9608 		goto out;
9609 
9610 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9611 	kfree(map);
9612  out:
9613 	mutex_unlock(&trace_eval_mutex);
9614 }
9615 #else
9616 static inline void trace_module_remove_evals(struct module *mod) { }
9617 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9618 
9619 static int trace_module_notify(struct notifier_block *self,
9620 			       unsigned long val, void *data)
9621 {
9622 	struct module *mod = data;
9623 
9624 	switch (val) {
9625 	case MODULE_STATE_COMING:
9626 		trace_module_add_evals(mod);
9627 		break;
9628 	case MODULE_STATE_GOING:
9629 		trace_module_remove_evals(mod);
9630 		break;
9631 	}
9632 
9633 	return NOTIFY_OK;
9634 }
9635 
9636 static struct notifier_block trace_module_nb = {
9637 	.notifier_call = trace_module_notify,
9638 	.priority = 0,
9639 };
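/* Registered from tracer_init_tracefs() below, once tracefs is set up. */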
9640 #endif /* CONFIG_MODULES */
9641 
9642 static __init int tracer_init_tracefs(void)
9643 {
9644 	int ret;
9645 
9646 	trace_access_lock_init();
9647 
9648 	ret = tracing_init_dentry();
9649 	if (ret)
9650 		return 0;
9651 
9652 	event_trace_init();
9653 
9654 	init_tracer_tracefs(&global_trace, NULL);
9655 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9656 
9657 	trace_create_file("tracing_thresh", 0644, NULL,
9658 			&global_trace, &tracing_thresh_fops);
9659 
9660 	trace_create_file("README", 0444, NULL,
9661 			NULL, &tracing_readme_fops);
9662 
9663 	trace_create_file("saved_cmdlines", 0444, NULL,
9664 			NULL, &tracing_saved_cmdlines_fops);
9665 
9666 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9667 			  NULL, &tracing_saved_cmdlines_size_fops);
9668 
9669 	trace_create_file("saved_tgids", 0444, NULL,
9670 			NULL, &tracing_saved_tgids_fops);
9671 
9672 	trace_eval_init();
9673 
9674 	trace_create_eval_file(NULL);
9675 
9676 #ifdef CONFIG_MODULES
9677 	register_module_notifier(&trace_module_nb);
9678 #endif
9679 
9680 #ifdef CONFIG_DYNAMIC_FTRACE
9681 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9682 			NULL, &tracing_dyn_info_fops);
9683 #endif
9684 
9685 	create_trace_instances(NULL);
9686 
9687 	update_tracer_options(&global_trace);
9688 
9689 	return 0;
9690 }
9691 
9692 fs_initcall(tracer_init_tracefs);
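/*
 * The top level files created above (tracing_thresh, README,
 * saved_cmdlines, saved_tgids, ...) are what user space sees at the root
 * of the tracefs mount, typically /sys/kernel/tracing or, through the
 * debugfs automount, /sys/kernel/debug/tracing.
 */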
9693 
9694 static int trace_panic_handler(struct notifier_block *this,
9695 			       unsigned long event, void *unused)
9696 {
9697 	if (ftrace_dump_on_oops)
9698 		ftrace_dump(ftrace_dump_on_oops);
9699 	return NOTIFY_OK;
9700 }
9701 
9702 static struct notifier_block trace_panic_notifier = {
9703 	.notifier_call  = trace_panic_handler,
9704 	.next           = NULL,
9705 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9706 };
9707 
9708 static int trace_die_handler(struct notifier_block *self,
9709 			     unsigned long val,
9710 			     void *data)
9711 {
9712 	switch (val) {
9713 	case DIE_OOPS:
9714 		if (ftrace_dump_on_oops)
9715 			ftrace_dump(ftrace_dump_on_oops);
9716 		break;
9717 	default:
9718 		break;
9719 	}
9720 	return NOTIFY_OK;
9721 }
9722 
9723 static struct notifier_block trace_die_notifier = {
9724 	.notifier_call = trace_die_handler,
9725 	.priority = 200
9726 };
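/*
 * Both notifiers above only act when ftrace_dump_on_oops is set (e.g. via
 * the "ftrace_dump_on_oops" kernel command line option), in which case the
 * trace buffers are dumped to the console on a panic or an oops.
 */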
9727 
9728 /*
9729  * printk is limited to a maximum of 1024 bytes; we really don't need
9730  * that much here. Nothing should be printing 1000 characters anyway.
9731  */
9732 #define TRACE_MAX_PRINT		1000
9733 
9734 /*
9735  * Define here KERN_TRACE so that we have one place to modify
9736  * it if we decide to change what log level the ftrace dump
9737  * should be at.
9738  */
9739 #define KERN_TRACE		KERN_EMERG
9740 
9741 void
9742 trace_printk_seq(struct trace_seq *s)
9743 {
9744 	/* Probably should print a warning here. */
9745 	if (s->seq.len >= TRACE_MAX_PRINT)
9746 		s->seq.len = TRACE_MAX_PRINT;
9747 
9748 	/*
9749 	 * More paranoid code. Although the buffer size is set to
9750 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9751 	 * an extra layer of protection.
9752 	 */
9753 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9754 		s->seq.len = s->seq.size - 1;
9755 
9756 	/* should already be NUL-terminated, but we are paranoid. */
9757 	s->buffer[s->seq.len] = 0;
9758 
9759 	printk(KERN_TRACE "%s", s->buffer);
9760 
9761 	trace_seq_init(s);
9762 }
9763 
9764 void trace_init_global_iter(struct trace_iterator *iter)
9765 {
9766 	iter->tr = &global_trace;
9767 	iter->trace = iter->tr->current_trace;
9768 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9769 	iter->array_buffer = &global_trace.array_buffer;
9770 
9771 	if (iter->trace && iter->trace->open)
9772 		iter->trace->open(iter);
9773 
9774 	/* Annotate start of buffers if we had overruns */
9775 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9776 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9777 
9778 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9779 	if (trace_clocks[iter->tr->clock_id].in_ns)
9780 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9781 }
9782 
9783 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9784 {
9785 	/* use static because iter can be a bit big for the stack */
9786 	static struct trace_iterator iter;
9787 	static atomic_t dump_running;
9788 	struct trace_array *tr = &global_trace;
9789 	unsigned int old_userobj;
9790 	unsigned long flags;
9791 	int cnt = 0, cpu;
9792 
9793 	/* Only allow one dump user at a time. */
9794 	if (atomic_inc_return(&dump_running) != 1) {
9795 		atomic_dec(&dump_running);
9796 		return;
9797 	}
9798 
9799 	/*
9800 	 * Always turn off tracing when we dump.
9801 	 * We don't need to show trace output of what happens
9802 	 * between multiple crashes.
9803 	 *
9804 	 * If the user does a sysrq-z, then they can re-enable
9805 	 * tracing with echo 1 > tracing_on.
9806 	 */
9807 	tracing_off();
9808 
9809 	local_irq_save(flags);
9810 	printk_nmi_direct_enter();
9811 
9812 	/* Simulate the iterator */
9813 	trace_init_global_iter(&iter);
9814 	/* Cannot use kmalloc for iter.temp and iter.fmt */
9815 	iter.temp = static_temp_buf;
9816 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9817 	iter.fmt = static_fmt_buf;
9818 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9819 
9820 	for_each_tracing_cpu(cpu) {
9821 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9822 	}
9823 
9824 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9825 
9826 	/* don't look at user memory in panic mode */
9827 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9828 
9829 	switch (oops_dump_mode) {
9830 	case DUMP_ALL:
9831 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9832 		break;
9833 	case DUMP_ORIG:
9834 		iter.cpu_file = raw_smp_processor_id();
9835 		break;
9836 	case DUMP_NONE:
9837 		goto out_enable;
9838 	default:
9839 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9840 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9841 	}
9842 
9843 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9844 
9845 	/* Did function tracer already get disabled? */
9846 	if (ftrace_is_dead()) {
9847 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9848 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9849 	}
9850 
9851 	/*
9852 	 * We need to stop all tracing on all CPUs to read
9853 	 * the next buffer. This is a bit expensive, but it is
9854 	 * not done often. We read everything we can, and then
9855 	 * release the locks again.
9856 	 */
9857 
9858 	while (!trace_empty(&iter)) {
9859 
9860 		if (!cnt)
9861 			printk(KERN_TRACE "---------------------------------\n");
9862 
9863 		cnt++;
9864 
9865 		trace_iterator_reset(&iter);
9866 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9867 
9868 		if (trace_find_next_entry_inc(&iter) != NULL) {
9869 			int ret;
9870 
9871 			ret = print_trace_line(&iter);
9872 			if (ret != TRACE_TYPE_NO_CONSUME)
9873 				trace_consume(&iter);
9874 		}
9875 		touch_nmi_watchdog();
9876 
9877 		trace_printk_seq(&iter.seq);
9878 	}
9879 
9880 	if (!cnt)
9881 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9882 	else
9883 		printk(KERN_TRACE "---------------------------------\n");
9884 
9885  out_enable:
9886 	tr->trace_flags |= old_userobj;
9887 
9888 	for_each_tracing_cpu(cpu) {
9889 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9890 	}
9891 	atomic_dec(&dump_running);
9892 	printk_nmi_direct_exit();
9893 	local_irq_restore(flags);
9894 }
9895 EXPORT_SYMBOL_GPL(ftrace_dump);
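/*
 * Usage sketch ("fatal_condition" is a placeholder): since ftrace_dump()
 * is exported, module code on a fatal error path may do
 *
 *	if (fatal_condition)
 *		ftrace_dump(DUMP_ALL);
 *
 * DUMP_ALL dumps every CPU's buffer, DUMP_ORIG only the buffer of the CPU
 * that triggered the dump.
 */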
9896 
9897 #define WRITE_BUFSIZE  4096
9898 
9899 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9900 				size_t count, loff_t *ppos,
9901 				int (*createfn)(const char *))
9902 {
9903 	char *kbuf, *buf, *tmp;
9904 	int ret = 0;
9905 	size_t done = 0;
9906 	size_t size;
9907 
9908 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9909 	if (!kbuf)
9910 		return -ENOMEM;
9911 
9912 	while (done < count) {
9913 		size = count - done;
9914 
9915 		if (size >= WRITE_BUFSIZE)
9916 			size = WRITE_BUFSIZE - 1;
9917 
9918 		if (copy_from_user(kbuf, buffer + done, size)) {
9919 			ret = -EFAULT;
9920 			goto out;
9921 		}
9922 		kbuf[size] = '\0';
9923 		buf = kbuf;
9924 		do {
9925 			tmp = strchr(buf, '\n');
9926 			if (tmp) {
9927 				*tmp = '\0';
9928 				size = tmp - buf + 1;
9929 			} else {
9930 				size = strlen(buf);
9931 				if (done + size < count) {
9932 					if (buf != kbuf)
9933 						break;
9934 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9935 					pr_warn("Line length is too long: Should be less than %d\n",
9936 						WRITE_BUFSIZE - 2);
9937 					ret = -EINVAL;
9938 					goto out;
9939 				}
9940 			}
9941 			done += size;
9942 
9943 			/* Remove comments */
9944 			tmp = strchr(buf, '#');
9945 
9946 			if (tmp)
9947 				*tmp = '\0';
9948 
9949 			ret = createfn(buf);
9950 			if (ret)
9951 				goto out;
9952 			buf += size;
9953 
9954 		} while (done < count);
9955 	}
9956 	ret = done;
9957 
9958 out:
9959 	kfree(kbuf);
9960 
9961 	return ret;
9962 }
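/*
 * Usage sketch (names are placeholders): a tracefs command file's write
 * handler can hand line-based parsing to the helper above.
 *
 *	static int create_example_cmd(const char *raw_command)
 *	{
 *		// called once per input line; '#' comments already stripped
 *		return 0;
 *	}
 *
 *	static ssize_t example_write(struct file *file, const char __user *buf,
 *				     size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buf, count, ppos,
 *					       create_example_cmd);
 *	}
 */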
9963 
9964 __init static int tracer_alloc_buffers(void)
9965 {
9966 	int ring_buf_size;
9967 	int ret = -ENOMEM;
9968 
9969 
9970 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9971 		pr_warn("Tracing disabled due to lockdown\n");
9972 		return -EPERM;
9973 	}
9974 
9975 	/*
9976 	 * Make sure we don't accidentally add more trace options
9977 	 * than we have bits for.
9978 	 */
9979 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9980 
9981 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9982 		goto out;
9983 
9984 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9985 		goto out_free_buffer_mask;
9986 
9987 	/* Only allocate trace_printk buffers if a trace_printk exists */
9988 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9989 		/* Must be called before global_trace.buffer is allocated */
9990 		trace_printk_init_buffers();
9991 
9992 	/* To save memory, keep the ring buffer size to its minimum */
9993 	if (ring_buffer_expanded)
9994 		ring_buf_size = trace_buf_size;
9995 	else
9996 		ring_buf_size = 1;
9997 
9998 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9999 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10000 
10001 	raw_spin_lock_init(&global_trace.start_lock);
10002 
10003 	/*
10004 	 * The prepare callback allocates some memory for the ring buffer. We
10005 	 * don't free the buffer if the CPU goes down. If we were to free
10006 	 * the buffer, then the user would lose any trace that was in the
10007 	 * buffer. The memory will be removed once the "instance" is removed.
10008 	 */
10009 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10010 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10011 				      NULL);
10012 	if (ret < 0)
10013 		goto out_free_cpumask;
10014 	/* Used for event triggers */
10015 	ret = -ENOMEM;
10016 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10017 	if (!temp_buffer)
10018 		goto out_rm_hp_state;
10019 
10020 	if (trace_create_savedcmd() < 0)
10021 		goto out_free_temp_buffer;
10022 
10023 	/* TODO: make the number of buffers hot pluggable with CPUS */
10024 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10025 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10026 		goto out_free_savedcmd;
10027 	}
10028 
10029 	if (global_trace.buffer_disabled)
10030 		tracing_off();
10031 
10032 	if (trace_boot_clock) {
10033 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10034 		if (ret < 0)
10035 			pr_warn("Trace clock %s not defined, going back to default\n",
10036 				trace_boot_clock);
10037 	}
10038 
10039 	/*
10040 	 * register_tracer() might reference current_trace, so it
10041 	 * needs to be set before we register anything. This is
10042 	 * just a bootstrap of current_trace anyway.
10043 	 */
10044 	global_trace.current_trace = &nop_trace;
10045 
10046 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10047 
10048 	ftrace_init_global_array_ops(&global_trace);
10049 
10050 	init_trace_flags_index(&global_trace);
10051 
10052 	register_tracer(&nop_trace);
10053 
10054 	/* Function tracing may start here (via kernel command line) */
10055 	init_function_trace();
10056 
10057 	/* All seems OK, enable tracing */
10058 	tracing_disabled = 0;
10059 
10060 	atomic_notifier_chain_register(&panic_notifier_list,
10061 				       &trace_panic_notifier);
10062 
10063 	register_die_notifier(&trace_die_notifier);
10064 
10065 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10066 
10067 	INIT_LIST_HEAD(&global_trace.systems);
10068 	INIT_LIST_HEAD(&global_trace.events);
10069 	INIT_LIST_HEAD(&global_trace.hist_vars);
10070 	INIT_LIST_HEAD(&global_trace.err_log);
10071 	list_add(&global_trace.list, &ftrace_trace_arrays);
10072 
10073 	apply_trace_boot_options();
10074 
10075 	register_snapshot_cmd();
10076 
10077 	test_can_verify();
10078 
10079 	return 0;
10080 
10081 out_free_savedcmd:
10082 	free_saved_cmdlines_buffer(savedcmd);
10083 out_free_temp_buffer:
10084 	ring_buffer_free(temp_buffer);
10085 out_rm_hp_state:
10086 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10087 out_free_cpumask:
10088 	free_cpumask_var(global_trace.tracing_cpumask);
10089 out_free_buffer_mask:
10090 	free_cpumask_var(tracing_buffer_mask);
10091 out:
10092 	return ret;
10093 }
10094 
10095 void __init early_trace_init(void)
10096 {
10097 	if (tracepoint_printk) {
10098 		tracepoint_print_iter =
10099 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10100 		if (MEM_FAIL(!tracepoint_print_iter,
10101 			     "Failed to allocate trace iterator\n"))
10102 			tracepoint_printk = 0;
10103 		else
10104 			static_key_enable(&tracepoint_printk_key.key);
10105 	}
10106 	tracer_alloc_buffers();
10107 }
10108 
10109 void __init trace_init(void)
10110 {
10111 	trace_event_init();
10112 }
10113 
10114 __init static void clear_boot_tracer(void)
10115 {
10116 	/*
10117 	 * The default bootup tracer name points into an init section
10118 	 * that is freed after boot. This function is called at late
10119 	 * initcall time; if the boot tracer was never registered, clear
10120 	 * the pointer so that a later registration does not access
10121 	 * memory that is about to be freed.
10122 	 */
10123 	if (!default_bootup_tracer)
10124 		return;
10125 
10126 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10127 	       default_bootup_tracer);
10128 	default_bootup_tracer = NULL;
10129 }
10130 
10131 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10132 __init static void tracing_set_default_clock(void)
10133 {
10134 	/* sched_clock_stable() is determined in late_initcall */
10135 	if (!trace_boot_clock && !sched_clock_stable()) {
10136 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10137 			pr_warn("Can not set tracing clock due to lockdown\n");
10138 			return;
10139 		}
10140 
10141 		printk(KERN_WARNING
10142 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10143 		       "If you want to keep using the local clock, then add:\n"
10144 		       "  \"trace_clock=local\"\n"
10145 		       "on the kernel command line\n");
10146 		tracing_set_clock(&global_trace, "global");
10147 	}
10148 }
10149 #else
10150 static inline void tracing_set_default_clock(void) { }
10151 #endif
10152 
10153 __init static int late_trace_init(void)
10154 {
10155 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10156 		static_key_disable(&tracepoint_printk_key.key);
10157 		tracepoint_printk = 0;
10158 	}
10159 
10160 	tracing_set_default_clock();
10161 	clear_boot_tracer();
10162 	return 0;
10163 }
10164 
10165 late_initcall_sync(late_trace_init);
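/*
 * Rough boot ordering of the hooks in this file: early_trace_init() and
 * trace_init() are called early from start_kernel(), tracer_init_tracefs()
 * runs at fs_initcall time once tracefs is available, and
 * trace_eval_sync() plus late_trace_init() run at late_initcall_sync time
 * to drain the eval map work, pick the default trace clock and clear the
 * boot tracer pointer.
 */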
10166