xref: /linux/kernel/trace/trace.c (revision a7a08b275a8bbade798c4bdaad07ade68fe7003c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" is first and points to NULL as it must be different
154 	 * than "mod" or "eval_string"
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
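
/*
 * Illustrative sketch (assuming a module that registers three eval maps):
 * the saved array then looks roughly like this, with head and tail being
 * the two extra elements described above:
 *
 *	trace_eval_maps -> [0] head: .mod = the module, .length = 3
 *	                   [1] map:  first trace_eval_map
 *	                   [2] map:  second trace_eval_map
 *	                   [3] map:  third trace_eval_map
 *	                   [4] tail: .next = next saved array, or NULL
 */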
174 
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 	default_bootup_tracer = bootup_tracer_buf;
193 	/* We are using ftrace early, expand it */
194 	ring_buffer_expanded = true;
195 	return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198 
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 		ftrace_dump_on_oops = DUMP_ALL;
203 		return 1;
204 	}
205 
206 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 		ftrace_dump_on_oops = DUMP_ORIG;
208 		return 1;
209 	}
210 
211 	return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214 
215 static int __init stop_trace_on_warning(char *str)
216 {
217 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 		__disable_trace_on_warning = 1;
219 	return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222 
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 	allocate_snapshot = true;
226 	/* We also need the main ring buffer expanded */
227 	ring_buffer_expanded = true;
228 	return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231 
232 
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234 
235 static int __init set_trace_boot_options(char *str)
236 {
237 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 	return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241 
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244 
245 static int __init set_trace_boot_clock(char *str)
246 {
247 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 	trace_boot_clock = trace_boot_clock_buf;
249 	return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252 
253 static int __init set_tracepoint_printk(char *str)
254 {
255 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256 		tracepoint_printk = 1;
257 	return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260 
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263 	tracepoint_printk_stop_on_boot = true;
264 	return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267 
268 unsigned long long ns2usecs(u64 nsec)
269 {
270 	nsec += 500;
271 	do_div(nsec, 1000);
272 	return nsec;
273 }
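
/*
 * For example, the +500 above rounds to the nearest microsecond:
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */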
274 
275 static void
276 trace_process_export(struct trace_export *export,
277 	       struct ring_buffer_event *event, int flag)
278 {
279 	struct trace_entry *entry;
280 	unsigned int size = 0;
281 
282 	if (export->flags & flag) {
283 		entry = ring_buffer_event_data(event);
284 		size = ring_buffer_event_length(event);
285 		export->write(export, entry, size);
286 	}
287 }
288 
289 static DEFINE_MUTEX(ftrace_export_lock);
290 
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292 
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296 
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299 	if (export->flags & TRACE_EXPORT_FUNCTION)
300 		static_branch_inc(&trace_function_exports_enabled);
301 
302 	if (export->flags & TRACE_EXPORT_EVENT)
303 		static_branch_inc(&trace_event_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_MARKER)
306 		static_branch_inc(&trace_marker_exports_enabled);
307 }
308 
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311 	if (export->flags & TRACE_EXPORT_FUNCTION)
312 		static_branch_dec(&trace_function_exports_enabled);
313 
314 	if (export->flags & TRACE_EXPORT_EVENT)
315 		static_branch_dec(&trace_event_exports_enabled);
316 
317 	if (export->flags & TRACE_EXPORT_MARKER)
318 		static_branch_dec(&trace_marker_exports_enabled);
319 }
320 
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323 	struct trace_export *export;
324 
325 	preempt_disable_notrace();
326 
327 	export = rcu_dereference_raw_check(ftrace_exports_list);
328 	while (export) {
329 		trace_process_export(export, event, flag);
330 		export = rcu_dereference_raw_check(export->next);
331 	}
332 
333 	preempt_enable_notrace();
334 }
335 
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339 	rcu_assign_pointer(export->next, *list);
340 	/*
341 	 * We are entering export into the list but another
342 	 * CPU might be walking that list. We need to make sure
343 	 * the export->next pointer is valid before another CPU sees
344 	 * the export pointer included into the list.
345 	 */
346 	rcu_assign_pointer(*list, export);
347 }
348 
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352 	struct trace_export **p;
353 
354 	for (p = list; *p != NULL; p = &(*p)->next)
355 		if (*p == export)
356 			break;
357 
358 	if (*p != export)
359 		return -1;
360 
361 	rcu_assign_pointer(*p, (*p)->next);
362 
363 	return 0;
364 }
365 
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369 	ftrace_exports_enable(export);
370 
371 	add_trace_export(list, export);
372 }
373 
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377 	int ret;
378 
379 	ret = rm_trace_export(list, export);
380 	ftrace_exports_disable(export);
381 
382 	return ret;
383 }
384 
385 int register_ftrace_export(struct trace_export *export)
386 {
387 	if (WARN_ON_ONCE(!export->write))
388 		return -1;
389 
390 	mutex_lock(&ftrace_export_lock);
391 
392 	add_ftrace_export(&ftrace_exports_list, export);
393 
394 	mutex_unlock(&ftrace_export_lock);
395 
396 	return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399 
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402 	int ret;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	ret = rm_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
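
/*
 * A minimal usage sketch (my_write() and my_export are hypothetical names):
 * a module wanting a copy of every trace event could do roughly:
 *
 *	static void my_write(struct trace_export *export,
 *			     const void *entry, unsigned int size)
 *	{
 *		pr_debug("exported %u byte trace entry\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */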
413 
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS						\
416 	(FUNCTION_DEFAULT_FLAGS |					\
417 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
418 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
419 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
420 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
421 	 TRACE_ITER_HASH_PTR)
422 
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
425 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426 
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430 
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436 	.trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438 
439 LIST_HEAD(ftrace_trace_arrays);
440 
441 int trace_array_get(struct trace_array *this_tr)
442 {
443 	struct trace_array *tr;
444 	int ret = -ENODEV;
445 
446 	mutex_lock(&trace_types_lock);
447 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448 		if (tr == this_tr) {
449 			tr->ref++;
450 			ret = 0;
451 			break;
452 		}
453 	}
454 	mutex_unlock(&trace_types_lock);
455 
456 	return ret;
457 }
458 
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461 	WARN_ON(!this_tr->ref);
462 	this_tr->ref--;
463 }
464 
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476 	if (!this_tr)
477 		return;
478 
479 	mutex_lock(&trace_types_lock);
480 	__trace_array_put(this_tr);
481 	mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484 
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487 	int ret;
488 
489 	ret = security_locked_down(LOCKDOWN_TRACEFS);
490 	if (ret)
491 		return ret;
492 
493 	if (tracing_disabled)
494 		return -ENODEV;
495 
496 	if (tr && trace_array_get(tr) < 0)
497 		return -ENODEV;
498 
499 	return 0;
500 }
501 
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503 			      struct trace_buffer *buffer,
504 			      struct ring_buffer_event *event)
505 {
506 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507 	    !filter_match_preds(call->filter, rec)) {
508 		__trace_event_discard_commit(buffer, event);
509 		return 1;
510 	}
511 
512 	return 0;
513 }
514 
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517 	vfree(pid_list->pids);
518 	kfree(pid_list);
519 }
520 
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531 	/*
532 	 * If pid_max changed after filtered_pids was created, we
533 	 * by default ignore all pids greater than the previous pid_max.
534 	 */
535 	if (search_pid >= filtered_pids->pid_max)
536 		return false;
537 
538 	return test_bit(search_pid, filtered_pids->pids);
539 }
540 
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553 		       struct trace_pid_list *filtered_no_pids,
554 		       struct task_struct *task)
555 {
556 	/*
557 	 * If filtered_no_pids is not empty, and the task's pid is listed
558 	 * in filtered_no_pids, then return true.
559 	 * Otherwise, if filtered_pids is empty, that means we can
560 	 * trace all tasks. If it has content, then only trace pids
561 	 * within filtered_pids.
562 	 */
563 
564 	return (filtered_pids &&
565 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
566 		(filtered_no_pids &&
567 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569 
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * If adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork and tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which would happen on exit
580  * of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583 				  struct task_struct *self,
584 				  struct task_struct *task)
585 {
586 	if (!pid_list)
587 		return;
588 
589 	/* For forks, we only add if the forking task is listed */
590 	if (self) {
591 		if (!trace_find_filtered_pid(pid_list, self->pid))
592 			return;
593 	}
594 
595 	/* Sorry, but we don't support pid_max changing after setting */
596 	if (task->pid >= pid_list->pid_max)
597 		return;
598 
599 	/* "self" is set for forks, and NULL for exits */
600 	if (self)
601 		set_bit(task->pid, pid_list->pids);
602 	else
603 		clear_bit(task->pid, pid_list->pids);
604 }
605 
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620 	unsigned long pid = (unsigned long)v;
621 
622 	(*pos)++;
623 
624 	/* pid already is +1 of the actual previous bit */
625 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626 
627 	/* Return pid + 1 to allow zero to be represented */
628 	if (pid < pid_list->pid_max)
629 		return (void *)(pid + 1);
630 
631 	return NULL;
632 }
633 
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647 	unsigned long pid;
648 	loff_t l = 0;
649 
650 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651 	if (pid >= pid_list->pid_max)
652 		return NULL;
653 
654 	/* Return pid + 1 so that zero can be the exit value */
655 	for (pid++; pid && l < *pos;
656 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657 		;
658 	return (void *)pid;
659 }
660 
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671 	unsigned long pid = (unsigned long)v - 1;
672 
673 	seq_printf(m, "%lu\n", pid);
674 	return 0;
675 }
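
/*
 * Example of the pid+1 encoding used by the seq_file helpers above, assuming
 * a pid_list containing pids 0 and 25:
 *
 *	trace_pid_start() returns (void *)1,  shown by trace_pid_show() as "0"
 *	trace_pid_next()  returns (void *)26, shown by trace_pid_show() as "25"
 *	trace_pid_next()  then returns NULL, which stops the iteration.
 */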
676 
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE		127
679 
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681 		    struct trace_pid_list **new_pid_list,
682 		    const char __user *ubuf, size_t cnt)
683 {
684 	struct trace_pid_list *pid_list;
685 	struct trace_parser parser;
686 	unsigned long val;
687 	int nr_pids = 0;
688 	ssize_t read = 0;
689 	ssize_t ret = 0;
690 	loff_t pos;
691 	pid_t pid;
692 
693 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694 		return -ENOMEM;
695 
696 	/*
697 	 * Always create a new array. The write is an all-or-nothing
698 	 * operation: a new array is built whenever the user adds pids, and
699 	 * if the operation fails, then the current list is
700 	 * not modified.
701 	 */
702 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703 	if (!pid_list) {
704 		trace_parser_put(&parser);
705 		return -ENOMEM;
706 	}
707 
708 	pid_list->pid_max = READ_ONCE(pid_max);
709 
710 	/* Only truncating will shrink pid_max */
711 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712 		pid_list->pid_max = filtered_pids->pid_max;
713 
714 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715 	if (!pid_list->pids) {
716 		trace_parser_put(&parser);
717 		kfree(pid_list);
718 		return -ENOMEM;
719 	}
720 
721 	if (filtered_pids) {
722 		/* copy the current bits to the new max */
723 		for_each_set_bit(pid, filtered_pids->pids,
724 				 filtered_pids->pid_max) {
725 			set_bit(pid, pid_list->pids);
726 			nr_pids++;
727 		}
728 	}
729 
730 	while (cnt > 0) {
731 
732 		pos = 0;
733 
734 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
735 		if (ret < 0 || !trace_parser_loaded(&parser))
736 			break;
737 
738 		read += ret;
739 		ubuf += ret;
740 		cnt -= ret;
741 
742 		ret = -EINVAL;
743 		if (kstrtoul(parser.buffer, 0, &val))
744 			break;
745 		if (val >= pid_list->pid_max)
746 			break;
747 
748 		pid = (pid_t)val;
749 
750 		set_bit(pid, pid_list->pids);
751 		nr_pids++;
752 
753 		trace_parser_clear(&parser);
754 		ret = 0;
755 	}
756 	trace_parser_put(&parser);
757 
758 	if (ret < 0) {
759 		trace_free_pid_list(pid_list);
760 		return ret;
761 	}
762 
763 	if (!nr_pids) {
764 		/* Cleared the list of pids */
765 		trace_free_pid_list(pid_list);
766 		read = ret;
767 		pid_list = NULL;
768 	}
769 
770 	*new_pid_list = pid_list;
771 
772 	return read;
773 }
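
/*
 * Note on the sizing above: the bitmap needs one bit per possible pid, hence
 * the (pid_list->pid_max + 7) >> 3 byte allocation. For example, a pid_max of
 * 32768 needs 32768 bits, i.e. 4096 bytes.
 */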
774 
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777 	u64 ts;
778 
779 	/* Early boot up does not have a buffer yet */
780 	if (!buf->buffer)
781 		return trace_clock_local();
782 
783 	ts = ring_buffer_time_stamp(buf->buffer);
784 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785 
786 	return ts;
787 }
788 
789 u64 ftrace_now(int cpu)
790 {
791 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793 
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled" so that it can be used in fast paths such
799  * as the irqsoff tracer. But it may be inaccurate due to races. If you
800  * need to know the accurate state, use tracing_is_on() which is a little
801  * slower, but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805 	/*
806 	 * For quick access (irqsoff uses this in fast path), just
807 	 * return the mirror variable of the state of the ring buffer.
808 	 * It's a little racy, but we don't really care.
809 	 */
810 	smp_rmb();
811 	return !global_trace.buffer_disabled;
812 }
813 
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
819  * This number is purposely set to a low value of 16384.
820  * If a dump on oops happens, it will be much appreciated
821  * not to have to wait for all that output. Anyway, this is
822  * boot-time and run-time configurable.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
825 
826 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827 
828 /* trace_types holds a link list of available tracers. */
829 static struct tracer		*trace_types __read_mostly;
830 
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835 
836 /*
837  * serialize the access of the ring buffer
838  *
839  * The ring buffer serializes readers, but that is only low-level protection.
840  * The validity of the events (returned by ring_buffer_peek() etc.)
841  * is not protected by the ring buffer.
842  *
843  * The content of events may become garbage if we allow other processes to
844  * consume these events concurrently:
845  *   A) the page of the consumed events may become a normal page
846  *      (not a reader page) in the ring buffer, and this page will be
847  *      rewritten by the events producer.
848  *   B) The page of the consumed events may become a page for splice_read,
849  *      and this page will be returned to the system.
850  *
851  * These primitives allow multi-process access to different cpu ring buffers
852  * concurrently.
853  *
854  * These primitives don't distinguish read-only and read-consume access.
855  * Multiple read-only accesses are also serialized.
856  */
857 
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861 
862 static inline void trace_access_lock(int cpu)
863 {
864 	if (cpu == RING_BUFFER_ALL_CPUS) {
865 		/* gain it for accessing the whole ring buffer. */
866 		down_write(&all_cpu_access_lock);
867 	} else {
868 		/* gain it for accessing a cpu ring buffer. */
869 
870 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871 		down_read(&all_cpu_access_lock);
872 
873 		/* Secondly block other access to this @cpu ring buffer. */
874 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
875 	}
876 }
877 
878 static inline void trace_access_unlock(int cpu)
879 {
880 	if (cpu == RING_BUFFER_ALL_CPUS) {
881 		up_write(&all_cpu_access_lock);
882 	} else {
883 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884 		up_read(&all_cpu_access_lock);
885 	}
886 }
887 
888 static inline void trace_access_lock_init(void)
889 {
890 	int cpu;
891 
892 	for_each_possible_cpu(cpu)
893 		mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895 
896 #else
897 
898 static DEFINE_MUTEX(access_lock);
899 
900 static inline void trace_access_lock(int cpu)
901 {
902 	(void)cpu;
903 	mutex_lock(&access_lock);
904 }
905 
906 static inline void trace_access_unlock(int cpu)
907 {
908 	(void)cpu;
909 	mutex_unlock(&access_lock);
910 }
911 
912 static inline void trace_access_lock_init(void)
913 {
914 }
915 
916 #endif
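
/*
 * Typical reader pattern for the primitives above (a sketch of a consuming
 * read of one cpu buffer; ring_buffer_consume() is the existing consuming
 * reader of the ring buffer):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	trace_access_unlock(cpu);
 */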
917 
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920 				 unsigned int trace_ctx,
921 				 int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923 				      struct trace_buffer *buffer,
924 				      unsigned int trace_ctx,
925 				      int skip, struct pt_regs *regs);
926 
927 #else
928 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
929 					unsigned int trace_ctx,
930 					int skip, struct pt_regs *regs)
931 {
932 }
933 static inline void ftrace_trace_stack(struct trace_array *tr,
934 				      struct trace_buffer *buffer,
935 				      unsigned long trace_ctx,
936 				      int skip, struct pt_regs *regs)
937 {
938 }
939 
940 #endif
941 
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944 		  int type, unsigned int trace_ctx)
945 {
946 	struct trace_entry *ent = ring_buffer_event_data(event);
947 
948 	tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950 
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953 			  int type,
954 			  unsigned long len,
955 			  unsigned int trace_ctx)
956 {
957 	struct ring_buffer_event *event;
958 
959 	event = ring_buffer_lock_reserve(buffer, len);
960 	if (event != NULL)
961 		trace_event_setup(event, type, trace_ctx);
962 
963 	return event;
964 }
965 
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968 	if (tr->array_buffer.buffer)
969 		ring_buffer_record_on(tr->array_buffer.buffer);
970 	/*
971 	 * This flag is looked at when buffers haven't been allocated
972 	 * yet, or by some tracers (like irqsoff), that just want to
973 	 * know if the ring buffer has been disabled, but it can handle
974 	 * races of where it gets disabled but we still do a record.
975 	 * As the check is in the fast path of the tracers, it is more
976 	 * important to be fast than accurate.
977 	 */
978 	tr->buffer_disabled = 0;
979 	/* Make the flag seen by readers */
980 	smp_wmb();
981 }
982 
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991 	tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994 
995 
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999 	__this_cpu_write(trace_taskinfo_save, true);
1000 
1001 	/* If this is the temp buffer, we need to commit fully */
1002 	if (this_cpu_read(trace_buffered_event) == event) {
1003 		/* Length is in event->array[0] */
1004 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005 		/* Release the temp buffer */
1006 		this_cpu_dec(trace_buffered_event_cnt);
1007 	} else
1008 		ring_buffer_unlock_commit(buffer, event);
1009 }
1010 
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:	   The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019 	struct ring_buffer_event *event;
1020 	struct trace_buffer *buffer;
1021 	struct print_entry *entry;
1022 	unsigned int trace_ctx;
1023 	int alloc;
1024 
1025 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026 		return 0;
1027 
1028 	if (unlikely(tracing_selftest_running || tracing_disabled))
1029 		return 0;
1030 
1031 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032 
1033 	trace_ctx = tracing_gen_ctx();
1034 	buffer = global_trace.array_buffer.buffer;
1035 	ring_buffer_nest_start(buffer);
1036 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037 					    trace_ctx);
1038 	if (!event) {
1039 		size = 0;
1040 		goto out;
1041 	}
1042 
1043 	entry = ring_buffer_event_data(event);
1044 	entry->ip = ip;
1045 
1046 	memcpy(&entry->buf, str, size);
1047 
1048 	/* Add a newline if necessary */
1049 	if (entry->buf[size - 1] != '\n') {
1050 		entry->buf[size] = '\n';
1051 		entry->buf[size + 1] = '\0';
1052 	} else
1053 		entry->buf[size] = '\0';
1054 
1055 	__buffer_unlock_commit(buffer, event);
1056 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058 	ring_buffer_nest_end(buffer);
1059 	return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
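
/*
 * Callers normally go through the trace_puts() macro rather than calling
 * __trace_puts() directly; for a string literal the macro resolves to
 * __trace_bputs() below instead. A sketch:
 *
 *	trace_puts("reached the slow path\n");
 */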
1062 
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:	   The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070 	struct ring_buffer_event *event;
1071 	struct trace_buffer *buffer;
1072 	struct bputs_entry *entry;
1073 	unsigned int trace_ctx;
1074 	int size = sizeof(struct bputs_entry);
1075 	int ret = 0;
1076 
1077 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078 		return 0;
1079 
1080 	if (unlikely(tracing_selftest_running || tracing_disabled))
1081 		return 0;
1082 
1083 	trace_ctx = tracing_gen_ctx();
1084 	buffer = global_trace.array_buffer.buffer;
1085 
1086 	ring_buffer_nest_start(buffer);
1087 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088 					    trace_ctx);
1089 	if (!event)
1090 		goto out;
1091 
1092 	entry = ring_buffer_event_data(event);
1093 	entry->ip			= ip;
1094 	entry->str			= str;
1095 
1096 	__buffer_unlock_commit(buffer, event);
1097 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098 
1099 	ret = 1;
1100  out:
1101 	ring_buffer_nest_end(buffer);
1102 	return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
1105 
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108 					   void *cond_data)
1109 {
1110 	struct tracer *tracer = tr->current_trace;
1111 	unsigned long flags;
1112 
1113 	if (in_nmi()) {
1114 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1116 		return;
1117 	}
1118 
1119 	if (!tr->allocated_snapshot) {
1120 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121 		internal_trace_puts("*** stopping trace here!   ***\n");
1122 		tracing_off();
1123 		return;
1124 	}
1125 
1126 	/* Note, snapshot can not be used when the tracer uses it */
1127 	if (tracer->use_max_tr) {
1128 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130 		return;
1131 	}
1132 
1133 	local_irq_save(flags);
1134 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1135 	local_irq_restore(flags);
1136 }
1137 
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140 	tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142 
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot with either
1151  * a tracing_snapshot_alloc(), or by doing it manually
1152  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, it will stop tracing.
1155  * Basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159 	struct trace_array *tr = &global_trace;
1160 
1161 	tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
1164 
1165 /**
1166  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167  * @tr:		The tracing instance to snapshot
1168  * @cond_data:	The data to be tested conditionally, and possibly saved
1169  *
1170  * This is the same as tracing_snapshot() except that the snapshot is
1171  * conditional - the snapshot will only happen if the
1172  * cond_snapshot.update() implementation receiving the cond_data
1173  * returns true, which means that the trace array's cond_snapshot
1174  * update() operation used the cond_data to determine whether the
1175  * snapshot should be taken, and if it was, presumably saved it along
1176  * with the snapshot.
1177  */
1178 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179 {
1180 	tracing_snapshot_instance_cond(tr, cond_data);
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183 
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:		The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already done.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200 	void *cond_data = NULL;
1201 
1202 	arch_spin_lock(&tr->max_lock);
1203 
1204 	if (tr->cond_snapshot)
1205 		cond_data = tr->cond_snapshot->cond_data;
1206 
1207 	arch_spin_unlock(&tr->max_lock);
1208 
1209 	return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212 
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214 					struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216 
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219 	int ret;
1220 
1221 	if (!tr->allocated_snapshot) {
1222 
1223 		/* allocate spare buffer */
1224 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226 		if (ret < 0)
1227 			return ret;
1228 
1229 		tr->allocated_snapshot = true;
1230 	}
1231 
1232 	return 0;
1233 }
1234 
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237 	/*
1238 	 * We don't free the ring buffer; instead, we resize it because
1239 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1240 	 * we want to preserve it.
1241 	 */
1242 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243 	set_buffer_entries(&tr->max_buffer, 1);
1244 	tracing_reset_online_cpus(&tr->max_buffer);
1245 	tr->allocated_snapshot = false;
1246 }
1247 
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260 	struct trace_array *tr = &global_trace;
1261 	int ret;
1262 
1263 	ret = tracing_alloc_snapshot_instance(tr);
1264 	WARN_ON(ret < 0);
1265 
1266 	return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269 
1270 /**
1271  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272  *
1273  * This is similar to tracing_snapshot(), but it will allocate the
1274  * snapshot buffer if it isn't already allocated. Use this only
1275  * where it is safe to sleep, as the allocation may sleep.
1276  *
1277  * This causes a swap between the snapshot buffer and the current live
1278  * tracing buffer. You can use this to take snapshots of the live
1279  * trace when some condition is triggered, but continue to trace.
1280  */
1281 void tracing_snapshot_alloc(void)
1282 {
1283 	int ret;
1284 
1285 	ret = tracing_alloc_snapshot();
1286 	if (ret < 0)
1287 		return;
1288 
1289 	tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292 
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:		The tracing instance
1296  * @cond_data:	User data to associate with the snapshot
1297  * @update:	Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307 				 cond_update_fn_t update)
1308 {
1309 	struct cond_snapshot *cond_snapshot;
1310 	int ret = 0;
1311 
1312 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313 	if (!cond_snapshot)
1314 		return -ENOMEM;
1315 
1316 	cond_snapshot->cond_data = cond_data;
1317 	cond_snapshot->update = update;
1318 
1319 	mutex_lock(&trace_types_lock);
1320 
1321 	ret = tracing_alloc_snapshot_instance(tr);
1322 	if (ret)
1323 		goto fail_unlock;
1324 
1325 	if (tr->current_trace->use_max_tr) {
1326 		ret = -EBUSY;
1327 		goto fail_unlock;
1328 	}
1329 
1330 	/*
1331 	 * The cond_snapshot can only change to NULL without the
1332 	 * trace_types_lock. We don't care if we race with it going
1333 	 * to NULL, but we want to make sure that it's not set to
1334 	 * something other than NULL when we get here, which we can
1335 	 * do safely with only holding the trace_types_lock and not
1336 	 * having to take the max_lock.
1337 	 */
1338 	if (tr->cond_snapshot) {
1339 		ret = -EBUSY;
1340 		goto fail_unlock;
1341 	}
1342 
1343 	arch_spin_lock(&tr->max_lock);
1344 	tr->cond_snapshot = cond_snapshot;
1345 	arch_spin_unlock(&tr->max_lock);
1346 
1347 	mutex_unlock(&trace_types_lock);
1348 
1349 	return ret;
1350 
1351  fail_unlock:
1352 	mutex_unlock(&trace_types_lock);
1353 	kfree(cond_snapshot);
1354 	return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
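
/*
 * A minimal usage sketch (my_update(), my_data and condition_of_interest()
 * are hypothetical names); tracing_snapshot_cond() then takes a snapshot
 * only when my_update() returns true:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return condition_of_interest(cond_data);
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */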
1357 
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:		The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370 	int ret = 0;
1371 
1372 	arch_spin_lock(&tr->max_lock);
1373 
1374 	if (!tr->cond_snapshot)
1375 		ret = -EINVAL;
1376 	else {
1377 		kfree(tr->cond_snapshot);
1378 		tr->cond_snapshot = NULL;
1379 	}
1380 
1381 	arch_spin_unlock(&tr->max_lock);
1382 
1383 	return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400 	return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405 	/* Give warning */
1406 	tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411 	return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416 	return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421 	return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425 
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428 	if (tr->array_buffer.buffer)
1429 		ring_buffer_record_off(tr->array_buffer.buffer);
1430 	/*
1431 	 * This flag is looked at when buffers haven't been allocated
1432 	 * yet, or by some tracers (like irqsoff), that just want to
1433 	 * know if the ring buffer has been disabled, but it can handle
1434 	 * races of where it gets disabled but we still do a record.
1435 	 * As the check is in the fast path of the tracers, it is more
1436 	 * important to be fast than accurate.
1437 	 */
1438 	tr->buffer_disabled = 1;
1439 	/* Make the flag seen by readers */
1440 	smp_wmb();
1441 }
1442 
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453 	tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456 
1457 void disable_trace_on_warning(void)
1458 {
1459 	if (__disable_trace_on_warning) {
1460 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461 			"Disabling tracing due to warning\n");
1462 		tracing_off();
1463 	}
1464 }
1465 
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474 	if (tr->array_buffer.buffer)
1475 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476 	return !tr->buffer_disabled;
1477 }
1478 
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484 	return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487 
1488 static int __init set_buf_size(char *str)
1489 {
1490 	unsigned long buf_size;
1491 
1492 	if (!str)
1493 		return 0;
1494 	buf_size = memparse(str, &str);
1495 	/* nr_entries can not be zero */
1496 	if (buf_size == 0)
1497 		return 0;
1498 	trace_buf_size = buf_size;
1499 	return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502 
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505 	unsigned long threshold;
1506 	int ret;
1507 
1508 	if (!str)
1509 		return 0;
1510 	ret = kstrtoul(str, 0, &threshold);
1511 	if (ret < 0)
1512 		return 0;
1513 	tracing_thresh = threshold * 1000;
1514 	return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517 
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520 	return nsecs / 1000;
1521 }
1522 
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531 
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534 	TRACE_FLAGS
1535 	NULL
1536 };
1537 
1538 static struct {
1539 	u64 (*func)(void);
1540 	const char *name;
1541 	int in_ns;		/* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543 	{ trace_clock_local,		"local",	1 },
1544 	{ trace_clock_global,		"global",	1 },
1545 	{ trace_clock_counter,		"counter",	0 },
1546 	{ trace_clock_jiffies,		"uptime",	0 },
1547 	{ trace_clock,			"perf",		1 },
1548 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1549 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1550 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1551 	ARCH_TRACE_CLOCKS
1552 };
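
/*
 * The active clock is selected at run time by writing one of the names
 * above to the tracefs "trace_clock" file, e.g.:
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 */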
1553 
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556 	if (trace_clocks[tr->clock_id].in_ns)
1557 		return true;
1558 
1559 	return false;
1560 }
1561 
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567 	memset(parser, 0, sizeof(*parser));
1568 
1569 	parser->buffer = kmalloc(size, GFP_KERNEL);
1570 	if (!parser->buffer)
1571 		return 1;
1572 
1573 	parser->size = size;
1574 	return 0;
1575 }
1576 
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582 	kfree(parser->buffer);
1583 	parser->buffer = NULL;
1584 }
1585 
1586 /*
1587  * trace_get_user - reads the user input string separated by space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598 	size_t cnt, loff_t *ppos)
1599 {
1600 	char ch;
1601 	size_t read = 0;
1602 	ssize_t ret;
1603 
1604 	if (!*ppos)
1605 		trace_parser_clear(parser);
1606 
1607 	ret = get_user(ch, ubuf++);
1608 	if (ret)
1609 		goto out;
1610 
1611 	read++;
1612 	cnt--;
1613 
1614 	/*
1615 	 * The parser is not finished with the last write,
1616 	 * continue reading the user input without skipping spaces.
1617 	 */
1618 	if (!parser->cont) {
1619 		/* skip white space */
1620 		while (cnt && isspace(ch)) {
1621 			ret = get_user(ch, ubuf++);
1622 			if (ret)
1623 				goto out;
1624 			read++;
1625 			cnt--;
1626 		}
1627 
1628 		parser->idx = 0;
1629 
1630 		/* only spaces were written */
1631 		if (isspace(ch) || !ch) {
1632 			*ppos += read;
1633 			ret = read;
1634 			goto out;
1635 		}
1636 	}
1637 
1638 	/* read the non-space input */
1639 	while (cnt && !isspace(ch) && ch) {
1640 		if (parser->idx < parser->size - 1)
1641 			parser->buffer[parser->idx++] = ch;
1642 		else {
1643 			ret = -EINVAL;
1644 			goto out;
1645 		}
1646 		ret = get_user(ch, ubuf++);
1647 		if (ret)
1648 			goto out;
1649 		read++;
1650 		cnt--;
1651 	}
1652 
1653 	/* We either got finished input or we have to wait for another call. */
1654 	if (isspace(ch) || !ch) {
1655 		parser->buffer[parser->idx] = 0;
1656 		parser->cont = false;
1657 	} else if (parser->idx < parser->size - 1) {
1658 		parser->cont = true;
1659 		parser->buffer[parser->idx++] = ch;
1660 		/* Make sure the parsed string always terminates with '\0'. */
1661 		parser->buffer[parser->idx] = 0;
1662 	} else {
1663 		ret = -EINVAL;
1664 		goto out;
1665 	}
1666 
1667 	*ppos += read;
1668 	ret = read;
1669 
1670 out:
1671 	return ret;
1672 }
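
/*
 * Example (sketch): for a write of "foo bar\n", the first call fills
 * parser->buffer with "foo" (also consuming the following space) and a
 * second call, with ubuf and cnt advanced by the caller, yields "bar".
 */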
1673 
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677 	int len;
1678 
1679 	if (trace_seq_used(s) <= s->seq.readpos)
1680 		return -EBUSY;
1681 
1682 	len = trace_seq_used(s) - s->seq.readpos;
1683 	if (cnt > len)
1684 		cnt = len;
1685 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686 
1687 	s->seq.readpos += cnt;
1688 	return cnt;
1689 }
1690 
1691 unsigned long __read_mostly	tracing_thresh;
1692 static const struct file_operations tracing_max_lat_fops;
1693 
1694 #ifdef LATENCY_FS_NOTIFY
1695 
1696 static struct workqueue_struct *fsnotify_wq;
1697 
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700 	struct trace_array *tr = container_of(work, struct trace_array,
1701 					      fsnotify_work);
1702 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704 
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707 	struct trace_array *tr = container_of(iwork, struct trace_array,
1708 					      fsnotify_irqwork);
1709 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711 
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713 				     struct dentry *d_tracer)
1714 {
1715 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718 					      d_tracer, &tr->max_latency,
1719 					      &tracing_max_lat_fops);
1720 }
1721 
1722 __init static int latency_fsnotify_init(void)
1723 {
1724 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1726 	if (!fsnotify_wq) {
1727 		pr_err("Unable to allocate tr_max_lat_wq\n");
1728 		return -ENOMEM;
1729 	}
1730 	return 0;
1731 }
1732 
1733 late_initcall_sync(latency_fsnotify_init);
1734 
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737 	if (!fsnotify_wq)
1738 		return;
1739 	/*
1740 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741 	 * possible that we are called from __schedule() or do_idle(), which
1742 	 * could cause a deadlock.
1743 	 */
1744 	irq_work_queue(&tr->fsnotify_irqwork);
1745 }
1746 
1747 /*
1748  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1749  *  defined(CONFIG_FSNOTIFY)
1750  */
1751 #else
1752 
1753 #define trace_create_maxlat_file(tr, d_tracer)				\
1754 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1755 			  &tr->max_latency, &tracing_max_lat_fops)
1756 
1757 #endif
1758 
1759 #ifdef CONFIG_TRACER_MAX_TRACE
1760 /*
1761  * Copy the new maximum trace into the separate maximum-trace
1762  * structure. (this way the maximum trace is permanently saved,
1763  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764  */
1765 static void
1766 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767 {
1768 	struct array_buffer *trace_buf = &tr->array_buffer;
1769 	struct array_buffer *max_buf = &tr->max_buffer;
1770 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772 
1773 	max_buf->cpu = cpu;
1774 	max_buf->time_start = data->preempt_timestamp;
1775 
1776 	max_data->saved_latency = tr->max_latency;
1777 	max_data->critical_start = data->critical_start;
1778 	max_data->critical_end = data->critical_end;
1779 
1780 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781 	max_data->pid = tsk->pid;
1782 	/*
1783 	 * If tsk == current, then use current_uid(), as that does not use
1784 	 * RCU. The irq tracer can be called out of RCU scope.
1785 	 */
1786 	if (tsk == current)
1787 		max_data->uid = current_uid();
1788 	else
1789 		max_data->uid = task_uid(tsk);
1790 
1791 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792 	max_data->policy = tsk->policy;
1793 	max_data->rt_priority = tsk->rt_priority;
1794 
1795 	/* record this tasks comm */
1796 	tracing_record_cmdline(tsk);
1797 	latency_fsnotify(tr);
1798 }
1799 
1800 /**
1801  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802  * @tr: tracer
1803  * @tsk: the task with the latency
1804  * @cpu: The cpu that initiated the trace.
1805  * @cond_data: User data associated with a conditional snapshot
1806  *
1807  * Flip the buffers between the @tr and the max_tr and record information
1808  * about which task was the cause of this latency.
1809  */
1810 void
1811 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812 	      void *cond_data)
1813 {
1814 	if (tr->stop_count)
1815 		return;
1816 
1817 	WARN_ON_ONCE(!irqs_disabled());
1818 
1819 	if (!tr->allocated_snapshot) {
1820 		/* Only the nop tracer should hit this when disabling */
1821 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822 		return;
1823 	}
1824 
1825 	arch_spin_lock(&tr->max_lock);
1826 
1827 	/* Inherit the recordable setting from array_buffer */
1828 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829 		ring_buffer_record_on(tr->max_buffer.buffer);
1830 	else
1831 		ring_buffer_record_off(tr->max_buffer.buffer);
1832 
1833 #ifdef CONFIG_TRACER_SNAPSHOT
1834 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835 		goto out_unlock;
1836 #endif
1837 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838 
1839 	__update_max_tr(tr, tsk, cpu);
1840 
1841  out_unlock:
1842 	arch_spin_unlock(&tr->max_lock);
1843 }
1844 
1845 /**
1846  * update_max_tr_single - only copy one trace over, and reset the rest
1847  * @tr: tracer
1848  * @tsk: task with the latency
1849  * @cpu: the cpu of the buffer to copy.
1850  *
1851  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852  */
1853 void
1854 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855 {
1856 	int ret;
1857 
1858 	if (tr->stop_count)
1859 		return;
1860 
1861 	WARN_ON_ONCE(!irqs_disabled());
1862 	if (!tr->allocated_snapshot) {
1863 		/* Only the nop tracer should hit this when disabling */
1864 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865 		return;
1866 	}
1867 
1868 	arch_spin_lock(&tr->max_lock);
1869 
1870 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871 
1872 	if (ret == -EBUSY) {
1873 		/*
1874 		 * We failed to swap the buffer due to a commit taking
1875 		 * place on this CPU. We fail to record, but we write a
1876 		 * message into the max trace buffer (no one writes directly
1877 		 * to it) to flag that the swap failed.
1878 		 */
1879 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880 			"Failed to swap buffers due to commit in progress\n");
1881 	}
1882 
1883 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884 
1885 	__update_max_tr(tr, tsk, cpu);
1886 	arch_spin_unlock(&tr->max_lock);
1887 }
1888 #endif /* CONFIG_TRACER_MAX_TRACE */
1889 
1890 static int wait_on_pipe(struct trace_iterator *iter, int full)
1891 {
1892 	/* Iterators are static, they should be filled or empty */
1893 	if (trace_buffer_iter(iter, iter->cpu_file))
1894 		return 0;
1895 
1896 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897 				full);
1898 }
1899 
1900 #ifdef CONFIG_FTRACE_STARTUP_TEST
1901 static bool selftests_can_run;
1902 
1903 struct trace_selftests {
1904 	struct list_head		list;
1905 	struct tracer			*type;
1906 };
1907 
1908 static LIST_HEAD(postponed_selftests);
1909 
1910 static int save_selftest(struct tracer *type)
1911 {
1912 	struct trace_selftests *selftest;
1913 
1914 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915 	if (!selftest)
1916 		return -ENOMEM;
1917 
1918 	selftest->type = type;
1919 	list_add(&selftest->list, &postponed_selftests);
1920 	return 0;
1921 }
1922 
1923 static int run_tracer_selftest(struct tracer *type)
1924 {
1925 	struct trace_array *tr = &global_trace;
1926 	struct tracer *saved_tracer = tr->current_trace;
1927 	int ret;
1928 
1929 	if (!type->selftest || tracing_selftest_disabled)
1930 		return 0;
1931 
1932 	/*
1933 	 * If a tracer registers early in boot up (before scheduling is
1934 	 * initialized and such), then do not run its selftests yet.
1935 	 * Instead, run it a little later in the boot process.
1936 	 */
1937 	if (!selftests_can_run)
1938 		return save_selftest(type);
1939 
1940 	if (!tracing_is_on()) {
1941 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942 			type->name);
1943 		return 0;
1944 	}
1945 
1946 	/*
1947 	 * Run a selftest on this tracer.
1948 	 * Here we reset the trace buffer, and set the current
1949 	 * tracer to be this tracer. The tracer can then run some
1950 	 * internal tracing to verify that everything is in order.
1951 	 * If we fail, we do not register this tracer.
1952 	 */
1953 	tracing_reset_online_cpus(&tr->array_buffer);
1954 
1955 	tr->current_trace = type;
1956 
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958 	if (type->use_max_tr) {
1959 		/* If we expanded the buffers, make sure the max is expanded too */
1960 		if (ring_buffer_expanded)
1961 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962 					   RING_BUFFER_ALL_CPUS);
1963 		tr->allocated_snapshot = true;
1964 	}
1965 #endif
1966 
1967 	/* the test is responsible for initializing and enabling */
1968 	pr_info("Testing tracer %s: ", type->name);
1969 	ret = type->selftest(type, tr);
1970 	/* the test is responsible for resetting too */
1971 	tr->current_trace = saved_tracer;
1972 	if (ret) {
1973 		printk(KERN_CONT "FAILED!\n");
1974 		/* Add the warning after printing 'FAILED' */
1975 		WARN_ON(1);
1976 		return -1;
1977 	}
1978 	/* Only reset on passing, to avoid touching corrupted buffers */
1979 	tracing_reset_online_cpus(&tr->array_buffer);
1980 
1981 #ifdef CONFIG_TRACER_MAX_TRACE
1982 	if (type->use_max_tr) {
1983 		tr->allocated_snapshot = false;
1984 
1985 		/* Shrink the max buffer again */
1986 		if (ring_buffer_expanded)
1987 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1988 					   RING_BUFFER_ALL_CPUS);
1989 	}
1990 #endif
1991 
1992 	printk(KERN_CONT "PASSED\n");
1993 	return 0;
1994 }
1995 
1996 static __init int init_trace_selftests(void)
1997 {
1998 	struct trace_selftests *p, *n;
1999 	struct tracer *t, **last;
2000 	int ret;
2001 
2002 	selftests_can_run = true;
2003 
2004 	mutex_lock(&trace_types_lock);
2005 
2006 	if (list_empty(&postponed_selftests))
2007 		goto out;
2008 
2009 	pr_info("Running postponed tracer tests:\n");
2010 
2011 	tracing_selftest_running = true;
2012 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013 		/* This loop can take minutes when sanitizers are enabled, so
2014 		 * let's make sure we allow RCU processing.
2015 		 */
2016 		cond_resched();
2017 		ret = run_tracer_selftest(p->type);
2018 		/* If the test fails, then warn and remove from available_tracers */
2019 		if (ret < 0) {
2020 			WARN(1, "tracer: %s failed selftest, disabling\n",
2021 			     p->type->name);
2022 			last = &trace_types;
2023 			for (t = trace_types; t; t = t->next) {
2024 				if (t == p->type) {
2025 					*last = t->next;
2026 					break;
2027 				}
2028 				last = &t->next;
2029 			}
2030 		}
2031 		list_del(&p->list);
2032 		kfree(p);
2033 	}
2034 	tracing_selftest_running = false;
2035 
2036  out:
2037 	mutex_unlock(&trace_types_lock);
2038 
2039 	return 0;
2040 }
2041 core_initcall(init_trace_selftests);
2042 #else
2043 static inline int run_tracer_selftest(struct tracer *type)
2044 {
2045 	return 0;
2046 }
2047 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2048 
2049 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050 
2051 static void __init apply_trace_boot_options(void);
2052 
2053 /**
2054  * register_tracer - register a tracer with the ftrace system.
2055  * @type: the plugin for the tracer
2056  *
2057  * Register a new plugin tracer.
2058  */
2059 int __init register_tracer(struct tracer *type)
2060 {
2061 	struct tracer *t;
2062 	int ret = 0;
2063 
2064 	if (!type->name) {
2065 		pr_info("Tracer must have a name\n");
2066 		return -1;
2067 	}
2068 
2069 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071 		return -1;
2072 	}
2073 
2074 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075 		pr_warn("Can not register tracer %s due to lockdown\n",
2076 			   type->name);
2077 		return -EPERM;
2078 	}
2079 
2080 	mutex_lock(&trace_types_lock);
2081 
2082 	tracing_selftest_running = true;
2083 
2084 	for (t = trace_types; t; t = t->next) {
2085 		if (strcmp(type->name, t->name) == 0) {
2086 			/* already found */
2087 			pr_info("Tracer %s already registered\n",
2088 				type->name);
2089 			ret = -1;
2090 			goto out;
2091 		}
2092 	}
2093 
2094 	if (!type->set_flag)
2095 		type->set_flag = &dummy_set_flag;
2096 	if (!type->flags) {
2097 		/* allocate a dummy tracer_flags */
2098 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099 		if (!type->flags) {
2100 			ret = -ENOMEM;
2101 			goto out;
2102 		}
2103 		type->flags->val = 0;
2104 		type->flags->opts = dummy_tracer_opt;
2105 	} else if (!type->flags->opts) {
2106 		type->flags->opts = dummy_tracer_opt;
2107 	}
2108 
2109 	/* store the tracer for __set_tracer_option */
2110 	type->flags->trace = type;
2111 
2112 	ret = run_tracer_selftest(type);
2113 	if (ret < 0)
2114 		goto out;
2115 
2116 	type->next = trace_types;
2117 	trace_types = type;
2118 	add_tracer_options(&global_trace, type);
2119 
2120  out:
2121 	tracing_selftest_running = false;
2122 	mutex_unlock(&trace_types_lock);
2123 
2124 	if (ret || !default_bootup_tracer)
2125 		goto out_unlock;
2126 
2127 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128 		goto out_unlock;
2129 
2130 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131 	/* Do we want this tracer to start on bootup? */
2132 	tracing_set_tracer(&global_trace, type->name);
2133 	default_bootup_tracer = NULL;
2134 
2135 	apply_trace_boot_options();
2136 
2137 	/* disable other selftests, since running this tracer will break them. */
2138 	disable_tracing_selftest("running a tracer");
2139 
2140  out_unlock:
2141 	return ret;
2142 }
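
/*
 * Example (illustrative sketch): registering a minimal built-in tracer.
 * register_tracer() is __init and not exported, so this only works for
 * code compiled into the kernel; all names below are hypothetical.
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */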
2143 
2144 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145 {
2146 	struct trace_buffer *buffer = buf->buffer;
2147 
2148 	if (!buffer)
2149 		return;
2150 
2151 	ring_buffer_record_disable(buffer);
2152 
2153 	/* Make sure all commits have finished */
2154 	synchronize_rcu();
2155 	ring_buffer_reset_cpu(buffer, cpu);
2156 
2157 	ring_buffer_record_enable(buffer);
2158 }
2159 
2160 void tracing_reset_online_cpus(struct array_buffer *buf)
2161 {
2162 	struct trace_buffer *buffer = buf->buffer;
2163 
2164 	if (!buffer)
2165 		return;
2166 
2167 	ring_buffer_record_disable(buffer);
2168 
2169 	/* Make sure all commits have finished */
2170 	synchronize_rcu();
2171 
2172 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173 
2174 	ring_buffer_reset_online_cpus(buffer);
2175 
2176 	ring_buffer_record_enable(buffer);
2177 }
2178 
2179 /* Must have trace_types_lock held */
2180 void tracing_reset_all_online_cpus(void)
2181 {
2182 	struct trace_array *tr;
2183 
2184 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185 		if (!tr->clear_trace)
2186 			continue;
2187 		tr->clear_trace = false;
2188 		tracing_reset_online_cpus(&tr->array_buffer);
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190 		tracing_reset_online_cpus(&tr->max_buffer);
2191 #endif
2192 	}
2193 }
2194 
2195 /*
2196  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2197  * is the tgid last observed corresponding to pid=i.
2198  */
2199 static int *tgid_map;
2200 
2201 /* The maximum valid index into tgid_map. */
2202 static size_t tgid_map_max;
2203 
2204 #define SAVED_CMDLINES_DEFAULT 128
2205 #define NO_CMDLINE_MAP UINT_MAX
2206 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2207 struct saved_cmdlines_buffer {
2208 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2209 	unsigned *map_cmdline_to_pid;
2210 	unsigned cmdline_num;
2211 	int cmdline_idx;
2212 	char *saved_cmdlines;
2213 };
2214 static struct saved_cmdlines_buffer *savedcmd;
2215 
2216 static inline char *get_saved_cmdlines(int idx)
2217 {
2218 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219 }
2220 
2221 static inline void set_cmdline(int idx, const char *cmdline)
2222 {
2223 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224 }
2225 
2226 static int allocate_cmdlines_buffer(unsigned int val,
2227 				    struct saved_cmdlines_buffer *s)
2228 {
2229 	s->map_cmdline_to_pid = kmalloc_array(val,
2230 					      sizeof(*s->map_cmdline_to_pid),
2231 					      GFP_KERNEL);
2232 	if (!s->map_cmdline_to_pid)
2233 		return -ENOMEM;
2234 
2235 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236 	if (!s->saved_cmdlines) {
2237 		kfree(s->map_cmdline_to_pid);
2238 		return -ENOMEM;
2239 	}
2240 
2241 	s->cmdline_idx = 0;
2242 	s->cmdline_num = val;
2243 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244 	       sizeof(s->map_pid_to_cmdline));
2245 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246 	       val * sizeof(*s->map_cmdline_to_pid));
2247 
2248 	return 0;
2249 }
2250 
2251 static int trace_create_savedcmd(void)
2252 {
2253 	int ret;
2254 
2255 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256 	if (!savedcmd)
2257 		return -ENOMEM;
2258 
2259 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260 	if (ret < 0) {
2261 		kfree(savedcmd);
2262 		savedcmd = NULL;
2263 		return -ENOMEM;
2264 	}
2265 
2266 	return 0;
2267 }
2268 
2269 int is_tracing_stopped(void)
2270 {
2271 	return global_trace.stop_count;
2272 }
2273 
2274 /**
2275  * tracing_start - quick start of the tracer
2276  *
2277  * If tracing is enabled but was stopped by tracing_stop,
2278  * this will start the tracer back up.
2279  */
2280 void tracing_start(void)
2281 {
2282 	struct trace_buffer *buffer;
2283 	unsigned long flags;
2284 
2285 	if (tracing_disabled)
2286 		return;
2287 
2288 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289 	if (--global_trace.stop_count) {
2290 		if (global_trace.stop_count < 0) {
2291 			/* Someone screwed up their debugging */
2292 			WARN_ON_ONCE(1);
2293 			global_trace.stop_count = 0;
2294 		}
2295 		goto out;
2296 	}
2297 
2298 	/* Prevent the buffers from switching */
2299 	arch_spin_lock(&global_trace.max_lock);
2300 
2301 	buffer = global_trace.array_buffer.buffer;
2302 	if (buffer)
2303 		ring_buffer_record_enable(buffer);
2304 
2305 #ifdef CONFIG_TRACER_MAX_TRACE
2306 	buffer = global_trace.max_buffer.buffer;
2307 	if (buffer)
2308 		ring_buffer_record_enable(buffer);
2309 #endif
2310 
2311 	arch_spin_unlock(&global_trace.max_lock);
2312 
2313  out:
2314 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315 }
2316 
2317 static void tracing_start_tr(struct trace_array *tr)
2318 {
2319 	struct trace_buffer *buffer;
2320 	unsigned long flags;
2321 
2322 	if (tracing_disabled)
2323 		return;
2324 
2325 	/* If global, we need to also start the max tracer */
2326 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327 		return tracing_start();
2328 
2329 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2330 
2331 	if (--tr->stop_count) {
2332 		if (tr->stop_count < 0) {
2333 			/* Someone screwed up their debugging */
2334 			WARN_ON_ONCE(1);
2335 			tr->stop_count = 0;
2336 		}
2337 		goto out;
2338 	}
2339 
2340 	buffer = tr->array_buffer.buffer;
2341 	if (buffer)
2342 		ring_buffer_record_enable(buffer);
2343 
2344  out:
2345 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346 }
2347 
2348 /**
2349  * tracing_stop - quick stop of the tracer
2350  *
2351  * Lightweight way to stop tracing. Use in conjunction with
2352  * tracing_start.
2353  */
2354 void tracing_stop(void)
2355 {
2356 	struct trace_buffer *buffer;
2357 	unsigned long flags;
2358 
2359 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360 	if (global_trace.stop_count++)
2361 		goto out;
2362 
2363 	/* Prevent the buffers from switching */
2364 	arch_spin_lock(&global_trace.max_lock);
2365 
2366 	buffer = global_trace.array_buffer.buffer;
2367 	if (buffer)
2368 		ring_buffer_record_disable(buffer);
2369 
2370 #ifdef CONFIG_TRACER_MAX_TRACE
2371 	buffer = global_trace.max_buffer.buffer;
2372 	if (buffer)
2373 		ring_buffer_record_disable(buffer);
2374 #endif
2375 
2376 	arch_spin_unlock(&global_trace.max_lock);
2377 
2378  out:
2379 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380 }
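
/*
 * Example (illustrative): tracing_stop() and tracing_start() are meant to
 * be used as a pair around a region where freshly recorded events would
 * only add noise, e.g. while dumping an already captured trace:
 *
 *	tracing_stop();
 *	[ inspect or dump the captured trace ]
 *	tracing_start();
 *
 * The stop_count handling above makes nested stop/start pairs safe.
 */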
2381 
2382 static void tracing_stop_tr(struct trace_array *tr)
2383 {
2384 	struct trace_buffer *buffer;
2385 	unsigned long flags;
2386 
2387 	/* If global, we need to also stop the max tracer */
2388 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389 		return tracing_stop();
2390 
2391 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2392 	if (tr->stop_count++)
2393 		goto out;
2394 
2395 	buffer = tr->array_buffer.buffer;
2396 	if (buffer)
2397 		ring_buffer_record_disable(buffer);
2398 
2399  out:
2400 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401 }
2402 
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405 	unsigned tpid, idx;
2406 
2407 	/* treat recording of idle task as a success */
2408 	if (!tsk->pid)
2409 		return 1;
2410 
2411 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412 
2413 	/*
2414 	 * It's not the end of the world if we don't get
2415 	 * the lock, but we also don't want to spin
2416 	 * nor do we want to disable interrupts,
2417 	 * so if we miss here, then better luck next time.
2418 	 */
2419 	if (!arch_spin_trylock(&trace_cmdline_lock))
2420 		return 0;
2421 
2422 	idx = savedcmd->map_pid_to_cmdline[tpid];
2423 	if (idx == NO_CMDLINE_MAP) {
2424 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425 
2426 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2427 		savedcmd->cmdline_idx = idx;
2428 	}
2429 
2430 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431 	set_cmdline(idx, tsk->comm);
2432 
2433 	arch_spin_unlock(&trace_cmdline_lock);
2434 
2435 	return 1;
2436 }
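
/*
 * Worked example of the slot arithmetic above, assuming the default
 * PID_MAX_DEFAULT of 0x8000: tpid = pid & 0x7fff, so pid 40001 hashes to
 * slot 40001 & 0x7fff = 7233, the same slot that pid 7233 uses. Such
 * collisions are why __trace_find_cmdline() below checks
 * map_cmdline_to_pid[map] against the requested pid before trusting the
 * saved comm.
 */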
2437 
2438 static void __trace_find_cmdline(int pid, char comm[])
2439 {
2440 	unsigned map;
2441 	int tpid;
2442 
2443 	if (!pid) {
2444 		strcpy(comm, "<idle>");
2445 		return;
2446 	}
2447 
2448 	if (WARN_ON_ONCE(pid < 0)) {
2449 		strcpy(comm, "<XXX>");
2450 		return;
2451 	}
2452 
2453 	tpid = pid & (PID_MAX_DEFAULT - 1);
2454 	map = savedcmd->map_pid_to_cmdline[tpid];
2455 	if (map != NO_CMDLINE_MAP) {
2456 		tpid = savedcmd->map_cmdline_to_pid[map];
2457 		if (tpid == pid) {
2458 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459 			return;
2460 		}
2461 	}
2462 	strcpy(comm, "<...>");
2463 }
2464 
2465 void trace_find_cmdline(int pid, char comm[])
2466 {
2467 	preempt_disable();
2468 	arch_spin_lock(&trace_cmdline_lock);
2469 
2470 	__trace_find_cmdline(pid, comm);
2471 
2472 	arch_spin_unlock(&trace_cmdline_lock);
2473 	preempt_enable();
2474 }
2475 
2476 static int *trace_find_tgid_ptr(int pid)
2477 {
2478 	/*
2479 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480 	 * if we observe a non-NULL tgid_map then we also observe the correct
2481 	 * tgid_map_max.
2482 	 */
2483 	int *map = smp_load_acquire(&tgid_map);
2484 
2485 	if (unlikely(!map || pid > tgid_map_max))
2486 		return NULL;
2487 
2488 	return &map[pid];
2489 }
2490 
2491 int trace_find_tgid(int pid)
2492 {
2493 	int *ptr = trace_find_tgid_ptr(pid);
2494 
2495 	return ptr ? *ptr : 0;
2496 }
2497 
2498 static int trace_save_tgid(struct task_struct *tsk)
2499 {
2500 	int *ptr;
2501 
2502 	/* treat recording of idle task as a success */
2503 	if (!tsk->pid)
2504 		return 1;
2505 
2506 	ptr = trace_find_tgid_ptr(tsk->pid);
2507 	if (!ptr)
2508 		return 0;
2509 
2510 	*ptr = tsk->tgid;
2511 	return 1;
2512 }
2513 
2514 static bool tracing_record_taskinfo_skip(int flags)
2515 {
2516 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517 		return true;
2518 	if (!__this_cpu_read(trace_taskinfo_save))
2519 		return true;
2520 	return false;
2521 }
2522 
2523 /**
2524  * tracing_record_taskinfo - record the task info of a task
2525  *
2526  * @task:  task to record
2527  * @flags: TRACE_RECORD_CMDLINE for recording comm
2528  *         TRACE_RECORD_TGID for recording tgid
2529  */
2530 void tracing_record_taskinfo(struct task_struct *task, int flags)
2531 {
2532 	bool done;
2533 
2534 	if (tracing_record_taskinfo_skip(flags))
2535 		return;
2536 
2537 	/*
2538 	 * Record as much task information as possible. If some fail, continue
2539 	 * to try to record the others.
2540 	 */
2541 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543 
2544 	/* If recording any information failed, retry soon. */
2545 	if (!done)
2546 		return;
2547 
2548 	__this_cpu_write(trace_taskinfo_save, false);
2549 }
2550 
2551 /**
2552  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553  *
2554  * @prev: previous task during sched_switch
2555  * @next: next task during sched_switch
2556  * @flags: TRACE_RECORD_CMDLINE for recording comm
2557  *         TRACE_RECORD_TGID for recording tgid
2558  */
2559 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560 					  struct task_struct *next, int flags)
2561 {
2562 	bool done;
2563 
2564 	if (tracing_record_taskinfo_skip(flags))
2565 		return;
2566 
2567 	/*
2568 	 * Record as much task information as possible. If some fail, continue
2569 	 * to try to record the others.
2570 	 */
2571 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575 
2576 	/* If recording any information failed, retry soon. */
2577 	if (!done)
2578 		return;
2579 
2580 	__this_cpu_write(trace_taskinfo_save, false);
2581 }
2582 
2583 /* Helpers to record specific task information */
2584 void tracing_record_cmdline(struct task_struct *task)
2585 {
2586 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2587 }
2588 
2589 void tracing_record_tgid(struct task_struct *task)
2590 {
2591 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2592 }
2593 
2594 /*
2595  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597  * simplifies those functions and keeps them in sync.
2598  */
2599 enum print_line_t trace_handle_return(struct trace_seq *s)
2600 {
2601 	return trace_seq_has_overflowed(s) ?
2602 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603 }
2604 EXPORT_SYMBOL_GPL(trace_handle_return);
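
/*
 * Example (illustrative sketch): an event's output callback typically ends
 * with trace_handle_return(), so that an overflowed trace_seq is reported
 * as TRACE_TYPE_PARTIAL_LINE. The names below are hypothetical.
 *
 *	static enum print_line_t my_event_print(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my_event: something happened\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */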
2605 
2606 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2607 {
2608 	unsigned int trace_flags = irqs_status;
2609 	unsigned int pc;
2610 
2611 	pc = preempt_count();
2612 
2613 	if (pc & NMI_MASK)
2614 		trace_flags |= TRACE_FLAG_NMI;
2615 	if (pc & HARDIRQ_MASK)
2616 		trace_flags |= TRACE_FLAG_HARDIRQ;
2617 	if (in_serving_softirq())
2618 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2619 
2620 	if (tif_need_resched())
2621 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2622 	if (test_preempt_need_resched())
2623 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2624 	return (trace_flags << 16) | (pc & 0xff);
2625 }
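
/*
 * Illustration of the packing above: the preemption count occupies the low
 * byte of the returned value and the TRACE_FLAG_* bits sit in the upper
 * half, so a caller holding a trace_ctx can unpack it roughly as:
 *
 *	unsigned int trace_ctx = tracing_gen_ctx();
 *	unsigned int pc        = trace_ctx & 0xff;
 *	unsigned int flags     = trace_ctx >> 16;
 *	bool hardirq           = flags & TRACE_FLAG_HARDIRQ;
 */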
2626 
2627 struct ring_buffer_event *
2628 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2629 			  int type,
2630 			  unsigned long len,
2631 			  unsigned int trace_ctx)
2632 {
2633 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2634 }
2635 
2636 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2637 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2638 static int trace_buffered_event_ref;
2639 
2640 /**
2641  * trace_buffered_event_enable - enable buffering events
2642  *
2643  * When events are being filtered, it is quicker to use a temporary
2644  * buffer to write the event data into if there's a likely chance
2645  * that it will not be committed. Discarding an event from the ring
2646  * buffer is not as fast as committing one, and is much slower than
2647  * copying the data and then committing it.
2648  *
2649  * When events are to be filtered, allocate per-CPU buffers to write
2650  * the event data into. If an event is filtered and discarded, it is
2651  * simply dropped; otherwise the entire data is committed to the ring
2652  * buffer in one shot.
2653  */
2654 void trace_buffered_event_enable(void)
2655 {
2656 	struct ring_buffer_event *event;
2657 	struct page *page;
2658 	int cpu;
2659 
2660 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661 
2662 	if (trace_buffered_event_ref++)
2663 		return;
2664 
2665 	for_each_tracing_cpu(cpu) {
2666 		page = alloc_pages_node(cpu_to_node(cpu),
2667 					GFP_KERNEL | __GFP_NORETRY, 0);
2668 		if (!page)
2669 			goto failed;
2670 
2671 		event = page_address(page);
2672 		memset(event, 0, sizeof(*event));
2673 
2674 		per_cpu(trace_buffered_event, cpu) = event;
2675 
2676 		preempt_disable();
2677 		if (cpu == smp_processor_id() &&
2678 		    __this_cpu_read(trace_buffered_event) !=
2679 		    per_cpu(trace_buffered_event, cpu))
2680 			WARN_ON_ONCE(1);
2681 		preempt_enable();
2682 	}
2683 
2684 	return;
2685  failed:
2686 	trace_buffered_event_disable();
2687 }
2688 
2689 static void enable_trace_buffered_event(void *data)
2690 {
2691 	/* Probably not needed, but do it anyway */
2692 	smp_rmb();
2693 	this_cpu_dec(trace_buffered_event_cnt);
2694 }
2695 
2696 static void disable_trace_buffered_event(void *data)
2697 {
2698 	this_cpu_inc(trace_buffered_event_cnt);
2699 }
2700 
2701 /**
2702  * trace_buffered_event_disable - disable buffering events
2703  *
2704  * When a filter is removed, it is faster to not use the buffered
2705  * events, and to commit directly into the ring buffer. Free up
2706  * the temp buffers when there are no more users. This requires
2707  * special synchronization with current events.
2708  */
2709 void trace_buffered_event_disable(void)
2710 {
2711 	int cpu;
2712 
2713 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2714 
2715 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2716 		return;
2717 
2718 	if (--trace_buffered_event_ref)
2719 		return;
2720 
2721 	preempt_disable();
2722 	/* For each CPU, set the buffer as used. */
2723 	smp_call_function_many(tracing_buffer_mask,
2724 			       disable_trace_buffered_event, NULL, 1);
2725 	preempt_enable();
2726 
2727 	/* Wait for all current users to finish */
2728 	synchronize_rcu();
2729 
2730 	for_each_tracing_cpu(cpu) {
2731 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2732 		per_cpu(trace_buffered_event, cpu) = NULL;
2733 	}
2734 	/*
2735 	 * Make sure trace_buffered_event is NULL before clearing
2736 	 * trace_buffered_event_cnt.
2737 	 */
2738 	smp_wmb();
2739 
2740 	preempt_disable();
2741 	/* Do the work on each cpu */
2742 	smp_call_function_many(tracing_buffer_mask,
2743 			       enable_trace_buffered_event, NULL, 1);
2744 	preempt_enable();
2745 }
2746 
2747 static struct trace_buffer *temp_buffer;
2748 
2749 struct ring_buffer_event *
2750 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2751 			  struct trace_event_file *trace_file,
2752 			  int type, unsigned long len,
2753 			  unsigned int trace_ctx)
2754 {
2755 	struct ring_buffer_event *entry;
2756 	struct trace_array *tr = trace_file->tr;
2757 	int val;
2758 
2759 	*current_rb = tr->array_buffer.buffer;
2760 
2761 	if (!tr->no_filter_buffering_ref &&
2762 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2763 	    (entry = this_cpu_read(trace_buffered_event))) {
2764 		/*
2765 		 * Filtering is on, so try to use the per cpu buffer first.
2766 		 * This buffer will simulate a ring_buffer_event,
2767 		 * where the type_len is zero and the array[0] will
2768 		 * hold the full length.
2769 		 * (see include/linux/ring_buffer.h for details on
2770 		 *  how the ring_buffer_event is structured).
2771 		 *
2772 		 * Using a temp buffer during filtering and copying it
2773 		 * on a matched filter is quicker than writing directly
2774 		 * into the ring buffer and then discarding it when
2775 		 * it doesn't match. That is because the discard
2776 		 * requires several atomic operations to get right.
2777 		 * Copying on match and doing nothing on a failed match
2778 		 * is still quicker than not copying on a match but having
2779 		 * to discard out of the ring buffer on a failed match.
2780 		 */
2781 		int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782 
2783 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2784 
2785 		/*
2786 		 * Preemption is disabled, but interrupts and NMIs
2787 		 * can still come in now. If that happens after
2788 		 * the above increment, then it will have to go
2789 		 * back to the old method of allocating the event
2790 		 * on the ring buffer, and if the filter fails, it
2791 		 * will have to call ring_buffer_discard_commit()
2792 		 * to remove it.
2793 		 *
2794 		 * Need to also check the unlikely case that the
2795 		 * length is bigger than the temp buffer size.
2796 		 * If that happens, then the reserve is pretty much
2797 		 * guaranteed to fail, as the ring buffer currently
2798 		 * only allows events less than a page. But that may
2799 		 * change in the future, so let the ring buffer reserve
2800 		 * handle the failure in that case.
2801 		 */
2802 		if (val == 1 && likely(len <= max_len)) {
2803 			trace_event_setup(entry, type, trace_ctx);
2804 			entry->array[0] = len;
2805 			return entry;
2806 		}
2807 		this_cpu_dec(trace_buffered_event_cnt);
2808 	}
2809 
2810 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2811 					    trace_ctx);
2812 	/*
2813 	 * If tracing is off, but we have triggers enabled
2814 	 * we still need to look at the event data. Use the temp_buffer
2815 	 * to store the trace event for the trigger to use. It's recursion
2816 	 * safe and will not be recorded anywhere.
2817 	 */
2818 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2819 		*current_rb = temp_buffer;
2820 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821 						    trace_ctx);
2822 	}
2823 	return entry;
2824 }
2825 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2826 
2827 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2828 static DEFINE_MUTEX(tracepoint_printk_mutex);
2829 
2830 static void output_printk(struct trace_event_buffer *fbuffer)
2831 {
2832 	struct trace_event_call *event_call;
2833 	struct trace_event_file *file;
2834 	struct trace_event *event;
2835 	unsigned long flags;
2836 	struct trace_iterator *iter = tracepoint_print_iter;
2837 
2838 	/* We should never get here if iter is NULL */
2839 	if (WARN_ON_ONCE(!iter))
2840 		return;
2841 
2842 	event_call = fbuffer->trace_file->event_call;
2843 	if (!event_call || !event_call->event.funcs ||
2844 	    !event_call->event.funcs->trace)
2845 		return;
2846 
2847 	file = fbuffer->trace_file;
2848 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2849 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2850 	     !filter_match_preds(file->filter, fbuffer->entry)))
2851 		return;
2852 
2853 	event = &fbuffer->trace_file->event_call->event;
2854 
2855 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2856 	trace_seq_init(&iter->seq);
2857 	iter->ent = fbuffer->entry;
2858 	event_call->event.funcs->trace(iter, 0, event);
2859 	trace_seq_putc(&iter->seq, 0);
2860 	printk("%s", iter->seq.buffer);
2861 
2862 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2863 }
2864 
2865 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2866 			     void *buffer, size_t *lenp,
2867 			     loff_t *ppos)
2868 {
2869 	int save_tracepoint_printk;
2870 	int ret;
2871 
2872 	mutex_lock(&tracepoint_printk_mutex);
2873 	save_tracepoint_printk = tracepoint_printk;
2874 
2875 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2876 
2877 	/*
2878 	 * This will force exiting early below, as tracepoint_printk is
2879 	 * kept at zero when tracepoint_print_iter is not allocated.
2880 	 */
2881 	if (!tracepoint_print_iter)
2882 		tracepoint_printk = 0;
2883 
2884 	if (save_tracepoint_printk == tracepoint_printk)
2885 		goto out;
2886 
2887 	if (tracepoint_printk)
2888 		static_key_enable(&tracepoint_printk_key.key);
2889 	else
2890 		static_key_disable(&tracepoint_printk_key.key);
2891 
2892  out:
2893 	mutex_unlock(&tracepoint_printk_mutex);
2894 
2895 	return ret;
2896 }
2897 
2898 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2899 {
2900 	enum event_trigger_type tt = ETT_NONE;
2901 	struct trace_event_file *file = fbuffer->trace_file;
2902 
2903 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2904 			fbuffer->entry, &tt))
2905 		goto discard;
2906 
2907 	if (static_key_false(&tracepoint_printk_key.key))
2908 		output_printk(fbuffer);
2909 
2910 	if (static_branch_unlikely(&trace_event_exports_enabled))
2911 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2912 
2913 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2914 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2915 
2916 discard:
2917 	if (tt)
2918 		event_triggers_post_call(file, tt);
2919 
2920 }
2921 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2922 
2923 /*
2924  * Skip 3:
2925  *
2926  *   trace_buffer_unlock_commit_regs()
2927  *   trace_event_buffer_commit()
2928  *   trace_event_raw_event_xxx()
2929  */
2930 # define STACK_SKIP 3
2931 
2932 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2933 				     struct trace_buffer *buffer,
2934 				     struct ring_buffer_event *event,
2935 				     unsigned int trace_ctx,
2936 				     struct pt_regs *regs)
2937 {
2938 	__buffer_unlock_commit(buffer, event);
2939 
2940 	/*
2941 	 * If regs is not set, then skip the necessary functions.
2942 	 * Note, we can still get here via blktrace, wakeup tracer
2943 	 * and mmiotrace, but that's ok if they lose a function or
2944 	 * two. They are not that meaningful.
2945 	 */
2946 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2947 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2948 }
2949 
2950 /*
2951  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2952  */
2953 void
2954 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2955 				   struct ring_buffer_event *event)
2956 {
2957 	__buffer_unlock_commit(buffer, event);
2958 }
2959 
2960 void
2961 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2962 	       parent_ip, unsigned int trace_ctx)
2963 {
2964 	struct trace_event_call *call = &event_function;
2965 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2966 	struct ring_buffer_event *event;
2967 	struct ftrace_entry *entry;
2968 
2969 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2970 					    trace_ctx);
2971 	if (!event)
2972 		return;
2973 	entry	= ring_buffer_event_data(event);
2974 	entry->ip			= ip;
2975 	entry->parent_ip		= parent_ip;
2976 
2977 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2978 		if (static_branch_unlikely(&trace_function_exports_enabled))
2979 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2980 		__buffer_unlock_commit(buffer, event);
2981 	}
2982 }
2983 
2984 #ifdef CONFIG_STACKTRACE
2985 
2986 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2987 #define FTRACE_KSTACK_NESTING	4
2988 
2989 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2990 
2991 struct ftrace_stack {
2992 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2993 };
2994 
2996 struct ftrace_stacks {
2997 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2998 };
2999 
3000 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3001 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3002 
3003 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3004 				 unsigned int trace_ctx,
3005 				 int skip, struct pt_regs *regs)
3006 {
3007 	struct trace_event_call *call = &event_kernel_stack;
3008 	struct ring_buffer_event *event;
3009 	unsigned int size, nr_entries;
3010 	struct ftrace_stack *fstack;
3011 	struct stack_entry *entry;
3012 	int stackidx;
3013 
3014 	/*
3015 	 * Add one, for this function and the call to stack_trace_save().
3016 	 * If regs is set, then these functions will not be in the way.
3017 	 */
3018 #ifndef CONFIG_UNWINDER_ORC
3019 	if (!regs)
3020 		skip++;
3021 #endif
3022 
3023 	preempt_disable_notrace();
3024 
3025 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3026 
3027 	/* This should never happen. If it does, yell once and skip */
3028 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3029 		goto out;
3030 
3031 	/*
3032 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3033 	 * interrupt will either see the value pre increment or post
3034 	 * increment. If the interrupt happens pre increment it will have
3035 	 * restored the counter when it returns.  We just need a barrier to
3036 	 * keep gcc from moving things around.
3037 	 */
3038 	barrier();
3039 
3040 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3041 	size = ARRAY_SIZE(fstack->calls);
3042 
3043 	if (regs) {
3044 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3045 						   size, skip);
3046 	} else {
3047 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3048 	}
3049 
3050 	size = nr_entries * sizeof(unsigned long);
3051 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3052 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3053 				    trace_ctx);
3054 	if (!event)
3055 		goto out;
3056 	entry = ring_buffer_event_data(event);
3057 
3058 	memcpy(&entry->caller, fstack->calls, size);
3059 	entry->size = nr_entries;
3060 
3061 	if (!call_filter_check_discard(call, entry, buffer, event))
3062 		__buffer_unlock_commit(buffer, event);
3063 
3064  out:
3065 	/* Again, don't let gcc optimize things here */
3066 	barrier();
3067 	__this_cpu_dec(ftrace_stack_reserve);
3068 	preempt_enable_notrace();
3069 
3070 }
3071 
3072 static inline void ftrace_trace_stack(struct trace_array *tr,
3073 				      struct trace_buffer *buffer,
3074 				      unsigned int trace_ctx,
3075 				      int skip, struct pt_regs *regs)
3076 {
3077 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3078 		return;
3079 
3080 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3081 }
3082 
3083 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3084 		   int skip)
3085 {
3086 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3087 
3088 	if (rcu_is_watching()) {
3089 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3090 		return;
3091 	}
3092 
3093 	/*
3094 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3095 	 * but if the above rcu_is_watching() failed, then the NMI
3096 	 * triggered someplace critical, and rcu_irq_enter() should
3097 	 * not be called from NMI.
3098 	 */
3099 	if (unlikely(in_nmi()))
3100 		return;
3101 
3102 	rcu_irq_enter_irqson();
3103 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3104 	rcu_irq_exit_irqson();
3105 }
3106 
3107 /**
3108  * trace_dump_stack - record a stack back trace in the trace buffer
3109  * @skip: Number of functions to skip (helper handlers)
3110  */
3111 void trace_dump_stack(int skip)
3112 {
3113 	if (tracing_disabled || tracing_selftest_running)
3114 		return;
3115 
3116 #ifndef CONFIG_UNWINDER_ORC
3117 	/* Skip 1 to skip this function. */
3118 	skip++;
3119 #endif
3120 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3121 			     tracing_gen_ctx(), skip, NULL);
3122 }
3123 EXPORT_SYMBOL_GPL(trace_dump_stack);
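
/*
 * Example (illustrative): trace_dump_stack() can be dropped into a suspect
 * code path to record the current call chain alongside the other events in
 * the ring buffer; the condition below is hypothetical:
 *
 *	if (unlikely(state == MY_BAD_STATE))
 *		trace_dump_stack(0);
 */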
3124 
3125 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3126 static DEFINE_PER_CPU(int, user_stack_count);
3127 
3128 static void
3129 ftrace_trace_userstack(struct trace_array *tr,
3130 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3131 {
3132 	struct trace_event_call *call = &event_user_stack;
3133 	struct ring_buffer_event *event;
3134 	struct userstack_entry *entry;
3135 
3136 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3137 		return;
3138 
3139 	/*
3140 	 * NMIs can not handle page faults, even with fixups.
3141 	 * Saving the user stack can (and often does) fault.
3142 	 */
3143 	if (unlikely(in_nmi()))
3144 		return;
3145 
3146 	/*
3147 	 * prevent recursion, since the user stack tracing may
3148 	 * trigger other kernel events.
3149 	 */
3150 	preempt_disable();
3151 	if (__this_cpu_read(user_stack_count))
3152 		goto out;
3153 
3154 	__this_cpu_inc(user_stack_count);
3155 
3156 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3157 					    sizeof(*entry), trace_ctx);
3158 	if (!event)
3159 		goto out_drop_count;
3160 	entry	= ring_buffer_event_data(event);
3161 
3162 	entry->tgid		= current->tgid;
3163 	memset(&entry->caller, 0, sizeof(entry->caller));
3164 
3165 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3166 	if (!call_filter_check_discard(call, entry, buffer, event))
3167 		__buffer_unlock_commit(buffer, event);
3168 
3169  out_drop_count:
3170 	__this_cpu_dec(user_stack_count);
3171  out:
3172 	preempt_enable();
3173 }
3174 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3175 static void ftrace_trace_userstack(struct trace_array *tr,
3176 				   struct trace_buffer *buffer,
3177 				   unsigned int trace_ctx)
3178 {
3179 }
3180 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3181 
3182 #endif /* CONFIG_STACKTRACE */
3183 
3184 static inline void
3185 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3186 			  unsigned long long delta)
3187 {
3188 	entry->bottom_delta_ts = delta & U32_MAX;
3189 	entry->top_delta_ts = (delta >> 32);
3190 }
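
/*
 * The split above is undone on the output side by recombining the two
 * 32-bit halves, conceptually:
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */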
3191 
3192 void trace_last_func_repeats(struct trace_array *tr,
3193 			     struct trace_func_repeats *last_info,
3194 			     unsigned int trace_ctx)
3195 {
3196 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3197 	struct func_repeats_entry *entry;
3198 	struct ring_buffer_event *event;
3199 	u64 delta;
3200 
3201 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3202 					    sizeof(*entry), trace_ctx);
3203 	if (!event)
3204 		return;
3205 
3206 	delta = ring_buffer_event_time_stamp(buffer, event) -
3207 		last_info->ts_last_call;
3208 
3209 	entry = ring_buffer_event_data(event);
3210 	entry->ip = last_info->ip;
3211 	entry->parent_ip = last_info->parent_ip;
3212 	entry->count = last_info->count;
3213 	func_repeats_set_delta_ts(entry, delta);
3214 
3215 	__buffer_unlock_commit(buffer, event);
3216 }
3217 
3218 /* created for use with alloc_percpu */
3219 struct trace_buffer_struct {
3220 	int nesting;
3221 	char buffer[4][TRACE_BUF_SIZE];
3222 };
3223 
3224 static struct trace_buffer_struct *trace_percpu_buffer;
3225 
3226 /*
3227  * This allows for lockless recording.  If we're nested too deeply, then
3228  * this returns NULL.
3229  */
3230 static char *get_trace_buf(void)
3231 {
3232 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3233 
3234 	if (!buffer || buffer->nesting >= 4)
3235 		return NULL;
3236 
3237 	buffer->nesting++;
3238 
3239 	/* Interrupts must see nesting incremented before we use the buffer */
3240 	barrier();
3241 	return &buffer->buffer[buffer->nesting - 1][0];
3242 }
3243 
3244 static void put_trace_buf(void)
3245 {
3246 	/* Don't let the decrement of nesting leak before this */
3247 	barrier();
3248 	this_cpu_dec(trace_percpu_buffer->nesting);
3249 }
3250 
3251 static int alloc_percpu_trace_buffer(void)
3252 {
3253 	struct trace_buffer_struct *buffers;
3254 
3255 	if (trace_percpu_buffer)
3256 		return 0;
3257 
3258 	buffers = alloc_percpu(struct trace_buffer_struct);
3259 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3260 		return -ENOMEM;
3261 
3262 	trace_percpu_buffer = buffers;
3263 	return 0;
3264 }
3265 
3266 static int buffers_allocated;
3267 
3268 void trace_printk_init_buffers(void)
3269 {
3270 	if (buffers_allocated)
3271 		return;
3272 
3273 	if (alloc_percpu_trace_buffer())
3274 		return;
3275 
3276 	/* trace_printk() is for debug use only. Don't use it in production. */
3277 
3278 	pr_warn("\n");
3279 	pr_warn("**********************************************************\n");
3280 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3281 	pr_warn("**                                                      **\n");
3282 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3283 	pr_warn("**                                                      **\n");
3284 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3285 	pr_warn("** unsafe for production use.                           **\n");
3286 	pr_warn("**                                                      **\n");
3287 	pr_warn("** If you see this message and you are not debugging    **\n");
3288 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3289 	pr_warn("**                                                      **\n");
3290 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3291 	pr_warn("**********************************************************\n");
3292 
3293 	/* Expand the buffers to set size */
3294 	tracing_update_buffers();
3295 
3296 	buffers_allocated = 1;
3297 
3298 	/*
3299 	 * trace_printk_init_buffers() can be called by modules.
3300 	 * If that happens, then we need to start cmdline recording
3301 	 * directly here. If the global_trace.buffer is already
3302 	 * allocated here, then this was called by module code.
3303 	 */
3304 	if (global_trace.array_buffer.buffer)
3305 		tracing_start_cmdline_record();
3306 }
3307 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
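
/*
 * Example (illustrative, debug use only): trace_printk() is used like
 * printk() but records into the ring buffer; the variables below are
 * hypothetical:
 *
 *	trace_printk("resetting unit %d, status=%#lx\n", unit, status);
 *
 * Constant formats with arguments are normally recorded through the binary
 * trace_vbprintk() path below and decoded when the buffer is read.
 */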
3308 
3309 void trace_printk_start_comm(void)
3310 {
3311 	/* Start tracing comms if trace printk is set */
3312 	if (!buffers_allocated)
3313 		return;
3314 	tracing_start_cmdline_record();
3315 }
3316 
3317 static void trace_printk_start_stop_comm(int enabled)
3318 {
3319 	if (!buffers_allocated)
3320 		return;
3321 
3322 	if (enabled)
3323 		tracing_start_cmdline_record();
3324 	else
3325 		tracing_stop_cmdline_record();
3326 }
3327 
3328 /**
3329  * trace_vbprintk - write binary msg to tracing buffer
3330  * @ip:    The address of the caller
3331  * @fmt:   The string format to write to the buffer
3332  * @args:  Arguments for @fmt
3333  */
3334 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3335 {
3336 	struct trace_event_call *call = &event_bprint;
3337 	struct ring_buffer_event *event;
3338 	struct trace_buffer *buffer;
3339 	struct trace_array *tr = &global_trace;
3340 	struct bprint_entry *entry;
3341 	unsigned int trace_ctx;
3342 	char *tbuffer;
3343 	int len = 0, size;
3344 
3345 	if (unlikely(tracing_selftest_running || tracing_disabled))
3346 		return 0;
3347 
3348 	/* Don't pollute graph traces with trace_vprintk internals */
3349 	pause_graph_tracing();
3350 
3351 	trace_ctx = tracing_gen_ctx();
3352 	preempt_disable_notrace();
3353 
3354 	tbuffer = get_trace_buf();
3355 	if (!tbuffer) {
3356 		len = 0;
3357 		goto out_nobuffer;
3358 	}
3359 
3360 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3361 
3362 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3363 		goto out_put;
3364 
3365 	size = sizeof(*entry) + sizeof(u32) * len;
3366 	buffer = tr->array_buffer.buffer;
3367 	ring_buffer_nest_start(buffer);
3368 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3369 					    trace_ctx);
3370 	if (!event)
3371 		goto out;
3372 	entry = ring_buffer_event_data(event);
3373 	entry->ip			= ip;
3374 	entry->fmt			= fmt;
3375 
3376 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3377 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3378 		__buffer_unlock_commit(buffer, event);
3379 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3380 	}
3381 
3382 out:
3383 	ring_buffer_nest_end(buffer);
3384 out_put:
3385 	put_trace_buf();
3386 
3387 out_nobuffer:
3388 	preempt_enable_notrace();
3389 	unpause_graph_tracing();
3390 
3391 	return len;
3392 }
3393 EXPORT_SYMBOL_GPL(trace_vbprintk);
3394 
3395 __printf(3, 0)
3396 static int
3397 __trace_array_vprintk(struct trace_buffer *buffer,
3398 		      unsigned long ip, const char *fmt, va_list args)
3399 {
3400 	struct trace_event_call *call = &event_print;
3401 	struct ring_buffer_event *event;
3402 	int len = 0, size;
3403 	struct print_entry *entry;
3404 	unsigned int trace_ctx;
3405 	char *tbuffer;
3406 
3407 	if (tracing_disabled || tracing_selftest_running)
3408 		return 0;
3409 
3410 	/* Don't pollute graph traces with trace_vprintk internals */
3411 	pause_graph_tracing();
3412 
3413 	trace_ctx = tracing_gen_ctx();
3414 	preempt_disable_notrace();
3415 
3417 	tbuffer = get_trace_buf();
3418 	if (!tbuffer) {
3419 		len = 0;
3420 		goto out_nobuffer;
3421 	}
3422 
3423 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3424 
3425 	size = sizeof(*entry) + len + 1;
3426 	ring_buffer_nest_start(buffer);
3427 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3428 					    trace_ctx);
3429 	if (!event)
3430 		goto out;
3431 	entry = ring_buffer_event_data(event);
3432 	entry->ip = ip;
3433 
3434 	memcpy(&entry->buf, tbuffer, len + 1);
3435 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3436 		__buffer_unlock_commit(buffer, event);
3437 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3438 	}
3439 
3440 out:
3441 	ring_buffer_nest_end(buffer);
3442 	put_trace_buf();
3443 
3444 out_nobuffer:
3445 	preempt_enable_notrace();
3446 	unpause_graph_tracing();
3447 
3448 	return len;
3449 }
3450 
3451 __printf(3, 0)
3452 int trace_array_vprintk(struct trace_array *tr,
3453 			unsigned long ip, const char *fmt, va_list args)
3454 {
3455 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3456 }
3457 
3458 /**
3459  * trace_array_printk - Print a message to a specific instance
3460  * @tr: The instance trace_array descriptor
3461  * @ip: The instruction pointer that this is called from.
3462  * @fmt: The format to print (printf format)
3463  *
3464  * If a subsystem sets up its own instance, it has the right to
3465  * printk strings into its tracing instance buffer using this
3466  * function. Note, this function will not write into the top level
3467  * buffer (use trace_printk() for that), as the top level buffer
3468  * should only contain events that can be individually disabled.
3469  * trace_printk() is only used for debugging a kernel, and should
3470  * never be incorporated into normal use.
3471  *
3472  * trace_array_printk() can be used, as it will not add noise to the
3473  * top level tracing buffer.
3474  *
3475  * Note, trace_array_init_printk() must be called on @tr before this
3476  * can be used.
3477  */
3478 __printf(3, 0)
3479 int trace_array_printk(struct trace_array *tr,
3480 		       unsigned long ip, const char *fmt, ...)
3481 {
3482 	int ret;
3483 	va_list ap;
3484 
3485 	if (!tr)
3486 		return -ENOENT;
3487 
3488 	/* This is only allowed for created instances */
3489 	if (tr == &global_trace)
3490 		return 0;
3491 
3492 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3493 		return 0;
3494 
3495 	va_start(ap, fmt);
3496 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3497 	va_end(ap);
3498 	return ret;
3499 }
3500 EXPORT_SYMBOL_GPL(trace_array_printk);
3501 
3502 /**
3503  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3504  * @tr: The trace array to initialize the buffers for
3505  *
3506  * As trace_array_printk() only writes into instances, calls to it are
3507  * OK to have in the kernel (unlike trace_printk()). This needs to be called
3508  * before trace_array_printk() can be used on a trace_array.
3509  */
3510 int trace_array_init_printk(struct trace_array *tr)
3511 {
3512 	if (!tr)
3513 		return -ENOENT;
3514 
3515 	/* This is only allowed for created instances */
3516 	if (tr == &global_trace)
3517 		return -EINVAL;
3518 
3519 	return alloc_percpu_trace_buffer();
3520 }
3521 EXPORT_SYMBOL_GPL(trace_array_init_printk);
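
/*
 * Example (illustrative sketch): a subsystem creating its own instance and
 * printing into it. The names below are hypothetical;
 * trace_array_get_by_name() creates the instance if it does not already
 * exist.
 *
 *	static struct trace_array *my_tr;
 *
 *	static int my_trace_setup(void)
 *	{
 *		my_tr = trace_array_get_by_name("my_subsys");
 *		if (!my_tr)
 *			return -ENOMEM;
 *		return trace_array_init_printk(my_tr);
 *	}
 *
 *	static void my_trace_log(int unit, int err)
 *	{
 *		trace_array_printk(my_tr, _THIS_IP_,
 *				   "unit %d failed: %d\n", unit, err);
 *	}
 */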
3522 
3523 __printf(3, 4)
3524 int trace_array_printk_buf(struct trace_buffer *buffer,
3525 			   unsigned long ip, const char *fmt, ...)
3526 {
3527 	int ret;
3528 	va_list ap;
3529 
3530 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3531 		return 0;
3532 
3533 	va_start(ap, fmt);
3534 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3535 	va_end(ap);
3536 	return ret;
3537 }
3538 
3539 __printf(2, 0)
3540 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3541 {
3542 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3543 }
3544 EXPORT_SYMBOL_GPL(trace_vprintk);
3545 
3546 static void trace_iterator_increment(struct trace_iterator *iter)
3547 {
3548 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3549 
3550 	iter->idx++;
3551 	if (buf_iter)
3552 		ring_buffer_iter_advance(buf_iter);
3553 }
3554 
3555 static struct trace_entry *
3556 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3557 		unsigned long *lost_events)
3558 {
3559 	struct ring_buffer_event *event;
3560 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3561 
3562 	if (buf_iter) {
3563 		event = ring_buffer_iter_peek(buf_iter, ts);
3564 		if (lost_events)
3565 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3566 				(unsigned long)-1 : 0;
3567 	} else {
3568 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3569 					 lost_events);
3570 	}
3571 
3572 	if (event) {
3573 		iter->ent_size = ring_buffer_event_length(event);
3574 		return ring_buffer_event_data(event);
3575 	}
3576 	iter->ent_size = 0;
3577 	return NULL;
3578 }
3579 
3580 static struct trace_entry *
3581 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3582 		  unsigned long *missing_events, u64 *ent_ts)
3583 {
3584 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3585 	struct trace_entry *ent, *next = NULL;
3586 	unsigned long lost_events = 0, next_lost = 0;
3587 	int cpu_file = iter->cpu_file;
3588 	u64 next_ts = 0, ts;
3589 	int next_cpu = -1;
3590 	int next_size = 0;
3591 	int cpu;
3592 
3593 	/*
3594 	 * If we are in a per_cpu trace file, don't bother iterating over
3595 	 * all CPUs; just peek at that CPU directly.
3596 	 */
3597 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3598 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3599 			return NULL;
3600 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3601 		if (ent_cpu)
3602 			*ent_cpu = cpu_file;
3603 
3604 		return ent;
3605 	}
3606 
3607 	for_each_tracing_cpu(cpu) {
3608 
3609 		if (ring_buffer_empty_cpu(buffer, cpu))
3610 			continue;
3611 
3612 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3613 
3614 		/*
3615 		 * Pick the entry with the smallest timestamp:
3616 		 */
3617 		if (ent && (!next || ts < next_ts)) {
3618 			next = ent;
3619 			next_cpu = cpu;
3620 			next_ts = ts;
3621 			next_lost = lost_events;
3622 			next_size = iter->ent_size;
3623 		}
3624 	}
3625 
3626 	iter->ent_size = next_size;
3627 
3628 	if (ent_cpu)
3629 		*ent_cpu = next_cpu;
3630 
3631 	if (ent_ts)
3632 		*ent_ts = next_ts;
3633 
3634 	if (missing_events)
3635 		*missing_events = next_lost;
3636 
3637 	return next;
3638 }
3639 
3640 #define STATIC_FMT_BUF_SIZE	128
3641 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3642 
3643 static char *trace_iter_expand_format(struct trace_iterator *iter)
3644 {
3645 	char *tmp;
3646 
3647 	/*
3648 	 * iter->tr is NULL when used with tp_printk, which means this can
3649 	 * be called from a context where it is not safe to call krealloc().
3650 	 */
3651 	if (!iter->tr || iter->fmt == static_fmt_buf)
3652 		return NULL;
3653 
3654 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3655 		       GFP_KERNEL);
3656 	if (tmp) {
3657 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3658 		iter->fmt = tmp;
3659 	}
3660 
3661 	return tmp;
3662 }
3663 
3664 /* Returns true if the string is safe to dereference from an event */
3665 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3666 {
3667 	unsigned long addr = (unsigned long)str;
3668 	struct trace_event *trace_event;
3669 	struct trace_event_call *event;
3670 
3671 	/* OK if part of the event data */
3672 	if ((addr >= (unsigned long)iter->ent) &&
3673 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3674 		return true;
3675 
3676 	/* OK if part of the temp seq buffer */
3677 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3678 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3679 		return true;
3680 
3681 	/* Core rodata can not be freed */
3682 	if (is_kernel_rodata(addr))
3683 		return true;
3684 
3685 	if (trace_is_tracepoint_string(str))
3686 		return true;
3687 
3688 	/*
3689 	 * Now this could be a module event, referencing core module
3690 	 * data, which is OK.
3691 	 */
3692 	if (!iter->ent)
3693 		return false;
3694 
3695 	trace_event = ftrace_find_event(iter->ent->type);
3696 	if (!trace_event)
3697 		return false;
3698 
3699 	event = container_of(trace_event, struct trace_event_call, event);
3700 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3701 		return false;
3702 
3703 	/* Would rather have rodata, but this will suffice */
3704 	if (within_module_core(addr, event->module))
3705 		return true;
3706 
3707 	return false;
3708 }
3709 
3710 static const char *show_buffer(struct trace_seq *s)
3711 {
3712 	struct seq_buf *seq = &s->seq;
3713 
3714 	seq_buf_terminate(seq);
3715 
3716 	return seq->buffer;
3717 }
3718 
3719 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3720 
3721 static int test_can_verify_check(const char *fmt, ...)
3722 {
3723 	char buf[16];
3724 	va_list ap;
3725 	int ret;
3726 
3727 	/*
3728 	 * The verifier depends on vsnprintf() modifying the va_list passed
3729 	 * to it, which only works when the va_list is passed by reference.
3730 	 * Some architectures (like x86_32) pass it by value, which means
3731 	 * that vsnprintf() does not modify the caller's va_list, and the
3732 	 * verifier would then need to understand all the values that
3733 	 * vsnprintf() can consume. If the va_list is passed by value, the
3734 	 * verifier is disabled.
3735 	 */
3736 	va_start(ap, fmt);
3737 	vsnprintf(buf, 16, "%d", ap);
3738 	ret = va_arg(ap, int);
3739 	va_end(ap);
3740 
3741 	return ret;
3742 }
3743 
3744 static void test_can_verify(void)
3745 {
3746 	if (!test_can_verify_check("%d %d", 0, 1)) {
3747 		pr_info("trace event string verifier disabled\n");
3748 		static_branch_inc(&trace_no_verify);
3749 	}
3750 }
3751 
3752 /**
3753  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3754  * @iter: The iterator that holds the seq buffer and the event being printed
3755  * @fmt: The format used to print the event
3756  * @ap: The va_list holding the data to print from @fmt.
3757  *
3758  * This writes the data into the @iter->seq buffer using the data from
3759  * @fmt and @ap. If the format has a %s, then the source of the string
3760  * is examined to make sure it is safe to print, otherwise it will
3761  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3762  * pointer.
3763  */
3764 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3765 			 va_list ap)
3766 {
3767 	const char *p = fmt;
3768 	const char *str;
3769 	int i, j;
3770 
3771 	if (WARN_ON_ONCE(!fmt))
3772 		return;
3773 
3774 	if (static_branch_unlikely(&trace_no_verify))
3775 		goto print;
3776 
3777 	/* Don't bother checking when doing a ftrace_dump() */
3778 	if (iter->fmt == static_fmt_buf)
3779 		goto print;
3780 
3781 	while (*p) {
3782 		bool star = false;
3783 		int len = 0;
3784 
3785 		j = 0;
3786 
3787 		/* We only care about %s and variants */
3788 		for (i = 0; p[i]; i++) {
3789 			if (i + 1 >= iter->fmt_size) {
3790 				/*
3791 				 * If we can't expand the copy buffer,
3792 				 * just print it.
3793 				 */
3794 				if (!trace_iter_expand_format(iter))
3795 					goto print;
3796 			}
3797 
3798 			if (p[i] == '\\' && p[i+1]) {
3799 				i++;
3800 				continue;
3801 			}
3802 			if (p[i] == '%') {
3803 				/* Need to test cases like %08.*s */
3804 				for (j = 1; p[i+j]; j++) {
3805 					if (isdigit(p[i+j]) ||
3806 					    p[i+j] == '.')
3807 						continue;
3808 					if (p[i+j] == '*') {
3809 						star = true;
3810 						continue;
3811 					}
3812 					break;
3813 				}
3814 				if (p[i+j] == 's')
3815 					break;
3816 				star = false;
3817 			}
3818 			j = 0;
3819 		}
3820 		/* If no %s found then just print normally */
3821 		if (!p[i])
3822 			break;
3823 
3824 		/* Copy up to the %s, and print that */
3825 		strncpy(iter->fmt, p, i);
3826 		iter->fmt[i] = '\0';
3827 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3828 
3829 		if (star)
3830 			len = va_arg(ap, int);
3831 
3832 		/* The ap now points to the string data of the %s */
3833 		str = va_arg(ap, const char *);
3834 
3835 		/*
3836 		 * If you hit this warning, it is likely that the
3837 		 * trace event in question used %s on a string that
3838 		 * was saved at the time of the event, but may not be
3839 		 * around when the trace is read. Use __string(),
3840 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3841 		 * instead. See samples/trace_events/trace-events-sample.h
3842 		 * for reference.
3843 		 */
3844 		if (WARN_ONCE(!trace_safe_str(iter, str),
3845 			      "fmt: '%s' current_buffer: '%s'",
3846 			      fmt, show_buffer(&iter->seq))) {
3847 			int ret;
3848 
3849 			/* Try to safely read the string */
3850 			if (star) {
3851 				if (len + 1 > iter->fmt_size)
3852 					len = iter->fmt_size - 1;
3853 				if (len < 0)
3854 					len = 0;
3855 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3856 				iter->fmt[len] = 0;
3857 				star = false;
3858 			} else {
3859 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3860 								  iter->fmt_size);
3861 			}
3862 			if (ret < 0)
3863 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3864 			else
3865 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3866 						 str, iter->fmt);
3867 			str = "[UNSAFE-MEMORY]";
3868 			strcpy(iter->fmt, "%s");
3869 		} else {
3870 			strncpy(iter->fmt, p + i, j + 1);
3871 			iter->fmt[j+1] = '\0';
3872 		}
3873 		if (star)
3874 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3875 		else
3876 			trace_seq_printf(&iter->seq, iter->fmt, str);
3877 
3878 		p += i + j + 1;
3879 	}
3880  print:
3881 	if (*p)
3882 		trace_seq_vprintf(&iter->seq, p, ap);
3883 }
3884 
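/*
 * A hypothetical event (for illustration only; see
 * samples/trace_events/trace-events-sample.h for a complete version)
 * using the __string()/__assign_str()/__get_str() helpers recommended by
 * the warning in trace_check_vprintf() above. They copy the string into
 * the event record, so the pointer handed to "%s" stays within iter->ent
 * and passes trace_safe_str():
 *
 *	TRACE_EVENT(sample_open,
 *		TP_PROTO(const char *filename),
 *		TP_ARGS(filename),
 *		TP_STRUCT__entry(
 *			__string(name, filename)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, filename);
 *		),
 *		TP_printk("file=%s", __get_str(name))
 *	);
 */
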
3885 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3886 {
3887 	const char *p, *new_fmt;
3888 	char *q;
3889 
3890 	if (WARN_ON_ONCE(!fmt))
3891 		return fmt;
3892 
3893 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3894 		return fmt;
3895 
3896 	p = fmt;
3897 	new_fmt = q = iter->fmt;
3898 	while (*p) {
3899 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3900 			if (!trace_iter_expand_format(iter))
3901 				return fmt;
3902 
3903 			q += iter->fmt - new_fmt;
3904 			new_fmt = iter->fmt;
3905 		}
3906 
3907 		*q++ = *p++;
3908 
3909 		/* Replace %p with %px */
3910 		if (p[-1] == '%') {
3911 			if (p[0] == '%') {
3912 				*q++ = *p++;
3913 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3914 				*q++ = *p++;
3915 				*q++ = 'x';
3916 			}
3917 		}
3918 	}
3919 	*q = '\0';
3920 
3921 	return new_fmt;
3922 }
3923 
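/*
 * Example of the rewrite done by trace_event_format() when the
 * TRACE_ITER_HASH_PTR flag is cleared (illustrative format strings):
 *
 *	"page=%p flags=%lu\n"   becomes   "page=%px flags=%lu\n"
 *
 * Escaped "%%" and extended specifiers such as "%pS" are copied through
 * unchanged; only bare %p conversions lose the pointer hashing.
 */
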
3924 #define STATIC_TEMP_BUF_SIZE	128
3925 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3926 
3927 /* Find the next real entry, without updating the iterator itself */
3928 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3929 					  int *ent_cpu, u64 *ent_ts)
3930 {
3931 	/* __find_next_entry will reset ent_size */
3932 	int ent_size = iter->ent_size;
3933 	struct trace_entry *entry;
3934 
3935 	/*
3936 	 * If called from ftrace_dump(), then the iter->temp buffer
3937 	 * will be the static_temp_buf and not created from kmalloc.
3938 	 * If the entry size is greater than the buffer, we can
3939 	 * not save it. Just return NULL in that case. This is only
3940 	 * used to add markers when two consecutive events'
3941 	 * timestamps have a large delta. See trace_print_lat_context().
3942 	 */
3943 	if (iter->temp == static_temp_buf &&
3944 	    STATIC_TEMP_BUF_SIZE < ent_size)
3945 		return NULL;
3946 
3947 	/*
3948 	 * The __find_next_entry() may call peek_next_entry(), which may
3949 	 * call ring_buffer_peek() that may make the contents of iter->ent
3950 	 * undefined. Need to copy iter->ent now.
3951 	 */
3952 	if (iter->ent && iter->ent != iter->temp) {
3953 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3954 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3955 			void *temp;
3956 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3957 			if (!temp)
3958 				return NULL;
3959 			kfree(iter->temp);
3960 			iter->temp = temp;
3961 			iter->temp_size = iter->ent_size;
3962 		}
3963 		memcpy(iter->temp, iter->ent, iter->ent_size);
3964 		iter->ent = iter->temp;
3965 	}
3966 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3967 	/* Put back the original ent_size */
3968 	iter->ent_size = ent_size;
3969 
3970 	return entry;
3971 }
3972 
3973 /* Find the next real entry, and increment the iterator to the next entry */
3974 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3975 {
3976 	iter->ent = __find_next_entry(iter, &iter->cpu,
3977 				      &iter->lost_events, &iter->ts);
3978 
3979 	if (iter->ent)
3980 		trace_iterator_increment(iter);
3981 
3982 	return iter->ent ? iter : NULL;
3983 }
3984 
3985 static void trace_consume(struct trace_iterator *iter)
3986 {
3987 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3988 			    &iter->lost_events);
3989 }
3990 
3991 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3992 {
3993 	struct trace_iterator *iter = m->private;
3994 	int i = (int)*pos;
3995 	void *ent;
3996 
3997 	WARN_ON_ONCE(iter->leftover);
3998 
3999 	(*pos)++;
4000 
4001 	/* can't go backwards */
4002 	if (iter->idx > i)
4003 		return NULL;
4004 
4005 	if (iter->idx < 0)
4006 		ent = trace_find_next_entry_inc(iter);
4007 	else
4008 		ent = iter;
4009 
4010 	while (ent && iter->idx < i)
4011 		ent = trace_find_next_entry_inc(iter);
4012 
4013 	iter->pos = *pos;
4014 
4015 	return ent;
4016 }
4017 
4018 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4019 {
4020 	struct ring_buffer_iter *buf_iter;
4021 	unsigned long entries = 0;
4022 	u64 ts;
4023 
4024 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4025 
4026 	buf_iter = trace_buffer_iter(iter, cpu);
4027 	if (!buf_iter)
4028 		return;
4029 
4030 	ring_buffer_iter_reset(buf_iter);
4031 
4032 	/*
4033 	 * With the max latency tracers, it is possible that a reset
4034 	 * never took place on a CPU. This is evident by the timestamp
4035 	 * being before the start of the buffer.
4036 	 */
4037 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4038 		if (ts >= iter->array_buffer->time_start)
4039 			break;
4040 		entries++;
4041 		ring_buffer_iter_advance(buf_iter);
4042 	}
4043 
4044 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4045 }
4046 
4047 /*
4048  * The current tracer is copied to avoid taking a global lock
4049  * all around.
4050  */
4051 static void *s_start(struct seq_file *m, loff_t *pos)
4052 {
4053 	struct trace_iterator *iter = m->private;
4054 	struct trace_array *tr = iter->tr;
4055 	int cpu_file = iter->cpu_file;
4056 	void *p = NULL;
4057 	loff_t l = 0;
4058 	int cpu;
4059 
4060 	/*
4061 	 * Copy the tracer to avoid using a global lock all around.
4062 	 * iter->trace is a copy of current_trace, so the name pointer
4063 	 * may be compared instead of using strcmp(), as iter->trace->name
4064 	 * will point to the same string as current_trace->name.
4065 	 */
4066 	mutex_lock(&trace_types_lock);
4067 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4068 		*iter->trace = *tr->current_trace;
4069 	mutex_unlock(&trace_types_lock);
4070 
4071 #ifdef CONFIG_TRACER_MAX_TRACE
4072 	if (iter->snapshot && iter->trace->use_max_tr)
4073 		return ERR_PTR(-EBUSY);
4074 #endif
4075 
4076 	if (*pos != iter->pos) {
4077 		iter->ent = NULL;
4078 		iter->cpu = 0;
4079 		iter->idx = -1;
4080 
4081 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4082 			for_each_tracing_cpu(cpu)
4083 				tracing_iter_reset(iter, cpu);
4084 		} else
4085 			tracing_iter_reset(iter, cpu_file);
4086 
4087 		iter->leftover = 0;
4088 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4089 			;
4090 
4091 	} else {
4092 		/*
4093 		 * If we overflowed the seq_file before, then we want
4094 		 * to just reuse the trace_seq buffer again.
4095 		 */
4096 		if (iter->leftover)
4097 			p = iter;
4098 		else {
4099 			l = *pos - 1;
4100 			p = s_next(m, p, &l);
4101 		}
4102 	}
4103 
4104 	trace_event_read_lock();
4105 	trace_access_lock(cpu_file);
4106 	return p;
4107 }
4108 
4109 static void s_stop(struct seq_file *m, void *p)
4110 {
4111 	struct trace_iterator *iter = m->private;
4112 
4113 #ifdef CONFIG_TRACER_MAX_TRACE
4114 	if (iter->snapshot && iter->trace->use_max_tr)
4115 		return;
4116 #endif
4117 
4118 	trace_access_unlock(iter->cpu_file);
4119 	trace_event_read_unlock();
4120 }
4121 
4122 static void
4123 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4124 		      unsigned long *entries, int cpu)
4125 {
4126 	unsigned long count;
4127 
4128 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4129 	/*
4130 	 * If this buffer has skipped entries, then we hold all
4131 	 * entries for the trace and we need to ignore the
4132 	 * ones before the time stamp.
4133 	 */
4134 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4135 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4136 		/* total is the same as the entries */
4137 		*total = count;
4138 	} else
4139 		*total = count +
4140 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4141 	*entries = count;
4142 }
4143 
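/*
 * Worked example for get_total_entries_cpu() with made-up numbers: if
 * ring_buffer_entries_cpu() reports 1000 events, the CPU has overrun
 * (overwritten) 200 more and nothing was skipped, then *entries = 1000
 * and *total = 1200. When skipped_entries is set (a latency-tracer reset
 * never reached this CPU), the skipped events are subtracted and the
 * overrun count is ignored, so *total ends up equal to *entries.
 */
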
4144 static void
4145 get_total_entries(struct array_buffer *buf,
4146 		  unsigned long *total, unsigned long *entries)
4147 {
4148 	unsigned long t, e;
4149 	int cpu;
4150 
4151 	*total = 0;
4152 	*entries = 0;
4153 
4154 	for_each_tracing_cpu(cpu) {
4155 		get_total_entries_cpu(buf, &t, &e, cpu);
4156 		*total += t;
4157 		*entries += e;
4158 	}
4159 }
4160 
4161 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4162 {
4163 	unsigned long total, entries;
4164 
4165 	if (!tr)
4166 		tr = &global_trace;
4167 
4168 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4169 
4170 	return entries;
4171 }
4172 
4173 unsigned long trace_total_entries(struct trace_array *tr)
4174 {
4175 	unsigned long total, entries;
4176 
4177 	if (!tr)
4178 		tr = &global_trace;
4179 
4180 	get_total_entries(&tr->array_buffer, &total, &entries);
4181 
4182 	return entries;
4183 }
4184 
4185 static void print_lat_help_header(struct seq_file *m)
4186 {
4187 	seq_puts(m, "#                    _------=> CPU#            \n"
4188 		    "#                   / _-----=> irqs-off        \n"
4189 		    "#                  | / _----=> need-resched    \n"
4190 		    "#                  || / _---=> hardirq/softirq \n"
4191 		    "#                  ||| / _--=> preempt-depth   \n"
4192 		    "#                  |||| /     delay            \n"
4193 		    "#  cmd     pid     ||||| time  |   caller      \n"
4194 		    "#     \\   /        |||||  \\    |   /         \n");
4195 }
4196 
4197 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4198 {
4199 	unsigned long total;
4200 	unsigned long entries;
4201 
4202 	get_total_entries(buf, &total, &entries);
4203 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4204 		   entries, total, num_online_cpus());
4205 	seq_puts(m, "#\n");
4206 }
4207 
4208 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4209 				   unsigned int flags)
4210 {
4211 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4212 
4213 	print_event_info(buf, m);
4214 
4215 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4216 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4217 }
4218 
4219 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4220 				       unsigned int flags)
4221 {
4222 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4223 	const char *space = "            ";
4224 	int prec = tgid ? 12 : 2;
4225 
4226 	print_event_info(buf, m);
4227 
4228 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4229 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4230 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4231 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4232 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4233 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4234 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4235 }
4236 
4237 void
4238 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4239 {
4240 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4241 	struct array_buffer *buf = iter->array_buffer;
4242 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4243 	struct tracer *type = iter->trace;
4244 	unsigned long entries;
4245 	unsigned long total;
4246 	const char *name = "preemption";
4247 
4248 	name = type->name;
4249 
4250 	get_total_entries(buf, &total, &entries);
4251 
4252 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4253 		   name, UTS_RELEASE);
4254 	seq_puts(m, "# -----------------------------------"
4255 		 "---------------------------------\n");
4256 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4257 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4258 		   nsecs_to_usecs(data->saved_latency),
4259 		   entries,
4260 		   total,
4261 		   buf->cpu,
4262 #if defined(CONFIG_PREEMPT_NONE)
4263 		   "server",
4264 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4265 		   "desktop",
4266 #elif defined(CONFIG_PREEMPT)
4267 		   "preempt",
4268 #elif defined(CONFIG_PREEMPT_RT)
4269 		   "preempt_rt",
4270 #else
4271 		   "unknown",
4272 #endif
4273 		   /* These are reserved for later use */
4274 		   0, 0, 0, 0);
4275 #ifdef CONFIG_SMP
4276 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4277 #else
4278 	seq_puts(m, ")\n");
4279 #endif
4280 	seq_puts(m, "#    -----------------\n");
4281 	seq_printf(m, "#    | task: %.16s-%d "
4282 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4283 		   data->comm, data->pid,
4284 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4285 		   data->policy, data->rt_priority);
4286 	seq_puts(m, "#    -----------------\n");
4287 
4288 	if (data->critical_start) {
4289 		seq_puts(m, "#  => started at: ");
4290 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4291 		trace_print_seq(m, &iter->seq);
4292 		seq_puts(m, "\n#  => ended at:   ");
4293 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4294 		trace_print_seq(m, &iter->seq);
4295 		seq_puts(m, "\n#\n");
4296 	}
4297 
4298 	seq_puts(m, "#\n");
4299 }
4300 
4301 static void test_cpu_buff_start(struct trace_iterator *iter)
4302 {
4303 	struct trace_seq *s = &iter->seq;
4304 	struct trace_array *tr = iter->tr;
4305 
4306 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4307 		return;
4308 
4309 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4310 		return;
4311 
4312 	if (cpumask_available(iter->started) &&
4313 	    cpumask_test_cpu(iter->cpu, iter->started))
4314 		return;
4315 
4316 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4317 		return;
4318 
4319 	if (cpumask_available(iter->started))
4320 		cpumask_set_cpu(iter->cpu, iter->started);
4321 
4322 	/* Don't print started cpu buffer for the first entry of the trace */
4323 	if (iter->idx > 1)
4324 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4325 				iter->cpu);
4326 }
4327 
4328 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4329 {
4330 	struct trace_array *tr = iter->tr;
4331 	struct trace_seq *s = &iter->seq;
4332 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4333 	struct trace_entry *entry;
4334 	struct trace_event *event;
4335 
4336 	entry = iter->ent;
4337 
4338 	test_cpu_buff_start(iter);
4339 
4340 	event = ftrace_find_event(entry->type);
4341 
4342 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4343 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4344 			trace_print_lat_context(iter);
4345 		else
4346 			trace_print_context(iter);
4347 	}
4348 
4349 	if (trace_seq_has_overflowed(s))
4350 		return TRACE_TYPE_PARTIAL_LINE;
4351 
4352 	if (event)
4353 		return event->funcs->trace(iter, sym_flags, event);
4354 
4355 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4356 
4357 	return trace_handle_return(s);
4358 }
4359 
4360 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4361 {
4362 	struct trace_array *tr = iter->tr;
4363 	struct trace_seq *s = &iter->seq;
4364 	struct trace_entry *entry;
4365 	struct trace_event *event;
4366 
4367 	entry = iter->ent;
4368 
4369 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4370 		trace_seq_printf(s, "%d %d %llu ",
4371 				 entry->pid, iter->cpu, iter->ts);
4372 
4373 	if (trace_seq_has_overflowed(s))
4374 		return TRACE_TYPE_PARTIAL_LINE;
4375 
4376 	event = ftrace_find_event(entry->type);
4377 	if (event)
4378 		return event->funcs->raw(iter, 0, event);
4379 
4380 	trace_seq_printf(s, "%d ?\n", entry->type);
4381 
4382 	return trace_handle_return(s);
4383 }
4384 
4385 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4386 {
4387 	struct trace_array *tr = iter->tr;
4388 	struct trace_seq *s = &iter->seq;
4389 	unsigned char newline = '\n';
4390 	struct trace_entry *entry;
4391 	struct trace_event *event;
4392 
4393 	entry = iter->ent;
4394 
4395 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4396 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4397 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4398 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4399 		if (trace_seq_has_overflowed(s))
4400 			return TRACE_TYPE_PARTIAL_LINE;
4401 	}
4402 
4403 	event = ftrace_find_event(entry->type);
4404 	if (event) {
4405 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4406 		if (ret != TRACE_TYPE_HANDLED)
4407 			return ret;
4408 	}
4409 
4410 	SEQ_PUT_FIELD(s, newline);
4411 
4412 	return trace_handle_return(s);
4413 }
4414 
4415 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4416 {
4417 	struct trace_array *tr = iter->tr;
4418 	struct trace_seq *s = &iter->seq;
4419 	struct trace_entry *entry;
4420 	struct trace_event *event;
4421 
4422 	entry = iter->ent;
4423 
4424 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4425 		SEQ_PUT_FIELD(s, entry->pid);
4426 		SEQ_PUT_FIELD(s, iter->cpu);
4427 		SEQ_PUT_FIELD(s, iter->ts);
4428 		if (trace_seq_has_overflowed(s))
4429 			return TRACE_TYPE_PARTIAL_LINE;
4430 	}
4431 
4432 	event = ftrace_find_event(entry->type);
4433 	return event ? event->funcs->binary(iter, 0, event) :
4434 		TRACE_TYPE_HANDLED;
4435 }
4436 
4437 int trace_empty(struct trace_iterator *iter)
4438 {
4439 	struct ring_buffer_iter *buf_iter;
4440 	int cpu;
4441 
4442 	/* If we are looking at one CPU buffer, only check that one */
4443 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4444 		cpu = iter->cpu_file;
4445 		buf_iter = trace_buffer_iter(iter, cpu);
4446 		if (buf_iter) {
4447 			if (!ring_buffer_iter_empty(buf_iter))
4448 				return 0;
4449 		} else {
4450 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451 				return 0;
4452 		}
4453 		return 1;
4454 	}
4455 
4456 	for_each_tracing_cpu(cpu) {
4457 		buf_iter = trace_buffer_iter(iter, cpu);
4458 		if (buf_iter) {
4459 			if (!ring_buffer_iter_empty(buf_iter))
4460 				return 0;
4461 		} else {
4462 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4463 				return 0;
4464 		}
4465 	}
4466 
4467 	return 1;
4468 }
4469 
4470 /*  Called with trace_event_read_lock() held. */
4471 enum print_line_t print_trace_line(struct trace_iterator *iter)
4472 {
4473 	struct trace_array *tr = iter->tr;
4474 	unsigned long trace_flags = tr->trace_flags;
4475 	enum print_line_t ret;
4476 
4477 	if (iter->lost_events) {
4478 		if (iter->lost_events == (unsigned long)-1)
4479 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4480 					 iter->cpu);
4481 		else
4482 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4483 					 iter->cpu, iter->lost_events);
4484 		if (trace_seq_has_overflowed(&iter->seq))
4485 			return TRACE_TYPE_PARTIAL_LINE;
4486 	}
4487 
4488 	if (iter->trace && iter->trace->print_line) {
4489 		ret = iter->trace->print_line(iter);
4490 		if (ret != TRACE_TYPE_UNHANDLED)
4491 			return ret;
4492 	}
4493 
4494 	if (iter->ent->type == TRACE_BPUTS &&
4495 			trace_flags & TRACE_ITER_PRINTK &&
4496 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4497 		return trace_print_bputs_msg_only(iter);
4498 
4499 	if (iter->ent->type == TRACE_BPRINT &&
4500 			trace_flags & TRACE_ITER_PRINTK &&
4501 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4502 		return trace_print_bprintk_msg_only(iter);
4503 
4504 	if (iter->ent->type == TRACE_PRINT &&
4505 			trace_flags & TRACE_ITER_PRINTK &&
4506 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4507 		return trace_print_printk_msg_only(iter);
4508 
4509 	if (trace_flags & TRACE_ITER_BIN)
4510 		return print_bin_fmt(iter);
4511 
4512 	if (trace_flags & TRACE_ITER_HEX)
4513 		return print_hex_fmt(iter);
4514 
4515 	if (trace_flags & TRACE_ITER_RAW)
4516 		return print_raw_fmt(iter);
4517 
4518 	return print_trace_fmt(iter);
4519 }
4520 
4521 void trace_latency_header(struct seq_file *m)
4522 {
4523 	struct trace_iterator *iter = m->private;
4524 	struct trace_array *tr = iter->tr;
4525 
4526 	/* print nothing if the buffers are empty */
4527 	if (trace_empty(iter))
4528 		return;
4529 
4530 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4531 		print_trace_header(m, iter);
4532 
4533 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4534 		print_lat_help_header(m);
4535 }
4536 
4537 void trace_default_header(struct seq_file *m)
4538 {
4539 	struct trace_iterator *iter = m->private;
4540 	struct trace_array *tr = iter->tr;
4541 	unsigned long trace_flags = tr->trace_flags;
4542 
4543 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4544 		return;
4545 
4546 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4547 		/* print nothing if the buffers are empty */
4548 		if (trace_empty(iter))
4549 			return;
4550 		print_trace_header(m, iter);
4551 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4552 			print_lat_help_header(m);
4553 	} else {
4554 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4555 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4556 				print_func_help_header_irq(iter->array_buffer,
4557 							   m, trace_flags);
4558 			else
4559 				print_func_help_header(iter->array_buffer, m,
4560 						       trace_flags);
4561 		}
4562 	}
4563 }
4564 
4565 static void test_ftrace_alive(struct seq_file *m)
4566 {
4567 	if (!ftrace_is_dead())
4568 		return;
4569 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4570 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4571 }
4572 
4573 #ifdef CONFIG_TRACER_MAX_TRACE
4574 static void show_snapshot_main_help(struct seq_file *m)
4575 {
4576 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4577 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4578 		    "#                      Takes a snapshot of the main buffer.\n"
4579 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4580 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4581 		    "#                       is not a '0' or '1')\n");
4582 }
4583 
4584 static void show_snapshot_percpu_help(struct seq_file *m)
4585 {
4586 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4587 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4588 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4590 #else
4591 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4592 		    "#                     Must use main snapshot file to allocate.\n");
4593 #endif
4594 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4595 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4596 		    "#                       is not a '0' or '1')\n");
4597 }
4598 
4599 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4600 {
4601 	if (iter->tr->allocated_snapshot)
4602 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4603 	else
4604 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4605 
4606 	seq_puts(m, "# Snapshot commands:\n");
4607 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4608 		show_snapshot_main_help(m);
4609 	else
4610 		show_snapshot_percpu_help(m);
4611 }
4612 #else
4613 /* Should never be called */
4614 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4615 #endif
4616 
4617 static int s_show(struct seq_file *m, void *v)
4618 {
4619 	struct trace_iterator *iter = v;
4620 	int ret;
4621 
4622 	if (iter->ent == NULL) {
4623 		if (iter->tr) {
4624 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4625 			seq_puts(m, "#\n");
4626 			test_ftrace_alive(m);
4627 		}
4628 		if (iter->snapshot && trace_empty(iter))
4629 			print_snapshot_help(m, iter);
4630 		else if (iter->trace && iter->trace->print_header)
4631 			iter->trace->print_header(m);
4632 		else
4633 			trace_default_header(m);
4634 
4635 	} else if (iter->leftover) {
4636 		/*
4637 		 * If we filled the seq_file buffer earlier, we
4638 		 * want to just show it now.
4639 		 */
4640 		ret = trace_print_seq(m, &iter->seq);
4641 
4642 		/* ret should this time be zero, but you never know */
4643 		iter->leftover = ret;
4644 
4645 	} else {
4646 		print_trace_line(iter);
4647 		ret = trace_print_seq(m, &iter->seq);
4648 		/*
4649 		 * If we overflow the seq_file buffer, then it will
4650 		 * ask us for this data again at start up.
4651 		 * Use that instead.
4652 		 *  ret is 0 if seq_file write succeeded.
4653 		 *        -1 otherwise.
4654 		 */
4655 		iter->leftover = ret;
4656 	}
4657 
4658 	return 0;
4659 }
4660 
4661 /*
4662  * Should be used after trace_array_get(); trace_types_lock
4663  * ensures that i_cdev was already initialized.
4664  */
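/*
 * The encoding (set up by trace_create_cpu_file()) is "cpu + 1" stored in
 * i_cdev, so a NULL i_cdev can stand for all CPUs: the per_cpu file for
 * CPU 3, for instance, stores (void *)4 and decodes back to 3 below,
 * while top-level files leave i_cdev NULL and get RING_BUFFER_ALL_CPUS.
 */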
4665 static inline int tracing_get_cpu(struct inode *inode)
4666 {
4667 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4668 		return (long)inode->i_cdev - 1;
4669 	return RING_BUFFER_ALL_CPUS;
4670 }
4671 
4672 static const struct seq_operations tracer_seq_ops = {
4673 	.start		= s_start,
4674 	.next		= s_next,
4675 	.stop		= s_stop,
4676 	.show		= s_show,
4677 };
4678 
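/*
 * For reference, seq_read() drives the operations above roughly as
 * (simplified sketch, not the actual seq_file implementation):
 *
 *	p = s_start(m, &pos);
 *	while (p && !seq_has_overflowed(m)) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 *
 * which is why s_start()/s_stop() take and release the trace locks, and
 * why s_show() stashes overflowed output in iter->leftover for the next
 * cycle.
 */
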
4679 static struct trace_iterator *
4680 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4681 {
4682 	struct trace_array *tr = inode->i_private;
4683 	struct trace_iterator *iter;
4684 	int cpu;
4685 
4686 	if (tracing_disabled)
4687 		return ERR_PTR(-ENODEV);
4688 
4689 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4690 	if (!iter)
4691 		return ERR_PTR(-ENOMEM);
4692 
4693 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4694 				    GFP_KERNEL);
4695 	if (!iter->buffer_iter)
4696 		goto release;
4697 
4698 	/*
4699 	 * trace_find_next_entry() may need to save off iter->ent.
4700 	 * It will place it into the iter->temp buffer. As most
4701 	 * events are smaller than 128 bytes, allocate a buffer of that size.
4702 	 * If one is greater, then trace_find_next_entry() will
4703 	 * allocate a new buffer to adjust for the bigger iter->ent.
4704 	 * It's not critical if it fails to get allocated here.
4705 	 */
4706 	iter->temp = kmalloc(128, GFP_KERNEL);
4707 	if (iter->temp)
4708 		iter->temp_size = 128;
4709 
4710 	/*
4711 	 * trace_event_printf() may need to modify the given format
4712 	 * string to replace %p with %px so that it shows the real address
4713 	 * instead of a hashed value. However, that is only needed for
4714 	 * event tracing; other tracers may not need it. Defer the
4715 	 * allocation until it is needed.
4716 	 */
4717 	iter->fmt = NULL;
4718 	iter->fmt_size = 0;
4719 
4720 	/*
4721 	 * We make a copy of the current tracer to avoid concurrent
4722 	 * changes on it while we are reading.
4723 	 */
4724 	mutex_lock(&trace_types_lock);
4725 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4726 	if (!iter->trace)
4727 		goto fail;
4728 
4729 	*iter->trace = *tr->current_trace;
4730 
4731 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4732 		goto fail;
4733 
4734 	iter->tr = tr;
4735 
4736 #ifdef CONFIG_TRACER_MAX_TRACE
4737 	/* Currently only the top directory has a snapshot */
4738 	if (tr->current_trace->print_max || snapshot)
4739 		iter->array_buffer = &tr->max_buffer;
4740 	else
4741 #endif
4742 		iter->array_buffer = &tr->array_buffer;
4743 	iter->snapshot = snapshot;
4744 	iter->pos = -1;
4745 	iter->cpu_file = tracing_get_cpu(inode);
4746 	mutex_init(&iter->mutex);
4747 
4748 	/* Notify the tracer early; before we stop tracing. */
4749 	if (iter->trace->open)
4750 		iter->trace->open(iter);
4751 
4752 	/* Annotate start of buffers if we had overruns */
4753 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4754 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4755 
4756 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4757 	if (trace_clocks[tr->clock_id].in_ns)
4758 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4759 
4760 	/*
4761 	 * If pause-on-trace is enabled, then stop the trace while
4762 	 * dumping, unless this is the "snapshot" file
4763 	 */
4764 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4765 		tracing_stop_tr(tr);
4766 
4767 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4768 		for_each_tracing_cpu(cpu) {
4769 			iter->buffer_iter[cpu] =
4770 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4771 							 cpu, GFP_KERNEL);
4772 		}
4773 		ring_buffer_read_prepare_sync();
4774 		for_each_tracing_cpu(cpu) {
4775 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4776 			tracing_iter_reset(iter, cpu);
4777 		}
4778 	} else {
4779 		cpu = iter->cpu_file;
4780 		iter->buffer_iter[cpu] =
4781 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4782 						 cpu, GFP_KERNEL);
4783 		ring_buffer_read_prepare_sync();
4784 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4785 		tracing_iter_reset(iter, cpu);
4786 	}
4787 
4788 	mutex_unlock(&trace_types_lock);
4789 
4790 	return iter;
4791 
4792  fail:
4793 	mutex_unlock(&trace_types_lock);
4794 	kfree(iter->trace);
4795 	kfree(iter->temp);
4796 	kfree(iter->buffer_iter);
4797 release:
4798 	seq_release_private(inode, file);
4799 	return ERR_PTR(-ENOMEM);
4800 }
4801 
4802 int tracing_open_generic(struct inode *inode, struct file *filp)
4803 {
4804 	int ret;
4805 
4806 	ret = tracing_check_open_get_tr(NULL);
4807 	if (ret)
4808 		return ret;
4809 
4810 	filp->private_data = inode->i_private;
4811 	return 0;
4812 }
4813 
4814 bool tracing_is_disabled(void)
4815 {
4816 	return tracing_disabled;
4817 }
4818 
4819 /*
4820  * Open and update trace_array ref count.
4821  * Must have the current trace_array passed to it.
4822  */
4823 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4824 {
4825 	struct trace_array *tr = inode->i_private;
4826 	int ret;
4827 
4828 	ret = tracing_check_open_get_tr(tr);
4829 	if (ret)
4830 		return ret;
4831 
4832 	filp->private_data = inode->i_private;
4833 
4834 	return 0;
4835 }
4836 
4837 static int tracing_release(struct inode *inode, struct file *file)
4838 {
4839 	struct trace_array *tr = inode->i_private;
4840 	struct seq_file *m = file->private_data;
4841 	struct trace_iterator *iter;
4842 	int cpu;
4843 
4844 	if (!(file->f_mode & FMODE_READ)) {
4845 		trace_array_put(tr);
4846 		return 0;
4847 	}
4848 
4849 	/* Writes do not use seq_file */
4850 	iter = m->private;
4851 	mutex_lock(&trace_types_lock);
4852 
4853 	for_each_tracing_cpu(cpu) {
4854 		if (iter->buffer_iter[cpu])
4855 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4856 	}
4857 
4858 	if (iter->trace && iter->trace->close)
4859 		iter->trace->close(iter);
4860 
4861 	if (!iter->snapshot && tr->stop_count)
4862 		/* reenable tracing if it was previously enabled */
4863 		tracing_start_tr(tr);
4864 
4865 	__trace_array_put(tr);
4866 
4867 	mutex_unlock(&trace_types_lock);
4868 
4869 	mutex_destroy(&iter->mutex);
4870 	free_cpumask_var(iter->started);
4871 	kfree(iter->fmt);
4872 	kfree(iter->temp);
4873 	kfree(iter->trace);
4874 	kfree(iter->buffer_iter);
4875 	seq_release_private(inode, file);
4876 
4877 	return 0;
4878 }
4879 
4880 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4881 {
4882 	struct trace_array *tr = inode->i_private;
4883 
4884 	trace_array_put(tr);
4885 	return 0;
4886 }
4887 
4888 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4889 {
4890 	struct trace_array *tr = inode->i_private;
4891 
4892 	trace_array_put(tr);
4893 
4894 	return single_release(inode, file);
4895 }
4896 
4897 static int tracing_open(struct inode *inode, struct file *file)
4898 {
4899 	struct trace_array *tr = inode->i_private;
4900 	struct trace_iterator *iter;
4901 	int ret;
4902 
4903 	ret = tracing_check_open_get_tr(tr);
4904 	if (ret)
4905 		return ret;
4906 
4907 	/* If this file was open for write, then erase contents */
4908 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4909 		int cpu = tracing_get_cpu(inode);
4910 		struct array_buffer *trace_buf = &tr->array_buffer;
4911 
4912 #ifdef CONFIG_TRACER_MAX_TRACE
4913 		if (tr->current_trace->print_max)
4914 			trace_buf = &tr->max_buffer;
4915 #endif
4916 
4917 		if (cpu == RING_BUFFER_ALL_CPUS)
4918 			tracing_reset_online_cpus(trace_buf);
4919 		else
4920 			tracing_reset_cpu(trace_buf, cpu);
4921 	}
4922 
4923 	if (file->f_mode & FMODE_READ) {
4924 		iter = __tracing_open(inode, file, false);
4925 		if (IS_ERR(iter))
4926 			ret = PTR_ERR(iter);
4927 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4928 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4929 	}
4930 
4931 	if (ret < 0)
4932 		trace_array_put(tr);
4933 
4934 	return ret;
4935 }
4936 
4937 /*
4938  * Some tracers are not suitable for instance buffers.
4939  * A tracer is always available for the global array (toplevel)
4940  * or if it explicitly states that it is.
4941  */
4942 static bool
4943 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4944 {
4945 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4946 }
4947 
4948 /* Find the next tracer that this trace array may use */
4949 static struct tracer *
4950 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4951 {
4952 	while (t && !trace_ok_for_array(t, tr))
4953 		t = t->next;
4954 
4955 	return t;
4956 }
4957 
4958 static void *
4959 t_next(struct seq_file *m, void *v, loff_t *pos)
4960 {
4961 	struct trace_array *tr = m->private;
4962 	struct tracer *t = v;
4963 
4964 	(*pos)++;
4965 
4966 	if (t)
4967 		t = get_tracer_for_array(tr, t->next);
4968 
4969 	return t;
4970 }
4971 
4972 static void *t_start(struct seq_file *m, loff_t *pos)
4973 {
4974 	struct trace_array *tr = m->private;
4975 	struct tracer *t;
4976 	loff_t l = 0;
4977 
4978 	mutex_lock(&trace_types_lock);
4979 
4980 	t = get_tracer_for_array(tr, trace_types);
4981 	for (; t && l < *pos; t = t_next(m, t, &l))
4982 			;
4983 
4984 	return t;
4985 }
4986 
4987 static void t_stop(struct seq_file *m, void *p)
4988 {
4989 	mutex_unlock(&trace_types_lock);
4990 }
4991 
4992 static int t_show(struct seq_file *m, void *v)
4993 {
4994 	struct tracer *t = v;
4995 
4996 	if (!t)
4997 		return 0;
4998 
4999 	seq_puts(m, t->name);
5000 	if (t->next)
5001 		seq_putc(m, ' ');
5002 	else
5003 		seq_putc(m, '\n');
5004 
5005 	return 0;
5006 }
5007 
5008 static const struct seq_operations show_traces_seq_ops = {
5009 	.start		= t_start,
5010 	.next		= t_next,
5011 	.stop		= t_stop,
5012 	.show		= t_show,
5013 };
5014 
5015 static int show_traces_open(struct inode *inode, struct file *file)
5016 {
5017 	struct trace_array *tr = inode->i_private;
5018 	struct seq_file *m;
5019 	int ret;
5020 
5021 	ret = tracing_check_open_get_tr(tr);
5022 	if (ret)
5023 		return ret;
5024 
5025 	ret = seq_open(file, &show_traces_seq_ops);
5026 	if (ret) {
5027 		trace_array_put(tr);
5028 		return ret;
5029 	}
5030 
5031 	m = file->private_data;
5032 	m->private = tr;
5033 
5034 	return 0;
5035 }
5036 
5037 static int show_traces_release(struct inode *inode, struct file *file)
5038 {
5039 	struct trace_array *tr = inode->i_private;
5040 
5041 	trace_array_put(tr);
5042 	return seq_release(inode, file);
5043 }
5044 
5045 static ssize_t
5046 tracing_write_stub(struct file *filp, const char __user *ubuf,
5047 		   size_t count, loff_t *ppos)
5048 {
5049 	return count;
5050 }
5051 
5052 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5053 {
5054 	int ret;
5055 
5056 	if (file->f_mode & FMODE_READ)
5057 		ret = seq_lseek(file, offset, whence);
5058 	else
5059 		file->f_pos = ret = 0;
5060 
5061 	return ret;
5062 }
5063 
5064 static const struct file_operations tracing_fops = {
5065 	.open		= tracing_open,
5066 	.read		= seq_read,
5067 	.write		= tracing_write_stub,
5068 	.llseek		= tracing_lseek,
5069 	.release	= tracing_release,
5070 };
5071 
5072 static const struct file_operations show_traces_fops = {
5073 	.open		= show_traces_open,
5074 	.read		= seq_read,
5075 	.llseek		= seq_lseek,
5076 	.release	= show_traces_release,
5077 };
5078 
5079 static ssize_t
5080 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5081 		     size_t count, loff_t *ppos)
5082 {
5083 	struct trace_array *tr = file_inode(filp)->i_private;
5084 	char *mask_str;
5085 	int len;
5086 
5087 	len = snprintf(NULL, 0, "%*pb\n",
5088 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5089 	mask_str = kmalloc(len, GFP_KERNEL);
5090 	if (!mask_str)
5091 		return -ENOMEM;
5092 
5093 	len = snprintf(mask_str, len, "%*pb\n",
5094 		       cpumask_pr_args(tr->tracing_cpumask));
5095 	if (len >= count) {
5096 		count = -EINVAL;
5097 		goto out_err;
5098 	}
5099 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5100 
5101 out_err:
5102 	kfree(mask_str);
5103 
5104 	return count;
5105 }
5106 
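/*
 * Note on the sizing in tracing_cpumask_read() above: snprintf(NULL, 0, ...)
 * returns the number of characters the formatted output would need, so the
 * "+ 1" reserves room for the terminating NUL before the mask is printed
 * into mask_str for real.
 */
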
5107 int tracing_set_cpumask(struct trace_array *tr,
5108 			cpumask_var_t tracing_cpumask_new)
5109 {
5110 	int cpu;
5111 
5112 	if (!tr)
5113 		return -EINVAL;
5114 
5115 	local_irq_disable();
5116 	arch_spin_lock(&tr->max_lock);
5117 	for_each_tracing_cpu(cpu) {
5118 		/*
5119 		 * Increase/decrease the disabled counter if we are
5120 		 * about to flip a bit in the cpumask:
5121 		 */
5122 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5123 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5124 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5125 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5126 		}
5127 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5128 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5129 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5130 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5131 		}
5132 	}
5133 	arch_spin_unlock(&tr->max_lock);
5134 	local_irq_enable();
5135 
5136 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5137 
5138 	return 0;
5139 }
5140 
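/*
 * Typical use of tracing_set_cpumask() from user space (illustrative path
 * and mask): writing a hex mask to the "tracing_cpumask" file ends up
 * here, e.g.
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * restricts tracing to CPUs 0 and 1; CPUs whose bits are cleared get their
 * per-CPU buffers disabled above, and newly set ones are re-enabled.
 */
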
5141 static ssize_t
5142 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5143 		      size_t count, loff_t *ppos)
5144 {
5145 	struct trace_array *tr = file_inode(filp)->i_private;
5146 	cpumask_var_t tracing_cpumask_new;
5147 	int err;
5148 
5149 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5150 		return -ENOMEM;
5151 
5152 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5153 	if (err)
5154 		goto err_free;
5155 
5156 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5157 	if (err)
5158 		goto err_free;
5159 
5160 	free_cpumask_var(tracing_cpumask_new);
5161 
5162 	return count;
5163 
5164 err_free:
5165 	free_cpumask_var(tracing_cpumask_new);
5166 
5167 	return err;
5168 }
5169 
5170 static const struct file_operations tracing_cpumask_fops = {
5171 	.open		= tracing_open_generic_tr,
5172 	.read		= tracing_cpumask_read,
5173 	.write		= tracing_cpumask_write,
5174 	.release	= tracing_release_generic_tr,
5175 	.llseek		= generic_file_llseek,
5176 };
5177 
5178 static int tracing_trace_options_show(struct seq_file *m, void *v)
5179 {
5180 	struct tracer_opt *trace_opts;
5181 	struct trace_array *tr = m->private;
5182 	u32 tracer_flags;
5183 	int i;
5184 
5185 	mutex_lock(&trace_types_lock);
5186 	tracer_flags = tr->current_trace->flags->val;
5187 	trace_opts = tr->current_trace->flags->opts;
5188 
5189 	for (i = 0; trace_options[i]; i++) {
5190 		if (tr->trace_flags & (1 << i))
5191 			seq_printf(m, "%s\n", trace_options[i]);
5192 		else
5193 			seq_printf(m, "no%s\n", trace_options[i]);
5194 	}
5195 
5196 	for (i = 0; trace_opts[i].name; i++) {
5197 		if (tracer_flags & trace_opts[i].bit)
5198 			seq_printf(m, "%s\n", trace_opts[i].name);
5199 		else
5200 			seq_printf(m, "no%s\n", trace_opts[i].name);
5201 	}
5202 	mutex_unlock(&trace_types_lock);
5203 
5204 	return 0;
5205 }
5206 
5207 static int __set_tracer_option(struct trace_array *tr,
5208 			       struct tracer_flags *tracer_flags,
5209 			       struct tracer_opt *opts, int neg)
5210 {
5211 	struct tracer *trace = tracer_flags->trace;
5212 	int ret;
5213 
5214 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5215 	if (ret)
5216 		return ret;
5217 
5218 	if (neg)
5219 		tracer_flags->val &= ~opts->bit;
5220 	else
5221 		tracer_flags->val |= opts->bit;
5222 	return 0;
5223 }
5224 
5225 /* Try to assign a tracer specific option */
5226 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5227 {
5228 	struct tracer *trace = tr->current_trace;
5229 	struct tracer_flags *tracer_flags = trace->flags;
5230 	struct tracer_opt *opts = NULL;
5231 	int i;
5232 
5233 	for (i = 0; tracer_flags->opts[i].name; i++) {
5234 		opts = &tracer_flags->opts[i];
5235 
5236 		if (strcmp(cmp, opts->name) == 0)
5237 			return __set_tracer_option(tr, trace->flags, opts, neg);
5238 	}
5239 
5240 	return -EINVAL;
5241 }
5242 
5243 /* Some tracers require overwrite to stay enabled */
5244 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5245 {
5246 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5247 		return -1;
5248 
5249 	return 0;
5250 }
5251 
5252 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5253 {
5254 	int *map;
5255 
5256 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5257 	    (mask == TRACE_ITER_RECORD_CMD))
5258 		lockdep_assert_held(&event_mutex);
5259 
5260 	/* do nothing if flag is already set */
5261 	/* do nothing if the flag already matches the requested state */
5262 		return 0;
5263 
5264 	/* Give the tracer a chance to approve the change */
5265 	if (tr->current_trace->flag_changed)
5266 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5267 			return -EINVAL;
5268 
5269 	if (enabled)
5270 		tr->trace_flags |= mask;
5271 	else
5272 		tr->trace_flags &= ~mask;
5273 
5274 	if (mask == TRACE_ITER_RECORD_CMD)
5275 		trace_event_enable_cmd_record(enabled);
5276 
5277 	if (mask == TRACE_ITER_RECORD_TGID) {
5278 		if (!tgid_map) {
5279 			tgid_map_max = pid_max;
5280 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5281 				       GFP_KERNEL);
5282 
5283 			/*
5284 			 * Pairs with smp_load_acquire() in
5285 			 * trace_find_tgid_ptr() to ensure that if it observes
5286 			 * the tgid_map we just allocated then it also observes
5287 			 * the corresponding tgid_map_max value.
5288 			 */
5289 			smp_store_release(&tgid_map, map);
5290 		}
5291 		if (!tgid_map) {
5292 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5293 			return -ENOMEM;
5294 		}
5295 
5296 		trace_event_enable_tgid_record(enabled);
5297 	}
5298 
5299 	if (mask == TRACE_ITER_EVENT_FORK)
5300 		trace_event_follow_fork(tr, enabled);
5301 
5302 	if (mask == TRACE_ITER_FUNC_FORK)
5303 		ftrace_pid_follow_fork(tr, enabled);
5304 
5305 	if (mask == TRACE_ITER_OVERWRITE) {
5306 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5307 #ifdef CONFIG_TRACER_MAX_TRACE
5308 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5309 #endif
5310 	}
5311 
5312 	if (mask == TRACE_ITER_PRINTK) {
5313 		trace_printk_start_stop_comm(enabled);
5314 		trace_printk_control(enabled);
5315 	}
5316 
5317 	return 0;
5318 }
5319 
5320 int trace_set_options(struct trace_array *tr, char *option)
5321 {
5322 	char *cmp;
5323 	int neg = 0;
5324 	int ret;
5325 	size_t orig_len = strlen(option);
5326 	int len;
5327 
5328 	cmp = strstrip(option);
5329 
5330 	len = str_has_prefix(cmp, "no");
5331 	if (len)
5332 		neg = 1;
5333 
5334 	cmp += len;
5335 
5336 	mutex_lock(&event_mutex);
5337 	mutex_lock(&trace_types_lock);
5338 
5339 	ret = match_string(trace_options, -1, cmp);
5340 	/* If it was not a core option, test the tracer-specific options */
5341 	if (ret < 0)
5342 		ret = set_tracer_option(tr, cmp, neg);
5343 	else
5344 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5345 
5346 	mutex_unlock(&trace_types_lock);
5347 	mutex_unlock(&event_mutex);
5348 
5349 	/*
5350 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5351 	 * turn it back into a space.
5352 	 */
5353 	if (orig_len > strlen(option))
5354 		option[strlen(option)] = ' ';
5355 
5356 	return ret;
5357 }
5358 
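/*
 * Illustrative calls to trace_set_options() (option names are examples):
 * the same strings accepted by the trace_options file are handled here,
 * e.g.
 *
 *	trace_set_options(tr, "sym-offset");        // set a core flag
 *	trace_set_options(tr, "nofuncgraph-irqs");  // clear a tracer-specific option
 *
 * A leading "no" clears the flag; names that are not core options fall
 * through to set_tracer_option() for the current tracer's private flags.
 */
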
5359 static void __init apply_trace_boot_options(void)
5360 {
5361 	char *buf = trace_boot_options_buf;
5362 	char *option;
5363 
5364 	while (true) {
5365 		option = strsep(&buf, ",");
5366 
5367 		if (!option)
5368 			break;
5369 
5370 		if (*option)
5371 			trace_set_options(&global_trace, option);
5372 
5373 		/* Put back the comma to allow this to be called again */
5374 		if (buf)
5375 			*(buf - 1) = ',';
5376 	}
5377 }
5378 
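/*
 * Example boot usage of apply_trace_boot_options() (illustrative option
 * names): with a command line such as
 *
 *	trace_options=sym-offset,noirq-info
 *
 * each comma-separated token is passed to trace_set_options() above, and
 * the comma is restored afterwards so the saved boot string can be walked
 * again if needed.
 */
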
5379 static ssize_t
5380 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5381 			size_t cnt, loff_t *ppos)
5382 {
5383 	struct seq_file *m = filp->private_data;
5384 	struct trace_array *tr = m->private;
5385 	char buf[64];
5386 	int ret;
5387 
5388 	if (cnt >= sizeof(buf))
5389 		return -EINVAL;
5390 
5391 	if (copy_from_user(buf, ubuf, cnt))
5392 		return -EFAULT;
5393 
5394 	buf[cnt] = 0;
5395 
5396 	ret = trace_set_options(tr, buf);
5397 	if (ret < 0)
5398 		return ret;
5399 
5400 	*ppos += cnt;
5401 
5402 	return cnt;
5403 }
5404 
5405 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5406 {
5407 	struct trace_array *tr = inode->i_private;
5408 	int ret;
5409 
5410 	ret = tracing_check_open_get_tr(tr);
5411 	if (ret)
5412 		return ret;
5413 
5414 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5415 	if (ret < 0)
5416 		trace_array_put(tr);
5417 
5418 	return ret;
5419 }
5420 
5421 static const struct file_operations tracing_iter_fops = {
5422 	.open		= tracing_trace_options_open,
5423 	.read		= seq_read,
5424 	.llseek		= seq_lseek,
5425 	.release	= tracing_single_release_tr,
5426 	.write		= tracing_trace_options_write,
5427 };
5428 
5429 static const char readme_msg[] =
5430 	"tracing mini-HOWTO:\n\n"
5431 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5432 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5433 	" Important files:\n"
5434 	"  trace\t\t\t- The static contents of the buffer\n"
5435 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5436 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5437 	"  current_tracer\t- function and latency tracers\n"
5438 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5439 	"  error_log\t- error log for failed commands (that support it)\n"
5440 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5441 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5442 	"  trace_clock\t\t- change the clock used to order events\n"
5443 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5444 	"      global:   Synced across CPUs but slows tracing down.\n"
5445 	"     counter:   Not a clock, but just an increment\n"
5446 	"      uptime:   Jiffy counter from time of boot\n"
5447 	"        perf:   Same clock that perf events use\n"
5448 #ifdef CONFIG_X86_64
5449 	"     x86-tsc:   TSC cycle counter\n"
5450 #endif
5451 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5452 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5453 	"    absolute:   Absolute (standalone) timestamp\n"
5454 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5455 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5456 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5457 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5458 	"\t\t\t  Remove sub-buffer with rmdir\n"
5459 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5460 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5461 	"\t\t\t  option name\n"
5462 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5463 #ifdef CONFIG_DYNAMIC_FTRACE
5464 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5465 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5466 	"\t\t\t  functions\n"
5467 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5468 	"\t     modules: Can select a group via module\n"
5469 	"\t      Format: :mod:<module-name>\n"
5470 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5471 	"\t    triggers: a command to perform when function is hit\n"
5472 	"\t      Format: <function>:<trigger>[:count]\n"
5473 	"\t     trigger: traceon, traceoff\n"
5474 	"\t\t      enable_event:<system>:<event>\n"
5475 	"\t\t      disable_event:<system>:<event>\n"
5476 #ifdef CONFIG_STACKTRACE
5477 	"\t\t      stacktrace\n"
5478 #endif
5479 #ifdef CONFIG_TRACER_SNAPSHOT
5480 	"\t\t      snapshot\n"
5481 #endif
5482 	"\t\t      dump\n"
5483 	"\t\t      cpudump\n"
5484 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5485 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5486 	"\t     The first one will disable tracing every time do_fault is hit\n"
5487 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5488 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5489 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5490 	"\t       the counter will not decrement. It only decrements when the\n"
5491 	"\t       trigger did work\n"
5492 	"\t     To remove a trigger without a count:\n"
5493 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5494 	"\t     To remove a trigger with a count:\n"
5495 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5496 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5497 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5498 	"\t    modules: Can select a group via module command :mod:\n"
5499 	"\t    Does not accept triggers\n"
5500 #endif /* CONFIG_DYNAMIC_FTRACE */
5501 #ifdef CONFIG_FUNCTION_TRACER
5502 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5503 	"\t\t    (function)\n"
5504 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5505 	"\t\t    (function)\n"
5506 #endif
5507 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5508 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5509 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5510 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5511 #endif
5512 #ifdef CONFIG_TRACER_SNAPSHOT
5513 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5514 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5515 	"\t\t\t  information\n"
5516 #endif
5517 #ifdef CONFIG_STACK_TRACER
5518 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5519 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5520 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5521 	"\t\t\t  new trace)\n"
5522 #ifdef CONFIG_DYNAMIC_FTRACE
5523 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5524 	"\t\t\t  traces\n"
5525 #endif
5526 #endif /* CONFIG_STACK_TRACER */
5527 #ifdef CONFIG_DYNAMIC_EVENTS
5528 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5529 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5530 #endif
5531 #ifdef CONFIG_KPROBE_EVENTS
5532 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5533 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5534 #endif
5535 #ifdef CONFIG_UPROBE_EVENTS
5536 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5537 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5538 #endif
5539 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5540 	"\t  accepts: event-definitions (one definition per line)\n"
5541 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5542 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5543 #ifdef CONFIG_HIST_TRIGGERS
5544 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5545 #endif
5546 	"\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5547 	"\t           -:[<group>/]<event>\n"
5548 #ifdef CONFIG_KPROBE_EVENTS
5549 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5550 	"place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5551 #endif
5552 #ifdef CONFIG_UPROBE_EVENTS
5553 	"   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5554 #endif
5555 	"\t     args: <name>=fetcharg[:type]\n"
5556 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5557 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5558 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5559 #else
5560 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5561 #endif
5562 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5563 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5564 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5565 	"\t           <type>\\[<array-size>\\]\n"
5566 #ifdef CONFIG_HIST_TRIGGERS
5567 	"\t    field: <stype> <name>;\n"
5568 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5569 	"\t           [unsigned] char/int/long\n"
5570 #endif
5571 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5572 	"\t            of the <attached-group>/<attached-event>.\n"
5573 #endif
5574 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5575 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5576 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5577 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5578 	"\t\t\t  events\n"
5579 	"      filter\t\t- If set, only events passing filter are traced\n"
5580 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5581 	"\t\t\t  <event>:\n"
5582 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5583 	"      filter\t\t- If set, only events passing filter are traced\n"
5584 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5585 	"\t    Format: <trigger>[:count][if <filter>]\n"
5586 	"\t   trigger: traceon, traceoff\n"
5587 	"\t            enable_event:<system>:<event>\n"
5588 	"\t            disable_event:<system>:<event>\n"
5589 #ifdef CONFIG_HIST_TRIGGERS
5590 	"\t            enable_hist:<system>:<event>\n"
5591 	"\t            disable_hist:<system>:<event>\n"
5592 #endif
5593 #ifdef CONFIG_STACKTRACE
5594 	"\t\t    stacktrace\n"
5595 #endif
5596 #ifdef CONFIG_TRACER_SNAPSHOT
5597 	"\t\t    snapshot\n"
5598 #endif
5599 #ifdef CONFIG_HIST_TRIGGERS
5600 	"\t\t    hist (see below)\n"
5601 #endif
5602 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5603 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5604 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5605 	"\t                  events/block/block_unplug/trigger\n"
5606 	"\t   The first disables tracing every time block_unplug is hit.\n"
5607 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5608 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5609 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5610 	"\t   Like function triggers, the counter is only decremented if it\n"
5611 	"\t    enabled or disabled tracing.\n"
5612 	"\t   To remove a trigger without a count:\n"
5613 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5614 	"\t   To remove a trigger with a count:\n"
5615 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5616 	"\t   Filters can be ignored when removing a trigger.\n"
5617 #ifdef CONFIG_HIST_TRIGGERS
5618 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5619 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5620 	"\t            [:values=<field1[,field2,...]>]\n"
5621 	"\t            [:sort=<field1[,field2,...]>]\n"
5622 	"\t            [:size=#entries]\n"
5623 	"\t            [:pause][:continue][:clear]\n"
5624 	"\t            [:name=histname1]\n"
5625 	"\t            [:<handler>.<action>]\n"
5626 	"\t            [if <filter>]\n\n"
5627 	"\t    Note, special fields can be used as well:\n"
5628 	"\t            common_timestamp - to record current timestamp\n"
5629 	"\t            common_cpu - to record the CPU the event happened on\n"
5630 	"\n"
5631 	"\t    When a matching event is hit, an entry is added to a hash\n"
5632 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5633 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5634 	"\t    correspond to fields in the event's format description.  Keys\n"
5635 	"\t    can be any field, or the special string 'stacktrace'.\n"
5636 	"\t    Compound keys consisting of up to two fields can be specified\n"
5637 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5638 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5639 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5640 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5641 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5642 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5643 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5644 	"\t    its histogram data will be shared with other triggers of the\n"
5645 	"\t    same name, and trigger hits will update this common data.\n\n"
5646 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5647 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5648 	"\t    triggers attached to an event, there will be a table for each\n"
5649 	"\t    trigger in the output.  The table displayed for a named\n"
5650 	"\t    trigger will be the same as any other instance having the\n"
5651 	"\t    same name.  The default format used to display a given field\n"
5652 	"\t    can be modified by appending any of the following modifiers\n"
5653 	"\t    to the field name, as applicable:\n\n"
5654 	"\t            .hex        display a number as a hex value\n"
5655 	"\t            .sym        display an address as a symbol\n"
5656 	"\t            .sym-offset display an address as a symbol and offset\n"
5657 	"\t            .execname   display a common_pid as a program name\n"
5658 	"\t            .syscall    display a syscall id as a syscall name\n"
5659 	"\t            .log2       display log2 value rather than raw number\n"
5660 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5661 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5662 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5663 	"\t    trigger or to start a hist trigger but not log any events\n"
5664 	"\t    until told to do so.  'continue' can be used to start or\n"
5665 	"\t    restart a paused hist trigger.\n\n"
5666 	"\t    The 'clear' parameter will clear the contents of a running\n"
5667 	"\t    hist trigger and leave its current paused/active state\n"
5668 	"\t    unchanged.\n\n"
5669 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5670 	"\t    have one event conditionally start and stop another event's\n"
5671 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5672 	"\t    the enable_event and disable_event triggers.\n\n"
5673 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5674 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5675 	"\t        <handler>.<action>\n\n"
5676 	"\t    The available handlers are:\n\n"
5677 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5678 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5679 	"\t        onchange(var)            - invoke action if var changes\n\n"
5680 	"\t    The available actions are:\n\n"
5681 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5682 	"\t        save(field,...)                      - save current event fields\n"
5683 #ifdef CONFIG_TRACER_SNAPSHOT
5684 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5685 #endif
5686 #ifdef CONFIG_SYNTH_EVENTS
5687 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5688 	"\t  Write into this file to define/undefine new synthetic events.\n"
5689 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5690 #endif
5691 #endif
5692 ;
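
/*
 * Illustrative only (not part of the kernel build): a minimal userspace
 * sketch exercising two of the interfaces documented in readme_msg above,
 * defining a kprobe event and attaching a hist trigger to it.  The tracefs
 * mount point, the probed symbol and the x86-64 register argument are
 * assumptions.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static void write_str(const char *path, const char *s)
 *	{
 *		int fd = open(path, O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, s, strlen(s));
 *			close(fd);
 *		}
 *	}
 *
 *	int main(void)
 *	{
 *		// p[:[<group>/]<event>] <place> [<args>]
 *		write_str("/sys/kernel/tracing/kprobe_events",
 *			  "p:myprobe do_sys_openat2 dfd=%di\n");
 *		// hist:keys=<field1[,field2,...]>[:values=...][:sort=...]
 *		write_str("/sys/kernel/tracing/events/kprobes/myprobe/trigger",
 *			  "hist:keys=dfd\n");
 *		return 0;
 *	}
 */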
5693 
5694 static ssize_t
5695 tracing_readme_read(struct file *filp, char __user *ubuf,
5696 		       size_t cnt, loff_t *ppos)
5697 {
5698 	return simple_read_from_buffer(ubuf, cnt, ppos,
5699 					readme_msg, strlen(readme_msg));
5700 }
5701 
5702 static const struct file_operations tracing_readme_fops = {
5703 	.open		= tracing_open_generic,
5704 	.read		= tracing_readme_read,
5705 	.llseek		= generic_file_llseek,
5706 };
5707 
5708 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5709 {
5710 	int pid = ++(*pos);
5711 
5712 	return trace_find_tgid_ptr(pid);
5713 }
5714 
5715 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5716 {
5717 	int pid = *pos;
5718 
5719 	return trace_find_tgid_ptr(pid);
5720 }
5721 
5722 static void saved_tgids_stop(struct seq_file *m, void *v)
5723 {
5724 }
5725 
5726 static int saved_tgids_show(struct seq_file *m, void *v)
5727 {
5728 	int *entry = (int *)v;
5729 	int pid = entry - tgid_map;
5730 	int tgid = *entry;
5731 
5732 	if (tgid == 0)
5733 		return SEQ_SKIP;
5734 
5735 	seq_printf(m, "%d %d\n", pid, tgid);
5736 	return 0;
5737 }
5738 
5739 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5740 	.start		= saved_tgids_start,
5741 	.stop		= saved_tgids_stop,
5742 	.next		= saved_tgids_next,
5743 	.show		= saved_tgids_show,
5744 };
5745 
5746 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5747 {
5748 	int ret;
5749 
5750 	ret = tracing_check_open_get_tr(NULL);
5751 	if (ret)
5752 		return ret;
5753 
5754 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5755 }
5756 
5757 
5758 static const struct file_operations tracing_saved_tgids_fops = {
5759 	.open		= tracing_saved_tgids_open,
5760 	.read		= seq_read,
5761 	.llseek		= seq_lseek,
5762 	.release	= seq_release,
5763 };
5764 
5765 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5766 {
5767 	unsigned int *ptr = v;
5768 
5769 	if (*pos || m->count)
5770 		ptr++;
5771 
5772 	(*pos)++;
5773 
5774 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5775 	     ptr++) {
5776 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5777 			continue;
5778 
5779 		return ptr;
5780 	}
5781 
5782 	return NULL;
5783 }
5784 
5785 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5786 {
5787 	void *v;
5788 	loff_t l = 0;
5789 
5790 	preempt_disable();
5791 	arch_spin_lock(&trace_cmdline_lock);
5792 
5793 	v = &savedcmd->map_cmdline_to_pid[0];
5794 	while (l <= *pos) {
5795 		v = saved_cmdlines_next(m, v, &l);
5796 		if (!v)
5797 			return NULL;
5798 	}
5799 
5800 	return v;
5801 }
5802 
5803 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5804 {
5805 	arch_spin_unlock(&trace_cmdline_lock);
5806 	preempt_enable();
5807 }
5808 
5809 static int saved_cmdlines_show(struct seq_file *m, void *v)
5810 {
5811 	char buf[TASK_COMM_LEN];
5812 	unsigned int *pid = v;
5813 
5814 	__trace_find_cmdline(*pid, buf);
5815 	seq_printf(m, "%d %s\n", *pid, buf);
5816 	return 0;
5817 }
5818 
5819 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5820 	.start		= saved_cmdlines_start,
5821 	.next		= saved_cmdlines_next,
5822 	.stop		= saved_cmdlines_stop,
5823 	.show		= saved_cmdlines_show,
5824 };
5825 
5826 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5827 {
5828 	int ret;
5829 
5830 	ret = tracing_check_open_get_tr(NULL);
5831 	if (ret)
5832 		return ret;
5833 
5834 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5835 }
5836 
5837 static const struct file_operations tracing_saved_cmdlines_fops = {
5838 	.open		= tracing_saved_cmdlines_open,
5839 	.read		= seq_read,
5840 	.llseek		= seq_lseek,
5841 	.release	= seq_release,
5842 };
5843 
5844 static ssize_t
5845 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5846 				 size_t cnt, loff_t *ppos)
5847 {
5848 	char buf[64];
5849 	int r;
5850 
5851 	arch_spin_lock(&trace_cmdline_lock);
5852 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5853 	arch_spin_unlock(&trace_cmdline_lock);
5854 
5855 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5856 }
5857 
5858 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5859 {
5860 	kfree(s->saved_cmdlines);
5861 	kfree(s->map_cmdline_to_pid);
5862 	kfree(s);
5863 }
5864 
5865 static int tracing_resize_saved_cmdlines(unsigned int val)
5866 {
5867 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5868 
5869 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5870 	if (!s)
5871 		return -ENOMEM;
5872 
5873 	if (allocate_cmdlines_buffer(val, s) < 0) {
5874 		kfree(s);
5875 		return -ENOMEM;
5876 	}
5877 
5878 	arch_spin_lock(&trace_cmdline_lock);
5879 	savedcmd_temp = savedcmd;
5880 	savedcmd = s;
5881 	arch_spin_unlock(&trace_cmdline_lock);
5882 	free_saved_cmdlines_buffer(savedcmd_temp);
5883 
5884 	return 0;
5885 }
5886 
5887 static ssize_t
5888 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5889 				  size_t cnt, loff_t *ppos)
5890 {
5891 	unsigned long val;
5892 	int ret;
5893 
5894 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5895 	if (ret)
5896 		return ret;
5897 
5898 	/* must have at least 1 entry and at most PID_MAX_DEFAULT */
5899 	if (!val || val > PID_MAX_DEFAULT)
5900 		return -EINVAL;
5901 
5902 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5903 	if (ret < 0)
5904 		return ret;
5905 
5906 	*ppos += cnt;
5907 
5908 	return cnt;
5909 }
5910 
5911 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5912 	.open		= tracing_open_generic,
5913 	.read		= tracing_saved_cmdlines_size_read,
5914 	.write		= tracing_saved_cmdlines_size_write,
5915 };
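
/*
 * Illustrative only: resizing the saved cmdlines cache from userspace
 * through the file registered above.  The tracefs path is an assumption;
 * headers are omitted for brevity.  Values outside 1..PID_MAX_DEFAULT are
 * rejected with -EINVAL.
 *
 *	int fd = open("/sys/kernel/tracing/saved_cmdlines_size", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1024\n", 5);		// grow the pid -> comm cache
 *		close(fd);
 *	}
 */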
5916 
5917 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5918 static union trace_eval_map_item *
5919 update_eval_map(union trace_eval_map_item *ptr)
5920 {
5921 	if (!ptr->map.eval_string) {
5922 		if (ptr->tail.next) {
5923 			ptr = ptr->tail.next;
5924 			/* Set ptr to the next real item (skip head) */
5925 			ptr++;
5926 		} else
5927 			return NULL;
5928 	}
5929 	return ptr;
5930 }
5931 
5932 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5933 {
5934 	union trace_eval_map_item *ptr = v;
5935 
5936 	/*
5937 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5938 	 * This really should never happen.
5939 	 */
5940 	(*pos)++;
5941 	ptr = update_eval_map(ptr);
5942 	if (WARN_ON_ONCE(!ptr))
5943 		return NULL;
5944 
5945 	ptr++;
5946 	ptr = update_eval_map(ptr);
5947 
5948 	return ptr;
5949 }
5950 
5951 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5952 {
5953 	union trace_eval_map_item *v;
5954 	loff_t l = 0;
5955 
5956 	mutex_lock(&trace_eval_mutex);
5957 
5958 	v = trace_eval_maps;
5959 	if (v)
5960 		v++;
5961 
5962 	while (v && l < *pos) {
5963 		v = eval_map_next(m, v, &l);
5964 	}
5965 
5966 	return v;
5967 }
5968 
5969 static void eval_map_stop(struct seq_file *m, void *v)
5970 {
5971 	mutex_unlock(&trace_eval_mutex);
5972 }
5973 
5974 static int eval_map_show(struct seq_file *m, void *v)
5975 {
5976 	union trace_eval_map_item *ptr = v;
5977 
5978 	seq_printf(m, "%s %ld (%s)\n",
5979 		   ptr->map.eval_string, ptr->map.eval_value,
5980 		   ptr->map.system);
5981 
5982 	return 0;
5983 }
5984 
5985 static const struct seq_operations tracing_eval_map_seq_ops = {
5986 	.start		= eval_map_start,
5987 	.next		= eval_map_next,
5988 	.stop		= eval_map_stop,
5989 	.show		= eval_map_show,
5990 };
5991 
5992 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5993 {
5994 	int ret;
5995 
5996 	ret = tracing_check_open_get_tr(NULL);
5997 	if (ret)
5998 		return ret;
5999 
6000 	return seq_open(filp, &tracing_eval_map_seq_ops);
6001 }
6002 
6003 static const struct file_operations tracing_eval_map_fops = {
6004 	.open		= tracing_eval_map_open,
6005 	.read		= seq_read,
6006 	.llseek		= seq_lseek,
6007 	.release	= seq_release,
6008 };
6009 
6010 static inline union trace_eval_map_item *
6011 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6012 {
6013 	/* Return tail of array given the head */
6014 	return ptr + ptr->head.length + 1;
6015 }
6016 
6017 static void
6018 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6019 			   int len)
6020 {
6021 	struct trace_eval_map **stop;
6022 	struct trace_eval_map **map;
6023 	union trace_eval_map_item *map_array;
6024 	union trace_eval_map_item *ptr;
6025 
6026 	stop = start + len;
6027 
6028 	/*
6029 	 * The trace_eval_maps contains the map plus a head and tail item,
6030 	 * where the head holds the module and length of array, and the
6031 	 * tail holds a pointer to the next list.
6032 	 */
6033 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6034 	if (!map_array) {
6035 		pr_warn("Unable to allocate trace eval mapping\n");
6036 		return;
6037 	}
6038 
6039 	mutex_lock(&trace_eval_mutex);
6040 
6041 	if (!trace_eval_maps)
6042 		trace_eval_maps = map_array;
6043 	else {
6044 		ptr = trace_eval_maps;
6045 		for (;;) {
6046 			ptr = trace_eval_jmp_to_tail(ptr);
6047 			if (!ptr->tail.next)
6048 				break;
6049 			ptr = ptr->tail.next;
6050 
6051 		}
6052 		ptr->tail.next = map_array;
6053 	}
6054 	map_array->head.mod = mod;
6055 	map_array->head.length = len;
6056 	map_array++;
6057 
6058 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6059 		map_array->map = **map;
6060 		map_array++;
6061 	}
6062 	memset(map_array, 0, sizeof(*map_array));
6063 
6064 	mutex_unlock(&trace_eval_mutex);
6065 }
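
/*
 * Layout of one map_array allocation built above, shown for a module with
 * len == 3 eval maps (illustrative):
 *
 *	[0] head  (mod, length = 3)
 *	[1] map   (copy of *start[0])
 *	[2] map   (copy of *start[1])
 *	[3] map   (copy of *start[2])
 *	[4] tail  (zeroed; tail.next may later chain to another array)
 *
 * trace_eval_jmp_to_tail() lands on [4] by adding head.length + 1 to the
 * head pointer.
 */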
6066 
6067 static void trace_create_eval_file(struct dentry *d_tracer)
6068 {
6069 	trace_create_file("eval_map", 0444, d_tracer,
6070 			  NULL, &tracing_eval_map_fops);
6071 }
6072 
6073 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6074 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6075 static inline void trace_insert_eval_map_file(struct module *mod,
6076 			      struct trace_eval_map **start, int len) { }
6077 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6078 
6079 static void trace_insert_eval_map(struct module *mod,
6080 				  struct trace_eval_map **start, int len)
6081 {
6082 	struct trace_eval_map **map;
6083 
6084 	if (len <= 0)
6085 		return;
6086 
6087 	map = start;
6088 
6089 	trace_event_eval_update(map, len);
6090 
6091 	trace_insert_eval_map_file(mod, start, len);
6092 }
6093 
6094 static ssize_t
6095 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6096 		       size_t cnt, loff_t *ppos)
6097 {
6098 	struct trace_array *tr = filp->private_data;
6099 	char buf[MAX_TRACER_SIZE+2];
6100 	int r;
6101 
6102 	mutex_lock(&trace_types_lock);
6103 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6104 	mutex_unlock(&trace_types_lock);
6105 
6106 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6107 }
6108 
6109 int tracer_init(struct tracer *t, struct trace_array *tr)
6110 {
6111 	tracing_reset_online_cpus(&tr->array_buffer);
6112 	return t->init(tr);
6113 }
6114 
6115 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6116 {
6117 	int cpu;
6118 
6119 	for_each_tracing_cpu(cpu)
6120 		per_cpu_ptr(buf->data, cpu)->entries = val;
6121 }
6122 
6123 #ifdef CONFIG_TRACER_MAX_TRACE
6124 /* resize @trace_buf's per-CPU entries to match @size_buf's entries */
6125 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6126 					struct array_buffer *size_buf, int cpu_id)
6127 {
6128 	int cpu, ret = 0;
6129 
6130 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6131 		for_each_tracing_cpu(cpu) {
6132 			ret = ring_buffer_resize(trace_buf->buffer,
6133 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6134 			if (ret < 0)
6135 				break;
6136 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6137 				per_cpu_ptr(size_buf->data, cpu)->entries;
6138 		}
6139 	} else {
6140 		ret = ring_buffer_resize(trace_buf->buffer,
6141 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6142 		if (ret == 0)
6143 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6144 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6145 	}
6146 
6147 	return ret;
6148 }
6149 #endif /* CONFIG_TRACER_MAX_TRACE */
6150 
6151 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6152 					unsigned long size, int cpu)
6153 {
6154 	int ret;
6155 
6156 	/*
6157 	 * If kernel or user changes the size of the ring buffer
6158 	 * we use the size that was given, and we can forget about
6159 	 * expanding it later.
6160 	 */
6161 	ring_buffer_expanded = true;
6162 
6163 	/* May be called before buffers are initialized */
6164 	if (!tr->array_buffer.buffer)
6165 		return 0;
6166 
6167 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6168 	if (ret < 0)
6169 		return ret;
6170 
6171 #ifdef CONFIG_TRACER_MAX_TRACE
6172 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6173 	    !tr->current_trace->use_max_tr)
6174 		goto out;
6175 
6176 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6177 	if (ret < 0) {
6178 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6179 						     &tr->array_buffer, cpu);
6180 		if (r < 0) {
6181 			/*
6182 			 * AARGH! We are left with different
6183 			 * size max buffer!!!!
6184 			 * The max buffer is our "snapshot" buffer.
6185 			 * When a tracer needs a snapshot (one of the
6186 			 * latency tracers), it swaps the max buffer
6187 			 * with the saved snapshot. We succeeded in updating
6188 			 * the size of the main buffer, but failed to
6189 			 * update the size of the max buffer. But when we tried
6190 			 * to reset the main buffer to the original size, we
6191 			 * failed there too. This is very unlikely to
6192 			 * happen, but if it does, warn and kill all
6193 			 * tracing.
6194 			 */
6195 			WARN_ON(1);
6196 			tracing_disabled = 1;
6197 		}
6198 		return ret;
6199 	}
6200 
6201 	if (cpu == RING_BUFFER_ALL_CPUS)
6202 		set_buffer_entries(&tr->max_buffer, size);
6203 	else
6204 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6205 
6206  out:
6207 #endif /* CONFIG_TRACER_MAX_TRACE */
6208 
6209 	if (cpu == RING_BUFFER_ALL_CPUS)
6210 		set_buffer_entries(&tr->array_buffer, size);
6211 	else
6212 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6213 
6214 	return ret;
6215 }
6216 
6217 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6218 				  unsigned long size, int cpu_id)
6219 {
6220 	int ret;
6221 
6222 	mutex_lock(&trace_types_lock);
6223 
6224 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6225 		/* make sure this CPU is enabled in the mask */
6226 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6227 			ret = -EINVAL;
6228 			goto out;
6229 		}
6230 	}
6231 
6232 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6233 	if (ret < 0)
6234 		ret = -ENOMEM;
6235 
6236 out:
6237 	mutex_unlock(&trace_types_lock);
6238 
6239 	return ret;
6240 }
6241 
6242 
6243 /**
6244  * tracing_update_buffers - used by tracing facility to expand ring buffers
6245  *
6246  * To save on memory when the tracing is never used on a system with it
6247  * To save memory on systems where tracing is configured in but never
6248  * used, the ring buffers are set to a minimum size. But once a user
6249  * starts to use the tracing facility, they need to grow to their
6250  * default size.
6251  * This function is to be called when a tracer is about to be used.
6252  */
6253 int tracing_update_buffers(void)
6254 {
6255 	int ret = 0;
6256 
6257 	mutex_lock(&trace_types_lock);
6258 	if (!ring_buffer_expanded)
6259 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6260 						RING_BUFFER_ALL_CPUS);
6261 	mutex_unlock(&trace_types_lock);
6262 
6263 	return ret;
6264 }
6265 
6266 struct trace_option_dentry;
6267 
6268 static void
6269 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6270 
6271 /*
6272  * Used to clear out the tracer before deletion of an instance.
6273  * Must have trace_types_lock held.
6274  */
6275 static void tracing_set_nop(struct trace_array *tr)
6276 {
6277 	if (tr->current_trace == &nop_trace)
6278 		return;
6279 
6280 	tr->current_trace->enabled--;
6281 
6282 	if (tr->current_trace->reset)
6283 		tr->current_trace->reset(tr);
6284 
6285 	tr->current_trace = &nop_trace;
6286 }
6287 
6288 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6289 {
6290 	/* Only enable if the directory has been created already. */
6291 	if (!tr->dir)
6292 		return;
6293 
6294 	create_trace_option_files(tr, t);
6295 }
6296 
6297 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6298 {
6299 	struct tracer *t;
6300 #ifdef CONFIG_TRACER_MAX_TRACE
6301 	bool had_max_tr;
6302 #endif
6303 	int ret = 0;
6304 
6305 	mutex_lock(&trace_types_lock);
6306 
6307 	if (!ring_buffer_expanded) {
6308 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6309 						RING_BUFFER_ALL_CPUS);
6310 		if (ret < 0)
6311 			goto out;
6312 		ret = 0;
6313 	}
6314 
6315 	for (t = trace_types; t; t = t->next) {
6316 		if (strcmp(t->name, buf) == 0)
6317 			break;
6318 	}
6319 	if (!t) {
6320 		ret = -EINVAL;
6321 		goto out;
6322 	}
6323 	if (t == tr->current_trace)
6324 		goto out;
6325 
6326 #ifdef CONFIG_TRACER_SNAPSHOT
6327 	if (t->use_max_tr) {
6328 		arch_spin_lock(&tr->max_lock);
6329 		if (tr->cond_snapshot)
6330 			ret = -EBUSY;
6331 		arch_spin_unlock(&tr->max_lock);
6332 		if (ret)
6333 			goto out;
6334 	}
6335 #endif
6336 	/* Some tracers won't work on the kernel command line */
6337 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6338 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6339 			t->name);
6340 		goto out;
6341 	}
6342 
6343 	/* Some tracers are only allowed for the top level buffer */
6344 	if (!trace_ok_for_array(t, tr)) {
6345 		ret = -EINVAL;
6346 		goto out;
6347 	}
6348 
6349 	/* If trace pipe files are being read, we can't change the tracer */
6350 	if (tr->trace_ref) {
6351 		ret = -EBUSY;
6352 		goto out;
6353 	}
6354 
6355 	trace_branch_disable();
6356 
6357 	tr->current_trace->enabled--;
6358 
6359 	if (tr->current_trace->reset)
6360 		tr->current_trace->reset(tr);
6361 
6362 	/* Current trace needs to be nop_trace before synchronize_rcu */
6363 	tr->current_trace = &nop_trace;
6364 
6365 #ifdef CONFIG_TRACER_MAX_TRACE
6366 	had_max_tr = tr->allocated_snapshot;
6367 
6368 	if (had_max_tr && !t->use_max_tr) {
6369 		/*
6370 		 * We need to make sure that the update_max_tr sees that
6371 		 * current_trace changed to nop_trace to keep it from
6372 		 * swapping the buffers after we resize it.
6373 		 * update_max_tr() is called with interrupts disabled,
6374 		 * so a synchronize_rcu() is sufficient.
6375 		 */
6376 		synchronize_rcu();
6377 		free_snapshot(tr);
6378 	}
6379 #endif
6380 
6381 #ifdef CONFIG_TRACER_MAX_TRACE
6382 	if (t->use_max_tr && !had_max_tr) {
6383 		ret = tracing_alloc_snapshot_instance(tr);
6384 		if (ret < 0)
6385 			goto out;
6386 	}
6387 #endif
6388 
6389 	if (t->init) {
6390 		ret = tracer_init(t, tr);
6391 		if (ret)
6392 			goto out;
6393 	}
6394 
6395 	tr->current_trace = t;
6396 	tr->current_trace->enabled++;
6397 	trace_branch_enable(tr);
6398  out:
6399 	mutex_unlock(&trace_types_lock);
6400 
6401 	return ret;
6402 }
6403 
6404 static ssize_t
6405 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6406 			size_t cnt, loff_t *ppos)
6407 {
6408 	struct trace_array *tr = filp->private_data;
6409 	char buf[MAX_TRACER_SIZE+1];
6410 	int i;
6411 	size_t ret;
6412 	int err;
6413 
6414 	ret = cnt;
6415 
6416 	if (cnt > MAX_TRACER_SIZE)
6417 		cnt = MAX_TRACER_SIZE;
6418 
6419 	if (copy_from_user(buf, ubuf, cnt))
6420 		return -EFAULT;
6421 
6422 	buf[cnt] = 0;
6423 
6424 	/* strip trailing whitespace. */
6425 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6426 		buf[i] = 0;
6427 
6428 	err = tracing_set_tracer(tr, buf);
6429 	if (err)
6430 		return err;
6431 
6432 	*ppos += ret;
6433 
6434 	return ret;
6435 }
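
/*
 * Illustrative only: selecting a tracer by writing its name to
 * current_tracer, which lands in tracing_set_trace_write() above.  The
 * tracefs path and the "function" tracer being built in are assumptions;
 * headers are omitted for brevity.
 *
 *	int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		// a trailing newline, if written, is stripped by the handler
 *		write(fd, "function", 8);
 *		close(fd);
 *	}
 */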
6436 
6437 static ssize_t
6438 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6439 		   size_t cnt, loff_t *ppos)
6440 {
6441 	char buf[64];
6442 	int r;
6443 
6444 	r = snprintf(buf, sizeof(buf), "%ld\n",
6445 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6446 	if (r > sizeof(buf))
6447 		r = sizeof(buf);
6448 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6449 }
6450 
6451 static ssize_t
6452 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6453 		    size_t cnt, loff_t *ppos)
6454 {
6455 	unsigned long val;
6456 	int ret;
6457 
6458 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6459 	if (ret)
6460 		return ret;
6461 
6462 	*ptr = val * 1000;
6463 
6464 	return cnt;
6465 }
6466 
6467 static ssize_t
6468 tracing_thresh_read(struct file *filp, char __user *ubuf,
6469 		    size_t cnt, loff_t *ppos)
6470 {
6471 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6472 }
6473 
6474 static ssize_t
6475 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6476 		     size_t cnt, loff_t *ppos)
6477 {
6478 	struct trace_array *tr = filp->private_data;
6479 	int ret;
6480 
6481 	mutex_lock(&trace_types_lock);
6482 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6483 	if (ret < 0)
6484 		goto out;
6485 
6486 	if (tr->current_trace->update_thresh) {
6487 		ret = tr->current_trace->update_thresh(tr);
6488 		if (ret < 0)
6489 			goto out;
6490 	}
6491 
6492 	ret = cnt;
6493 out:
6494 	mutex_unlock(&trace_types_lock);
6495 
6496 	return ret;
6497 }
6498 
6499 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6500 
6501 static ssize_t
6502 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6503 		     size_t cnt, loff_t *ppos)
6504 {
6505 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6506 }
6507 
6508 static ssize_t
6509 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6510 		      size_t cnt, loff_t *ppos)
6511 {
6512 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6513 }
6514 
6515 #endif
6516 
6517 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6518 {
6519 	struct trace_array *tr = inode->i_private;
6520 	struct trace_iterator *iter;
6521 	int ret;
6522 
6523 	ret = tracing_check_open_get_tr(tr);
6524 	if (ret)
6525 		return ret;
6526 
6527 	mutex_lock(&trace_types_lock);
6528 
6529 	/* create a buffer to store the information to pass to userspace */
6530 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6531 	if (!iter) {
6532 		ret = -ENOMEM;
6533 		__trace_array_put(tr);
6534 		goto out;
6535 	}
6536 
6537 	trace_seq_init(&iter->seq);
6538 	iter->trace = tr->current_trace;
6539 
6540 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6541 		ret = -ENOMEM;
6542 		goto fail;
6543 	}
6544 
6545 	/* trace pipe does not show start of buffer */
6546 	cpumask_setall(iter->started);
6547 
6548 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6549 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6550 
6551 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6552 	if (trace_clocks[tr->clock_id].in_ns)
6553 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6554 
6555 	iter->tr = tr;
6556 	iter->array_buffer = &tr->array_buffer;
6557 	iter->cpu_file = tracing_get_cpu(inode);
6558 	mutex_init(&iter->mutex);
6559 	filp->private_data = iter;
6560 
6561 	if (iter->trace->pipe_open)
6562 		iter->trace->pipe_open(iter);
6563 
6564 	nonseekable_open(inode, filp);
6565 
6566 	tr->trace_ref++;
6567 out:
6568 	mutex_unlock(&trace_types_lock);
6569 	return ret;
6570 
6571 fail:
6572 	kfree(iter);
6573 	__trace_array_put(tr);
6574 	mutex_unlock(&trace_types_lock);
6575 	return ret;
6576 }
6577 
6578 static int tracing_release_pipe(struct inode *inode, struct file *file)
6579 {
6580 	struct trace_iterator *iter = file->private_data;
6581 	struct trace_array *tr = inode->i_private;
6582 
6583 	mutex_lock(&trace_types_lock);
6584 
6585 	tr->trace_ref--;
6586 
6587 	if (iter->trace->pipe_close)
6588 		iter->trace->pipe_close(iter);
6589 
6590 	mutex_unlock(&trace_types_lock);
6591 
6592 	free_cpumask_var(iter->started);
6593 	mutex_destroy(&iter->mutex);
6594 	kfree(iter);
6595 
6596 	trace_array_put(tr);
6597 
6598 	return 0;
6599 }
6600 
6601 static __poll_t
6602 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6603 {
6604 	struct trace_array *tr = iter->tr;
6605 
6606 	/* Iterators are static; they should be filled or empty */
6607 	if (trace_buffer_iter(iter, iter->cpu_file))
6608 		return EPOLLIN | EPOLLRDNORM;
6609 
6610 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6611 		/*
6612 		 * Always select as readable when in blocking mode
6613 		 */
6614 		return EPOLLIN | EPOLLRDNORM;
6615 	else
6616 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6617 					     filp, poll_table);
6618 }
6619 
6620 static __poll_t
6621 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6622 {
6623 	struct trace_iterator *iter = filp->private_data;
6624 
6625 	return trace_poll(iter, filp, poll_table);
6626 }
6627 
6628 /* Must be called with iter->mutex held. */
6629 static int tracing_wait_pipe(struct file *filp)
6630 {
6631 	struct trace_iterator *iter = filp->private_data;
6632 	int ret;
6633 
6634 	while (trace_empty(iter)) {
6635 
6636 		if ((filp->f_flags & O_NONBLOCK)) {
6637 			return -EAGAIN;
6638 		}
6639 
6640 		/*
6641 		 * We block until there is something to read. We keep blocking
6642 		 * even if tracing is disabled, as long as we have never
6643 		 * read anything. This allows a user to cat this file, and
6644 		 * then enable tracing. But after we have read something,
6645 		 * we give an EOF when tracing is disabled again.
6646 		 *
6647 		 * iter->pos will be 0 if we haven't read anything.
6648 		 */
6649 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6650 			break;
6651 
6652 		mutex_unlock(&iter->mutex);
6653 
6654 		ret = wait_on_pipe(iter, 0);
6655 
6656 		mutex_lock(&iter->mutex);
6657 
6658 		if (ret)
6659 			return ret;
6660 	}
6661 
6662 	return 1;
6663 }
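
/*
 * Illustrative only: a minimal blocking consumer of trace_pipe, matching
 * the semantics described in tracing_wait_pipe() above; the read blocks
 * while the buffer is empty and only returns EOF once something has been
 * read and tracing is disabled.  The tracefs path is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			write(STDOUT_FILENO, buf, n);	// entries are consumed
 *		close(fd);
 *		return 0;
 *	}
 */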
6664 
6665 /*
6666  * Consumer reader.
6667  */
6668 static ssize_t
6669 tracing_read_pipe(struct file *filp, char __user *ubuf,
6670 		  size_t cnt, loff_t *ppos)
6671 {
6672 	struct trace_iterator *iter = filp->private_data;
6673 	ssize_t sret;
6674 
6675 	/*
6676 	 * Avoid more than one consumer on a single file descriptor
6677 	 * This is just a matter of traces coherency, the ring buffer itself
6678 	 * is protected.
6679 	 */
6680 	mutex_lock(&iter->mutex);
6681 
6682 	/* return any leftover data */
6683 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6684 	if (sret != -EBUSY)
6685 		goto out;
6686 
6687 	trace_seq_init(&iter->seq);
6688 
6689 	if (iter->trace->read) {
6690 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6691 		if (sret)
6692 			goto out;
6693 	}
6694 
6695 waitagain:
6696 	sret = tracing_wait_pipe(filp);
6697 	if (sret <= 0)
6698 		goto out;
6699 
6700 	/* stop when tracing is finished */
6701 	if (trace_empty(iter)) {
6702 		sret = 0;
6703 		goto out;
6704 	}
6705 
6706 	if (cnt >= PAGE_SIZE)
6707 		cnt = PAGE_SIZE - 1;
6708 
6709 	/* reset all but tr, trace, and overruns */
6710 	memset(&iter->seq, 0,
6711 	       sizeof(struct trace_iterator) -
6712 	       offsetof(struct trace_iterator, seq));
6713 	cpumask_clear(iter->started);
6714 	trace_seq_init(&iter->seq);
6715 	iter->pos = -1;
6716 
6717 	trace_event_read_lock();
6718 	trace_access_lock(iter->cpu_file);
6719 	while (trace_find_next_entry_inc(iter) != NULL) {
6720 		enum print_line_t ret;
6721 		int save_len = iter->seq.seq.len;
6722 
6723 		ret = print_trace_line(iter);
6724 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6725 			/* don't print partial lines */
6726 			iter->seq.seq.len = save_len;
6727 			break;
6728 		}
6729 		if (ret != TRACE_TYPE_NO_CONSUME)
6730 			trace_consume(iter);
6731 
6732 		if (trace_seq_used(&iter->seq) >= cnt)
6733 			break;
6734 
6735 		/*
6736 		 * The full flag being set means we reached the trace_seq buffer
6737 		 * size, so we should have left via the partial-line condition
6738 		 * above. One of the trace_seq_* functions is not used properly.
6739 		 */
6740 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6741 			  iter->ent->type);
6742 	}
6743 	trace_access_unlock(iter->cpu_file);
6744 	trace_event_read_unlock();
6745 
6746 	/* Now copy what we have to the user */
6747 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6748 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6749 		trace_seq_init(&iter->seq);
6750 
6751 	/*
6752 	 * If there was nothing to send to user, in spite of consuming trace
6753 	 * entries, go back to wait for more entries.
6754 	 */
6755 	if (sret == -EBUSY)
6756 		goto waitagain;
6757 
6758 out:
6759 	mutex_unlock(&iter->mutex);
6760 
6761 	return sret;
6762 }
6763 
6764 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6765 				     unsigned int idx)
6766 {
6767 	__free_page(spd->pages[idx]);
6768 }
6769 
6770 static size_t
6771 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6772 {
6773 	size_t count;
6774 	int save_len;
6775 	int ret;
6776 
6777 	/* Seq buffer is page-sized, exactly what we need. */
6778 	for (;;) {
6779 		save_len = iter->seq.seq.len;
6780 		ret = print_trace_line(iter);
6781 
6782 		if (trace_seq_has_overflowed(&iter->seq)) {
6783 			iter->seq.seq.len = save_len;
6784 			break;
6785 		}
6786 
6787 		/*
6788 		 * This should not be hit, because it should only
6789 		 * be set if the iter->seq overflowed. But check it
6790 		 * anyway to be safe.
6791 		 */
6792 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6793 			iter->seq.seq.len = save_len;
6794 			break;
6795 		}
6796 
6797 		count = trace_seq_used(&iter->seq) - save_len;
6798 		if (rem < count) {
6799 			rem = 0;
6800 			iter->seq.seq.len = save_len;
6801 			break;
6802 		}
6803 
6804 		if (ret != TRACE_TYPE_NO_CONSUME)
6805 			trace_consume(iter);
6806 		rem -= count;
6807 		if (!trace_find_next_entry_inc(iter))	{
6808 			rem = 0;
6809 			iter->ent = NULL;
6810 			break;
6811 		}
6812 	}
6813 
6814 	return rem;
6815 }
6816 
6817 static ssize_t tracing_splice_read_pipe(struct file *filp,
6818 					loff_t *ppos,
6819 					struct pipe_inode_info *pipe,
6820 					size_t len,
6821 					unsigned int flags)
6822 {
6823 	struct page *pages_def[PIPE_DEF_BUFFERS];
6824 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6825 	struct trace_iterator *iter = filp->private_data;
6826 	struct splice_pipe_desc spd = {
6827 		.pages		= pages_def,
6828 		.partial	= partial_def,
6829 		.nr_pages	= 0, /* This gets updated below. */
6830 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6831 		.ops		= &default_pipe_buf_ops,
6832 		.spd_release	= tracing_spd_release_pipe,
6833 	};
6834 	ssize_t ret;
6835 	size_t rem;
6836 	unsigned int i;
6837 
6838 	if (splice_grow_spd(pipe, &spd))
6839 		return -ENOMEM;
6840 
6841 	mutex_lock(&iter->mutex);
6842 
6843 	if (iter->trace->splice_read) {
6844 		ret = iter->trace->splice_read(iter, filp,
6845 					       ppos, pipe, len, flags);
6846 		if (ret)
6847 			goto out_err;
6848 	}
6849 
6850 	ret = tracing_wait_pipe(filp);
6851 	if (ret <= 0)
6852 		goto out_err;
6853 
6854 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6855 		ret = -EFAULT;
6856 		goto out_err;
6857 	}
6858 
6859 	trace_event_read_lock();
6860 	trace_access_lock(iter->cpu_file);
6861 
6862 	/* Fill as many pages as possible. */
6863 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6864 		spd.pages[i] = alloc_page(GFP_KERNEL);
6865 		if (!spd.pages[i])
6866 			break;
6867 
6868 		rem = tracing_fill_pipe_page(rem, iter);
6869 
6870 		/* Copy the data into the page, so we can start over. */
6871 		ret = trace_seq_to_buffer(&iter->seq,
6872 					  page_address(spd.pages[i]),
6873 					  trace_seq_used(&iter->seq));
6874 		if (ret < 0) {
6875 			__free_page(spd.pages[i]);
6876 			break;
6877 		}
6878 		spd.partial[i].offset = 0;
6879 		spd.partial[i].len = trace_seq_used(&iter->seq);
6880 
6881 		trace_seq_init(&iter->seq);
6882 	}
6883 
6884 	trace_access_unlock(iter->cpu_file);
6885 	trace_event_read_unlock();
6886 	mutex_unlock(&iter->mutex);
6887 
6888 	spd.nr_pages = i;
6889 
6890 	if (i)
6891 		ret = splice_to_pipe(pipe, &spd);
6892 	else
6893 		ret = 0;
6894 out:
6895 	splice_shrink_spd(&spd);
6896 	return ret;
6897 
6898 out_err:
6899 	mutex_unlock(&iter->mutex);
6900 	goto out;
6901 }
6902 
6903 static ssize_t
6904 tracing_entries_read(struct file *filp, char __user *ubuf,
6905 		     size_t cnt, loff_t *ppos)
6906 {
6907 	struct inode *inode = file_inode(filp);
6908 	struct trace_array *tr = inode->i_private;
6909 	int cpu = tracing_get_cpu(inode);
6910 	char buf[64];
6911 	int r = 0;
6912 	ssize_t ret;
6913 
6914 	mutex_lock(&trace_types_lock);
6915 
6916 	if (cpu == RING_BUFFER_ALL_CPUS) {
6917 		int cpu, buf_size_same;
6918 		unsigned long size;
6919 
6920 		size = 0;
6921 		buf_size_same = 1;
6922 		/* check if all CPU buffer sizes are the same */
6923 		for_each_tracing_cpu(cpu) {
6924 			/* fill in the size from first enabled cpu */
6925 			if (size == 0)
6926 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6927 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6928 				buf_size_same = 0;
6929 				break;
6930 			}
6931 		}
6932 
6933 		if (buf_size_same) {
6934 			if (!ring_buffer_expanded)
6935 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6936 					    size >> 10,
6937 					    trace_buf_size >> 10);
6938 			else
6939 				r = sprintf(buf, "%lu\n", size >> 10);
6940 		} else
6941 			r = sprintf(buf, "X\n");
6942 	} else
6943 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6944 
6945 	mutex_unlock(&trace_types_lock);
6946 
6947 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6948 	return ret;
6949 }
6950 
6951 static ssize_t
6952 tracing_entries_write(struct file *filp, const char __user *ubuf,
6953 		      size_t cnt, loff_t *ppos)
6954 {
6955 	struct inode *inode = file_inode(filp);
6956 	struct trace_array *tr = inode->i_private;
6957 	unsigned long val;
6958 	int ret;
6959 
6960 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6961 	if (ret)
6962 		return ret;
6963 
6964 	/* must have at least 1 entry */
6965 	if (!val)
6966 		return -EINVAL;
6967 
6968 	/* value is in KB */
6969 	val <<= 10;
6970 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6971 	if (ret < 0)
6972 		return ret;
6973 
6974 	*ppos += cnt;
6975 
6976 	return cnt;
6977 }
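
/*
 * Illustrative only: the value written to buffer_size_kb is taken in KiB
 * and applied per CPU (val <<= 10 above), so the following fragment asks
 * for roughly 4 MiB of ring buffer on each CPU.  The tracefs path is an
 * assumption; headers are omitted for brevity.
 *
 *	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "4096\n", 5);		// 4096 KiB per CPU
 *		close(fd);
 *	}
 */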
6978 
6979 static ssize_t
6980 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6981 				size_t cnt, loff_t *ppos)
6982 {
6983 	struct trace_array *tr = filp->private_data;
6984 	char buf[64];
6985 	int r, cpu;
6986 	unsigned long size = 0, expanded_size = 0;
6987 
6988 	mutex_lock(&trace_types_lock);
6989 	for_each_tracing_cpu(cpu) {
6990 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6991 		if (!ring_buffer_expanded)
6992 			expanded_size += trace_buf_size >> 10;
6993 	}
6994 	if (ring_buffer_expanded)
6995 		r = sprintf(buf, "%lu\n", size);
6996 	else
6997 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6998 	mutex_unlock(&trace_types_lock);
6999 
7000 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7001 }
7002 
7003 static ssize_t
7004 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7005 			  size_t cnt, loff_t *ppos)
7006 {
7007 	/*
7008 	 * There is no need to read what the user has written; this function
7009 	 * exists only so that "echo" into this file does not return an error.
7010 	 */
7011 
7012 	*ppos += cnt;
7013 
7014 	return cnt;
7015 }
7016 
7017 static int
7018 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7019 {
7020 	struct trace_array *tr = inode->i_private;
7021 
7022 	/* disable tracing? */
7023 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7024 		tracer_tracing_off(tr);
7025 	/* resize the ring buffer to 0 */
7026 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7027 
7028 	trace_array_put(tr);
7029 
7030 	return 0;
7031 }
7032 
7033 static ssize_t
7034 tracing_mark_write(struct file *filp, const char __user *ubuf,
7035 					size_t cnt, loff_t *fpos)
7036 {
7037 	struct trace_array *tr = filp->private_data;
7038 	struct ring_buffer_event *event;
7039 	enum event_trigger_type tt = ETT_NONE;
7040 	struct trace_buffer *buffer;
7041 	struct print_entry *entry;
7042 	ssize_t written;
7043 	int size;
7044 	int len;
7045 
7046 /* Used in tracing_mark_raw_write() as well */
7047 #define FAULTED_STR "<faulted>"
7048 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7049 
7050 	if (tracing_disabled)
7051 		return -EINVAL;
7052 
7053 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7054 		return -EINVAL;
7055 
7056 	if (cnt > TRACE_BUF_SIZE)
7057 		cnt = TRACE_BUF_SIZE;
7058 
7059 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7060 
7061 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7062 
7063 	/* If less than "<faulted>", then make sure we can still add that */
7064 	if (cnt < FAULTED_SIZE)
7065 		size += FAULTED_SIZE - cnt;
7066 
7067 	buffer = tr->array_buffer.buffer;
7068 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7069 					    tracing_gen_ctx());
7070 	if (unlikely(!event))
7071 		/* Ring buffer disabled, return as if not open for write */
7072 		return -EBADF;
7073 
7074 	entry = ring_buffer_event_data(event);
7075 	entry->ip = _THIS_IP_;
7076 
7077 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7078 	if (len) {
7079 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7080 		cnt = FAULTED_SIZE;
7081 		written = -EFAULT;
7082 	} else
7083 		written = cnt;
7084 
7085 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7086 		/* do not add \n before testing triggers, but add \0 */
7087 		entry->buf[cnt] = '\0';
7088 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7089 	}
7090 
7091 	if (entry->buf[cnt - 1] != '\n') {
7092 		entry->buf[cnt] = '\n';
7093 		entry->buf[cnt + 1] = '\0';
7094 	} else
7095 		entry->buf[cnt] = '\0';
7096 
7097 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7098 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7099 	__buffer_unlock_commit(buffer, event);
7100 
7101 	if (tt)
7102 		event_triggers_post_call(tr->trace_marker_file, tt);
7103 
7104 	if (written > 0)
7105 		*fpos += written;
7106 
7107 	return written;
7108 }
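
/*
 * Illustrative only: a minimal userspace annotator that goes through
 * tracing_mark_write() above.  The tracefs path is an assumption; writes
 * longer than TRACE_BUF_SIZE are truncated by the handler.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "frame start\n", 12);
 *		// ... the workload being annotated ...
 *		write(fd, "frame end\n", 10);
 *		close(fd);
 *		return 0;
 *	}
 */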
7109 
7110 /* Limit it for now to 3K (including tag) */
7111 #define RAW_DATA_MAX_SIZE (1024*3)
7112 
7113 static ssize_t
7114 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7115 					size_t cnt, loff_t *fpos)
7116 {
7117 	struct trace_array *tr = filp->private_data;
7118 	struct ring_buffer_event *event;
7119 	struct trace_buffer *buffer;
7120 	struct raw_data_entry *entry;
7121 	ssize_t written;
7122 	int size;
7123 	int len;
7124 
7125 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7126 
7127 	if (tracing_disabled)
7128 		return -EINVAL;
7129 
7130 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7131 		return -EINVAL;
7132 
7133 	/* The marker must at least have a tag id */
7134 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7135 		return -EINVAL;
7136 
7137 	if (cnt > TRACE_BUF_SIZE)
7138 		cnt = TRACE_BUF_SIZE;
7139 
7140 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7141 
7142 	size = sizeof(*entry) + cnt;
7143 	if (cnt < FAULT_SIZE_ID)
7144 		size += FAULT_SIZE_ID - cnt;
7145 
7146 	buffer = tr->array_buffer.buffer;
7147 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7148 					    tracing_gen_ctx());
7149 	if (!event)
7150 		/* Ring buffer disabled, return as if not open for write */
7151 		return -EBADF;
7152 
7153 	entry = ring_buffer_event_data(event);
7154 
7155 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7156 	if (len) {
7157 		entry->id = -1;
7158 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7159 		written = -EFAULT;
7160 	} else
7161 		written = cnt;
7162 
7163 	__buffer_unlock_commit(buffer, event);
7164 
7165 	if (written > 0)
7166 		*fpos += written;
7167 
7168 	return written;
7169 }
7170 
7171 static int tracing_clock_show(struct seq_file *m, void *v)
7172 {
7173 	struct trace_array *tr = m->private;
7174 	int i;
7175 
7176 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7177 		seq_printf(m,
7178 			"%s%s%s%s", i ? " " : "",
7179 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7180 			i == tr->clock_id ? "]" : "");
7181 	seq_putc(m, '\n');
7182 
7183 	return 0;
7184 }
7185 
7186 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7187 {
7188 	int i;
7189 
7190 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7191 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7192 			break;
7193 	}
7194 	if (i == ARRAY_SIZE(trace_clocks))
7195 		return -EINVAL;
7196 
7197 	mutex_lock(&trace_types_lock);
7198 
7199 	tr->clock_id = i;
7200 
7201 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7202 
7203 	/*
7204 	 * New clock may not be consistent with the previous clock.
7205 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7206 	 */
7207 	tracing_reset_online_cpus(&tr->array_buffer);
7208 
7209 #ifdef CONFIG_TRACER_MAX_TRACE
7210 	if (tr->max_buffer.buffer)
7211 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7212 	tracing_reset_online_cpus(&tr->max_buffer);
7213 #endif
7214 
7215 	mutex_unlock(&trace_types_lock);
7216 
7217 	return 0;
7218 }
7219 
7220 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7221 				   size_t cnt, loff_t *fpos)
7222 {
7223 	struct seq_file *m = filp->private_data;
7224 	struct trace_array *tr = m->private;
7225 	char buf[64];
7226 	const char *clockstr;
7227 	int ret;
7228 
7229 	if (cnt >= sizeof(buf))
7230 		return -EINVAL;
7231 
7232 	if (copy_from_user(buf, ubuf, cnt))
7233 		return -EFAULT;
7234 
7235 	buf[cnt] = 0;
7236 
7237 	clockstr = strstrip(buf);
7238 
7239 	ret = tracing_set_clock(tr, clockstr);
7240 	if (ret)
7241 		return ret;
7242 
7243 	*fpos += cnt;
7244 
7245 	return cnt;
7246 }
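
/*
 * Illustrative only: selecting one of the clocks listed by
 * tracing_clock_show() above.  "mono" being available and the tracefs
 * path are assumptions; headers are omitted for brevity.
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "mono\n", 5);		// strstrip() drops the '\n'
 *		close(fd);
 *	}
 */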
7247 
7248 static int tracing_clock_open(struct inode *inode, struct file *file)
7249 {
7250 	struct trace_array *tr = inode->i_private;
7251 	int ret;
7252 
7253 	ret = tracing_check_open_get_tr(tr);
7254 	if (ret)
7255 		return ret;
7256 
7257 	ret = single_open(file, tracing_clock_show, inode->i_private);
7258 	if (ret < 0)
7259 		trace_array_put(tr);
7260 
7261 	return ret;
7262 }
7263 
7264 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7265 {
7266 	struct trace_array *tr = m->private;
7267 
7268 	mutex_lock(&trace_types_lock);
7269 
7270 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7271 		seq_puts(m, "delta [absolute]\n");
7272 	else
7273 		seq_puts(m, "[delta] absolute\n");
7274 
7275 	mutex_unlock(&trace_types_lock);
7276 
7277 	return 0;
7278 }
7279 
7280 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7281 {
7282 	struct trace_array *tr = inode->i_private;
7283 	int ret;
7284 
7285 	ret = tracing_check_open_get_tr(tr);
7286 	if (ret)
7287 		return ret;
7288 
7289 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7290 	if (ret < 0)
7291 		trace_array_put(tr);
7292 
7293 	return ret;
7294 }
7295 
7296 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7297 {
7298 	if (rbe == this_cpu_read(trace_buffered_event))
7299 		return ring_buffer_time_stamp(buffer);
7300 
7301 	return ring_buffer_event_time_stamp(buffer, rbe);
7302 }
7303 
7304 /*
7305  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7306  */
7307 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7308 {
7309 	int ret = 0;
7310 
7311 	mutex_lock(&trace_types_lock);
7312 
7313 	if (set && tr->no_filter_buffering_ref++)
7314 		goto out;
7315 
7316 	if (!set) {
7317 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7318 			ret = -EINVAL;
7319 			goto out;
7320 		}
7321 
7322 		--tr->no_filter_buffering_ref;
7323 	}
7324  out:
7325 	mutex_unlock(&trace_types_lock);
7326 
7327 	return ret;
7328 }
7329 
7330 struct ftrace_buffer_info {
7331 	struct trace_iterator	iter;
7332 	void			*spare;
7333 	unsigned int		spare_cpu;
7334 	unsigned int		read;
7335 };
7336 
7337 #ifdef CONFIG_TRACER_SNAPSHOT
7338 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7339 {
7340 	struct trace_array *tr = inode->i_private;
7341 	struct trace_iterator *iter;
7342 	struct seq_file *m;
7343 	int ret;
7344 
7345 	ret = tracing_check_open_get_tr(tr);
7346 	if (ret)
7347 		return ret;
7348 
7349 	if (file->f_mode & FMODE_READ) {
7350 		iter = __tracing_open(inode, file, true);
7351 		if (IS_ERR(iter))
7352 			ret = PTR_ERR(iter);
7353 	} else {
7354 		/* Writes still need the seq_file to hold the private data */
7355 		ret = -ENOMEM;
7356 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7357 		if (!m)
7358 			goto out;
7359 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7360 		if (!iter) {
7361 			kfree(m);
7362 			goto out;
7363 		}
7364 		ret = 0;
7365 
7366 		iter->tr = tr;
7367 		iter->array_buffer = &tr->max_buffer;
7368 		iter->cpu_file = tracing_get_cpu(inode);
7369 		m->private = iter;
7370 		file->private_data = m;
7371 	}
7372 out:
7373 	if (ret < 0)
7374 		trace_array_put(tr);
7375 
7376 	return ret;
7377 }
7378 
7379 static ssize_t
7380 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7381 		       loff_t *ppos)
7382 {
7383 	struct seq_file *m = filp->private_data;
7384 	struct trace_iterator *iter = m->private;
7385 	struct trace_array *tr = iter->tr;
7386 	unsigned long val;
7387 	int ret;
7388 
7389 	ret = tracing_update_buffers();
7390 	if (ret < 0)
7391 		return ret;
7392 
7393 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7394 	if (ret)
7395 		return ret;
7396 
7397 	mutex_lock(&trace_types_lock);
7398 
7399 	if (tr->current_trace->use_max_tr) {
7400 		ret = -EBUSY;
7401 		goto out;
7402 	}
7403 
7404 	arch_spin_lock(&tr->max_lock);
7405 	if (tr->cond_snapshot)
7406 		ret = -EBUSY;
7407 	arch_spin_unlock(&tr->max_lock);
7408 	if (ret)
7409 		goto out;
7410 
7411 	switch (val) {
7412 	case 0:
7413 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7414 			ret = -EINVAL;
7415 			break;
7416 		}
7417 		if (tr->allocated_snapshot)
7418 			free_snapshot(tr);
7419 		break;
7420 	case 1:
7421 /* Only allow per-cpu swap if the ring buffer supports it */
7422 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7423 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7424 			ret = -EINVAL;
7425 			break;
7426 		}
7427 #endif
7428 		if (tr->allocated_snapshot)
7429 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7430 					&tr->array_buffer, iter->cpu_file);
7431 		else
7432 			ret = tracing_alloc_snapshot_instance(tr);
7433 		if (ret < 0)
7434 			break;
7435 		local_irq_disable();
7436 		/* Now, we're going to swap */
7437 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7438 			update_max_tr(tr, current, smp_processor_id(), NULL);
7439 		else
7440 			update_max_tr_single(tr, current, iter->cpu_file);
7441 		local_irq_enable();
7442 		break;
7443 	default:
7444 		if (tr->allocated_snapshot) {
7445 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7446 				tracing_reset_online_cpus(&tr->max_buffer);
7447 			else
7448 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7449 		}
7450 		break;
7451 	}
7452 
7453 	if (ret >= 0) {
7454 		*ppos += cnt;
7455 		ret = cnt;
7456 	}
7457 out:
7458 	mutex_unlock(&trace_types_lock);
7459 	return ret;
7460 }
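
/*
 * Illustrative only: the values understood by tracing_snapshot_write()
 * above, driven from userspace (tracefs path assumed, headers omitted):
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1\n", 2);	// allocate if needed and take a snapshot
 *		write(fd, "2\n", 2);	// any other number: clear the snapshot buffer
 *		write(fd, "0\n", 2);	// free the snapshot buffer
 *		close(fd);
 *	}
 */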
7461 
7462 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7463 {
7464 	struct seq_file *m = file->private_data;
7465 	int ret;
7466 
7467 	ret = tracing_release(inode, file);
7468 
7469 	if (file->f_mode & FMODE_READ)
7470 		return ret;
7471 
7472 	/* If write only, the seq_file is just a stub */
7473 	if (m)
7474 		kfree(m->private);
7475 	kfree(m);
7476 
7477 	return 0;
7478 }
7479 
7480 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7481 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7482 				    size_t count, loff_t *ppos);
7483 static int tracing_buffers_release(struct inode *inode, struct file *file);
7484 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7485 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7486 
7487 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7488 {
7489 	struct ftrace_buffer_info *info;
7490 	int ret;
7491 
7492 	/* The following checks for tracefs lockdown */
7493 	ret = tracing_buffers_open(inode, filp);
7494 	if (ret < 0)
7495 		return ret;
7496 
7497 	info = filp->private_data;
7498 
7499 	if (info->iter.trace->use_max_tr) {
7500 		tracing_buffers_release(inode, filp);
7501 		return -EBUSY;
7502 	}
7503 
7504 	info->iter.snapshot = true;
7505 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7506 
7507 	return ret;
7508 }
7509 
7510 #endif /* CONFIG_TRACER_SNAPSHOT */
7511 
7512 
7513 static const struct file_operations tracing_thresh_fops = {
7514 	.open		= tracing_open_generic,
7515 	.read		= tracing_thresh_read,
7516 	.write		= tracing_thresh_write,
7517 	.llseek		= generic_file_llseek,
7518 };
7519 
7520 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7521 static const struct file_operations tracing_max_lat_fops = {
7522 	.open		= tracing_open_generic,
7523 	.read		= tracing_max_lat_read,
7524 	.write		= tracing_max_lat_write,
7525 	.llseek		= generic_file_llseek,
7526 };
7527 #endif
7528 
7529 static const struct file_operations set_tracer_fops = {
7530 	.open		= tracing_open_generic,
7531 	.read		= tracing_set_trace_read,
7532 	.write		= tracing_set_trace_write,
7533 	.llseek		= generic_file_llseek,
7534 };
7535 
7536 static const struct file_operations tracing_pipe_fops = {
7537 	.open		= tracing_open_pipe,
7538 	.poll		= tracing_poll_pipe,
7539 	.read		= tracing_read_pipe,
7540 	.splice_read	= tracing_splice_read_pipe,
7541 	.release	= tracing_release_pipe,
7542 	.llseek		= no_llseek,
7543 };
7544 
7545 static const struct file_operations tracing_entries_fops = {
7546 	.open		= tracing_open_generic_tr,
7547 	.read		= tracing_entries_read,
7548 	.write		= tracing_entries_write,
7549 	.llseek		= generic_file_llseek,
7550 	.release	= tracing_release_generic_tr,
7551 };
7552 
7553 static const struct file_operations tracing_total_entries_fops = {
7554 	.open		= tracing_open_generic_tr,
7555 	.read		= tracing_total_entries_read,
7556 	.llseek		= generic_file_llseek,
7557 	.release	= tracing_release_generic_tr,
7558 };
7559 
7560 static const struct file_operations tracing_free_buffer_fops = {
7561 	.open		= tracing_open_generic_tr,
7562 	.write		= tracing_free_buffer_write,
7563 	.release	= tracing_free_buffer_release,
7564 };
7565 
7566 static const struct file_operations tracing_mark_fops = {
7567 	.open		= tracing_open_generic_tr,
7568 	.write		= tracing_mark_write,
7569 	.llseek		= generic_file_llseek,
7570 	.release	= tracing_release_generic_tr,
7571 };
7572 
7573 static const struct file_operations tracing_mark_raw_fops = {
7574 	.open		= tracing_open_generic_tr,
7575 	.write		= tracing_mark_raw_write,
7576 	.llseek		= generic_file_llseek,
7577 	.release	= tracing_release_generic_tr,
7578 };
7579 
7580 static const struct file_operations trace_clock_fops = {
7581 	.open		= tracing_clock_open,
7582 	.read		= seq_read,
7583 	.llseek		= seq_lseek,
7584 	.release	= tracing_single_release_tr,
7585 	.write		= tracing_clock_write,
7586 };
7587 
7588 static const struct file_operations trace_time_stamp_mode_fops = {
7589 	.open		= tracing_time_stamp_mode_open,
7590 	.read		= seq_read,
7591 	.llseek		= seq_lseek,
7592 	.release	= tracing_single_release_tr,
7593 };
7594 
7595 #ifdef CONFIG_TRACER_SNAPSHOT
7596 static const struct file_operations snapshot_fops = {
7597 	.open		= tracing_snapshot_open,
7598 	.read		= seq_read,
7599 	.write		= tracing_snapshot_write,
7600 	.llseek		= tracing_lseek,
7601 	.release	= tracing_snapshot_release,
7602 };
7603 
7604 static const struct file_operations snapshot_raw_fops = {
7605 	.open		= snapshot_raw_open,
7606 	.read		= tracing_buffers_read,
7607 	.release	= tracing_buffers_release,
7608 	.splice_read	= tracing_buffers_splice_read,
7609 	.llseek		= no_llseek,
7610 };
7611 
7612 #endif /* CONFIG_TRACER_SNAPSHOT */
7613 
7614 /*
7615  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7616  * @filp: The active open file structure
7617  * @ubuf: The userspace provided buffer to read the value from
7618  * @cnt: The maximum number of bytes to read from the user buffer
7619  * @ppos: The current "file" position
7620  *
7621  * This function implements the write interface for a struct trace_min_max_param.
7622  * The filp->private_data must point to a trace_min_max_param structure that
7623  * defines where to write the value, the min and the max acceptable values,
7624  * and a lock to protect the write.
7625  */
7626 static ssize_t
7627 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7628 {
7629 	struct trace_min_max_param *param = filp->private_data;
7630 	u64 val;
7631 	int err;
7632 
7633 	if (!param)
7634 		return -EFAULT;
7635 
7636 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7637 	if (err)
7638 		return err;
7639 
7640 	if (param->lock)
7641 		mutex_lock(param->lock);
7642 
7643 	if (param->min && val < *param->min)
7644 		err = -EINVAL;
7645 
7646 	if (param->max && val > *param->max)
7647 		err = -EINVAL;
7648 
7649 	if (!err)
7650 		*param->val = val;
7651 
7652 	if (param->lock)
7653 		mutex_unlock(param->lock);
7654 
7655 	if (err)
7656 		return err;
7657 
7658 	return cnt;
7659 }
7660 
7661 /*
7662  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7663  * @filp: The active open file structure
7664  * @ubuf: The userspace provided buffer to read value into
7665  * @cnt: The maximum number of bytes to read
7666  * @ppos: The current "file" position
7667  *
7668  * This function implements the read interface for a struct trace_min_max_param.
7669  * The filp->private_data must point to a trace_min_max_param struct with valid
7670  * data.
7671  */
7672 static ssize_t
7673 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7674 {
7675 	struct trace_min_max_param *param = filp->private_data;
7676 	char buf[U64_STR_SIZE];
7677 	int len;
7678 	u64 val;
7679 
7680 	if (!param)
7681 		return -EFAULT;
7682 
7683 	val = *param->val;
7684 
7685 	if (cnt > sizeof(buf))
7686 		cnt = sizeof(buf);
7687 
7688 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7689 
7690 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7691 }
7692 
7693 const struct file_operations trace_min_max_fops = {
7694 	.open		= tracing_open_generic,
7695 	.read		= trace_min_max_read,
7696 	.write		= trace_min_max_write,
7697 };
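
/*
 * Illustrative sketch (not taken from an existing caller) of how a tracer
 * could expose a clamped u64 tunable through trace_min_max_fops.  The
 * "example_*" names and "parent" below are hypothetical:
 *
 *	static u64 example_val = 100;
 *	static u64 example_min = 1;
 *	static u64 example_max = 1000;
 *	static DEFINE_MUTEX(example_lock);
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_tunable", 0644, parent, &example_param,
 *			  &trace_min_max_fops);
 *
 * A NULL .min or .max simply skips that bound check, and a NULL .lock
 * skips locking in the write path, as the handlers above show.
 */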
7698 
7699 #define TRACING_LOG_ERRS_MAX	8
7700 #define TRACING_LOG_LOC_MAX	128
7701 
7702 #define CMD_PREFIX "  Command: "
7703 
7704 struct err_info {
7705 	const char	**errs;	/* ptr to loc-specific array of err strings */
7706 	u8		type;	/* index into errs -> specific err string */
7707 	u8		pos;	/* caret position in cmd (MAX_FILTER_STR_VAL = 256) */
7708 	u64		ts;
7709 };
7710 
7711 struct tracing_log_err {
7712 	struct list_head	list;
7713 	struct err_info		info;
7714 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7715 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7716 };
7717 
7718 static DEFINE_MUTEX(tracing_err_log_lock);
7719 
7720 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7721 {
7722 	struct tracing_log_err *err;
7723 
7724 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7725 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7726 		if (!err)
7727 			err = ERR_PTR(-ENOMEM);
7728 		tr->n_err_log_entries++;
7729 
7730 		return err;
7731 	}
7732 
7733 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7734 	list_del(&err->list);
7735 
7736 	return err;
7737 }
7738 
7739 /**
7740  * err_pos - find the position of a string within a command for error careting
7741  * @cmd: The tracing command that caused the error
7742  * @str: The string to position the caret at within @cmd
7743  *
7744  * Finds the position of the first occurrence of @str within @cmd.  The
7745  * return value can be passed to tracing_log_err() for caret placement
7746  * within @cmd.
7747  *
7748  * Returns the index within @cmd of the first occurrence of @str or 0
7749  * if @str was not found.
7750  */
7751 unsigned int err_pos(char *cmd, const char *str)
7752 {
7753 	char *found;
7754 
7755 	if (WARN_ON(!strlen(cmd)))
7756 		return 0;
7757 
7758 	found = strstr(cmd, str);
7759 	if (found)
7760 		return found - cmd;
7761 
7762 	return 0;
7763 }
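
/*
 * Example (illustrative): err_pos("keys=bad_field", "bad_field") returns 5,
 * which tracing_log_err() below then uses to line the '^' caret up under
 * the offending token in the error_log output.
 */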
7764 
7765 /**
7766  * tracing_log_err - write an error to the tracing error log
7767  * @tr: The associated trace array for the error (NULL for top level array)
7768  * @loc: A string describing where the error occurred
7769  * @cmd: The tracing command that caused the error
7770  * @errs: The array of loc-specific static error strings
7771  * @type: The index into errs[], which produces the specific static err string
7772  * @pos: The position the caret should be placed in the cmd
7773  *
7774  * Writes an error into tracing/error_log of the form:
7775  *
7776  * <loc>: error: <text>
7777  *   Command: <cmd>
7778  *              ^
7779  *
7780  * tracing/error_log is a small log file containing the last
7781  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7782  * unless there has been a tracing error, and the error log can be
7783  * cleared and have its memory freed by writing the empty string to
7784  * it in truncation mode, i.e. echo > tracing/error_log.
7785  *
7786  * NOTE: the @errs array along with the @type param are used to
7787  * produce a static error string - this string is not copied and saved
7788  * when the error is logged - only a pointer to it is saved.  See
7789  * existing callers for examples of how static strings are typically
7790  * defined for use with tracing_log_err().
7791  */
7792 void tracing_log_err(struct trace_array *tr,
7793 		     const char *loc, const char *cmd,
7794 		     const char **errs, u8 type, u8 pos)
7795 {
7796 	struct tracing_log_err *err;
7797 
7798 	if (!tr)
7799 		tr = &global_trace;
7800 
7801 	mutex_lock(&tracing_err_log_lock);
7802 	err = get_tracing_log_err(tr);
7803 	if (PTR_ERR(err) == -ENOMEM) {
7804 		mutex_unlock(&tracing_err_log_lock);
7805 		return;
7806 	}
7807 
7808 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7809 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7810 
7811 	err->info.errs = errs;
7812 	err->info.type = type;
7813 	err->info.pos = pos;
7814 	err->info.ts = local_clock();
7815 
7816 	list_add_tail(&err->list, &tr->err_log);
7817 	mutex_unlock(&tracing_err_log_lock);
7818 }
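
/*
 * Illustrative calling pattern (the names below are hypothetical; see the
 * real callers, e.g. in trace_events_hist.c, for the usual macro-based way
 * of keeping the enum and the string array in sync):
 *
 *	enum { MY_ERR_BAD_KEY, MY_ERR_TOO_MANY };
 *
 *	static const char *my_err_text[] = {
 *		"Bad key specified",
 *		"Too many arguments",
 *	};
 *
 *	tracing_log_err(tr, "hist", cmd_str, my_err_text,
 *			MY_ERR_BAD_KEY, err_pos(cmd_str, "badkey"));
 *
 * This would show up in tracing/error_log roughly as:
 *
 *	[   52.123456] hist: error: Bad key specified
 *	  Command: <contents of cmd_str>
 *	              ^
 */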
7819 
7820 static void clear_tracing_err_log(struct trace_array *tr)
7821 {
7822 	struct tracing_log_err *err, *next;
7823 
7824 	mutex_lock(&tracing_err_log_lock);
7825 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7826 		list_del(&err->list);
7827 		kfree(err);
7828 	}
7829 
7830 	tr->n_err_log_entries = 0;
7831 	mutex_unlock(&tracing_err_log_lock);
7832 }
7833 
7834 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7835 {
7836 	struct trace_array *tr = m->private;
7837 
7838 	mutex_lock(&tracing_err_log_lock);
7839 
7840 	return seq_list_start(&tr->err_log, *pos);
7841 }
7842 
7843 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7844 {
7845 	struct trace_array *tr = m->private;
7846 
7847 	return seq_list_next(v, &tr->err_log, pos);
7848 }
7849 
7850 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7851 {
7852 	mutex_unlock(&tracing_err_log_lock);
7853 }
7854 
7855 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7856 {
7857 	u8 i;
7858 
7859 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7860 		seq_putc(m, ' ');
7861 	for (i = 0; i < pos; i++)
7862 		seq_putc(m, ' ');
7863 	seq_puts(m, "^\n");
7864 }
7865 
7866 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7867 {
7868 	struct tracing_log_err *err = v;
7869 
7870 	if (err) {
7871 		const char *err_text = err->info.errs[err->info.type];
7872 		u64 sec = err->info.ts;
7873 		u32 nsec;
7874 
7875 		nsec = do_div(sec, NSEC_PER_SEC);
7876 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7877 			   err->loc, err_text);
7878 		seq_printf(m, "%s", err->cmd);
7879 		tracing_err_log_show_pos(m, err->info.pos);
7880 	}
7881 
7882 	return 0;
7883 }
7884 
7885 static const struct seq_operations tracing_err_log_seq_ops = {
7886 	.start  = tracing_err_log_seq_start,
7887 	.next   = tracing_err_log_seq_next,
7888 	.stop   = tracing_err_log_seq_stop,
7889 	.show   = tracing_err_log_seq_show
7890 };
7891 
7892 static int tracing_err_log_open(struct inode *inode, struct file *file)
7893 {
7894 	struct trace_array *tr = inode->i_private;
7895 	int ret = 0;
7896 
7897 	ret = tracing_check_open_get_tr(tr);
7898 	if (ret)
7899 		return ret;
7900 
7901 	/* If this file was opened for write, then erase contents */
7902 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7903 		clear_tracing_err_log(tr);
7904 
7905 	if (file->f_mode & FMODE_READ) {
7906 		ret = seq_open(file, &tracing_err_log_seq_ops);
7907 		if (!ret) {
7908 			struct seq_file *m = file->private_data;
7909 			m->private = tr;
7910 		} else {
7911 			trace_array_put(tr);
7912 		}
7913 	}
7914 	return ret;
7915 }
7916 
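/*
 * Writes to error_log are accepted but discarded; clearing the log is
 * done by the O_TRUNC handling in tracing_err_log_open() above
 * (e.g. "echo > tracing/error_log").
 */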
7917 static ssize_t tracing_err_log_write(struct file *file,
7918 				     const char __user *buffer,
7919 				     size_t count, loff_t *ppos)
7920 {
7921 	return count;
7922 }
7923 
7924 static int tracing_err_log_release(struct inode *inode, struct file *file)
7925 {
7926 	struct trace_array *tr = inode->i_private;
7927 
7928 	trace_array_put(tr);
7929 
7930 	if (file->f_mode & FMODE_READ)
7931 		seq_release(inode, file);
7932 
7933 	return 0;
7934 }
7935 
7936 static const struct file_operations tracing_err_log_fops = {
7937 	.open           = tracing_err_log_open,
7938 	.write		= tracing_err_log_write,
7939 	.read           = seq_read,
7940 	.llseek         = seq_lseek,
7941 	.release        = tracing_err_log_release,
7942 };
7943 
7944 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7945 {
7946 	struct trace_array *tr = inode->i_private;
7947 	struct ftrace_buffer_info *info;
7948 	int ret;
7949 
7950 	ret = tracing_check_open_get_tr(tr);
7951 	if (ret)
7952 		return ret;
7953 
7954 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7955 	if (!info) {
7956 		trace_array_put(tr);
7957 		return -ENOMEM;
7958 	}
7959 
7960 	mutex_lock(&trace_types_lock);
7961 
7962 	info->iter.tr		= tr;
7963 	info->iter.cpu_file	= tracing_get_cpu(inode);
7964 	info->iter.trace	= tr->current_trace;
7965 	info->iter.array_buffer = &tr->array_buffer;
7966 	info->spare		= NULL;
7967 	/* Force reading ring buffer for first read */
7968 	info->read		= (unsigned int)-1;
7969 
7970 	filp->private_data = info;
7971 
7972 	tr->trace_ref++;
7973 
7974 	mutex_unlock(&trace_types_lock);
7975 
7976 	ret = nonseekable_open(inode, filp);
7977 	if (ret < 0)
7978 		trace_array_put(tr);
7979 
7980 	return ret;
7981 }
7982 
7983 static __poll_t
7984 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7985 {
7986 	struct ftrace_buffer_info *info = filp->private_data;
7987 	struct trace_iterator *iter = &info->iter;
7988 
7989 	return trace_poll(iter, filp, poll_table);
7990 }
7991 
7992 static ssize_t
7993 tracing_buffers_read(struct file *filp, char __user *ubuf,
7994 		     size_t count, loff_t *ppos)
7995 {
7996 	struct ftrace_buffer_info *info = filp->private_data;
7997 	struct trace_iterator *iter = &info->iter;
7998 	ssize_t ret = 0;
7999 	ssize_t size;
8000 
8001 	if (!count)
8002 		return 0;
8003 
8004 #ifdef CONFIG_TRACER_MAX_TRACE
8005 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8006 		return -EBUSY;
8007 #endif
8008 
8009 	if (!info->spare) {
8010 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8011 							  iter->cpu_file);
8012 		if (IS_ERR(info->spare)) {
8013 			ret = PTR_ERR(info->spare);
8014 			info->spare = NULL;
8015 		} else {
8016 			info->spare_cpu = iter->cpu_file;
8017 		}
8018 	}
8019 	if (!info->spare)
8020 		return ret;
8021 
8022 	/* Do we have previous read data to read? */
8023 	if (info->read < PAGE_SIZE)
8024 		goto read;
8025 
8026  again:
8027 	trace_access_lock(iter->cpu_file);
8028 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8029 				    &info->spare,
8030 				    count,
8031 				    iter->cpu_file, 0);
8032 	trace_access_unlock(iter->cpu_file);
8033 
8034 	if (ret < 0) {
8035 		if (trace_empty(iter)) {
8036 			if ((filp->f_flags & O_NONBLOCK))
8037 				return -EAGAIN;
8038 
8039 			ret = wait_on_pipe(iter, 0);
8040 			if (ret)
8041 				return ret;
8042 
8043 			goto again;
8044 		}
8045 		return 0;
8046 	}
8047 
8048 	info->read = 0;
8049  read:
8050 	size = PAGE_SIZE - info->read;
8051 	if (size > count)
8052 		size = count;
8053 
8054 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8055 	if (ret == size)
8056 		return -EFAULT;
8057 
8058 	size -= ret;
8059 
8060 	*ppos += size;
8061 	info->read += size;
8062 
8063 	return size;
8064 }
8065 
8066 static int tracing_buffers_release(struct inode *inode, struct file *file)
8067 {
8068 	struct ftrace_buffer_info *info = file->private_data;
8069 	struct trace_iterator *iter = &info->iter;
8070 
8071 	mutex_lock(&trace_types_lock);
8072 
8073 	iter->tr->trace_ref--;
8074 
8075 	__trace_array_put(iter->tr);
8076 
8077 	if (info->spare)
8078 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8079 					   info->spare_cpu, info->spare);
8080 	kvfree(info);
8081 
8082 	mutex_unlock(&trace_types_lock);
8083 
8084 	return 0;
8085 }
8086 
8087 struct buffer_ref {
8088 	struct trace_buffer	*buffer;
8089 	void			*page;
8090 	int			cpu;
8091 	refcount_t		refcount;
8092 };
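
/*
 * Each ring buffer page handed to splice_to_pipe() below is wrapped in a
 * buffer_ref.  The reference is dropped either by the pipe consumer via
 * buffer_pipe_buf_release(), or by buffer_spd_release() if the splice
 * fails part way, and the page only goes back to the ring buffer once the
 * last reference is gone (see buffer_ref_release()).
 */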
8093 
8094 static void buffer_ref_release(struct buffer_ref *ref)
8095 {
8096 	if (!refcount_dec_and_test(&ref->refcount))
8097 		return;
8098 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8099 	kfree(ref);
8100 }
8101 
8102 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8103 				    struct pipe_buffer *buf)
8104 {
8105 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8106 
8107 	buffer_ref_release(ref);
8108 	buf->private = 0;
8109 }
8110 
8111 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8112 				struct pipe_buffer *buf)
8113 {
8114 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8115 
8116 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8117 		return false;
8118 
8119 	refcount_inc(&ref->refcount);
8120 	return true;
8121 }
8122 
8123 /* Pipe buffer operations for a buffer. */
8124 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8125 	.release		= buffer_pipe_buf_release,
8126 	.get			= buffer_pipe_buf_get,
8127 };
8128 
8129 /*
8130  * Callback from splice_to_pipe(): releases any pages left in the spd
8131  * if we errored out while filling the pipe.
8132  */
8133 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8134 {
8135 	struct buffer_ref *ref =
8136 		(struct buffer_ref *)spd->partial[i].private;
8137 
8138 	buffer_ref_release(ref);
8139 	spd->partial[i].private = 0;
8140 }
8141 
8142 static ssize_t
8143 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8144 			    struct pipe_inode_info *pipe, size_t len,
8145 			    unsigned int flags)
8146 {
8147 	struct ftrace_buffer_info *info = file->private_data;
8148 	struct trace_iterator *iter = &info->iter;
8149 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8150 	struct page *pages_def[PIPE_DEF_BUFFERS];
8151 	struct splice_pipe_desc spd = {
8152 		.pages		= pages_def,
8153 		.partial	= partial_def,
8154 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8155 		.ops		= &buffer_pipe_buf_ops,
8156 		.spd_release	= buffer_spd_release,
8157 	};
8158 	struct buffer_ref *ref;
8159 	int entries, i;
8160 	ssize_t ret = 0;
8161 
8162 #ifdef CONFIG_TRACER_MAX_TRACE
8163 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8164 		return -EBUSY;
8165 #endif
8166 
8167 	if (*ppos & (PAGE_SIZE - 1))
8168 		return -EINVAL;
8169 
8170 	if (len & (PAGE_SIZE - 1)) {
8171 		if (len < PAGE_SIZE)
8172 			return -EINVAL;
8173 		len &= PAGE_MASK;
8174 	}
8175 
8176 	if (splice_grow_spd(pipe, &spd))
8177 		return -ENOMEM;
8178 
8179  again:
8180 	trace_access_lock(iter->cpu_file);
8181 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8182 
8183 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8184 		struct page *page;
8185 		int r;
8186 
8187 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8188 		if (!ref) {
8189 			ret = -ENOMEM;
8190 			break;
8191 		}
8192 
8193 		refcount_set(&ref->refcount, 1);
8194 		ref->buffer = iter->array_buffer->buffer;
8195 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8196 		if (IS_ERR(ref->page)) {
8197 			ret = PTR_ERR(ref->page);
8198 			ref->page = NULL;
8199 			kfree(ref);
8200 			break;
8201 		}
8202 		ref->cpu = iter->cpu_file;
8203 
8204 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8205 					  len, iter->cpu_file, 1);
8206 		if (r < 0) {
8207 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8208 						   ref->page);
8209 			kfree(ref);
8210 			break;
8211 		}
8212 
8213 		page = virt_to_page(ref->page);
8214 
8215 		spd.pages[i] = page;
8216 		spd.partial[i].len = PAGE_SIZE;
8217 		spd.partial[i].offset = 0;
8218 		spd.partial[i].private = (unsigned long)ref;
8219 		spd.nr_pages++;
8220 		*ppos += PAGE_SIZE;
8221 
8222 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8223 	}
8224 
8225 	trace_access_unlock(iter->cpu_file);
8226 	spd.nr_pages = i;
8227 
8228 	/* did we read anything? */
8229 	if (!spd.nr_pages) {
8230 		if (ret)
8231 			goto out;
8232 
8233 		ret = -EAGAIN;
8234 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8235 			goto out;
8236 
8237 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8238 		if (ret)
8239 			goto out;
8240 
8241 		goto again;
8242 	}
8243 
8244 	ret = splice_to_pipe(pipe, &spd);
8245 out:
8246 	splice_shrink_spd(&spd);
8247 
8248 	return ret;
8249 }
8250 
8251 static const struct file_operations tracing_buffers_fops = {
8252 	.open		= tracing_buffers_open,
8253 	.read		= tracing_buffers_read,
8254 	.poll		= tracing_buffers_poll,
8255 	.release	= tracing_buffers_release,
8256 	.splice_read	= tracing_buffers_splice_read,
8257 	.llseek		= no_llseek,
8258 };
8259 
8260 static ssize_t
8261 tracing_stats_read(struct file *filp, char __user *ubuf,
8262 		   size_t count, loff_t *ppos)
8263 {
8264 	struct inode *inode = file_inode(filp);
8265 	struct trace_array *tr = inode->i_private;
8266 	struct array_buffer *trace_buf = &tr->array_buffer;
8267 	int cpu = tracing_get_cpu(inode);
8268 	struct trace_seq *s;
8269 	unsigned long cnt;
8270 	unsigned long long t;
8271 	unsigned long usec_rem;
8272 
8273 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8274 	if (!s)
8275 		return -ENOMEM;
8276 
8277 	trace_seq_init(s);
8278 
8279 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8280 	trace_seq_printf(s, "entries: %ld\n", cnt);
8281 
8282 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8283 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8284 
8285 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8286 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8287 
8288 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8289 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8290 
8291 	if (trace_clocks[tr->clock_id].in_ns) {
8292 		/* local or global for trace_clock */
8293 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8294 		usec_rem = do_div(t, USEC_PER_SEC);
8295 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8296 								t, usec_rem);
8297 
8298 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8299 		usec_rem = do_div(t, USEC_PER_SEC);
8300 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8301 	} else {
8302 		/* counter or tsc mode for trace_clock */
8303 		trace_seq_printf(s, "oldest event ts: %llu\n",
8304 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8305 
8306 		trace_seq_printf(s, "now ts: %llu\n",
8307 				ring_buffer_time_stamp(trace_buf->buffer));
8308 	}
8309 
8310 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8311 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8312 
8313 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8314 	trace_seq_printf(s, "read events: %ld\n", cnt);
8315 
8316 	count = simple_read_from_buffer(ubuf, count, ppos,
8317 					s->buffer, trace_seq_used(s));
8318 
8319 	kfree(s);
8320 
8321 	return count;
8322 }
8323 
8324 static const struct file_operations tracing_stats_fops = {
8325 	.open		= tracing_open_generic_tr,
8326 	.read		= tracing_stats_read,
8327 	.llseek		= generic_file_llseek,
8328 	.release	= tracing_release_generic_tr,
8329 };
8330 
8331 #ifdef CONFIG_DYNAMIC_FTRACE
8332 
8333 static ssize_t
8334 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8335 		  size_t cnt, loff_t *ppos)
8336 {
8337 	ssize_t ret;
8338 	char *buf;
8339 	int r;
8340 
8341 	/* 256 should be plenty to hold the amount needed */
8342 	buf = kmalloc(256, GFP_KERNEL);
8343 	if (!buf)
8344 		return -ENOMEM;
8345 
8346 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8347 		      ftrace_update_tot_cnt,
8348 		      ftrace_number_of_pages,
8349 		      ftrace_number_of_groups);
8350 
8351 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8352 	kfree(buf);
8353 	return ret;
8354 }
8355 
8356 static const struct file_operations tracing_dyn_info_fops = {
8357 	.open		= tracing_open_generic,
8358 	.read		= tracing_read_dyn_info,
8359 	.llseek		= generic_file_llseek,
8360 };
8361 #endif /* CONFIG_DYNAMIC_FTRACE */
8362 
8363 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8364 static void
8365 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8366 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8367 		void *data)
8368 {
8369 	tracing_snapshot_instance(tr);
8370 }
8371 
8372 static void
8373 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8374 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8375 		      void *data)
8376 {
8377 	struct ftrace_func_mapper *mapper = data;
8378 	long *count = NULL;
8379 
8380 	if (mapper)
8381 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8382 
8383 	if (count) {
8384 
8385 		if (*count <= 0)
8386 			return;
8387 
8388 		(*count)--;
8389 	}
8390 
8391 	tracing_snapshot_instance(tr);
8392 }
8393 
8394 static int
8395 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8396 		      struct ftrace_probe_ops *ops, void *data)
8397 {
8398 	struct ftrace_func_mapper *mapper = data;
8399 	long *count = NULL;
8400 
8401 	seq_printf(m, "%ps:", (void *)ip);
8402 
8403 	seq_puts(m, "snapshot");
8404 
8405 	if (mapper)
8406 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8407 
8408 	if (count)
8409 		seq_printf(m, ":count=%ld\n", *count);
8410 	else
8411 		seq_puts(m, ":unlimited\n");
8412 
8413 	return 0;
8414 }
8415 
8416 static int
8417 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8418 		     unsigned long ip, void *init_data, void **data)
8419 {
8420 	struct ftrace_func_mapper *mapper = *data;
8421 
8422 	if (!mapper) {
8423 		mapper = allocate_ftrace_func_mapper();
8424 		if (!mapper)
8425 			return -ENOMEM;
8426 		*data = mapper;
8427 	}
8428 
8429 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8430 }
8431 
8432 static void
8433 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8434 		     unsigned long ip, void *data)
8435 {
8436 	struct ftrace_func_mapper *mapper = data;
8437 
8438 	if (!ip) {
8439 		if (!mapper)
8440 			return;
8441 		free_ftrace_func_mapper(mapper, NULL);
8442 		return;
8443 	}
8444 
8445 	ftrace_func_mapper_remove_ip(mapper, ip);
8446 }
8447 
8448 static struct ftrace_probe_ops snapshot_probe_ops = {
8449 	.func			= ftrace_snapshot,
8450 	.print			= ftrace_snapshot_print,
8451 };
8452 
8453 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8454 	.func			= ftrace_count_snapshot,
8455 	.print			= ftrace_snapshot_print,
8456 	.init			= ftrace_snapshot_init,
8457 	.free			= ftrace_snapshot_free,
8458 };
8459 
8460 static int
8461 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8462 			       char *glob, char *cmd, char *param, int enable)
8463 {
8464 	struct ftrace_probe_ops *ops;
8465 	void *count = (void *)-1;
8466 	char *number;
8467 	int ret;
8468 
8469 	if (!tr)
8470 		return -ENODEV;
8471 
8472 	/* hash funcs only work with set_ftrace_filter */
8473 	if (!enable)
8474 		return -EINVAL;
8475 
8476 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8477 
8478 	if (glob[0] == '!')
8479 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8480 
8481 	if (!param)
8482 		goto out_reg;
8483 
8484 	number = strsep(&param, ":");
8485 
8486 	if (!strlen(number))
8487 		goto out_reg;
8488 
8489 	/*
8490 	 * We use the callback data field (which is a pointer)
8491 	 * as our counter.
8492 	 */
8493 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8494 	if (ret)
8495 		return ret;
8496 
8497  out_reg:
8498 	ret = tracing_alloc_snapshot_instance(tr);
8499 	if (ret < 0)
8500 		goto out;
8501 
8502 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8503 
8504  out:
8505 	return ret < 0 ? ret : 0;
8506 }
8507 
8508 static struct ftrace_func_command ftrace_snapshot_cmd = {
8509 	.name			= "snapshot",
8510 	.func			= ftrace_trace_snapshot_callback,
8511 };
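
/*
 * The "snapshot" command registered below is used from user space through
 * set_ftrace_filter, for example (function name purely illustrative):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter	# every hit
 *	echo 'schedule:snapshot:5' > set_ftrace_filter	# first 5 hits only
 *	echo '!schedule:snapshot' > set_ftrace_filter	# remove the probe
 */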
8512 
8513 static __init int register_snapshot_cmd(void)
8514 {
8515 	return register_ftrace_command(&ftrace_snapshot_cmd);
8516 }
8517 #else
8518 static inline __init int register_snapshot_cmd(void) { return 0; }
8519 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8520 
8521 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8522 {
8523 	if (WARN_ON(!tr->dir))
8524 		return ERR_PTR(-ENODEV);
8525 
8526 	/* Top directory uses NULL as the parent */
8527 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8528 		return NULL;
8529 
8530 	/* All sub buffers have a descriptor */
8531 	return tr->dir;
8532 }
8533 
8534 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8535 {
8536 	struct dentry *d_tracer;
8537 
8538 	if (tr->percpu_dir)
8539 		return tr->percpu_dir;
8540 
8541 	d_tracer = tracing_get_dentry(tr);
8542 	if (IS_ERR(d_tracer))
8543 		return NULL;
8544 
8545 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8546 
8547 	MEM_FAIL(!tr->percpu_dir,
8548 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8549 
8550 	return tr->percpu_dir;
8551 }
8552 
8553 static struct dentry *
8554 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8555 		      void *data, long cpu, const struct file_operations *fops)
8556 {
8557 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8558 
8559 	if (ret) /* See tracing_get_cpu() */
8560 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8561 	return ret;
8562 }
8563 
8564 static void
8565 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8566 {
8567 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8568 	struct dentry *d_cpu;
8569 	char cpu_dir[30]; /* 30 characters should be more than enough */
8570 
8571 	if (!d_percpu)
8572 		return;
8573 
8574 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8575 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8576 	if (!d_cpu) {
8577 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8578 		return;
8579 	}
8580 
8581 	/* per cpu trace_pipe */
8582 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8583 				tr, cpu, &tracing_pipe_fops);
8584 
8585 	/* per cpu trace */
8586 	trace_create_cpu_file("trace", 0644, d_cpu,
8587 				tr, cpu, &tracing_fops);
8588 
8589 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8590 				tr, cpu, &tracing_buffers_fops);
8591 
8592 	trace_create_cpu_file("stats", 0444, d_cpu,
8593 				tr, cpu, &tracing_stats_fops);
8594 
8595 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8596 				tr, cpu, &tracing_entries_fops);
8597 
8598 #ifdef CONFIG_TRACER_SNAPSHOT
8599 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8600 				tr, cpu, &snapshot_fops);
8601 
8602 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8603 				tr, cpu, &snapshot_raw_fops);
8604 #endif
8605 }
8606 
8607 #ifdef CONFIG_FTRACE_SELFTEST
8608 /* Let selftest have access to static functions in this file */
8609 #include "trace_selftest.c"
8610 #endif
8611 
8612 static ssize_t
8613 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8614 			loff_t *ppos)
8615 {
8616 	struct trace_option_dentry *topt = filp->private_data;
8617 	char *buf;
8618 
8619 	if (topt->flags->val & topt->opt->bit)
8620 		buf = "1\n";
8621 	else
8622 		buf = "0\n";
8623 
8624 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8625 }
8626 
8627 static ssize_t
8628 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8629 			 loff_t *ppos)
8630 {
8631 	struct trace_option_dentry *topt = filp->private_data;
8632 	unsigned long val;
8633 	int ret;
8634 
8635 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8636 	if (ret)
8637 		return ret;
8638 
8639 	if (val != 0 && val != 1)
8640 		return -EINVAL;
8641 
8642 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8643 		mutex_lock(&trace_types_lock);
8644 		ret = __set_tracer_option(topt->tr, topt->flags,
8645 					  topt->opt, !val);
8646 		mutex_unlock(&trace_types_lock);
8647 		if (ret)
8648 			return ret;
8649 	}
8650 
8651 	*ppos += cnt;
8652 
8653 	return cnt;
8654 }
8655 
8656 
8657 static const struct file_operations trace_options_fops = {
8658 	.open = tracing_open_generic,
8659 	.read = trace_options_read,
8660 	.write = trace_options_write,
8661 	.llseek	= generic_file_llseek,
8662 };
8663 
8664 /*
8665  * In order to pass in both the trace_array descriptor and the index
8666  * to the flag that the trace option file represents, the trace_array
8667  * has a character array of trace_flags_index[], which holds the index
8668  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8669  * The address of this character array is passed to the flag option file
8670  * read/write callbacks.
8671  *
8672  * In order to extract both the index and the trace_array descriptor,
8673  * get_tr_index() uses the following algorithm.
8674  *
8675  *   idx = *ptr;
8676  *
8677  * Since the pointer is the address of index[idx], and index[idx] == idx
8678  * (remember index[1] == 1), dereferencing it yields the index directly.
8679  *
8680  * Subtracting that index from the pointer then gets us back to the
8681  * start of the index array:
8682  *
8683  *   ptr - idx == &index[0]
8684  *
8685  * Then a simple container_of() from that pointer gets us to the
8686  * trace_array descriptor.
8687  */
8688 static void get_tr_index(void *data, struct trace_array **ptr,
8689 			 unsigned int *pindex)
8690 {
8691 	*pindex = *(unsigned char *)data;
8692 
8693 	*ptr = container_of(data - *pindex, struct trace_array,
8694 			    trace_flags_index);
8695 }
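
/*
 * Worked example (illustrative): for the option file created for flag
 * bit 3, data == &tr->trace_flags_index[3], so:
 *
 *	*pindex == tr->trace_flags_index[3] == 3
 *	data - 3 == &tr->trace_flags_index[0]
 *	container_of(data - 3, struct trace_array, trace_flags_index) == tr
 */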
8696 
8697 static ssize_t
8698 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8699 			loff_t *ppos)
8700 {
8701 	void *tr_index = filp->private_data;
8702 	struct trace_array *tr;
8703 	unsigned int index;
8704 	char *buf;
8705 
8706 	get_tr_index(tr_index, &tr, &index);
8707 
8708 	if (tr->trace_flags & (1 << index))
8709 		buf = "1\n";
8710 	else
8711 		buf = "0\n";
8712 
8713 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8714 }
8715 
8716 static ssize_t
8717 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8718 			 loff_t *ppos)
8719 {
8720 	void *tr_index = filp->private_data;
8721 	struct trace_array *tr;
8722 	unsigned int index;
8723 	unsigned long val;
8724 	int ret;
8725 
8726 	get_tr_index(tr_index, &tr, &index);
8727 
8728 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8729 	if (ret)
8730 		return ret;
8731 
8732 	if (val != 0 && val != 1)
8733 		return -EINVAL;
8734 
8735 	mutex_lock(&event_mutex);
8736 	mutex_lock(&trace_types_lock);
8737 	ret = set_tracer_flag(tr, 1 << index, val);
8738 	mutex_unlock(&trace_types_lock);
8739 	mutex_unlock(&event_mutex);
8740 
8741 	if (ret < 0)
8742 		return ret;
8743 
8744 	*ppos += cnt;
8745 
8746 	return cnt;
8747 }
8748 
8749 static const struct file_operations trace_options_core_fops = {
8750 	.open = tracing_open_generic,
8751 	.read = trace_options_core_read,
8752 	.write = trace_options_core_write,
8753 	.llseek = generic_file_llseek,
8754 };
8755 
8756 struct dentry *trace_create_file(const char *name,
8757 				 umode_t mode,
8758 				 struct dentry *parent,
8759 				 void *data,
8760 				 const struct file_operations *fops)
8761 {
8762 	struct dentry *ret;
8763 
8764 	ret = tracefs_create_file(name, mode, parent, data, fops);
8765 	if (!ret)
8766 		pr_warn("Could not create tracefs '%s' entry\n", name);
8767 
8768 	return ret;
8769 }
8770 
8771 
8772 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8773 {
8774 	struct dentry *d_tracer;
8775 
8776 	if (tr->options)
8777 		return tr->options;
8778 
8779 	d_tracer = tracing_get_dentry(tr);
8780 	if (IS_ERR(d_tracer))
8781 		return NULL;
8782 
8783 	tr->options = tracefs_create_dir("options", d_tracer);
8784 	if (!tr->options) {
8785 		pr_warn("Could not create tracefs directory 'options'\n");
8786 		return NULL;
8787 	}
8788 
8789 	return tr->options;
8790 }
8791 
8792 static void
8793 create_trace_option_file(struct trace_array *tr,
8794 			 struct trace_option_dentry *topt,
8795 			 struct tracer_flags *flags,
8796 			 struct tracer_opt *opt)
8797 {
8798 	struct dentry *t_options;
8799 
8800 	t_options = trace_options_init_dentry(tr);
8801 	if (!t_options)
8802 		return;
8803 
8804 	topt->flags = flags;
8805 	topt->opt = opt;
8806 	topt->tr = tr;
8807 
8808 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8809 				    &trace_options_fops);
8810 
8811 }
8812 
8813 static void
8814 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8815 {
8816 	struct trace_option_dentry *topts;
8817 	struct trace_options *tr_topts;
8818 	struct tracer_flags *flags;
8819 	struct tracer_opt *opts;
8820 	int cnt;
8821 	int i;
8822 
8823 	if (!tracer)
8824 		return;
8825 
8826 	flags = tracer->flags;
8827 
8828 	if (!flags || !flags->opts)
8829 		return;
8830 
8831 	/*
8832 	 * If this is an instance, only create flags for tracers
8833 	 * the instance may have.
8834 	 */
8835 	if (!trace_ok_for_array(tracer, tr))
8836 		return;
8837 
8838 	for (i = 0; i < tr->nr_topts; i++) {
8839 		/* Make sure there are no duplicate flags. */
8840 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8841 			return;
8842 	}
8843 
8844 	opts = flags->opts;
8845 
8846 	for (cnt = 0; opts[cnt].name; cnt++)
8847 		;
8848 
8849 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8850 	if (!topts)
8851 		return;
8852 
8853 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8854 			    GFP_KERNEL);
8855 	if (!tr_topts) {
8856 		kfree(topts);
8857 		return;
8858 	}
8859 
8860 	tr->topts = tr_topts;
8861 	tr->topts[tr->nr_topts].tracer = tracer;
8862 	tr->topts[tr->nr_topts].topts = topts;
8863 	tr->nr_topts++;
8864 
8865 	for (cnt = 0; opts[cnt].name; cnt++) {
8866 		create_trace_option_file(tr, &topts[cnt], flags,
8867 					 &opts[cnt]);
8868 		MEM_FAIL(topts[cnt].entry == NULL,
8869 			  "Failed to create trace option: %s",
8870 			  opts[cnt].name);
8871 	}
8872 }
8873 
8874 static struct dentry *
8875 create_trace_option_core_file(struct trace_array *tr,
8876 			      const char *option, long index)
8877 {
8878 	struct dentry *t_options;
8879 
8880 	t_options = trace_options_init_dentry(tr);
8881 	if (!t_options)
8882 		return NULL;
8883 
8884 	return trace_create_file(option, 0644, t_options,
8885 				 (void *)&tr->trace_flags_index[index],
8886 				 &trace_options_core_fops);
8887 }
8888 
8889 static void create_trace_options_dir(struct trace_array *tr)
8890 {
8891 	struct dentry *t_options;
8892 	bool top_level = tr == &global_trace;
8893 	int i;
8894 
8895 	t_options = trace_options_init_dentry(tr);
8896 	if (!t_options)
8897 		return;
8898 
8899 	for (i = 0; trace_options[i]; i++) {
8900 		if (top_level ||
8901 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8902 			create_trace_option_core_file(tr, trace_options[i], i);
8903 	}
8904 }
8905 
8906 static ssize_t
8907 rb_simple_read(struct file *filp, char __user *ubuf,
8908 	       size_t cnt, loff_t *ppos)
8909 {
8910 	struct trace_array *tr = filp->private_data;
8911 	char buf[64];
8912 	int r;
8913 
8914 	r = tracer_tracing_is_on(tr);
8915 	r = sprintf(buf, "%d\n", r);
8916 
8917 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8918 }
8919 
8920 static ssize_t
8921 rb_simple_write(struct file *filp, const char __user *ubuf,
8922 		size_t cnt, loff_t *ppos)
8923 {
8924 	struct trace_array *tr = filp->private_data;
8925 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8926 	unsigned long val;
8927 	int ret;
8928 
8929 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8930 	if (ret)
8931 		return ret;
8932 
8933 	if (buffer) {
8934 		mutex_lock(&trace_types_lock);
8935 		if (!!val == tracer_tracing_is_on(tr)) {
8936 			val = 0; /* do nothing */
8937 		} else if (val) {
8938 			tracer_tracing_on(tr);
8939 			if (tr->current_trace->start)
8940 				tr->current_trace->start(tr);
8941 		} else {
8942 			tracer_tracing_off(tr);
8943 			if (tr->current_trace->stop)
8944 				tr->current_trace->stop(tr);
8945 		}
8946 		mutex_unlock(&trace_types_lock);
8947 	}
8948 
8949 	(*ppos)++;
8950 
8951 	return cnt;
8952 }
8953 
8954 static const struct file_operations rb_simple_fops = {
8955 	.open		= tracing_open_generic_tr,
8956 	.read		= rb_simple_read,
8957 	.write		= rb_simple_write,
8958 	.release	= tracing_release_generic_tr,
8959 	.llseek		= default_llseek,
8960 };
8961 
8962 static ssize_t
8963 buffer_percent_read(struct file *filp, char __user *ubuf,
8964 		    size_t cnt, loff_t *ppos)
8965 {
8966 	struct trace_array *tr = filp->private_data;
8967 	char buf[64];
8968 	int r;
8969 
8970 	r = tr->buffer_percent;
8971 	r = sprintf(buf, "%d\n", r);
8972 
8973 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8974 }
8975 
8976 static ssize_t
8977 buffer_percent_write(struct file *filp, const char __user *ubuf,
8978 		     size_t cnt, loff_t *ppos)
8979 {
8980 	struct trace_array *tr = filp->private_data;
8981 	unsigned long val;
8982 	int ret;
8983 
8984 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8985 	if (ret)
8986 		return ret;
8987 
8988 	if (val > 100)
8989 		return -EINVAL;
8990 
8991 	if (!val)
8992 		val = 1;
8993 
8994 	tr->buffer_percent = val;
8995 
8996 	(*ppos)++;
8997 
8998 	return cnt;
8999 }
9000 
9001 static const struct file_operations buffer_percent_fops = {
9002 	.open		= tracing_open_generic_tr,
9003 	.read		= buffer_percent_read,
9004 	.write		= buffer_percent_write,
9005 	.release	= tracing_release_generic_tr,
9006 	.llseek		= default_llseek,
9007 };
9008 
9009 static struct dentry *trace_instance_dir;
9010 
9011 static void
9012 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9013 
9014 static int
9015 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9016 {
9017 	enum ring_buffer_flags rb_flags;
9018 
9019 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9020 
9021 	buf->tr = tr;
9022 
9023 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9024 	if (!buf->buffer)
9025 		return -ENOMEM;
9026 
9027 	buf->data = alloc_percpu(struct trace_array_cpu);
9028 	if (!buf->data) {
9029 		ring_buffer_free(buf->buffer);
9030 		buf->buffer = NULL;
9031 		return -ENOMEM;
9032 	}
9033 
9034 	/* Allocate the first page for all buffers */
9035 	set_buffer_entries(&tr->array_buffer,
9036 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9037 
9038 	return 0;
9039 }
9040 
9041 static int allocate_trace_buffers(struct trace_array *tr, int size)
9042 {
9043 	int ret;
9044 
9045 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9046 	if (ret)
9047 		return ret;
9048 
9049 #ifdef CONFIG_TRACER_MAX_TRACE
9050 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9051 				    allocate_snapshot ? size : 1);
9052 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9053 		ring_buffer_free(tr->array_buffer.buffer);
9054 		tr->array_buffer.buffer = NULL;
9055 		free_percpu(tr->array_buffer.data);
9056 		tr->array_buffer.data = NULL;
9057 		return -ENOMEM;
9058 	}
9059 	tr->allocated_snapshot = allocate_snapshot;
9060 
9061 	/*
9062 	 * Only the top level trace array gets its snapshot allocated
9063 	 * from the kernel command line.
9064 	 */
9065 	allocate_snapshot = false;
9066 #endif
9067 
9068 	return 0;
9069 }
9070 
9071 static void free_trace_buffer(struct array_buffer *buf)
9072 {
9073 	if (buf->buffer) {
9074 		ring_buffer_free(buf->buffer);
9075 		buf->buffer = NULL;
9076 		free_percpu(buf->data);
9077 		buf->data = NULL;
9078 	}
9079 }
9080 
9081 static void free_trace_buffers(struct trace_array *tr)
9082 {
9083 	if (!tr)
9084 		return;
9085 
9086 	free_trace_buffer(&tr->array_buffer);
9087 
9088 #ifdef CONFIG_TRACER_MAX_TRACE
9089 	free_trace_buffer(&tr->max_buffer);
9090 #endif
9091 }
9092 
9093 static void init_trace_flags_index(struct trace_array *tr)
9094 {
9095 	int i;
9096 
9097 	/* Used by the trace options files */
9098 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9099 		tr->trace_flags_index[i] = i;
9100 }
9101 
9102 static void __update_tracer_options(struct trace_array *tr)
9103 {
9104 	struct tracer *t;
9105 
9106 	for (t = trace_types; t; t = t->next)
9107 		add_tracer_options(tr, t);
9108 }
9109 
9110 static void update_tracer_options(struct trace_array *tr)
9111 {
9112 	mutex_lock(&trace_types_lock);
9113 	__update_tracer_options(tr);
9114 	mutex_unlock(&trace_types_lock);
9115 }
9116 
9117 /* Must have trace_types_lock held */
9118 struct trace_array *trace_array_find(const char *instance)
9119 {
9120 	struct trace_array *tr, *found = NULL;
9121 
9122 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9123 		if (tr->name && strcmp(tr->name, instance) == 0) {
9124 			found = tr;
9125 			break;
9126 		}
9127 	}
9128 
9129 	return found;
9130 }
9131 
9132 struct trace_array *trace_array_find_get(const char *instance)
9133 {
9134 	struct trace_array *tr;
9135 
9136 	mutex_lock(&trace_types_lock);
9137 	tr = trace_array_find(instance);
9138 	if (tr)
9139 		tr->ref++;
9140 	mutex_unlock(&trace_types_lock);
9141 
9142 	return tr;
9143 }
9144 
9145 static int trace_array_create_dir(struct trace_array *tr)
9146 {
9147 	int ret;
9148 
9149 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9150 	if (!tr->dir)
9151 		return -EINVAL;
9152 
9153 	ret = event_trace_add_tracer(tr->dir, tr);
9154 	if (ret) {
9155 		tracefs_remove(tr->dir);
9156 		return ret;
9157 	}
9158 
9159 	init_tracer_tracefs(tr, tr->dir);
9160 	__update_tracer_options(tr);
9161 
9162 	return ret;
9163 }
9164 
9165 static struct trace_array *trace_array_create(const char *name)
9166 {
9167 	struct trace_array *tr;
9168 	int ret;
9169 
9170 	ret = -ENOMEM;
9171 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9172 	if (!tr)
9173 		return ERR_PTR(ret);
9174 
9175 	tr->name = kstrdup(name, GFP_KERNEL);
9176 	if (!tr->name)
9177 		goto out_free_tr;
9178 
9179 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9180 		goto out_free_tr;
9181 
9182 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9183 
9184 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9185 
9186 	raw_spin_lock_init(&tr->start_lock);
9187 
9188 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9189 
9190 	tr->current_trace = &nop_trace;
9191 
9192 	INIT_LIST_HEAD(&tr->systems);
9193 	INIT_LIST_HEAD(&tr->events);
9194 	INIT_LIST_HEAD(&tr->hist_vars);
9195 	INIT_LIST_HEAD(&tr->err_log);
9196 
9197 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9198 		goto out_free_tr;
9199 
9200 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9201 		goto out_free_tr;
9202 
9203 	ftrace_init_trace_array(tr);
9204 
9205 	init_trace_flags_index(tr);
9206 
9207 	if (trace_instance_dir) {
9208 		ret = trace_array_create_dir(tr);
9209 		if (ret)
9210 			goto out_free_tr;
9211 	} else
9212 		__trace_early_add_events(tr);
9213 
9214 	list_add(&tr->list, &ftrace_trace_arrays);
9215 
9216 	tr->ref++;
9217 
9218 	return tr;
9219 
9220  out_free_tr:
9221 	ftrace_free_ftrace_ops(tr);
9222 	free_trace_buffers(tr);
9223 	free_cpumask_var(tr->tracing_cpumask);
9224 	kfree(tr->name);
9225 	kfree(tr);
9226 
9227 	return ERR_PTR(ret);
9228 }
9229 
9230 static int instance_mkdir(const char *name)
9231 {
9232 	struct trace_array *tr;
9233 	int ret;
9234 
9235 	mutex_lock(&event_mutex);
9236 	mutex_lock(&trace_types_lock);
9237 
9238 	ret = -EEXIST;
9239 	if (trace_array_find(name))
9240 		goto out_unlock;
9241 
9242 	tr = trace_array_create(name);
9243 
9244 	ret = PTR_ERR_OR_ZERO(tr);
9245 
9246 out_unlock:
9247 	mutex_unlock(&trace_types_lock);
9248 	mutex_unlock(&event_mutex);
9249 	return ret;
9250 }
9251 
9252 /**
9253  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9254  * @name: The name of the trace array to be looked up/created.
9255  *
9256  * Returns a pointer to the trace array with the given name, or NULL
9257  * if it cannot be created.
9258  *
9259  * NOTE: This function increments the reference counter associated with the
9260  * trace array returned. This makes sure it cannot be freed while in use.
9261  * Use trace_array_put() once the trace array is no longer needed.
9262  * If the trace_array is to be freed, trace_array_destroy() needs to
9263  * be called after the trace_array_put(), or simply let user space delete
9264  * it from the tracefs instances directory. But until the
9265  * trace_array_put() is called, user space cannot delete it.
9266  *
9267  */
9268 struct trace_array *trace_array_get_by_name(const char *name)
9269 {
9270 	struct trace_array *tr;
9271 
9272 	mutex_lock(&event_mutex);
9273 	mutex_lock(&trace_types_lock);
9274 
9275 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9276 		if (tr->name && strcmp(tr->name, name) == 0)
9277 			goto out_unlock;
9278 	}
9279 
9280 	tr = trace_array_create(name);
9281 
9282 	if (IS_ERR(tr))
9283 		tr = NULL;
9284 out_unlock:
9285 	if (tr)
9286 		tr->ref++;
9287 
9288 	mutex_unlock(&trace_types_lock);
9289 	mutex_unlock(&event_mutex);
9290 	return tr;
9291 }
9292 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
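
/*
 * Illustrative sketch of the expected usage from a module (the instance
 * name "my_instance" is hypothetical):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *	...
 *	trace_array_put(tr);
 *
 * and, only if the module also wants the instance removed afterwards,
 * trace_array_destroy(tr), per the NOTE above.
 */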
9293 
9294 static int __remove_instance(struct trace_array *tr)
9295 {
9296 	int i;
9297 
9298 	/* Reference counter for a newly created trace array = 1. */
9299 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9300 		return -EBUSY;
9301 
9302 	list_del(&tr->list);
9303 
9304 	/* Disable all the flags that were enabled coming in */
9305 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9306 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9307 			set_tracer_flag(tr, 1 << i, 0);
9308 	}
9309 
9310 	tracing_set_nop(tr);
9311 	clear_ftrace_function_probes(tr);
9312 	event_trace_del_tracer(tr);
9313 	ftrace_clear_pids(tr);
9314 	ftrace_destroy_function_files(tr);
9315 	tracefs_remove(tr->dir);
9316 	free_percpu(tr->last_func_repeats);
9317 	free_trace_buffers(tr);
9318 
9319 	for (i = 0; i < tr->nr_topts; i++) {
9320 		kfree(tr->topts[i].topts);
9321 	}
9322 	kfree(tr->topts);
9323 
9324 	free_cpumask_var(tr->tracing_cpumask);
9325 	kfree(tr->name);
9326 	kfree(tr);
9327 
9328 	return 0;
9329 }
9330 
9331 int trace_array_destroy(struct trace_array *this_tr)
9332 {
9333 	struct trace_array *tr;
9334 	int ret;
9335 
9336 	if (!this_tr)
9337 		return -EINVAL;
9338 
9339 	mutex_lock(&event_mutex);
9340 	mutex_lock(&trace_types_lock);
9341 
9342 	ret = -ENODEV;
9343 
9344 	/* Make sure the trace array exists before destroying it. */
9345 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9346 		if (tr == this_tr) {
9347 			ret = __remove_instance(tr);
9348 			break;
9349 		}
9350 	}
9351 
9352 	mutex_unlock(&trace_types_lock);
9353 	mutex_unlock(&event_mutex);
9354 
9355 	return ret;
9356 }
9357 EXPORT_SYMBOL_GPL(trace_array_destroy);
9358 
9359 static int instance_rmdir(const char *name)
9360 {
9361 	struct trace_array *tr;
9362 	int ret;
9363 
9364 	mutex_lock(&event_mutex);
9365 	mutex_lock(&trace_types_lock);
9366 
9367 	ret = -ENODEV;
9368 	tr = trace_array_find(name);
9369 	if (tr)
9370 		ret = __remove_instance(tr);
9371 
9372 	mutex_unlock(&trace_types_lock);
9373 	mutex_unlock(&event_mutex);
9374 
9375 	return ret;
9376 }
9377 
9378 static __init void create_trace_instances(struct dentry *d_tracer)
9379 {
9380 	struct trace_array *tr;
9381 
9382 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9383 							 instance_mkdir,
9384 							 instance_rmdir);
9385 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9386 		return;
9387 
9388 	mutex_lock(&event_mutex);
9389 	mutex_lock(&trace_types_lock);
9390 
9391 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9392 		if (!tr->name)
9393 			continue;
9394 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9395 			     "Failed to create instance directory\n"))
9396 			break;
9397 	}
9398 
9399 	mutex_unlock(&trace_types_lock);
9400 	mutex_unlock(&event_mutex);
9401 }
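
/*
 * From user space the instance_mkdir()/instance_rmdir() callbacks above
 * are reached through the instances directory, e.g. (assuming tracefs is
 * mounted at /sys/kernel/tracing):
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo
 */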
9402 
9403 static void
9404 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9405 {
9406 	struct trace_event_file *file;
9407 	int cpu;
9408 
9409 	trace_create_file("available_tracers", 0444, d_tracer,
9410 			tr, &show_traces_fops);
9411 
9412 	trace_create_file("current_tracer", 0644, d_tracer,
9413 			tr, &set_tracer_fops);
9414 
9415 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9416 			  tr, &tracing_cpumask_fops);
9417 
9418 	trace_create_file("trace_options", 0644, d_tracer,
9419 			  tr, &tracing_iter_fops);
9420 
9421 	trace_create_file("trace", 0644, d_tracer,
9422 			  tr, &tracing_fops);
9423 
9424 	trace_create_file("trace_pipe", 0444, d_tracer,
9425 			  tr, &tracing_pipe_fops);
9426 
9427 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9428 			  tr, &tracing_entries_fops);
9429 
9430 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9431 			  tr, &tracing_total_entries_fops);
9432 
9433 	trace_create_file("free_buffer", 0200, d_tracer,
9434 			  tr, &tracing_free_buffer_fops);
9435 
9436 	trace_create_file("trace_marker", 0220, d_tracer,
9437 			  tr, &tracing_mark_fops);
9438 
9439 	file = __find_event_file(tr, "ftrace", "print");
9440 	if (file && file->dir)
9441 		trace_create_file("trigger", 0644, file->dir, file,
9442 				  &event_trigger_fops);
9443 	tr->trace_marker_file = file;
9444 
9445 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9446 			  tr, &tracing_mark_raw_fops);
9447 
9448 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9449 			  &trace_clock_fops);
9450 
9451 	trace_create_file("tracing_on", 0644, d_tracer,
9452 			  tr, &rb_simple_fops);
9453 
9454 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9455 			  &trace_time_stamp_mode_fops);
9456 
9457 	tr->buffer_percent = 50;
9458 
9459 	trace_create_file("buffer_percent", 0444, d_tracer,
9460 			tr, &buffer_percent_fops);
9461 
9462 	create_trace_options_dir(tr);
9463 
9464 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9465 	trace_create_maxlat_file(tr, d_tracer);
9466 #endif
9467 
9468 	if (ftrace_create_function_files(tr, d_tracer))
9469 		MEM_FAIL(1, "Could not allocate function filter files");
9470 
9471 #ifdef CONFIG_TRACER_SNAPSHOT
9472 	trace_create_file("snapshot", 0644, d_tracer,
9473 			  tr, &snapshot_fops);
9474 #endif
9475 
9476 	trace_create_file("error_log", 0644, d_tracer,
9477 			  tr, &tracing_err_log_fops);
9478 
9479 	for_each_tracing_cpu(cpu)
9480 		tracing_init_tracefs_percpu(tr, cpu);
9481 
9482 	ftrace_init_tracefs(tr, d_tracer);
9483 }
9484 
9485 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9486 {
9487 	struct vfsmount *mnt;
9488 	struct file_system_type *type;
9489 
9490 	/*
9491 	 * To maintain backward compatibility for tools that mount
9492 	 * debugfs to get to the tracing facility, tracefs is automatically
9493 	 * mounted to the debugfs/tracing directory.
9494 	 */
9495 	type = get_fs_type("tracefs");
9496 	if (!type)
9497 		return NULL;
9498 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9499 	put_filesystem(type);
9500 	if (IS_ERR(mnt))
9501 		return NULL;
9502 	mntget(mnt);
9503 
9504 	return mnt;
9505 }
9506 
9507 /**
9508  * tracing_init_dentry - initialize top level trace array
9509  *
9510  * This is called when creating files or directories in the tracing
9511  * directory. It is called via fs_initcall() by any of the boot up
9512  * code and ensures the top level tracing directory has been created.
9513  */
9514 int tracing_init_dentry(void)
9515 {
9516 	struct trace_array *tr = &global_trace;
9517 
9518 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9519 		pr_warn("Tracing disabled due to lockdown\n");
9520 		return -EPERM;
9521 	}
9522 
9523 	/* The top level trace array uses NULL as parent */
9524 	if (tr->dir)
9525 		return 0;
9526 
9527 	if (WARN_ON(!tracefs_initialized()))
9528 		return -ENODEV;
9529 
9530 	/*
9531 	 * As there may still be users that expect the tracing
9532 	 * files to exist in debugfs/tracing, we must automount
9533 	 * the tracefs file system there, so older tools still
9534 	 * work with the newer kernel.
9535 	 */
9536 	tr->dir = debugfs_create_automount("tracing", NULL,
9537 					   trace_automount, NULL);
9538 
9539 	return 0;
9540 }
9541 
9542 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9543 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9544 
9545 static struct workqueue_struct *eval_map_wq __initdata;
9546 static struct work_struct eval_map_work __initdata;
9547 
9548 static void __init eval_map_work_func(struct work_struct *work)
9549 {
9550 	int len;
9551 
9552 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9553 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9554 }
9555 
9556 static int __init trace_eval_init(void)
9557 {
9558 	INIT_WORK(&eval_map_work, eval_map_work_func);
9559 
9560 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9561 	if (!eval_map_wq) {
9562 		pr_err("Unable to allocate eval_map_wq\n");
9563 		/* Fall back to doing the work synchronously */
9564 		eval_map_work_func(&eval_map_work);
9565 		return -ENOMEM;
9566 	}
9567 
9568 	queue_work(eval_map_wq, &eval_map_work);
9569 	return 0;
9570 }
9571 
9572 static int __init trace_eval_sync(void)
9573 {
9574 	/* Make sure the eval map updates are finished */
9575 	if (eval_map_wq)
9576 		destroy_workqueue(eval_map_wq);
9577 	return 0;
9578 }
9579 
9580 late_initcall_sync(trace_eval_sync);
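
/*
 * Illustrative sketch, not part of this file: the eval maps inserted above
 * normally come from trace event headers via TRACE_DEFINE_ENUM(), which lets
 * enum symbols used in TP_printk() format strings be resolved to numeric
 * values for user space (the mapping is also exposed in the tracefs
 * "eval_map" file when CONFIG_TRACE_EVAL_MAP_FILE is set).  The enum below
 * is hypothetical and would live in an include/trace/events/ header, next
 * to the event definitions, not here.
 */
enum example_io_state {
	EXAMPLE_IO_IDLE,
	EXAMPLE_IO_BUSY,
};

TRACE_DEFINE_ENUM(EXAMPLE_IO_IDLE);
TRACE_DEFINE_ENUM(EXAMPLE_IO_BUSY);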
9581 
9582 
9583 #ifdef CONFIG_MODULES
9584 static void trace_module_add_evals(struct module *mod)
9585 {
9586 	if (!mod->num_trace_evals)
9587 		return;
9588 
9589 	/*
9590 	 * Modules with bad taint do not have events created; do
9591 	 * not bother with their enums either.
9592 	 */
9593 	if (trace_module_has_bad_taint(mod))
9594 		return;
9595 
9596 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9597 }
9598 
9599 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9600 static void trace_module_remove_evals(struct module *mod)
9601 {
9602 	union trace_eval_map_item *map;
9603 	union trace_eval_map_item **last = &trace_eval_maps;
9604 
9605 	if (!mod->num_trace_evals)
9606 		return;
9607 
9608 	mutex_lock(&trace_eval_mutex);
9609 
9610 	map = trace_eval_maps;
9611 
9612 	while (map) {
9613 		if (map->head.mod == mod)
9614 			break;
9615 		map = trace_eval_jmp_to_tail(map);
9616 		last = &map->tail.next;
9617 		map = map->tail.next;
9618 	}
9619 	if (!map)
9620 		goto out;
9621 
9622 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9623 	kfree(map);
9624  out:
9625 	mutex_unlock(&trace_eval_mutex);
9626 }
9627 #else
9628 static inline void trace_module_remove_evals(struct module *mod) { }
9629 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9630 
9631 static int trace_module_notify(struct notifier_block *self,
9632 			       unsigned long val, void *data)
9633 {
9634 	struct module *mod = data;
9635 
9636 	switch (val) {
9637 	case MODULE_STATE_COMING:
9638 		trace_module_add_evals(mod);
9639 		break;
9640 	case MODULE_STATE_GOING:
9641 		trace_module_remove_evals(mod);
9642 		break;
9643 	}
9644 
9645 	return NOTIFY_OK;
9646 }
9647 
9648 static struct notifier_block trace_module_nb = {
9649 	.notifier_call = trace_module_notify,
9650 	.priority = 0,
9651 };
9652 #endif /* CONFIG_MODULES */
9653 
9654 static __init int tracer_init_tracefs(void)
9655 {
9656 	int ret;
9657 
9658 	trace_access_lock_init();
9659 
9660 	ret = tracing_init_dentry();
9661 	if (ret)
9662 		return 0;
9663 
9664 	event_trace_init();
9665 
9666 	init_tracer_tracefs(&global_trace, NULL);
9667 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9668 
9669 	trace_create_file("tracing_thresh", 0644, NULL,
9670 			&global_trace, &tracing_thresh_fops);
9671 
9672 	trace_create_file("README", 0444, NULL,
9673 			NULL, &tracing_readme_fops);
9674 
9675 	trace_create_file("saved_cmdlines", 0444, NULL,
9676 			NULL, &tracing_saved_cmdlines_fops);
9677 
9678 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9679 			  NULL, &tracing_saved_cmdlines_size_fops);
9680 
9681 	trace_create_file("saved_tgids", 0444, NULL,
9682 			NULL, &tracing_saved_tgids_fops);
9683 
9684 	trace_eval_init();
9685 
9686 	trace_create_eval_file(NULL);
9687 
9688 #ifdef CONFIG_MODULES
9689 	register_module_notifier(&trace_module_nb);
9690 #endif
9691 
9692 #ifdef CONFIG_DYNAMIC_FTRACE
9693 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9694 			NULL, &tracing_dyn_info_fops);
9695 #endif
9696 
9697 	create_trace_instances(NULL);
9698 
9699 	update_tracer_options(&global_trace);
9700 
9701 	return 0;
9702 }
9703 
9704 fs_initcall(tracer_init_tracefs);
9705 
9706 static int trace_panic_handler(struct notifier_block *this,
9707 			       unsigned long event, void *unused)
9708 {
9709 	if (ftrace_dump_on_oops)
9710 		ftrace_dump(ftrace_dump_on_oops);
9711 	return NOTIFY_OK;
9712 }
9713 
9714 static struct notifier_block trace_panic_notifier = {
9715 	.notifier_call  = trace_panic_handler,
9716 	.next           = NULL,
9717 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9718 };
9719 
9720 static int trace_die_handler(struct notifier_block *self,
9721 			     unsigned long val,
9722 			     void *data)
9723 {
9724 	switch (val) {
9725 	case DIE_OOPS:
9726 		if (ftrace_dump_on_oops)
9727 			ftrace_dump(ftrace_dump_on_oops);
9728 		break;
9729 	default:
9730 		break;
9731 	}
9732 	return NOTIFY_OK;
9733 }
9734 
9735 static struct notifier_block trace_die_notifier = {
9736 	.notifier_call = trace_die_handler,
9737 	.priority = 200
9738 };
9739 
9740 /*
9741  * printk is limited to a max of 1024 characters, and we really don't
9742  * need it that big. Nothing should be printing 1000 characters anyway.
9743  */
9744 #define TRACE_MAX_PRINT		1000
9745 
9746 /*
9747  * Define here KERN_TRACE so that we have one place to modify
9748  * it if we decide to change what log level the ftrace dump
9749  * should be at.
9750  */
9751 #define KERN_TRACE		KERN_EMERG
9752 
9753 void
9754 trace_printk_seq(struct trace_seq *s)
9755 {
9756 	/* Probably should print a warning here. */
9757 	if (s->seq.len >= TRACE_MAX_PRINT)
9758 		s->seq.len = TRACE_MAX_PRINT;
9759 
9760 	/*
9761 	 * More paranoid code. Although the buffer size is set to
9762 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9763 	 * an extra layer of protection.
9764 	 */
9765 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9766 		s->seq.len = s->seq.size - 1;
9767 
9768 	/* Should already be NUL terminated, but we are paranoid. */
9769 	s->buffer[s->seq.len] = 0;
9770 
9771 	printk(KERN_TRACE "%s", s->buffer);
9772 
9773 	trace_seq_init(s);
9774 }
9775 
9776 void trace_init_global_iter(struct trace_iterator *iter)
9777 {
9778 	iter->tr = &global_trace;
9779 	iter->trace = iter->tr->current_trace;
9780 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9781 	iter->array_buffer = &global_trace.array_buffer;
9782 
9783 	if (iter->trace && iter->trace->open)
9784 		iter->trace->open(iter);
9785 
9786 	/* Annotate start of buffers if we had overruns */
9787 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9788 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9789 
9790 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9791 	if (trace_clocks[iter->tr->clock_id].in_ns)
9792 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9793 }
9794 
9795 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9796 {
9797 	/* use static because iter can be a bit big for the stack */
9798 	static struct trace_iterator iter;
9799 	static atomic_t dump_running;
9800 	struct trace_array *tr = &global_trace;
9801 	unsigned int old_userobj;
9802 	unsigned long flags;
9803 	int cnt = 0, cpu;
9804 
9805 	/* Only allow one dump user at a time. */
9806 	if (atomic_inc_return(&dump_running) != 1) {
9807 		atomic_dec(&dump_running);
9808 		return;
9809 	}
9810 
9811 	/*
9812 	 * Always turn off tracing when we dump.
9813 	 * We don't need to show trace output of what happens
9814 	 * between multiple crashes.
9815 	 *
9816 	 * If the user does a sysrq-z, then they can re-enable
9817 	 * tracing with echo 1 > tracing_on.
9818 	 */
9819 	tracing_off();
9820 
9821 	local_irq_save(flags);
9822 
9823 	/* Simulate the iterator */
9824 	trace_init_global_iter(&iter);
9825 	/* Cannot use kmalloc for iter.temp and iter.fmt */
9826 	iter.temp = static_temp_buf;
9827 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9828 	iter.fmt = static_fmt_buf;
9829 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9830 
9831 	for_each_tracing_cpu(cpu) {
9832 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9833 	}
9834 
9835 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9836 
9837 	/* don't look at user memory in panic mode */
9838 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9839 
9840 	switch (oops_dump_mode) {
9841 	case DUMP_ALL:
9842 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9843 		break;
9844 	case DUMP_ORIG:
9845 		iter.cpu_file = raw_smp_processor_id();
9846 		break;
9847 	case DUMP_NONE:
9848 		goto out_enable;
9849 	default:
9850 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9851 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9852 	}
9853 
9854 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9855 
9856 	/* Did function tracer already get disabled? */
9857 	if (ftrace_is_dead()) {
9858 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9859 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9860 	}
9861 
9862 	/*
9863 	 * We need to stop all tracing on all CPUs to read
9864 	 * the next buffer. This is a bit expensive, but it is
9865 	 * not done often. We print everything we can read,
9866 	 * and then release the locks again.
9867 	 */
9868 
9869 	while (!trace_empty(&iter)) {
9870 
9871 		if (!cnt)
9872 			printk(KERN_TRACE "---------------------------------\n");
9873 
9874 		cnt++;
9875 
9876 		trace_iterator_reset(&iter);
9877 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9878 
9879 		if (trace_find_next_entry_inc(&iter) != NULL) {
9880 			int ret;
9881 
9882 			ret = print_trace_line(&iter);
9883 			if (ret != TRACE_TYPE_NO_CONSUME)
9884 				trace_consume(&iter);
9885 		}
9886 		touch_nmi_watchdog();
9887 
9888 		trace_printk_seq(&iter.seq);
9889 	}
9890 
9891 	if (!cnt)
9892 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9893 	else
9894 		printk(KERN_TRACE "---------------------------------\n");
9895 
9896  out_enable:
9897 	tr->trace_flags |= old_userobj;
9898 
9899 	for_each_tracing_cpu(cpu) {
9900 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9901 	}
9902 	atomic_dec(&dump_running);
9903 	local_irq_restore(flags);
9904 }
9905 EXPORT_SYMBOL_GPL(ftrace_dump);
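
/*
 * Illustrative sketch, not part of this file: ftrace_dump() is exported so
 * that other kernel code can dump the trace buffers when it hits a fatal,
 * hard-to-reproduce condition.  The helper below is hypothetical; DUMP_ORIG
 * would restrict the dump to the current CPU instead (see the switch in
 * ftrace_dump() above), and the output goes to the console at KERN_TRACE
 * (KERN_EMERG) level.
 */
static void example_dump_on_bad_state(bool bad_state)
{
	if (WARN_ON_ONCE(bad_state)) {
		/* Dump every CPU's ring buffer and leave tracing disabled */
		ftrace_dump(DUMP_ALL);
	}
}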
9906 
9907 #define WRITE_BUFSIZE  4096
9908 
9909 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9910 				size_t count, loff_t *ppos,
9911 				int (*createfn)(const char *))
9912 {
9913 	char *kbuf, *buf, *tmp;
9914 	int ret = 0;
9915 	size_t done = 0;
9916 	size_t size;
9917 
9918 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9919 	if (!kbuf)
9920 		return -ENOMEM;
9921 
9922 	while (done < count) {
9923 		size = count - done;
9924 
9925 		if (size >= WRITE_BUFSIZE)
9926 			size = WRITE_BUFSIZE - 1;
9927 
9928 		if (copy_from_user(kbuf, buffer + done, size)) {
9929 			ret = -EFAULT;
9930 			goto out;
9931 		}
9932 		kbuf[size] = '\0';
9933 		buf = kbuf;
9934 		do {
9935 			tmp = strchr(buf, '\n');
9936 			if (tmp) {
9937 				*tmp = '\0';
9938 				size = tmp - buf + 1;
9939 			} else {
9940 				size = strlen(buf);
9941 				if (done + size < count) {
9942 					if (buf != kbuf)
9943 						break;
9944 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9945 					pr_warn("Line length is too long: Should be less than %d\n",
9946 						WRITE_BUFSIZE - 2);
9947 					ret = -EINVAL;
9948 					goto out;
9949 				}
9950 			}
9951 			done += size;
9952 
9953 			/* Remove comments */
9954 			tmp = strchr(buf, '#');
9955 
9956 			if (tmp)
9957 				*tmp = '\0';
9958 
9959 			ret = createfn(buf);
9960 			if (ret)
9961 				goto out;
9962 			buf += size;
9963 
9964 		} while (done < count);
9965 	}
9966 	ret = done;
9967 
9968 out:
9969 	kfree(kbuf);
9970 
9971 	return ret;
9972 }
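
/*
 * Illustrative sketch, not part of this file: how a write handler would feed
 * user input through trace_parse_run_command().  Each newline-terminated
 * line is handed to the callback with any "#" comment stripped, and a
 * non-zero return from the callback aborts the remaining lines.  Both
 * functions below are hypothetical.
 */
static int example_create_cmd(const char *raw_command)
{
	/* Blank lines (or lines that were entirely a comment) still arrive */
	if (!raw_command[0])
		return 0;

	pr_info("example command: %s\n", raw_command);
	return 0;
}

static ssize_t example_cmd_write(struct file *file, const char __user *buffer,
				 size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       example_create_cmd);
}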
9973 
9974 __init static int tracer_alloc_buffers(void)
9975 {
9976 	int ring_buf_size;
9977 	int ret = -ENOMEM;
9978 
9979 
9980 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9981 		pr_warn("Tracing disabled due to lockdown\n");
9982 		return -EPERM;
9983 	}
9984 
9985 	/*
9986 	 * Make sure we don't accidentally add more trace options
9987 	 * than we have bits for.
9988 	 */
9989 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9990 
9991 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9992 		goto out;
9993 
9994 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9995 		goto out_free_buffer_mask;
9996 
9997 	/* Only allocate trace_printk buffers if a trace_printk exists */
9998 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9999 		/* Must be called before global_trace.buffer is allocated */
10000 		trace_printk_init_buffers();
10001 
10002 	/* To save memory, keep the ring buffer size at its minimum */
10003 	if (ring_buffer_expanded)
10004 		ring_buf_size = trace_buf_size;
10005 	else
10006 		ring_buf_size = 1;
10007 
10008 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10009 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10010 
10011 	raw_spin_lock_init(&global_trace.start_lock);
10012 
10013 	/*
10014 	 * The prepare callback allocates some memory for the ring buffer. We
10015 	 * don't free the buffer if the CPU goes down. If we were to free
10016 	 * the buffer, then the user would lose any trace that was in the
10017 	 * buffer. The memory will be removed once the "instance" is removed.
10018 	 */
10019 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10020 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10021 				      NULL);
10022 	if (ret < 0)
10023 		goto out_free_cpumask;
10024 	/* Used for event triggers */
10025 	ret = -ENOMEM;
10026 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10027 	if (!temp_buffer)
10028 		goto out_rm_hp_state;
10029 
10030 	if (trace_create_savedcmd() < 0)
10031 		goto out_free_temp_buffer;
10032 
10033 	/* TODO: make the number of buffers hot pluggable with CPUs */
10034 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10035 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10036 		goto out_free_savedcmd;
10037 	}
10038 
10039 	if (global_trace.buffer_disabled)
10040 		tracing_off();
10041 
10042 	if (trace_boot_clock) {
10043 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10044 		if (ret < 0)
10045 			pr_warn("Trace clock %s not defined, going back to default\n",
10046 				trace_boot_clock);
10047 	}
10048 
10049 	/*
10050 	 * register_tracer() might reference current_trace, so it
10051 	 * needs to be set before we register anything. This is
10052 	 * just a bootstrap of current_trace anyway.
10053 	 */
10054 	global_trace.current_trace = &nop_trace;
10055 
10056 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10057 
10058 	ftrace_init_global_array_ops(&global_trace);
10059 
10060 	init_trace_flags_index(&global_trace);
10061 
10062 	register_tracer(&nop_trace);
10063 
10064 	/* Function tracing may start here (via kernel command line) */
10065 	init_function_trace();
10066 
10067 	/* All seems OK, enable tracing */
10068 	tracing_disabled = 0;
10069 
10070 	atomic_notifier_chain_register(&panic_notifier_list,
10071 				       &trace_panic_notifier);
10072 
10073 	register_die_notifier(&trace_die_notifier);
10074 
10075 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10076 
10077 	INIT_LIST_HEAD(&global_trace.systems);
10078 	INIT_LIST_HEAD(&global_trace.events);
10079 	INIT_LIST_HEAD(&global_trace.hist_vars);
10080 	INIT_LIST_HEAD(&global_trace.err_log);
10081 	list_add(&global_trace.list, &ftrace_trace_arrays);
10082 
10083 	apply_trace_boot_options();
10084 
10085 	register_snapshot_cmd();
10086 
10087 	test_can_verify();
10088 
10089 	return 0;
10090 
10091 out_free_savedcmd:
10092 	free_saved_cmdlines_buffer(savedcmd);
10093 out_free_temp_buffer:
10094 	ring_buffer_free(temp_buffer);
10095 out_rm_hp_state:
10096 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10097 out_free_cpumask:
10098 	free_cpumask_var(global_trace.tracing_cpumask);
10099 out_free_buffer_mask:
10100 	free_cpumask_var(tracing_buffer_mask);
10101 out:
10102 	return ret;
10103 }
10104 
10105 void __init early_trace_init(void)
10106 {
10107 	if (tracepoint_printk) {
10108 		tracepoint_print_iter =
10109 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10110 		if (MEM_FAIL(!tracepoint_print_iter,
10111 			     "Failed to allocate trace iterator\n"))
10112 			tracepoint_printk = 0;
10113 		else
10114 			static_key_enable(&tracepoint_printk_key.key);
10115 	}
10116 	tracer_alloc_buffers();
10117 }
10118 
10119 void __init trace_init(void)
10120 {
10121 	trace_event_init();
10122 }
10123 
10124 __init static void clear_boot_tracer(void)
10125 {
10126 	/*
10127 	 * The default boot-up tracer name is kept in a buffer that lives
10128 	 * in an init section. This function is called at late_initcall
10129 	 * time; if the boot tracer was not found by now, clear it out to
10130 	 * prevent a later registration from accessing the buffer that is
10131 	 * about to be freed.
10132 	 */
10133 	if (!default_bootup_tracer)
10134 		return;
10135 
10136 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10137 	       default_bootup_tracer);
10138 	default_bootup_tracer = NULL;
10139 }
10140 
10141 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10142 __init static void tracing_set_default_clock(void)
10143 {
10144 	/* sched_clock_stable() is determined in late_initcall */
10145 	if (!trace_boot_clock && !sched_clock_stable()) {
10146 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10147 			pr_warn("Can not set tracing clock due to lockdown\n");
10148 			return;
10149 		}
10150 
10151 		printk(KERN_WARNING
10152 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10153 		       "If you want to keep using the local clock, then add:\n"
10154 		       "  \"trace_clock=local\"\n"
10155 		       "on the kernel command line\n");
10156 		tracing_set_clock(&global_trace, "global");
10157 	}
10158 }
10159 #else
10160 static inline void tracing_set_default_clock(void) { }
10161 #endif
10162 
10163 __init static int late_trace_init(void)
10164 {
10165 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10166 		static_key_disable(&tracepoint_printk_key.key);
10167 		tracepoint_printk = 0;
10168 	}
10169 
10170 	tracing_set_default_clock();
10171 	clear_boot_tracer();
10172 	return 0;
10173 }
10174 
10175 late_initcall_sync(late_trace_init);
10176