xref: /linux/kernel/trace/trace.c (revision 4b2b7b1e8730d51542c62ba75dabeb52243dfb49)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring buffer to count the
61  * entries inserted during the selftest, although concurrent
62  * insertions into the ring buffer, such as trace_printk(), could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66 
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72 
73 void __init disable_tracing_selftest(const char *reason)
74 {
75 	if (!tracing_selftest_disabled) {
76 		tracing_selftest_disabled = true;
77 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
78 	}
79 }
80 #else
81 #define tracing_selftest_running	0
82 #define tracing_selftest_disabled	0
83 #endif
84 
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  */
134 
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136 
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139 
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 	struct module			*mod;
144 	unsigned long			length;
145 };
146 
147 union trace_eval_map_item;
148 
149 struct trace_eval_map_tail {
150 	/*
151 	 * "end" is first and points to NULL as it must be different
152 	 * from "mod" or "eval_string"
153 	 */
154 	union trace_eval_map_item	*next;
155 	const char			*end;	/* points to NULL */
156 };
157 
158 static DEFINE_MUTEX(trace_eval_mutex);
159 
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168 	struct trace_eval_map		map;
169 	struct trace_eval_map_head	head;
170 	struct trace_eval_map_tail	tail;
171 };
172 
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175 
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 				   struct trace_buffer *buffer,
179 				   unsigned int trace_ctx);
180 
181 #define MAX_TRACER_SIZE		100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184 
185 static bool allocate_snapshot;
186 static bool snapshot_at_boot;
187 
188 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
189 static int boot_instance_index;
190 
191 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_snapshot_index;
193 
194 static int __init set_cmdline_ftrace(char *str)
195 {
196 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
197 	default_bootup_tracer = bootup_tracer_buf;
198 	/* We are using ftrace early, expand it */
199 	trace_set_ring_buffer_expanded(NULL);
200 	return 1;
201 }
202 __setup("ftrace=", set_cmdline_ftrace);
203 
204 static int __init set_ftrace_dump_on_oops(char *str)
205 {
206 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
207 		ftrace_dump_on_oops = DUMP_ALL;
208 		return 1;
209 	}
210 
211 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
212 		ftrace_dump_on_oops = DUMP_ORIG;
213 		return 1;
214 	}
215 
216 	return 0;
217 }
218 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
219 
220 static int __init stop_trace_on_warning(char *str)
221 {
222 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
223 		__disable_trace_on_warning = 1;
224 	return 1;
225 }
226 __setup("traceoff_on_warning", stop_trace_on_warning);
227 
228 static int __init boot_alloc_snapshot(char *str)
229 {
230 	char *slot = boot_snapshot_info + boot_snapshot_index;
231 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
232 	int ret;
233 
234 	if (str[0] == '=') {
235 		str++;
236 		if (strlen(str) >= left)
237 			return -1;
238 
239 		ret = snprintf(slot, left, "%s\t", str);
240 		boot_snapshot_index += ret;
241 	} else {
242 		allocate_snapshot = true;
243 		/* We also need the main ring buffer expanded */
244 		trace_set_ring_buffer_expanded(NULL);
245 	}
246 	return 1;
247 }
248 __setup("alloc_snapshot", boot_alloc_snapshot);
249 
250 
251 static int __init boot_snapshot(char *str)
252 {
253 	snapshot_at_boot = true;
254 	boot_alloc_snapshot(str);
255 	return 1;
256 }
257 __setup("ftrace_boot_snapshot", boot_snapshot);
258 
259 
260 static int __init boot_instance(char *str)
261 {
262 	char *slot = boot_instance_info + boot_instance_index;
263 	int left = sizeof(boot_instance_info) - boot_instance_index;
264 	int ret;
265 
266 	if (strlen(str) >= left)
267 		return -1;
268 
269 	ret = snprintf(slot, left, "%s\t", str);
270 	boot_instance_index += ret;
271 
272 	return 1;
273 }
274 __setup("trace_instance=", boot_instance);
275 
276 
277 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
278 
279 static int __init set_trace_boot_options(char *str)
280 {
281 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
282 	return 1;
283 }
284 __setup("trace_options=", set_trace_boot_options);
285 
286 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
287 static char *trace_boot_clock __initdata;
288 
289 static int __init set_trace_boot_clock(char *str)
290 {
291 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
292 	trace_boot_clock = trace_boot_clock_buf;
293 	return 1;
294 }
295 __setup("trace_clock=", set_trace_boot_clock);
296 
297 static int __init set_tracepoint_printk(char *str)
298 {
299 	/* Ignore the "tp_printk_stop_on_boot" param */
300 	if (*str == '_')
301 		return 0;
302 
303 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
304 		tracepoint_printk = 1;
305 	return 1;
306 }
307 __setup("tp_printk", set_tracepoint_printk);
308 
309 static int __init set_tracepoint_printk_stop(char *str)
310 {
311 	tracepoint_printk_stop_on_boot = true;
312 	return 1;
313 }
314 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
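/*
 * Illustrative sketch (not part of this file): the __setup() handlers above
 * parse boot-time parameters such as the ones below. The tracer name
 * "function" and the instance name "foo" are only example values; the
 * parameter names and the "orig_cpu"/"global" values come from the handlers
 * and the clock table defined in this file.
 *
 *	ftrace=function ftrace_dump_on_oops=orig_cpu traceoff_on_warning
 *	alloc_snapshot trace_clock=global trace_instance=foo tp_printk
 *	tp_printk_stop_on_boot
 */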
315 
316 unsigned long long ns2usecs(u64 nsec)
317 {
318 	nsec += 500;
319 	do_div(nsec, 1000);
320 	return nsec;
321 }
322 
323 static void
324 trace_process_export(struct trace_export *export,
325 	       struct ring_buffer_event *event, int flag)
326 {
327 	struct trace_entry *entry;
328 	unsigned int size = 0;
329 
330 	if (export->flags & flag) {
331 		entry = ring_buffer_event_data(event);
332 		size = ring_buffer_event_length(event);
333 		export->write(export, entry, size);
334 	}
335 }
336 
337 static DEFINE_MUTEX(ftrace_export_lock);
338 
339 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
340 
341 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
342 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
344 
345 static inline void ftrace_exports_enable(struct trace_export *export)
346 {
347 	if (export->flags & TRACE_EXPORT_FUNCTION)
348 		static_branch_inc(&trace_function_exports_enabled);
349 
350 	if (export->flags & TRACE_EXPORT_EVENT)
351 		static_branch_inc(&trace_event_exports_enabled);
352 
353 	if (export->flags & TRACE_EXPORT_MARKER)
354 		static_branch_inc(&trace_marker_exports_enabled);
355 }
356 
357 static inline void ftrace_exports_disable(struct trace_export *export)
358 {
359 	if (export->flags & TRACE_EXPORT_FUNCTION)
360 		static_branch_dec(&trace_function_exports_enabled);
361 
362 	if (export->flags & TRACE_EXPORT_EVENT)
363 		static_branch_dec(&trace_event_exports_enabled);
364 
365 	if (export->flags & TRACE_EXPORT_MARKER)
366 		static_branch_dec(&trace_marker_exports_enabled);
367 }
368 
369 static void ftrace_exports(struct ring_buffer_event *event, int flag)
370 {
371 	struct trace_export *export;
372 
373 	preempt_disable_notrace();
374 
375 	export = rcu_dereference_raw_check(ftrace_exports_list);
376 	while (export) {
377 		trace_process_export(export, event, flag);
378 		export = rcu_dereference_raw_check(export->next);
379 	}
380 
381 	preempt_enable_notrace();
382 }
383 
384 static inline void
385 add_trace_export(struct trace_export **list, struct trace_export *export)
386 {
387 	rcu_assign_pointer(export->next, *list);
388 	/*
389 	 * We are entering export into the list but another
390 	 * CPU might be walking that list. We need to make sure
391 	 * the export->next pointer is valid before another CPU sees
392 	 * the export pointer included into the list.
393 	 */
394 	rcu_assign_pointer(*list, export);
395 }
396 
397 static inline int
398 rm_trace_export(struct trace_export **list, struct trace_export *export)
399 {
400 	struct trace_export **p;
401 
402 	for (p = list; *p != NULL; p = &(*p)->next)
403 		if (*p == export)
404 			break;
405 
406 	if (*p != export)
407 		return -1;
408 
409 	rcu_assign_pointer(*p, (*p)->next);
410 
411 	return 0;
412 }
413 
414 static inline void
415 add_ftrace_export(struct trace_export **list, struct trace_export *export)
416 {
417 	ftrace_exports_enable(export);
418 
419 	add_trace_export(list, export);
420 }
421 
422 static inline int
423 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
424 {
425 	int ret;
426 
427 	ret = rm_trace_export(list, export);
428 	ftrace_exports_disable(export);
429 
430 	return ret;
431 }
432 
433 int register_ftrace_export(struct trace_export *export)
434 {
435 	if (WARN_ON_ONCE(!export->write))
436 		return -1;
437 
438 	mutex_lock(&ftrace_export_lock);
439 
440 	add_ftrace_export(&ftrace_exports_list, export);
441 
442 	mutex_unlock(&ftrace_export_lock);
443 
444 	return 0;
445 }
446 EXPORT_SYMBOL_GPL(register_ftrace_export);
447 
448 int unregister_ftrace_export(struct trace_export *export)
449 {
450 	int ret;
451 
452 	mutex_lock(&ftrace_export_lock);
453 
454 	ret = rm_ftrace_export(&ftrace_exports_list, export);
455 
456 	mutex_unlock(&ftrace_export_lock);
457 
458 	return ret;
459 }
460 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
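/*
 * Hedged usage sketch (not part of this file): a module that wants a copy of
 * function trace entries could register a trace_export roughly like the one
 * below. The callback name and body are hypothetical; the structure fields,
 * the TRACE_EXPORT_* flags, and the register/unregister calls are the ones
 * used here and declared in <linux/trace.h>.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_debug("exported %u bytes of trace data\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */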
461 
462 /* trace_flags holds trace_options default values */
463 #define TRACE_DEFAULT_FLAGS						\
464 	(FUNCTION_DEFAULT_FLAGS |					\
465 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
466 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
467 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
468 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
469 	 TRACE_ITER_HASH_PTR)
470 
471 /* trace_options that are only supported by global_trace */
472 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
473 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
474 
475 /* trace_flags that are default zero for instances */
476 #define ZEROED_TRACE_FLAGS \
477 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
478 
479 /*
480  * The global_trace is the descriptor that holds the top-level tracing
481  * buffers for the live tracing.
482  */
483 static struct trace_array global_trace = {
484 	.trace_flags = TRACE_DEFAULT_FLAGS,
485 };
486 
487 void trace_set_ring_buffer_expanded(struct trace_array *tr)
488 {
489 	if (!tr)
490 		tr = &global_trace;
491 	tr->ring_buffer_expanded = true;
492 }
493 
494 LIST_HEAD(ftrace_trace_arrays);
495 
496 int trace_array_get(struct trace_array *this_tr)
497 {
498 	struct trace_array *tr;
499 	int ret = -ENODEV;
500 
501 	mutex_lock(&trace_types_lock);
502 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503 		if (tr == this_tr) {
504 			tr->ref++;
505 			ret = 0;
506 			break;
507 		}
508 	}
509 	mutex_unlock(&trace_types_lock);
510 
511 	return ret;
512 }
513 
514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516 	WARN_ON(!this_tr->ref);
517 	this_tr->ref--;
518 }
519 
520 /**
521  * trace_array_put - Decrement the reference counter for this trace array.
522  * @this_tr : pointer to the trace array
523  *
524  * NOTE: Use this when we no longer need the trace array returned by
525  * trace_array_get_by_name(). This ensures the trace array can be later
526  * destroyed.
527  *
528  */
529 void trace_array_put(struct trace_array *this_tr)
530 {
531 	if (!this_tr)
532 		return;
533 
534 	mutex_lock(&trace_types_lock);
535 	__trace_array_put(this_tr);
536 	mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
539 
540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542 	int ret;
543 
544 	ret = security_locked_down(LOCKDOWN_TRACEFS);
545 	if (ret)
546 		return ret;
547 
548 	if (tracing_disabled)
549 		return -ENODEV;
550 
551 	if (tr && trace_array_get(tr) < 0)
552 		return -ENODEV;
553 
554 	return 0;
555 }
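/*
 * Hedged sketch (not part of this file): trace_array_get()/trace_array_put()
 * implement simple reference counting on a trace_array, and
 * tracing_check_open_get_tr() above bundles the lockdown, tracing_disabled
 * and refcount checks for file open paths. A typical open/release pair for a
 * tracefs file follows this pattern (the my_open/my_release names are
 * hypothetical):
 *
 *	static int my_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *		filp->private_data = tr;
 *		return 0;
 *	}
 *
 *	static int my_release(struct inode *inode, struct file *filp)
 *	{
 *		trace_array_put(filp->private_data);
 *		return 0;
 *	}
 */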
556 
557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558 			      struct trace_buffer *buffer,
559 			      struct ring_buffer_event *event)
560 {
561 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562 	    !filter_match_preds(call->filter, rec)) {
563 		__trace_event_discard_commit(buffer, event);
564 		return 1;
565 	}
566 
567 	return 0;
568 }
569 
570 /**
571  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572  * @filtered_pids: The list of pids to check
573  * @search_pid: The PID to find in @filtered_pids
574  *
575  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576  */
577 bool
578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580 	return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582 
583 /**
584  * trace_ignore_this_task - should a task be ignored for tracing
585  * @filtered_pids: The list of pids to check
586  * @filtered_no_pids: The list of pids not to be traced
587  * @task: The task that should be ignored if not filtered
588  *
589  * Checks if @task should be traced or not from @filtered_pids.
590  * Returns true if @task should *NOT* be traced.
591  * Returns false if @task should be traced.
592  */
593 bool
594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595 		       struct trace_pid_list *filtered_no_pids,
596 		       struct task_struct *task)
597 {
598 	/*
599 	 * If filtered_no_pids is not empty, and the task's pid is listed
600 	 * in filtered_no_pids, then return true.
601 	 * Otherwise, if filtered_pids is empty, that means we can
602 	 * trace all tasks. If it has content, then only trace pids
603 	 * within filtered_pids.
604 	 */
605 
606 	return (filtered_pids &&
607 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
608 		(filtered_no_pids &&
609 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
611 
612 /**
613  * trace_filter_add_remove_task - Add or remove a task from a pid_list
614  * @pid_list: The list to modify
615  * @self: The current task for fork or NULL for exit
616  * @task: The task to add or remove
617  *
618  * If adding a task, if @self is defined, the task is only added if @self
619  * is also included in @pid_list. This happens on fork and tasks should
620  * only be added when the parent is listed. If @self is NULL, then the
621  * @task pid will be removed from the list, which would happen on exit
622  * of a task.
623  */
624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625 				  struct task_struct *self,
626 				  struct task_struct *task)
627 {
628 	if (!pid_list)
629 		return;
630 
631 	/* For forks, we only add if the forking task is listed */
632 	if (self) {
633 		if (!trace_find_filtered_pid(pid_list, self->pid))
634 			return;
635 	}
636 
637 	/* "self" is set for forks, and NULL for exits */
638 	if (self)
639 		trace_pid_list_set(pid_list, task->pid);
640 	else
641 		trace_pid_list_clear(pid_list, task->pid);
642 }
643 
644 /**
645  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646  * @pid_list: The pid list to show
647  * @v: The last pid that was shown (the actual pid plus one, so that zero can be displayed)
648  * @pos: The position of the file
649  *
650  * This is used by the seq_file "next" operation to iterate the pids
651  * listed in a trace_pid_list structure.
652  *
653  * Returns the pid+1 as we want to display pid of zero, but NULL would
654  * stop the iteration.
655  */
656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658 	long pid = (unsigned long)v;
659 	unsigned int next;
660 
661 	(*pos)++;
662 
663 	/* pid already is +1 of the actual previous bit */
664 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
665 		return NULL;
666 
667 	pid = next;
668 
669 	/* Return pid + 1 to allow zero to be represented */
670 	return (void *)(pid + 1);
671 }
672 
673 /**
674  * trace_pid_start - Used for seq_file to start reading pid lists
675  * @pid_list: The pid list to show
676  * @pos: The position of the file
677  *
678  * This is used by seq_file "start" operation to start the iteration
679  * of listing pids.
680  *
681  * Returns the pid+1 as we want to display pid of zero, but NULL would
682  * stop the iteration.
683  */
684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686 	unsigned long pid;
687 	unsigned int first;
688 	loff_t l = 0;
689 
690 	if (trace_pid_list_first(pid_list, &first) < 0)
691 		return NULL;
692 
693 	pid = first;
694 
695 	/* Return pid + 1 so that zero can be the exit value */
696 	for (pid++; pid && l < *pos;
697 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698 		;
699 	return (void *)pid;
700 }
701 
702 /**
703  * trace_pid_show - show the current pid in seq_file processing
704  * @m: The seq_file structure to write into
705  * @v: A void pointer of the pid (+1) value to display
706  *
707  * Can be directly used by seq_file operations to display the current
708  * pid value.
709  */
710 int trace_pid_show(struct seq_file *m, void *v)
711 {
712 	unsigned long pid = (unsigned long)v - 1;
713 
714 	seq_printf(m, "%lu\n", pid);
715 	return 0;
716 }
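/*
 * Hedged sketch (not part of this file): the three helpers above are meant to
 * be plugged into seq_file operations. The wrapper names and the way the
 * pid_list is looked up are hypothetical, and the locking that in-tree users
 * take in .start/.stop is omitted; trace_pid_show() really is used directly
 * as the .show callback.
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(m->private, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */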
717 
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE		127
720 
721 int trace_pid_write(struct trace_pid_list *filtered_pids,
722 		    struct trace_pid_list **new_pid_list,
723 		    const char __user *ubuf, size_t cnt)
724 {
725 	struct trace_pid_list *pid_list;
726 	struct trace_parser parser;
727 	unsigned long val;
728 	int nr_pids = 0;
729 	ssize_t read = 0;
730 	ssize_t ret;
731 	loff_t pos;
732 	pid_t pid;
733 
734 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735 		return -ENOMEM;
736 
737 	/*
738 	 * Always create a new list. The write is an all-or-nothing
739 	 * operation: a new list is built when the user writes new pids,
740 	 * and if the operation fails, the current list is
741 	 * not modified.
742 	 */
743 	pid_list = trace_pid_list_alloc();
744 	if (!pid_list) {
745 		trace_parser_put(&parser);
746 		return -ENOMEM;
747 	}
748 
749 	if (filtered_pids) {
750 		/* copy the current bits to the new max */
751 		ret = trace_pid_list_first(filtered_pids, &pid);
752 		while (!ret) {
753 			trace_pid_list_set(pid_list, pid);
754 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755 			nr_pids++;
756 		}
757 	}
758 
759 	ret = 0;
760 	while (cnt > 0) {
761 
762 		pos = 0;
763 
764 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
765 		if (ret < 0)
766 			break;
767 
768 		read += ret;
769 		ubuf += ret;
770 		cnt -= ret;
771 
772 		if (!trace_parser_loaded(&parser))
773 			break;
774 
775 		ret = -EINVAL;
776 		if (kstrtoul(parser.buffer, 0, &val))
777 			break;
778 
779 		pid = (pid_t)val;
780 
781 		if (trace_pid_list_set(pid_list, pid) < 0) {
782 			ret = -1;
783 			break;
784 		}
785 		nr_pids++;
786 
787 		trace_parser_clear(&parser);
788 		ret = 0;
789 	}
790 	trace_parser_put(&parser);
791 
792 	if (ret < 0) {
793 		trace_pid_list_free(pid_list);
794 		return ret;
795 	}
796 
797 	if (!nr_pids) {
798 		/* Cleared the list of pids */
799 		trace_pid_list_free(pid_list);
800 		pid_list = NULL;
801 	}
802 
803 	*new_pid_list = pid_list;
804 
805 	return read;
806 }
807 
808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810 	u64 ts;
811 
812 	/* Early boot up does not have a buffer yet */
813 	if (!buf->buffer)
814 		return trace_clock_local();
815 
816 	ts = ring_buffer_time_stamp(buf->buffer);
817 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818 
819 	return ts;
820 }
821 
822 u64 ftrace_now(int cpu)
823 {
824 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826 
827 /**
828  * tracing_is_enabled - Show if global_trace has been enabled
829  *
830  * Shows if the global trace has been enabled or not. It uses the
831  * mirror flag "buffer_disabled" to be used in fast paths such as for
832  * the irqsoff tracer. But it may be inaccurate due to races. If you
833  * need to know the accurate state, use tracing_is_on() which is a little
834  * slower, but accurate.
835  */
836 int tracing_is_enabled(void)
837 {
838 	/*
839 	 * For quick access (irqsoff uses this in fast path), just
840 	 * return the mirror variable of the state of the ring buffer.
841 	 * It's a little racy, but we don't really care.
842 	 */
843 	smp_rmb();
844 	return !global_trace.buffer_disabled;
845 }
846 
847 /*
848  * trace_buf_size is the size in bytes that is allocated
849  * for a buffer. Note, the number of bytes is always rounded
850  * to page size.
851  *
852  * This number is purposely set to a low number of 16384.
853  * If a dump on oops happens, it is much appreciated not to have
854  * to wait for all that output. In any case, this is configurable
855  * at both boot time and run time.
856  */
857 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
858 
859 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860 
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer		*trace_types __read_mostly;
863 
864 /*
865  * trace_types_lock is used to protect the trace_types list.
866  */
867 DEFINE_MUTEX(trace_types_lock);
868 
869 /*
870  * serialize the access of the ring buffer
871  *
872  * ring buffer serializes readers, but it is low level protection.
873  * The validity of the events (which returns by ring_buffer_peek() ..etc)
874  * are not protected by ring buffer.
875  *
876  * The content of events may become garbage if we allow other process consumes
877  * these events concurrently:
878  *   A) the page of the consumed events may become a normal page
879  *      (not reader page) in ring buffer, and this page will be rewritten
880  *      by events producer.
881  *   B) The page of the consumed events may become a page for splice_read,
882  *      and this page will be returned to system.
883  *
884  * These primitives allow multi process access to different cpu ring buffer
885  * concurrently.
886  *
887  * These primitives don't distinguish read-only and read-consume access.
888  * Multi read-only access are also serialized.
889  */
890 
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894 
895 static inline void trace_access_lock(int cpu)
896 {
897 	if (cpu == RING_BUFFER_ALL_CPUS) {
898 		/* gain it for accessing the whole ring buffer. */
899 		down_write(&all_cpu_access_lock);
900 	} else {
901 		/* gain it for accessing a cpu ring buffer. */
902 
903 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904 		down_read(&all_cpu_access_lock);
905 
906 		/* Secondly block other access to this @cpu ring buffer. */
907 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
908 	}
909 }
910 
911 static inline void trace_access_unlock(int cpu)
912 {
913 	if (cpu == RING_BUFFER_ALL_CPUS) {
914 		up_write(&all_cpu_access_lock);
915 	} else {
916 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917 		up_read(&all_cpu_access_lock);
918 	}
919 }
920 
921 static inline void trace_access_lock_init(void)
922 {
923 	int cpu;
924 
925 	for_each_possible_cpu(cpu)
926 		mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928 
929 #else
930 
931 static DEFINE_MUTEX(access_lock);
932 
933 static inline void trace_access_lock(int cpu)
934 {
935 	(void)cpu;
936 	mutex_lock(&access_lock);
937 }
938 
939 static inline void trace_access_unlock(int cpu)
940 {
941 	(void)cpu;
942 	mutex_unlock(&access_lock);
943 }
944 
945 static inline void trace_access_lock_init(void)
946 {
947 }
948 
949 #endif
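/*
 * Hedged sketch (not part of this file's code, for orientation only): readers
 * in this file bracket their ring-buffer consumption with these helpers,
 * roughly like the lines below (the iterator and the consuming call shown
 * here are placeholders):
 *
 *	trace_access_lock(iter->cpu_file);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	trace_access_unlock(iter->cpu_file);
 */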
950 
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953 				 unsigned int trace_ctx,
954 				 int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956 				      struct trace_buffer *buffer,
957 				      unsigned int trace_ctx,
958 				      int skip, struct pt_regs *regs);
959 
960 #else
961 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962 					unsigned int trace_ctx,
963 					int skip, struct pt_regs *regs)
964 {
965 }
966 static inline void ftrace_trace_stack(struct trace_array *tr,
967 				      struct trace_buffer *buffer,
968 				      unsigned long trace_ctx,
969 				      int skip, struct pt_regs *regs)
970 {
971 }
972 
973 #endif
974 
975 static __always_inline void
976 trace_event_setup(struct ring_buffer_event *event,
977 		  int type, unsigned int trace_ctx)
978 {
979 	struct trace_entry *ent = ring_buffer_event_data(event);
980 
981 	tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983 
984 static __always_inline struct ring_buffer_event *
985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986 			  int type,
987 			  unsigned long len,
988 			  unsigned int trace_ctx)
989 {
990 	struct ring_buffer_event *event;
991 
992 	event = ring_buffer_lock_reserve(buffer, len);
993 	if (event != NULL)
994 		trace_event_setup(event, type, trace_ctx);
995 
996 	return event;
997 }
998 
999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001 	if (tr->array_buffer.buffer)
1002 		ring_buffer_record_on(tr->array_buffer.buffer);
1003 	/*
1004 	 * This flag is looked at when buffers haven't been allocated
1005 	 * yet, or by some tracers (like irqsoff) that just want to
1006 	 * know if the ring buffer has been disabled, but it can handle
1007 	 * races where it gets disabled while we still do a record.
1008 	 * As the check is in the fast path of the tracers, it is more
1009 	 * important to be fast than accurate.
1010 	 */
1011 	tr->buffer_disabled = 0;
1012 	/* Make the flag seen by readers */
1013 	smp_wmb();
1014 }
1015 
1016 /**
1017  * tracing_on - enable tracing buffers
1018  *
1019  * This function enables tracing buffers that may have been
1020  * disabled with tracing_off.
1021  */
1022 void tracing_on(void)
1023 {
1024 	tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027 
1028 
1029 static __always_inline void
1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032 	__this_cpu_write(trace_taskinfo_save, true);
1033 
1034 	/* If this is the temp buffer, we need to commit fully */
1035 	if (this_cpu_read(trace_buffered_event) == event) {
1036 		/* Length is in event->array[0] */
1037 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038 		/* Release the temp buffer */
1039 		this_cpu_dec(trace_buffered_event_cnt);
1040 		/* ring_buffer_unlock_commit() enables preemption */
1041 		preempt_enable_notrace();
1042 	} else
1043 		ring_buffer_unlock_commit(buffer);
1044 }
1045 
1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047 		       const char *str, int size)
1048 {
1049 	struct ring_buffer_event *event;
1050 	struct trace_buffer *buffer;
1051 	struct print_entry *entry;
1052 	unsigned int trace_ctx;
1053 	int alloc;
1054 
1055 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056 		return 0;
1057 
1058 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1059 		return 0;
1060 
1061 	if (unlikely(tracing_disabled))
1062 		return 0;
1063 
1064 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065 
1066 	trace_ctx = tracing_gen_ctx();
1067 	buffer = tr->array_buffer.buffer;
1068 	ring_buffer_nest_start(buffer);
1069 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070 					    trace_ctx);
1071 	if (!event) {
1072 		size = 0;
1073 		goto out;
1074 	}
1075 
1076 	entry = ring_buffer_event_data(event);
1077 	entry->ip = ip;
1078 
1079 	memcpy(&entry->buf, str, size);
1080 
1081 	/* Add a newline if necessary */
1082 	if (entry->buf[size - 1] != '\n') {
1083 		entry->buf[size] = '\n';
1084 		entry->buf[size + 1] = '\0';
1085 	} else
1086 		entry->buf[size] = '\0';
1087 
1088 	__buffer_unlock_commit(buffer, event);
1089 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090  out:
1091 	ring_buffer_nest_end(buffer);
1092 	return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095 
1096 /**
1097  * __trace_puts - write a constant string into the trace buffer.
1098  * @ip:	   The address of the caller
1099  * @str:   The constant string to write
1100  * @size:  The size of the string.
1101  */
1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104 	return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
1107 
1108 /**
1109  * __trace_bputs - write the pointer to a constant string into trace buffer
1110  * @ip:	   The address of the caller
1111  * @str:   The constant string to write to the buffer to
1112  */
1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115 	struct ring_buffer_event *event;
1116 	struct trace_buffer *buffer;
1117 	struct bputs_entry *entry;
1118 	unsigned int trace_ctx;
1119 	int size = sizeof(struct bputs_entry);
1120 	int ret = 0;
1121 
1122 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123 		return 0;
1124 
1125 	if (unlikely(tracing_selftest_running || tracing_disabled))
1126 		return 0;
1127 
1128 	trace_ctx = tracing_gen_ctx();
1129 	buffer = global_trace.array_buffer.buffer;
1130 
1131 	ring_buffer_nest_start(buffer);
1132 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133 					    trace_ctx);
1134 	if (!event)
1135 		goto out;
1136 
1137 	entry = ring_buffer_event_data(event);
1138 	entry->ip			= ip;
1139 	entry->str			= str;
1140 
1141 	__buffer_unlock_commit(buffer, event);
1142 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143 
1144 	ret = 1;
1145  out:
1146 	ring_buffer_nest_end(buffer);
1147 	return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
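/*
 * Hedged usage note (not part of this file): callers normally do not invoke
 * __trace_puts()/__trace_bputs() directly but go through the trace_puts()
 * macro from the kernel headers, which picks one of the two depending on
 * whether the string is a build-time constant, e.g.:
 *
 *	trace_puts("reached the interesting branch\n");
 */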
1150 
1151 #ifdef CONFIG_TRACER_SNAPSHOT
1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153 					   void *cond_data)
1154 {
1155 	struct tracer *tracer = tr->current_trace;
1156 	unsigned long flags;
1157 
1158 	if (in_nmi()) {
1159 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161 		return;
1162 	}
1163 
1164 	if (!tr->allocated_snapshot) {
1165 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167 		tracer_tracing_off(tr);
1168 		return;
1169 	}
1170 
1171 	/* Note, snapshot can not be used when the tracer uses it */
1172 	if (tracer->use_max_tr) {
1173 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175 		return;
1176 	}
1177 
1178 	local_irq_save(flags);
1179 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1180 	local_irq_restore(flags);
1181 }
1182 
1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185 	tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187 
1188 /**
1189  * tracing_snapshot - take a snapshot of the current buffer.
1190  *
1191  * This causes a swap between the snapshot buffer and the current live
1192  * tracing buffer. You can use this to take snapshots of the live
1193  * trace when some condition is triggered, but continue to trace.
1194  *
1195  * Note, make sure to allocate the snapshot with either
1196  * a tracing_snapshot_alloc(), or by doing it manually
1197  * with: echo 1 > /sys/kernel/tracing/snapshot
1198  *
1199  * If the snapshot buffer is not allocated, it will stop tracing.
1200  * Basically making a permanent snapshot.
1201  */
1202 void tracing_snapshot(void)
1203 {
1204 	struct trace_array *tr = &global_trace;
1205 
1206 	tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
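/*
 * Hedged sketch (not part of this file): a debugging patch might capture the
 * trace at an interesting moment like this (the condition is a placeholder):
 *
 *	if (suspicious_condition)
 *		tracing_snapshot();
 *
 * As the comment above notes, the snapshot buffer must already be allocated,
 * e.g. via tracing_snapshot_alloc() or
 * "echo 1 > /sys/kernel/tracing/snapshot"; otherwise tracing is stopped.
 */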
1209 
1210 /**
1211  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212  * @tr:		The tracing instance to snapshot
1213  * @cond_data:	The data to be tested conditionally, and possibly saved
1214  *
1215  * This is the same as tracing_snapshot() except that the snapshot is
1216  * conditional - the snapshot will only happen if the
1217  * cond_snapshot.update() implementation receiving the cond_data
1218  * returns true, which means that the trace array's cond_snapshot
1219  * update() operation used the cond_data to determine whether the
1220  * snapshot should be taken, and if it was, presumably saved it along
1221  * with the snapshot.
1222  */
1223 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224 {
1225 	tracing_snapshot_instance_cond(tr, cond_data);
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228 
1229 /**
1230  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231  * @tr:		The tracing instance
1232  *
1233  * When the user enables a conditional snapshot using
1234  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235  * with the snapshot.  This accessor is used to retrieve it.
1236  *
1237  * Should not be called from cond_snapshot.update(), since it takes
1238  * the tr->max_lock lock, which the code calling
1239  * cond_snapshot.update() has already done.
1240  *
1241  * Returns the cond_data associated with the trace array's snapshot.
1242  */
1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245 	void *cond_data = NULL;
1246 
1247 	local_irq_disable();
1248 	arch_spin_lock(&tr->max_lock);
1249 
1250 	if (tr->cond_snapshot)
1251 		cond_data = tr->cond_snapshot->cond_data;
1252 
1253 	arch_spin_unlock(&tr->max_lock);
1254 	local_irq_enable();
1255 
1256 	return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259 
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261 					struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263 
1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266 	int order;
1267 	int ret;
1268 
1269 	if (!tr->allocated_snapshot) {
1270 
1271 		/* Make the snapshot buffer have the same order as main buffer */
1272 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1273 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1274 		if (ret < 0)
1275 			return ret;
1276 
1277 		/* allocate spare buffer */
1278 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1279 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1280 		if (ret < 0)
1281 			return ret;
1282 
1283 		tr->allocated_snapshot = true;
1284 	}
1285 
1286 	return 0;
1287 }
1288 
1289 static void free_snapshot(struct trace_array *tr)
1290 {
1291 	/*
1292 	 * We don't free the ring buffer; instead, we resize it because
1293 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1294 	 * we want to preserve it.
1295 	 */
1296 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1297 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1298 	set_buffer_entries(&tr->max_buffer, 1);
1299 	tracing_reset_online_cpus(&tr->max_buffer);
1300 	tr->allocated_snapshot = false;
1301 }
1302 
1303 /**
1304  * tracing_alloc_snapshot - allocate snapshot buffer.
1305  *
1306  * This only allocates the snapshot buffer if it isn't already
1307  * allocated - it doesn't also take a snapshot.
1308  *
1309  * This is meant to be used in cases where the snapshot buffer needs
1310  * to be set up for events that can't sleep but need to be able to
1311  * trigger a snapshot.
1312  */
1313 int tracing_alloc_snapshot(void)
1314 {
1315 	struct trace_array *tr = &global_trace;
1316 	int ret;
1317 
1318 	ret = tracing_alloc_snapshot_instance(tr);
1319 	WARN_ON(ret < 0);
1320 
1321 	return ret;
1322 }
1323 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1324 
1325 /**
1326  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1327  *
1328  * This is similar to tracing_snapshot(), but it will allocate the
1329  * snapshot buffer if it isn't already allocated. Use this only
1330  * where it is safe to sleep, as the allocation may sleep.
1331  *
1332  * This causes a swap between the snapshot buffer and the current live
1333  * tracing buffer. You can use this to take snapshots of the live
1334  * trace when some condition is triggered, but continue to trace.
1335  */
1336 void tracing_snapshot_alloc(void)
1337 {
1338 	int ret;
1339 
1340 	ret = tracing_alloc_snapshot();
1341 	if (ret < 0)
1342 		return;
1343 
1344 	tracing_snapshot();
1345 }
1346 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1347 
1348 /**
1349  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1350  * @tr:		The tracing instance
1351  * @cond_data:	User data to associate with the snapshot
1352  * @update:	Implementation of the cond_snapshot update function
1353  *
1354  * Check whether the conditional snapshot for the given instance has
1355  * already been enabled, or if the current tracer is already using a
1356  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1357  * save the cond_data and update function inside.
1358  *
1359  * Returns 0 if successful, error otherwise.
1360  */
1361 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1362 				 cond_update_fn_t update)
1363 {
1364 	struct cond_snapshot *cond_snapshot;
1365 	int ret = 0;
1366 
1367 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1368 	if (!cond_snapshot)
1369 		return -ENOMEM;
1370 
1371 	cond_snapshot->cond_data = cond_data;
1372 	cond_snapshot->update = update;
1373 
1374 	mutex_lock(&trace_types_lock);
1375 
1376 	ret = tracing_alloc_snapshot_instance(tr);
1377 	if (ret)
1378 		goto fail_unlock;
1379 
1380 	if (tr->current_trace->use_max_tr) {
1381 		ret = -EBUSY;
1382 		goto fail_unlock;
1383 	}
1384 
1385 	/*
1386 	 * The cond_snapshot can only change to NULL without the
1387 	 * trace_types_lock. We don't care if we race with it going
1388 	 * to NULL, but we want to make sure that it's not set to
1389 	 * something other than NULL when we get here, which we can
1390 	 * do safely with only holding the trace_types_lock and not
1391 	 * having to take the max_lock.
1392 	 */
1393 	if (tr->cond_snapshot) {
1394 		ret = -EBUSY;
1395 		goto fail_unlock;
1396 	}
1397 
1398 	local_irq_disable();
1399 	arch_spin_lock(&tr->max_lock);
1400 	tr->cond_snapshot = cond_snapshot;
1401 	arch_spin_unlock(&tr->max_lock);
1402 	local_irq_enable();
1403 
1404 	mutex_unlock(&trace_types_lock);
1405 
1406 	return ret;
1407 
1408  fail_unlock:
1409 	mutex_unlock(&trace_types_lock);
1410 	kfree(cond_snapshot);
1411 	return ret;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
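/*
 * Hedged sketch (not part of this file): a user of conditional snapshots
 * supplies an update callback that decides, per call to
 * tracing_snapshot_cond(), whether the buffer swap should happen. The names
 * and the threshold logic below are hypothetical; the enable/cond/disable
 * calls are the ones defined in this file.
 *
 *	struct my_cond {
 *		u64 threshold;
 *		u64 worst;
 *	};
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_cond *c = cond_data;
 *
 *		return c->worst > c->threshold;
 *	}
 *
 *	err = tracing_snapshot_cond_enable(tr, &my_cond_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_cond_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */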
1414 
1415 /**
1416  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1417  * @tr:		The tracing instance
1418  *
1419  * Check whether the conditional snapshot for the given instance is
1420  * enabled; if so, free the cond_snapshot associated with it,
1421  * otherwise return -EINVAL.
1422  *
1423  * Returns 0 if successful, error otherwise.
1424  */
1425 int tracing_snapshot_cond_disable(struct trace_array *tr)
1426 {
1427 	int ret = 0;
1428 
1429 	local_irq_disable();
1430 	arch_spin_lock(&tr->max_lock);
1431 
1432 	if (!tr->cond_snapshot)
1433 		ret = -EINVAL;
1434 	else {
1435 		kfree(tr->cond_snapshot);
1436 		tr->cond_snapshot = NULL;
1437 	}
1438 
1439 	arch_spin_unlock(&tr->max_lock);
1440 	local_irq_enable();
1441 
1442 	return ret;
1443 }
1444 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1445 #else
1446 void tracing_snapshot(void)
1447 {
1448 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1449 }
1450 EXPORT_SYMBOL_GPL(tracing_snapshot);
1451 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1452 {
1453 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1456 int tracing_alloc_snapshot(void)
1457 {
1458 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1459 	return -ENODEV;
1460 }
1461 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1462 void tracing_snapshot_alloc(void)
1463 {
1464 	/* Give warning */
1465 	tracing_snapshot();
1466 }
1467 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1468 void *tracing_cond_snapshot_data(struct trace_array *tr)
1469 {
1470 	return NULL;
1471 }
1472 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1473 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1474 {
1475 	return -ENODEV;
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1478 int tracing_snapshot_cond_disable(struct trace_array *tr)
1479 {
1480 	return false;
1481 }
1482 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1483 #define free_snapshot(tr)	do { } while (0)
1484 #endif /* CONFIG_TRACER_SNAPSHOT */
1485 
1486 void tracer_tracing_off(struct trace_array *tr)
1487 {
1488 	if (tr->array_buffer.buffer)
1489 		ring_buffer_record_off(tr->array_buffer.buffer);
1490 	/*
1491 	 * This flag is looked at when buffers haven't been allocated
1492 	 * yet, or by some tracers (like irqsoff) that just want to
1493 	 * know if the ring buffer has been disabled, but it can handle
1494 	 * races where it gets disabled while we still do a record.
1495 	 * As the check is in the fast path of the tracers, it is more
1496 	 * important to be fast than accurate.
1497 	 */
1498 	tr->buffer_disabled = 1;
1499 	/* Make the flag seen by readers */
1500 	smp_wmb();
1501 }
1502 
1503 /**
1504  * tracing_off - turn off tracing buffers
1505  *
1506  * This function stops the tracing buffers from recording data.
1507  * It does not disable any overhead the tracers themselves may
1508  * be causing. This function simply causes all recording to
1509  * the ring buffers to fail.
1510  */
1511 void tracing_off(void)
1512 {
1513 	tracer_tracing_off(&global_trace);
1514 }
1515 EXPORT_SYMBOL_GPL(tracing_off);
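/*
 * Hedged usage note (not part of this file): tracing_off() is the classic
 * way to freeze the ring buffer the moment a bug is detected, so that the
 * events leading up to it are preserved (the condition check below is a
 * placeholder):
 *
 *	if (data_looks_corrupted(obj)) {
 *		trace_printk("corruption detected\n");
 *		tracing_off();
 *	}
 */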
1516 
1517 void disable_trace_on_warning(void)
1518 {
1519 	if (__disable_trace_on_warning) {
1520 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1521 			"Disabling tracing due to warning\n");
1522 		tracing_off();
1523 	}
1524 }
1525 
1526 /**
1527  * tracer_tracing_is_on - show real state of ring buffer enabled
1528  * @tr : the trace array to know if ring buffer is enabled
1529  *
1530  * Shows real state of the ring buffer if it is enabled or not.
1531  */
1532 bool tracer_tracing_is_on(struct trace_array *tr)
1533 {
1534 	if (tr->array_buffer.buffer)
1535 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1536 	return !tr->buffer_disabled;
1537 }
1538 
1539 /**
1540  * tracing_is_on - show state of ring buffers enabled
1541  */
1542 int tracing_is_on(void)
1543 {
1544 	return tracer_tracing_is_on(&global_trace);
1545 }
1546 EXPORT_SYMBOL_GPL(tracing_is_on);
1547 
1548 static int __init set_buf_size(char *str)
1549 {
1550 	unsigned long buf_size;
1551 
1552 	if (!str)
1553 		return 0;
1554 	buf_size = memparse(str, &str);
1555 	/*
1556 	 * nr_entries can not be zero and the startup
1557 	 * tests require some buffer space. Therefore
1558 	 * ensure we have at least 4096 bytes of buffer.
1559 	 */
1560 	trace_buf_size = max(4096UL, buf_size);
1561 	return 1;
1562 }
1563 __setup("trace_buf_size=", set_buf_size);
1564 
1565 static int __init set_tracing_thresh(char *str)
1566 {
1567 	unsigned long threshold;
1568 	int ret;
1569 
1570 	if (!str)
1571 		return 0;
1572 	ret = kstrtoul(str, 0, &threshold);
1573 	if (ret < 0)
1574 		return 0;
1575 	tracing_thresh = threshold * 1000;
1576 	return 1;
1577 }
1578 __setup("tracing_thresh=", set_tracing_thresh);
1579 
1580 unsigned long nsecs_to_usecs(unsigned long nsecs)
1581 {
1582 	return nsecs / 1000;
1583 }
1584 
1585 /*
1586  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1587  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1588  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1589  * of strings in the order that the evals (enum) were defined.
1590  */
1591 #undef C
1592 #define C(a, b) b
1593 
1594 /* These must match the bit positions in trace_iterator_flags */
1595 static const char *trace_options[] = {
1596 	TRACE_FLAGS
1597 	NULL
1598 };
1599 
1600 static struct {
1601 	u64 (*func)(void);
1602 	const char *name;
1603 	int in_ns;		/* is this clock in nanoseconds? */
1604 } trace_clocks[] = {
1605 	{ trace_clock_local,		"local",	1 },
1606 	{ trace_clock_global,		"global",	1 },
1607 	{ trace_clock_counter,		"counter",	0 },
1608 	{ trace_clock_jiffies,		"uptime",	0 },
1609 	{ trace_clock,			"perf",		1 },
1610 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1611 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1612 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1613 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1614 	ARCH_TRACE_CLOCKS
1615 };
1616 
1617 bool trace_clock_in_ns(struct trace_array *tr)
1618 {
1619 	if (trace_clocks[tr->clock_id].in_ns)
1620 		return true;
1621 
1622 	return false;
1623 }
1624 
1625 /*
1626  * trace_parser_get_init - gets the buffer for trace parser
1627  */
1628 int trace_parser_get_init(struct trace_parser *parser, int size)
1629 {
1630 	memset(parser, 0, sizeof(*parser));
1631 
1632 	parser->buffer = kmalloc(size, GFP_KERNEL);
1633 	if (!parser->buffer)
1634 		return 1;
1635 
1636 	parser->size = size;
1637 	return 0;
1638 }
1639 
1640 /*
1641  * trace_parser_put - frees the buffer for trace parser
1642  */
1643 void trace_parser_put(struct trace_parser *parser)
1644 {
1645 	kfree(parser->buffer);
1646 	parser->buffer = NULL;
1647 }
1648 
1649 /*
1650  * trace_get_user - reads the user input string separated by space
1651  * (matched by isspace(ch))
1652  *
1653  * For each string found the 'struct trace_parser' is updated,
1654  * and the function returns.
1655  *
1656  * Returns number of bytes read.
1657  *
1658  * See kernel/trace/trace.h for 'struct trace_parser' details.
1659  */
1660 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1661 	size_t cnt, loff_t *ppos)
1662 {
1663 	char ch;
1664 	size_t read = 0;
1665 	ssize_t ret;
1666 
1667 	if (!*ppos)
1668 		trace_parser_clear(parser);
1669 
1670 	ret = get_user(ch, ubuf++);
1671 	if (ret)
1672 		goto out;
1673 
1674 	read++;
1675 	cnt--;
1676 
1677 	/*
1678 	 * The parser is not finished with the last write,
1679 	 * continue reading the user input without skipping spaces.
1680 	 */
1681 	if (!parser->cont) {
1682 		/* skip white space */
1683 		while (cnt && isspace(ch)) {
1684 			ret = get_user(ch, ubuf++);
1685 			if (ret)
1686 				goto out;
1687 			read++;
1688 			cnt--;
1689 		}
1690 
1691 		parser->idx = 0;
1692 
1693 		/* only spaces were written */
1694 		if (isspace(ch) || !ch) {
1695 			*ppos += read;
1696 			ret = read;
1697 			goto out;
1698 		}
1699 	}
1700 
1701 	/* read the non-space input */
1702 	while (cnt && !isspace(ch) && ch) {
1703 		if (parser->idx < parser->size - 1)
1704 			parser->buffer[parser->idx++] = ch;
1705 		else {
1706 			ret = -EINVAL;
1707 			goto out;
1708 		}
1709 		ret = get_user(ch, ubuf++);
1710 		if (ret)
1711 			goto out;
1712 		read++;
1713 		cnt--;
1714 	}
1715 
1716 	/* We either got finished input or we have to wait for another call. */
1717 	if (isspace(ch) || !ch) {
1718 		parser->buffer[parser->idx] = 0;
1719 		parser->cont = false;
1720 	} else if (parser->idx < parser->size - 1) {
1721 		parser->cont = true;
1722 		parser->buffer[parser->idx++] = ch;
1723 		/* Make sure the parsed string always terminates with '\0'. */
1724 		parser->buffer[parser->idx] = 0;
1725 	} else {
1726 		ret = -EINVAL;
1727 		goto out;
1728 	}
1729 
1730 	*ppos += read;
1731 	ret = read;
1732 
1733 out:
1734 	return ret;
1735 }
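/*
 * Hedged sketch (not part of this file): trace_get_user() is meant to be
 * called in a loop from a tracefs write handler, with trace_parser_loaded()
 * and trace_parser_clear() deciding when a token is complete; see
 * trace_pid_write() above for an in-tree example. A minimal loop (with a
 * hypothetical handle_token() consumer) looks like:
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		handle_token(parser.buffer);
 *		trace_parser_clear(&parser);
 *	}
 */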
1736 
1737 /* TODO add a seq_buf_to_buffer() */
1738 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1739 {
1740 	int len;
1741 
1742 	if (trace_seq_used(s) <= s->readpos)
1743 		return -EBUSY;
1744 
1745 	len = trace_seq_used(s) - s->readpos;
1746 	if (cnt > len)
1747 		cnt = len;
1748 	memcpy(buf, s->buffer + s->readpos, cnt);
1749 
1750 	s->readpos += cnt;
1751 	return cnt;
1752 }
1753 
1754 unsigned long __read_mostly	tracing_thresh;
1755 
1756 #ifdef CONFIG_TRACER_MAX_TRACE
1757 static const struct file_operations tracing_max_lat_fops;
1758 
1759 #ifdef LATENCY_FS_NOTIFY
1760 
1761 static struct workqueue_struct *fsnotify_wq;
1762 
1763 static void latency_fsnotify_workfn(struct work_struct *work)
1764 {
1765 	struct trace_array *tr = container_of(work, struct trace_array,
1766 					      fsnotify_work);
1767 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1768 }
1769 
1770 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1771 {
1772 	struct trace_array *tr = container_of(iwork, struct trace_array,
1773 					      fsnotify_irqwork);
1774 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1775 }
1776 
1777 static void trace_create_maxlat_file(struct trace_array *tr,
1778 				     struct dentry *d_tracer)
1779 {
1780 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1781 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1782 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1783 					      TRACE_MODE_WRITE,
1784 					      d_tracer, tr,
1785 					      &tracing_max_lat_fops);
1786 }
1787 
1788 __init static int latency_fsnotify_init(void)
1789 {
1790 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1791 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1792 	if (!fsnotify_wq) {
1793 		pr_err("Unable to allocate tr_max_lat_wq\n");
1794 		return -ENOMEM;
1795 	}
1796 	return 0;
1797 }
1798 
1799 late_initcall_sync(latency_fsnotify_init);
1800 
1801 void latency_fsnotify(struct trace_array *tr)
1802 {
1803 	if (!fsnotify_wq)
1804 		return;
1805 	/*
1806 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1807 	 * possible that we are called from __schedule() or do_idle(), which
1808 	 * could cause a deadlock.
1809 	 */
1810 	irq_work_queue(&tr->fsnotify_irqwork);
1811 }
1812 
1813 #else /* !LATENCY_FS_NOTIFY */
1814 
1815 #define trace_create_maxlat_file(tr, d_tracer)				\
1816 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1817 			  d_tracer, tr, &tracing_max_lat_fops)
1818 
1819 #endif
1820 
1821 /*
1822  * Copy the new maximum trace into the separate maximum-trace
1823  * structure. (this way the maximum trace is permanently saved,
1824  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1825  */
1826 static void
1827 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1828 {
1829 	struct array_buffer *trace_buf = &tr->array_buffer;
1830 	struct array_buffer *max_buf = &tr->max_buffer;
1831 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1832 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1833 
1834 	max_buf->cpu = cpu;
1835 	max_buf->time_start = data->preempt_timestamp;
1836 
1837 	max_data->saved_latency = tr->max_latency;
1838 	max_data->critical_start = data->critical_start;
1839 	max_data->critical_end = data->critical_end;
1840 
1841 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1842 	max_data->pid = tsk->pid;
1843 	/*
1844 	 * If tsk == current, then use current_uid(), as that does not use
1845 	 * RCU. The irq tracer can be called out of RCU scope.
1846 	 */
1847 	if (tsk == current)
1848 		max_data->uid = current_uid();
1849 	else
1850 		max_data->uid = task_uid(tsk);
1851 
1852 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1853 	max_data->policy = tsk->policy;
1854 	max_data->rt_priority = tsk->rt_priority;
1855 
1856 	/* record this tasks comm */
1857 	tracing_record_cmdline(tsk);
1858 	latency_fsnotify(tr);
1859 }
1860 
1861 /**
1862  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1863  * @tr: tracer
1864  * @tsk: the task with the latency
1865  * @cpu: The cpu that initiated the trace.
1866  * @cond_data: User data associated with a conditional snapshot
1867  *
1868  * Flip the buffers between the @tr and the max_tr and record information
1869  * about which task was the cause of this latency.
1870  */
1871 void
1872 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1873 	      void *cond_data)
1874 {
1875 	if (tr->stop_count)
1876 		return;
1877 
1878 	WARN_ON_ONCE(!irqs_disabled());
1879 
1880 	if (!tr->allocated_snapshot) {
1881 		/* Only the nop tracer should hit this when disabling */
1882 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1883 		return;
1884 	}
1885 
1886 	arch_spin_lock(&tr->max_lock);
1887 
1888 	/* Inherit the recordable setting from array_buffer */
1889 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1890 		ring_buffer_record_on(tr->max_buffer.buffer);
1891 	else
1892 		ring_buffer_record_off(tr->max_buffer.buffer);
1893 
1894 #ifdef CONFIG_TRACER_SNAPSHOT
1895 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1896 		arch_spin_unlock(&tr->max_lock);
1897 		return;
1898 	}
1899 #endif
1900 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1901 
1902 	__update_max_tr(tr, tsk, cpu);
1903 
1904 	arch_spin_unlock(&tr->max_lock);
1905 
1906 	/* Any waiters on the old snapshot buffer need to wake up */
1907 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1908 }
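/*
 * A minimal usage sketch (the "delta" value below is hypothetical and
 * would be computed by the caller): a latency tracer that has just
 * measured a new worst-case latency records it, with interrupts
 * disabled, as
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */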
1909 
1910 /**
1911  * update_max_tr_single - only copy one trace over, and reset the rest
1912  * @tr: tracer
1913  * @tsk: task with the latency
1914  * @cpu: the cpu of the buffer to copy.
1915  *
1916  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1917  */
1918 void
1919 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1920 {
1921 	int ret;
1922 
1923 	if (tr->stop_count)
1924 		return;
1925 
1926 	WARN_ON_ONCE(!irqs_disabled());
1927 	if (!tr->allocated_snapshot) {
1928 		/* Only the nop tracer should hit this when disabling */
1929 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1930 		return;
1931 	}
1932 
1933 	arch_spin_lock(&tr->max_lock);
1934 
1935 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1936 
1937 	if (ret == -EBUSY) {
1938 		/*
1939 		 * We failed to swap the buffer due to a commit taking
1940 		 * place on this CPU. We fail to record, but we reset
1941 		 * the max trace buffer (no one writes directly to it)
1942 		 * and flag that it failed.
1943 		 * Another reason this can fail is that a resize is in progress.
1944 		 */
1945 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1946 			"Failed to swap buffers due to commit or resize in progress\n");
1947 	}
1948 
1949 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1950 
1951 	__update_max_tr(tr, tsk, cpu);
1952 	arch_spin_unlock(&tr->max_lock);
1953 }
1954 
1955 #endif /* CONFIG_TRACER_MAX_TRACE */
1956 
1957 static int wait_on_pipe(struct trace_iterator *iter, int full)
1958 {
1959 	int ret;
1960 
1961 	/* Iterators are static; they should be either filled or empty */
1962 	if (trace_buffer_iter(iter, iter->cpu_file))
1963 		return 0;
1964 
1965 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1966 
1967 #ifdef CONFIG_TRACER_MAX_TRACE
1968 	/*
1969 	 * Make sure this is still the snapshot buffer; if a snapshot happened
1970 	 * while waiting, this would now be the main buffer.
1971 	 */
1972 	if (iter->snapshot)
1973 		iter->array_buffer = &iter->tr->max_buffer;
1974 #endif
1975 	return ret;
1976 }
1977 
1978 #ifdef CONFIG_FTRACE_STARTUP_TEST
1979 static bool selftests_can_run;
1980 
1981 struct trace_selftests {
1982 	struct list_head		list;
1983 	struct tracer			*type;
1984 };
1985 
1986 static LIST_HEAD(postponed_selftests);
1987 
1988 static int save_selftest(struct tracer *type)
1989 {
1990 	struct trace_selftests *selftest;
1991 
1992 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1993 	if (!selftest)
1994 		return -ENOMEM;
1995 
1996 	selftest->type = type;
1997 	list_add(&selftest->list, &postponed_selftests);
1998 	return 0;
1999 }
2000 
2001 static int run_tracer_selftest(struct tracer *type)
2002 {
2003 	struct trace_array *tr = &global_trace;
2004 	struct tracer *saved_tracer = tr->current_trace;
2005 	int ret;
2006 
2007 	if (!type->selftest || tracing_selftest_disabled)
2008 		return 0;
2009 
2010 	/*
2011 	 * If a tracer registers early in boot up (before scheduling is
2012 	 * initialized and such), then do not run its selftest yet.
2013 	 * Instead, run it a little later in the boot process.
2014 	 */
2015 	if (!selftests_can_run)
2016 		return save_selftest(type);
2017 
2018 	if (!tracing_is_on()) {
2019 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2020 			type->name);
2021 		return 0;
2022 	}
2023 
2024 	/*
2025 	 * Run a selftest on this tracer.
2026 	 * Here we reset the trace buffer, and set the current
2027 	 * tracer to be this tracer. The tracer can then run some
2028 	 * internal tracing to verify that everything is in order.
2029 	 * If we fail, we do not register this tracer.
2030 	 */
2031 	tracing_reset_online_cpus(&tr->array_buffer);
2032 
2033 	tr->current_trace = type;
2034 
2035 #ifdef CONFIG_TRACER_MAX_TRACE
2036 	if (type->use_max_tr) {
2037 		/* If we expanded the buffers, make sure the max is expanded too */
2038 		if (tr->ring_buffer_expanded)
2039 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2040 					   RING_BUFFER_ALL_CPUS);
2041 		tr->allocated_snapshot = true;
2042 	}
2043 #endif
2044 
2045 	/* the test is responsible for initializing and enabling */
2046 	pr_info("Testing tracer %s: ", type->name);
2047 	ret = type->selftest(type, tr);
2048 	/* the test is responsible for resetting too */
2049 	tr->current_trace = saved_tracer;
2050 	if (ret) {
2051 		printk(KERN_CONT "FAILED!\n");
2052 		/* Add the warning after printing 'FAILED' */
2053 		WARN_ON(1);
2054 		return -1;
2055 	}
2056 	/* Only reset on passing, to avoid touching corrupted buffers */
2057 	tracing_reset_online_cpus(&tr->array_buffer);
2058 
2059 #ifdef CONFIG_TRACER_MAX_TRACE
2060 	if (type->use_max_tr) {
2061 		tr->allocated_snapshot = false;
2062 
2063 		/* Shrink the max buffer again */
2064 		if (tr->ring_buffer_expanded)
2065 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2066 					   RING_BUFFER_ALL_CPUS);
2067 	}
2068 #endif
2069 
2070 	printk(KERN_CONT "PASSED\n");
2071 	return 0;
2072 }
2073 
2074 static int do_run_tracer_selftest(struct tracer *type)
2075 {
2076 	int ret;
2077 
2078 	/*
2079 	 * Tests can take a long time, especially if they are run one after the
2080 	 * other, as does happen during bootup when all the tracers are
2081 	 * registered. This could cause the soft lockup watchdog to trigger.
2082 	 */
2083 	cond_resched();
2084 
2085 	tracing_selftest_running = true;
2086 	ret = run_tracer_selftest(type);
2087 	tracing_selftest_running = false;
2088 
2089 	return ret;
2090 }
2091 
2092 static __init int init_trace_selftests(void)
2093 {
2094 	struct trace_selftests *p, *n;
2095 	struct tracer *t, **last;
2096 	int ret;
2097 
2098 	selftests_can_run = true;
2099 
2100 	mutex_lock(&trace_types_lock);
2101 
2102 	if (list_empty(&postponed_selftests))
2103 		goto out;
2104 
2105 	pr_info("Running postponed tracer tests:\n");
2106 
2107 	tracing_selftest_running = true;
2108 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2109 		/* This loop can take minutes when sanitizers are enabled, so
2110 		 * let's make sure we allow RCU processing.
2111 		 */
2112 		cond_resched();
2113 		ret = run_tracer_selftest(p->type);
2114 		/* If the test fails, then warn and remove from available_tracers */
2115 		if (ret < 0) {
2116 			WARN(1, "tracer: %s failed selftest, disabling\n",
2117 			     p->type->name);
2118 			last = &trace_types;
2119 			for (t = trace_types; t; t = t->next) {
2120 				if (t == p->type) {
2121 					*last = t->next;
2122 					break;
2123 				}
2124 				last = &t->next;
2125 			}
2126 		}
2127 		list_del(&p->list);
2128 		kfree(p);
2129 	}
2130 	tracing_selftest_running = false;
2131 
2132  out:
2133 	mutex_unlock(&trace_types_lock);
2134 
2135 	return 0;
2136 }
2137 core_initcall(init_trace_selftests);
2138 #else
2139 static inline int run_tracer_selftest(struct tracer *type)
2140 {
2141 	return 0;
2142 }
2143 static inline int do_run_tracer_selftest(struct tracer *type)
2144 {
2145 	return 0;
2146 }
2147 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2148 
2149 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2150 
2151 static void __init apply_trace_boot_options(void);
2152 
2153 /**
2154  * register_tracer - register a tracer with the ftrace system.
2155  * @type: the plugin for the tracer
2156  *
2157  * Register a new plugin tracer.
2158  */
2159 int __init register_tracer(struct tracer *type)
2160 {
2161 	struct tracer *t;
2162 	int ret = 0;
2163 
2164 	if (!type->name) {
2165 		pr_info("Tracer must have a name\n");
2166 		return -1;
2167 	}
2168 
2169 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2170 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2171 		return -1;
2172 	}
2173 
2174 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2175 		pr_warn("Can not register tracer %s due to lockdown\n",
2176 			   type->name);
2177 		return -EPERM;
2178 	}
2179 
2180 	mutex_lock(&trace_types_lock);
2181 
2182 	for (t = trace_types; t; t = t->next) {
2183 		if (strcmp(type->name, t->name) == 0) {
2184 			/* already found */
2185 			pr_info("Tracer %s already registered\n",
2186 				type->name);
2187 			ret = -1;
2188 			goto out;
2189 		}
2190 	}
2191 
2192 	if (!type->set_flag)
2193 		type->set_flag = &dummy_set_flag;
2194 	if (!type->flags) {
2195 		/* allocate a dummy tracer_flags */
2196 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2197 		if (!type->flags) {
2198 			ret = -ENOMEM;
2199 			goto out;
2200 		}
2201 		type->flags->val = 0;
2202 		type->flags->opts = dummy_tracer_opt;
2203 	} else
2204 		if (!type->flags->opts)
2205 			type->flags->opts = dummy_tracer_opt;
2206 
2207 	/* store the tracer for __set_tracer_option */
2208 	type->flags->trace = type;
2209 
2210 	ret = do_run_tracer_selftest(type);
2211 	if (ret < 0)
2212 		goto out;
2213 
2214 	type->next = trace_types;
2215 	trace_types = type;
2216 	add_tracer_options(&global_trace, type);
2217 
2218  out:
2219 	mutex_unlock(&trace_types_lock);
2220 
2221 	if (ret || !default_bootup_tracer)
2222 		goto out_unlock;
2223 
2224 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2225 		goto out_unlock;
2226 
2227 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2228 	/* Do we want this tracer to start on bootup? */
2229 	tracing_set_tracer(&global_trace, type->name);
2230 	default_bootup_tracer = NULL;
2231 
2232 	apply_trace_boot_options();
2233 
2234 	/* disable other selftests, since this will break them. */
2235 	disable_tracing_selftest("running a tracer");
2236 
2237  out_unlock:
2238 	return ret;
2239 }
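/*
 * A minimal registration sketch (the "foo" names are invented for
 * illustration; in-tree tracers follow the same pattern from their own
 * __init functions):
 *
 *	static int foo_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void foo_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer foo_tracer __read_mostly = {
 *		.name	= "foo",
 *		.init	= foo_tracer_init,
 *		.reset	= foo_tracer_reset,
 *	};
 *
 *	static __init int init_foo_tracer(void)
 *	{
 *		return register_tracer(&foo_tracer);
 *	}
 *	core_initcall(init_foo_tracer);
 */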
2240 
2241 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2242 {
2243 	struct trace_buffer *buffer = buf->buffer;
2244 
2245 	if (!buffer)
2246 		return;
2247 
2248 	ring_buffer_record_disable(buffer);
2249 
2250 	/* Make sure all commits have finished */
2251 	synchronize_rcu();
2252 	ring_buffer_reset_cpu(buffer, cpu);
2253 
2254 	ring_buffer_record_enable(buffer);
2255 }
2256 
2257 void tracing_reset_online_cpus(struct array_buffer *buf)
2258 {
2259 	struct trace_buffer *buffer = buf->buffer;
2260 
2261 	if (!buffer)
2262 		return;
2263 
2264 	ring_buffer_record_disable(buffer);
2265 
2266 	/* Make sure all commits have finished */
2267 	synchronize_rcu();
2268 
2269 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2270 
2271 	ring_buffer_reset_online_cpus(buffer);
2272 
2273 	ring_buffer_record_enable(buffer);
2274 }
2275 
2276 /* Must have trace_types_lock held */
2277 void tracing_reset_all_online_cpus_unlocked(void)
2278 {
2279 	struct trace_array *tr;
2280 
2281 	lockdep_assert_held(&trace_types_lock);
2282 
2283 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2284 		if (!tr->clear_trace)
2285 			continue;
2286 		tr->clear_trace = false;
2287 		tracing_reset_online_cpus(&tr->array_buffer);
2288 #ifdef CONFIG_TRACER_MAX_TRACE
2289 		tracing_reset_online_cpus(&tr->max_buffer);
2290 #endif
2291 	}
2292 }
2293 
2294 void tracing_reset_all_online_cpus(void)
2295 {
2296 	mutex_lock(&trace_types_lock);
2297 	tracing_reset_all_online_cpus_unlocked();
2298 	mutex_unlock(&trace_types_lock);
2299 }
2300 
2301 /*
2302  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2303  * is the tgid last observed corresponding to pid=i.
2304  */
2305 static int *tgid_map;
2306 
2307 /* The maximum valid index into tgid_map. */
2308 static size_t tgid_map_max;
2309 
2310 #define SAVED_CMDLINES_DEFAULT 128
2311 #define NO_CMDLINE_MAP UINT_MAX
2312 /*
2313  * Preemption must be disabled before acquiring trace_cmdline_lock.
2314  * The various trace_arrays' max_lock must be acquired in a context
2315  * where interrupts are disabled.
2316  */
2317 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2318 struct saved_cmdlines_buffer {
2319 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2320 	unsigned *map_cmdline_to_pid;
2321 	unsigned cmdline_num;
2322 	int cmdline_idx;
2323 	char *saved_cmdlines;
2324 };
2325 static struct saved_cmdlines_buffer *savedcmd;
2326 
2327 static inline char *get_saved_cmdlines(int idx)
2328 {
2329 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2330 }
2331 
2332 static inline void set_cmdline(int idx, const char *cmdline)
2333 {
2334 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2335 }
2336 
2337 static int allocate_cmdlines_buffer(unsigned int val,
2338 				    struct saved_cmdlines_buffer *s)
2339 {
2340 	s->map_cmdline_to_pid = kmalloc_array(val,
2341 					      sizeof(*s->map_cmdline_to_pid),
2342 					      GFP_KERNEL);
2343 	if (!s->map_cmdline_to_pid)
2344 		return -ENOMEM;
2345 
2346 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2347 	if (!s->saved_cmdlines) {
2348 		kfree(s->map_cmdline_to_pid);
2349 		return -ENOMEM;
2350 	}
2351 
2352 	s->cmdline_idx = 0;
2353 	s->cmdline_num = val;
2354 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2355 	       sizeof(s->map_pid_to_cmdline));
2356 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2357 	       val * sizeof(*s->map_cmdline_to_pid));
2358 
2359 	return 0;
2360 }
2361 
2362 static int trace_create_savedcmd(void)
2363 {
2364 	int ret;
2365 
2366 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2367 	if (!savedcmd)
2368 		return -ENOMEM;
2369 
2370 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2371 	if (ret < 0) {
2372 		kfree(savedcmd);
2373 		savedcmd = NULL;
2374 		return -ENOMEM;
2375 	}
2376 
2377 	return 0;
2378 }
2379 
2380 int is_tracing_stopped(void)
2381 {
2382 	return global_trace.stop_count;
2383 }
2384 
2385 static void tracing_start_tr(struct trace_array *tr)
2386 {
2387 	struct trace_buffer *buffer;
2388 	unsigned long flags;
2389 
2390 	if (tracing_disabled)
2391 		return;
2392 
2393 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2394 	if (--tr->stop_count) {
2395 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2396 			/* Someone screwed up their debugging */
2397 			tr->stop_count = 0;
2398 		}
2399 		goto out;
2400 	}
2401 
2402 	/* Prevent the buffers from switching */
2403 	arch_spin_lock(&tr->max_lock);
2404 
2405 	buffer = tr->array_buffer.buffer;
2406 	if (buffer)
2407 		ring_buffer_record_enable(buffer);
2408 
2409 #ifdef CONFIG_TRACER_MAX_TRACE
2410 	buffer = tr->max_buffer.buffer;
2411 	if (buffer)
2412 		ring_buffer_record_enable(buffer);
2413 #endif
2414 
2415 	arch_spin_unlock(&tr->max_lock);
2416 
2417  out:
2418 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2419 }
2420 
2421 /**
2422  * tracing_start - quick start of the tracer
2423  *
2424  * If tracing is enabled but was stopped by tracing_stop,
2425  * this will start the tracer back up.
2426  */
2427 void tracing_start(void)
2428 {
2430 	return tracing_start_tr(&global_trace);
2431 }
2432 
2433 static void tracing_stop_tr(struct trace_array *tr)
2434 {
2435 	struct trace_buffer *buffer;
2436 	unsigned long flags;
2437 
2438 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2439 	if (tr->stop_count++)
2440 		goto out;
2441 
2442 	/* Prevent the buffers from switching */
2443 	arch_spin_lock(&tr->max_lock);
2444 
2445 	buffer = tr->array_buffer.buffer;
2446 	if (buffer)
2447 		ring_buffer_record_disable(buffer);
2448 
2449 #ifdef CONFIG_TRACER_MAX_TRACE
2450 	buffer = tr->max_buffer.buffer;
2451 	if (buffer)
2452 		ring_buffer_record_disable(buffer);
2453 #endif
2454 
2455 	arch_spin_unlock(&tr->max_lock);
2456 
2457  out:
2458 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2459 }
2460 
2461 /**
2462  * tracing_stop - quick stop of the tracer
2463  *
2464  * Lightweight way to stop tracing. Use in conjunction with
2465  * tracing_start.
2466  */
2467 void tracing_stop(void)
2468 {
2469 	return tracing_stop_tr(&global_trace);
2470 }
2471 
2472 static int trace_save_cmdline(struct task_struct *tsk)
2473 {
2474 	unsigned tpid, idx;
2475 
2476 	/* treat recording of idle task as a success */
2477 	if (!tsk->pid)
2478 		return 1;
2479 
2480 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
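	/*
	 * A worked example of the masking above (assuming the usual
	 * PID_MAX_DEFAULT of 0x8000): pid 40000 maps to slot
	 * 40000 & 0x7fff == 7232, so two pids that differ by a multiple
	 * of 0x8000 share a slot and the most recent writer wins.
	 */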
2481 
2482 	/*
2483 	 * It's not the end of the world if we don't get
2484 	 * the lock, but we also don't want to spin
2485 	 * nor do we want to disable interrupts,
2486 	 * so if we miss here, then better luck next time.
2487 	 *
2488 	 * This is called from within the scheduler and wakeups, so interrupts
2489 	 * had better be disabled and the run queue lock had better be held.
2490 	 */
2491 	lockdep_assert_preemption_disabled();
2492 	if (!arch_spin_trylock(&trace_cmdline_lock))
2493 		return 0;
2494 
2495 	idx = savedcmd->map_pid_to_cmdline[tpid];
2496 	if (idx == NO_CMDLINE_MAP) {
2497 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2498 
2499 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2500 		savedcmd->cmdline_idx = idx;
2501 	}
2502 
2503 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2504 	set_cmdline(idx, tsk->comm);
2505 
2506 	arch_spin_unlock(&trace_cmdline_lock);
2507 
2508 	return 1;
2509 }
2510 
2511 static void __trace_find_cmdline(int pid, char comm[])
2512 {
2513 	unsigned map;
2514 	int tpid;
2515 
2516 	if (!pid) {
2517 		strcpy(comm, "<idle>");
2518 		return;
2519 	}
2520 
2521 	if (WARN_ON_ONCE(pid < 0)) {
2522 		strcpy(comm, "<XXX>");
2523 		return;
2524 	}
2525 
2526 	tpid = pid & (PID_MAX_DEFAULT - 1);
2527 	map = savedcmd->map_pid_to_cmdline[tpid];
2528 	if (map != NO_CMDLINE_MAP) {
2529 		tpid = savedcmd->map_cmdline_to_pid[map];
2530 		if (tpid == pid) {
2531 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2532 			return;
2533 		}
2534 	}
2535 	strcpy(comm, "<...>");
2536 }
2537 
2538 void trace_find_cmdline(int pid, char comm[])
2539 {
2540 	preempt_disable();
2541 	arch_spin_lock(&trace_cmdline_lock);
2542 
2543 	__trace_find_cmdline(pid, comm);
2544 
2545 	arch_spin_unlock(&trace_cmdline_lock);
2546 	preempt_enable();
2547 }
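/*
 * A minimal usage sketch, similar to what the output code does when
 * printing an event ("ent" and "s" are hypothetical locals holding the
 * trace entry and the output trace_seq):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(ent->pid, comm);
 *	trace_seq_printf(s, "%s-%d", comm, ent->pid);
 */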
2548 
2549 static int *trace_find_tgid_ptr(int pid)
2550 {
2551 	/*
2552 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2553 	 * if we observe a non-NULL tgid_map then we also observe the correct
2554 	 * tgid_map_max.
2555 	 */
2556 	int *map = smp_load_acquire(&tgid_map);
2557 
2558 	if (unlikely(!map || pid > tgid_map_max))
2559 		return NULL;
2560 
2561 	return &map[pid];
2562 }
2563 
2564 int trace_find_tgid(int pid)
2565 {
2566 	int *ptr = trace_find_tgid_ptr(pid);
2567 
2568 	return ptr ? *ptr : 0;
2569 }
2570 
2571 static int trace_save_tgid(struct task_struct *tsk)
2572 {
2573 	int *ptr;
2574 
2575 	/* treat recording of idle task as a success */
2576 	if (!tsk->pid)
2577 		return 1;
2578 
2579 	ptr = trace_find_tgid_ptr(tsk->pid);
2580 	if (!ptr)
2581 		return 0;
2582 
2583 	*ptr = tsk->tgid;
2584 	return 1;
2585 }
2586 
2587 static bool tracing_record_taskinfo_skip(int flags)
2588 {
2589 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2590 		return true;
2591 	if (!__this_cpu_read(trace_taskinfo_save))
2592 		return true;
2593 	return false;
2594 }
2595 
2596 /**
2597  * tracing_record_taskinfo - record the task info of a task
2598  *
2599  * @task:  task to record
2600  * @flags: TRACE_RECORD_CMDLINE for recording comm
2601  *         TRACE_RECORD_TGID for recording tgid
2602  */
2603 void tracing_record_taskinfo(struct task_struct *task, int flags)
2604 {
2605 	bool done;
2606 
2607 	if (tracing_record_taskinfo_skip(flags))
2608 		return;
2609 
2610 	/*
2611 	 * Record as much task information as possible. If some fail, continue
2612 	 * to try to record the others.
2613 	 */
2614 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2615 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2616 
2617 	/* If recording any information failed, retry again soon. */
2618 	if (!done)
2619 		return;
2620 
2621 	__this_cpu_write(trace_taskinfo_save, false);
2622 }
2623 
2624 /**
2625  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2626  *
2627  * @prev: previous task during sched_switch
2628  * @next: next task during sched_switch
2629  * @flags: TRACE_RECORD_CMDLINE for recording comm
2630  *         TRACE_RECORD_TGID for recording tgid
2631  */
2632 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2633 					  struct task_struct *next, int flags)
2634 {
2635 	bool done;
2636 
2637 	if (tracing_record_taskinfo_skip(flags))
2638 		return;
2639 
2640 	/*
2641 	 * Record as much task information as possible. If some fail, continue
2642 	 * to try to record the others.
2643 	 */
2644 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2645 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2646 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2647 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2648 
2649 	/* If recording any information failed, retry again soon. */
2650 	if (!done)
2651 		return;
2652 
2653 	__this_cpu_write(trace_taskinfo_save, false);
2654 }
2655 
2656 /* Helpers to record a specific task information */
2657 void tracing_record_cmdline(struct task_struct *task)
2658 {
2659 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2660 }
2661 
2662 void tracing_record_tgid(struct task_struct *task)
2663 {
2664 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2665 }
2666 
2667 /*
2668  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2669  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2670  * simplifies those functions and keeps them in sync.
2671  */
2672 enum print_line_t trace_handle_return(struct trace_seq *s)
2673 {
2674 	return trace_seq_has_overflowed(s) ?
2675 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2676 }
2677 EXPORT_SYMBOL_GPL(trace_handle_return);
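/*
 * A minimal usage sketch of trace_handle_return() (the "foo" names are
 * hypothetical): an event's output callback formats into iter->seq and
 * reports overflow through this helper:
 *
 *	static enum print_line_t foo_trace_output(struct trace_iterator *iter,
 *						  int flags,
 *						  struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo fired\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */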
2678 
2679 static unsigned short migration_disable_value(void)
2680 {
2681 #if defined(CONFIG_SMP)
2682 	return current->migration_disabled;
2683 #else
2684 	return 0;
2685 #endif
2686 }
2687 
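/*
 * A sketch of how the value built below is laid out: bits 0-3 hold the
 * preempt count (capped at 0xf), bits 4-7 hold the migration-disable
 * depth (also capped at 0xf), and bits 16 and up hold the TRACE_FLAG_*
 * bits derived from @irqs_status and the checks below.
 */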
2688 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2689 {
2690 	unsigned int trace_flags = irqs_status;
2691 	unsigned int pc;
2692 
2693 	pc = preempt_count();
2694 
2695 	if (pc & NMI_MASK)
2696 		trace_flags |= TRACE_FLAG_NMI;
2697 	if (pc & HARDIRQ_MASK)
2698 		trace_flags |= TRACE_FLAG_HARDIRQ;
2699 	if (in_serving_softirq())
2700 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2701 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2702 		trace_flags |= TRACE_FLAG_BH_OFF;
2703 
2704 	if (tif_need_resched())
2705 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2706 	if (test_preempt_need_resched())
2707 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2708 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2709 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2710 }
2711 
2712 struct ring_buffer_event *
2713 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2714 			  int type,
2715 			  unsigned long len,
2716 			  unsigned int trace_ctx)
2717 {
2718 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2719 }
2720 
2721 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2722 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2723 static int trace_buffered_event_ref;
2724 
2725 /**
2726  * trace_buffered_event_enable - enable buffering events
2727  *
2728  * When events are being filtered, it is quicker to use a temporary
2729  * buffer to write the event data into if there's a likely chance
2730  * that it will not be committed. Discarding an event from the ring
2731  * buffer is not as fast as committing it, and is much slower than
2732  * copying the data and then committing.
2733  *
2734  * When an event is to be filtered, allocate per cpu buffers to
2735  * write the event data into; if the event is filtered and discarded
2736  * it is simply dropped, otherwise the entire data is committed
2737  * in one shot.
2738  */
2739 void trace_buffered_event_enable(void)
2740 {
2741 	struct ring_buffer_event *event;
2742 	struct page *page;
2743 	int cpu;
2744 
2745 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2746 
2747 	if (trace_buffered_event_ref++)
2748 		return;
2749 
2750 	for_each_tracing_cpu(cpu) {
2751 		page = alloc_pages_node(cpu_to_node(cpu),
2752 					GFP_KERNEL | __GFP_NORETRY, 0);
2753 		/* This is just an optimization and can handle failures */
2754 		if (!page) {
2755 			pr_err("Failed to allocate event buffer\n");
2756 			break;
2757 		}
2758 
2759 		event = page_address(page);
2760 		memset(event, 0, sizeof(*event));
2761 
2762 		per_cpu(trace_buffered_event, cpu) = event;
2763 
2764 		preempt_disable();
2765 		if (cpu == smp_processor_id() &&
2766 		    __this_cpu_read(trace_buffered_event) !=
2767 		    per_cpu(trace_buffered_event, cpu))
2768 			WARN_ON_ONCE(1);
2769 		preempt_enable();
2770 	}
2771 }
2772 
2773 static void enable_trace_buffered_event(void *data)
2774 {
2775 	/* Probably not needed, but do it anyway */
2776 	smp_rmb();
2777 	this_cpu_dec(trace_buffered_event_cnt);
2778 }
2779 
2780 static void disable_trace_buffered_event(void *data)
2781 {
2782 	this_cpu_inc(trace_buffered_event_cnt);
2783 }
2784 
2785 /**
2786  * trace_buffered_event_disable - disable buffering events
2787  *
2788  * When a filter is removed, it is faster to not use the buffered
2789  * events, and to commit directly into the ring buffer. Free up
2790  * the temp buffers when there are no more users. This requires
2791  * special synchronization with current events.
2792  */
2793 void trace_buffered_event_disable(void)
2794 {
2795 	int cpu;
2796 
2797 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2798 
2799 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2800 		return;
2801 
2802 	if (--trace_buffered_event_ref)
2803 		return;
2804 
2805 	/* For each CPU, set the buffer as used. */
2806 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2807 			 NULL, true);
2808 
2809 	/* Wait for all current users to finish */
2810 	synchronize_rcu();
2811 
2812 	for_each_tracing_cpu(cpu) {
2813 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2814 		per_cpu(trace_buffered_event, cpu) = NULL;
2815 	}
2816 
2817 	/*
2818 	 * Wait for all CPUs that potentially started checking whether they can
2819 	 * use their event buffer only after the previous synchronize_rcu() call
2820 	 * and still read a valid pointer from trace_buffered_event. They must
2821 	 * not see the cleared trace_buffered_event_cnt, else they could wrongly
2822 	 * decide to use the pointed-to buffer which is now freed.
2823 	 */
2824 	synchronize_rcu();
2825 
2826 	/* For each CPU, relinquish the buffer */
2827 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2828 			 true);
2829 }
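/*
 * A minimal usage sketch of the enable/disable pair (the surrounding
 * filter management is hypothetical): both calls must be made with
 * event_mutex held, and the pair is reference counted, so nested users
 * are fine:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *	...
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */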
2830 
2831 static struct trace_buffer *temp_buffer;
2832 
2833 struct ring_buffer_event *
2834 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2835 			  struct trace_event_file *trace_file,
2836 			  int type, unsigned long len,
2837 			  unsigned int trace_ctx)
2838 {
2839 	struct ring_buffer_event *entry;
2840 	struct trace_array *tr = trace_file->tr;
2841 	int val;
2842 
2843 	*current_rb = tr->array_buffer.buffer;
2844 
2845 	if (!tr->no_filter_buffering_ref &&
2846 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2847 		preempt_disable_notrace();
2848 		/*
2849 		 * Filtering is on, so try to use the per cpu buffer first.
2850 		 * This buffer will simulate a ring_buffer_event,
2851 		 * where the type_len is zero and the array[0] will
2852 		 * hold the full length.
2853 		 * (see include/linux/ring_buffer.h for details on
2854 		 *  how the ring_buffer_event is structured).
2855 		 *
2856 		 * Using a temp buffer during filtering and copying it
2857 		 * on a matched filter is quicker than writing directly
2858 		 * into the ring buffer and then discarding it when
2859 		 * it doesn't match. That is because the discard
2860 		 * requires several atomic operations to get right.
2861 		 * Copying on a match and doing nothing on a failed match
2862 		 * is still quicker than skipping the copy but then having
2863 		 * to discard out of the ring buffer on a failed match.
2864 		 */
2865 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2866 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2867 
2868 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2869 
2870 			/*
2871 			 * Preemption is disabled, but interrupts and NMIs
2872 			 * can still come in now. If that happens after
2873 			 * the above increment, then it will have to go
2874 			 * back to the old method of allocating the event
2875 			 * on the ring buffer, and if the filter fails, it
2876 			 * will have to call ring_buffer_discard_commit()
2877 			 * to remove it.
2878 			 *
2879 			 * Need to also check the unlikely case that the
2880 			 * length is bigger than the temp buffer size.
2881 			 * If that happens, then the reserve is pretty much
2882 			 * guaranteed to fail, as the ring buffer currently
2883 			 * only allows events less than a page. But that may
2884 			 * change in the future, so let the ring buffer reserve
2885 			 * handle the failure in that case.
2886 			 */
2887 			if (val == 1 && likely(len <= max_len)) {
2888 				trace_event_setup(entry, type, trace_ctx);
2889 				entry->array[0] = len;
2890 				/* Return with preemption disabled */
2891 				return entry;
2892 			}
2893 			this_cpu_dec(trace_buffered_event_cnt);
2894 		}
2895 		/* __trace_buffer_lock_reserve() disables preemption */
2896 		preempt_enable_notrace();
2897 	}
2898 
2899 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2900 					    trace_ctx);
2901 	/*
2902 	 * If tracing is off but we have triggers enabled,
2903 	 * we still need to look at the event data. Use the temp_buffer
2904 	 * to store the trace event for the trigger to use. It's recursion
2905 	 * safe and will not be recorded anywhere.
2906 	 */
2907 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2908 		*current_rb = temp_buffer;
2909 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2910 						    trace_ctx);
2911 	}
2912 	return entry;
2913 }
2914 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2915 
2916 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2917 static DEFINE_MUTEX(tracepoint_printk_mutex);
2918 
2919 static void output_printk(struct trace_event_buffer *fbuffer)
2920 {
2921 	struct trace_event_call *event_call;
2922 	struct trace_event_file *file;
2923 	struct trace_event *event;
2924 	unsigned long flags;
2925 	struct trace_iterator *iter = tracepoint_print_iter;
2926 
2927 	/* We should never get here if iter is NULL */
2928 	if (WARN_ON_ONCE(!iter))
2929 		return;
2930 
2931 	event_call = fbuffer->trace_file->event_call;
2932 	if (!event_call || !event_call->event.funcs ||
2933 	    !event_call->event.funcs->trace)
2934 		return;
2935 
2936 	file = fbuffer->trace_file;
2937 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2938 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2939 	     !filter_match_preds(file->filter, fbuffer->entry)))
2940 		return;
2941 
2942 	event = &fbuffer->trace_file->event_call->event;
2943 
2944 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2945 	trace_seq_init(&iter->seq);
2946 	iter->ent = fbuffer->entry;
2947 	event_call->event.funcs->trace(iter, 0, event);
2948 	trace_seq_putc(&iter->seq, 0);
2949 	printk("%s", iter->seq.buffer);
2950 
2951 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2952 }
2953 
2954 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2955 			     void *buffer, size_t *lenp,
2956 			     loff_t *ppos)
2957 {
2958 	int save_tracepoint_printk;
2959 	int ret;
2960 
2961 	mutex_lock(&tracepoint_printk_mutex);
2962 	save_tracepoint_printk = tracepoint_printk;
2963 
2964 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2965 
2966 	/*
2967 	 * This will force exiting early, as tracepoint_printk
2968 	 * is always zero when tracepoint_print_iter is not allocated.
2969 	 */
2970 	if (!tracepoint_print_iter)
2971 		tracepoint_printk = 0;
2972 
2973 	if (save_tracepoint_printk == tracepoint_printk)
2974 		goto out;
2975 
2976 	if (tracepoint_printk)
2977 		static_key_enable(&tracepoint_printk_key.key);
2978 	else
2979 		static_key_disable(&tracepoint_printk_key.key);
2980 
2981  out:
2982 	mutex_unlock(&tracepoint_printk_mutex);
2983 
2984 	return ret;
2985 }
2986 
2987 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2988 {
2989 	enum event_trigger_type tt = ETT_NONE;
2990 	struct trace_event_file *file = fbuffer->trace_file;
2991 
2992 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2993 			fbuffer->entry, &tt))
2994 		goto discard;
2995 
2996 	if (static_key_false(&tracepoint_printk_key.key))
2997 		output_printk(fbuffer);
2998 
2999 	if (static_branch_unlikely(&trace_event_exports_enabled))
3000 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3001 
3002 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3003 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3004 
3005 discard:
3006 	if (tt)
3007 		event_triggers_post_call(file, tt);
3008 
3009 }
3010 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3011 
3012 /*
3013  * Skip 3:
3014  *
3015  *   trace_buffer_unlock_commit_regs()
3016  *   trace_event_buffer_commit()
3017  *   trace_event_raw_event_xxx()
3018  */
3019 # define STACK_SKIP 3
3020 
3021 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3022 				     struct trace_buffer *buffer,
3023 				     struct ring_buffer_event *event,
3024 				     unsigned int trace_ctx,
3025 				     struct pt_regs *regs)
3026 {
3027 	__buffer_unlock_commit(buffer, event);
3028 
3029 	/*
3030 	 * If regs is not set, then skip the STACK_SKIP wrapper functions.
3031 	 * Note, we can still get here via blktrace, wakeup tracer
3032 	 * and mmiotrace, but that's ok if they lose a function or
3033 	 * two. They are not that meaningful.
3034 	 */
3035 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3036 	ftrace_trace_userstack(tr, buffer, trace_ctx);
3037 }
3038 
3039 /*
3040  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3041  */
3042 void
3043 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3044 				   struct ring_buffer_event *event)
3045 {
3046 	__buffer_unlock_commit(buffer, event);
3047 }
3048 
3049 void
3050 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3051 	       parent_ip, unsigned int trace_ctx)
3052 {
3053 	struct trace_event_call *call = &event_function;
3054 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3055 	struct ring_buffer_event *event;
3056 	struct ftrace_entry *entry;
3057 
3058 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3059 					    trace_ctx);
3060 	if (!event)
3061 		return;
3062 	entry	= ring_buffer_event_data(event);
3063 	entry->ip			= ip;
3064 	entry->parent_ip		= parent_ip;
3065 
3066 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3067 		if (static_branch_unlikely(&trace_function_exports_enabled))
3068 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3069 		__buffer_unlock_commit(buffer, event);
3070 	}
3071 }
3072 
3073 #ifdef CONFIG_STACKTRACE
3074 
3075 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3076 #define FTRACE_KSTACK_NESTING	4
3077 
3078 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3079 
3080 struct ftrace_stack {
3081 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3082 };
3083 
3084 
3085 struct ftrace_stacks {
3086 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3087 };
3088 
3089 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3090 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3091 
3092 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3093 				 unsigned int trace_ctx,
3094 				 int skip, struct pt_regs *regs)
3095 {
3096 	struct trace_event_call *call = &event_kernel_stack;
3097 	struct ring_buffer_event *event;
3098 	unsigned int size, nr_entries;
3099 	struct ftrace_stack *fstack;
3100 	struct stack_entry *entry;
3101 	int stackidx;
3102 
3103 	/*
3104 	 * Add one, for this function and the call to stack_trace_save().
3105 	 * If regs is set, then these functions will not be in the way.
3106 	 */
3107 #ifndef CONFIG_UNWINDER_ORC
3108 	if (!regs)
3109 		skip++;
3110 #endif
3111 
3112 	preempt_disable_notrace();
3113 
3114 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3115 
3116 	/* This should never happen. If it does, yell once and skip */
3117 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3118 		goto out;
3119 
3120 	/*
3121 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3122 	 * interrupt will either see the value before or after the
3123 	 * increment. If the interrupt happens before the increment, it will
3124 	 * have restored the counter when it returns. We just need a barrier
3125 	 * to keep gcc from moving things around.
3126 	 */
3127 	barrier();
3128 
3129 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3130 	size = ARRAY_SIZE(fstack->calls);
3131 
3132 	if (regs) {
3133 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3134 						   size, skip);
3135 	} else {
3136 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3137 	}
3138 
3139 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3140 				    struct_size(entry, caller, nr_entries),
3141 				    trace_ctx);
3142 	if (!event)
3143 		goto out;
3144 	entry = ring_buffer_event_data(event);
3145 
3146 	entry->size = nr_entries;
3147 	memcpy(&entry->caller, fstack->calls,
3148 	       flex_array_size(entry, caller, nr_entries));
3149 
3150 	if (!call_filter_check_discard(call, entry, buffer, event))
3151 		__buffer_unlock_commit(buffer, event);
3152 
3153  out:
3154 	/* Again, don't let gcc optimize things here */
3155 	barrier();
3156 	__this_cpu_dec(ftrace_stack_reserve);
3157 	preempt_enable_notrace();
3158 
3159 }
3160 
3161 static inline void ftrace_trace_stack(struct trace_array *tr,
3162 				      struct trace_buffer *buffer,
3163 				      unsigned int trace_ctx,
3164 				      int skip, struct pt_regs *regs)
3165 {
3166 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3167 		return;
3168 
3169 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3170 }
3171 
3172 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3173 		   int skip)
3174 {
3175 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3176 
3177 	if (rcu_is_watching()) {
3178 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3179 		return;
3180 	}
3181 
3182 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3183 		return;
3184 
3185 	/*
3186 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3187 	 * but if the above rcu_is_watching() failed, then the NMI
3188 	 * triggered someplace critical, and ct_irq_enter() should
3189 	 * not be called from NMI.
3190 	 */
3191 	if (unlikely(in_nmi()))
3192 		return;
3193 
3194 	ct_irq_enter_irqson();
3195 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3196 	ct_irq_exit_irqson();
3197 }
3198 
3199 /**
3200  * trace_dump_stack - record a stack back trace in the trace buffer
3201  * @skip: Number of functions to skip (helper handlers)
3202  */
3203 void trace_dump_stack(int skip)
3204 {
3205 	if (tracing_disabled || tracing_selftest_running)
3206 		return;
3207 
3208 #ifndef CONFIG_UNWINDER_ORC
3209 	/* Skip 1 to skip this function. */
3210 	skip++;
3211 #endif
3212 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3213 			     tracing_gen_ctx(), skip, NULL);
3214 }
3215 EXPORT_SYMBOL_GPL(trace_dump_stack);
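/*
 * A minimal usage sketch (the condition is hypothetical): code that wants
 * a backtrace recorded in the trace buffer rather than in the printk log
 * can simply do
 *
 *	if (unexpected_state)
 *		trace_dump_stack(0);
 */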
3216 
3217 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3218 static DEFINE_PER_CPU(int, user_stack_count);
3219 
3220 static void
3221 ftrace_trace_userstack(struct trace_array *tr,
3222 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3223 {
3224 	struct trace_event_call *call = &event_user_stack;
3225 	struct ring_buffer_event *event;
3226 	struct userstack_entry *entry;
3227 
3228 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3229 		return;
3230 
3231 	/*
3232 	 * NMIs cannot handle page faults, even with fixups.
3233 	 * Saving the user stack can (and often does) fault.
3234 	 */
3235 	if (unlikely(in_nmi()))
3236 		return;
3237 
3238 	/*
3239 	 * Prevent recursion, since the user stack tracing may
3240 	 * trigger other kernel events.
3241 	 */
3242 	preempt_disable();
3243 	if (__this_cpu_read(user_stack_count))
3244 		goto out;
3245 
3246 	__this_cpu_inc(user_stack_count);
3247 
3248 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3249 					    sizeof(*entry), trace_ctx);
3250 	if (!event)
3251 		goto out_drop_count;
3252 	entry	= ring_buffer_event_data(event);
3253 
3254 	entry->tgid		= current->tgid;
3255 	memset(&entry->caller, 0, sizeof(entry->caller));
3256 
3257 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3258 	if (!call_filter_check_discard(call, entry, buffer, event))
3259 		__buffer_unlock_commit(buffer, event);
3260 
3261  out_drop_count:
3262 	__this_cpu_dec(user_stack_count);
3263  out:
3264 	preempt_enable();
3265 }
3266 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3267 static void ftrace_trace_userstack(struct trace_array *tr,
3268 				   struct trace_buffer *buffer,
3269 				   unsigned int trace_ctx)
3270 {
3271 }
3272 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3273 
3274 #endif /* CONFIG_STACKTRACE */
3275 
3276 static inline void
3277 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3278 			  unsigned long long delta)
3279 {
3280 	entry->bottom_delta_ts = delta & U32_MAX;
3281 	entry->top_delta_ts = (delta >> 32);
3282 }
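/*
 * A worked example of the split above: a delta of 0x100000002 is stored
 * as bottom_delta_ts == 0x2 and top_delta_ts == 0x1, and readers
 * recombine it as ((u64)top_delta_ts << 32) | bottom_delta_ts.
 */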
3283 
3284 void trace_last_func_repeats(struct trace_array *tr,
3285 			     struct trace_func_repeats *last_info,
3286 			     unsigned int trace_ctx)
3287 {
3288 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3289 	struct func_repeats_entry *entry;
3290 	struct ring_buffer_event *event;
3291 	u64 delta;
3292 
3293 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3294 					    sizeof(*entry), trace_ctx);
3295 	if (!event)
3296 		return;
3297 
3298 	delta = ring_buffer_event_time_stamp(buffer, event) -
3299 		last_info->ts_last_call;
3300 
3301 	entry = ring_buffer_event_data(event);
3302 	entry->ip = last_info->ip;
3303 	entry->parent_ip = last_info->parent_ip;
3304 	entry->count = last_info->count;
3305 	func_repeats_set_delta_ts(entry, delta);
3306 
3307 	__buffer_unlock_commit(buffer, event);
3308 }
3309 
3310 /* created for use with alloc_percpu */
3311 struct trace_buffer_struct {
3312 	int nesting;
3313 	char buffer[4][TRACE_BUF_SIZE];
3314 };
3315 
3316 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3317 
3318 /*
3319  * This allows for lockless recording.  If we're nested too deeply, then
3320  * this returns NULL.
3321  */
3322 static char *get_trace_buf(void)
3323 {
3324 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3325 
3326 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3327 		return NULL;
3328 
3329 	buffer->nesting++;
3330 
3331 	/* Interrupts must see nesting incremented before we use the buffer */
3332 	barrier();
3333 	return &buffer->buffer[buffer->nesting - 1][0];
3334 }
3335 
3336 static void put_trace_buf(void)
3337 {
3338 	/* Don't let the decrement of nesting leak before this */
3339 	barrier();
3340 	this_cpu_dec(trace_percpu_buffer->nesting);
3341 }
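/*
 * A minimal usage sketch of the pair above, as trace_vbprintk() below
 * does it: with preemption disabled,
 *
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format up to TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 */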
3342 
3343 static int alloc_percpu_trace_buffer(void)
3344 {
3345 	struct trace_buffer_struct __percpu *buffers;
3346 
3347 	if (trace_percpu_buffer)
3348 		return 0;
3349 
3350 	buffers = alloc_percpu(struct trace_buffer_struct);
3351 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3352 		return -ENOMEM;
3353 
3354 	trace_percpu_buffer = buffers;
3355 	return 0;
3356 }
3357 
3358 static int buffers_allocated;
3359 
3360 void trace_printk_init_buffers(void)
3361 {
3362 	if (buffers_allocated)
3363 		return;
3364 
3365 	if (alloc_percpu_trace_buffer())
3366 		return;
3367 
3368 	/* trace_printk() is for debug use only. Don't use it in production. */
3369 
3370 	pr_warn("\n");
3371 	pr_warn("**********************************************************\n");
3372 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3373 	pr_warn("**                                                      **\n");
3374 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3375 	pr_warn("**                                                      **\n");
3376 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3377 	pr_warn("** unsafe for production use.                           **\n");
3378 	pr_warn("**                                                      **\n");
3379 	pr_warn("** If you see this message and you are not debugging    **\n");
3380 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3381 	pr_warn("**                                                      **\n");
3382 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3383 	pr_warn("**********************************************************\n");
3384 
3385 	/* Expand the buffers to set size */
3386 	tracing_update_buffers(&global_trace);
3387 
3388 	buffers_allocated = 1;
3389 
3390 	/*
3391 	 * trace_printk_init_buffers() can be called by modules.
3392 	 * If that happens, then we need to start cmdline recording
3393 	 * directly here. If the global_trace.buffer is already
3394 	 * allocated here, then this was called by module code.
3395 	 */
3396 	if (global_trace.array_buffer.buffer)
3397 		tracing_start_cmdline_record();
3398 }
3399 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3400 
3401 void trace_printk_start_comm(void)
3402 {
3403 	/* Start tracing comms if trace printk is set */
3404 	if (!buffers_allocated)
3405 		return;
3406 	tracing_start_cmdline_record();
3407 }
3408 
3409 static void trace_printk_start_stop_comm(int enabled)
3410 {
3411 	if (!buffers_allocated)
3412 		return;
3413 
3414 	if (enabled)
3415 		tracing_start_cmdline_record();
3416 	else
3417 		tracing_stop_cmdline_record();
3418 }
3419 
3420 /**
3421  * trace_vbprintk - write binary msg to tracing buffer
3422  * @ip:    The address of the caller
3423  * @fmt:   The string format to write to the buffer
3424  * @args:  Arguments for @fmt
3425  */
3426 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3427 {
3428 	struct trace_event_call *call = &event_bprint;
3429 	struct ring_buffer_event *event;
3430 	struct trace_buffer *buffer;
3431 	struct trace_array *tr = &global_trace;
3432 	struct bprint_entry *entry;
3433 	unsigned int trace_ctx;
3434 	char *tbuffer;
3435 	int len = 0, size;
3436 
3437 	if (unlikely(tracing_selftest_running || tracing_disabled))
3438 		return 0;
3439 
3440 	/* Don't pollute graph traces with trace_vprintk internals */
3441 	pause_graph_tracing();
3442 
3443 	trace_ctx = tracing_gen_ctx();
3444 	preempt_disable_notrace();
3445 
3446 	tbuffer = get_trace_buf();
3447 	if (!tbuffer) {
3448 		len = 0;
3449 		goto out_nobuffer;
3450 	}
3451 
3452 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3453 
3454 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3455 		goto out_put;
3456 
3457 	size = sizeof(*entry) + sizeof(u32) * len;
3458 	buffer = tr->array_buffer.buffer;
3459 	ring_buffer_nest_start(buffer);
3460 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3461 					    trace_ctx);
3462 	if (!event)
3463 		goto out;
3464 	entry = ring_buffer_event_data(event);
3465 	entry->ip			= ip;
3466 	entry->fmt			= fmt;
3467 
3468 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3469 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3470 		__buffer_unlock_commit(buffer, event);
3471 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3472 	}
3473 
3474 out:
3475 	ring_buffer_nest_end(buffer);
3476 out_put:
3477 	put_trace_buf();
3478 
3479 out_nobuffer:
3480 	preempt_enable_notrace();
3481 	unpause_graph_tracing();
3482 
3483 	return len;
3484 }
3485 EXPORT_SYMBOL_GPL(trace_vbprintk);
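/*
 * A minimal usage sketch: callers normally reach trace_vbprintk() through
 * the trace_printk() macro rather than calling it directly, e.g.
 *
 *	trace_printk("processed %d packets in %llu ns\n", count, delta);
 *
 * where "count" and "delta" are hypothetical local variables.
 */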
3486 
3487 __printf(3, 0)
3488 static int
3489 __trace_array_vprintk(struct trace_buffer *buffer,
3490 		      unsigned long ip, const char *fmt, va_list args)
3491 {
3492 	struct trace_event_call *call = &event_print;
3493 	struct ring_buffer_event *event;
3494 	int len = 0, size;
3495 	struct print_entry *entry;
3496 	unsigned int trace_ctx;
3497 	char *tbuffer;
3498 
3499 	if (tracing_disabled)
3500 		return 0;
3501 
3502 	/* Don't pollute graph traces with trace_vprintk internals */
3503 	pause_graph_tracing();
3504 
3505 	trace_ctx = tracing_gen_ctx();
3506 	preempt_disable_notrace();
3507 
3508 
3509 	tbuffer = get_trace_buf();
3510 	if (!tbuffer) {
3511 		len = 0;
3512 		goto out_nobuffer;
3513 	}
3514 
3515 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3516 
3517 	size = sizeof(*entry) + len + 1;
3518 	ring_buffer_nest_start(buffer);
3519 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3520 					    trace_ctx);
3521 	if (!event)
3522 		goto out;
3523 	entry = ring_buffer_event_data(event);
3524 	entry->ip = ip;
3525 
3526 	memcpy(&entry->buf, tbuffer, len + 1);
3527 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3528 		__buffer_unlock_commit(buffer, event);
3529 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3530 	}
3531 
3532 out:
3533 	ring_buffer_nest_end(buffer);
3534 	put_trace_buf();
3535 
3536 out_nobuffer:
3537 	preempt_enable_notrace();
3538 	unpause_graph_tracing();
3539 
3540 	return len;
3541 }
3542 
3543 __printf(3, 0)
3544 int trace_array_vprintk(struct trace_array *tr,
3545 			unsigned long ip, const char *fmt, va_list args)
3546 {
3547 	if (tracing_selftest_running && tr == &global_trace)
3548 		return 0;
3549 
3550 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3551 }
3552 
3553 /**
3554  * trace_array_printk - Print a message to a specific instance
3555  * @tr: The instance trace_array descriptor
3556  * @ip: The instruction pointer that this is called from.
3557  * @fmt: The format to print (printf format)
3558  *
3559  * If a subsystem sets up its own instance, it has the right to
3560  * printk strings into its tracing instance buffer using this
3561  * function. Note, this function will not write into the top level
3562  * buffer (use trace_printk() for that), as the top level buffer
3563  * should only contain events that can be individually disabled.
3564  * trace_printk() is only meant for debugging a kernel, and should
3565  * never be incorporated into normal use.
3566  *
3567  * trace_array_printk() can be used, as it will not add noise to the
3568  * top level tracing buffer.
3569  *
3570  * Note, trace_array_init_printk() must be called on @tr before this
3571  * can be used.
3572  */
3573 __printf(3, 0)
3574 int trace_array_printk(struct trace_array *tr,
3575 		       unsigned long ip, const char *fmt, ...)
3576 {
3577 	int ret;
3578 	va_list ap;
3579 
3580 	if (!tr)
3581 		return -ENOENT;
3582 
3583 	/* This is only allowed for created instances */
3584 	if (tr == &global_trace)
3585 		return 0;
3586 
3587 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3588 		return 0;
3589 
3590 	va_start(ap, fmt);
3591 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3592 	va_end(ap);
3593 	return ret;
3594 }
3595 EXPORT_SYMBOL_GPL(trace_array_printk);
3596 
3597 /**
3598  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3599  * @tr: The trace array to initialize the buffers for
3600  *
3601  * As trace_array_printk() only writes into instances, such calls are OK
3602  * to have in the kernel (unlike trace_printk()). This needs to be called
3603  * before trace_array_printk() can be used on a trace_array.
3604  */
3605 int trace_array_init_printk(struct trace_array *tr)
3606 {
3607 	if (!tr)
3608 		return -ENOENT;
3609 
3610 	/* This is only allowed for created instances */
3611 	if (tr == &global_trace)
3612 		return -EINVAL;
3613 
3614 	return alloc_percpu_trace_buffer();
3615 }
3616 EXPORT_SYMBOL_GPL(trace_array_init_printk);
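/*
 * A minimal usage sketch (the instance name is hypothetical, and the
 * exact trace_array_get_by_name() signature can differ between kernel
 * versions):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello from %s\n", "my_subsys");
 */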
3617 
3618 __printf(3, 4)
3619 int trace_array_printk_buf(struct trace_buffer *buffer,
3620 			   unsigned long ip, const char *fmt, ...)
3621 {
3622 	int ret;
3623 	va_list ap;
3624 
3625 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3626 		return 0;
3627 
3628 	va_start(ap, fmt);
3629 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3630 	va_end(ap);
3631 	return ret;
3632 }
3633 
3634 __printf(2, 0)
3635 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3636 {
3637 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3638 }
3639 EXPORT_SYMBOL_GPL(trace_vprintk);
3640 
3641 static void trace_iterator_increment(struct trace_iterator *iter)
3642 {
3643 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3644 
3645 	iter->idx++;
3646 	if (buf_iter)
3647 		ring_buffer_iter_advance(buf_iter);
3648 }
3649 
3650 static struct trace_entry *
3651 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3652 		unsigned long *lost_events)
3653 {
3654 	struct ring_buffer_event *event;
3655 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3656 
3657 	if (buf_iter) {
3658 		event = ring_buffer_iter_peek(buf_iter, ts);
3659 		if (lost_events)
3660 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3661 				(unsigned long)-1 : 0;
3662 	} else {
3663 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3664 					 lost_events);
3665 	}
3666 
3667 	if (event) {
3668 		iter->ent_size = ring_buffer_event_length(event);
3669 		return ring_buffer_event_data(event);
3670 	}
3671 	iter->ent_size = 0;
3672 	return NULL;
3673 }
3674 
3675 static struct trace_entry *
3676 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3677 		  unsigned long *missing_events, u64 *ent_ts)
3678 {
3679 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3680 	struct trace_entry *ent, *next = NULL;
3681 	unsigned long lost_events = 0, next_lost = 0;
3682 	int cpu_file = iter->cpu_file;
3683 	u64 next_ts = 0, ts;
3684 	int next_cpu = -1;
3685 	int next_size = 0;
3686 	int cpu;
3687 
3688 	/*
3689 	 * If we are in a per_cpu trace file, don't bother iterating over
3690 	 * all CPUs; just peek at that CPU directly.
3691 	 */
3692 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3693 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3694 			return NULL;
3695 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3696 		if (ent_cpu)
3697 			*ent_cpu = cpu_file;
3698 
3699 		return ent;
3700 	}
3701 
3702 	for_each_tracing_cpu(cpu) {
3703 
3704 		if (ring_buffer_empty_cpu(buffer, cpu))
3705 			continue;
3706 
3707 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3708 
3709 		/*
3710 		 * Pick the entry with the smallest timestamp:
3711 		 */
3712 		if (ent && (!next || ts < next_ts)) {
3713 			next = ent;
3714 			next_cpu = cpu;
3715 			next_ts = ts;
3716 			next_lost = lost_events;
3717 			next_size = iter->ent_size;
3718 		}
3719 	}
3720 
3721 	iter->ent_size = next_size;
3722 
3723 	if (ent_cpu)
3724 		*ent_cpu = next_cpu;
3725 
3726 	if (ent_ts)
3727 		*ent_ts = next_ts;
3728 
3729 	if (missing_events)
3730 		*missing_events = next_lost;
3731 
3732 	return next;
3733 }
3734 
3735 #define STATIC_FMT_BUF_SIZE	128
3736 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3737 
3738 char *trace_iter_expand_format(struct trace_iterator *iter)
3739 {
3740 	char *tmp;
3741 
3742 	/*
3743 	 * iter->tr is NULL when used with tp_printk, which makes
3744 	 * this get called where it is not safe to call krealloc().
3745 	 */
3746 	if (!iter->tr || iter->fmt == static_fmt_buf)
3747 		return NULL;
3748 
3749 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3750 		       GFP_KERNEL);
3751 	if (tmp) {
3752 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3753 		iter->fmt = tmp;
3754 	}
3755 
3756 	return tmp;
3757 }
3758 
3759 /* Returns true if the string is safe to dereference from an event */
3760 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3761 			   bool star, int len)
3762 {
3763 	unsigned long addr = (unsigned long)str;
3764 	struct trace_event *trace_event;
3765 	struct trace_event_call *event;
3766 
3767 	/* Ignore strings with no length */
3768 	if (star && !len)
3769 		return true;
3770 
3771 	/* OK if part of the event data */
3772 	if ((addr >= (unsigned long)iter->ent) &&
3773 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3774 		return true;
3775 
3776 	/* OK if part of the temp seq buffer */
3777 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3778 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3779 		return true;
3780 
3781 	/* Core rodata can not be freed */
3782 	/* Core rodata cannot be freed */
3783 		return true;
3784 
3785 	if (trace_is_tracepoint_string(str))
3786 		return true;
3787 
3788 	/*
3789 	 * Now this could be a module event, referencing core module
3790 	 * data, which is OK.
3791 	 */
3792 	if (!iter->ent)
3793 		return false;
3794 
3795 	trace_event = ftrace_find_event(iter->ent->type);
3796 	if (!trace_event)
3797 		return false;
3798 
3799 	event = container_of(trace_event, struct trace_event_call, event);
3800 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3801 		return false;
3802 
3803 	/* Would rather have rodata, but this will suffice */
3804 	if (within_module_core(addr, event->module))
3805 		return true;
3806 
3807 	return false;
3808 }
3809 
3810 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3811 
3812 static int test_can_verify_check(const char *fmt, ...)
3813 {
3814 	char buf[16];
3815 	va_list ap;
3816 	int ret;
3817 
3818 	/*
3819 	 * The verifier depends on vsnprintf() modifying the va_list
3820 	 * passed to it, i.e. on the va_list being passed by reference.
3821 	 * Some architectures (like x86_32) pass it by value, which means
3822 	 * that vsnprintf() does not modify the va_list passed to it, and
3823 	 * the verifier would then need to understand every value that
3824 	 * vsnprintf() can consume. If the va_list is passed by value, the
3825 	 * verifier is therefore disabled.
3826 	 */
3827 	va_start(ap, fmt);
3828 	vsnprintf(buf, 16, "%d", ap);
3829 	ret = va_arg(ap, int);
3830 	va_end(ap);
3831 
3832 	return ret;
3833 }
3834 
3835 static void test_can_verify(void)
3836 {
3837 	if (!test_can_verify_check("%d %d", 0, 1)) {
3838 		pr_info("trace event string verifier disabled\n");
3839 		static_branch_inc(&trace_no_verify);
3840 	}
3841 }
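/*
 * Worked example of the check above: test_can_verify_check("%d %d", 0, 1)
 * hands its va_list to vsnprintf() with a "%d" format. Where va_list is
 * passed by reference, vsnprintf() consumes the first argument (0), so
 * the following va_arg() returns 1 and the verifier stays enabled.
 * Where va_list is passed by value (e.g. x86_32), va_arg() still sees
 * the first argument (0), and the verifier is disabled.
 */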
3842 
3843 /**
3844  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3845  * @iter: The iterator that holds the seq buffer and the event being printed
3846  * @fmt: The format used to print the event
3847  * @ap: The va_list holding the data to print from @fmt.
3848  *
3849  * This writes the data into the @iter->seq buffer using the data from
3850  * @fmt and @ap. If the format has a %s, then the source of the string
3851  * is examined to make sure it is safe to print, otherwise it will
3852  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3853  * pointer.
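 *
 * For example, with a format like "comm=%s pid=%d" this prints "comm="
 * first, then verifies the string argument for the %s (substituting
 * "[UNSAFE-MEMORY]" if it cannot safely be dereferenced), and finally
 * lets the trailing trace_seq_vprintf() handle the remaining " pid=%d".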
3854  */
3855 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3856 			 va_list ap)
3857 {
3858 	const char *p = fmt;
3859 	const char *str;
3860 	int i, j;
3861 
3862 	if (WARN_ON_ONCE(!fmt))
3863 		return;
3864 
3865 	if (static_branch_unlikely(&trace_no_verify))
3866 		goto print;
3867 
3868 	/* Don't bother checking when doing a ftrace_dump() */
3869 	if (iter->fmt == static_fmt_buf)
3870 		goto print;
3871 
3872 	while (*p) {
3873 		bool star = false;
3874 		int len = 0;
3875 
3876 		j = 0;
3877 
3878 		/* We only care about %s and variants */
3879 		for (i = 0; p[i]; i++) {
3880 			if (i + 1 >= iter->fmt_size) {
3881 				/*
3882 				 * If we can't expand the copy buffer,
3883 				 * just print it.
3884 				 */
3885 				if (!trace_iter_expand_format(iter))
3886 					goto print;
3887 			}
3888 
3889 			if (p[i] == '\\' && p[i+1]) {
3890 				i++;
3891 				continue;
3892 			}
3893 			if (p[i] == '%') {
3894 				/* Need to test cases like %08.*s */
3895 				for (j = 1; p[i+j]; j++) {
3896 					if (isdigit(p[i+j]) ||
3897 					    p[i+j] == '.')
3898 						continue;
3899 					if (p[i+j] == '*') {
3900 						star = true;
3901 						continue;
3902 					}
3903 					break;
3904 				}
3905 				if (p[i+j] == 's')
3906 					break;
3907 				star = false;
3908 			}
3909 			j = 0;
3910 		}
3911 		/* If no %s found then just print normally */
3912 		if (!p[i])
3913 			break;
3914 
3915 		/* Copy up to the %s, and print that */
3916 		strncpy(iter->fmt, p, i);
3917 		iter->fmt[i] = '\0';
3918 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3919 
3920 		/*
3921 		 * If iter->seq is full, the above call no longer guarantees
3922 		 * that ap is in sync with fmt processing, and further calls
3923 		 * to va_arg() can return wrong positional arguments.
3924 		 *
3925 		 * Ensure that ap is no longer used in this case.
3926 		 */
3927 		if (iter->seq.full) {
3928 			p = "";
3929 			break;
3930 		}
3931 
3932 		if (star)
3933 			len = va_arg(ap, int);
3934 
3935 		/* The ap now points to the string data of the %s */
3936 		str = va_arg(ap, const char *);
3937 
3938 		/*
3939 		 * If you hit this warning, it is likely that the
3940 		 * trace event in question used %s on a string that
3941 		 * was saved at the time of the event, but may not be
3942 		 * around when the trace is read. Use __string(),
3943 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3944 		 * instead. See samples/trace_events/trace-events-sample.h
3945 		 * for reference.
3946 		 */
3947 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3948 			      "fmt: '%s' current_buffer: '%s'",
3949 			      fmt, seq_buf_str(&iter->seq.seq))) {
3950 			int ret;
3951 
3952 			/* Try to safely read the string */
3953 			if (star) {
3954 				if (len + 1 > iter->fmt_size)
3955 					len = iter->fmt_size - 1;
3956 				if (len < 0)
3957 					len = 0;
3958 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3959 				iter->fmt[len] = 0;
3960 				star = false;
3961 			} else {
3962 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3963 								  iter->fmt_size);
3964 			}
3965 			if (ret < 0)
3966 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3967 			else
3968 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3969 						 str, iter->fmt);
3970 			str = "[UNSAFE-MEMORY]";
3971 			strcpy(iter->fmt, "%s");
3972 		} else {
3973 			strncpy(iter->fmt, p + i, j + 1);
3974 			iter->fmt[j+1] = '\0';
3975 		}
3976 		if (star)
3977 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3978 		else
3979 			trace_seq_printf(&iter->seq, iter->fmt, str);
3980 
3981 		p += i + j + 1;
3982 	}
3983  print:
3984 	if (*p)
3985 		trace_seq_vprintf(&iter->seq, p, ap);
3986 }
3987 
3988 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3989 {
3990 	const char *p, *new_fmt;
3991 	char *q;
3992 
3993 	if (WARN_ON_ONCE(!fmt))
3994 		return fmt;
3995 
3996 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3997 		return fmt;
3998 
3999 	p = fmt;
4000 	new_fmt = q = iter->fmt;
4001 	while (*p) {
4002 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4003 			if (!trace_iter_expand_format(iter))
4004 				return fmt;
4005 
4006 			q += iter->fmt - new_fmt;
4007 			new_fmt = iter->fmt;
4008 		}
4009 
4010 		*q++ = *p++;
4011 
4012 		/* Replace %p with %px */
4013 		if (p[-1] == '%') {
4014 			if (p[0] == '%') {
4015 				*q++ = *p++;
4016 			} else if (p[0] == 'p' && !isalnum(p[1])) {
4017 				*q++ = *p++;
4018 				*q++ = 'x';
4019 			}
4020 		}
4021 	}
4022 	*q = '\0';
4023 
4024 	return new_fmt;
4025 }
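/*
 * For example, with the hash-ptr option cleared, a format such as
 * "comm=%s addr=%p ip=%pS" is rewritten above to
 * "comm=%s addr=%px ip=%pS": a bare %p becomes %px (unhashed), while
 * extended pointer formats like %pS are left alone because the
 * character following the 'p' is alphanumeric, and "%%" is copied
 * through unchanged.
 */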
4026 
4027 #define STATIC_TEMP_BUF_SIZE	128
4028 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4029 
4030 /* Find the next real entry, without updating the iterator itself */
4031 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4032 					  int *ent_cpu, u64 *ent_ts)
4033 {
4034 	/* __find_next_entry will reset ent_size */
4035 	int ent_size = iter->ent_size;
4036 	struct trace_entry *entry;
4037 
4038 	/*
4039 	 * If called from ftrace_dump(), then the iter->temp buffer
4040 	 * will be the static_temp_buf and not created from kmalloc.
4041 	 * If the entry size is greater than the buffer, we cannot
4042 	 * save it. Just return NULL in that case. This is only
4043 	 * used to add markers when two consecutive events' time
4044 	 * stamps have a large delta. See trace_print_lat_context().
4045 	 */
4046 	if (iter->temp == static_temp_buf &&
4047 	    STATIC_TEMP_BUF_SIZE < ent_size)
4048 		return NULL;
4049 
4050 	/*
4051 	 * The __find_next_entry() may call peek_next_entry(), which may
4052 	 * call ring_buffer_peek() that may make the contents of iter->ent
4053 	 * undefined. Need to copy iter->ent now.
4054 	 */
4055 	if (iter->ent && iter->ent != iter->temp) {
4056 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4057 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4058 			void *temp;
4059 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4060 			if (!temp)
4061 				return NULL;
4062 			kfree(iter->temp);
4063 			iter->temp = temp;
4064 			iter->temp_size = iter->ent_size;
4065 		}
4066 		memcpy(iter->temp, iter->ent, iter->ent_size);
4067 		iter->ent = iter->temp;
4068 	}
4069 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4070 	/* Put back the original ent_size */
4071 	iter->ent_size = ent_size;
4072 
4073 	return entry;
4074 }
4075 
4076 /* Find the next real entry, and increment the iterator to the next entry */
4077 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4078 {
4079 	iter->ent = __find_next_entry(iter, &iter->cpu,
4080 				      &iter->lost_events, &iter->ts);
4081 
4082 	if (iter->ent)
4083 		trace_iterator_increment(iter);
4084 
4085 	return iter->ent ? iter : NULL;
4086 }
4087 
4088 static void trace_consume(struct trace_iterator *iter)
4089 {
4090 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4091 			    &iter->lost_events);
4092 }
4093 
4094 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4095 {
4096 	struct trace_iterator *iter = m->private;
4097 	int i = (int)*pos;
4098 	void *ent;
4099 
4100 	WARN_ON_ONCE(iter->leftover);
4101 
4102 	(*pos)++;
4103 
4104 	/* can't go backwards */
4105 	if (iter->idx > i)
4106 		return NULL;
4107 
4108 	if (iter->idx < 0)
4109 		ent = trace_find_next_entry_inc(iter);
4110 	else
4111 		ent = iter;
4112 
4113 	while (ent && iter->idx < i)
4114 		ent = trace_find_next_entry_inc(iter);
4115 
4116 	iter->pos = *pos;
4117 
4118 	return ent;
4119 }
4120 
4121 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4122 {
4123 	struct ring_buffer_iter *buf_iter;
4124 	unsigned long entries = 0;
4125 	u64 ts;
4126 
4127 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4128 
4129 	buf_iter = trace_buffer_iter(iter, cpu);
4130 	if (!buf_iter)
4131 		return;
4132 
4133 	ring_buffer_iter_reset(buf_iter);
4134 
4135 	/*
4136 	 * With the max latency tracers, a reset may never have taken
4137 	 * place on a CPU. This is evident when the timestamp is
4138 	 * before the start of the buffer.
4139 	 */
4140 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4141 		if (ts >= iter->array_buffer->time_start)
4142 			break;
4143 		entries++;
4144 		ring_buffer_iter_advance(buf_iter);
4145 	}
4146 
4147 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4148 }
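/*
 * The skipped count recorded above is what get_total_entries_cpu()
 * later subtracts, so events that predate the last buffer reset do not
 * show up in the "entries-in-buffer" statistics.
 */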
4149 
4150 /*
4151  * The current tracer is copied to avoid taking the global
4152  * lock all around.
4153  */
4154 static void *s_start(struct seq_file *m, loff_t *pos)
4155 {
4156 	struct trace_iterator *iter = m->private;
4157 	struct trace_array *tr = iter->tr;
4158 	int cpu_file = iter->cpu_file;
4159 	void *p = NULL;
4160 	loff_t l = 0;
4161 	int cpu;
4162 
4163 	mutex_lock(&trace_types_lock);
4164 	if (unlikely(tr->current_trace != iter->trace)) {
4165 		/* Close iter->trace before switching to the new current tracer */
4166 		if (iter->trace->close)
4167 			iter->trace->close(iter);
4168 		iter->trace = tr->current_trace;
4169 		/* Reopen the new current tracer */
4170 		if (iter->trace->open)
4171 			iter->trace->open(iter);
4172 	}
4173 	mutex_unlock(&trace_types_lock);
4174 
4175 #ifdef CONFIG_TRACER_MAX_TRACE
4176 	if (iter->snapshot && iter->trace->use_max_tr)
4177 		return ERR_PTR(-EBUSY);
4178 #endif
4179 
4180 	if (*pos != iter->pos) {
4181 		iter->ent = NULL;
4182 		iter->cpu = 0;
4183 		iter->idx = -1;
4184 
4185 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4186 			for_each_tracing_cpu(cpu)
4187 				tracing_iter_reset(iter, cpu);
4188 		} else
4189 			tracing_iter_reset(iter, cpu_file);
4190 
4191 		iter->leftover = 0;
4192 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4193 			;
4194 
4195 	} else {
4196 		/*
4197 		 * If we overflowed the seq_file before, then we want
4198 		 * to just reuse the trace_seq buffer again.
4199 		 */
4200 		if (iter->leftover)
4201 			p = iter;
4202 		else {
4203 			l = *pos - 1;
4204 			p = s_next(m, p, &l);
4205 		}
4206 	}
4207 
4208 	trace_event_read_lock();
4209 	trace_access_lock(cpu_file);
4210 	return p;
4211 }
4212 
4213 static void s_stop(struct seq_file *m, void *p)
4214 {
4215 	struct trace_iterator *iter = m->private;
4216 
4217 #ifdef CONFIG_TRACER_MAX_TRACE
4218 	if (iter->snapshot && iter->trace->use_max_tr)
4219 		return;
4220 #endif
4221 
4222 	trace_access_unlock(iter->cpu_file);
4223 	trace_event_read_unlock();
4224 }
4225 
4226 static void
4227 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4228 		      unsigned long *entries, int cpu)
4229 {
4230 	unsigned long count;
4231 
4232 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4233 	/*
4234 	 * If this buffer has skipped entries, then we hold all
4235 	 * entries for the trace and we need to ignore the
4236 	 * ones before the time stamp.
4237 	 */
4238 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4239 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4240 		/* total is the same as the entries */
4241 		*total = count;
4242 	} else
4243 		*total = count +
4244 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4245 	*entries = count;
4246 }
4247 
4248 static void
4249 get_total_entries(struct array_buffer *buf,
4250 		  unsigned long *total, unsigned long *entries)
4251 {
4252 	unsigned long t, e;
4253 	int cpu;
4254 
4255 	*total = 0;
4256 	*entries = 0;
4257 
4258 	for_each_tracing_cpu(cpu) {
4259 		get_total_entries_cpu(buf, &t, &e, cpu);
4260 		*total += t;
4261 		*entries += e;
4262 	}
4263 }
4264 
4265 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4266 {
4267 	unsigned long total, entries;
4268 
4269 	if (!tr)
4270 		tr = &global_trace;
4271 
4272 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4273 
4274 	return entries;
4275 }
4276 
4277 unsigned long trace_total_entries(struct trace_array *tr)
4278 {
4279 	unsigned long total, entries;
4280 
4281 	if (!tr)
4282 		tr = &global_trace;
4283 
4284 	get_total_entries(&tr->array_buffer, &total, &entries);
4285 
4286 	return entries;
4287 }
4288 
4289 static void print_lat_help_header(struct seq_file *m)
4290 {
4291 	seq_puts(m, "#                    _------=> CPU#            \n"
4292 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4293 		    "#                  | / _----=> need-resched    \n"
4294 		    "#                  || / _---=> hardirq/softirq \n"
4295 		    "#                  ||| / _--=> preempt-depth   \n"
4296 		    "#                  |||| / _-=> migrate-disable \n"
4297 		    "#                  ||||| /     delay           \n"
4298 		    "#  cmd     pid     |||||| time  |   caller     \n"
4299 		    "#     \\   /        ||||||  \\    |    /       \n");
4300 }
4301 
4302 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4303 {
4304 	unsigned long total;
4305 	unsigned long entries;
4306 
4307 	get_total_entries(buf, &total, &entries);
4308 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4309 		   entries, total, num_online_cpus());
4310 	seq_puts(m, "#\n");
4311 }
4312 
4313 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4314 				   unsigned int flags)
4315 {
4316 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4317 
4318 	print_event_info(buf, m);
4319 
4320 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4321 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4322 }
4323 
4324 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4325 				       unsigned int flags)
4326 {
4327 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4328 	static const char space[] = "            ";
4329 	int prec = tgid ? 12 : 2;
4330 
4331 	print_event_info(buf, m);
4332 
4333 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4334 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4335 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4336 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4337 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4338 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4339 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4340 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4341 }
4342 
4343 void
4344 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4345 {
4346 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4347 	struct array_buffer *buf = iter->array_buffer;
4348 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4349 	struct tracer *type = iter->trace;
4350 	unsigned long entries;
4351 	unsigned long total;
4352 	const char *name = type->name;
4353 
4354 	get_total_entries(buf, &total, &entries);
4355 
4356 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4357 		   name, UTS_RELEASE);
4358 	seq_puts(m, "# -----------------------------------"
4359 		 "---------------------------------\n");
4360 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4361 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4362 		   nsecs_to_usecs(data->saved_latency),
4363 		   entries,
4364 		   total,
4365 		   buf->cpu,
4366 		   preempt_model_none()      ? "server" :
4367 		   preempt_model_voluntary() ? "desktop" :
4368 		   preempt_model_full()      ? "preempt" :
4369 		   preempt_model_rt()        ? "preempt_rt" :
4370 		   "unknown",
4371 		   /* These are reserved for later use */
4372 		   0, 0, 0, 0);
4373 #ifdef CONFIG_SMP
4374 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4375 #else
4376 	seq_puts(m, ")\n");
4377 #endif
4378 	seq_puts(m, "#    -----------------\n");
4379 	seq_printf(m, "#    | task: %.16s-%d "
4380 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4381 		   data->comm, data->pid,
4382 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4383 		   data->policy, data->rt_priority);
4384 	seq_puts(m, "#    -----------------\n");
4385 
4386 	if (data->critical_start) {
4387 		seq_puts(m, "#  => started at: ");
4388 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4389 		trace_print_seq(m, &iter->seq);
4390 		seq_puts(m, "\n#  => ended at:   ");
4391 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4392 		trace_print_seq(m, &iter->seq);
4393 		seq_puts(m, "\n#\n");
4394 	}
4395 
4396 	seq_puts(m, "#\n");
4397 }
4398 
4399 static void test_cpu_buff_start(struct trace_iterator *iter)
4400 {
4401 	struct trace_seq *s = &iter->seq;
4402 	struct trace_array *tr = iter->tr;
4403 
4404 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4405 		return;
4406 
4407 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4408 		return;
4409 
4410 	if (cpumask_available(iter->started) &&
4411 	    cpumask_test_cpu(iter->cpu, iter->started))
4412 		return;
4413 
4414 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4415 		return;
4416 
4417 	if (cpumask_available(iter->started))
4418 		cpumask_set_cpu(iter->cpu, iter->started);
4419 
4420 	/* Don't print started cpu buffer for the first entry of the trace */
4421 	if (iter->idx > 1)
4422 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4423 				iter->cpu);
4424 }
4425 
4426 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4427 {
4428 	struct trace_array *tr = iter->tr;
4429 	struct trace_seq *s = &iter->seq;
4430 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4431 	struct trace_entry *entry;
4432 	struct trace_event *event;
4433 
4434 	entry = iter->ent;
4435 
4436 	test_cpu_buff_start(iter);
4437 
4438 	event = ftrace_find_event(entry->type);
4439 
4440 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4441 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4442 			trace_print_lat_context(iter);
4443 		else
4444 			trace_print_context(iter);
4445 	}
4446 
4447 	if (trace_seq_has_overflowed(s))
4448 		return TRACE_TYPE_PARTIAL_LINE;
4449 
4450 	if (event) {
4451 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4452 			return print_event_fields(iter, event);
4453 		return event->funcs->trace(iter, sym_flags, event);
4454 	}
4455 
4456 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4457 
4458 	return trace_handle_return(s);
4459 }
4460 
4461 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4462 {
4463 	struct trace_array *tr = iter->tr;
4464 	struct trace_seq *s = &iter->seq;
4465 	struct trace_entry *entry;
4466 	struct trace_event *event;
4467 
4468 	entry = iter->ent;
4469 
4470 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4471 		trace_seq_printf(s, "%d %d %llu ",
4472 				 entry->pid, iter->cpu, iter->ts);
4473 
4474 	if (trace_seq_has_overflowed(s))
4475 		return TRACE_TYPE_PARTIAL_LINE;
4476 
4477 	event = ftrace_find_event(entry->type);
4478 	if (event)
4479 		return event->funcs->raw(iter, 0, event);
4480 
4481 	trace_seq_printf(s, "%d ?\n", entry->type);
4482 
4483 	return trace_handle_return(s);
4484 }
4485 
4486 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4487 {
4488 	struct trace_array *tr = iter->tr;
4489 	struct trace_seq *s = &iter->seq;
4490 	unsigned char newline = '\n';
4491 	struct trace_entry *entry;
4492 	struct trace_event *event;
4493 
4494 	entry = iter->ent;
4495 
4496 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4497 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4498 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4499 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4500 		if (trace_seq_has_overflowed(s))
4501 			return TRACE_TYPE_PARTIAL_LINE;
4502 	}
4503 
4504 	event = ftrace_find_event(entry->type);
4505 	if (event) {
4506 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4507 		if (ret != TRACE_TYPE_HANDLED)
4508 			return ret;
4509 	}
4510 
4511 	SEQ_PUT_FIELD(s, newline);
4512 
4513 	return trace_handle_return(s);
4514 }
4515 
4516 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4517 {
4518 	struct trace_array *tr = iter->tr;
4519 	struct trace_seq *s = &iter->seq;
4520 	struct trace_entry *entry;
4521 	struct trace_event *event;
4522 
4523 	entry = iter->ent;
4524 
4525 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4526 		SEQ_PUT_FIELD(s, entry->pid);
4527 		SEQ_PUT_FIELD(s, iter->cpu);
4528 		SEQ_PUT_FIELD(s, iter->ts);
4529 		if (trace_seq_has_overflowed(s))
4530 			return TRACE_TYPE_PARTIAL_LINE;
4531 	}
4532 
4533 	event = ftrace_find_event(entry->type);
4534 	return event ? event->funcs->binary(iter, 0, event) :
4535 		TRACE_TYPE_HANDLED;
4536 }
4537 
4538 int trace_empty(struct trace_iterator *iter)
4539 {
4540 	struct ring_buffer_iter *buf_iter;
4541 	int cpu;
4542 
4543 	/* If we are looking at one CPU buffer, only check that one */
4544 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4545 		cpu = iter->cpu_file;
4546 		buf_iter = trace_buffer_iter(iter, cpu);
4547 		if (buf_iter) {
4548 			if (!ring_buffer_iter_empty(buf_iter))
4549 				return 0;
4550 		} else {
4551 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4552 				return 0;
4553 		}
4554 		return 1;
4555 	}
4556 
4557 	for_each_tracing_cpu(cpu) {
4558 		buf_iter = trace_buffer_iter(iter, cpu);
4559 		if (buf_iter) {
4560 			if (!ring_buffer_iter_empty(buf_iter))
4561 				return 0;
4562 		} else {
4563 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4564 				return 0;
4565 		}
4566 	}
4567 
4568 	return 1;
4569 }
4570 
4571 /*  Called with trace_event_read_lock() held. */
4572 enum print_line_t print_trace_line(struct trace_iterator *iter)
4573 {
4574 	struct trace_array *tr = iter->tr;
4575 	unsigned long trace_flags = tr->trace_flags;
4576 	enum print_line_t ret;
4577 
4578 	if (iter->lost_events) {
4579 		if (iter->lost_events == (unsigned long)-1)
4580 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4581 					 iter->cpu);
4582 		else
4583 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4584 					 iter->cpu, iter->lost_events);
4585 		if (trace_seq_has_overflowed(&iter->seq))
4586 			return TRACE_TYPE_PARTIAL_LINE;
4587 	}
4588 
4589 	if (iter->trace && iter->trace->print_line) {
4590 		ret = iter->trace->print_line(iter);
4591 		if (ret != TRACE_TYPE_UNHANDLED)
4592 			return ret;
4593 	}
4594 
4595 	if (iter->ent->type == TRACE_BPUTS &&
4596 			trace_flags & TRACE_ITER_PRINTK &&
4597 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4598 		return trace_print_bputs_msg_only(iter);
4599 
4600 	if (iter->ent->type == TRACE_BPRINT &&
4601 			trace_flags & TRACE_ITER_PRINTK &&
4602 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4603 		return trace_print_bprintk_msg_only(iter);
4604 
4605 	if (iter->ent->type == TRACE_PRINT &&
4606 			trace_flags & TRACE_ITER_PRINTK &&
4607 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4608 		return trace_print_printk_msg_only(iter);
4609 
4610 	if (trace_flags & TRACE_ITER_BIN)
4611 		return print_bin_fmt(iter);
4612 
4613 	if (trace_flags & TRACE_ITER_HEX)
4614 		return print_hex_fmt(iter);
4615 
4616 	if (trace_flags & TRACE_ITER_RAW)
4617 		return print_raw_fmt(iter);
4618 
4619 	return print_trace_fmt(iter);
4620 }
4621 
4622 void trace_latency_header(struct seq_file *m)
4623 {
4624 	struct trace_iterator *iter = m->private;
4625 	struct trace_array *tr = iter->tr;
4626 
4627 	/* print nothing if the buffers are empty */
4628 	if (trace_empty(iter))
4629 		return;
4630 
4631 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4632 		print_trace_header(m, iter);
4633 
4634 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4635 		print_lat_help_header(m);
4636 }
4637 
4638 void trace_default_header(struct seq_file *m)
4639 {
4640 	struct trace_iterator *iter = m->private;
4641 	struct trace_array *tr = iter->tr;
4642 	unsigned long trace_flags = tr->trace_flags;
4643 
4644 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4645 		return;
4646 
4647 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4648 		/* print nothing if the buffers are empty */
4649 		if (trace_empty(iter))
4650 			return;
4651 		print_trace_header(m, iter);
4652 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4653 			print_lat_help_header(m);
4654 	} else {
4655 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4656 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4657 				print_func_help_header_irq(iter->array_buffer,
4658 							   m, trace_flags);
4659 			else
4660 				print_func_help_header(iter->array_buffer, m,
4661 						       trace_flags);
4662 		}
4663 	}
4664 }
4665 
4666 static void test_ftrace_alive(struct seq_file *m)
4667 {
4668 	if (!ftrace_is_dead())
4669 		return;
4670 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4671 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4672 }
4673 
4674 #ifdef CONFIG_TRACER_MAX_TRACE
4675 static void show_snapshot_main_help(struct seq_file *m)
4676 {
4677 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4678 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4679 		    "#                      Takes a snapshot of the main buffer.\n"
4680 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4681 		    "#                      (It doesn't have to be '2'; any number that\n"
4682 		    "#                       is not '0' or '1' works)\n");
4683 }
4684 
4685 static void show_snapshot_percpu_help(struct seq_file *m)
4686 {
4687 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4688 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4689 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4690 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4691 #else
4692 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4693 		    "#                     Must use main snapshot file to allocate.\n");
4694 #endif
4695 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4696 		    "#                      (It doesn't have to be '2'; any number that\n"
4697 		    "#                       is not '0' or '1' works)\n");
4698 }
4699 
4700 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4701 {
4702 	if (iter->tr->allocated_snapshot)
4703 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4704 	else
4705 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4706 
4707 	seq_puts(m, "# Snapshot commands:\n");
4708 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4709 		show_snapshot_main_help(m);
4710 	else
4711 		show_snapshot_percpu_help(m);
4712 }
4713 #else
4714 /* Should never be called */
4715 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4716 #endif
4717 
4718 static int s_show(struct seq_file *m, void *v)
4719 {
4720 	struct trace_iterator *iter = v;
4721 	int ret;
4722 
4723 	if (iter->ent == NULL) {
4724 		if (iter->tr) {
4725 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4726 			seq_puts(m, "#\n");
4727 			test_ftrace_alive(m);
4728 		}
4729 		if (iter->snapshot && trace_empty(iter))
4730 			print_snapshot_help(m, iter);
4731 		else if (iter->trace && iter->trace->print_header)
4732 			iter->trace->print_header(m);
4733 		else
4734 			trace_default_header(m);
4735 
4736 	} else if (iter->leftover) {
4737 		/*
4738 		 * If we filled the seq_file buffer earlier, we
4739 		 * want to just show it now.
4740 		 */
4741 		ret = trace_print_seq(m, &iter->seq);
4742 
4743 		/* ret should this time be zero, but you never know */
4744 		iter->leftover = ret;
4745 
4746 	} else {
4747 		ret = print_trace_line(iter);
4748 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4749 			iter->seq.full = 0;
4750 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4751 		}
4752 		ret = trace_print_seq(m, &iter->seq);
4753 		/*
4754 		 * If we overflow the seq_file buffer, then it will
4755 		 * ask us for this data again at start up.
4756 		 * Use that instead.
4757 		 *  ret is 0 if seq_file write succeeded.
4758 		 *        -1 otherwise.
4759 		 */
4760 		iter->leftover = ret;
4761 	}
4762 
4763 	return 0;
4764 }
4765 
4766 /*
4767  * Should be used after trace_array_get(); trace_types_lock
4768  * ensures that i_cdev was already initialized.
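 * (The per-CPU trace files store "cpu + 1" in i_cdev when they are
 * created, see trace_create_cpu_file(), so a NULL i_cdev means "trace
 * all CPUs" and subtracting one recovers the CPU number.)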
4769  */
4770 static inline int tracing_get_cpu(struct inode *inode)
4771 {
4772 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4773 		return (long)inode->i_cdev - 1;
4774 	return RING_BUFFER_ALL_CPUS;
4775 }
4776 
4777 static const struct seq_operations tracer_seq_ops = {
4778 	.start		= s_start,
4779 	.next		= s_next,
4780 	.stop		= s_stop,
4781 	.show		= s_show,
4782 };
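/*
 * The seq_file core drives the iterator through the callbacks above in
 * the usual order: s_start() (re)positions the iterator on each read(),
 * s_show() formats one entry, s_next() advances, and s_stop() drops the
 * locks taken in s_start(). A rough sketch of one read() pass:
 *
 *	p = s_start(m, &pos);
 *	while (p) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 *
 * (Simplified; the real seq_file code also handles errors, buffer
 * overflow and partial reads.)
 */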
4783 
4784 /*
4785  * Note, as iter itself can be allocated and freed in different
4786  * ways, this function is only used to free its content, and not
4787  * the iterator itself. The only requirement for all the allocations
4788  * is that they zero all fields (kzalloc), as freeing works with
4789  * either allocated content or NULL.
4790  */
4791 static void free_trace_iter_content(struct trace_iterator *iter)
4792 {
4793 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4794 	if (iter->fmt != static_fmt_buf)
4795 		kfree(iter->fmt);
4796 
4797 	kfree(iter->temp);
4798 	kfree(iter->buffer_iter);
4799 	mutex_destroy(&iter->mutex);
4800 	free_cpumask_var(iter->started);
4801 }
4802 
4803 static struct trace_iterator *
4804 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4805 {
4806 	struct trace_array *tr = inode->i_private;
4807 	struct trace_iterator *iter;
4808 	int cpu;
4809 
4810 	if (tracing_disabled)
4811 		return ERR_PTR(-ENODEV);
4812 
4813 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4814 	if (!iter)
4815 		return ERR_PTR(-ENOMEM);
4816 
4817 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4818 				    GFP_KERNEL);
4819 	if (!iter->buffer_iter)
4820 		goto release;
4821 
4822 	/*
4823 	 * trace_find_next_entry() may need to save off iter->ent.
4824 	 * It will place it into the iter->temp buffer. As most
4825 	 * events are less than 128 bytes, allocate a buffer of that size.
4826 	 * If one is greater, then trace_find_next_entry() will
4827 	 * allocate a new buffer to adjust for the bigger iter->ent.
4828 	 * It's not critical if it fails to get allocated here.
4829 	 */
4830 	iter->temp = kmalloc(128, GFP_KERNEL);
4831 	if (iter->temp)
4832 		iter->temp_size = 128;
4833 
4834 	/*
4835 	 * trace_event_printf() may need to modify the given format
4836 	 * string to replace %p with %px so that it shows the real address
4837 	 * instead of a hashed value. However, that is only needed for
4838 	 * event tracing; other tracers may not need it. Defer the
4839 	 * allocation until it is needed.
4840 	 */
4841 	iter->fmt = NULL;
4842 	iter->fmt_size = 0;
4843 
4844 	mutex_lock(&trace_types_lock);
4845 	iter->trace = tr->current_trace;
4846 
4847 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4848 		goto fail;
4849 
4850 	iter->tr = tr;
4851 
4852 #ifdef CONFIG_TRACER_MAX_TRACE
4853 	/* Currently only the top directory has a snapshot */
4854 	if (tr->current_trace->print_max || snapshot)
4855 		iter->array_buffer = &tr->max_buffer;
4856 	else
4857 #endif
4858 		iter->array_buffer = &tr->array_buffer;
4859 	iter->snapshot = snapshot;
4860 	iter->pos = -1;
4861 	iter->cpu_file = tracing_get_cpu(inode);
4862 	mutex_init(&iter->mutex);
4863 
4864 	/* Notify the tracer early; before we stop tracing. */
4865 	if (iter->trace->open)
4866 		iter->trace->open(iter);
4867 
4868 	/* Annotate start of buffers if we had overruns */
4869 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4870 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4871 
4872 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4873 	if (trace_clocks[tr->clock_id].in_ns)
4874 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4875 
4876 	/*
4877 	 * If pause-on-trace is enabled, then stop the trace while
4878 	 * dumping, unless this is the "snapshot" file.
4879 	 */
4880 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4881 		tracing_stop_tr(tr);
4882 
4883 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4884 		for_each_tracing_cpu(cpu) {
4885 			iter->buffer_iter[cpu] =
4886 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4887 							 cpu, GFP_KERNEL);
4888 		}
4889 		ring_buffer_read_prepare_sync();
4890 		for_each_tracing_cpu(cpu) {
4891 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4892 			tracing_iter_reset(iter, cpu);
4893 		}
4894 	} else {
4895 		cpu = iter->cpu_file;
4896 		iter->buffer_iter[cpu] =
4897 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4898 						 cpu, GFP_KERNEL);
4899 		ring_buffer_read_prepare_sync();
4900 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4901 		tracing_iter_reset(iter, cpu);
4902 	}
4903 
4904 	mutex_unlock(&trace_types_lock);
4905 
4906 	return iter;
4907 
4908  fail:
4909 	mutex_unlock(&trace_types_lock);
4910 	free_trace_iter_content(iter);
4911 release:
4912 	seq_release_private(inode, file);
4913 	return ERR_PTR(-ENOMEM);
4914 }
4915 
4916 int tracing_open_generic(struct inode *inode, struct file *filp)
4917 {
4918 	int ret;
4919 
4920 	ret = tracing_check_open_get_tr(NULL);
4921 	if (ret)
4922 		return ret;
4923 
4924 	filp->private_data = inode->i_private;
4925 	return 0;
4926 }
4927 
4928 bool tracing_is_disabled(void)
4929 {
4930 	return tracing_disabled ? true : false;
4931 }
4932 
4933 /*
4934  * Open and update trace_array ref count.
4935  * Must have the current trace_array passed to it.
4936  */
4937 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4938 {
4939 	struct trace_array *tr = inode->i_private;
4940 	int ret;
4941 
4942 	ret = tracing_check_open_get_tr(tr);
4943 	if (ret)
4944 		return ret;
4945 
4946 	filp->private_data = inode->i_private;
4947 
4948 	return 0;
4949 }
4950 
4951 /*
4952  * The private pointer of the inode is the trace_event_file.
4953  * Update the tr ref count associated to it.
4954  */
4955 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4956 {
4957 	struct trace_event_file *file = inode->i_private;
4958 	int ret;
4959 
4960 	ret = tracing_check_open_get_tr(file->tr);
4961 	if (ret)
4962 		return ret;
4963 
4964 	mutex_lock(&event_mutex);
4965 
4966 	/* Fail if the file is marked for removal */
4967 	if (file->flags & EVENT_FILE_FL_FREED) {
4968 		trace_array_put(file->tr);
4969 		ret = -ENODEV;
4970 	} else {
4971 		event_file_get(file);
4972 	}
4973 
4974 	mutex_unlock(&event_mutex);
4975 	if (ret)
4976 		return ret;
4977 
4978 	filp->private_data = inode->i_private;
4979 
4980 	return 0;
4981 }
4982 
4983 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4984 {
4985 	struct trace_event_file *file = inode->i_private;
4986 
4987 	trace_array_put(file->tr);
4988 	event_file_put(file);
4989 
4990 	return 0;
4991 }
4992 
4993 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4994 {
4995 	tracing_release_file_tr(inode, filp);
4996 	return single_release(inode, filp);
4997 }
4998 
4999 static int tracing_mark_open(struct inode *inode, struct file *filp)
5000 {
5001 	stream_open(inode, filp);
5002 	return tracing_open_generic_tr(inode, filp);
5003 }
5004 
5005 static int tracing_release(struct inode *inode, struct file *file)
5006 {
5007 	struct trace_array *tr = inode->i_private;
5008 	struct seq_file *m = file->private_data;
5009 	struct trace_iterator *iter;
5010 	int cpu;
5011 
5012 	if (!(file->f_mode & FMODE_READ)) {
5013 		trace_array_put(tr);
5014 		return 0;
5015 	}
5016 
5017 	/* Writes do not use seq_file */
5018 	iter = m->private;
5019 	mutex_lock(&trace_types_lock);
5020 
5021 	for_each_tracing_cpu(cpu) {
5022 		if (iter->buffer_iter[cpu])
5023 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
5024 	}
5025 
5026 	if (iter->trace && iter->trace->close)
5027 		iter->trace->close(iter);
5028 
5029 	if (!iter->snapshot && tr->stop_count)
5030 		/* reenable tracing if it was previously enabled */
5031 		tracing_start_tr(tr);
5032 
5033 	__trace_array_put(tr);
5034 
5035 	mutex_unlock(&trace_types_lock);
5036 
5037 	free_trace_iter_content(iter);
5038 	seq_release_private(inode, file);
5039 
5040 	return 0;
5041 }
5042 
5043 int tracing_release_generic_tr(struct inode *inode, struct file *file)
5044 {
5045 	struct trace_array *tr = inode->i_private;
5046 
5047 	trace_array_put(tr);
5048 	return 0;
5049 }
5050 
5051 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5052 {
5053 	struct trace_array *tr = inode->i_private;
5054 
5055 	trace_array_put(tr);
5056 
5057 	return single_release(inode, file);
5058 }
5059 
5060 static int tracing_open(struct inode *inode, struct file *file)
5061 {
5062 	struct trace_array *tr = inode->i_private;
5063 	struct trace_iterator *iter;
5064 	int ret;
5065 
5066 	ret = tracing_check_open_get_tr(tr);
5067 	if (ret)
5068 		return ret;
5069 
5070 	/* If this file was open for write, then erase contents */
5071 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5072 		int cpu = tracing_get_cpu(inode);
5073 		struct array_buffer *trace_buf = &tr->array_buffer;
5074 
5075 #ifdef CONFIG_TRACER_MAX_TRACE
5076 		if (tr->current_trace->print_max)
5077 			trace_buf = &tr->max_buffer;
5078 #endif
5079 
5080 		if (cpu == RING_BUFFER_ALL_CPUS)
5081 			tracing_reset_online_cpus(trace_buf);
5082 		else
5083 			tracing_reset_cpu(trace_buf, cpu);
5084 	}
5085 
5086 	if (file->f_mode & FMODE_READ) {
5087 		iter = __tracing_open(inode, file, false);
5088 		if (IS_ERR(iter))
5089 			ret = PTR_ERR(iter);
5090 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5091 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5092 	}
5093 
5094 	if (ret < 0)
5095 		trace_array_put(tr);
5096 
5097 	return ret;
5098 }
5099 
5100 /*
5101  * Some tracers are not suitable for instance buffers.
5102  * A tracer is always available for the global array (toplevel)
5103  * or if it explicitly states that it is.
5104  */
5105 static bool
5106 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5107 {
5108 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5109 }
5110 
5111 /* Find the next tracer that this trace array may use */
5112 static struct tracer *
5113 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5114 {
5115 	while (t && !trace_ok_for_array(t, tr))
5116 		t = t->next;
5117 
5118 	return t;
5119 }
5120 
5121 static void *
5122 t_next(struct seq_file *m, void *v, loff_t *pos)
5123 {
5124 	struct trace_array *tr = m->private;
5125 	struct tracer *t = v;
5126 
5127 	(*pos)++;
5128 
5129 	if (t)
5130 		t = get_tracer_for_array(tr, t->next);
5131 
5132 	return t;
5133 }
5134 
5135 static void *t_start(struct seq_file *m, loff_t *pos)
5136 {
5137 	struct trace_array *tr = m->private;
5138 	struct tracer *t;
5139 	loff_t l = 0;
5140 
5141 	mutex_lock(&trace_types_lock);
5142 
5143 	t = get_tracer_for_array(tr, trace_types);
5144 	for (; t && l < *pos; t = t_next(m, t, &l))
5145 			;
5146 
5147 	return t;
5148 }
5149 
5150 static void t_stop(struct seq_file *m, void *p)
5151 {
5152 	mutex_unlock(&trace_types_lock);
5153 }
5154 
5155 static int t_show(struct seq_file *m, void *v)
5156 {
5157 	struct tracer *t = v;
5158 
5159 	if (!t)
5160 		return 0;
5161 
5162 	seq_puts(m, t->name);
5163 	if (t->next)
5164 		seq_putc(m, ' ');
5165 	else
5166 		seq_putc(m, '\n');
5167 
5168 	return 0;
5169 }
5170 
5171 static const struct seq_operations show_traces_seq_ops = {
5172 	.start		= t_start,
5173 	.next		= t_next,
5174 	.stop		= t_stop,
5175 	.show		= t_show,
5176 };
5177 
5178 static int show_traces_open(struct inode *inode, struct file *file)
5179 {
5180 	struct trace_array *tr = inode->i_private;
5181 	struct seq_file *m;
5182 	int ret;
5183 
5184 	ret = tracing_check_open_get_tr(tr);
5185 	if (ret)
5186 		return ret;
5187 
5188 	ret = seq_open(file, &show_traces_seq_ops);
5189 	if (ret) {
5190 		trace_array_put(tr);
5191 		return ret;
5192 	}
5193 
5194 	m = file->private_data;
5195 	m->private = tr;
5196 
5197 	return 0;
5198 }
5199 
5200 static int show_traces_release(struct inode *inode, struct file *file)
5201 {
5202 	struct trace_array *tr = inode->i_private;
5203 
5204 	trace_array_put(tr);
5205 	return seq_release(inode, file);
5206 }
5207 
5208 static ssize_t
5209 tracing_write_stub(struct file *filp, const char __user *ubuf,
5210 		   size_t count, loff_t *ppos)
5211 {
5212 	return count;
5213 }
5214 
5215 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5216 {
5217 	int ret;
5218 
5219 	if (file->f_mode & FMODE_READ)
5220 		ret = seq_lseek(file, offset, whence);
5221 	else
5222 		file->f_pos = ret = 0;
5223 
5224 	return ret;
5225 }
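/*
 * Note that only readers go through seq_lseek(); a file opened
 * write-only (used just to clear the buffer) has no seq_file state, so
 * its position is simply reset to 0.
 */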
5226 
5227 static const struct file_operations tracing_fops = {
5228 	.open		= tracing_open,
5229 	.read		= seq_read,
5230 	.read_iter	= seq_read_iter,
5231 	.splice_read	= copy_splice_read,
5232 	.write		= tracing_write_stub,
5233 	.llseek		= tracing_lseek,
5234 	.release	= tracing_release,
5235 };
5236 
5237 static const struct file_operations show_traces_fops = {
5238 	.open		= show_traces_open,
5239 	.read		= seq_read,
5240 	.llseek		= seq_lseek,
5241 	.release	= show_traces_release,
5242 };
5243 
5244 static ssize_t
5245 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5246 		     size_t count, loff_t *ppos)
5247 {
5248 	struct trace_array *tr = file_inode(filp)->i_private;
5249 	char *mask_str;
5250 	int len;
5251 
5252 	len = snprintf(NULL, 0, "%*pb\n",
5253 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5254 	mask_str = kmalloc(len, GFP_KERNEL);
5255 	if (!mask_str)
5256 		return -ENOMEM;
5257 
5258 	len = snprintf(mask_str, len, "%*pb\n",
5259 		       cpumask_pr_args(tr->tracing_cpumask));
5260 	if (len >= count) {
5261 		count = -EINVAL;
5262 		goto out_err;
5263 	}
5264 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5265 
5266 out_err:
5267 	kfree(mask_str);
5268 
5269 	return count;
5270 }
5271 
5272 int tracing_set_cpumask(struct trace_array *tr,
5273 			cpumask_var_t tracing_cpumask_new)
5274 {
5275 	int cpu;
5276 
5277 	if (!tr)
5278 		return -EINVAL;
5279 
5280 	local_irq_disable();
5281 	arch_spin_lock(&tr->max_lock);
5282 	for_each_tracing_cpu(cpu) {
5283 		/*
5284 		 * Increase/decrease the disabled counter if we are
5285 		 * about to flip a bit in the cpumask:
5286 		 */
5287 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5288 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5289 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5290 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5291 #ifdef CONFIG_TRACER_MAX_TRACE
5292 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5293 #endif
5294 		}
5295 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5296 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5297 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5298 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5299 #ifdef CONFIG_TRACER_MAX_TRACE
5300 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5301 #endif
5302 		}
5303 	}
5304 	arch_spin_unlock(&tr->max_lock);
5305 	local_irq_enable();
5306 
5307 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5308 
5309 	return 0;
5310 }
5311 
5312 static ssize_t
5313 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5314 		      size_t count, loff_t *ppos)
5315 {
5316 	struct trace_array *tr = file_inode(filp)->i_private;
5317 	cpumask_var_t tracing_cpumask_new;
5318 	int err;
5319 
5320 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5321 		return -ENOMEM;
5322 
5323 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5324 	if (err)
5325 		goto err_free;
5326 
5327 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5328 	if (err)
5329 		goto err_free;
5330 
5331 	free_cpumask_var(tracing_cpumask_new);
5332 
5333 	return count;
5334 
5335 err_free:
5336 	free_cpumask_var(tracing_cpumask_new);
5337 
5338 	return err;
5339 }
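/*
 * The mask is parsed by cpumask_parse_user() as a (comma-separated)
 * hex bitmask, so for example
 *
 *	# echo 3 > tracing_cpumask
 *
 * restricts tracing to CPUs 0 and 1, and writing the full mask (e.g.
 * "ff" on an 8-CPU system) enables all of them again.
 */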
5340 
5341 static const struct file_operations tracing_cpumask_fops = {
5342 	.open		= tracing_open_generic_tr,
5343 	.read		= tracing_cpumask_read,
5344 	.write		= tracing_cpumask_write,
5345 	.release	= tracing_release_generic_tr,
5346 	.llseek		= generic_file_llseek,
5347 };
5348 
5349 static int tracing_trace_options_show(struct seq_file *m, void *v)
5350 {
5351 	struct tracer_opt *trace_opts;
5352 	struct trace_array *tr = m->private;
5353 	u32 tracer_flags;
5354 	int i;
5355 
5356 	mutex_lock(&trace_types_lock);
5357 	tracer_flags = tr->current_trace->flags->val;
5358 	trace_opts = tr->current_trace->flags->opts;
5359 
5360 	for (i = 0; trace_options[i]; i++) {
5361 		if (tr->trace_flags & (1 << i))
5362 			seq_printf(m, "%s\n", trace_options[i]);
5363 		else
5364 			seq_printf(m, "no%s\n", trace_options[i]);
5365 	}
5366 
5367 	for (i = 0; trace_opts[i].name; i++) {
5368 		if (tracer_flags & trace_opts[i].bit)
5369 			seq_printf(m, "%s\n", trace_opts[i].name);
5370 		else
5371 			seq_printf(m, "no%s\n", trace_opts[i].name);
5372 	}
5373 	mutex_unlock(&trace_types_lock);
5374 
5375 	return 0;
5376 }
5377 
5378 static int __set_tracer_option(struct trace_array *tr,
5379 			       struct tracer_flags *tracer_flags,
5380 			       struct tracer_opt *opts, int neg)
5381 {
5382 	struct tracer *trace = tracer_flags->trace;
5383 	int ret;
5384 
5385 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5386 	if (ret)
5387 		return ret;
5388 
5389 	if (neg)
5390 		tracer_flags->val &= ~opts->bit;
5391 	else
5392 		tracer_flags->val |= opts->bit;
5393 	return 0;
5394 }
5395 
5396 /* Try to assign a tracer specific option */
5397 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5398 {
5399 	struct tracer *trace = tr->current_trace;
5400 	struct tracer_flags *tracer_flags = trace->flags;
5401 	struct tracer_opt *opts = NULL;
5402 	int i;
5403 
5404 	for (i = 0; tracer_flags->opts[i].name; i++) {
5405 		opts = &tracer_flags->opts[i];
5406 
5407 		if (strcmp(cmp, opts->name) == 0)
5408 			return __set_tracer_option(tr, trace->flags, opts, neg);
5409 	}
5410 
5411 	return -EINVAL;
5412 }
5413 
5414 /* Some tracers require overwrite to stay enabled */
5415 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5416 {
5417 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5418 		return -1;
5419 
5420 	return 0;
5421 }
5422 
5423 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5424 {
5425 	int *map;
5426 
5427 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5428 	    (mask == TRACE_ITER_RECORD_CMD))
5429 		lockdep_assert_held(&event_mutex);
5430 
5431 	/* do nothing if flag is already set */
5432 	if (!!(tr->trace_flags & mask) == !!enabled)
5433 		return 0;
5434 
5435 	/* Give the tracer a chance to approve the change */
5436 	if (tr->current_trace->flag_changed)
5437 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5438 			return -EINVAL;
5439 
5440 	if (enabled)
5441 		tr->trace_flags |= mask;
5442 	else
5443 		tr->trace_flags &= ~mask;
5444 
5445 	if (mask == TRACE_ITER_RECORD_CMD)
5446 		trace_event_enable_cmd_record(enabled);
5447 
5448 	if (mask == TRACE_ITER_RECORD_TGID) {
5449 		if (!tgid_map) {
5450 			tgid_map_max = pid_max;
5451 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5452 				       GFP_KERNEL);
5453 
5454 			/*
5455 			 * Pairs with smp_load_acquire() in
5456 			 * trace_find_tgid_ptr() to ensure that if it observes
5457 			 * the tgid_map we just allocated then it also observes
5458 			 * the corresponding tgid_map_max value.
5459 			 */
5460 			smp_store_release(&tgid_map, map);
5461 		}
5462 		if (!tgid_map) {
5463 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5464 			return -ENOMEM;
5465 		}
5466 
5467 		trace_event_enable_tgid_record(enabled);
5468 	}
5469 
5470 	if (mask == TRACE_ITER_EVENT_FORK)
5471 		trace_event_follow_fork(tr, enabled);
5472 
5473 	if (mask == TRACE_ITER_FUNC_FORK)
5474 		ftrace_pid_follow_fork(tr, enabled);
5475 
5476 	if (mask == TRACE_ITER_OVERWRITE) {
5477 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5478 #ifdef CONFIG_TRACER_MAX_TRACE
5479 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5480 #endif
5481 	}
5482 
5483 	if (mask == TRACE_ITER_PRINTK) {
5484 		trace_printk_start_stop_comm(enabled);
5485 		trace_printk_control(enabled);
5486 	}
5487 
5488 	return 0;
5489 }
5490 
5491 int trace_set_options(struct trace_array *tr, char *option)
5492 {
5493 	char *cmp;
5494 	int neg = 0;
5495 	int ret;
5496 	size_t orig_len = strlen(option);
5497 	int len;
5498 
5499 	cmp = strstrip(option);
5500 
5501 	len = str_has_prefix(cmp, "no");
5502 	if (len)
5503 		neg = 1;
5504 
5505 	cmp += len;
5506 
5507 	mutex_lock(&event_mutex);
5508 	mutex_lock(&trace_types_lock);
5509 
5510 	ret = match_string(trace_options, -1, cmp);
5511 	/* If no option could be set, test the specific tracer options */
5512 	if (ret < 0)
5513 		ret = set_tracer_option(tr, cmp, neg);
5514 	else
5515 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5516 
5517 	mutex_unlock(&trace_types_lock);
5518 	mutex_unlock(&event_mutex);
5519 
5520 	/*
5521 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5522 	 * turn it back into a space.
5523 	 */
5524 	if (orig_len > strlen(option))
5525 		option[strlen(option)] = ' ';
5526 
5527 	return ret;
5528 }
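/*
 * For example, "echo noprint-parent > trace_options" clears the
 * print-parent flag and "echo print-parent > trace_options" sets it
 * again. Names that do not match a global option in trace_options[]
 * fall through to the current tracer's private options via
 * set_tracer_option().
 */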
5529 
5530 static void __init apply_trace_boot_options(void)
5531 {
5532 	char *buf = trace_boot_options_buf;
5533 	char *option;
5534 
5535 	while (true) {
5536 		option = strsep(&buf, ",");
5537 
5538 		if (!option)
5539 			break;
5540 
5541 		if (*option)
5542 			trace_set_options(&global_trace, option);
5543 
5544 		/* Put back the comma to allow this to be called again */
5545 		if (buf)
5546 			*(buf - 1) = ',';
5547 	}
5548 }
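/*
 * trace_boot_options_buf is filled from the "trace_options=" kernel
 * command line parameter, e.g.
 *
 *	trace_options=sym-offset,nooverwrite
 *
 * which the function above splits on ',' and feeds to
 * trace_set_options() one option at a time (putting the ',' back so the
 * buffer can be parsed again if needed).
 */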
5549 
5550 static ssize_t
5551 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5552 			size_t cnt, loff_t *ppos)
5553 {
5554 	struct seq_file *m = filp->private_data;
5555 	struct trace_array *tr = m->private;
5556 	char buf[64];
5557 	int ret;
5558 
5559 	if (cnt >= sizeof(buf))
5560 		return -EINVAL;
5561 
5562 	if (copy_from_user(buf, ubuf, cnt))
5563 		return -EFAULT;
5564 
5565 	buf[cnt] = 0;
5566 
5567 	ret = trace_set_options(tr, buf);
5568 	if (ret < 0)
5569 		return ret;
5570 
5571 	*ppos += cnt;
5572 
5573 	return cnt;
5574 }
5575 
5576 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5577 {
5578 	struct trace_array *tr = inode->i_private;
5579 	int ret;
5580 
5581 	ret = tracing_check_open_get_tr(tr);
5582 	if (ret)
5583 		return ret;
5584 
5585 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5586 	if (ret < 0)
5587 		trace_array_put(tr);
5588 
5589 	return ret;
5590 }
5591 
5592 static const struct file_operations tracing_iter_fops = {
5593 	.open		= tracing_trace_options_open,
5594 	.read		= seq_read,
5595 	.llseek		= seq_lseek,
5596 	.release	= tracing_single_release_tr,
5597 	.write		= tracing_trace_options_write,
5598 };
5599 
5600 static const char readme_msg[] =
5601 	"tracing mini-HOWTO:\n\n"
5602 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5603 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5604 	" Important files:\n"
5605 	"  trace\t\t\t- The static contents of the buffer\n"
5606 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5607 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5608 	"  current_tracer\t- function and latency tracers\n"
5609 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5610 	"  error_log\t- error log for failed commands (that support it)\n"
5611 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5612 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5613 	"  trace_clock\t\t- change the clock used to order events\n"
5614 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5615 	"      global:   Synced across CPUs but slows tracing down.\n"
5616 	"     counter:   Not a clock, but just an increment\n"
5617 	"      uptime:   Jiffy counter from time of boot\n"
5618 	"        perf:   Same clock that perf events use\n"
5619 #ifdef CONFIG_X86_64
5620 	"     x86-tsc:   TSC cycle counter\n"
5621 #endif
5622 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5623 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5624 	"    absolute:   Absolute (standalone) timestamp\n"
5625 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5626 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5627 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5628 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5629 	"\t\t\t  Remove sub-buffer with rmdir\n"
5630 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5631 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5632 	"\t\t\t  option name\n"
5633 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5634 #ifdef CONFIG_DYNAMIC_FTRACE
5635 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5636 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5637 	"\t\t\t  functions\n"
5638 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5639 	"\t     modules: Can select a group via module\n"
5640 	"\t      Format: :mod:<module-name>\n"
5641 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5642 	"\t    triggers: a command to perform when function is hit\n"
5643 	"\t      Format: <function>:<trigger>[:count]\n"
5644 	"\t     trigger: traceon, traceoff\n"
5645 	"\t\t      enable_event:<system>:<event>\n"
5646 	"\t\t      disable_event:<system>:<event>\n"
5647 #ifdef CONFIG_STACKTRACE
5648 	"\t\t      stacktrace\n"
5649 #endif
5650 #ifdef CONFIG_TRACER_SNAPSHOT
5651 	"\t\t      snapshot\n"
5652 #endif
5653 	"\t\t      dump\n"
5654 	"\t\t      cpudump\n"
5655 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5656 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5657 	"\t     The first one will disable tracing every time do_fault is hit\n"
5658 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5659 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5660 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5661 	"\t       the counter will not decrement. It only decrements when the\n"
5662 	"\t       trigger did work\n"
5663 	"\t     To remove trigger without count:\n"
5664 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5665 	"\t     To remove trigger with a count:\n"
5666 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5667 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5668 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5669 	"\t    modules: Can select a group via module command :mod:\n"
5670 	"\t    Does not accept triggers\n"
5671 #endif /* CONFIG_DYNAMIC_FTRACE */
5672 #ifdef CONFIG_FUNCTION_TRACER
5673 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5674 	"\t\t    (function)\n"
5675 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5676 	"\t\t    (function)\n"
5677 #endif
5678 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5679 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5680 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5681 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5682 #endif
5683 #ifdef CONFIG_TRACER_SNAPSHOT
5684 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5685 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5686 	"\t\t\t  information\n"
5687 #endif
5688 #ifdef CONFIG_STACK_TRACER
5689 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5690 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5691 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5692 	"\t\t\t  new trace)\n"
5693 #ifdef CONFIG_DYNAMIC_FTRACE
5694 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5695 	"\t\t\t  traces\n"
5696 #endif
5697 #endif /* CONFIG_STACK_TRACER */
5698 #ifdef CONFIG_DYNAMIC_EVENTS
5699 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5700 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5701 #endif
5702 #ifdef CONFIG_KPROBE_EVENTS
5703 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5704 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5705 #endif
5706 #ifdef CONFIG_UPROBE_EVENTS
5707 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5708 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5709 #endif
5710 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5711     defined(CONFIG_FPROBE_EVENTS)
5712 	"\t  accepts: event-definitions (one definition per line)\n"
5713 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5714 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5715 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5716 #endif
5717 #ifdef CONFIG_FPROBE_EVENTS
5718 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5719 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5720 #endif
5721 #ifdef CONFIG_HIST_TRIGGERS
5722 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5723 #endif
5724 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5725 	"\t           -:[<group>/][<event>]\n"
5726 #ifdef CONFIG_KPROBE_EVENTS
5727 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5728 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5729 #endif
5730 #ifdef CONFIG_UPROBE_EVENTS
5731 	"\t    place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5732 #endif
5733 	"\t     args: <name>=fetcharg[:type]\n"
5734 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5735 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5736 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5737 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5738 	"\t           <argname>[->field[->field|.field...]],\n"
5739 #else
5740 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5741 #endif
5742 #else
5743 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5744 #endif
5745 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5746 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5747 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5748 	"\t           symstr, <type>\\[<array-size>\\]\n"
5749 #ifdef CONFIG_HIST_TRIGGERS
5750 	"\t    field: <stype> <name>;\n"
5751 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5752 	"\t           [unsigned] char/int/long\n"
5753 #endif
5754 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5755 	"\t            of the <attached-group>/<attached-event>.\n"
5756 #endif
5757 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5758 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5759 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5760 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5761 	"\t\t\t  events\n"
5762 	"      filter\t\t- If set, only events passing filter are traced\n"
5763 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5764 	"\t\t\t  <event>:\n"
5765 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5766 	"      filter\t\t- If set, only events passing filter are traced\n"
5767 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5768 	"\t    Format: <trigger>[:count][if <filter>]\n"
5769 	"\t   trigger: traceon, traceoff\n"
5770 	"\t            enable_event:<system>:<event>\n"
5771 	"\t            disable_event:<system>:<event>\n"
5772 #ifdef CONFIG_HIST_TRIGGERS
5773 	"\t            enable_hist:<system>:<event>\n"
5774 	"\t            disable_hist:<system>:<event>\n"
5775 #endif
5776 #ifdef CONFIG_STACKTRACE
5777 	"\t\t    stacktrace\n"
5778 #endif
5779 #ifdef CONFIG_TRACER_SNAPSHOT
5780 	"\t\t    snapshot\n"
5781 #endif
5782 #ifdef CONFIG_HIST_TRIGGERS
5783 	"\t\t    hist (see below)\n"
5784 #endif
5785 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5786 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5787 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5788 	"\t                  events/block/block_unplug/trigger\n"
5789 	"\t   The first disables tracing every time block_unplug is hit.\n"
5790 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5791 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5792 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5793 	"\t   Like function triggers, the counter is only decremented if it\n"
5794 	"\t    enabled or disabled tracing.\n"
5795 	"\t   To remove a trigger without a count:\n"
5796 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5797 	"\t   To remove a trigger with a count:\n"
5798 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5799 	"\t   Filters can be ignored when removing a trigger.\n"
5800 #ifdef CONFIG_HIST_TRIGGERS
5801 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5802 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5803 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5804 	"\t            [:values=<field1[,field2,...]>]\n"
5805 	"\t            [:sort=<field1[,field2,...]>]\n"
5806 	"\t            [:size=#entries]\n"
5807 	"\t            [:pause][:continue][:clear]\n"
5808 	"\t            [:name=histname1]\n"
5809 	"\t            [:nohitcount]\n"
5810 	"\t            [:<handler>.<action>]\n"
5811 	"\t            [if <filter>]\n\n"
5812 	"\t    Note, special fields can be used as well:\n"
5813 	"\t            common_timestamp - to record current timestamp\n"
5814 	"\t            common_cpu - to record the CPU the event happened on\n"
5815 	"\n"
5816 	"\t    A hist trigger variable can be:\n"
5817 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5818 	"\t        - a reference to a field e.g. x=common_timestamp,\n"
5819 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5820 	"\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5821 	"\n"
5822 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5823 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5824 	"\t    variable reference, field or numeric literal.\n"
5825 	"\n"
5826 	"\t    When a matching event is hit, an entry is added to a hash\n"
5827 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5828 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5829 	"\t    correspond to fields in the event's format description.  Keys\n"
5830 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5831 	"\t    Compound keys consisting of up to two fields can be specified\n"
5832 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5833 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5834 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5835 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5836 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5837 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5838 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5839 	"\t    its histogram data will be shared with other triggers of the\n"
5840 	"\t    same name, and trigger hits will update this common data.\n\n"
5841 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5842 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5843 	"\t    triggers attached to an event, there will be a table for each\n"
5844 	"\t    trigger in the output.  The table displayed for a named\n"
5845 	"\t    trigger will be the same as any other instance having the\n"
5846 	"\t    same name.  The default format used to display a given field\n"
5847 	"\t    can be modified by appending any of the following modifiers\n"
5848 	"\t    to the field name, as applicable:\n\n"
5849 	"\t            .hex        display a number as a hex value\n"
5850 	"\t            .sym        display an address as a symbol\n"
5851 	"\t            .sym-offset display an address as a symbol and offset\n"
5852 	"\t            .execname   display a common_pid as a program name\n"
5853 	"\t            .syscall    display a syscall id as a syscall name\n"
5854 	"\t            .log2       display log2 value rather than raw number\n"
5855 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5856 	"\t            .usecs      display a common_timestamp in microseconds\n"
5857 	"\t            .percent    display a number as a percentage value\n"
5858 	"\t            .graph      display a bar-graph of a value\n\n"
5859 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5860 	"\t    trigger or to start a hist trigger but not log any events\n"
5861 	"\t    until told to do so.  'continue' can be used to start or\n"
5862 	"\t    restart a paused hist trigger.\n\n"
5863 	"\t    The 'clear' parameter will clear the contents of a running\n"
5864 	"\t    hist trigger and leave its current paused/active state\n"
5865 	"\t    unchanged.\n\n"
5866 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5867 	"\t    raw hitcount in the histogram.\n\n"
5868 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5869 	"\t    have one event conditionally start and stop another event's\n"
5870 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5871 	"\t    the enable_event and disable_event triggers.\n\n"
5872 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5873 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5874 	"\t        <handler>.<action>\n\n"
5875 	"\t    The available handlers are:\n\n"
5876 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5877 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5878 	"\t        onchange(var)            - invoke action if var changes\n\n"
5879 	"\t    The available actions are:\n\n"
5880 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5881 	"\t        save(field,...)                      - save current event fields\n"
5882 #ifdef CONFIG_TRACER_SNAPSHOT
5883 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5884 #endif
5885 #ifdef CONFIG_SYNTH_EVENTS
5886 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5887 	"\t  Write into this file to define/undefine new synthetic events.\n"
5888 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5889 #endif
5890 #endif
5891 ;
5892 
5893 static ssize_t
5894 tracing_readme_read(struct file *filp, char __user *ubuf,
5895 		       size_t cnt, loff_t *ppos)
5896 {
5897 	return simple_read_from_buffer(ubuf, cnt, ppos,
5898 					readme_msg, strlen(readme_msg));
5899 }
5900 
5901 static const struct file_operations tracing_readme_fops = {
5902 	.open		= tracing_open_generic,
5903 	.read		= tracing_readme_read,
5904 	.llseek		= generic_file_llseek,
5905 };
5906 
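
/*
 * seq_file iterator for the saved_tgids file: each position is a pid,
 * resolved to its saved tgid via trace_find_tgid_ptr().  Entries with no
 * recorded tgid are skipped by ->show().
 */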
5907 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5908 {
5909 	int pid = ++(*pos);
5910 
5911 	return trace_find_tgid_ptr(pid);
5912 }
5913 
5914 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5915 {
5916 	int pid = *pos;
5917 
5918 	return trace_find_tgid_ptr(pid);
5919 }
5920 
5921 static void saved_tgids_stop(struct seq_file *m, void *v)
5922 {
5923 }
5924 
5925 static int saved_tgids_show(struct seq_file *m, void *v)
5926 {
5927 	int *entry = (int *)v;
5928 	int pid = entry - tgid_map;
5929 	int tgid = *entry;
5930 
5931 	if (tgid == 0)
5932 		return SEQ_SKIP;
5933 
5934 	seq_printf(m, "%d %d\n", pid, tgid);
5935 	return 0;
5936 }
5937 
5938 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5939 	.start		= saved_tgids_start,
5940 	.stop		= saved_tgids_stop,
5941 	.next		= saved_tgids_next,
5942 	.show		= saved_tgids_show,
5943 };
5944 
5945 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5946 {
5947 	int ret;
5948 
5949 	ret = tracing_check_open_get_tr(NULL);
5950 	if (ret)
5951 		return ret;
5952 
5953 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5954 }
5955 
5956 
5957 static const struct file_operations tracing_saved_tgids_fops = {
5958 	.open		= tracing_saved_tgids_open,
5959 	.read		= seq_read,
5960 	.llseek		= seq_lseek,
5961 	.release	= seq_release,
5962 };
5963 
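/*
 * seq_file iterator for the saved_cmdlines file.  The map is walked under
 * trace_cmdline_lock (taken in ->start with preemption disabled and dropped
 * in ->stop), skipping slots that have no saved command line.
 */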
5964 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5965 {
5966 	unsigned int *ptr = v;
5967 
5968 	if (*pos || m->count)
5969 		ptr++;
5970 
5971 	(*pos)++;
5972 
5973 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5974 	     ptr++) {
5975 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5976 			continue;
5977 
5978 		return ptr;
5979 	}
5980 
5981 	return NULL;
5982 }
5983 
5984 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5985 {
5986 	void *v;
5987 	loff_t l = 0;
5988 
5989 	preempt_disable();
5990 	arch_spin_lock(&trace_cmdline_lock);
5991 
5992 	v = &savedcmd->map_cmdline_to_pid[0];
5993 	while (l <= *pos) {
5994 		v = saved_cmdlines_next(m, v, &l);
5995 		if (!v)
5996 			return NULL;
5997 	}
5998 
5999 	return v;
6000 }
6001 
6002 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6003 {
6004 	arch_spin_unlock(&trace_cmdline_lock);
6005 	preempt_enable();
6006 }
6007 
6008 static int saved_cmdlines_show(struct seq_file *m, void *v)
6009 {
6010 	char buf[TASK_COMM_LEN];
6011 	unsigned int *pid = v;
6012 
6013 	__trace_find_cmdline(*pid, buf);
6014 	seq_printf(m, "%d %s\n", *pid, buf);
6015 	return 0;
6016 }
6017 
6018 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6019 	.start		= saved_cmdlines_start,
6020 	.next		= saved_cmdlines_next,
6021 	.stop		= saved_cmdlines_stop,
6022 	.show		= saved_cmdlines_show,
6023 };
6024 
6025 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6026 {
6027 	int ret;
6028 
6029 	ret = tracing_check_open_get_tr(NULL);
6030 	if (ret)
6031 		return ret;
6032 
6033 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6034 }
6035 
6036 static const struct file_operations tracing_saved_cmdlines_fops = {
6037 	.open		= tracing_saved_cmdlines_open,
6038 	.read		= seq_read,
6039 	.llseek		= seq_lseek,
6040 	.release	= seq_release,
6041 };
6042 
6043 static ssize_t
6044 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6045 				 size_t cnt, loff_t *ppos)
6046 {
6047 	char buf[64];
6048 	int r;
6049 
6050 	preempt_disable();
6051 	arch_spin_lock(&trace_cmdline_lock);
6052 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6053 	arch_spin_unlock(&trace_cmdline_lock);
6054 	preempt_enable();
6055 
6056 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6057 }
6058 
6059 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6060 {
6061 	kfree(s->saved_cmdlines);
6062 	kfree(s->map_cmdline_to_pid);
6063 	kfree(s);
6064 }
6065 
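/*
 * Allocate a new saved_cmdlines buffer with @val entries and swap it in
 * under trace_cmdline_lock.  The old buffer is freed after the lock is
 * dropped.
 */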
6066 static int tracing_resize_saved_cmdlines(unsigned int val)
6067 {
6068 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
6069 
6070 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6071 	if (!s)
6072 		return -ENOMEM;
6073 
6074 	if (allocate_cmdlines_buffer(val, s) < 0) {
6075 		kfree(s);
6076 		return -ENOMEM;
6077 	}
6078 
6079 	preempt_disable();
6080 	arch_spin_lock(&trace_cmdline_lock);
6081 	savedcmd_temp = savedcmd;
6082 	savedcmd = s;
6083 	arch_spin_unlock(&trace_cmdline_lock);
6084 	preempt_enable();
6085 	free_saved_cmdlines_buffer(savedcmd_temp);
6086 
6087 	return 0;
6088 }
6089 
6090 static ssize_t
6091 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6092 				  size_t cnt, loff_t *ppos)
6093 {
6094 	unsigned long val;
6095 	int ret;
6096 
6097 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6098 	if (ret)
6099 		return ret;
6100 
6101 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6102 	if (!val || val > PID_MAX_DEFAULT)
6103 		return -EINVAL;
6104 
6105 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6106 	if (ret < 0)
6107 		return ret;
6108 
6109 	*ppos += cnt;
6110 
6111 	return cnt;
6112 }
6113 
6114 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6115 	.open		= tracing_open_generic,
6116 	.read		= tracing_saved_cmdlines_size_read,
6117 	.write		= tracing_saved_cmdlines_size_write,
6118 };
6119 
6120 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
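/*
 * The eval maps are stored as a chain of arrays.  Each array starts with a
 * head item (module and length) and ends with a tail item that points to
 * the next array.  If @ptr has landed on a tail item (no eval_string),
 * follow it to the next array and skip that array's head.
 */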
6121 static union trace_eval_map_item *
6122 update_eval_map(union trace_eval_map_item *ptr)
6123 {
6124 	if (!ptr->map.eval_string) {
6125 		if (ptr->tail.next) {
6126 			ptr = ptr->tail.next;
6127 			/* Set ptr to the next real item (skip head) */
6128 			ptr++;
6129 		} else
6130 			return NULL;
6131 	}
6132 	return ptr;
6133 }
6134 
6135 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6136 {
6137 	union trace_eval_map_item *ptr = v;
6138 
6139 	/*
6140 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6141 	 * This really should never happen.
6142 	 */
6143 	(*pos)++;
6144 	ptr = update_eval_map(ptr);
6145 	if (WARN_ON_ONCE(!ptr))
6146 		return NULL;
6147 
6148 	ptr++;
6149 	ptr = update_eval_map(ptr);
6150 
6151 	return ptr;
6152 }
6153 
6154 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6155 {
6156 	union trace_eval_map_item *v;
6157 	loff_t l = 0;
6158 
6159 	mutex_lock(&trace_eval_mutex);
6160 
6161 	v = trace_eval_maps;
6162 	if (v)
6163 		v++;
6164 
6165 	while (v && l < *pos) {
6166 		v = eval_map_next(m, v, &l);
6167 	}
6168 
6169 	return v;
6170 }
6171 
6172 static void eval_map_stop(struct seq_file *m, void *v)
6173 {
6174 	mutex_unlock(&trace_eval_mutex);
6175 }
6176 
6177 static int eval_map_show(struct seq_file *m, void *v)
6178 {
6179 	union trace_eval_map_item *ptr = v;
6180 
6181 	seq_printf(m, "%s %ld (%s)\n",
6182 		   ptr->map.eval_string, ptr->map.eval_value,
6183 		   ptr->map.system);
6184 
6185 	return 0;
6186 }
6187 
6188 static const struct seq_operations tracing_eval_map_seq_ops = {
6189 	.start		= eval_map_start,
6190 	.next		= eval_map_next,
6191 	.stop		= eval_map_stop,
6192 	.show		= eval_map_show,
6193 };
6194 
6195 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6196 {
6197 	int ret;
6198 
6199 	ret = tracing_check_open_get_tr(NULL);
6200 	if (ret)
6201 		return ret;
6202 
6203 	return seq_open(filp, &tracing_eval_map_seq_ops);
6204 }
6205 
6206 static const struct file_operations tracing_eval_map_fops = {
6207 	.open		= tracing_eval_map_open,
6208 	.read		= seq_read,
6209 	.llseek		= seq_lseek,
6210 	.release	= seq_release,
6211 };
6212 
6213 static inline union trace_eval_map_item *
6214 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6215 {
6216 	/* Return tail of array given the head */
6217 	return ptr + ptr->head.length + 1;
6218 }
6219 
6220 static void
6221 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6222 			   int len)
6223 {
6224 	struct trace_eval_map **stop;
6225 	struct trace_eval_map **map;
6226 	union trace_eval_map_item *map_array;
6227 	union trace_eval_map_item *ptr;
6228 
6229 	stop = start + len;
6230 
6231 	/*
6232 	 * The trace_eval_maps contains the map plus a head and tail item,
6233 	 * where the head holds the module and length of array, and the
6234 	 * tail holds a pointer to the next list.
6235 	 */
6236 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6237 	if (!map_array) {
6238 		pr_warn("Unable to allocate trace eval mapping\n");
6239 		return;
6240 	}
6241 
6242 	mutex_lock(&trace_eval_mutex);
6243 
6244 	if (!trace_eval_maps)
6245 		trace_eval_maps = map_array;
6246 	else {
6247 		ptr = trace_eval_maps;
6248 		for (;;) {
6249 			ptr = trace_eval_jmp_to_tail(ptr);
6250 			if (!ptr->tail.next)
6251 				break;
6252 			ptr = ptr->tail.next;
6253 
6254 		}
6255 		ptr->tail.next = map_array;
6256 	}
6257 	map_array->head.mod = mod;
6258 	map_array->head.length = len;
6259 	map_array++;
6260 
6261 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6262 		map_array->map = **map;
6263 		map_array++;
6264 	}
6265 	memset(map_array, 0, sizeof(*map_array));
6266 
6267 	mutex_unlock(&trace_eval_mutex);
6268 }
6269 
6270 static void trace_create_eval_file(struct dentry *d_tracer)
6271 {
6272 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6273 			  NULL, &tracing_eval_map_fops);
6274 }
6275 
6276 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6277 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6278 static inline void trace_insert_eval_map_file(struct module *mod,
6279 			      struct trace_eval_map **start, int len) { }
6280 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6281 
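/*
 * Update the events that use these eval maps and, when
 * CONFIG_TRACE_EVAL_MAP_FILE is enabled, also record the maps so that they
 * show up in the eval_map file.
 */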
6282 static void trace_insert_eval_map(struct module *mod,
6283 				  struct trace_eval_map **start, int len)
6284 {
6285 	struct trace_eval_map **map;
6286 
6287 	if (len <= 0)
6288 		return;
6289 
6290 	map = start;
6291 
6292 	trace_event_eval_update(map, len);
6293 
6294 	trace_insert_eval_map_file(mod, start, len);
6295 }
6296 
6297 static ssize_t
6298 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6299 		       size_t cnt, loff_t *ppos)
6300 {
6301 	struct trace_array *tr = filp->private_data;
6302 	char buf[MAX_TRACER_SIZE+2];
6303 	int r;
6304 
6305 	mutex_lock(&trace_types_lock);
6306 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6307 	mutex_unlock(&trace_types_lock);
6308 
6309 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6310 }
6311 
6312 int tracer_init(struct tracer *t, struct trace_array *tr)
6313 {
6314 	tracing_reset_online_cpus(&tr->array_buffer);
6315 	return t->init(tr);
6316 }
6317 
6318 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6319 {
6320 	int cpu;
6321 
6322 	for_each_tracing_cpu(cpu)
6323 		per_cpu_ptr(buf->data, cpu)->entries = val;
6324 }
6325 
6326 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6327 {
6328 	if (cpu == RING_BUFFER_ALL_CPUS) {
6329 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6330 	} else {
6331 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6332 	}
6333 }
6334 
6335 #ifdef CONFIG_TRACER_MAX_TRACE
6336 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6337 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6338 					struct array_buffer *size_buf, int cpu_id)
6339 {
6340 	int cpu, ret = 0;
6341 
6342 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6343 		for_each_tracing_cpu(cpu) {
6344 			ret = ring_buffer_resize(trace_buf->buffer,
6345 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6346 			if (ret < 0)
6347 				break;
6348 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6349 				per_cpu_ptr(size_buf->data, cpu)->entries;
6350 		}
6351 	} else {
6352 		ret = ring_buffer_resize(trace_buf->buffer,
6353 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6354 		if (ret == 0)
6355 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6356 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6357 	}
6358 
6359 	return ret;
6360 }
6361 #endif /* CONFIG_TRACER_MAX_TRACE */
6362 
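/*
 * Resize @tr's main ring buffer (and the max/snapshot buffer when one is
 * allocated) to @size for @cpu, or for all CPUs when @cpu is
 * RING_BUFFER_ALL_CPUS.  Tracing is stopped for the duration of the resize,
 * and the max buffer is kept the same size as the main buffer.
 */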
6363 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6364 					unsigned long size, int cpu)
6365 {
6366 	int ret;
6367 
6368 	/*
6369 	 * If kernel or user changes the size of the ring buffer
6370 	 * we use the size that was given, and we can forget about
6371 	 * expanding it later.
6372 	 */
6373 	trace_set_ring_buffer_expanded(tr);
6374 
6375 	/* May be called before buffers are initialized */
6376 	if (!tr->array_buffer.buffer)
6377 		return 0;
6378 
6379 	/* Do not allow tracing while resizing ring buffer */
6380 	tracing_stop_tr(tr);
6381 
6382 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6383 	if (ret < 0)
6384 		goto out_start;
6385 
6386 #ifdef CONFIG_TRACER_MAX_TRACE
6387 	if (!tr->allocated_snapshot)
6388 		goto out;
6389 
6390 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6391 	if (ret < 0) {
6392 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6393 						     &tr->array_buffer, cpu);
6394 		if (r < 0) {
6395 			/*
6396 			 * AARGH! We are left with different
6397 			 * size max buffer!!!!
6398 			 * The max buffer is our "snapshot" buffer.
6399 			 * When a tracer needs a snapshot (one of the
6400 			 * latency tracers), it swaps the max buffer
6401 			 * with the saved snapshot. We succeeded in updating
6402 			 * the size of the main buffer, but failed to
6403 			 * update the size of the max buffer. But when we tried
6404 			 * to reset the main buffer to the original size, we
6405 			 * failed there too. This is very unlikely to
6406 			 * happen, but if it does, warn and kill all
6407 			 * tracing.
6408 			 */
6409 			WARN_ON(1);
6410 			tracing_disabled = 1;
6411 		}
6412 		goto out_start;
6413 	}
6414 
6415 	update_buffer_entries(&tr->max_buffer, cpu);
6416 
6417  out:
6418 #endif /* CONFIG_TRACER_MAX_TRACE */
6419 
6420 	update_buffer_entries(&tr->array_buffer, cpu);
6421  out_start:
6422 	tracing_start_tr(tr);
6423 	return ret;
6424 }
6425 
6426 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6427 				  unsigned long size, int cpu_id)
6428 {
6429 	int ret;
6430 
6431 	mutex_lock(&trace_types_lock);
6432 
6433 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6434 		/* make sure, this cpu is enabled in the mask */
6435 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6436 			ret = -EINVAL;
6437 			goto out;
6438 		}
6439 	}
6440 
6441 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6442 	if (ret < 0)
6443 		ret = -ENOMEM;
6444 
6445 out:
6446 	mutex_unlock(&trace_types_lock);
6447 
6448 	return ret;
6449 }
6450 
6451 
6452 /**
6453  * tracing_update_buffers - used by tracing facility to expand ring buffers
6454  * @tr: The tracing instance
6455  *
6456  * To save memory when tracing is never used on a system that has it
6457  * configured in, the ring buffers are set to a minimum size. Once a
6458  * user starts to use the tracing facility, they need to grow to
6459  * their default size.
6460  *
6461  * This function is to be called when a tracer is about to be used.
6462  */
6463 int tracing_update_buffers(struct trace_array *tr)
6464 {
6465 	int ret = 0;
6466 
6467 	mutex_lock(&trace_types_lock);
6468 	if (!tr->ring_buffer_expanded)
6469 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6470 						RING_BUFFER_ALL_CPUS);
6471 	mutex_unlock(&trace_types_lock);
6472 
6473 	return ret;
6474 }
6475 
6476 struct trace_option_dentry;
6477 
6478 static void
6479 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6480 
6481 /*
6482  * Used to clear out the tracer before deletion of an instance.
6483  * Must have trace_types_lock held.
6484  */
6485 static void tracing_set_nop(struct trace_array *tr)
6486 {
6487 	if (tr->current_trace == &nop_trace)
6488 		return;
6489 
6490 	tr->current_trace->enabled--;
6491 
6492 	if (tr->current_trace->reset)
6493 		tr->current_trace->reset(tr);
6494 
6495 	tr->current_trace = &nop_trace;
6496 }
6497 
6498 static bool tracer_options_updated;
6499 
6500 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6501 {
6502 	/* Only enable if the directory has been created already. */
6503 	if (!tr->dir)
6504 		return;
6505 
6506 	/* Only create trace option files after update_tracer_options finish */
6507 	if (!tracer_options_updated)
6508 		return;
6509 
6510 	create_trace_option_files(tr, t);
6511 }
6512 
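/*
 * tracing_set_tracer - switch @tr to the tracer named @buf
 *
 * Expands the ring buffer if it has not been expanded yet, refuses the
 * switch while trace_pipe readers hold a reference, and allocates or frees
 * the snapshot buffer when changing to or from a tracer that uses the max
 * buffer.
 */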
6513 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6514 {
6515 	struct tracer *t;
6516 #ifdef CONFIG_TRACER_MAX_TRACE
6517 	bool had_max_tr;
6518 #endif
6519 	int ret = 0;
6520 
6521 	mutex_lock(&trace_types_lock);
6522 
6523 	if (!tr->ring_buffer_expanded) {
6524 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6525 						RING_BUFFER_ALL_CPUS);
6526 		if (ret < 0)
6527 			goto out;
6528 		ret = 0;
6529 	}
6530 
6531 	for (t = trace_types; t; t = t->next) {
6532 		if (strcmp(t->name, buf) == 0)
6533 			break;
6534 	}
6535 	if (!t) {
6536 		ret = -EINVAL;
6537 		goto out;
6538 	}
6539 	if (t == tr->current_trace)
6540 		goto out;
6541 
6542 #ifdef CONFIG_TRACER_SNAPSHOT
6543 	if (t->use_max_tr) {
6544 		local_irq_disable();
6545 		arch_spin_lock(&tr->max_lock);
6546 		if (tr->cond_snapshot)
6547 			ret = -EBUSY;
6548 		arch_spin_unlock(&tr->max_lock);
6549 		local_irq_enable();
6550 		if (ret)
6551 			goto out;
6552 	}
6553 #endif
6554 	/* Some tracers won't work on kernel command line */
6555 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6556 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6557 			t->name);
6558 		goto out;
6559 	}
6560 
6561 	/* Some tracers are only allowed for the top level buffer */
6562 	if (!trace_ok_for_array(t, tr)) {
6563 		ret = -EINVAL;
6564 		goto out;
6565 	}
6566 
6567 	/* If trace pipe files are being read, we can't change the tracer */
6568 	if (tr->trace_ref) {
6569 		ret = -EBUSY;
6570 		goto out;
6571 	}
6572 
6573 	trace_branch_disable();
6574 
6575 	tr->current_trace->enabled--;
6576 
6577 	if (tr->current_trace->reset)
6578 		tr->current_trace->reset(tr);
6579 
6580 #ifdef CONFIG_TRACER_MAX_TRACE
6581 	had_max_tr = tr->current_trace->use_max_tr;
6582 
6583 	/* Current trace needs to be nop_trace before synchronize_rcu */
6584 	tr->current_trace = &nop_trace;
6585 
6586 	if (had_max_tr && !t->use_max_tr) {
6587 		/*
6588 		 * We need to make sure that the update_max_tr sees that
6589 		 * current_trace changed to nop_trace to keep it from
6590 		 * swapping the buffers after we resize it.
6591 		 * update_max_tr() is called with interrupts disabled,
6592 		 * so a synchronize_rcu() is sufficient.
6593 		 */
6594 		synchronize_rcu();
6595 		free_snapshot(tr);
6596 	}
6597 
6598 	if (t->use_max_tr && !tr->allocated_snapshot) {
6599 		ret = tracing_alloc_snapshot_instance(tr);
6600 		if (ret < 0)
6601 			goto out;
6602 	}
6603 #else
6604 	tr->current_trace = &nop_trace;
6605 #endif
6606 
6607 	if (t->init) {
6608 		ret = tracer_init(t, tr);
6609 		if (ret)
6610 			goto out;
6611 	}
6612 
6613 	tr->current_trace = t;
6614 	tr->current_trace->enabled++;
6615 	trace_branch_enable(tr);
6616  out:
6617 	mutex_unlock(&trace_types_lock);
6618 
6619 	return ret;
6620 }
6621 
6622 static ssize_t
6623 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6624 			size_t cnt, loff_t *ppos)
6625 {
6626 	struct trace_array *tr = filp->private_data;
6627 	char buf[MAX_TRACER_SIZE+1];
6628 	char *name;
6629 	size_t ret;
6630 	int err;
6631 
6632 	ret = cnt;
6633 
6634 	if (cnt > MAX_TRACER_SIZE)
6635 		cnt = MAX_TRACER_SIZE;
6636 
6637 	if (copy_from_user(buf, ubuf, cnt))
6638 		return -EFAULT;
6639 
6640 	buf[cnt] = 0;
6641 
6642 	name = strim(buf);
6643 
6644 	err = tracing_set_tracer(tr, name);
6645 	if (err)
6646 		return err;
6647 
6648 	*ppos += ret;
6649 
6650 	return ret;
6651 }
6652 
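/*
 * Helpers for files that store a nanosecond value but expose it in
 * microseconds (tracing_thresh and, with CONFIG_TRACER_MAX_TRACE,
 * tracing_max_latency).  A stored value of -1 is shown as -1; writes are
 * taken in microseconds and stored as nanoseconds.
 */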
6653 static ssize_t
6654 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6655 		   size_t cnt, loff_t *ppos)
6656 {
6657 	char buf[64];
6658 	int r;
6659 
6660 	r = snprintf(buf, sizeof(buf), "%ld\n",
6661 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6662 	if (r > sizeof(buf))
6663 		r = sizeof(buf);
6664 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6665 }
6666 
6667 static ssize_t
6668 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6669 		    size_t cnt, loff_t *ppos)
6670 {
6671 	unsigned long val;
6672 	int ret;
6673 
6674 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6675 	if (ret)
6676 		return ret;
6677 
6678 	*ptr = val * 1000;
6679 
6680 	return cnt;
6681 }
6682 
6683 static ssize_t
6684 tracing_thresh_read(struct file *filp, char __user *ubuf,
6685 		    size_t cnt, loff_t *ppos)
6686 {
6687 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6688 }
6689 
6690 static ssize_t
6691 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6692 		     size_t cnt, loff_t *ppos)
6693 {
6694 	struct trace_array *tr = filp->private_data;
6695 	int ret;
6696 
6697 	mutex_lock(&trace_types_lock);
6698 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6699 	if (ret < 0)
6700 		goto out;
6701 
6702 	if (tr->current_trace->update_thresh) {
6703 		ret = tr->current_trace->update_thresh(tr);
6704 		if (ret < 0)
6705 			goto out;
6706 	}
6707 
6708 	ret = cnt;
6709 out:
6710 	mutex_unlock(&trace_types_lock);
6711 
6712 	return ret;
6713 }
6714 
6715 #ifdef CONFIG_TRACER_MAX_TRACE
6716 
6717 static ssize_t
6718 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6719 		     size_t cnt, loff_t *ppos)
6720 {
6721 	struct trace_array *tr = filp->private_data;
6722 
6723 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6724 }
6725 
6726 static ssize_t
6727 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6728 		      size_t cnt, loff_t *ppos)
6729 {
6730 	struct trace_array *tr = filp->private_data;
6731 
6732 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6733 }
6734 
6735 #endif
6736 
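/*
 * Only one trace_pipe reader is allowed per CPU, and a reader of the
 * all-CPU pipe excludes every per-CPU reader (and vice versa).
 * tr->pipe_cpumask tracks which CPUs currently have a reader.
 */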
6737 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6738 {
6739 	if (cpu == RING_BUFFER_ALL_CPUS) {
6740 		if (cpumask_empty(tr->pipe_cpumask)) {
6741 			cpumask_setall(tr->pipe_cpumask);
6742 			return 0;
6743 		}
6744 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6745 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6746 		return 0;
6747 	}
6748 	return -EBUSY;
6749 }
6750 
6751 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6752 {
6753 	if (cpu == RING_BUFFER_ALL_CPUS) {
6754 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6755 		cpumask_clear(tr->pipe_cpumask);
6756 	} else {
6757 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6758 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6759 	}
6760 }
6761 
6762 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6763 {
6764 	struct trace_array *tr = inode->i_private;
6765 	struct trace_iterator *iter;
6766 	int cpu;
6767 	int ret;
6768 
6769 	ret = tracing_check_open_get_tr(tr);
6770 	if (ret)
6771 		return ret;
6772 
6773 	mutex_lock(&trace_types_lock);
6774 	cpu = tracing_get_cpu(inode);
6775 	ret = open_pipe_on_cpu(tr, cpu);
6776 	if (ret)
6777 		goto fail_pipe_on_cpu;
6778 
6779 	/* create a buffer to store the information to pass to userspace */
6780 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6781 	if (!iter) {
6782 		ret = -ENOMEM;
6783 		goto fail_alloc_iter;
6784 	}
6785 
6786 	trace_seq_init(&iter->seq);
6787 	iter->trace = tr->current_trace;
6788 
6789 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6790 		ret = -ENOMEM;
6791 		goto fail;
6792 	}
6793 
6794 	/* trace pipe does not show start of buffer */
6795 	cpumask_setall(iter->started);
6796 
6797 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6798 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6799 
6800 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6801 	if (trace_clocks[tr->clock_id].in_ns)
6802 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6803 
6804 	iter->tr = tr;
6805 	iter->array_buffer = &tr->array_buffer;
6806 	iter->cpu_file = cpu;
6807 	mutex_init(&iter->mutex);
6808 	filp->private_data = iter;
6809 
6810 	if (iter->trace->pipe_open)
6811 		iter->trace->pipe_open(iter);
6812 
6813 	nonseekable_open(inode, filp);
6814 
6815 	tr->trace_ref++;
6816 
6817 	mutex_unlock(&trace_types_lock);
6818 	return ret;
6819 
6820 fail:
6821 	kfree(iter);
6822 fail_alloc_iter:
6823 	close_pipe_on_cpu(tr, cpu);
6824 fail_pipe_on_cpu:
6825 	__trace_array_put(tr);
6826 	mutex_unlock(&trace_types_lock);
6827 	return ret;
6828 }
6829 
6830 static int tracing_release_pipe(struct inode *inode, struct file *file)
6831 {
6832 	struct trace_iterator *iter = file->private_data;
6833 	struct trace_array *tr = inode->i_private;
6834 
6835 	mutex_lock(&trace_types_lock);
6836 
6837 	tr->trace_ref--;
6838 
6839 	if (iter->trace->pipe_close)
6840 		iter->trace->pipe_close(iter);
6841 	close_pipe_on_cpu(tr, iter->cpu_file);
6842 	mutex_unlock(&trace_types_lock);
6843 
6844 	free_trace_iter_content(iter);
6845 	kfree(iter);
6846 
6847 	trace_array_put(tr);
6848 
6849 	return 0;
6850 }
6851 
6852 static __poll_t
6853 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6854 {
6855 	struct trace_array *tr = iter->tr;
6856 
6857 	/* Iterators are static; they should be either filled or empty */
6858 	if (trace_buffer_iter(iter, iter->cpu_file))
6859 		return EPOLLIN | EPOLLRDNORM;
6860 
6861 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6862 		/*
6863 		 * Always select as readable when in blocking mode
6864 		 */
6865 		return EPOLLIN | EPOLLRDNORM;
6866 	else
6867 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6868 					     filp, poll_table, iter->tr->buffer_percent);
6869 }
6870 
6871 static __poll_t
6872 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6873 {
6874 	struct trace_iterator *iter = filp->private_data;
6875 
6876 	return trace_poll(iter, filp, poll_table);
6877 }
6878 
6879 /* Must be called with iter->mutex held. */
6880 static int tracing_wait_pipe(struct file *filp)
6881 {
6882 	struct trace_iterator *iter = filp->private_data;
6883 	int ret;
6884 
6885 	while (trace_empty(iter)) {
6886 
6887 		if (filp->f_flags & O_NONBLOCK)
6888 			return -EAGAIN;
6890 
6891 		/*
6892 		 * We block until we read something and tracing is disabled.
6893 		 * We still block if tracing is disabled, but we have never
6894 		 * read anything. This allows a user to cat this file, and
6895 		 * then enable tracing. But after we have read something,
6896 		 * we give an EOF when tracing is again disabled.
6897 		 *
6898 		 * iter->pos will be 0 if we haven't read anything.
6899 		 */
6900 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6901 			break;
6902 
6903 		mutex_unlock(&iter->mutex);
6904 
6905 		ret = wait_on_pipe(iter, 0);
6906 
6907 		mutex_lock(&iter->mutex);
6908 
6909 		if (ret)
6910 			return ret;
6911 	}
6912 
6913 	return 1;
6914 }
6915 
6916 /*
6917  * Consumer reader.
6918  */
6919 static ssize_t
6920 tracing_read_pipe(struct file *filp, char __user *ubuf,
6921 		  size_t cnt, loff_t *ppos)
6922 {
6923 	struct trace_iterator *iter = filp->private_data;
6924 	ssize_t sret;
6925 
6926 	/*
6927 	 * Avoid more than one consumer on a single file descriptor
6928 	 * This is just a matter of traces coherency, the ring buffer itself
6929 	 * is protected.
6930 	 */
6931 	mutex_lock(&iter->mutex);
6932 
6933 	/* return any leftover data */
6934 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6935 	if (sret != -EBUSY)
6936 		goto out;
6937 
6938 	trace_seq_init(&iter->seq);
6939 
6940 	if (iter->trace->read) {
6941 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6942 		if (sret)
6943 			goto out;
6944 	}
6945 
6946 waitagain:
6947 	sret = tracing_wait_pipe(filp);
6948 	if (sret <= 0)
6949 		goto out;
6950 
6951 	/* stop when tracing is finished */
6952 	if (trace_empty(iter)) {
6953 		sret = 0;
6954 		goto out;
6955 	}
6956 
6957 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6958 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6959 
6960 	/* reset all but tr, trace, and overruns */
6961 	trace_iterator_reset(iter);
6962 	cpumask_clear(iter->started);
6963 	trace_seq_init(&iter->seq);
6964 
6965 	trace_event_read_lock();
6966 	trace_access_lock(iter->cpu_file);
6967 	while (trace_find_next_entry_inc(iter) != NULL) {
6968 		enum print_line_t ret;
6969 		int save_len = iter->seq.seq.len;
6970 
6971 		ret = print_trace_line(iter);
6972 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6973 			/*
6974 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6975 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6976 			 * In this case, we need to consume it, otherwise the loop will peek
6977 			 * this event again next time, resulting in an infinite loop.
6978 			 */
6979 			if (save_len == 0) {
6980 				iter->seq.full = 0;
6981 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6982 				trace_consume(iter);
6983 				break;
6984 			}
6985 
6986 			/* In other cases, don't print partial lines */
6987 			iter->seq.seq.len = save_len;
6988 			break;
6989 		}
6990 		if (ret != TRACE_TYPE_NO_CONSUME)
6991 			trace_consume(iter);
6992 
6993 		if (trace_seq_used(&iter->seq) >= cnt)
6994 			break;
6995 
6996 		/*
6997 		 * Setting the full flag means we reached the trace_seq buffer
6998 		 * size and we should leave by partial output condition above.
6999 		 * One of the trace_seq_* functions is not used properly.
7000 		 */
7001 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7002 			  iter->ent->type);
7003 	}
7004 	trace_access_unlock(iter->cpu_file);
7005 	trace_event_read_unlock();
7006 
7007 	/* Now copy what we have to the user */
7008 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7009 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
7010 		trace_seq_init(&iter->seq);
7011 
7012 	/*
7013 	 * If there was nothing to send to user, in spite of consuming trace
7014 	 * entries, go back to wait for more entries.
7015 	 */
7016 	if (sret == -EBUSY)
7017 		goto waitagain;
7018 
7019 out:
7020 	mutex_unlock(&iter->mutex);
7021 
7022 	return sret;
7023 }
7024 
7025 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7026 				     unsigned int idx)
7027 {
7028 	__free_page(spd->pages[idx]);
7029 }
7030 
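/*
 * Fill iter->seq with as many formatted trace lines as will fit in one page
 * (the seq buffer size), consuming the events that were printed.  Returns
 * the number of bytes still wanted; it drops to 0 when the request is
 * satisfied or the trace runs dry.
 */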
7031 static size_t
7032 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7033 {
7034 	size_t count;
7035 	int save_len;
7036 	int ret;
7037 
7038 	/* Seq buffer is page-sized, exactly what we need. */
7039 	for (;;) {
7040 		save_len = iter->seq.seq.len;
7041 		ret = print_trace_line(iter);
7042 
7043 		if (trace_seq_has_overflowed(&iter->seq)) {
7044 			iter->seq.seq.len = save_len;
7045 			break;
7046 		}
7047 
7048 		/*
7049 		 * This should not be hit, because it should only
7050 		 * be set if the iter->seq overflowed. But check it
7051 		 * anyway to be safe.
7052 		 */
7053 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
7054 			iter->seq.seq.len = save_len;
7055 			break;
7056 		}
7057 
7058 		count = trace_seq_used(&iter->seq) - save_len;
7059 		if (rem < count) {
7060 			rem = 0;
7061 			iter->seq.seq.len = save_len;
7062 			break;
7063 		}
7064 
7065 		if (ret != TRACE_TYPE_NO_CONSUME)
7066 			trace_consume(iter);
7067 		rem -= count;
7068 		if (!trace_find_next_entry_inc(iter))	{
7069 			rem = 0;
7070 			iter->ent = NULL;
7071 			break;
7072 		}
7073 	}
7074 
7075 	return rem;
7076 }
7077 
7078 static ssize_t tracing_splice_read_pipe(struct file *filp,
7079 					loff_t *ppos,
7080 					struct pipe_inode_info *pipe,
7081 					size_t len,
7082 					unsigned int flags)
7083 {
7084 	struct page *pages_def[PIPE_DEF_BUFFERS];
7085 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7086 	struct trace_iterator *iter = filp->private_data;
7087 	struct splice_pipe_desc spd = {
7088 		.pages		= pages_def,
7089 		.partial	= partial_def,
7090 		.nr_pages	= 0, /* This gets updated below. */
7091 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7092 		.ops		= &default_pipe_buf_ops,
7093 		.spd_release	= tracing_spd_release_pipe,
7094 	};
7095 	ssize_t ret;
7096 	size_t rem;
7097 	unsigned int i;
7098 
7099 	if (splice_grow_spd(pipe, &spd))
7100 		return -ENOMEM;
7101 
7102 	mutex_lock(&iter->mutex);
7103 
7104 	if (iter->trace->splice_read) {
7105 		ret = iter->trace->splice_read(iter, filp,
7106 					       ppos, pipe, len, flags);
7107 		if (ret)
7108 			goto out_err;
7109 	}
7110 
7111 	ret = tracing_wait_pipe(filp);
7112 	if (ret <= 0)
7113 		goto out_err;
7114 
7115 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7116 		ret = -EFAULT;
7117 		goto out_err;
7118 	}
7119 
7120 	trace_event_read_lock();
7121 	trace_access_lock(iter->cpu_file);
7122 
7123 	/* Fill as many pages as possible. */
7124 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7125 		spd.pages[i] = alloc_page(GFP_KERNEL);
7126 		if (!spd.pages[i])
7127 			break;
7128 
7129 		rem = tracing_fill_pipe_page(rem, iter);
7130 
7131 		/* Copy the data into the page, so we can start over. */
7132 		ret = trace_seq_to_buffer(&iter->seq,
7133 					  page_address(spd.pages[i]),
7134 					  trace_seq_used(&iter->seq));
7135 		if (ret < 0) {
7136 			__free_page(spd.pages[i]);
7137 			break;
7138 		}
7139 		spd.partial[i].offset = 0;
7140 		spd.partial[i].len = trace_seq_used(&iter->seq);
7141 
7142 		trace_seq_init(&iter->seq);
7143 	}
7144 
7145 	trace_access_unlock(iter->cpu_file);
7146 	trace_event_read_unlock();
7147 	mutex_unlock(&iter->mutex);
7148 
7149 	spd.nr_pages = i;
7150 
7151 	if (i)
7152 		ret = splice_to_pipe(pipe, &spd);
7153 	else
7154 		ret = 0;
7155 out:
7156 	splice_shrink_spd(&spd);
7157 	return ret;
7158 
7159 out_err:
7160 	mutex_unlock(&iter->mutex);
7161 	goto out;
7162 }
7163 
7164 static ssize_t
7165 tracing_entries_read(struct file *filp, char __user *ubuf,
7166 		     size_t cnt, loff_t *ppos)
7167 {
7168 	struct inode *inode = file_inode(filp);
7169 	struct trace_array *tr = inode->i_private;
7170 	int cpu = tracing_get_cpu(inode);
7171 	char buf[64];
7172 	int r = 0;
7173 	ssize_t ret;
7174 
7175 	mutex_lock(&trace_types_lock);
7176 
7177 	if (cpu == RING_BUFFER_ALL_CPUS) {
7178 		int cpu, buf_size_same;
7179 		unsigned long size;
7180 
7181 		size = 0;
7182 		buf_size_same = 1;
7183 		/* check if all cpu sizes are same */
7184 		for_each_tracing_cpu(cpu) {
7185 			/* fill in the size from first enabled cpu */
7186 			if (size == 0)
7187 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7188 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7189 				buf_size_same = 0;
7190 				break;
7191 			}
7192 		}
7193 
7194 		if (buf_size_same) {
7195 			if (!tr->ring_buffer_expanded)
7196 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7197 					    size >> 10,
7198 					    trace_buf_size >> 10);
7199 			else
7200 				r = sprintf(buf, "%lu\n", size >> 10);
7201 		} else
7202 			r = sprintf(buf, "X\n");
7203 	} else
7204 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7205 
7206 	mutex_unlock(&trace_types_lock);
7207 
7208 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7209 	return ret;
7210 }
7211 
7212 static ssize_t
7213 tracing_entries_write(struct file *filp, const char __user *ubuf,
7214 		      size_t cnt, loff_t *ppos)
7215 {
7216 	struct inode *inode = file_inode(filp);
7217 	struct trace_array *tr = inode->i_private;
7218 	unsigned long val;
7219 	int ret;
7220 
7221 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7222 	if (ret)
7223 		return ret;
7224 
7225 	/* must have at least 1 entry */
7226 	if (!val)
7227 		return -EINVAL;
7228 
7229 	/* value is in KB */
7230 	val <<= 10;
7231 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7232 	if (ret < 0)
7233 		return ret;
7234 
7235 	*ppos += cnt;
7236 
7237 	return cnt;
7238 }
7239 
7240 static ssize_t
7241 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7242 				size_t cnt, loff_t *ppos)
7243 {
7244 	struct trace_array *tr = filp->private_data;
7245 	char buf[64];
7246 	int r, cpu;
7247 	unsigned long size = 0, expanded_size = 0;
7248 
7249 	mutex_lock(&trace_types_lock);
7250 	for_each_tracing_cpu(cpu) {
7251 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7252 		if (!tr->ring_buffer_expanded)
7253 			expanded_size += trace_buf_size >> 10;
7254 	}
7255 	if (tr->ring_buffer_expanded)
7256 		r = sprintf(buf, "%lu\n", size);
7257 	else
7258 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7259 	mutex_unlock(&trace_types_lock);
7260 
7261 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7262 }
7263 
7264 static ssize_t
7265 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7266 			  size_t cnt, loff_t *ppos)
7267 {
7268 	/*
7269 	 * There is no need to read what the user has written; this function
7270 	 * is just here to make sure that there is no error when "echo" is used.
7271 	 */
7272 
7273 	*ppos += cnt;
7274 
7275 	return cnt;
7276 }
7277 
7278 static int
7279 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7280 {
7281 	struct trace_array *tr = inode->i_private;
7282 
7283 	/* disable tracing ? */
7284 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7285 		tracer_tracing_off(tr);
7286 	/* resize the ring buffer to 0 */
7287 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7288 
7289 	trace_array_put(tr);
7290 
7291 	return 0;
7292 }
7293 
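/*
 * Write handler for the trace_marker file: the user's string is copied into
 * a TRACE_PRINT event in the ring buffer (with "<faulted>" substituted if
 * the copy from user space faults), any triggers attached to the marker
 * event are run, and a trailing newline is added if the string did not end
 * with one.  As the mini-HOWTO above notes, this is typically driven from
 * user space with something like:
 *
 *	echo hello_world > trace_marker
 */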
7294 static ssize_t
7295 tracing_mark_write(struct file *filp, const char __user *ubuf,
7296 					size_t cnt, loff_t *fpos)
7297 {
7298 	struct trace_array *tr = filp->private_data;
7299 	struct ring_buffer_event *event;
7300 	enum event_trigger_type tt = ETT_NONE;
7301 	struct trace_buffer *buffer;
7302 	struct print_entry *entry;
7303 	int meta_size;
7304 	ssize_t written;
7305 	size_t size;
7306 	int len;
7307 
7308 /* Used in tracing_mark_raw_write() as well */
7309 #define FAULTED_STR "<faulted>"
7310 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7311 
7312 	if (tracing_disabled)
7313 		return -EINVAL;
7314 
7315 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7316 		return -EINVAL;
7317 
7318 	if ((ssize_t)cnt < 0)
7319 		return -EINVAL;
7320 
7321 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7322  again:
7323 	size = cnt + meta_size;
7324 
7325 	/* If less than "<faulted>", then make sure we can still add that */
7326 	if (cnt < FAULTED_SIZE)
7327 		size += FAULTED_SIZE - cnt;
7328 
7329 	if (size > TRACE_SEQ_BUFFER_SIZE) {
7330 		cnt -= size - TRACE_SEQ_BUFFER_SIZE;
7331 		goto again;
7332 	}
7333 
7334 	buffer = tr->array_buffer.buffer;
7335 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7336 					    tracing_gen_ctx());
7337 	if (unlikely(!event)) {
7338 		/*
7339 		 * If the size was greater than what was allowed, then
7340 		 * make it smaller and try again.
7341 		 */
7342 		if (size > ring_buffer_max_event_size(buffer)) {
7343 			/* With cnt < FAULTED_SIZE, size should never be bigger than max */
7344 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7345 				return -EBADF;
7346 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7347 			/* The above should only happen once */
7348 			if (WARN_ON_ONCE(cnt + meta_size == size))
7349 				return -EBADF;
7350 			goto again;
7351 		}
7352 
7353 		/* Ring buffer disabled, return as if not open for write */
7354 		return -EBADF;
7355 	}
7356 
7357 	entry = ring_buffer_event_data(event);
7358 	entry->ip = _THIS_IP_;
7359 
7360 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7361 	if (len) {
7362 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7363 		cnt = FAULTED_SIZE;
7364 		written = -EFAULT;
7365 	} else
7366 		written = cnt;
7367 
7368 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7369 		/* do not add \n before testing triggers, but add \0 */
7370 		entry->buf[cnt] = '\0';
7371 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7372 	}
7373 
7374 	if (entry->buf[cnt - 1] != '\n') {
7375 		entry->buf[cnt] = '\n';
7376 		entry->buf[cnt + 1] = '\0';
7377 	} else
7378 		entry->buf[cnt] = '\0';
7379 
7380 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7381 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7382 	__buffer_unlock_commit(buffer, event);
7383 
7384 	if (tt)
7385 		event_triggers_post_call(tr->trace_marker_file, tt);
7386 
7387 	return written;
7388 }
7389 
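/*
 * Write handler for the trace_marker_raw file: like tracing_mark_write()
 * but the payload is binary and must start with an (unsigned int) tag id,
 * which is stored in a TRACE_RAW_DATA event.
 */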
7390 static ssize_t
7391 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7392 					size_t cnt, loff_t *fpos)
7393 {
7394 	struct trace_array *tr = filp->private_data;
7395 	struct ring_buffer_event *event;
7396 	struct trace_buffer *buffer;
7397 	struct raw_data_entry *entry;
7398 	ssize_t written;
7399 	int size;
7400 	int len;
7401 
7402 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7403 
7404 	if (tracing_disabled)
7405 		return -EINVAL;
7406 
7407 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7408 		return -EINVAL;
7409 
7410 	/* The marker must at least have a tag id */
7411 	if (cnt < sizeof(unsigned int))
7412 		return -EINVAL;
7413 
7414 	size = sizeof(*entry) + cnt;
7415 	if (cnt < FAULT_SIZE_ID)
7416 		size += FAULT_SIZE_ID - cnt;
7417 
7418 	buffer = tr->array_buffer.buffer;
7419 
7420 	if (size > ring_buffer_max_event_size(buffer))
7421 		return -EINVAL;
7422 
7423 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7424 					    tracing_gen_ctx());
7425 	if (!event)
7426 		/* Ring buffer disabled, return as if not open for write */
7427 		return -EBADF;
7428 
7429 	entry = ring_buffer_event_data(event);
7430 
7431 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7432 	if (len) {
7433 		entry->id = -1;
7434 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7435 		written = -EFAULT;
7436 	} else
7437 		written = cnt;
7438 
7439 	__buffer_unlock_commit(buffer, event);
7440 
7441 	return written;
7442 }
7443 
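/*
 * Show the available trace clocks with the active one in brackets,
 * e.g. "[local] global counter ...".  Writing one of the names to the
 * trace_clock file (see tracing_set_clock() below) switches clocks and
 * resets the buffers, since timestamps from different clocks are not
 * comparable.
 */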
7444 static int tracing_clock_show(struct seq_file *m, void *v)
7445 {
7446 	struct trace_array *tr = m->private;
7447 	int i;
7448 
7449 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7450 		seq_printf(m,
7451 			"%s%s%s%s", i ? " " : "",
7452 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7453 			i == tr->clock_id ? "]" : "");
7454 	seq_putc(m, '\n');
7455 
7456 	return 0;
7457 }
7458 
7459 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7460 {
7461 	int i;
7462 
7463 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7464 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7465 			break;
7466 	}
7467 	if (i == ARRAY_SIZE(trace_clocks))
7468 		return -EINVAL;
7469 
7470 	mutex_lock(&trace_types_lock);
7471 
7472 	tr->clock_id = i;
7473 
7474 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7475 
7476 	/*
7477 	 * New clock may not be consistent with the previous clock.
7478 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7479 	 */
7480 	tracing_reset_online_cpus(&tr->array_buffer);
7481 
7482 #ifdef CONFIG_TRACER_MAX_TRACE
7483 	if (tr->max_buffer.buffer)
7484 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7485 	tracing_reset_online_cpus(&tr->max_buffer);
7486 #endif
7487 
7488 	mutex_unlock(&trace_types_lock);
7489 
7490 	return 0;
7491 }
7492 
7493 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7494 				   size_t cnt, loff_t *fpos)
7495 {
7496 	struct seq_file *m = filp->private_data;
7497 	struct trace_array *tr = m->private;
7498 	char buf[64];
7499 	const char *clockstr;
7500 	int ret;
7501 
7502 	if (cnt >= sizeof(buf))
7503 		return -EINVAL;
7504 
7505 	if (copy_from_user(buf, ubuf, cnt))
7506 		return -EFAULT;
7507 
7508 	buf[cnt] = 0;
7509 
7510 	clockstr = strstrip(buf);
7511 
7512 	ret = tracing_set_clock(tr, clockstr);
7513 	if (ret)
7514 		return ret;
7515 
7516 	*fpos += cnt;
7517 
7518 	return cnt;
7519 }
7520 
7521 static int tracing_clock_open(struct inode *inode, struct file *file)
7522 {
7523 	struct trace_array *tr = inode->i_private;
7524 	int ret;
7525 
7526 	ret = tracing_check_open_get_tr(tr);
7527 	if (ret)
7528 		return ret;
7529 
7530 	ret = single_open(file, tracing_clock_show, inode->i_private);
7531 	if (ret < 0)
7532 		trace_array_put(tr);
7533 
7534 	return ret;
7535 }
7536 
7537 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7538 {
7539 	struct trace_array *tr = m->private;
7540 
7541 	mutex_lock(&trace_types_lock);
7542 
7543 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7544 		seq_puts(m, "delta [absolute]\n");
7545 	else
7546 		seq_puts(m, "[delta] absolute\n");
7547 
7548 	mutex_unlock(&trace_types_lock);
7549 
7550 	return 0;
7551 }
7552 
7553 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7554 {
7555 	struct trace_array *tr = inode->i_private;
7556 	int ret;
7557 
7558 	ret = tracing_check_open_get_tr(tr);
7559 	if (ret)
7560 		return ret;
7561 
7562 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7563 	if (ret < 0)
7564 		trace_array_put(tr);
7565 
7566 	return ret;
7567 }
7568 
7569 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7570 {
7571 	if (rbe == this_cpu_read(trace_buffered_event))
7572 		return ring_buffer_time_stamp(buffer);
7573 
7574 	return ring_buffer_event_time_stamp(buffer, rbe);
7575 }
7576 
7577 /*
7578  * Set or disable using the per-CPU trace_buffered_event when possible.
7579  */
7580 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7581 {
7582 	int ret = 0;
7583 
7584 	mutex_lock(&trace_types_lock);
7585 
7586 	if (set && tr->no_filter_buffering_ref++)
7587 		goto out;
7588 
7589 	if (!set) {
7590 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7591 			ret = -EINVAL;
7592 			goto out;
7593 		}
7594 
7595 		--tr->no_filter_buffering_ref;
7596 	}
7597  out:
7598 	mutex_unlock(&trace_types_lock);
7599 
7600 	return ret;
7601 }
7602 
7603 struct ftrace_buffer_info {
7604 	struct trace_iterator	iter;
7605 	void			*spare;
7606 	unsigned int		spare_cpu;
7607 	unsigned int		spare_size;
7608 	unsigned int		read;
7609 };
7610 
7611 #ifdef CONFIG_TRACER_SNAPSHOT
7612 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7613 {
7614 	struct trace_array *tr = inode->i_private;
7615 	struct trace_iterator *iter;
7616 	struct seq_file *m;
7617 	int ret;
7618 
7619 	ret = tracing_check_open_get_tr(tr);
7620 	if (ret)
7621 		return ret;
7622 
7623 	if (file->f_mode & FMODE_READ) {
7624 		iter = __tracing_open(inode, file, true);
7625 		if (IS_ERR(iter))
7626 			ret = PTR_ERR(iter);
7627 	} else {
7628 		/* Writes still need the seq_file to hold the private data */
7629 		ret = -ENOMEM;
7630 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7631 		if (!m)
7632 			goto out;
7633 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7634 		if (!iter) {
7635 			kfree(m);
7636 			goto out;
7637 		}
7638 		ret = 0;
7639 
7640 		iter->tr = tr;
7641 		iter->array_buffer = &tr->max_buffer;
7642 		iter->cpu_file = tracing_get_cpu(inode);
7643 		m->private = iter;
7644 		file->private_data = m;
7645 	}
7646 out:
7647 	if (ret < 0)
7648 		trace_array_put(tr);
7649 
7650 	return ret;
7651 }
7652 
7653 static void tracing_swap_cpu_buffer(void *tr)
7654 {
7655 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7656 }
7657 
7658 static ssize_t
7659 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7660 		       loff_t *ppos)
7661 {
7662 	struct seq_file *m = filp->private_data;
7663 	struct trace_iterator *iter = m->private;
7664 	struct trace_array *tr = iter->tr;
7665 	unsigned long val;
7666 	int ret;
7667 
7668 	ret = tracing_update_buffers(tr);
7669 	if (ret < 0)
7670 		return ret;
7671 
7672 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7673 	if (ret)
7674 		return ret;
7675 
7676 	mutex_lock(&trace_types_lock);
7677 
7678 	if (tr->current_trace->use_max_tr) {
7679 		ret = -EBUSY;
7680 		goto out;
7681 	}
7682 
7683 	local_irq_disable();
7684 	arch_spin_lock(&tr->max_lock);
7685 	if (tr->cond_snapshot)
7686 		ret = -EBUSY;
7687 	arch_spin_unlock(&tr->max_lock);
7688 	local_irq_enable();
7689 	if (ret)
7690 		goto out;
7691 
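	/*
	 * A write of 0 frees the snapshot buffer, 1 allocates it if needed
	 * and takes a snapshot, and any other value clears the snapshot
	 * contents without freeing it.
	 */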
7692 	switch (val) {
7693 	case 0:
7694 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7695 			ret = -EINVAL;
7696 			break;
7697 		}
7698 		if (tr->allocated_snapshot)
7699 			free_snapshot(tr);
7700 		break;
7701 	case 1:
7702 /* Only allow per-cpu swap if the ring buffer supports it */
7703 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7704 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7705 			ret = -EINVAL;
7706 			break;
7707 		}
7708 #endif
7709 		if (tr->allocated_snapshot)
7710 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7711 					&tr->array_buffer, iter->cpu_file);
7712 		else
7713 			ret = tracing_alloc_snapshot_instance(tr);
7714 		if (ret < 0)
7715 			break;
7716 		/* Now, we're going to swap */
7717 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7718 			local_irq_disable();
7719 			update_max_tr(tr, current, smp_processor_id(), NULL);
7720 			local_irq_enable();
7721 		} else {
7722 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7723 						 (void *)tr, 1);
7724 		}
7725 		break;
7726 	default:
7727 		if (tr->allocated_snapshot) {
7728 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7729 				tracing_reset_online_cpus(&tr->max_buffer);
7730 			else
7731 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7732 		}
7733 		break;
7734 	}
7735 
7736 	if (ret >= 0) {
7737 		*ppos += cnt;
7738 		ret = cnt;
7739 	}
7740 out:
7741 	mutex_unlock(&trace_types_lock);
7742 	return ret;
7743 }
7744 
7745 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7746 {
7747 	struct seq_file *m = file->private_data;
7748 	int ret;
7749 
7750 	ret = tracing_release(inode, file);
7751 
7752 	if (file->f_mode & FMODE_READ)
7753 		return ret;
7754 
7755 	/* If write only, the seq_file is just a stub */
7756 	if (m)
7757 		kfree(m->private);
7758 	kfree(m);
7759 
7760 	return 0;
7761 }
7762 
7763 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7764 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7765 				    size_t count, loff_t *ppos);
7766 static int tracing_buffers_release(struct inode *inode, struct file *file);
7767 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7768 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7769 
7770 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7771 {
7772 	struct ftrace_buffer_info *info;
7773 	int ret;
7774 
7775 	/* The following checks for tracefs lockdown */
7776 	ret = tracing_buffers_open(inode, filp);
7777 	if (ret < 0)
7778 		return ret;
7779 
7780 	info = filp->private_data;
7781 
7782 	if (info->iter.trace->use_max_tr) {
7783 		tracing_buffers_release(inode, filp);
7784 		return -EBUSY;
7785 	}
7786 
7787 	info->iter.snapshot = true;
7788 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7789 
7790 	return ret;
7791 }
7792 
7793 #endif /* CONFIG_TRACER_SNAPSHOT */
7794 
7795 
7796 static const struct file_operations tracing_thresh_fops = {
7797 	.open		= tracing_open_generic,
7798 	.read		= tracing_thresh_read,
7799 	.write		= tracing_thresh_write,
7800 	.llseek		= generic_file_llseek,
7801 };
7802 
7803 #ifdef CONFIG_TRACER_MAX_TRACE
7804 static const struct file_operations tracing_max_lat_fops = {
7805 	.open		= tracing_open_generic_tr,
7806 	.read		= tracing_max_lat_read,
7807 	.write		= tracing_max_lat_write,
7808 	.llseek		= generic_file_llseek,
7809 	.release	= tracing_release_generic_tr,
7810 };
7811 #endif
7812 
7813 static const struct file_operations set_tracer_fops = {
7814 	.open		= tracing_open_generic_tr,
7815 	.read		= tracing_set_trace_read,
7816 	.write		= tracing_set_trace_write,
7817 	.llseek		= generic_file_llseek,
7818 	.release	= tracing_release_generic_tr,
7819 };
7820 
7821 static const struct file_operations tracing_pipe_fops = {
7822 	.open		= tracing_open_pipe,
7823 	.poll		= tracing_poll_pipe,
7824 	.read		= tracing_read_pipe,
7825 	.splice_read	= tracing_splice_read_pipe,
7826 	.release	= tracing_release_pipe,
7827 	.llseek		= no_llseek,
7828 };
7829 
7830 static const struct file_operations tracing_entries_fops = {
7831 	.open		= tracing_open_generic_tr,
7832 	.read		= tracing_entries_read,
7833 	.write		= tracing_entries_write,
7834 	.llseek		= generic_file_llseek,
7835 	.release	= tracing_release_generic_tr,
7836 };
7837 
7838 static const struct file_operations tracing_total_entries_fops = {
7839 	.open		= tracing_open_generic_tr,
7840 	.read		= tracing_total_entries_read,
7841 	.llseek		= generic_file_llseek,
7842 	.release	= tracing_release_generic_tr,
7843 };
7844 
7845 static const struct file_operations tracing_free_buffer_fops = {
7846 	.open		= tracing_open_generic_tr,
7847 	.write		= tracing_free_buffer_write,
7848 	.release	= tracing_free_buffer_release,
7849 };
7850 
7851 static const struct file_operations tracing_mark_fops = {
7852 	.open		= tracing_mark_open,
7853 	.write		= tracing_mark_write,
7854 	.release	= tracing_release_generic_tr,
7855 };
7856 
7857 static const struct file_operations tracing_mark_raw_fops = {
7858 	.open		= tracing_mark_open,
7859 	.write		= tracing_mark_raw_write,
7860 	.release	= tracing_release_generic_tr,
7861 };
7862 
7863 static const struct file_operations trace_clock_fops = {
7864 	.open		= tracing_clock_open,
7865 	.read		= seq_read,
7866 	.llseek		= seq_lseek,
7867 	.release	= tracing_single_release_tr,
7868 	.write		= tracing_clock_write,
7869 };
7870 
7871 static const struct file_operations trace_time_stamp_mode_fops = {
7872 	.open		= tracing_time_stamp_mode_open,
7873 	.read		= seq_read,
7874 	.llseek		= seq_lseek,
7875 	.release	= tracing_single_release_tr,
7876 };
7877 
7878 #ifdef CONFIG_TRACER_SNAPSHOT
7879 static const struct file_operations snapshot_fops = {
7880 	.open		= tracing_snapshot_open,
7881 	.read		= seq_read,
7882 	.write		= tracing_snapshot_write,
7883 	.llseek		= tracing_lseek,
7884 	.release	= tracing_snapshot_release,
7885 };
7886 
7887 static const struct file_operations snapshot_raw_fops = {
7888 	.open		= snapshot_raw_open,
7889 	.read		= tracing_buffers_read,
7890 	.release	= tracing_buffers_release,
7891 	.splice_read	= tracing_buffers_splice_read,
7892 	.llseek		= no_llseek,
7893 };
7894 
7895 #endif /* CONFIG_TRACER_SNAPSHOT */
7896 
7897 /*
7898  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7899  * @filp: The active open file structure
7900  * @ubuf: The user space provided buffer holding the value to write
7901  * @cnt: The maximum number of bytes to write
7902  * @ppos: The current "file" position
7903  *
7904  * This function implements the write interface for a struct trace_min_max_param.
7905  * The filp->private_data must point to a trace_min_max_param structure that
7906  * defines where to write the value, the min and the max acceptable values,
7907  * and a lock to protect the write.
7908  */
7909 static ssize_t
7910 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7911 {
7912 	struct trace_min_max_param *param = filp->private_data;
7913 	u64 val;
7914 	int err;
7915 
7916 	if (!param)
7917 		return -EFAULT;
7918 
7919 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7920 	if (err)
7921 		return err;
7922 
7923 	if (param->lock)
7924 		mutex_lock(param->lock);
7925 
7926 	if (param->min && val < *param->min)
7927 		err = -EINVAL;
7928 
7929 	if (param->max && val > *param->max)
7930 		err = -EINVAL;
7931 
7932 	if (!err)
7933 		*param->val = val;
7934 
7935 	if (param->lock)
7936 		mutex_unlock(param->lock);
7937 
7938 	if (err)
7939 		return err;
7940 
7941 	return cnt;
7942 }
7943 
7944 /*
7945  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7946  * @filp: The active open file structure
7947  * @ubuf: The userspace provided buffer to read value into
7948  * @cnt: The maximum number of bytes to read
7949  * @ppos: The current "file" position
7950  *
7951  * This function implements the read interface for a struct trace_min_max_param.
7952  * The filp->private_data must point to a trace_min_max_param struct with valid
7953  * data.
7954  */
7955 static ssize_t
7956 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7957 {
7958 	struct trace_min_max_param *param = filp->private_data;
7959 	char buf[U64_STR_SIZE];
7960 	int len;
7961 	u64 val;
7962 
7963 	if (!param)
7964 		return -EFAULT;
7965 
7966 	val = *param->val;
7967 
7968 	if (cnt > sizeof(buf))
7969 		cnt = sizeof(buf);
7970 
7971 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7972 
7973 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7974 }
7975 
7976 const struct file_operations trace_min_max_fops = {
7977 	.open		= tracing_open_generic,
7978 	.read		= trace_min_max_read,
7979 	.write		= trace_min_max_write,
7980 };
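
/*
 * Illustrative sketch only (not an in-tree user): a caller would typically
 * point the file's private data at a trace_min_max_param via
 * trace_create_file().  The names my_val/my_min/my_max/my_lock below are
 * hypothetical.
 *
 *	static u64 my_val, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent, &my_param,
 *			  &trace_min_max_fops);
 */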
7981 
7982 #define TRACING_LOG_ERRS_MAX	8
7983 #define TRACING_LOG_LOC_MAX	128
7984 
7985 #define CMD_PREFIX "  Command: "
7986 
7987 struct err_info {
7988 	const char	**errs;	/* ptr to loc-specific array of err strings */
7989 	u8		type;	/* index into errs -> specific err string */
7990 	u16		pos;	/* caret position */
7991 	u64		ts;
7992 };
7993 
7994 struct tracing_log_err {
7995 	struct list_head	list;
7996 	struct err_info		info;
7997 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7998 	char			*cmd;                     /* what caused err */
7999 };
8000 
8001 static DEFINE_MUTEX(tracing_err_log_lock);
8002 
8003 static struct tracing_log_err *alloc_tracing_log_err(int len)
8004 {
8005 	struct tracing_log_err *err;
8006 
8007 	err = kzalloc(sizeof(*err), GFP_KERNEL);
8008 	if (!err)
8009 		return ERR_PTR(-ENOMEM);
8010 
8011 	err->cmd = kzalloc(len, GFP_KERNEL);
8012 	if (!err->cmd) {
8013 		kfree(err);
8014 		return ERR_PTR(-ENOMEM);
8015 	}
8016 
8017 	return err;
8018 }
8019 
8020 static void free_tracing_log_err(struct tracing_log_err *err)
8021 {
8022 	kfree(err->cmd);
8023 	kfree(err);
8024 }
8025 
8026 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8027 						   int len)
8028 {
8029 	struct tracing_log_err *err;
8030 	char *cmd;
8031 
8032 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8033 		err = alloc_tracing_log_err(len);
8034 		if (PTR_ERR(err) != -ENOMEM)
8035 			tr->n_err_log_entries++;
8036 
8037 		return err;
8038 	}
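
	/*
	 * The log is full: recycle the oldest entry, giving it a freshly
	 * sized cmd buffer for the new error.
	 */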
8039 	cmd = kzalloc(len, GFP_KERNEL);
8040 	if (!cmd)
8041 		return ERR_PTR(-ENOMEM);
8042 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8043 	kfree(err->cmd);
8044 	err->cmd = cmd;
8045 	list_del(&err->list);
8046 
8047 	return err;
8048 }
8049 
8050 /**
8051  * err_pos - find the position of a string within a command for error careting
8052  * @cmd: The tracing command that caused the error
8053  * @str: The string to position the caret at within @cmd
8054  *
8055  * Finds the position of the first occurrence of @str within @cmd.  The
8056  * return value can be passed to tracing_log_err() for caret placement
8057  * within @cmd.
8058  *
8059  * Returns the index within @cmd of the first occurrence of @str or 0
8060  * if @str was not found.
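 *
 * For example, err_pos("hist:keys=bogus", "bogus") returns 10 (using a
 * hypothetical command string), so the caret is drawn under "bogus".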
8061  */
8062 unsigned int err_pos(char *cmd, const char *str)
8063 {
8064 	char *found;
8065 
8066 	if (WARN_ON(!strlen(cmd)))
8067 		return 0;
8068 
8069 	found = strstr(cmd, str);
8070 	if (found)
8071 		return found - cmd;
8072 
8073 	return 0;
8074 }
8075 
8076 /**
8077  * tracing_log_err - write an error to the tracing error log
8078  * @tr: The associated trace array for the error (NULL for top level array)
8079  * @loc: A string describing where the error occurred
8080  * @cmd: The tracing command that caused the error
8081  * @errs: The array of loc-specific static error strings
8082  * @type: The index into errs[], which produces the specific static err string
8083  * @pos: The position the caret should be placed in the cmd
8084  *
8085  * Writes an error into tracing/error_log of the form:
8086  *
8087  * <loc>: error: <text>
8088  *   Command: <cmd>
8089  *              ^
8090  *
8091  * tracing/error_log is a small log file containing the last
8092  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8093  * unless there has been a tracing error, and the error log can be
8094  * cleared and have its memory freed by writing the empty string in
8095  * truncation mode to it i.e. echo > tracing/error_log.
8096  *
8097  * NOTE: the @errs array along with the @type param are used to
8098  * produce a static error string - this string is not copied and saved
8099  * when the error is logged - only a pointer to it is saved.  See
8100  * existing callers for examples of how static strings are typically
8101  * defined for use with tracing_log_err().
8102  */
8103 void tracing_log_err(struct trace_array *tr,
8104 		     const char *loc, const char *cmd,
8105 		     const char **errs, u8 type, u16 pos)
8106 {
8107 	struct tracing_log_err *err;
8108 	int len = 0;
8109 
8110 	if (!tr)
8111 		tr = &global_trace;
8112 
8113 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8114 
8115 	mutex_lock(&tracing_err_log_lock);
8116 	err = get_tracing_log_err(tr, len);
8117 	if (PTR_ERR(err) == -ENOMEM) {
8118 		mutex_unlock(&tracing_err_log_lock);
8119 		return;
8120 	}
8121 
8122 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8123 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8124 
8125 	err->info.errs = errs;
8126 	err->info.type = type;
8127 	err->info.pos = pos;
8128 	err->info.ts = local_clock();
8129 
8130 	list_add_tail(&err->list, &tr->err_log);
8131 	mutex_unlock(&tracing_err_log_lock);
8132 }
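
/*
 * A hypothetical call, following the pattern described above (hist_errs
 * and cmd are placeholder names, not real symbols):
 *
 *	static const char *hist_errs[] = { "Bad field name", "Too many keys" };
 *
 *	tracing_log_err(tr, "hist", cmd, hist_errs, 0, err_pos(cmd, "bogus"));
 */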
8133 
8134 static void clear_tracing_err_log(struct trace_array *tr)
8135 {
8136 	struct tracing_log_err *err, *next;
8137 
8138 	mutex_lock(&tracing_err_log_lock);
8139 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8140 		list_del(&err->list);
8141 		free_tracing_log_err(err);
8142 	}
8143 
8144 	tr->n_err_log_entries = 0;
8145 	mutex_unlock(&tracing_err_log_lock);
8146 }
8147 
8148 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8149 {
8150 	struct trace_array *tr = m->private;
8151 
8152 	mutex_lock(&tracing_err_log_lock);
8153 
8154 	return seq_list_start(&tr->err_log, *pos);
8155 }
8156 
8157 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8158 {
8159 	struct trace_array *tr = m->private;
8160 
8161 	return seq_list_next(v, &tr->err_log, pos);
8162 }
8163 
8164 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8165 {
8166 	mutex_unlock(&tracing_err_log_lock);
8167 }
8168 
8169 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8170 {
8171 	u16 i;
8172 
8173 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8174 		seq_putc(m, ' ');
8175 	for (i = 0; i < pos; i++)
8176 		seq_putc(m, ' ');
8177 	seq_puts(m, "^\n");
8178 }
8179 
8180 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8181 {
8182 	struct tracing_log_err *err = v;
8183 
8184 	if (err) {
8185 		const char *err_text = err->info.errs[err->info.type];
8186 		u64 sec = err->info.ts;
8187 		u32 nsec;
8188 
8189 		nsec = do_div(sec, NSEC_PER_SEC);
8190 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8191 			   err->loc, err_text);
8192 		seq_printf(m, "%s", err->cmd);
8193 		tracing_err_log_show_pos(m, err->info.pos);
8194 	}
8195 
8196 	return 0;
8197 }
8198 
8199 static const struct seq_operations tracing_err_log_seq_ops = {
8200 	.start  = tracing_err_log_seq_start,
8201 	.next   = tracing_err_log_seq_next,
8202 	.stop   = tracing_err_log_seq_stop,
8203 	.show   = tracing_err_log_seq_show
8204 };
8205 
8206 static int tracing_err_log_open(struct inode *inode, struct file *file)
8207 {
8208 	struct trace_array *tr = inode->i_private;
8209 	int ret = 0;
8210 
8211 	ret = tracing_check_open_get_tr(tr);
8212 	if (ret)
8213 		return ret;
8214 
8215 	/* If this file was opened for write, then erase contents */
8216 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8217 		clear_tracing_err_log(tr);
8218 
8219 	if (file->f_mode & FMODE_READ) {
8220 		ret = seq_open(file, &tracing_err_log_seq_ops);
8221 		if (!ret) {
8222 			struct seq_file *m = file->private_data;
8223 			m->private = tr;
8224 		} else {
8225 			trace_array_put(tr);
8226 		}
8227 	}
8228 	return ret;
8229 }
8230 
8231 static ssize_t tracing_err_log_write(struct file *file,
8232 				     const char __user *buffer,
8233 				     size_t count, loff_t *ppos)
8234 {
8235 	return count;
8236 }
8237 
8238 static int tracing_err_log_release(struct inode *inode, struct file *file)
8239 {
8240 	struct trace_array *tr = inode->i_private;
8241 
8242 	trace_array_put(tr);
8243 
8244 	if (file->f_mode & FMODE_READ)
8245 		seq_release(inode, file);
8246 
8247 	return 0;
8248 }
8249 
8250 static const struct file_operations tracing_err_log_fops = {
8251 	.open           = tracing_err_log_open,
8252 	.write		= tracing_err_log_write,
8253 	.read           = seq_read,
8254 	.llseek         = tracing_lseek,
8255 	.release        = tracing_err_log_release,
8256 };
8257 
8258 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8259 {
8260 	struct trace_array *tr = inode->i_private;
8261 	struct ftrace_buffer_info *info;
8262 	int ret;
8263 
8264 	ret = tracing_check_open_get_tr(tr);
8265 	if (ret)
8266 		return ret;
8267 
8268 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8269 	if (!info) {
8270 		trace_array_put(tr);
8271 		return -ENOMEM;
8272 	}
8273 
8274 	mutex_lock(&trace_types_lock);
8275 
8276 	info->iter.tr		= tr;
8277 	info->iter.cpu_file	= tracing_get_cpu(inode);
8278 	info->iter.trace	= tr->current_trace;
8279 	info->iter.array_buffer = &tr->array_buffer;
8280 	info->spare		= NULL;
8281 	/* Force reading ring buffer for first read */
8282 	info->read		= (unsigned int)-1;
8283 
8284 	filp->private_data = info;
8285 
8286 	tr->trace_ref++;
8287 
8288 	mutex_unlock(&trace_types_lock);
8289 
8290 	ret = nonseekable_open(inode, filp);
8291 	if (ret < 0)
8292 		trace_array_put(tr);
8293 
8294 	return ret;
8295 }
8296 
8297 static __poll_t
8298 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8299 {
8300 	struct ftrace_buffer_info *info = filp->private_data;
8301 	struct trace_iterator *iter = &info->iter;
8302 
8303 	return trace_poll(iter, filp, poll_table);
8304 }
8305 
8306 static ssize_t
8307 tracing_buffers_read(struct file *filp, char __user *ubuf,
8308 		     size_t count, loff_t *ppos)
8309 {
8310 	struct ftrace_buffer_info *info = filp->private_data;
8311 	struct trace_iterator *iter = &info->iter;
8312 	void *trace_data;
8313 	int page_size;
8314 	ssize_t ret = 0;
8315 	ssize_t size;
8316 
8317 	if (!count)
8318 		return 0;
8319 
8320 #ifdef CONFIG_TRACER_MAX_TRACE
8321 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8322 		return -EBUSY;
8323 #endif
8324 
8325 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8326 
8327 	/* Make sure the spare matches the current sub buffer size */
8328 	if (info->spare) {
8329 		if (page_size != info->spare_size) {
8330 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8331 						   info->spare_cpu, info->spare);
8332 			info->spare = NULL;
8333 		}
8334 	}
8335 
8336 	if (!info->spare) {
8337 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8338 							  iter->cpu_file);
8339 		if (IS_ERR(info->spare)) {
8340 			ret = PTR_ERR(info->spare);
8341 			info->spare = NULL;
8342 		} else {
8343 			info->spare_cpu = iter->cpu_file;
8344 			info->spare_size = page_size;
8345 		}
8346 	}
8347 	if (!info->spare)
8348 		return ret;
8349 
8350 	/* Do we have previous read data to read? */
8351 	if (info->read < page_size)
8352 		goto read;
8353 
8354  again:
8355 	trace_access_lock(iter->cpu_file);
8356 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8357 				    info->spare,
8358 				    count,
8359 				    iter->cpu_file, 0);
8360 	trace_access_unlock(iter->cpu_file);
8361 
8362 	if (ret < 0) {
8363 		if (trace_empty(iter)) {
8364 			if ((filp->f_flags & O_NONBLOCK))
8365 				return -EAGAIN;
8366 
8367 			ret = wait_on_pipe(iter, 0);
8368 			if (ret)
8369 				return ret;
8370 
8371 			goto again;
8372 		}
8373 		return 0;
8374 	}
8375 
8376 	info->read = 0;
8377  read:
8378 	size = page_size - info->read;
8379 	if (size > count)
8380 		size = count;
8381 	trace_data = ring_buffer_read_page_data(info->spare);
8382 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8383 	if (ret == size)
8384 		return -EFAULT;
8385 
8386 	size -= ret;
8387 
8388 	*ppos += size;
8389 	info->read += size;
8390 
8391 	return size;
8392 }
8393 
8394 static int tracing_buffers_release(struct inode *inode, struct file *file)
8395 {
8396 	struct ftrace_buffer_info *info = file->private_data;
8397 	struct trace_iterator *iter = &info->iter;
8398 
8399 	mutex_lock(&trace_types_lock);
8400 
8401 	iter->tr->trace_ref--;
8402 
8403 	__trace_array_put(iter->tr);
8404 
8405 	iter->wait_index++;
8406 	/* Make sure the waiters see the new wait_index */
8407 	smp_wmb();
8408 
8409 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8410 
8411 	if (info->spare)
8412 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8413 					   info->spare_cpu, info->spare);
8414 	kvfree(info);
8415 
8416 	mutex_unlock(&trace_types_lock);
8417 
8418 	return 0;
8419 }
8420 
8421 struct buffer_ref {
8422 	struct trace_buffer	*buffer;
8423 	void			*page;
8424 	int			cpu;
8425 	refcount_t		refcount;
8426 };
8427 
8428 static void buffer_ref_release(struct buffer_ref *ref)
8429 {
8430 	if (!refcount_dec_and_test(&ref->refcount))
8431 		return;
8432 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8433 	kfree(ref);
8434 }
8435 
8436 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8437 				    struct pipe_buffer *buf)
8438 {
8439 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8440 
8441 	buffer_ref_release(ref);
8442 	buf->private = 0;
8443 }
8444 
8445 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8446 				struct pipe_buffer *buf)
8447 {
8448 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8449 
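	/* Refuse another reference once the count looks runaway (overflow guard) */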
8450 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8451 		return false;
8452 
8453 	refcount_inc(&ref->refcount);
8454 	return true;
8455 }
8456 
8457 /* Pipe buffer operations for a buffer. */
8458 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8459 	.release		= buffer_pipe_buf_release,
8460 	.get			= buffer_pipe_buf_get,
8461 };
8462 
8463 /*
8464  * Callback from splice_to_pipe(), if we need to release some pages
8465  * at the end of the spd in case we errored out while filling the pipe.
8466  */
8467 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8468 {
8469 	struct buffer_ref *ref =
8470 		(struct buffer_ref *)spd->partial[i].private;
8471 
8472 	buffer_ref_release(ref);
8473 	spd->partial[i].private = 0;
8474 }
8475 
8476 static ssize_t
8477 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8478 			    struct pipe_inode_info *pipe, size_t len,
8479 			    unsigned int flags)
8480 {
8481 	struct ftrace_buffer_info *info = file->private_data;
8482 	struct trace_iterator *iter = &info->iter;
8483 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8484 	struct page *pages_def[PIPE_DEF_BUFFERS];
8485 	struct splice_pipe_desc spd = {
8486 		.pages		= pages_def,
8487 		.partial	= partial_def,
8488 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8489 		.ops		= &buffer_pipe_buf_ops,
8490 		.spd_release	= buffer_spd_release,
8491 	};
8492 	struct buffer_ref *ref;
8493 	int page_size;
8494 	int entries, i;
8495 	ssize_t ret = 0;
8496 
8497 #ifdef CONFIG_TRACER_MAX_TRACE
8498 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8499 		return -EBUSY;
8500 #endif
8501 
8502 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8503 	if (*ppos & (page_size - 1))
8504 		return -EINVAL;
8505 
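	/*
	 * Splice only whole sub-buffers: round len down to a sub-buffer
	 * boundary, but require at least one full sub-buffer.
	 */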
8506 	if (len & (page_size - 1)) {
8507 		if (len < page_size)
8508 			return -EINVAL;
8509 		len &= (~(page_size - 1));
8510 	}
8511 
8512 	if (splice_grow_spd(pipe, &spd))
8513 		return -ENOMEM;
8514 
8515  again:
8516 	trace_access_lock(iter->cpu_file);
8517 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8518 
8519 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8520 		struct page *page;
8521 		int r;
8522 
8523 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8524 		if (!ref) {
8525 			ret = -ENOMEM;
8526 			break;
8527 		}
8528 
8529 		refcount_set(&ref->refcount, 1);
8530 		ref->buffer = iter->array_buffer->buffer;
8531 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8532 		if (IS_ERR(ref->page)) {
8533 			ret = PTR_ERR(ref->page);
8534 			ref->page = NULL;
8535 			kfree(ref);
8536 			break;
8537 		}
8538 		ref->cpu = iter->cpu_file;
8539 
8540 		r = ring_buffer_read_page(ref->buffer, ref->page,
8541 					  len, iter->cpu_file, 1);
8542 		if (r < 0) {
8543 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8544 						   ref->page);
8545 			kfree(ref);
8546 			break;
8547 		}
8548 
8549 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8550 
8551 		spd.pages[i] = page;
8552 		spd.partial[i].len = page_size;
8553 		spd.partial[i].offset = 0;
8554 		spd.partial[i].private = (unsigned long)ref;
8555 		spd.nr_pages++;
8556 		*ppos += page_size;
8557 
8558 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8559 	}
8560 
8561 	trace_access_unlock(iter->cpu_file);
8562 	spd.nr_pages = i;
8563 
8564 	/* did we read anything? */
8565 	if (!spd.nr_pages) {
8566 		long wait_index;
8567 
8568 		if (ret)
8569 			goto out;
8570 
8571 		ret = -EAGAIN;
8572 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8573 			goto out;
8574 
8575 		wait_index = READ_ONCE(iter->wait_index);
8576 
8577 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8578 		if (ret)
8579 			goto out;
8580 
8581 		/* No need to wait after waking up when tracing is off */
8582 		if (!tracer_tracing_is_on(iter->tr))
8583 			goto out;
8584 
8585 		/* Make sure we see the new wait_index */
8586 		smp_rmb();
8587 		if (wait_index != iter->wait_index)
8588 			goto out;
8589 
8590 		goto again;
8591 	}
8592 
8593 	ret = splice_to_pipe(pipe, &spd);
8594 out:
8595 	splice_shrink_spd(&spd);
8596 
8597 	return ret;
8598 }
8599 
8600 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8601 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8602 {
8603 	struct ftrace_buffer_info *info = file->private_data;
8604 	struct trace_iterator *iter = &info->iter;
8605 
8606 	if (cmd)
8607 		return -ENOIOCTLCMD;
8608 
8609 	mutex_lock(&trace_types_lock);
8610 
8611 	iter->wait_index++;
8612 	/* Make sure the waiters see the new wait_index */
8613 	smp_wmb();
8614 
8615 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8616 
8617 	mutex_unlock(&trace_types_lock);
8618 	return 0;
8619 }
8620 
8621 static const struct file_operations tracing_buffers_fops = {
8622 	.open		= tracing_buffers_open,
8623 	.read		= tracing_buffers_read,
8624 	.poll		= tracing_buffers_poll,
8625 	.release	= tracing_buffers_release,
8626 	.splice_read	= tracing_buffers_splice_read,
8627 	.unlocked_ioctl = tracing_buffers_ioctl,
8628 	.llseek		= no_llseek,
8629 };
8630 
8631 static ssize_t
8632 tracing_stats_read(struct file *filp, char __user *ubuf,
8633 		   size_t count, loff_t *ppos)
8634 {
8635 	struct inode *inode = file_inode(filp);
8636 	struct trace_array *tr = inode->i_private;
8637 	struct array_buffer *trace_buf = &tr->array_buffer;
8638 	int cpu = tracing_get_cpu(inode);
8639 	struct trace_seq *s;
8640 	unsigned long cnt;
8641 	unsigned long long t;
8642 	unsigned long usec_rem;
8643 
8644 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8645 	if (!s)
8646 		return -ENOMEM;
8647 
8648 	trace_seq_init(s);
8649 
8650 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8651 	trace_seq_printf(s, "entries: %ld\n", cnt);
8652 
8653 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8654 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8655 
8656 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8657 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8658 
8659 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8660 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8661 
8662 	if (trace_clocks[tr->clock_id].in_ns) {
8663 		/* local or global for trace_clock */
8664 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8665 		usec_rem = do_div(t, USEC_PER_SEC);
8666 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8667 								t, usec_rem);
8668 
8669 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8670 		usec_rem = do_div(t, USEC_PER_SEC);
8671 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8672 	} else {
8673 		/* counter or tsc mode for trace_clock */
8674 		trace_seq_printf(s, "oldest event ts: %llu\n",
8675 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8676 
8677 		trace_seq_printf(s, "now ts: %llu\n",
8678 				ring_buffer_time_stamp(trace_buf->buffer));
8679 	}
8680 
8681 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8682 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8683 
8684 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8685 	trace_seq_printf(s, "read events: %ld\n", cnt);
8686 
8687 	count = simple_read_from_buffer(ubuf, count, ppos,
8688 					s->buffer, trace_seq_used(s));
8689 
8690 	kfree(s);
8691 
8692 	return count;
8693 }
8694 
8695 static const struct file_operations tracing_stats_fops = {
8696 	.open		= tracing_open_generic_tr,
8697 	.read		= tracing_stats_read,
8698 	.llseek		= generic_file_llseek,
8699 	.release	= tracing_release_generic_tr,
8700 };
8701 
8702 #ifdef CONFIG_DYNAMIC_FTRACE
8703 
8704 static ssize_t
8705 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8706 		  size_t cnt, loff_t *ppos)
8707 {
8708 	ssize_t ret;
8709 	char *buf;
8710 	int r;
8711 
8712 	/* 256 should be plenty to hold the amount needed */
8713 	buf = kmalloc(256, GFP_KERNEL);
8714 	if (!buf)
8715 		return -ENOMEM;
8716 
8717 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8718 		      ftrace_update_tot_cnt,
8719 		      ftrace_number_of_pages,
8720 		      ftrace_number_of_groups);
8721 
8722 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8723 	kfree(buf);
8724 	return ret;
8725 }
8726 
8727 static const struct file_operations tracing_dyn_info_fops = {
8728 	.open		= tracing_open_generic,
8729 	.read		= tracing_read_dyn_info,
8730 	.llseek		= generic_file_llseek,
8731 };
8732 #endif /* CONFIG_DYNAMIC_FTRACE */
8733 
8734 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8735 static void
8736 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8737 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8738 		void *data)
8739 {
8740 	tracing_snapshot_instance(tr);
8741 }
8742 
8743 static void
8744 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8745 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8746 		      void *data)
8747 {
8748 	struct ftrace_func_mapper *mapper = data;
8749 	long *count = NULL;
8750 
8751 	if (mapper)
8752 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8753 
8754 	if (count) {
8755 
8756 		if (*count <= 0)
8757 			return;
8758 
8759 		(*count)--;
8760 	}
8761 
8762 	tracing_snapshot_instance(tr);
8763 }
8764 
8765 static int
8766 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8767 		      struct ftrace_probe_ops *ops, void *data)
8768 {
8769 	struct ftrace_func_mapper *mapper = data;
8770 	long *count = NULL;
8771 
8772 	seq_printf(m, "%ps:", (void *)ip);
8773 
8774 	seq_puts(m, "snapshot");
8775 
8776 	if (mapper)
8777 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8778 
8779 	if (count)
8780 		seq_printf(m, ":count=%ld\n", *count);
8781 	else
8782 		seq_puts(m, ":unlimited\n");
8783 
8784 	return 0;
8785 }
8786 
8787 static int
8788 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8789 		     unsigned long ip, void *init_data, void **data)
8790 {
8791 	struct ftrace_func_mapper *mapper = *data;
8792 
8793 	if (!mapper) {
8794 		mapper = allocate_ftrace_func_mapper();
8795 		if (!mapper)
8796 			return -ENOMEM;
8797 		*data = mapper;
8798 	}
8799 
8800 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8801 }
8802 
8803 static void
8804 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8805 		     unsigned long ip, void *data)
8806 {
8807 	struct ftrace_func_mapper *mapper = data;
8808 
8809 	if (!ip) {
8810 		if (!mapper)
8811 			return;
8812 		free_ftrace_func_mapper(mapper, NULL);
8813 		return;
8814 	}
8815 
8816 	ftrace_func_mapper_remove_ip(mapper, ip);
8817 }
8818 
8819 static struct ftrace_probe_ops snapshot_probe_ops = {
8820 	.func			= ftrace_snapshot,
8821 	.print			= ftrace_snapshot_print,
8822 };
8823 
8824 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8825 	.func			= ftrace_count_snapshot,
8826 	.print			= ftrace_snapshot_print,
8827 	.init			= ftrace_snapshot_init,
8828 	.free			= ftrace_snapshot_free,
8829 };
8830 
8831 static int
8832 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8833 			       char *glob, char *cmd, char *param, int enable)
8834 {
8835 	struct ftrace_probe_ops *ops;
8836 	void *count = (void *)-1;
8837 	char *number;
8838 	int ret;
8839 
8840 	if (!tr)
8841 		return -ENODEV;
8842 
8843 	/* hash funcs only work with set_ftrace_filter */
8844 	if (!enable)
8845 		return -EINVAL;
8846 
8847 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8848 
8849 	if (glob[0] == '!')
8850 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8851 
8852 	if (!param)
8853 		goto out_reg;
8854 
8855 	number = strsep(&param, ":");
8856 
8857 	if (!strlen(number))
8858 		goto out_reg;
8859 
8860 	/*
8861 	 * We use the callback data field (which is a pointer)
8862 	 * as our counter.
8863 	 */
8864 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8865 	if (ret)
8866 		return ret;
8867 
8868  out_reg:
8869 	ret = tracing_alloc_snapshot_instance(tr);
8870 	if (ret < 0)
8871 		goto out;
8872 
8873 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8874 
8875  out:
8876 	return ret < 0 ? ret : 0;
8877 }
8878 
8879 static struct ftrace_func_command ftrace_snapshot_cmd = {
8880 	.name			= "snapshot",
8881 	.func			= ftrace_trace_snapshot_callback,
8882 };
8883 
8884 static __init int register_snapshot_cmd(void)
8885 {
8886 	return register_ftrace_command(&ftrace_snapshot_cmd);
8887 }
8888 #else
8889 static inline __init int register_snapshot_cmd(void) { return 0; }
8890 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8891 
8892 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8893 {
8894 	if (WARN_ON(!tr->dir))
8895 		return ERR_PTR(-ENODEV);
8896 
8897 	/* Top directory uses NULL as the parent */
8898 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8899 		return NULL;
8900 
8901 	/* All sub buffers have a descriptor */
8902 	return tr->dir;
8903 }
8904 
8905 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8906 {
8907 	struct dentry *d_tracer;
8908 
8909 	if (tr->percpu_dir)
8910 		return tr->percpu_dir;
8911 
8912 	d_tracer = tracing_get_dentry(tr);
8913 	if (IS_ERR(d_tracer))
8914 		return NULL;
8915 
8916 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8917 
8918 	MEM_FAIL(!tr->percpu_dir,
8919 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8920 
8921 	return tr->percpu_dir;
8922 }
8923 
8924 static struct dentry *
8925 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8926 		      void *data, long cpu, const struct file_operations *fops)
8927 {
8928 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8929 
8930 	if (ret) /* See tracing_get_cpu() */
8931 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8932 	return ret;
8933 }
8934 
8935 static void
8936 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8937 {
8938 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8939 	struct dentry *d_cpu;
8940 	char cpu_dir[30]; /* 30 characters should be more than enough */
8941 
8942 	if (!d_percpu)
8943 		return;
8944 
8945 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8946 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8947 	if (!d_cpu) {
8948 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8949 		return;
8950 	}
8951 
8952 	/* per cpu trace_pipe */
8953 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8954 				tr, cpu, &tracing_pipe_fops);
8955 
8956 	/* per cpu trace */
8957 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8958 				tr, cpu, &tracing_fops);
8959 
8960 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8961 				tr, cpu, &tracing_buffers_fops);
8962 
8963 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8964 				tr, cpu, &tracing_stats_fops);
8965 
8966 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8967 				tr, cpu, &tracing_entries_fops);
8968 
8969 #ifdef CONFIG_TRACER_SNAPSHOT
8970 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8971 				tr, cpu, &snapshot_fops);
8972 
8973 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8974 				tr, cpu, &snapshot_raw_fops);
8975 #endif
8976 }
8977 
8978 #ifdef CONFIG_FTRACE_SELFTEST
8979 /* Let selftest have access to static functions in this file */
8980 #include "trace_selftest.c"
8981 #endif
8982 
8983 static ssize_t
8984 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8985 			loff_t *ppos)
8986 {
8987 	struct trace_option_dentry *topt = filp->private_data;
8988 	char *buf;
8989 
8990 	if (topt->flags->val & topt->opt->bit)
8991 		buf = "1\n";
8992 	else
8993 		buf = "0\n";
8994 
8995 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8996 }
8997 
8998 static ssize_t
8999 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9000 			 loff_t *ppos)
9001 {
9002 	struct trace_option_dentry *topt = filp->private_data;
9003 	unsigned long val;
9004 	int ret;
9005 
9006 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9007 	if (ret)
9008 		return ret;
9009 
9010 	if (val != 0 && val != 1)
9011 		return -EINVAL;
9012 
9013 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9014 		mutex_lock(&trace_types_lock);
9015 		ret = __set_tracer_option(topt->tr, topt->flags,
9016 					  topt->opt, !val);
9017 		mutex_unlock(&trace_types_lock);
9018 		if (ret)
9019 			return ret;
9020 	}
9021 
9022 	*ppos += cnt;
9023 
9024 	return cnt;
9025 }
9026 
9027 static int tracing_open_options(struct inode *inode, struct file *filp)
9028 {
9029 	struct trace_option_dentry *topt = inode->i_private;
9030 	int ret;
9031 
9032 	ret = tracing_check_open_get_tr(topt->tr);
9033 	if (ret)
9034 		return ret;
9035 
9036 	filp->private_data = inode->i_private;
9037 	return 0;
9038 }
9039 
9040 static int tracing_release_options(struct inode *inode, struct file *file)
9041 {
9042 	struct trace_option_dentry *topt = file->private_data;
9043 
9044 	trace_array_put(topt->tr);
9045 	return 0;
9046 }
9047 
9048 static const struct file_operations trace_options_fops = {
9049 	.open = tracing_open_options,
9050 	.read = trace_options_read,
9051 	.write = trace_options_write,
9052 	.llseek	= generic_file_llseek,
9053 	.release = tracing_release_options,
9054 };
9055 
9056 /*
9057  * In order to pass in both the trace_array descriptor as well as the index
9058  * to the flag that the trace option file represents, the trace_array
9059  * has a character array of trace_flags_index[], which holds the index
9060  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9061  * The address of the flag's entry in this array is passed to the flag
9062  * option file read/write callbacks.
9063  *
9064  * In order to extract both the index and the trace_array descriptor,
9065  * get_tr_index() uses the following algorithm.
9066  *
9067  *   idx = *ptr;
9068  *
9069  * Since the pointer is the address of an index entry, and each entry
9070  * stores its own index (remember index[1] == 1), dereferencing the
9071  * pointer yields the index.
9072  *
9073  * Subtracting that index from the ptr then gets us back to the start of the array:
9074  *
9075  *   ptr - idx == &index[0]
9076  *
9077  * Then a simple container_of() from that pointer gets us to the
9078  * trace_array descriptor.
9079  */
9080 static void get_tr_index(void *data, struct trace_array **ptr,
9081 			 unsigned int *pindex)
9082 {
9083 	*pindex = *(unsigned char *)data;
9084 
9085 	*ptr = container_of(data - *pindex, struct trace_array,
9086 			    trace_flags_index);
9087 }
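
/*
 * Worked example of the scheme above: if data points at
 * &tr->trace_flags_index[3], then *data == 3 and data - 3 ==
 * &tr->trace_flags_index[0], which container_of() maps back to tr.
 */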
9088 
9089 static ssize_t
9090 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9091 			loff_t *ppos)
9092 {
9093 	void *tr_index = filp->private_data;
9094 	struct trace_array *tr;
9095 	unsigned int index;
9096 	char *buf;
9097 
9098 	get_tr_index(tr_index, &tr, &index);
9099 
9100 	if (tr->trace_flags & (1 << index))
9101 		buf = "1\n";
9102 	else
9103 		buf = "0\n";
9104 
9105 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9106 }
9107 
9108 static ssize_t
9109 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9110 			 loff_t *ppos)
9111 {
9112 	void *tr_index = filp->private_data;
9113 	struct trace_array *tr;
9114 	unsigned int index;
9115 	unsigned long val;
9116 	int ret;
9117 
9118 	get_tr_index(tr_index, &tr, &index);
9119 
9120 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9121 	if (ret)
9122 		return ret;
9123 
9124 	if (val != 0 && val != 1)
9125 		return -EINVAL;
9126 
9127 	mutex_lock(&event_mutex);
9128 	mutex_lock(&trace_types_lock);
9129 	ret = set_tracer_flag(tr, 1 << index, val);
9130 	mutex_unlock(&trace_types_lock);
9131 	mutex_unlock(&event_mutex);
9132 
9133 	if (ret < 0)
9134 		return ret;
9135 
9136 	*ppos += cnt;
9137 
9138 	return cnt;
9139 }
9140 
9141 static const struct file_operations trace_options_core_fops = {
9142 	.open = tracing_open_generic,
9143 	.read = trace_options_core_read,
9144 	.write = trace_options_core_write,
9145 	.llseek = generic_file_llseek,
9146 };
9147 
9148 struct dentry *trace_create_file(const char *name,
9149 				 umode_t mode,
9150 				 struct dentry *parent,
9151 				 void *data,
9152 				 const struct file_operations *fops)
9153 {
9154 	struct dentry *ret;
9155 
9156 	ret = tracefs_create_file(name, mode, parent, data, fops);
9157 	if (!ret)
9158 		pr_warn("Could not create tracefs '%s' entry\n", name);
9159 
9160 	return ret;
9161 }
9162 
9163 
9164 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9165 {
9166 	struct dentry *d_tracer;
9167 
9168 	if (tr->options)
9169 		return tr->options;
9170 
9171 	d_tracer = tracing_get_dentry(tr);
9172 	if (IS_ERR(d_tracer))
9173 		return NULL;
9174 
9175 	tr->options = tracefs_create_dir("options", d_tracer);
9176 	if (!tr->options) {
9177 		pr_warn("Could not create tracefs directory 'options'\n");
9178 		return NULL;
9179 	}
9180 
9181 	return tr->options;
9182 }
9183 
9184 static void
9185 create_trace_option_file(struct trace_array *tr,
9186 			 struct trace_option_dentry *topt,
9187 			 struct tracer_flags *flags,
9188 			 struct tracer_opt *opt)
9189 {
9190 	struct dentry *t_options;
9191 
9192 	t_options = trace_options_init_dentry(tr);
9193 	if (!t_options)
9194 		return;
9195 
9196 	topt->flags = flags;
9197 	topt->opt = opt;
9198 	topt->tr = tr;
9199 
9200 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9201 					t_options, topt, &trace_options_fops);
9202 
9203 }
9204 
9205 static void
9206 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9207 {
9208 	struct trace_option_dentry *topts;
9209 	struct trace_options *tr_topts;
9210 	struct tracer_flags *flags;
9211 	struct tracer_opt *opts;
9212 	int cnt;
9213 	int i;
9214 
9215 	if (!tracer)
9216 		return;
9217 
9218 	flags = tracer->flags;
9219 
9220 	if (!flags || !flags->opts)
9221 		return;
9222 
9223 	/*
9224 	 * If this is an instance, only create flags for tracers
9225 	 * the instance may have.
9226 	 */
9227 	if (!trace_ok_for_array(tracer, tr))
9228 		return;
9229 
9230 	for (i = 0; i < tr->nr_topts; i++) {
9231 		/* Make sure there are no duplicate flags. */
9232 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9233 			return;
9234 	}
9235 
9236 	opts = flags->opts;
9237 
9238 	for (cnt = 0; opts[cnt].name; cnt++)
9239 		;
9240 
9241 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9242 	if (!topts)
9243 		return;
9244 
9245 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9246 			    GFP_KERNEL);
9247 	if (!tr_topts) {
9248 		kfree(topts);
9249 		return;
9250 	}
9251 
9252 	tr->topts = tr_topts;
9253 	tr->topts[tr->nr_topts].tracer = tracer;
9254 	tr->topts[tr->nr_topts].topts = topts;
9255 	tr->nr_topts++;
9256 
9257 	for (cnt = 0; opts[cnt].name; cnt++) {
9258 		create_trace_option_file(tr, &topts[cnt], flags,
9259 					 &opts[cnt]);
9260 		MEM_FAIL(topts[cnt].entry == NULL,
9261 			  "Failed to create trace option: %s",
9262 			  opts[cnt].name);
9263 	}
9264 }
9265 
9266 static struct dentry *
9267 create_trace_option_core_file(struct trace_array *tr,
9268 			      const char *option, long index)
9269 {
9270 	struct dentry *t_options;
9271 
9272 	t_options = trace_options_init_dentry(tr);
9273 	if (!t_options)
9274 		return NULL;
9275 
9276 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9277 				 (void *)&tr->trace_flags_index[index],
9278 				 &trace_options_core_fops);
9279 }
9280 
9281 static void create_trace_options_dir(struct trace_array *tr)
9282 {
9283 	struct dentry *t_options;
9284 	bool top_level = tr == &global_trace;
9285 	int i;
9286 
9287 	t_options = trace_options_init_dentry(tr);
9288 	if (!t_options)
9289 		return;
9290 
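	/* Flags in TOP_LEVEL_TRACE_FLAGS only get a file in the top-level instance */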
9291 	for (i = 0; trace_options[i]; i++) {
9292 		if (top_level ||
9293 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9294 			create_trace_option_core_file(tr, trace_options[i], i);
9295 	}
9296 }
9297 
9298 static ssize_t
9299 rb_simple_read(struct file *filp, char __user *ubuf,
9300 	       size_t cnt, loff_t *ppos)
9301 {
9302 	struct trace_array *tr = filp->private_data;
9303 	char buf[64];
9304 	int r;
9305 
9306 	r = tracer_tracing_is_on(tr);
9307 	r = sprintf(buf, "%d\n", r);
9308 
9309 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9310 }
9311 
9312 static ssize_t
9313 rb_simple_write(struct file *filp, const char __user *ubuf,
9314 		size_t cnt, loff_t *ppos)
9315 {
9316 	struct trace_array *tr = filp->private_data;
9317 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9318 	unsigned long val;
9319 	int ret;
9320 
9321 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9322 	if (ret)
9323 		return ret;
9324 
9325 	if (buffer) {
9326 		mutex_lock(&trace_types_lock);
9327 		if (!!val == tracer_tracing_is_on(tr)) {
9328 			val = 0; /* do nothing */
9329 		} else if (val) {
9330 			tracer_tracing_on(tr);
9331 			if (tr->current_trace->start)
9332 				tr->current_trace->start(tr);
9333 		} else {
9334 			tracer_tracing_off(tr);
9335 			if (tr->current_trace->stop)
9336 				tr->current_trace->stop(tr);
9337 			/* Wake up any waiters */
9338 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9339 		}
9340 		mutex_unlock(&trace_types_lock);
9341 	}
9342 
9343 	(*ppos)++;
9344 
9345 	return cnt;
9346 }
9347 
9348 static const struct file_operations rb_simple_fops = {
9349 	.open		= tracing_open_generic_tr,
9350 	.read		= rb_simple_read,
9351 	.write		= rb_simple_write,
9352 	.release	= tracing_release_generic_tr,
9353 	.llseek		= default_llseek,
9354 };
9355 
9356 static ssize_t
9357 buffer_percent_read(struct file *filp, char __user *ubuf,
9358 		    size_t cnt, loff_t *ppos)
9359 {
9360 	struct trace_array *tr = filp->private_data;
9361 	char buf[64];
9362 	int r;
9363 
9364 	r = tr->buffer_percent;
9365 	r = sprintf(buf, "%d\n", r);
9366 
9367 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9368 }
9369 
9370 static ssize_t
9371 buffer_percent_write(struct file *filp, const char __user *ubuf,
9372 		     size_t cnt, loff_t *ppos)
9373 {
9374 	struct trace_array *tr = filp->private_data;
9375 	unsigned long val;
9376 	int ret;
9377 
9378 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9379 	if (ret)
9380 		return ret;
9381 
9382 	if (val > 100)
9383 		return -EINVAL;
9384 
9385 	tr->buffer_percent = val;
9386 
9387 	(*ppos)++;
9388 
9389 	return cnt;
9390 }
9391 
9392 static const struct file_operations buffer_percent_fops = {
9393 	.open		= tracing_open_generic_tr,
9394 	.read		= buffer_percent_read,
9395 	.write		= buffer_percent_write,
9396 	.release	= tracing_release_generic_tr,
9397 	.llseek		= default_llseek,
9398 };
9399 
9400 static ssize_t
9401 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9402 {
9403 	struct trace_array *tr = filp->private_data;
9404 	size_t size;
9405 	char buf[64];
9406 	int order;
9407 	int r;
9408 
9409 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9410 	size = (PAGE_SIZE << order) / 1024;
9411 
9412 	r = sprintf(buf, "%zd\n", size);
9413 
9414 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9415 }
9416 
9417 static ssize_t
9418 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9419 			 size_t cnt, loff_t *ppos)
9420 {
9421 	struct trace_array *tr = filp->private_data;
9422 	unsigned long val;
9423 	int old_order;
9424 	int order;
9425 	int pages;
9426 	int ret;
9427 
9428 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9429 	if (ret)
9430 		return ret;
9431 
9432 	val *= 1024; /* value passed in is in KB */
9433 
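	/* e.g. with 4K pages, a write of 16 (KB) gives pages = 4 and order = 2 */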
9434 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9435 	order = fls(pages - 1);
9436 
9437 	/* limit between 1 and 128 system pages */
9438 	if (order < 0 || order > 7)
9439 		return -EINVAL;
9440 
9441 	/* Do not allow tracing while changing the order of the ring buffer */
9442 	tracing_stop_tr(tr);
9443 
9444 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9445 	if (old_order == order)
9446 		goto out;
9447 
9448 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9449 	if (ret)
9450 		goto out;
9451 
9452 #ifdef CONFIG_TRACER_MAX_TRACE
9453 
9454 	if (!tr->allocated_snapshot)
9455 		goto out_max;
9456 
9457 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9458 	if (ret) {
9459 		/* Put back the old order */
9460 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9461 		if (WARN_ON_ONCE(cnt)) {
9462 			/*
9463 			 * AARGH! We are left with different orders!
9464 			 * The max buffer is our "snapshot" buffer.
9465 			 * When a tracer needs a snapshot (one of the
9466 			 * latency tracers), it swaps the max buffer
9467 			 * with the saved snapshot. We succeeded in
9468 			 * updating the order of the main buffer, but failed
9469 			 * to update the order of the max buffer. And when we
9470 			 * tried to reset the main buffer to its original
9471 			 * order, we failed there too. This is very unlikely to
9472 			 * happen, but if it does, warn and kill all
9473 			 * tracing.
9474 			 */
9475 			tracing_disabled = 1;
9476 		}
9477 		goto out;
9478 	}
9479  out_max:
9480 #endif
9481 	(*ppos)++;
9482  out:
9483 	if (ret)
9484 		cnt = ret;
9485 	tracing_start_tr(tr);
9486 	return cnt;
9487 }
9488 
9489 static const struct file_operations buffer_subbuf_size_fops = {
9490 	.open		= tracing_open_generic_tr,
9491 	.read		= buffer_subbuf_size_read,
9492 	.write		= buffer_subbuf_size_write,
9493 	.release	= tracing_release_generic_tr,
9494 	.llseek		= default_llseek,
9495 };
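
/*
 * Illustrative only: a worked example of the size-to-order math in
 * buffer_subbuf_size_write() above, assuming 4K pages. Writing "8" gives
 * val = 8 * 1024 = 8192, pages = DIV_ROUND_UP(8192, 4096) = 2 and
 * order = fls(2 - 1) = 1, i.e. a two-page (8K) sub-buffer. The helper
 * below only mirrors that computation for reference:
 *
 *	// Round a requested sub-buffer size in KB up to a page order.
 *	static int subbuf_kb_to_order(unsigned long kb, unsigned long page_size)
 *	{
 *		unsigned long pages = (kb * 1024 + page_size - 1) / page_size;
 *		int order = 0;
 *
 *		while ((1UL << order) < pages)
 *			order++;
 *		return order;	// caller still range-checks (0..7)
 *	}
 */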
9496 
9497 static struct dentry *trace_instance_dir;
9498 
9499 static void
9500 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9501 
9502 static int
9503 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9504 {
9505 	enum ring_buffer_flags rb_flags;
9506 
9507 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9508 
9509 	buf->tr = tr;
9510 
9511 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9512 	if (!buf->buffer)
9513 		return -ENOMEM;
9514 
9515 	buf->data = alloc_percpu(struct trace_array_cpu);
9516 	if (!buf->data) {
9517 		ring_buffer_free(buf->buffer);
9518 		buf->buffer = NULL;
9519 		return -ENOMEM;
9520 	}
9521 
9522 	/* Allocate the first page for all buffers */
9523 	set_buffer_entries(&tr->array_buffer,
9524 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9525 
9526 	return 0;
9527 }
9528 
9529 static void free_trace_buffer(struct array_buffer *buf)
9530 {
9531 	if (buf->buffer) {
9532 		ring_buffer_free(buf->buffer);
9533 		buf->buffer = NULL;
9534 		free_percpu(buf->data);
9535 		buf->data = NULL;
9536 	}
9537 }
9538 
9539 static int allocate_trace_buffers(struct trace_array *tr, int size)
9540 {
9541 	int ret;
9542 
9543 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9544 	if (ret)
9545 		return ret;
9546 
9547 #ifdef CONFIG_TRACER_MAX_TRACE
9548 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9549 				    allocate_snapshot ? size : 1);
9550 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9551 		free_trace_buffer(&tr->array_buffer);
9552 		return -ENOMEM;
9553 	}
9554 	tr->allocated_snapshot = allocate_snapshot;
9555 
9556 	allocate_snapshot = false;
9557 #endif
9558 
9559 	return 0;
9560 }
9561 
9562 static void free_trace_buffers(struct trace_array *tr)
9563 {
9564 	if (!tr)
9565 		return;
9566 
9567 	free_trace_buffer(&tr->array_buffer);
9568 
9569 #ifdef CONFIG_TRACER_MAX_TRACE
9570 	free_trace_buffer(&tr->max_buffer);
9571 #endif
9572 }
9573 
9574 static void init_trace_flags_index(struct trace_array *tr)
9575 {
9576 	int i;
9577 
9578 	/* Used by the trace options files */
9579 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9580 		tr->trace_flags_index[i] = i;
9581 }
9582 
9583 static void __update_tracer_options(struct trace_array *tr)
9584 {
9585 	struct tracer *t;
9586 
9587 	for (t = trace_types; t; t = t->next)
9588 		add_tracer_options(tr, t);
9589 }
9590 
9591 static void update_tracer_options(struct trace_array *tr)
9592 {
9593 	mutex_lock(&trace_types_lock);
9594 	tracer_options_updated = true;
9595 	__update_tracer_options(tr);
9596 	mutex_unlock(&trace_types_lock);
9597 }
9598 
9599 /* Must have trace_types_lock held */
9600 struct trace_array *trace_array_find(const char *instance)
9601 {
9602 	struct trace_array *tr, *found = NULL;
9603 
9604 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9605 		if (tr->name && strcmp(tr->name, instance) == 0) {
9606 			found = tr;
9607 			break;
9608 		}
9609 	}
9610 
9611 	return found;
9612 }
9613 
9614 struct trace_array *trace_array_find_get(const char *instance)
9615 {
9616 	struct trace_array *tr;
9617 
9618 	mutex_lock(&trace_types_lock);
9619 	tr = trace_array_find(instance);
9620 	if (tr)
9621 		tr->ref++;
9622 	mutex_unlock(&trace_types_lock);
9623 
9624 	return tr;
9625 }
9626 
9627 static int trace_array_create_dir(struct trace_array *tr)
9628 {
9629 	int ret;
9630 
9631 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9632 	if (!tr->dir)
9633 		return -EINVAL;
9634 
9635 	ret = event_trace_add_tracer(tr->dir, tr);
9636 	if (ret) {
9637 		tracefs_remove(tr->dir);
9638 		return ret;
9639 	}
9640 
9641 	init_tracer_tracefs(tr, tr->dir);
9642 	__update_tracer_options(tr);
9643 
9644 	return ret;
9645 }
9646 
9647 static struct trace_array *
9648 trace_array_create_systems(const char *name, const char *systems)
9649 {
9650 	struct trace_array *tr;
9651 	int ret;
9652 
9653 	ret = -ENOMEM;
9654 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9655 	if (!tr)
9656 		return ERR_PTR(ret);
9657 
9658 	tr->name = kstrdup(name, GFP_KERNEL);
9659 	if (!tr->name)
9660 		goto out_free_tr;
9661 
9662 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9663 		goto out_free_tr;
9664 
9665 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9666 		goto out_free_tr;
9667 
9668 	if (systems) {
9669 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9670 		if (!tr->system_names)
9671 			goto out_free_tr;
9672 	}
9673 
9674 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9675 
9676 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9677 
9678 	raw_spin_lock_init(&tr->start_lock);
9679 
9680 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9681 
9682 	tr->current_trace = &nop_trace;
9683 
9684 	INIT_LIST_HEAD(&tr->systems);
9685 	INIT_LIST_HEAD(&tr->events);
9686 	INIT_LIST_HEAD(&tr->hist_vars);
9687 	INIT_LIST_HEAD(&tr->err_log);
9688 
9689 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9690 		goto out_free_tr;
9691 
9692 	/* The ring buffer is expanded by default */
9693 	trace_set_ring_buffer_expanded(tr);
9694 
9695 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9696 		goto out_free_tr;
9697 
9698 	ftrace_init_trace_array(tr);
9699 
9700 	init_trace_flags_index(tr);
9701 
9702 	if (trace_instance_dir) {
9703 		ret = trace_array_create_dir(tr);
9704 		if (ret)
9705 			goto out_free_tr;
9706 	} else
9707 		__trace_early_add_events(tr);
9708 
9709 	list_add(&tr->list, &ftrace_trace_arrays);
9710 
9711 	tr->ref++;
9712 
9713 	return tr;
9714 
9715  out_free_tr:
9716 	ftrace_free_ftrace_ops(tr);
9717 	free_trace_buffers(tr);
9718 	free_cpumask_var(tr->pipe_cpumask);
9719 	free_cpumask_var(tr->tracing_cpumask);
9720 	kfree_const(tr->system_names);
9721 	kfree(tr->name);
9722 	kfree(tr);
9723 
9724 	return ERR_PTR(ret);
9725 }
9726 
9727 static struct trace_array *trace_array_create(const char *name)
9728 {
9729 	return trace_array_create_systems(name, NULL);
9730 }
9731 
9732 static int instance_mkdir(const char *name)
9733 {
9734 	struct trace_array *tr;
9735 	int ret;
9736 
9737 	mutex_lock(&event_mutex);
9738 	mutex_lock(&trace_types_lock);
9739 
9740 	ret = -EEXIST;
9741 	if (trace_array_find(name))
9742 		goto out_unlock;
9743 
9744 	tr = trace_array_create(name);
9745 
9746 	ret = PTR_ERR_OR_ZERO(tr);
9747 
9748 out_unlock:
9749 	mutex_unlock(&trace_types_lock);
9750 	mutex_unlock(&event_mutex);
9751 	return ret;
9752 }
9753 
9754 /**
9755  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9756  * @name: The name of the trace array to be looked up/created.
9757  * @systems: A list of systems to create event directories for (NULL for all)
9758  *
9759  * Returns a pointer to the trace array with the given name, or
9760  * NULL if it cannot be created.
9761  *
9762  * NOTE: This function increments the reference counter associated with the
9763  * trace array returned. This makes sure it cannot be freed while in use.
9764  * Use trace_array_put() once the trace array is no longer needed.
9765  * If the trace_array is to be freed, trace_array_destroy() needs to
9766  * be called after the trace_array_put(), or simply let user space delete
9767  * it from the tracefs instances directory. But until
9768  * trace_array_put() is called, user space cannot delete it.
9769  *
9770  */
9771 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9772 {
9773 	struct trace_array *tr;
9774 
9775 	mutex_lock(&event_mutex);
9776 	mutex_lock(&trace_types_lock);
9777 
9778 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9779 		if (tr->name && strcmp(tr->name, name) == 0)
9780 			goto out_unlock;
9781 	}
9782 
9783 	tr = trace_array_create_systems(name, systems);
9784 
9785 	if (IS_ERR(tr))
9786 		tr = NULL;
9787 out_unlock:
9788 	if (tr)
9789 		tr->ref++;
9790 
9791 	mutex_unlock(&trace_types_lock);
9792 	mutex_unlock(&event_mutex);
9793 	return tr;
9794 }
9795 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
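
/*
 * Illustrative only: a minimal sketch of how a module might use the
 * instance API exported above (see also samples/ftrace/sample-trace-array.c,
 * which this loosely follows). The instance name is arbitrary and error
 * handling is kept to the bare minimum.
 *
 *	#include <linux/module.h>
 *	#include <linux/trace.h>
 *
 *	static struct trace_array *my_tr;
 *
 *	static int __init my_mod_init(void)
 *	{
 *		// Creates "my-instance" (or finds it) and takes a reference.
 *		my_tr = trace_array_get_by_name("my-instance", NULL);
 *		return my_tr ? 0 : -ENOMEM;
 *	}
 *
 *	static void __exit my_mod_exit(void)
 *	{
 *		// Drop our reference, then remove the instance entirely.
 *		trace_array_put(my_tr);
 *		trace_array_destroy(my_tr);
 *	}
 *
 *	module_init(my_mod_init);
 *	module_exit(my_mod_exit);
 *	MODULE_LICENSE("GPL");
 */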
9796 
9797 static int __remove_instance(struct trace_array *tr)
9798 {
9799 	int i;
9800 
9801 	/* Reference counter for a newly created trace array = 1. */
9802 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9803 		return -EBUSY;
9804 
9805 	list_del(&tr->list);
9806 
9807 	/* Disable all the flags that were enabled coming in */
9808 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9809 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9810 			set_tracer_flag(tr, 1 << i, 0);
9811 	}
9812 
9813 	tracing_set_nop(tr);
9814 	clear_ftrace_function_probes(tr);
9815 	event_trace_del_tracer(tr);
9816 	ftrace_clear_pids(tr);
9817 	ftrace_destroy_function_files(tr);
9818 	tracefs_remove(tr->dir);
9819 	free_percpu(tr->last_func_repeats);
9820 	free_trace_buffers(tr);
9821 	clear_tracing_err_log(tr);
9822 
9823 	for (i = 0; i < tr->nr_topts; i++) {
9824 		kfree(tr->topts[i].topts);
9825 	}
9826 	kfree(tr->topts);
9827 
9828 	free_cpumask_var(tr->pipe_cpumask);
9829 	free_cpumask_var(tr->tracing_cpumask);
9830 	kfree_const(tr->system_names);
9831 	kfree(tr->name);
9832 	kfree(tr);
9833 
9834 	return 0;
9835 }
9836 
9837 int trace_array_destroy(struct trace_array *this_tr)
9838 {
9839 	struct trace_array *tr;
9840 	int ret;
9841 
9842 	if (!this_tr)
9843 		return -EINVAL;
9844 
9845 	mutex_lock(&event_mutex);
9846 	mutex_lock(&trace_types_lock);
9847 
9848 	ret = -ENODEV;
9849 
9850 	/* Make sure the trace array exists before destroying it. */
9851 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9852 		if (tr == this_tr) {
9853 			ret = __remove_instance(tr);
9854 			break;
9855 		}
9856 	}
9857 
9858 	mutex_unlock(&trace_types_lock);
9859 	mutex_unlock(&event_mutex);
9860 
9861 	return ret;
9862 }
9863 EXPORT_SYMBOL_GPL(trace_array_destroy);
9864 
9865 static int instance_rmdir(const char *name)
9866 {
9867 	struct trace_array *tr;
9868 	int ret;
9869 
9870 	mutex_lock(&event_mutex);
9871 	mutex_lock(&trace_types_lock);
9872 
9873 	ret = -ENODEV;
9874 	tr = trace_array_find(name);
9875 	if (tr)
9876 		ret = __remove_instance(tr);
9877 
9878 	mutex_unlock(&trace_types_lock);
9879 	mutex_unlock(&event_mutex);
9880 
9881 	return ret;
9882 }
9883 
9884 static __init void create_trace_instances(struct dentry *d_tracer)
9885 {
9886 	struct trace_array *tr;
9887 
9888 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9889 							 instance_mkdir,
9890 							 instance_rmdir);
9891 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9892 		return;
9893 
9894 	mutex_lock(&event_mutex);
9895 	mutex_lock(&trace_types_lock);
9896 
9897 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9898 		if (!tr->name)
9899 			continue;
9900 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9901 			     "Failed to create instance directory\n"))
9902 			break;
9903 	}
9904 
9905 	mutex_unlock(&trace_types_lock);
9906 	mutex_unlock(&event_mutex);
9907 }
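
/*
 * Illustrative only: the callbacks registered above are what run when user
 * space makes or removes a directory under "instances", e.g. (tracefs
 * mount point assumed):
 *
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	static void create_and_remove_instance(void)
 *	{
 *		// mkdir ends up in instance_mkdir(), rmdir in instance_rmdir().
 *		mkdir("/sys/kernel/tracing/instances/foo", 0755);
 *		rmdir("/sys/kernel/tracing/instances/foo");
 *	}
 */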
9908 
9909 static void
9910 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9911 {
9912 	int cpu;
9913 
9914 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9915 			tr, &show_traces_fops);
9916 
9917 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9918 			tr, &set_tracer_fops);
9919 
9920 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9921 			  tr, &tracing_cpumask_fops);
9922 
9923 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9924 			  tr, &tracing_iter_fops);
9925 
9926 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9927 			  tr, &tracing_fops);
9928 
9929 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9930 			  tr, &tracing_pipe_fops);
9931 
9932 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9933 			  tr, &tracing_entries_fops);
9934 
9935 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9936 			  tr, &tracing_total_entries_fops);
9937 
9938 	trace_create_file("free_buffer", 0200, d_tracer,
9939 			  tr, &tracing_free_buffer_fops);
9940 
9941 	trace_create_file("trace_marker", 0220, d_tracer,
9942 			  tr, &tracing_mark_fops);
9943 
9944 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9945 
9946 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9947 			  tr, &tracing_mark_raw_fops);
9948 
9949 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9950 			  &trace_clock_fops);
9951 
9952 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9953 			  tr, &rb_simple_fops);
9954 
9955 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9956 			  &trace_time_stamp_mode_fops);
9957 
9958 	tr->buffer_percent = 50;
9959 
9960 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9961 			tr, &buffer_percent_fops);
9962 
9963 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9964 			  tr, &buffer_subbuf_size_fops);
9965 
9966 	create_trace_options_dir(tr);
9967 
9968 #ifdef CONFIG_TRACER_MAX_TRACE
9969 	trace_create_maxlat_file(tr, d_tracer);
9970 #endif
9971 
9972 	if (ftrace_create_function_files(tr, d_tracer))
9973 		MEM_FAIL(1, "Could not allocate function filter files");
9974 
9975 #ifdef CONFIG_TRACER_SNAPSHOT
9976 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9977 			  tr, &snapshot_fops);
9978 #endif
9979 
9980 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9981 			  tr, &tracing_err_log_fops);
9982 
9983 	for_each_tracing_cpu(cpu)
9984 		tracing_init_tracefs_percpu(tr, cpu);
9985 
9986 	ftrace_init_tracefs(tr, d_tracer);
9987 }
9988 
9989 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9990 {
9991 	struct vfsmount *mnt;
9992 	struct file_system_type *type;
9993 
9994 	/*
9995 	 * To maintain backward compatibility for tools that mount
9996 	 * debugfs to get to the tracing facility, tracefs is automatically
9997 	 * mounted to the debugfs/tracing directory.
9998 	 */
9999 	type = get_fs_type("tracefs");
10000 	if (!type)
10001 		return NULL;
10002 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10003 	put_filesystem(type);
10004 	if (IS_ERR(mnt))
10005 		return NULL;
10006 	mntget(mnt);
10007 
10008 	return mnt;
10009 }
10010 
10011 /**
10012  * tracing_init_dentry - initialize top level trace array
10013  *
10014  * This is called when creating files or directories in the tracing
10015  * directory. It is called via fs_initcall() by any of the boot up code
10016  * and returns 0 once the top level tracing directory is available.
10017  */
10018 int tracing_init_dentry(void)
10019 {
10020 	struct trace_array *tr = &global_trace;
10021 
10022 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10023 		pr_warn("Tracing disabled due to lockdown\n");
10024 		return -EPERM;
10025 	}
10026 
10027 	/* The top level trace array uses NULL as parent */
10028 	if (tr->dir)
10029 		return 0;
10030 
10031 	if (WARN_ON(!tracefs_initialized()))
10032 		return -ENODEV;
10033 
10034 	/*
10035 	 * As there may still be users that expect the tracing
10036 	 * files to exist in debugfs/tracing, we must automount
10037 	 * the tracefs file system there, so older tools still
10038 	 * work with the newer kernel.
10039 	 */
10040 	tr->dir = debugfs_create_automount("tracing", NULL,
10041 					   trace_automount, NULL);
10042 
10043 	return 0;
10044 }
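
/*
 * Illustrative only: thanks to the automount set up above, tools that still
 * use the old debugfs path keep working; the first access to it mounts
 * tracefs there transparently. A user-space sketch (paths assumed):
 *
 *	#include <stdio.h>
 *
 *	static FILE *open_trace_file(void)
 *	{
 *		// Legacy path, served by the automounted tracefs.
 *		FILE *f = fopen("/sys/kernel/debug/tracing/trace", "r");
 *
 *		if (!f)
 *			f = fopen("/sys/kernel/tracing/trace", "r");
 *		return f;
 *	}
 */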
10045 
10046 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10047 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10048 
10049 static struct workqueue_struct *eval_map_wq __initdata;
10050 static struct work_struct eval_map_work __initdata;
10051 static struct work_struct tracerfs_init_work __initdata;
10052 
10053 static void __init eval_map_work_func(struct work_struct *work)
10054 {
10055 	int len;
10056 
10057 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10058 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10059 }
10060 
10061 static int __init trace_eval_init(void)
10062 {
10063 	INIT_WORK(&eval_map_work, eval_map_work_func);
10064 
10065 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10066 	if (!eval_map_wq) {
10067 		pr_err("Unable to allocate eval_map_wq\n");
10068 		/* Do the work synchronously here */
10069 		eval_map_work_func(&eval_map_work);
10070 		return -ENOMEM;
10071 	}
10072 
10073 	queue_work(eval_map_wq, &eval_map_work);
10074 	return 0;
10075 }
10076 
10077 subsys_initcall(trace_eval_init);
10078 
10079 static int __init trace_eval_sync(void)
10080 {
10081 	/* Make sure the eval map updates are finished */
10082 	if (eval_map_wq)
10083 		destroy_workqueue(eval_map_wq);
10084 	return 0;
10085 }
10086 
10087 late_initcall_sync(trace_eval_sync);
10088 
10090 #ifdef CONFIG_MODULES
10091 static void trace_module_add_evals(struct module *mod)
10092 {
10093 	if (!mod->num_trace_evals)
10094 		return;
10095 
10096 	/*
10097 	 * Modules with bad taint do not have events created, so do
10098 	 * not bother with their enums (eval maps) either.
10099 	 */
10100 	if (trace_module_has_bad_taint(mod))
10101 		return;
10102 
10103 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10104 }
10105 
10106 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10107 static void trace_module_remove_evals(struct module *mod)
10108 {
10109 	union trace_eval_map_item *map;
10110 	union trace_eval_map_item **last = &trace_eval_maps;
10111 
10112 	if (!mod->num_trace_evals)
10113 		return;
10114 
10115 	mutex_lock(&trace_eval_mutex);
10116 
10117 	map = trace_eval_maps;
10118 
10119 	while (map) {
10120 		if (map->head.mod == mod)
10121 			break;
10122 		map = trace_eval_jmp_to_tail(map);
10123 		last = &map->tail.next;
10124 		map = map->tail.next;
10125 	}
10126 	if (!map)
10127 		goto out;
10128 
10129 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10130 	kfree(map);
10131  out:
10132 	mutex_unlock(&trace_eval_mutex);
10133 }
10134 #else
10135 static inline void trace_module_remove_evals(struct module *mod) { }
10136 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10137 
10138 static int trace_module_notify(struct notifier_block *self,
10139 			       unsigned long val, void *data)
10140 {
10141 	struct module *mod = data;
10142 
10143 	switch (val) {
10144 	case MODULE_STATE_COMING:
10145 		trace_module_add_evals(mod);
10146 		break;
10147 	case MODULE_STATE_GOING:
10148 		trace_module_remove_evals(mod);
10149 		break;
10150 	}
10151 
10152 	return NOTIFY_OK;
10153 }
10154 
10155 static struct notifier_block trace_module_nb = {
10156 	.notifier_call = trace_module_notify,
10157 	.priority = 0,
10158 };
10159 #endif /* CONFIG_MODULES */
10160 
10161 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10162 {
10164 	event_trace_init();
10165 
10166 	init_tracer_tracefs(&global_trace, NULL);
10167 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10168 
10169 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10170 			&global_trace, &tracing_thresh_fops);
10171 
10172 	trace_create_file("README", TRACE_MODE_READ, NULL,
10173 			NULL, &tracing_readme_fops);
10174 
10175 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10176 			NULL, &tracing_saved_cmdlines_fops);
10177 
10178 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10179 			  NULL, &tracing_saved_cmdlines_size_fops);
10180 
10181 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10182 			NULL, &tracing_saved_tgids_fops);
10183 
10184 	trace_create_eval_file(NULL);
10185 
10186 #ifdef CONFIG_MODULES
10187 	register_module_notifier(&trace_module_nb);
10188 #endif
10189 
10190 #ifdef CONFIG_DYNAMIC_FTRACE
10191 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10192 			NULL, &tracing_dyn_info_fops);
10193 #endif
10194 
10195 	create_trace_instances(NULL);
10196 
10197 	update_tracer_options(&global_trace);
10198 }
10199 
10200 static __init int tracer_init_tracefs(void)
10201 {
10202 	int ret;
10203 
10204 	trace_access_lock_init();
10205 
10206 	ret = tracing_init_dentry();
10207 	if (ret)
10208 		return 0;
10209 
10210 	if (eval_map_wq) {
10211 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10212 		queue_work(eval_map_wq, &tracerfs_init_work);
10213 	} else {
10214 		tracer_init_tracefs_work_func(NULL);
10215 	}
10216 
10217 	rv_init_interface();
10218 
10219 	return 0;
10220 }
10221 
10222 fs_initcall(tracer_init_tracefs);
10223 
10224 static int trace_die_panic_handler(struct notifier_block *self,
10225 				unsigned long ev, void *unused);
10226 
10227 static struct notifier_block trace_panic_notifier = {
10228 	.notifier_call = trace_die_panic_handler,
10229 	.priority = INT_MAX - 1,
10230 };
10231 
10232 static struct notifier_block trace_die_notifier = {
10233 	.notifier_call = trace_die_panic_handler,
10234 	.priority = INT_MAX - 1,
10235 };
10236 
10237 /*
10238  * The idea is to execute the following die/panic callback early, in order
10239  * to avoid showing irrelevant information in the trace (like other panic
10240  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10241  * warnings get disabled (to prevent potential log flooding).
10242  */
10243 static int trace_die_panic_handler(struct notifier_block *self,
10244 				unsigned long ev, void *unused)
10245 {
10246 	if (!ftrace_dump_on_oops)
10247 		return NOTIFY_DONE;
10248 
10249 	/* The die notifier requires DIE_OOPS to trigger */
10250 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10251 		return NOTIFY_DONE;
10252 
10253 	ftrace_dump(ftrace_dump_on_oops);
10254 
10255 	return NOTIFY_DONE;
10256 }
10257 
10258 /*
10259  * The printk buffer is capped at 1024 bytes; we really don't need it that big.
10260  * Nothing should be printing 1000 characters anyway.
10261  */
10262 #define TRACE_MAX_PRINT		1000
10263 
10264 /*
10265  * Define here KERN_TRACE so that we have one place to modify
10266  * it if we decide to change what log level the ftrace dump
10267  * should be at.
10268  */
10269 #define KERN_TRACE		KERN_EMERG
10270 
10271 void
10272 trace_printk_seq(struct trace_seq *s)
10273 {
10274 	/* Probably should print a warning here. */
10275 	if (s->seq.len >= TRACE_MAX_PRINT)
10276 		s->seq.len = TRACE_MAX_PRINT;
10277 
10278 	/*
10279 	 * More paranoid code. Although the buffer size is set to
10280 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10281 	 * an extra layer of protection.
10282 	 */
10283 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10284 		s->seq.len = s->seq.size - 1;
10285 
10286 	/* Should be zero terminated, but we are paranoid. */
10287 	s->buffer[s->seq.len] = 0;
10288 
10289 	printk(KERN_TRACE "%s", s->buffer);
10290 
10291 	trace_seq_init(s);
10292 }
10293 
10294 void trace_init_global_iter(struct trace_iterator *iter)
10295 {
10296 	iter->tr = &global_trace;
10297 	iter->trace = iter->tr->current_trace;
10298 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10299 	iter->array_buffer = &global_trace.array_buffer;
10300 
10301 	if (iter->trace && iter->trace->open)
10302 		iter->trace->open(iter);
10303 
10304 	/* Annotate start of buffers if we had overruns */
10305 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10306 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10307 
10308 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10309 	if (trace_clocks[iter->tr->clock_id].in_ns)
10310 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10311 
10312 	/* Cannot use kmalloc for iter.temp and iter.fmt */
10313 	iter->temp = static_temp_buf;
10314 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10315 	iter->fmt = static_fmt_buf;
10316 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10317 }
10318 
10319 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10320 {
10321 	/* use static because iter can be a bit big for the stack */
10322 	static struct trace_iterator iter;
10323 	static atomic_t dump_running;
10324 	struct trace_array *tr = &global_trace;
10325 	unsigned int old_userobj;
10326 	unsigned long flags;
10327 	int cnt = 0, cpu;
10328 
10329 	/* Only allow one dump user at a time. */
10330 	if (atomic_inc_return(&dump_running) != 1) {
10331 		atomic_dec(&dump_running);
10332 		return;
10333 	}
10334 
10335 	/*
10336 	 * Always turn off tracing when we dump.
10337 	 * We don't need to show trace output of what happens
10338 	 * between multiple crashes.
10339 	 *
10340 	 * If the user does a sysrq-z, then they can re-enable
10341 	 * tracing with echo 1 > tracing_on.
10342 	 */
10343 	tracing_off();
10344 
10345 	local_irq_save(flags);
10346 
10347 	/* Simulate the iterator */
10348 	trace_init_global_iter(&iter);
10349 
10350 	for_each_tracing_cpu(cpu) {
10351 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10352 	}
10353 
10354 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10355 
10356 	/* don't look at user memory in panic mode */
10357 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10358 
10359 	switch (oops_dump_mode) {
10360 	case DUMP_ALL:
10361 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10362 		break;
10363 	case DUMP_ORIG:
10364 		iter.cpu_file = raw_smp_processor_id();
10365 		break;
10366 	case DUMP_NONE:
10367 		goto out_enable;
10368 	default:
10369 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10370 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10371 	}
10372 
10373 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
10374 
10375 	/* Did function tracer already get disabled? */
10376 	if (ftrace_is_dead()) {
10377 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10378 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10379 	}
10380 
10381 	/*
10382 	 * We need to stop all tracing on all CPUs to read
10383 	 * the next buffer. This is a bit expensive, but is
10384 	 * not done often. We fill all that we can read,
10385 	 * and then release the locks again.
10386 	 */
10387 
10388 	while (!trace_empty(&iter)) {
10389 
10390 		if (!cnt)
10391 			printk(KERN_TRACE "---------------------------------\n");
10392 
10393 		cnt++;
10394 
10395 		trace_iterator_reset(&iter);
10396 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10397 
10398 		if (trace_find_next_entry_inc(&iter) != NULL) {
10399 			int ret;
10400 
10401 			ret = print_trace_line(&iter);
10402 			if (ret != TRACE_TYPE_NO_CONSUME)
10403 				trace_consume(&iter);
10404 		}
10405 		touch_nmi_watchdog();
10406 
10407 		trace_printk_seq(&iter.seq);
10408 	}
10409 
10410 	if (!cnt)
10411 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10412 	else
10413 		printk(KERN_TRACE "---------------------------------\n");
10414 
10415  out_enable:
10416 	tr->trace_flags |= old_userobj;
10417 
10418 	for_each_tracing_cpu(cpu) {
10419 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10420 	}
10421 	atomic_dec(&dump_running);
10422 	local_irq_restore(flags);
10423 }
10424 EXPORT_SYMBOL_GPL(ftrace_dump);
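
/*
 * Illustrative only: ftrace_dump() is normally triggered through the
 * ftrace_dump_on_oops handling above, but a debugging patch could call it
 * directly when it detects an unrecoverable condition. A minimal sketch
 * (header placement of the declaration is an assumption):
 *
 *	#include <linux/ftrace.h>
 *
 *	static void my_debug_check(bool corrupted)
 *	{
 *		if (unlikely(corrupted)) {
 *			// DUMP_ALL dumps every CPU's buffer;
 *			// DUMP_ORIG would dump only the calling CPU.
 *			ftrace_dump(DUMP_ALL);
 *		}
 *	}
 */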
10425 
10426 #define WRITE_BUFSIZE  4096
10427 
10428 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10429 				size_t count, loff_t *ppos,
10430 				int (*createfn)(const char *))
10431 {
10432 	char *kbuf, *buf, *tmp;
10433 	int ret = 0;
10434 	size_t done = 0;
10435 	size_t size;
10436 
10437 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10438 	if (!kbuf)
10439 		return -ENOMEM;
10440 
10441 	while (done < count) {
10442 		size = count - done;
10443 
10444 		if (size >= WRITE_BUFSIZE)
10445 			size = WRITE_BUFSIZE - 1;
10446 
10447 		if (copy_from_user(kbuf, buffer + done, size)) {
10448 			ret = -EFAULT;
10449 			goto out;
10450 		}
10451 		kbuf[size] = '\0';
10452 		buf = kbuf;
10453 		do {
10454 			tmp = strchr(buf, '\n');
10455 			if (tmp) {
10456 				*tmp = '\0';
10457 				size = tmp - buf + 1;
10458 			} else {
10459 				size = strlen(buf);
10460 				if (done + size < count) {
10461 					if (buf != kbuf)
10462 						break;
10463 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10464 					pr_warn("Line length is too long: Should be less than %d\n",
10465 						WRITE_BUFSIZE - 2);
10466 					ret = -EINVAL;
10467 					goto out;
10468 				}
10469 			}
10470 			done += size;
10471 
10472 			/* Remove comments */
10473 			tmp = strchr(buf, '#');
10474 
10475 			if (tmp)
10476 				*tmp = '\0';
10477 
10478 			ret = createfn(buf);
10479 			if (ret)
10480 				goto out;
10481 			buf += size;
10482 
10483 		} while (done < count);
10484 	}
10485 	ret = done;
10486 
10487 out:
10488 	kfree(kbuf);
10489 
10490 	return ret;
10491 }
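
/*
 * Illustrative only: trace_parse_run_command() is the common helper behind
 * command files such as kprobe_events. It splits the user buffer into
 * newline-terminated commands, strips '#' comments and feeds each line to
 * @createfn. A sketch of a write handler built on it (the command handler
 * itself is hypothetical):
 *
 *	#include <linux/fs.h>
 *
 *	static int my_create_command(const char *cmd)
 *	{
 *		pr_info("got command: %s\n", cmd);
 *		return 0;	// a non-zero return aborts the remaining lines
 *	}
 *
 *	static ssize_t my_cmd_write(struct file *file, const char __user *ubuf,
 *				    size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       my_create_command);
 *	}
 */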
10492 
10493 #ifdef CONFIG_TRACER_MAX_TRACE
10494 __init static bool tr_needs_alloc_snapshot(const char *name)
10495 {
10496 	char *test;
10497 	int len = strlen(name);
10498 	bool ret;
10499 
10500 	if (!boot_snapshot_index)
10501 		return false;
10502 
10503 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10504 	    boot_snapshot_info[len] == '\t')
10505 		return true;
10506 
10507 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10508 	if (!test)
10509 		return false;
10510 
10511 	sprintf(test, "\t%s\t", name);
10512 	ret = strstr(boot_snapshot_info, test) != NULL;
10513 	kfree(test);
10514 	return ret;
10515 }
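
/*
 * Illustrative only: boot_snapshot_info is a tab-terminated list of the
 * instance names handed to the snapshot boot option, e.g. "foo\tbar\t".
 * A worked example of the matching above:
 *
 *	boot_snapshot_info = "foo\tbar\t"
 *	name = "foo"  ->  caught by the head check (name + '\t' at offset 0)
 *	name = "bar"  ->  test = "\tbar\t", found by strstr()  ->  true
 *	name = "ba"   ->  neither check matches                ->  false
 */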
10516 
10517 __init static void do_allocate_snapshot(const char *name)
10518 {
10519 	if (!tr_needs_alloc_snapshot(name))
10520 		return;
10521 
10522 	/*
10523 	 * When allocate_snapshot is set, the next call to
10524 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10525 	 * will allocate the snapshot buffer. That will also clear
10526 	 * this flag.
10527 	 */
10528 	allocate_snapshot = true;
10529 }
10530 #else
10531 static inline void do_allocate_snapshot(const char *name) { }
10532 #endif
10533 
10534 __init static void enable_instances(void)
10535 {
10536 	struct trace_array *tr;
10537 	char *curr_str;
10538 	char *str;
10539 	char *tok;
10540 
10541 	/* A tab is always appended */
10542 	boot_instance_info[boot_instance_index - 1] = '\0';
10543 	str = boot_instance_info;
10544 
10545 	while ((curr_str = strsep(&str, "\t"))) {
10546 
10547 		tok = strsep(&curr_str, ",");
10548 
10549 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10550 			do_allocate_snapshot(tok);
10551 
10552 		tr = trace_array_get_by_name(tok, NULL);
10553 		if (!tr) {
10554 			pr_warn("Failed to create instance buffer %s\n", tok);
10555 			continue;
10556 		}
10557 		/* Allow user space to delete it */
10558 		trace_array_put(tr);
10559 
10560 		while ((tok = strsep(&curr_str, ","))) {
10561 			early_enable_events(tr, tok, true);
10562 		}
10563 	}
10564 }
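
/*
 * Illustrative only: boot_instance_info is a tab-separated list of entries
 * of the form "name,event,event,...", built from the trace_instance= boot
 * options. A stand-alone sketch of the same two-level strsep() parse used
 * above, over an example string (glibc strsep() assumed):
 *
 *	#define _DEFAULT_SOURCE
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	int main(void)
 *	{
 *		char info[] = "foo,sched:sched_switch\tbar\t";
 *		char *str = info, *curr, *tok;
 *
 *		info[strlen(info) - 1] = '\0';	// a tab is always appended
 *		while ((curr = strsep(&str, "\t"))) {
 *			tok = strsep(&curr, ",");	// instance name
 *			printf("instance %s\n", tok);
 *			while ((tok = strsep(&curr, ",")))
 *				printf("  event %s\n", tok);
 *		}
 *		return 0;
 *	}
 */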
10565 
10566 __init static int tracer_alloc_buffers(void)
10567 {
10568 	int ring_buf_size;
10569 	int ret = -ENOMEM;
10570 
10572 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10573 		pr_warn("Tracing disabled due to lockdown\n");
10574 		return -EPERM;
10575 	}
10576 
10577 	/*
10578 	 * Make sure we don't accidentally add more trace options
10579 	 * than we have bits for.
10580 	 */
10581 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10582 
10583 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10584 		goto out;
10585 
10586 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10587 		goto out_free_buffer_mask;
10588 
10589 	/* Only allocate trace_printk buffers if a trace_printk exists */
10590 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10591 		/* Must be called before global_trace.buffer is allocated */
10592 		trace_printk_init_buffers();
10593 
10594 	/* To save memory, keep the ring buffer at its minimum size */
10595 	if (global_trace.ring_buffer_expanded)
10596 		ring_buf_size = trace_buf_size;
10597 	else
10598 		ring_buf_size = 1;
10599 
10600 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10601 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10602 
10603 	raw_spin_lock_init(&global_trace.start_lock);
10604 
10605 	/*
10606 	 * The prepare callback allocates some memory for the ring buffer. We
10607 	 * don't free the buffer if the CPU goes down. If we were to free
10608 	 * the buffer, then the user would lose any trace that was in the
10609 	 * buffer. The memory will be removed once the "instance" is removed.
10610 	 */
10611 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10612 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10613 				      NULL);
10614 	if (ret < 0)
10615 		goto out_free_cpumask;
10616 	/* Used for event triggers */
10617 	ret = -ENOMEM;
10618 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10619 	if (!temp_buffer)
10620 		goto out_rm_hp_state;
10621 
10622 	if (trace_create_savedcmd() < 0)
10623 		goto out_free_temp_buffer;
10624 
10625 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10626 		goto out_free_savedcmd;
10627 
10628 	/* TODO: make the number of buffers hot pluggable with CPUs */
10629 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10630 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10631 		goto out_free_pipe_cpumask;
10632 	}
10633 	if (global_trace.buffer_disabled)
10634 		tracing_off();
10635 
10636 	if (trace_boot_clock) {
10637 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10638 		if (ret < 0)
10639 			pr_warn("Trace clock %s not defined, going back to default\n",
10640 				trace_boot_clock);
10641 	}
10642 
10643 	/*
10644 	 * register_tracer() might reference current_trace, so it
10645 	 * needs to be set before we register anything. This is
10646 	 * just a bootstrap of current_trace anyway.
10647 	 */
10648 	global_trace.current_trace = &nop_trace;
10649 
10650 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10651 
10652 	ftrace_init_global_array_ops(&global_trace);
10653 
10654 	init_trace_flags_index(&global_trace);
10655 
10656 	register_tracer(&nop_trace);
10657 
10658 	/* Function tracing may start here (via kernel command line) */
10659 	init_function_trace();
10660 
10661 	/* All seems OK, enable tracing */
10662 	tracing_disabled = 0;
10663 
10664 	atomic_notifier_chain_register(&panic_notifier_list,
10665 				       &trace_panic_notifier);
10666 
10667 	register_die_notifier(&trace_die_notifier);
10668 
10669 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10670 
10671 	INIT_LIST_HEAD(&global_trace.systems);
10672 	INIT_LIST_HEAD(&global_trace.events);
10673 	INIT_LIST_HEAD(&global_trace.hist_vars);
10674 	INIT_LIST_HEAD(&global_trace.err_log);
10675 	list_add(&global_trace.list, &ftrace_trace_arrays);
10676 
10677 	apply_trace_boot_options();
10678 
10679 	register_snapshot_cmd();
10680 
10681 	test_can_verify();
10682 
10683 	return 0;
10684 
10685 out_free_pipe_cpumask:
10686 	free_cpumask_var(global_trace.pipe_cpumask);
10687 out_free_savedcmd:
10688 	free_saved_cmdlines_buffer(savedcmd);
10689 out_free_temp_buffer:
10690 	ring_buffer_free(temp_buffer);
10691 out_rm_hp_state:
10692 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10693 out_free_cpumask:
10694 	free_cpumask_var(global_trace.tracing_cpumask);
10695 out_free_buffer_mask:
10696 	free_cpumask_var(tracing_buffer_mask);
10697 out:
10698 	return ret;
10699 }
10700 
10701 void __init ftrace_boot_snapshot(void)
10702 {
10703 #ifdef CONFIG_TRACER_MAX_TRACE
10704 	struct trace_array *tr;
10705 
10706 	if (!snapshot_at_boot)
10707 		return;
10708 
10709 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10710 		if (!tr->allocated_snapshot)
10711 			continue;
10712 
10713 		tracing_snapshot_instance(tr);
10714 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10715 	}
10716 #endif
10717 }
10718 
10719 void __init early_trace_init(void)
10720 {
10721 	if (tracepoint_printk) {
10722 		tracepoint_print_iter =
10723 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10724 		if (MEM_FAIL(!tracepoint_print_iter,
10725 			     "Failed to allocate trace iterator\n"))
10726 			tracepoint_printk = 0;
10727 		else
10728 			static_key_enable(&tracepoint_printk_key.key);
10729 	}
10730 	tracer_alloc_buffers();
10731 
10732 	init_events();
10733 }
10734 
10735 void __init trace_init(void)
10736 {
10737 	trace_event_init();
10738 
10739 	if (boot_instance_index)
10740 		enable_instances();
10741 }
10742 
10743 __init static void clear_boot_tracer(void)
10744 {
10745 	/*
10746 	 * The default bootup tracer name points into a boot buffer that
10747 	 * lives in an init section. This function is called from a late
10748 	 * initcall. If we did not find the boot tracer by now, clear the
10749 	 * pointer out to prevent later registration from accessing the
10750 	 * buffer that is about to be freed.
10751 	 */
10752 	if (!default_bootup_tracer)
10753 		return;
10754 
10755 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10756 	       default_bootup_tracer);
10757 	default_bootup_tracer = NULL;
10758 }
10759 
10760 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10761 __init static void tracing_set_default_clock(void)
10762 {
10763 	/* sched_clock_stable() is determined in late_initcall */
10764 	if (!trace_boot_clock && !sched_clock_stable()) {
10765 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10766 			pr_warn("Can not set tracing clock due to lockdown\n");
10767 			return;
10768 		}
10769 
10770 		printk(KERN_WARNING
10771 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10772 		       "If you want to keep using the local clock, then add:\n"
10773 		       "  \"trace_clock=local\"\n"
10774 		       "on the kernel command line\n");
10775 		tracing_set_clock(&global_trace, "global");
10776 	}
10777 }
10778 #else
10779 static inline void tracing_set_default_clock(void) { }
10780 #endif
10781 
10782 __init static int late_trace_init(void)
10783 {
10784 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10785 		static_key_disable(&tracepoint_printk_key.key);
10786 		tracepoint_printk = 0;
10787 	}
10788 
10789 	tracing_set_default_clock();
10790 	clear_boot_tracer();
10791 	return 0;
10792 }
10793 
10794 late_initcall_sync(late_trace_init);
10795