xref: /linux/kernel/trace/trace.c (revision c759e609030ca37e59866cbc849fdc611cc56292)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring buffer to count the
61  * entries inserted during the selftest, although concurrent
62  * insertions into the ring buffer, such as trace_printk(), could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66 
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72 
73 void __init disable_tracing_selftest(const char *reason)
74 {
75 	if (!tracing_selftest_disabled) {
76 		tracing_selftest_disabled = true;
77 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
78 	}
79 }
80 #else
81 #define tracing_selftest_running	0
82 #define tracing_selftest_disabled	0
83 #endif
84 
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered the oops
133  */
134 
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136 
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139 
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 	struct module			*mod;
144 	unsigned long			length;
145 };
146 
147 union trace_eval_map_item;
148 
149 struct trace_eval_map_tail {
150 	/*
151 	 * "end" is first and points to NULL as it must be different
152 	 * from "mod" or "eval_string"
153 	 */
154 	union trace_eval_map_item	*next;
155 	const char			*end;	/* points to NULL */
156 };
157 
158 static DEFINE_MUTEX(trace_eval_mutex);
159 
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168 	struct trace_eval_map		map;
169 	struct trace_eval_map_head	head;
170 	struct trace_eval_map_tail	tail;
171 };
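/*
 * Illustrative layout (a sketch, not a literal memory dump): an array
 * holding N saved maps looks like
 *
 *	[ head (length, mod) ][ map 0 ] ... [ map N-1 ][ tail (next, end) ]
 *
 * where tail.next chains to the next such array, if any.
 */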
172 
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175 
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 				   struct trace_buffer *buffer,
179 				   unsigned int trace_ctx);
180 
181 #define MAX_TRACER_SIZE		100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184 
185 static bool allocate_snapshot;
186 static bool snapshot_at_boot;
187 
188 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
189 static int boot_instance_index;
190 
191 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_snapshot_index;
193 
194 static int __init set_cmdline_ftrace(char *str)
195 {
196 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
197 	default_bootup_tracer = bootup_tracer_buf;
198 	/* We are using ftrace early, expand it */
199 	trace_set_ring_buffer_expanded(NULL);
200 	return 1;
201 }
202 __setup("ftrace=", set_cmdline_ftrace);
203 
204 static int __init set_ftrace_dump_on_oops(char *str)
205 {
206 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
207 		ftrace_dump_on_oops = DUMP_ALL;
208 		return 1;
209 	}
210 
211 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
212 		ftrace_dump_on_oops = DUMP_ORIG;
213                 return 1;
214         }
215 
216         return 0;
217 }
218 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
219 
220 static int __init stop_trace_on_warning(char *str)
221 {
222 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
223 		__disable_trace_on_warning = 1;
224 	return 1;
225 }
226 __setup("traceoff_on_warning", stop_trace_on_warning);
227 
228 static int __init boot_alloc_snapshot(char *str)
229 {
230 	char *slot = boot_snapshot_info + boot_snapshot_index;
231 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
232 	int ret;
233 
234 	if (str[0] == '=') {
235 		str++;
236 		if (strlen(str) >= left)
237 			return -1;
238 
239 		ret = snprintf(slot, left, "%s\t", str);
240 		boot_snapshot_index += ret;
241 	} else {
242 		allocate_snapshot = true;
243 		/* We also need the main ring buffer expanded */
244 		trace_set_ring_buffer_expanded(NULL);
245 	}
246 	return 1;
247 }
248 __setup("alloc_snapshot", boot_alloc_snapshot);
249 
250 
251 static int __init boot_snapshot(char *str)
252 {
253 	snapshot_at_boot = true;
254 	boot_alloc_snapshot(str);
255 	return 1;
256 }
257 __setup("ftrace_boot_snapshot", boot_snapshot);
258 
259 
260 static int __init boot_instance(char *str)
261 {
262 	char *slot = boot_instance_info + boot_instance_index;
263 	int left = sizeof(boot_instance_info) - boot_instance_index;
264 	int ret;
265 
266 	if (strlen(str) >= left)
267 		return -1;
268 
269 	ret = snprintf(slot, left, "%s\t", str);
270 	boot_instance_index += ret;
271 
272 	return 1;
273 }
274 __setup("trace_instance=", boot_instance);
275 
276 
277 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
278 
279 static int __init set_trace_boot_options(char *str)
280 {
281 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
282 	return 1;
283 }
284 __setup("trace_options=", set_trace_boot_options);
285 
286 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
287 static char *trace_boot_clock __initdata;
288 
289 static int __init set_trace_boot_clock(char *str)
290 {
291 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
292 	trace_boot_clock = trace_boot_clock_buf;
293 	return 1;
294 }
295 __setup("trace_clock=", set_trace_boot_clock);
296 
297 static int __init set_tracepoint_printk(char *str)
298 {
299 	/* Ignore the "tp_printk_stop_on_boot" param */
300 	if (*str == '_')
301 		return 0;
302 
303 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
304 		tracepoint_printk = 1;
305 	return 1;
306 }
307 __setup("tp_printk", set_tracepoint_printk);
308 
309 static int __init set_tracepoint_printk_stop(char *str)
310 {
311 	tracepoint_printk_stop_on_boot = true;
312 	return 1;
313 }
314 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
315 
316 unsigned long long ns2usecs(u64 nsec)
317 {
318 	nsec += 500;
319 	do_div(nsec, 1000);
320 	return nsec;
321 }
322 
323 static void
324 trace_process_export(struct trace_export *export,
325 	       struct ring_buffer_event *event, int flag)
326 {
327 	struct trace_entry *entry;
328 	unsigned int size = 0;
329 
330 	if (export->flags & flag) {
331 		entry = ring_buffer_event_data(event);
332 		size = ring_buffer_event_length(event);
333 		export->write(export, entry, size);
334 	}
335 }
336 
337 static DEFINE_MUTEX(ftrace_export_lock);
338 
339 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
340 
341 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
342 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
344 
345 static inline void ftrace_exports_enable(struct trace_export *export)
346 {
347 	if (export->flags & TRACE_EXPORT_FUNCTION)
348 		static_branch_inc(&trace_function_exports_enabled);
349 
350 	if (export->flags & TRACE_EXPORT_EVENT)
351 		static_branch_inc(&trace_event_exports_enabled);
352 
353 	if (export->flags & TRACE_EXPORT_MARKER)
354 		static_branch_inc(&trace_marker_exports_enabled);
355 }
356 
357 static inline void ftrace_exports_disable(struct trace_export *export)
358 {
359 	if (export->flags & TRACE_EXPORT_FUNCTION)
360 		static_branch_dec(&trace_function_exports_enabled);
361 
362 	if (export->flags & TRACE_EXPORT_EVENT)
363 		static_branch_dec(&trace_event_exports_enabled);
364 
365 	if (export->flags & TRACE_EXPORT_MARKER)
366 		static_branch_dec(&trace_marker_exports_enabled);
367 }
368 
369 static void ftrace_exports(struct ring_buffer_event *event, int flag)
370 {
371 	struct trace_export *export;
372 
373 	preempt_disable_notrace();
374 
375 	export = rcu_dereference_raw_check(ftrace_exports_list);
376 	while (export) {
377 		trace_process_export(export, event, flag);
378 		export = rcu_dereference_raw_check(export->next);
379 	}
380 
381 	preempt_enable_notrace();
382 }
383 
384 static inline void
385 add_trace_export(struct trace_export **list, struct trace_export *export)
386 {
387 	rcu_assign_pointer(export->next, *list);
388 	/*
389 	 * We are adding export to the list but another
390 	 * CPU might be walking that list. We need to make sure
391 	 * the export->next pointer is valid before another CPU sees
392 	 * the export pointer added to the list.
393 	 */
394 	rcu_assign_pointer(*list, export);
395 }
396 
397 static inline int
398 rm_trace_export(struct trace_export **list, struct trace_export *export)
399 {
400 	struct trace_export **p;
401 
402 	for (p = list; *p != NULL; p = &(*p)->next)
403 		if (*p == export)
404 			break;
405 
406 	if (*p != export)
407 		return -1;
408 
409 	rcu_assign_pointer(*p, (*p)->next);
410 
411 	return 0;
412 }
413 
414 static inline void
415 add_ftrace_export(struct trace_export **list, struct trace_export *export)
416 {
417 	ftrace_exports_enable(export);
418 
419 	add_trace_export(list, export);
420 }
421 
422 static inline int
423 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
424 {
425 	int ret;
426 
427 	ret = rm_trace_export(list, export);
428 	ftrace_exports_disable(export);
429 
430 	return ret;
431 }
432 
433 int register_ftrace_export(struct trace_export *export)
434 {
435 	if (WARN_ON_ONCE(!export->write))
436 		return -1;
437 
438 	mutex_lock(&ftrace_export_lock);
439 
440 	add_ftrace_export(&ftrace_exports_list, export);
441 
442 	mutex_unlock(&ftrace_export_lock);
443 
444 	return 0;
445 }
446 EXPORT_SYMBOL_GPL(register_ftrace_export);
447 
448 int unregister_ftrace_export(struct trace_export *export)
449 {
450 	int ret;
451 
452 	mutex_lock(&ftrace_export_lock);
453 
454 	ret = rm_ftrace_export(&ftrace_exports_list, export);
455 
456 	mutex_unlock(&ftrace_export_lock);
457 
458 	return ret;
459 }
460 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
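/*
 * Usage sketch (illustrative only; my_write() and my_export are
 * hypothetical, and the exact write() prototype is the one declared in
 * <linux/trace.h>):
 *
 *	static void my_write(struct trace_export *export, const void *entry,
 *			     unsigned int len)
 *	{
 *		... forward 'len' bytes at 'entry' to an external sink ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_write,
 *		.flags = TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */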
461 
462 /* trace_flags holds trace_options default values */
463 #define TRACE_DEFAULT_FLAGS						\
464 	(FUNCTION_DEFAULT_FLAGS |					\
465 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
466 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
467 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
468 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
469 	 TRACE_ITER_HASH_PTR)
470 
471 /* trace_options that are only supported by global_trace */
472 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
473 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
474 
475 /* trace_flags that are default zero for instances */
476 #define ZEROED_TRACE_FLAGS \
477 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
478 
479 /*
480  * The global_trace is the descriptor that holds the top-level tracing
481  * buffers for the live tracing.
482  */
483 static struct trace_array global_trace = {
484 	.trace_flags = TRACE_DEFAULT_FLAGS,
485 };
486 
487 void trace_set_ring_buffer_expanded(struct trace_array *tr)
488 {
489 	if (!tr)
490 		tr = &global_trace;
491 	tr->ring_buffer_expanded = true;
492 }
493 
494 LIST_HEAD(ftrace_trace_arrays);
495 
496 int trace_array_get(struct trace_array *this_tr)
497 {
498 	struct trace_array *tr;
499 	int ret = -ENODEV;
500 
501 	mutex_lock(&trace_types_lock);
502 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503 		if (tr == this_tr) {
504 			tr->ref++;
505 			ret = 0;
506 			break;
507 		}
508 	}
509 	mutex_unlock(&trace_types_lock);
510 
511 	return ret;
512 }
513 
514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516 	WARN_ON(!this_tr->ref);
517 	this_tr->ref--;
518 }
519 
520 /**
521  * trace_array_put - Decrement the reference counter for this trace array.
522  * @this_tr : pointer to the trace array
523  *
524  * NOTE: Use this when we no longer need the trace array returned by
525  * trace_array_get_by_name(). This ensures the trace array can be later
526  * destroyed.
527  *
528  */
529 void trace_array_put(struct trace_array *this_tr)
530 {
531 	if (!this_tr)
532 		return;
533 
534 	mutex_lock(&trace_types_lock);
535 	__trace_array_put(this_tr);
536 	mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
539 
540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542 	int ret;
543 
544 	ret = security_locked_down(LOCKDOWN_TRACEFS);
545 	if (ret)
546 		return ret;
547 
548 	if (tracing_disabled)
549 		return -ENODEV;
550 
551 	if (tr && trace_array_get(tr) < 0)
552 		return -ENODEV;
553 
554 	return 0;
555 }
556 
557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558 			      struct trace_buffer *buffer,
559 			      struct ring_buffer_event *event)
560 {
561 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562 	    !filter_match_preds(call->filter, rec)) {
563 		__trace_event_discard_commit(buffer, event);
564 		return 1;
565 	}
566 
567 	return 0;
568 }
569 
570 /**
571  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572  * @filtered_pids: The list of pids to check
573  * @search_pid: The PID to find in @filtered_pids
574  *
575  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576  */
577 bool
578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580 	return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582 
583 /**
584  * trace_ignore_this_task - should a task be ignored for tracing
585  * @filtered_pids: The list of pids to check
586  * @filtered_no_pids: The list of pids not to be traced
587  * @task: The task that should be ignored if not filtered
588  *
589  * Checks if @task should be traced or not from @filtered_pids.
590  * Returns true if @task should *NOT* be traced.
591  * Returns false if @task should be traced.
592  */
593 bool
594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595 		       struct trace_pid_list *filtered_no_pids,
596 		       struct task_struct *task)
597 {
598 	/*
599 	 * If filtered_no_pids is not empty, and the task's pid is listed
600 	 * in filtered_no_pids, then return true.
601 	 * Otherwise, if filtered_pids is empty, that means we can
602 	 * trace all tasks. If it has content, then only trace pids
603 	 * within filtered_pids.
604 	 */
605 
606 	return (filtered_pids &&
607 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
608 		(filtered_no_pids &&
609 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
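/*
 * For example: with filtered_pids = {1, 2} and no filtered_no_pids set,
 * only tasks 1 and 2 are traced; with filtered_no_pids = {3} and no
 * filtered_pids set, every task except task 3 is traced.
 */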
611 
612 /**
613  * trace_filter_add_remove_task - Add or remove a task from a pid_list
614  * @pid_list: The list to modify
615  * @self: The current task for fork or NULL for exit
616  * @task: The task to add or remove
617  *
618  * If adding a task, if @self is defined, the task is only added if @self
619  * is also included in @pid_list. This happens on fork and tasks should
620  * only be added when the parent is listed. If @self is NULL, then the
621  * @task pid will be removed from the list, which would happen on exit
622  * of a task.
623  */
624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625 				  struct task_struct *self,
626 				  struct task_struct *task)
627 {
628 	if (!pid_list)
629 		return;
630 
631 	/* For forks, we only add if the forking task is listed */
632 	if (self) {
633 		if (!trace_find_filtered_pid(pid_list, self->pid))
634 			return;
635 	}
636 
637 	/* "self" is set for forks, and NULL for exits */
638 	if (self)
639 		trace_pid_list_set(pid_list, task->pid);
640 	else
641 		trace_pid_list_clear(pid_list, task->pid);
642 }
643 
644 /**
645  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646  * @pid_list: The pid list to show
647  * @v: The last pid that was shown (the actual pid plus 1, so zero can be displayed)
648  * @pos: The position of the file
649  *
650  * This is used by the seq_file "next" operation to iterate the pids
651  * listed in a trace_pid_list structure.
652  *
653  * Returns the pid+1 as we want to display pid of zero, but NULL would
654  * stop the iteration.
655  */
656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658 	long pid = (unsigned long)v;
659 	unsigned int next;
660 
661 	(*pos)++;
662 
663 	/* pid already is +1 of the actual previous bit */
664 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
665 		return NULL;
666 
667 	pid = next;
668 
669 	/* Return pid + 1 to allow zero to be represented */
670 	return (void *)(pid + 1);
671 }
672 
673 /**
674  * trace_pid_start - Used for seq_file to start reading pid lists
675  * @pid_list: The pid list to show
676  * @pos: The position of the file
677  *
678  * This is used by seq_file "start" operation to start the iteration
679  * of listing pids.
680  *
681  * Returns the pid+1 as we want to display pid of zero, but NULL would
682  * stop the iteration.
683  */
684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686 	unsigned long pid;
687 	unsigned int first;
688 	loff_t l = 0;
689 
690 	if (trace_pid_list_first(pid_list, &first) < 0)
691 		return NULL;
692 
693 	pid = first;
694 
695 	/* Return pid + 1 so that zero can be the exit value */
696 	for (pid++; pid && l < *pos;
697 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698 		;
699 	return (void *)pid;
700 }
701 
702 /**
703  * trace_pid_show - show the current pid in seq_file processing
704  * @m: The seq_file structure to write into
705  * @v: A void pointer of the pid (+1) value to display
706  *
707  * Can be directly used by seq_file operations to display the current
708  * pid value.
709  */
710 int trace_pid_show(struct seq_file *m, void *v)
711 {
712 	unsigned long pid = (unsigned long)v - 1;
713 
714 	seq_printf(m, "%lu\n", pid);
715 	return 0;
716 }
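/*
 * Wiring sketch (illustrative; p_start, p_next, p_stop and pid_list are
 * placeholders for what a real pid file provides): these helpers are
 * meant to back a seq_file, roughly as
 *
 *	static const struct seq_operations show_pid_seq_ops = {
 *		.start = p_start,	// wraps trace_pid_start(pid_list, pos)
 *		.next  = p_next,	// wraps trace_pid_next(pid_list, v, pos)
 *		.stop  = p_stop,
 *		.show  = trace_pid_show,
 *	};
 */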
717 
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE		127
720 
721 int trace_pid_write(struct trace_pid_list *filtered_pids,
722 		    struct trace_pid_list **new_pid_list,
723 		    const char __user *ubuf, size_t cnt)
724 {
725 	struct trace_pid_list *pid_list;
726 	struct trace_parser parser;
727 	unsigned long val;
728 	int nr_pids = 0;
729 	ssize_t read = 0;
730 	ssize_t ret;
731 	loff_t pos;
732 	pid_t pid;
733 
734 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735 		return -ENOMEM;
736 
737 	/*
738 	 * Always recreate a new array. The write is an all-or-nothing
739 	 * operation: a new array is always created when the user adds
740 	 * new pids. If the operation fails, then the current list is
741 	 * not modified.
742 	 */
743 	pid_list = trace_pid_list_alloc();
744 	if (!pid_list) {
745 		trace_parser_put(&parser);
746 		return -ENOMEM;
747 	}
748 
749 	if (filtered_pids) {
750 		/* copy the current bits to the new max */
751 		ret = trace_pid_list_first(filtered_pids, &pid);
752 		while (!ret) {
753 			trace_pid_list_set(pid_list, pid);
754 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755 			nr_pids++;
756 		}
757 	}
758 
759 	ret = 0;
760 	while (cnt > 0) {
761 
762 		pos = 0;
763 
764 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
765 		if (ret < 0)
766 			break;
767 
768 		read += ret;
769 		ubuf += ret;
770 		cnt -= ret;
771 
772 		if (!trace_parser_loaded(&parser))
773 			break;
774 
775 		ret = -EINVAL;
776 		if (kstrtoul(parser.buffer, 0, &val))
777 			break;
778 
779 		pid = (pid_t)val;
780 
781 		if (trace_pid_list_set(pid_list, pid) < 0) {
782 			ret = -1;
783 			break;
784 		}
785 		nr_pids++;
786 
787 		trace_parser_clear(&parser);
788 		ret = 0;
789 	}
790 	trace_parser_put(&parser);
791 
792 	if (ret < 0) {
793 		trace_pid_list_free(pid_list);
794 		return ret;
795 	}
796 
797 	if (!nr_pids) {
798 		/* Cleared the list of pids */
799 		trace_pid_list_free(pid_list);
800 		pid_list = NULL;
801 	}
802 
803 	*new_pid_list = pid_list;
804 
805 	return read;
806 }
807 
808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810 	u64 ts;
811 
812 	/* Early boot up does not have a buffer yet */
813 	if (!buf->buffer)
814 		return trace_clock_local();
815 
816 	ts = ring_buffer_time_stamp(buf->buffer);
817 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818 
819 	return ts;
820 }
821 
822 u64 ftrace_now(int cpu)
823 {
824 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826 
827 /**
828  * tracing_is_enabled - Show if global_trace has been enabled
829  *
830  * Shows if the global trace has been enabled or not. It uses the
831  * mirror flag "buffer_disabled" so it can be used in fast paths such
832  * as the irqsoff tracer. But it may be inaccurate due to races. If you
833  * need to know the accurate state, use tracing_is_on() which is a little
834  * slower, but accurate.
835  */
836 int tracing_is_enabled(void)
837 {
838 	/*
839 	 * For quick access (irqsoff uses this in fast path), just
840 	 * return the mirror variable of the state of the ring buffer.
841 	 * It's a little racy, but we don't really care.
842 	 */
843 	smp_rmb();
844 	return !global_trace.buffer_disabled;
845 }
846 
847 /*
848  * trace_buf_size is the size in bytes that is allocated
849  * for a buffer. Note, the number of bytes is always rounded
850  * to page size.
851  *
852  * This number is purposely set to a low number of 16384.
853  * If the dump on oops happens, it will be much appreciated
854  * to not have to wait for all that output. In any case, this is
855  * configurable at both boot time and run time.
856  */
857 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
858 
859 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860 
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer		*trace_types __read_mostly;
863 
864 /*
865  * trace_types_lock is used to protect the trace_types list.
866  */
867 DEFINE_MUTEX(trace_types_lock);
868 
869 /*
870  * serialize the access of the ring buffer
871  *
872  * The ring buffer serializes readers, but this is only low-level protection.
873  * The validity of the events (returned by ring_buffer_peek(), etc.)
874  * is not protected by the ring buffer.
875  *
876  * The content of events may become garbage if we allow other processes
877  * to consume these events concurrently:
878  *   A) the page of the consumed events may become a normal page
879  *      (not a reader page) in the ring buffer, and this page will be
880  *      rewritten by the event producer.
881  *   B) The page of the consumed events may become a page for splice_read,
882  *      and this page will be returned to the system.
883  *
884  * These primitives allow multiple processes to access different CPU
885  * ring buffers concurrently.
886  *
887  * These primitives don't distinguish read-only and read-consume access.
888  * Multiple read-only accesses are also serialized.
889  */
890 
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894 
895 static inline void trace_access_lock(int cpu)
896 {
897 	if (cpu == RING_BUFFER_ALL_CPUS) {
898 		/* gain it for accessing the whole ring buffer. */
899 		down_write(&all_cpu_access_lock);
900 	} else {
901 		/* gain it for accessing a cpu ring buffer. */
902 
903 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904 		down_read(&all_cpu_access_lock);
905 
906 		/* Secondly block other access to this @cpu ring buffer. */
907 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
908 	}
909 }
910 
911 static inline void trace_access_unlock(int cpu)
912 {
913 	if (cpu == RING_BUFFER_ALL_CPUS) {
914 		up_write(&all_cpu_access_lock);
915 	} else {
916 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917 		up_read(&all_cpu_access_lock);
918 	}
919 }
920 
921 static inline void trace_access_lock_init(void)
922 {
923 	int cpu;
924 
925 	for_each_possible_cpu(cpu)
926 		mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928 
929 #else
930 
931 static DEFINE_MUTEX(access_lock);
932 
933 static inline void trace_access_lock(int cpu)
934 {
935 	(void)cpu;
936 	mutex_lock(&access_lock);
937 }
938 
939 static inline void trace_access_unlock(int cpu)
940 {
941 	(void)cpu;
942 	mutex_unlock(&access_lock);
943 }
944 
945 static inline void trace_access_lock_init(void)
946 {
947 }
948 
949 #endif
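/*
 * Usage sketch: readers bracket their access to a per-CPU buffer (or to
 * all buffers, with RING_BUFFER_ALL_CPUS) with these helpers, e.g.
 *
 *	trace_access_lock(cpu_file);
 *	... peek at or consume events for cpu_file ...
 *	trace_access_unlock(cpu_file);
 */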
950 
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953 				 unsigned int trace_ctx,
954 				 int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956 				      struct trace_buffer *buffer,
957 				      unsigned int trace_ctx,
958 				      int skip, struct pt_regs *regs);
959 
960 #else
961 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962 					unsigned int trace_ctx,
963 					int skip, struct pt_regs *regs)
964 {
965 }
966 static inline void ftrace_trace_stack(struct trace_array *tr,
967 				      struct trace_buffer *buffer,
968 				      unsigned long trace_ctx,
969 				      int skip, struct pt_regs *regs)
970 {
971 }
972 
973 #endif
974 
975 static __always_inline void
976 trace_event_setup(struct ring_buffer_event *event,
977 		  int type, unsigned int trace_ctx)
978 {
979 	struct trace_entry *ent = ring_buffer_event_data(event);
980 
981 	tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983 
984 static __always_inline struct ring_buffer_event *
985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986 			  int type,
987 			  unsigned long len,
988 			  unsigned int trace_ctx)
989 {
990 	struct ring_buffer_event *event;
991 
992 	event = ring_buffer_lock_reserve(buffer, len);
993 	if (event != NULL)
994 		trace_event_setup(event, type, trace_ctx);
995 
996 	return event;
997 }
998 
999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001 	if (tr->array_buffer.buffer)
1002 		ring_buffer_record_on(tr->array_buffer.buffer);
1003 	/*
1004 	 * This flag is looked at when buffers haven't been allocated
1005 	 * yet, or by some tracers (like irqsoff), that just want to
1006 	 * know if the ring buffer has been disabled, but it can handle
1007 	 * races of where it gets disabled but we still do a record.
1008 	 * races where it gets disabled but we still do a record.
1009 	 * important to be fast than accurate.
1010 	 */
1011 	tr->buffer_disabled = 0;
1012 	/* Make the flag seen by readers */
1013 	smp_wmb();
1014 }
1015 
1016 /**
1017  * tracing_on - enable tracing buffers
1018  *
1019  * This function enables tracing buffers that may have been
1020  * disabled with tracing_off.
1021  */
1022 void tracing_on(void)
1023 {
1024 	tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027 
1028 
1029 static __always_inline void
1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032 	__this_cpu_write(trace_taskinfo_save, true);
1033 
1034 	/* If this is the temp buffer, we need to commit fully */
1035 	if (this_cpu_read(trace_buffered_event) == event) {
1036 		/* Length is in event->array[0] */
1037 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038 		/* Release the temp buffer */
1039 		this_cpu_dec(trace_buffered_event_cnt);
1040 		/* ring_buffer_unlock_commit() enables preemption */
1041 		preempt_enable_notrace();
1042 	} else
1043 		ring_buffer_unlock_commit(buffer);
1044 }
1045 
1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047 		       const char *str, int size)
1048 {
1049 	struct ring_buffer_event *event;
1050 	struct trace_buffer *buffer;
1051 	struct print_entry *entry;
1052 	unsigned int trace_ctx;
1053 	int alloc;
1054 
1055 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056 		return 0;
1057 
1058 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1059 		return 0;
1060 
1061 	if (unlikely(tracing_disabled))
1062 		return 0;
1063 
1064 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065 
1066 	trace_ctx = tracing_gen_ctx();
1067 	buffer = tr->array_buffer.buffer;
1068 	ring_buffer_nest_start(buffer);
1069 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070 					    trace_ctx);
1071 	if (!event) {
1072 		size = 0;
1073 		goto out;
1074 	}
1075 
1076 	entry = ring_buffer_event_data(event);
1077 	entry->ip = ip;
1078 
1079 	memcpy(&entry->buf, str, size);
1080 
1081 	/* Add a newline if necessary */
1082 	if (entry->buf[size - 1] != '\n') {
1083 		entry->buf[size] = '\n';
1084 		entry->buf[size + 1] = '\0';
1085 	} else
1086 		entry->buf[size] = '\0';
1087 
1088 	__buffer_unlock_commit(buffer, event);
1089 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090  out:
1091 	ring_buffer_nest_end(buffer);
1092 	return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095 
1096 /**
1097  * __trace_puts - write a constant string into the trace buffer.
1098  * @ip:	   The address of the caller
1099  * @str:   The constant string to write
1100  * @size:  The size of the string.
1101  */
1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104 	return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
1107 
1108 /**
1109  * __trace_bputs - write the pointer to a constant string into trace buffer
1110  * @ip:	   The address of the caller
1111  * @str:   The constant string to write to the buffer
1112  */
1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115 	struct ring_buffer_event *event;
1116 	struct trace_buffer *buffer;
1117 	struct bputs_entry *entry;
1118 	unsigned int trace_ctx;
1119 	int size = sizeof(struct bputs_entry);
1120 	int ret = 0;
1121 
1122 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123 		return 0;
1124 
1125 	if (unlikely(tracing_selftest_running || tracing_disabled))
1126 		return 0;
1127 
1128 	trace_ctx = tracing_gen_ctx();
1129 	buffer = global_trace.array_buffer.buffer;
1130 
1131 	ring_buffer_nest_start(buffer);
1132 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133 					    trace_ctx);
1134 	if (!event)
1135 		goto out;
1136 
1137 	entry = ring_buffer_event_data(event);
1138 	entry->ip			= ip;
1139 	entry->str			= str;
1140 
1141 	__buffer_unlock_commit(buffer, event);
1142 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143 
1144 	ret = 1;
1145  out:
1146 	ring_buffer_nest_end(buffer);
1147 	return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
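/*
 * Note: callers normally use the trace_puts() macro rather than calling
 * __trace_puts()/__trace_bputs() directly; for a build-time constant
 * string it resolves to __trace_bputs() (only the pointer is recorded),
 * otherwise to __trace_puts(). For example:
 *
 *	trace_puts("reached the slow path\n");
 */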
1150 
1151 #ifdef CONFIG_TRACER_SNAPSHOT
1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153 					   void *cond_data)
1154 {
1155 	struct tracer *tracer = tr->current_trace;
1156 	unsigned long flags;
1157 
1158 	if (in_nmi()) {
1159 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161 		return;
1162 	}
1163 
1164 	if (!tr->allocated_snapshot) {
1165 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167 		tracer_tracing_off(tr);
1168 		return;
1169 	}
1170 
1171 	/* Note, the snapshot cannot be used when the tracer uses it */
1172 	if (tracer->use_max_tr) {
1173 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175 		return;
1176 	}
1177 
1178 	local_irq_save(flags);
1179 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1180 	local_irq_restore(flags);
1181 }
1182 
1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185 	tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187 
1188 /**
1189  * tracing_snapshot - take a snapshot of the current buffer.
1190  *
1191  * This causes a swap between the snapshot buffer and the current live
1192  * tracing buffer. You can use this to take snapshots of the live
1193  * trace when some condition is triggered, but continue to trace.
1194  *
1195  * Note, make sure to allocate the snapshot either with
1196  * tracing_snapshot_alloc(), or by doing it manually
1197  * with: echo 1 > /sys/kernel/tracing/snapshot
1198  *
1199  * If the snapshot buffer is not allocated, this will stop tracing,
1200  * basically making a permanent snapshot.
1201  */
1202 void tracing_snapshot(void)
1203 {
1204 	struct trace_array *tr = &global_trace;
1205 
1206 	tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
1209 
1210 /**
1211  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212  * @tr:		The tracing instance to snapshot
1213  * @cond_data:	The data to be tested conditionally, and possibly saved
1214  *
1215  * This is the same as tracing_snapshot() except that the snapshot is
1216  * conditional - the snapshot will only happen if the
1217  * cond_snapshot.update() implementation receiving the cond_data
1218  * returns true, which means that the trace array's cond_snapshot
1219  * update() operation used the cond_data to determine whether the
1220  * snapshot should be taken, and if it was, presumably saved it along
1221  * with the snapshot.
1222  */
1223 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224 {
1225 	tracing_snapshot_instance_cond(tr, cond_data);
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228 
1229 /**
1230  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231  * @tr:		The tracing instance
1232  *
1233  * When the user enables a conditional snapshot using
1234  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235  * with the snapshot.  This accessor is used to retrieve it.
1236  *
1237  * Should not be called from cond_snapshot.update(), since it takes
1238  * the tr->max_lock lock, which the code calling
1239  * cond_snapshot.update() has already done.
1240  *
1241  * Returns the cond_data associated with the trace array's snapshot.
1242  */
1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245 	void *cond_data = NULL;
1246 
1247 	local_irq_disable();
1248 	arch_spin_lock(&tr->max_lock);
1249 
1250 	if (tr->cond_snapshot)
1251 		cond_data = tr->cond_snapshot->cond_data;
1252 
1253 	arch_spin_unlock(&tr->max_lock);
1254 	local_irq_enable();
1255 
1256 	return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259 
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261 					struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263 
1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266 	int order;
1267 	int ret;
1268 
1269 	if (!tr->allocated_snapshot) {
1270 
1271 		/* Make the snapshot buffer have the same order as main buffer */
1272 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1273 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1274 		if (ret < 0)
1275 			return ret;
1276 
1277 		/* allocate spare buffer */
1278 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1279 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1280 		if (ret < 0)
1281 			return ret;
1282 
1283 		tr->allocated_snapshot = true;
1284 	}
1285 
1286 	return 0;
1287 }
1288 
1289 static void free_snapshot(struct trace_array *tr)
1290 {
1291 	/*
1292 	 * We don't free the ring buffer; instead, we resize it because
1293 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1294 	 * we want to preserve it.
1295 	 */
1296 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1297 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1298 	set_buffer_entries(&tr->max_buffer, 1);
1299 	tracing_reset_online_cpus(&tr->max_buffer);
1300 	tr->allocated_snapshot = false;
1301 }
1302 
1303 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1304 {
1305 	int ret;
1306 
1307 	lockdep_assert_held(&trace_types_lock);
1308 
1309 	spin_lock(&tr->snapshot_trigger_lock);
1310 	if (tr->snapshot == UINT_MAX) {
1311 		spin_unlock(&tr->snapshot_trigger_lock);
1312 		return -EBUSY;
1313 	}
1314 
1315 	tr->snapshot++;
1316 	spin_unlock(&tr->snapshot_trigger_lock);
1317 
1318 	ret = tracing_alloc_snapshot_instance(tr);
1319 	if (ret) {
1320 		spin_lock(&tr->snapshot_trigger_lock);
1321 		tr->snapshot--;
1322 		spin_unlock(&tr->snapshot_trigger_lock);
1323 	}
1324 
1325 	return ret;
1326 }
1327 
1328 int tracing_arm_snapshot(struct trace_array *tr)
1329 {
1330 	int ret;
1331 
1332 	mutex_lock(&trace_types_lock);
1333 	ret = tracing_arm_snapshot_locked(tr);
1334 	mutex_unlock(&trace_types_lock);
1335 
1336 	return ret;
1337 }
1338 
1339 void tracing_disarm_snapshot(struct trace_array *tr)
1340 {
1341 	spin_lock(&tr->snapshot_trigger_lock);
1342 	if (!WARN_ON(!tr->snapshot))
1343 		tr->snapshot--;
1344 	spin_unlock(&tr->snapshot_trigger_lock);
1345 }
1346 
1347 /**
1348  * tracing_alloc_snapshot - allocate snapshot buffer.
1349  *
1350  * This only allocates the snapshot buffer if it isn't already
1351  * allocated - it doesn't also take a snapshot.
1352  *
1353  * This is meant to be used in cases where the snapshot buffer needs
1354  * to be set up for events that can't sleep but need to be able to
1355  * trigger a snapshot.
1356  */
1357 int tracing_alloc_snapshot(void)
1358 {
1359 	struct trace_array *tr = &global_trace;
1360 	int ret;
1361 
1362 	ret = tracing_alloc_snapshot_instance(tr);
1363 	WARN_ON(ret < 0);
1364 
1365 	return ret;
1366 }
1367 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
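/*
 * Usage sketch (illustrative; my_interesting_condition is hypothetical):
 * allocate the snapshot buffer from a context that may sleep, then
 * trigger snapshots from places that cannot (but not from NMI):
 *
 *	tracing_alloc_snapshot();	// done once, may sleep
 *	...
 *	if (my_interesting_condition)
 *		tracing_snapshot();
 */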
1368 
1369 /**
1370  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1371  *
1372  * This is similar to tracing_snapshot(), but it will allocate the
1373  * snapshot buffer if it isn't already allocated. Use this only
1374  * where it is safe to sleep, as the allocation may sleep.
1375  *
1376  * This causes a swap between the snapshot buffer and the current live
1377  * tracing buffer. You can use this to take snapshots of the live
1378  * trace when some condition is triggered, but continue to trace.
1379  */
1380 void tracing_snapshot_alloc(void)
1381 {
1382 	int ret;
1383 
1384 	ret = tracing_alloc_snapshot();
1385 	if (ret < 0)
1386 		return;
1387 
1388 	tracing_snapshot();
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1391 
1392 /**
1393  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1394  * @tr:		The tracing instance
1395  * @cond_data:	User data to associate with the snapshot
1396  * @update:	Implementation of the cond_snapshot update function
1397  *
1398  * Check whether the conditional snapshot for the given instance has
1399  * already been enabled, or if the current tracer is already using a
1400  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1401  * save the cond_data and update function inside.
1402  *
1403  * Returns 0 if successful, error otherwise.
1404  */
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1406 				 cond_update_fn_t update)
1407 {
1408 	struct cond_snapshot *cond_snapshot;
1409 	int ret = 0;
1410 
1411 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1412 	if (!cond_snapshot)
1413 		return -ENOMEM;
1414 
1415 	cond_snapshot->cond_data = cond_data;
1416 	cond_snapshot->update = update;
1417 
1418 	mutex_lock(&trace_types_lock);
1419 
1420 	if (tr->current_trace->use_max_tr) {
1421 		ret = -EBUSY;
1422 		goto fail_unlock;
1423 	}
1424 
1425 	/*
1426 	 * The cond_snapshot can only change to NULL without the
1427 	 * trace_types_lock. We don't care if we race with it going
1428 	 * to NULL, but we want to make sure that it's not set to
1429 	 * something other than NULL when we get here, which we can
1430 	 * do safely with only holding the trace_types_lock and not
1431 	 * having to take the max_lock.
1432 	 */
1433 	if (tr->cond_snapshot) {
1434 		ret = -EBUSY;
1435 		goto fail_unlock;
1436 	}
1437 
1438 	ret = tracing_arm_snapshot_locked(tr);
1439 	if (ret)
1440 		goto fail_unlock;
1441 
1442 	local_irq_disable();
1443 	arch_spin_lock(&tr->max_lock);
1444 	tr->cond_snapshot = cond_snapshot;
1445 	arch_spin_unlock(&tr->max_lock);
1446 	local_irq_enable();
1447 
1448 	mutex_unlock(&trace_types_lock);
1449 
1450 	return ret;
1451 
1452  fail_unlock:
1453 	mutex_unlock(&trace_types_lock);
1454 	kfree(cond_snapshot);
1455 	return ret;
1456 }
1457 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
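/*
 * Usage sketch (illustrative; my_update() and my_data are hypothetical):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return ...;	// true means: take the snapshot
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, cond_data);	// snapshots only if my_update() agrees
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */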
1458 
1459 /**
1460  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1461  * @tr:		The tracing instance
1462  *
1463  * Check whether the conditional snapshot for the given instance is
1464  * enabled; if so, free the cond_snapshot associated with it,
1465  * otherwise return -EINVAL.
1466  *
1467  * Returns 0 if successful, error otherwise.
1468  */
1469 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 {
1471 	int ret = 0;
1472 
1473 	local_irq_disable();
1474 	arch_spin_lock(&tr->max_lock);
1475 
1476 	if (!tr->cond_snapshot)
1477 		ret = -EINVAL;
1478 	else {
1479 		kfree(tr->cond_snapshot);
1480 		tr->cond_snapshot = NULL;
1481 	}
1482 
1483 	arch_spin_unlock(&tr->max_lock);
1484 	local_irq_enable();
1485 
1486 	tracing_disarm_snapshot(tr);
1487 
1488 	return ret;
1489 }
1490 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1491 #else
1492 void tracing_snapshot(void)
1493 {
1494 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1495 }
1496 EXPORT_SYMBOL_GPL(tracing_snapshot);
1497 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1498 {
1499 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1500 }
1501 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1502 int tracing_alloc_snapshot(void)
1503 {
1504 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1505 	return -ENODEV;
1506 }
1507 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1508 void tracing_snapshot_alloc(void)
1509 {
1510 	/* Give warning */
1511 	tracing_snapshot();
1512 }
1513 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1514 void *tracing_cond_snapshot_data(struct trace_array *tr)
1515 {
1516 	return NULL;
1517 }
1518 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1519 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1520 {
1521 	return -ENODEV;
1522 }
1523 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1524 int tracing_snapshot_cond_disable(struct trace_array *tr)
1525 {
1526 	return false;
1527 }
1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1529 #define free_snapshot(tr)	do { } while (0)
1530 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1531 #endif /* CONFIG_TRACER_SNAPSHOT */
1532 
1533 void tracer_tracing_off(struct trace_array *tr)
1534 {
1535 	if (tr->array_buffer.buffer)
1536 		ring_buffer_record_off(tr->array_buffer.buffer);
1537 	/*
1538 	 * This flag is looked at when buffers haven't been allocated
1539 	 * yet, or by some tracers (like irqsoff), that just want to
1540 	 * know if the ring buffer has been disabled, but it can handle
1541 	 * races where it gets disabled but we still do a record.
1542 	 * As the check is in the fast path of the tracers, it is more
1543 	 * important to be fast than accurate.
1544 	 */
1545 	tr->buffer_disabled = 1;
1546 	/* Make the flag seen by readers */
1547 	smp_wmb();
1548 }
1549 
1550 /**
1551  * tracing_off - turn off tracing buffers
1552  *
1553  * This function stops the tracing buffers from recording data.
1554  * It does not disable any overhead the tracers themselves may
1555  * be causing. This function simply causes all recording to
1556  * the ring buffers to fail.
1557  */
1558 void tracing_off(void)
1559 {
1560 	tracer_tracing_off(&global_trace);
1561 }
1562 EXPORT_SYMBOL_GPL(tracing_off);
1563 
1564 void disable_trace_on_warning(void)
1565 {
1566 	if (__disable_trace_on_warning) {
1567 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1568 			"Disabling tracing due to warning\n");
1569 		tracing_off();
1570 	}
1571 }
1572 
1573 /**
1574  * tracer_tracing_is_on - show real state of ring buffer enabled
1575  * @tr : the trace array to know if ring buffer is enabled
1576  *
1577  * Shows real state of the ring buffer if it is enabled or not.
1578  */
1579 bool tracer_tracing_is_on(struct trace_array *tr)
1580 {
1581 	if (tr->array_buffer.buffer)
1582 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1583 	return !tr->buffer_disabled;
1584 }
1585 
1586 /**
1587  * tracing_is_on - show state of ring buffers enabled
1588  */
1589 int tracing_is_on(void)
1590 {
1591 	return tracer_tracing_is_on(&global_trace);
1592 }
1593 EXPORT_SYMBOL_GPL(tracing_is_on);
1594 
1595 static int __init set_buf_size(char *str)
1596 {
1597 	unsigned long buf_size;
1598 
1599 	if (!str)
1600 		return 0;
1601 	buf_size = memparse(str, &str);
1602 	/*
1603 	 * nr_entries cannot be zero and the startup
1604 	 * tests require some buffer space. Therefore
1605 	 * ensure we have at least 4096 bytes of buffer.
1606 	 */
1607 	trace_buf_size = max(4096UL, buf_size);
1608 	return 1;
1609 }
1610 __setup("trace_buf_size=", set_buf_size);
1611 
1612 static int __init set_tracing_thresh(char *str)
1613 {
1614 	unsigned long threshold;
1615 	int ret;
1616 
1617 	if (!str)
1618 		return 0;
1619 	ret = kstrtoul(str, 0, &threshold);
1620 	if (ret < 0)
1621 		return 0;
1622 	tracing_thresh = threshold * 1000;
1623 	return 1;
1624 }
1625 __setup("tracing_thresh=", set_tracing_thresh);
1626 
1627 unsigned long nsecs_to_usecs(unsigned long nsecs)
1628 {
1629 	return nsecs / 1000;
1630 }
1631 
1632 /*
1633  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1634  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1635  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1636  * of strings in the order that the evals (enum) were defined.
1637  */
1638 #undef C
1639 #define C(a, b) b
1640 
1641 /* These must match the bit positions in trace_iterator_flags */
1642 static const char *trace_options[] = {
1643 	TRACE_FLAGS
1644 	NULL
1645 };
1646 
1647 static struct {
1648 	u64 (*func)(void);
1649 	const char *name;
1650 	int in_ns;		/* is this clock in nanoseconds? */
1651 } trace_clocks[] = {
1652 	{ trace_clock_local,		"local",	1 },
1653 	{ trace_clock_global,		"global",	1 },
1654 	{ trace_clock_counter,		"counter",	0 },
1655 	{ trace_clock_jiffies,		"uptime",	0 },
1656 	{ trace_clock,			"perf",		1 },
1657 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1658 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1659 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1660 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1661 	ARCH_TRACE_CLOCKS
1662 };
1663 
1664 bool trace_clock_in_ns(struct trace_array *tr)
1665 {
1666 	if (trace_clocks[tr->clock_id].in_ns)
1667 		return true;
1668 
1669 	return false;
1670 }
1671 
1672 /*
1673  * trace_parser_get_init - gets the buffer for trace parser
1674  */
1675 int trace_parser_get_init(struct trace_parser *parser, int size)
1676 {
1677 	memset(parser, 0, sizeof(*parser));
1678 
1679 	parser->buffer = kmalloc(size, GFP_KERNEL);
1680 	if (!parser->buffer)
1681 		return 1;
1682 
1683 	parser->size = size;
1684 	return 0;
1685 }
1686 
1687 /*
1688  * trace_parser_put - frees the buffer for trace parser
1689  */
1690 void trace_parser_put(struct trace_parser *parser)
1691 {
1692 	kfree(parser->buffer);
1693 	parser->buffer = NULL;
1694 }
1695 
1696 /*
1697  * trace_get_user - reads the user input string separated by space
1698  * (matched by isspace(ch))
1699  *
1700  * For each string found the 'struct trace_parser' is updated,
1701  * and the function returns.
1702  *
1703  * Returns number of bytes read.
1704  *
1705  * See kernel/trace/trace.h for 'struct trace_parser' details.
1706  */
1707 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1708 	size_t cnt, loff_t *ppos)
1709 {
1710 	char ch;
1711 	size_t read = 0;
1712 	ssize_t ret;
1713 
1714 	if (!*ppos)
1715 		trace_parser_clear(parser);
1716 
1717 	ret = get_user(ch, ubuf++);
1718 	if (ret)
1719 		goto out;
1720 
1721 	read++;
1722 	cnt--;
1723 
1724 	/*
1725 	 * The parser is not finished with the last write,
1726 	 * continue reading the user input without skipping spaces.
1727 	 */
1728 	if (!parser->cont) {
1729 		/* skip white space */
1730 		while (cnt && isspace(ch)) {
1731 			ret = get_user(ch, ubuf++);
1732 			if (ret)
1733 				goto out;
1734 			read++;
1735 			cnt--;
1736 		}
1737 
1738 		parser->idx = 0;
1739 
1740 		/* only spaces were written */
1741 		if (isspace(ch) || !ch) {
1742 			*ppos += read;
1743 			ret = read;
1744 			goto out;
1745 		}
1746 	}
1747 
1748 	/* read the non-space input */
1749 	while (cnt && !isspace(ch) && ch) {
1750 		if (parser->idx < parser->size - 1)
1751 			parser->buffer[parser->idx++] = ch;
1752 		else {
1753 			ret = -EINVAL;
1754 			goto out;
1755 		}
1756 		ret = get_user(ch, ubuf++);
1757 		if (ret)
1758 			goto out;
1759 		read++;
1760 		cnt--;
1761 	}
1762 
1763 	/* We either got finished input or we have to wait for another call. */
1764 	if (isspace(ch) || !ch) {
1765 		parser->buffer[parser->idx] = 0;
1766 		parser->cont = false;
1767 	} else if (parser->idx < parser->size - 1) {
1768 		parser->cont = true;
1769 		parser->buffer[parser->idx++] = ch;
1770 		/* Make sure the parsed string always terminates with '\0'. */
1771 		parser->buffer[parser->idx] = 0;
1772 	} else {
1773 		ret = -EINVAL;
1774 		goto out;
1775 	}
1776 
1777 	*ppos += read;
1778 	ret = read;
1779 
1780 out:
1781 	return ret;
1782 }
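/*
 * Usage sketch: callers typically pair these helpers the way
 * trace_pid_write() does above, e.g.
 *
 *	if (trace_parser_get_init(&parser, SIZE))
 *		return -ENOMEM;
 *	while (cnt > 0) {
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		...
 *		if (trace_parser_loaded(&parser))
 *			... consume parser.buffer ...
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */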
1783 
1784 /* TODO add a seq_buf_to_buffer() */
1785 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1786 {
1787 	int len;
1788 
1789 	if (trace_seq_used(s) <= s->readpos)
1790 		return -EBUSY;
1791 
1792 	len = trace_seq_used(s) - s->readpos;
1793 	if (cnt > len)
1794 		cnt = len;
1795 	memcpy(buf, s->buffer + s->readpos, cnt);
1796 
1797 	s->readpos += cnt;
1798 	return cnt;
1799 }
1800 
1801 unsigned long __read_mostly	tracing_thresh;
1802 
1803 #ifdef CONFIG_TRACER_MAX_TRACE
1804 static const struct file_operations tracing_max_lat_fops;
1805 
1806 #ifdef LATENCY_FS_NOTIFY
1807 
1808 static struct workqueue_struct *fsnotify_wq;
1809 
1810 static void latency_fsnotify_workfn(struct work_struct *work)
1811 {
1812 	struct trace_array *tr = container_of(work, struct trace_array,
1813 					      fsnotify_work);
1814 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1815 }
1816 
1817 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1818 {
1819 	struct trace_array *tr = container_of(iwork, struct trace_array,
1820 					      fsnotify_irqwork);
1821 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1822 }
1823 
1824 static void trace_create_maxlat_file(struct trace_array *tr,
1825 				     struct dentry *d_tracer)
1826 {
1827 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1828 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1829 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1830 					      TRACE_MODE_WRITE,
1831 					      d_tracer, tr,
1832 					      &tracing_max_lat_fops);
1833 }
1834 
1835 __init static int latency_fsnotify_init(void)
1836 {
1837 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1838 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1839 	if (!fsnotify_wq) {
1840 		pr_err("Unable to allocate tr_max_lat_wq\n");
1841 		return -ENOMEM;
1842 	}
1843 	return 0;
1844 }
1845 
1846 late_initcall_sync(latency_fsnotify_init);
1847 
1848 void latency_fsnotify(struct trace_array *tr)
1849 {
1850 	if (!fsnotify_wq)
1851 		return;
1852 	/*
1853 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1854 	 * possible that we are called from __schedule() or do_idle(), which
1855 	 * could cause a deadlock.
1856 	 */
1857 	irq_work_queue(&tr->fsnotify_irqwork);
1858 }
1859 
1860 #else /* !LATENCY_FS_NOTIFY */
1861 
1862 #define trace_create_maxlat_file(tr, d_tracer)				\
1863 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1864 			  d_tracer, tr, &tracing_max_lat_fops)
1865 
1866 #endif
1867 
1868 /*
1869  * Copy the new maximum trace into the separate maximum-trace
1870  * structure. (this way the maximum trace is permanently saved,
1871  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1872  */
1873 static void
1874 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1875 {
1876 	struct array_buffer *trace_buf = &tr->array_buffer;
1877 	struct array_buffer *max_buf = &tr->max_buffer;
1878 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1879 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1880 
1881 	max_buf->cpu = cpu;
1882 	max_buf->time_start = data->preempt_timestamp;
1883 
1884 	max_data->saved_latency = tr->max_latency;
1885 	max_data->critical_start = data->critical_start;
1886 	max_data->critical_end = data->critical_end;
1887 
1888 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1889 	max_data->pid = tsk->pid;
1890 	/*
1891 	 * If tsk == current, then use current_uid(), as that does not use
1892 	 * RCU. The irq tracer can be called out of RCU scope.
1893 	 */
1894 	if (tsk == current)
1895 		max_data->uid = current_uid();
1896 	else
1897 		max_data->uid = task_uid(tsk);
1898 
1899 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1900 	max_data->policy = tsk->policy;
1901 	max_data->rt_priority = tsk->rt_priority;
1902 
1903 	/* Record this task's comm */
1904 	tracing_record_cmdline(tsk);
1905 	latency_fsnotify(tr);
1906 }
1907 
1908 /**
1909  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1910  * update_max_tr - snapshot all trace buffers from @tr into its max_tr
1911  * @tr: the trace array to take the snapshot of
1912  * @cpu: The cpu that initiated the trace.
1913  * @cond_data: User data associated with a conditional snapshot
1914  *
1915  * Flip the buffers between the @tr and the max_tr and record information
1916  * about which task was the cause of this latency.
1917  */
1918 void
1919 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1920 	      void *cond_data)
1921 {
1922 	if (tr->stop_count)
1923 		return;
1924 
1925 	WARN_ON_ONCE(!irqs_disabled());
1926 
1927 	if (!tr->allocated_snapshot) {
1928 		/* Only the nop tracer should hit this when disabling */
1929 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1930 		return;
1931 	}
1932 
1933 	arch_spin_lock(&tr->max_lock);
1934 
1935 	/* Inherit the recordable setting from array_buffer */
1936 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1937 		ring_buffer_record_on(tr->max_buffer.buffer);
1938 	else
1939 		ring_buffer_record_off(tr->max_buffer.buffer);
1940 
1941 #ifdef CONFIG_TRACER_SNAPSHOT
1942 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1943 		arch_spin_unlock(&tr->max_lock);
1944 		return;
1945 	}
1946 #endif
1947 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1948 
1949 	__update_max_tr(tr, tsk, cpu);
1950 
1951 	arch_spin_unlock(&tr->max_lock);
1952 
1953 	/* Any waiters on the old snapshot buffer need to wake up */
1954 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1955 }
1956 
1957 /**
1958  * update_max_tr_single - only copy one trace over, and reset the rest
1959  * @tr: the trace array to take the snapshot of
1960  * @tsk: task with the latency
1961  * @cpu: the cpu of the buffer to copy.
1962  *
1963  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1964  */
1965 void
1966 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1967 {
1968 	int ret;
1969 
1970 	if (tr->stop_count)
1971 		return;
1972 
1973 	WARN_ON_ONCE(!irqs_disabled());
1974 	if (!tr->allocated_snapshot) {
1975 		/* Only the nop tracer should hit this when disabling */
1976 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1977 		return;
1978 	}
1979 
1980 	arch_spin_lock(&tr->max_lock);
1981 
1982 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1983 
1984 	if (ret == -EBUSY) {
1985 		/*
1986 		 * We failed to swap the buffer because a commit was
1987 		 * taking place on this CPU, or because a resize is in
1988 		 * progress. We fail to record, but we reset the max
1989 		 * trace buffer (no one writes directly to it) and flag
1990 		 * that it failed.
1991 		 */
1992 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1993 			"Failed to swap buffers due to commit or resize in progress\n");
1994 	}
1995 
1996 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1997 
1998 	__update_max_tr(tr, tsk, cpu);
1999 	arch_spin_unlock(&tr->max_lock);
2000 }
2001 
2002 #endif /* CONFIG_TRACER_MAX_TRACE */
2003 
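/*
 * Bundle the iterator with the wait_index observed when the wait started, so
 * that wait_pipe_cond() can wake the waiter either when iter->wait_index is
 * bumped or when the iterator is closed.
 */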
2004 struct pipe_wait {
2005 	struct trace_iterator		*iter;
2006 	int				wait_index;
2007 };
2008 
2009 static bool wait_pipe_cond(void *data)
2010 {
2011 	struct pipe_wait *pwait = data;
2012 	struct trace_iterator *iter = pwait->iter;
2013 
2014 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2015 		return true;
2016 
2017 	return iter->closed;
2018 }
2019 
2020 static int wait_on_pipe(struct trace_iterator *iter, int full)
2021 {
2022 	struct pipe_wait pwait;
2023 	int ret;
2024 
2025 	/* Iterators are static, they should be filled or empty */
2026 	if (trace_buffer_iter(iter, iter->cpu_file))
2027 		return 0;
2028 
2029 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2030 	pwait.iter = iter;
2031 
2032 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2033 			       wait_pipe_cond, &pwait);
2034 
2035 #ifdef CONFIG_TRACER_MAX_TRACE
2036 	/*
2037 	 * Make sure this still points to the snapshot buffer: if a snapshot
2038 	 * happened while waiting, this would now be the main buffer.
2039 	 */
2040 	if (iter->snapshot)
2041 		iter->array_buffer = &iter->tr->max_buffer;
2042 #endif
2043 	return ret;
2044 }
2045 
2046 #ifdef CONFIG_FTRACE_STARTUP_TEST
2047 static bool selftests_can_run;
2048 
2049 struct trace_selftests {
2050 	struct list_head		list;
2051 	struct tracer			*type;
2052 };
2053 
2054 static LIST_HEAD(postponed_selftests);
2055 
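/*
 * Remember a tracer whose selftest cannot run yet (the tracer registered
 * before scheduling was up); init_trace_selftests() runs the postponed
 * tests later from the postponed_selftests list.
 */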
2056 static int save_selftest(struct tracer *type)
2057 {
2058 	struct trace_selftests *selftest;
2059 
2060 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2061 	if (!selftest)
2062 		return -ENOMEM;
2063 
2064 	selftest->type = type;
2065 	list_add(&selftest->list, &postponed_selftests);
2066 	return 0;
2067 }
2068 
2069 static int run_tracer_selftest(struct tracer *type)
2070 {
2071 	struct trace_array *tr = &global_trace;
2072 	struct tracer *saved_tracer = tr->current_trace;
2073 	int ret;
2074 
2075 	if (!type->selftest || tracing_selftest_disabled)
2076 		return 0;
2077 
2078 	/*
2079 	 * If a tracer registers early in boot up (before scheduling is
2080 	 * initialized and such), then do not run its selftests yet.
2081 	 * Instead, run it a little later in the boot process.
2082 	 */
2083 	if (!selftests_can_run)
2084 		return save_selftest(type);
2085 
2086 	if (!tracing_is_on()) {
2087 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2088 			type->name);
2089 		return 0;
2090 	}
2091 
2092 	/*
2093 	 * Run a selftest on this tracer.
2094 	 * Here we reset the trace buffer, and set the current
2095 	 * tracer to be this tracer. The tracer can then run some
2096 	 * internal tracing to verify that everything is in order.
2097 	 * If we fail, we do not register this tracer.
2098 	 */
2099 	tracing_reset_online_cpus(&tr->array_buffer);
2100 
2101 	tr->current_trace = type;
2102 
2103 #ifdef CONFIG_TRACER_MAX_TRACE
2104 	if (type->use_max_tr) {
2105 		/* If we expanded the buffers, make sure the max is expanded too */
2106 		if (tr->ring_buffer_expanded)
2107 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2108 					   RING_BUFFER_ALL_CPUS);
2109 		tr->allocated_snapshot = true;
2110 	}
2111 #endif
2112 
2113 	/* the test is responsible for initializing and enabling */
2114 	pr_info("Testing tracer %s: ", type->name);
2115 	ret = type->selftest(type, tr);
2116 	/* the test is responsible for resetting too */
2117 	tr->current_trace = saved_tracer;
2118 	if (ret) {
2119 		printk(KERN_CONT "FAILED!\n");
2120 		/* Add the warning after printing 'FAILED' */
2121 		WARN_ON(1);
2122 		return -1;
2123 	}
2124 	/* Only reset on passing, to avoid touching corrupted buffers */
2125 	tracing_reset_online_cpus(&tr->array_buffer);
2126 
2127 #ifdef CONFIG_TRACER_MAX_TRACE
2128 	if (type->use_max_tr) {
2129 		tr->allocated_snapshot = false;
2130 
2131 		/* Shrink the max buffer again */
2132 		if (tr->ring_buffer_expanded)
2133 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2134 					   RING_BUFFER_ALL_CPUS);
2135 	}
2136 #endif
2137 
2138 	printk(KERN_CONT "PASSED\n");
2139 	return 0;
2140 }
2141 
2142 static int do_run_tracer_selftest(struct tracer *type)
2143 {
2144 	int ret;
2145 
2146 	/*
2147 	 * Tests can take a long time, especially if they are run one after the
2148 	 * other, as does happen during bootup when all the tracers are
2149 	 * registered. This could cause the soft lockup watchdog to trigger.
2150 	 */
2151 	cond_resched();
2152 
2153 	tracing_selftest_running = true;
2154 	ret = run_tracer_selftest(type);
2155 	tracing_selftest_running = false;
2156 
2157 	return ret;
2158 }
2159 
2160 static __init int init_trace_selftests(void)
2161 {
2162 	struct trace_selftests *p, *n;
2163 	struct tracer *t, **last;
2164 	int ret;
2165 
2166 	selftests_can_run = true;
2167 
2168 	mutex_lock(&trace_types_lock);
2169 
2170 	if (list_empty(&postponed_selftests))
2171 		goto out;
2172 
2173 	pr_info("Running postponed tracer tests:\n");
2174 
2175 	tracing_selftest_running = true;
2176 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2177 		/* This loop can take minutes when sanitizers are enabled, so
2178 		 * let's make sure we allow RCU processing.
2179 		 */
2180 		cond_resched();
2181 		ret = run_tracer_selftest(p->type);
2182 		/* If the test fails, then warn and remove from available_tracers */
2183 		if (ret < 0) {
2184 			WARN(1, "tracer: %s failed selftest, disabling\n",
2185 			     p->type->name);
2186 			last = &trace_types;
2187 			for (t = trace_types; t; t = t->next) {
2188 				if (t == p->type) {
2189 					*last = t->next;
2190 					break;
2191 				}
2192 				last = &t->next;
2193 			}
2194 		}
2195 		list_del(&p->list);
2196 		kfree(p);
2197 	}
2198 	tracing_selftest_running = false;
2199 
2200  out:
2201 	mutex_unlock(&trace_types_lock);
2202 
2203 	return 0;
2204 }
2205 core_initcall(init_trace_selftests);
2206 #else
2207 static inline int run_tracer_selftest(struct tracer *type)
2208 {
2209 	return 0;
2210 }
2211 static inline int do_run_tracer_selftest(struct tracer *type)
2212 {
2213 	return 0;
2214 }
2215 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2216 
2217 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2218 
2219 static void __init apply_trace_boot_options(void);
2220 
2221 /**
2222  * register_tracer - register a tracer with the ftrace system.
2223  * @type: the plugin for the tracer
2224  *
2225  * Register a new plugin tracer.
2226  */
2227 int __init register_tracer(struct tracer *type)
2228 {
2229 	struct tracer *t;
2230 	int ret = 0;
2231 
2232 	if (!type->name) {
2233 		pr_info("Tracer must have a name\n");
2234 		return -1;
2235 	}
2236 
2237 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2238 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2239 		return -1;
2240 	}
2241 
2242 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2243 		pr_warn("Can not register tracer %s due to lockdown\n",
2244 			   type->name);
2245 		return -EPERM;
2246 	}
2247 
2248 	mutex_lock(&trace_types_lock);
2249 
2250 	for (t = trace_types; t; t = t->next) {
2251 		if (strcmp(type->name, t->name) == 0) {
2252 			/* already found */
2253 			pr_info("Tracer %s already registered\n",
2254 				type->name);
2255 			ret = -1;
2256 			goto out;
2257 		}
2258 	}
2259 
2260 	if (!type->set_flag)
2261 		type->set_flag = &dummy_set_flag;
2262 	if (!type->flags) {
2263 		/* Allocate a dummy tracer_flags */
2264 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2265 		if (!type->flags) {
2266 			ret = -ENOMEM;
2267 			goto out;
2268 		}
2269 		type->flags->val = 0;
2270 		type->flags->opts = dummy_tracer_opt;
2271 	} else
2272 		if (!type->flags->opts)
2273 			type->flags->opts = dummy_tracer_opt;
2274 
2275 	/* store the tracer for __set_tracer_option */
2276 	type->flags->trace = type;
2277 
2278 	ret = do_run_tracer_selftest(type);
2279 	if (ret < 0)
2280 		goto out;
2281 
2282 	type->next = trace_types;
2283 	trace_types = type;
2284 	add_tracer_options(&global_trace, type);
2285 
2286  out:
2287 	mutex_unlock(&trace_types_lock);
2288 
2289 	if (ret || !default_bootup_tracer)
2290 		goto out_unlock;
2291 
2292 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2293 		goto out_unlock;
2294 
2295 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2296 	/* Do we want this tracer to start on bootup? */
2297 	tracing_set_tracer(&global_trace, type->name);
2298 	default_bootup_tracer = NULL;
2299 
2300 	apply_trace_boot_options();
2301 
2302 	/* Disable other selftests, since running this tracer will break them. */
2303 	disable_tracing_selftest("running a tracer");
2304 
2305  out_unlock:
2306 	return ret;
2307 }
2308 
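/*
 * Reset the ring buffer of a single CPU: recording is disabled while the
 * reset takes place, and synchronize_rcu() makes sure all in-flight commits
 * have finished before the buffer is cleared.
 */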
2309 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2310 {
2311 	struct trace_buffer *buffer = buf->buffer;
2312 
2313 	if (!buffer)
2314 		return;
2315 
2316 	ring_buffer_record_disable(buffer);
2317 
2318 	/* Make sure all commits have finished */
2319 	synchronize_rcu();
2320 	ring_buffer_reset_cpu(buffer, cpu);
2321 
2322 	ring_buffer_record_enable(buffer);
2323 }
2324 
2325 void tracing_reset_online_cpus(struct array_buffer *buf)
2326 {
2327 	struct trace_buffer *buffer = buf->buffer;
2328 
2329 	if (!buffer)
2330 		return;
2331 
2332 	ring_buffer_record_disable(buffer);
2333 
2334 	/* Make sure all commits have finished */
2335 	synchronize_rcu();
2336 
2337 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2338 
2339 	ring_buffer_reset_online_cpus(buffer);
2340 
2341 	ring_buffer_record_enable(buffer);
2342 }
2343 
2344 /* Must have trace_types_lock held */
2345 void tracing_reset_all_online_cpus_unlocked(void)
2346 {
2347 	struct trace_array *tr;
2348 
2349 	lockdep_assert_held(&trace_types_lock);
2350 
2351 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2352 		if (!tr->clear_trace)
2353 			continue;
2354 		tr->clear_trace = false;
2355 		tracing_reset_online_cpus(&tr->array_buffer);
2356 #ifdef CONFIG_TRACER_MAX_TRACE
2357 		tracing_reset_online_cpus(&tr->max_buffer);
2358 #endif
2359 	}
2360 }
2361 
2362 void tracing_reset_all_online_cpus(void)
2363 {
2364 	mutex_lock(&trace_types_lock);
2365 	tracing_reset_all_online_cpus_unlocked();
2366 	mutex_unlock(&trace_types_lock);
2367 }
2368 
2369 int is_tracing_stopped(void)
2370 {
2371 	return global_trace.stop_count;
2372 }
2373 
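/*
 * tracing_start_tr() and tracing_stop_tr() below are reference counted via
 * tr->stop_count: only the transition from/to zero actually enables or
 * disables recording. max_lock is held while the record state is changed so
 * the main and max buffers cannot be swapped underneath us.
 */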
2374 static void tracing_start_tr(struct trace_array *tr)
2375 {
2376 	struct trace_buffer *buffer;
2377 	unsigned long flags;
2378 
2379 	if (tracing_disabled)
2380 		return;
2381 
2382 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2383 	if (--tr->stop_count) {
2384 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2385 			/* Someone screwed up their debugging */
2386 			tr->stop_count = 0;
2387 		}
2388 		goto out;
2389 	}
2390 
2391 	/* Prevent the buffers from switching */
2392 	arch_spin_lock(&tr->max_lock);
2393 
2394 	buffer = tr->array_buffer.buffer;
2395 	if (buffer)
2396 		ring_buffer_record_enable(buffer);
2397 
2398 #ifdef CONFIG_TRACER_MAX_TRACE
2399 	buffer = tr->max_buffer.buffer;
2400 	if (buffer)
2401 		ring_buffer_record_enable(buffer);
2402 #endif
2403 
2404 	arch_spin_unlock(&tr->max_lock);
2405 
2406  out:
2407 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2408 }
2409 
2410 /**
2411  * tracing_start - quick start of the tracer
2412  *
2413  * If tracing is enabled but was stopped by tracing_stop,
2414  * this will start the tracer back up.
2415  */
2416 void tracing_start(void)
2417 {
2419 	return tracing_start_tr(&global_trace);
2420 }
2421 
2422 static void tracing_stop_tr(struct trace_array *tr)
2423 {
2424 	struct trace_buffer *buffer;
2425 	unsigned long flags;
2426 
2427 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2428 	if (tr->stop_count++)
2429 		goto out;
2430 
2431 	/* Prevent the buffers from switching */
2432 	arch_spin_lock(&tr->max_lock);
2433 
2434 	buffer = tr->array_buffer.buffer;
2435 	if (buffer)
2436 		ring_buffer_record_disable(buffer);
2437 
2438 #ifdef CONFIG_TRACER_MAX_TRACE
2439 	buffer = tr->max_buffer.buffer;
2440 	if (buffer)
2441 		ring_buffer_record_disable(buffer);
2442 #endif
2443 
2444 	arch_spin_unlock(&tr->max_lock);
2445 
2446  out:
2447 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2448 }
2449 
2450 /**
2451  * tracing_stop - quick stop of the tracer
2452  *
2453  * Lightweight way to stop tracing. Use in conjunction with
2454  * tracing_start.
2455  */
2456 void tracing_stop(void)
2457 {
2458 	return tracing_stop_tr(&global_trace);
2459 }
2460 
2461 /*
2462  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2463  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2464  * simplifies those functions and keeps them in sync.
2465  */
2466 enum print_line_t trace_handle_return(struct trace_seq *s)
2467 {
2468 	return trace_seq_has_overflowed(s) ?
2469 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2470 }
2471 EXPORT_SYMBOL_GPL(trace_handle_return);
2472 
2473 static unsigned short migration_disable_value(void)
2474 {
2475 #if defined(CONFIG_SMP)
2476 	return current->migration_disabled;
2477 #else
2478 	return 0;
2479 #endif
2480 }
2481 
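/*
 * Build the trace_ctx word recorded with each event. Based on the return
 * statement below, the layout is:
 *
 *   bits  0..3   preempt count (capped at 15)
 *   bits  4..7   migration disable count (capped at 15)
 *   bits 16..    TRACE_FLAG_* bits (NMI, hardirq, softirq, need-resched, ...)
 */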
2482 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2483 {
2484 	unsigned int trace_flags = irqs_status;
2485 	unsigned int pc;
2486 
2487 	pc = preempt_count();
2488 
2489 	if (pc & NMI_MASK)
2490 		trace_flags |= TRACE_FLAG_NMI;
2491 	if (pc & HARDIRQ_MASK)
2492 		trace_flags |= TRACE_FLAG_HARDIRQ;
2493 	if (in_serving_softirq())
2494 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2495 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2496 		trace_flags |= TRACE_FLAG_BH_OFF;
2497 
2498 	if (tif_need_resched())
2499 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2500 	if (test_preempt_need_resched())
2501 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2502 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2503 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2504 }
2505 
2506 struct ring_buffer_event *
2507 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2508 			  int type,
2509 			  unsigned long len,
2510 			  unsigned int trace_ctx)
2511 {
2512 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2513 }
2514 
2515 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2516 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2517 static int trace_buffered_event_ref;
2518 
2519 /**
2520  * trace_buffered_event_enable - enable buffering events
2521  *
2522  * When events are being filtered, it is quicker to write the event
2523  * data into a temporary buffer if there is a good chance that it
2524  * will not be committed. Discarding an event from the ring buffer
2525  * is not as fast as committing one, and is much slower than copying
2526  * the data into a commit.
2527  *
2528  * When an event is to be filtered, allocate per-CPU buffers to write
2529  * the event data into. If the event is filtered and discarded, it is
2530  * simply dropped; otherwise, the entire data is committed in one shot.
2532  */
2533 void trace_buffered_event_enable(void)
2534 {
2535 	struct ring_buffer_event *event;
2536 	struct page *page;
2537 	int cpu;
2538 
2539 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2540 
2541 	if (trace_buffered_event_ref++)
2542 		return;
2543 
2544 	for_each_tracing_cpu(cpu) {
2545 		page = alloc_pages_node(cpu_to_node(cpu),
2546 					GFP_KERNEL | __GFP_NORETRY, 0);
2547 		/* This is just an optimization and can handle failures */
2548 		if (!page) {
2549 			pr_err("Failed to allocate event buffer\n");
2550 			break;
2551 		}
2552 
2553 		event = page_address(page);
2554 		memset(event, 0, sizeof(*event));
2555 
2556 		per_cpu(trace_buffered_event, cpu) = event;
2557 
2558 		preempt_disable();
2559 		if (cpu == smp_processor_id() &&
2560 		    __this_cpu_read(trace_buffered_event) !=
2561 		    per_cpu(trace_buffered_event, cpu))
2562 			WARN_ON_ONCE(1);
2563 		preempt_enable();
2564 	}
2565 }
2566 
2567 static void enable_trace_buffered_event(void *data)
2568 {
2569 	/* Probably not needed, but do it anyway */
2570 	smp_rmb();
2571 	this_cpu_dec(trace_buffered_event_cnt);
2572 }
2573 
2574 static void disable_trace_buffered_event(void *data)
2575 {
2576 	this_cpu_inc(trace_buffered_event_cnt);
2577 }
2578 
2579 /**
2580  * trace_buffered_event_disable - disable buffering events
2581  *
2582  * When a filter is removed, it is faster to not use the buffered
2583  * events, and to commit directly into the ring buffer. Free up
2584  * the temp buffers when there are no more users. This requires
2585  * special synchronization with current events.
2586  */
2587 void trace_buffered_event_disable(void)
2588 {
2589 	int cpu;
2590 
2591 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2592 
2593 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2594 		return;
2595 
2596 	if (--trace_buffered_event_ref)
2597 		return;
2598 
2599 	/* For each CPU, set the buffer as used. */
2600 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2601 			 NULL, true);
2602 
2603 	/* Wait for all current users to finish */
2604 	synchronize_rcu();
2605 
2606 	for_each_tracing_cpu(cpu) {
2607 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2608 		per_cpu(trace_buffered_event, cpu) = NULL;
2609 	}
2610 
2611 	/*
2612 	 * Wait for any CPU that may have started checking whether it can use
2613 	 * its event buffer only after the previous synchronize_rcu() call,
2614 	 * while still reading a valid pointer from trace_buffered_event. Such
2615 	 * a CPU must not see trace_buffered_event_cnt drop back to zero, or it
2616 	 * could wrongly decide to use the pointed-to buffer, which is now freed.
2617 	 */
2618 	synchronize_rcu();
2619 
2620 	/* For each CPU, relinquish the buffer */
2621 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2622 			 true);
2623 }
2624 
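/*
 * Fallback buffer used by trace_event_buffer_lock_reserve() when the real
 * buffer refuses the reservation but the event still has conditional
 * triggers that need to look at the event data. Events written here are
 * not recorded anywhere.
 */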
2625 static struct trace_buffer *temp_buffer;
2626 
2627 struct ring_buffer_event *
2628 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2629 			  struct trace_event_file *trace_file,
2630 			  int type, unsigned long len,
2631 			  unsigned int trace_ctx)
2632 {
2633 	struct ring_buffer_event *entry;
2634 	struct trace_array *tr = trace_file->tr;
2635 	int val;
2636 
2637 	*current_rb = tr->array_buffer.buffer;
2638 
2639 	if (!tr->no_filter_buffering_ref &&
2640 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2641 		preempt_disable_notrace();
2642 		/*
2643 		 * Filtering is on, so try to use the per cpu buffer first.
2644 		 * This buffer will simulate a ring_buffer_event,
2645 		 * where the type_len is zero and the array[0] will
2646 		 * hold the full length.
2647 		 * (see include/linux/ring_buffer.h for details on
2648 		 *  how the ring_buffer_event is structured).
2649 		 *
2650 		 * Using a temp buffer during filtering and copying it
2651 		 * on a matched filter is quicker than writing directly
2652 		 * into the ring buffer and then discarding it when
2653 		 * it doesn't match. That is because the discard
2654 		 * requires several atomic operations to get right.
2655 		 * Copying on match (and doing nothing on a failed match)
2656 		 * is still quicker than writing with no copy but then
2657 		 * having to discard out of the ring buffer on a failed match.
2658 		 */
2659 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2660 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2661 
2662 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2663 
2664 			/*
2665 			 * Preemption is disabled, but interrupts and NMIs
2666 			 * can still come in now. If that happens after
2667 			 * the above increment, then it will have to go
2668 			 * back to the old method of allocating the event
2669 			 * on the ring buffer, and if the filter fails, it
2670 			 * will have to call ring_buffer_discard_commit()
2671 			 * to remove it.
2672 			 *
2673 			 * Need to also check the unlikely case that the
2674 			 * length is bigger than the temp buffer size.
2675 			 * If that happens, then the reserve is pretty much
2676 			 * guaranteed to fail, as the ring buffer currently
2677 			 * only allows events less than a page. But that may
2678 			 * change in the future, so let the ring buffer reserve
2679 			 * handle the failure in that case.
2680 			 */
2681 			if (val == 1 && likely(len <= max_len)) {
2682 				trace_event_setup(entry, type, trace_ctx);
2683 				entry->array[0] = len;
2684 				/* Return with preemption disabled */
2685 				return entry;
2686 			}
2687 			this_cpu_dec(trace_buffered_event_cnt);
2688 		}
2689 		/* __trace_buffer_lock_reserve() disables preemption */
2690 		preempt_enable_notrace();
2691 	}
2692 
2693 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2694 					    trace_ctx);
2695 	/*
2696 	 * If tracing is off, but we have triggers enabled
2697 	 * we still need to look at the event data. Use the temp_buffer
2698 	 * to store the trace event for the trigger to use. It's recursion
2699 	 * safe and will not be recorded anywhere.
2700 	 */
2701 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2702 		*current_rb = temp_buffer;
2703 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2704 						    trace_ctx);
2705 	}
2706 	return entry;
2707 }
2708 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2709 
2710 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2711 static DEFINE_MUTEX(tracepoint_printk_mutex);
2712 
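/*
 * When the tracepoint_printk static key is enabled, mirror an event to the
 * console: run the event's print handler into tracepoint_print_iter under
 * tracepoint_iter_lock and emit the resulting line with printk().
 * Soft-disabled or filtered-out events are skipped.
 */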
2713 static void output_printk(struct trace_event_buffer *fbuffer)
2714 {
2715 	struct trace_event_call *event_call;
2716 	struct trace_event_file *file;
2717 	struct trace_event *event;
2718 	unsigned long flags;
2719 	struct trace_iterator *iter = tracepoint_print_iter;
2720 
2721 	/* We should never get here if iter is NULL */
2722 	if (WARN_ON_ONCE(!iter))
2723 		return;
2724 
2725 	event_call = fbuffer->trace_file->event_call;
2726 	if (!event_call || !event_call->event.funcs ||
2727 	    !event_call->event.funcs->trace)
2728 		return;
2729 
2730 	file = fbuffer->trace_file;
2731 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2732 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2733 	     !filter_match_preds(file->filter, fbuffer->entry)))
2734 		return;
2735 
2736 	event = &fbuffer->trace_file->event_call->event;
2737 
2738 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2739 	trace_seq_init(&iter->seq);
2740 	iter->ent = fbuffer->entry;
2741 	event_call->event.funcs->trace(iter, 0, event);
2742 	trace_seq_putc(&iter->seq, 0);
2743 	printk("%s", iter->seq.buffer);
2744 
2745 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2746 }
2747 
2748 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2749 			     void *buffer, size_t *lenp,
2750 			     loff_t *ppos)
2751 {
2752 	int save_tracepoint_printk;
2753 	int ret;
2754 
2755 	mutex_lock(&tracepoint_printk_mutex);
2756 	save_tracepoint_printk = tracepoint_printk;
2757 
2758 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2759 
2760 	/*
2761 	 * This will force exiting early, as tracepoint_printk
2762 	 * is always zero when tracepoint_print_iter is not allocated.
2763 	 */
2764 	if (!tracepoint_print_iter)
2765 		tracepoint_printk = 0;
2766 
2767 	if (save_tracepoint_printk == tracepoint_printk)
2768 		goto out;
2769 
2770 	if (tracepoint_printk)
2771 		static_key_enable(&tracepoint_printk_key.key);
2772 	else
2773 		static_key_disable(&tracepoint_printk_key.key);
2774 
2775  out:
2776 	mutex_unlock(&tracepoint_printk_mutex);
2777 
2778 	return ret;
2779 }
2780 
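/*
 * Final step of writing a trace event: give event triggers a chance to
 * discard it, optionally mirror it to printk and to the registered trace
 * exporters, then commit it to the ring buffer along with any requested
 * kernel/user stack traces. Post-call triggers still run when the event
 * itself was discarded.
 */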
2781 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2782 {
2783 	enum event_trigger_type tt = ETT_NONE;
2784 	struct trace_event_file *file = fbuffer->trace_file;
2785 
2786 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2787 			fbuffer->entry, &tt))
2788 		goto discard;
2789 
2790 	if (static_key_false(&tracepoint_printk_key.key))
2791 		output_printk(fbuffer);
2792 
2793 	if (static_branch_unlikely(&trace_event_exports_enabled))
2794 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2795 
2796 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2797 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2798 
2799 discard:
2800 	if (tt)
2801 		event_triggers_post_call(file, tt);
2802 
2803 }
2804 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2805 
2806 /*
2807  * Skip 3:
2808  *
2809  *   trace_buffer_unlock_commit_regs()
2810  *   trace_event_buffer_commit()
2811  *   trace_event_raw_event_xxx()
2812  */
2813 # define STACK_SKIP 3
2814 
2815 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2816 				     struct trace_buffer *buffer,
2817 				     struct ring_buffer_event *event,
2818 				     unsigned int trace_ctx,
2819 				     struct pt_regs *regs)
2820 {
2821 	__buffer_unlock_commit(buffer, event);
2822 
2823 	/*
2824 	 * If regs is not set, then skip the necessary functions.
2825 	 * Note, we can still get here via blktrace, wakeup tracer
2826 	 * and mmiotrace, but that's ok if they lose a function or
2827 	 * two. They are not that meaningful.
2828 	 */
2829 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2830 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2831 }
2832 
2833 /*
2834  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2835  */
2836 void
2837 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2838 				   struct ring_buffer_event *event)
2839 {
2840 	__buffer_unlock_commit(buffer, event);
2841 }
2842 
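/*
 * Record a function entry event (TRACE_FN) with the traced function's
 * address and its caller's address into @tr's ring buffer, honoring event
 * filters and the function export hooks.
 */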
2843 void
2844 trace_function(struct trace_array *tr, unsigned long ip,
2845 	       unsigned long parent_ip, unsigned int trace_ctx)
2846 {
2847 	struct trace_event_call *call = &event_function;
2848 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2849 	struct ring_buffer_event *event;
2850 	struct ftrace_entry *entry;
2851 
2852 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2853 					    trace_ctx);
2854 	if (!event)
2855 		return;
2856 	entry	= ring_buffer_event_data(event);
2857 	entry->ip			= ip;
2858 	entry->parent_ip		= parent_ip;
2859 
2860 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2861 		if (static_branch_unlikely(&trace_function_exports_enabled))
2862 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2863 		__buffer_unlock_commit(buffer, event);
2864 	}
2865 }
2866 
2867 #ifdef CONFIG_STACKTRACE
2868 
2869 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2870 #define FTRACE_KSTACK_NESTING	4
2871 
2872 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2873 
2874 struct ftrace_stack {
2875 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2876 };
2877 
2879 struct ftrace_stacks {
2880 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2881 };
2882 
2883 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2884 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2885 
2886 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2887 				 unsigned int trace_ctx,
2888 				 int skip, struct pt_regs *regs)
2889 {
2890 	struct trace_event_call *call = &event_kernel_stack;
2891 	struct ring_buffer_event *event;
2892 	unsigned int size, nr_entries;
2893 	struct ftrace_stack *fstack;
2894 	struct stack_entry *entry;
2895 	int stackidx;
2896 
2897 	/*
2898 	 * Add one, for this function and the call to stack_trace_save().
2899 	 * If regs is set, then these functions will not be in the way.
2900 	 */
2901 #ifndef CONFIG_UNWINDER_ORC
2902 	if (!regs)
2903 		skip++;
2904 #endif
2905 
2906 	preempt_disable_notrace();
2907 
2908 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2909 
2910 	/* This should never happen. If it does, yell once and skip */
2911 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2912 		goto out;
2913 
2914 	/*
2915 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2916 	 * interrupt will either see the value before the increment or after
2917 	 * it. If the interrupt happens before the increment, it will have
2918 	 * restored the counter when it returns. We just need a barrier to
2919 	 * keep gcc from moving things around.
2920 	 */
2921 	barrier();
2922 
2923 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2924 	size = ARRAY_SIZE(fstack->calls);
2925 
2926 	if (regs) {
2927 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2928 						   size, skip);
2929 	} else {
2930 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2931 	}
2932 
2933 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2934 				    struct_size(entry, caller, nr_entries),
2935 				    trace_ctx);
2936 	if (!event)
2937 		goto out;
2938 	entry = ring_buffer_event_data(event);
2939 
2940 	entry->size = nr_entries;
2941 	memcpy(&entry->caller, fstack->calls,
2942 	       flex_array_size(entry, caller, nr_entries));
2943 
2944 	if (!call_filter_check_discard(call, entry, buffer, event))
2945 		__buffer_unlock_commit(buffer, event);
2946 
2947  out:
2948 	/* Again, don't let gcc optimize things here */
2949 	barrier();
2950 	__this_cpu_dec(ftrace_stack_reserve);
2951 	preempt_enable_notrace();
2953 }
2954 
2955 static inline void ftrace_trace_stack(struct trace_array *tr,
2956 				      struct trace_buffer *buffer,
2957 				      unsigned int trace_ctx,
2958 				      int skip, struct pt_regs *regs)
2959 {
2960 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2961 		return;
2962 
2963 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
2964 }
2965 
2966 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
2967 		   int skip)
2968 {
2969 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2970 
2971 	if (rcu_is_watching()) {
2972 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
2973 		return;
2974 	}
2975 
2976 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
2977 		return;
2978 
2979 	/*
2980 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
2981 	 * but if the above rcu_is_watching() failed, then the NMI
2982 	 * triggered someplace critical, and ct_irq_enter() should
2983 	 * not be called from NMI.
2984 	 */
2985 	if (unlikely(in_nmi()))
2986 		return;
2987 
2988 	ct_irq_enter_irqson();
2989 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
2990 	ct_irq_exit_irqson();
2991 }
2992 
2993 /**
2994  * trace_dump_stack - record a stack back trace in the trace buffer
2995  * @skip: Number of functions to skip (helper handlers)
2996  */
2997 void trace_dump_stack(int skip)
2998 {
2999 	if (tracing_disabled || tracing_selftest_running)
3000 		return;
3001 
3002 #ifndef CONFIG_UNWINDER_ORC
3003 	/* Skip 1 to skip this function. */
3004 	skip++;
3005 #endif
3006 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3007 			     tracing_gen_ctx(), skip, NULL);
3008 }
3009 EXPORT_SYMBOL_GPL(trace_dump_stack);
3010 
3011 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3012 static DEFINE_PER_CPU(int, user_stack_count);
3013 
3014 static void
3015 ftrace_trace_userstack(struct trace_array *tr,
3016 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3017 {
3018 	struct trace_event_call *call = &event_user_stack;
3019 	struct ring_buffer_event *event;
3020 	struct userstack_entry *entry;
3021 
3022 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3023 		return;
3024 
3025 	/*
3026 	 * NMIs can not handle page faults, even with fix ups.
3027 	 * NMIs cannot handle page faults, even with fixups.
3028 	 * Saving the user stack can (and often does) fault.
3029 	if (unlikely(in_nmi()))
3030 		return;
3031 
3032 	/*
3033 	 * Prevent recursion, since the user stack tracing may
3034 	 * trigger other kernel events.
3035 	 */
3036 	preempt_disable();
3037 	if (__this_cpu_read(user_stack_count))
3038 		goto out;
3039 
3040 	__this_cpu_inc(user_stack_count);
3041 
3042 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3043 					    sizeof(*entry), trace_ctx);
3044 	if (!event)
3045 		goto out_drop_count;
3046 	entry	= ring_buffer_event_data(event);
3047 
3048 	entry->tgid		= current->tgid;
3049 	memset(&entry->caller, 0, sizeof(entry->caller));
3050 
3051 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3052 	if (!call_filter_check_discard(call, entry, buffer, event))
3053 		__buffer_unlock_commit(buffer, event);
3054 
3055  out_drop_count:
3056 	__this_cpu_dec(user_stack_count);
3057  out:
3058 	preempt_enable();
3059 }
3060 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3061 static void ftrace_trace_userstack(struct trace_array *tr,
3062 				   struct trace_buffer *buffer,
3063 				   unsigned int trace_ctx)
3064 {
3065 }
3066 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3067 
3068 #endif /* CONFIG_STACKTRACE */
3069 
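/*
 * The func_repeats entry stores the 64-bit timestamp delta split into two
 * 32-bit halves (bottom_delta_ts/top_delta_ts), so pack it accordingly.
 */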
3070 static inline void
3071 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3072 			  unsigned long long delta)
3073 {
3074 	entry->bottom_delta_ts = delta & U32_MAX;
3075 	entry->top_delta_ts = (delta >> 32);
3076 }
3077 
3078 void trace_last_func_repeats(struct trace_array *tr,
3079 			     struct trace_func_repeats *last_info,
3080 			     unsigned int trace_ctx)
3081 {
3082 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3083 	struct func_repeats_entry *entry;
3084 	struct ring_buffer_event *event;
3085 	u64 delta;
3086 
3087 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3088 					    sizeof(*entry), trace_ctx);
3089 	if (!event)
3090 		return;
3091 
3092 	delta = ring_buffer_event_time_stamp(buffer, event) -
3093 		last_info->ts_last_call;
3094 
3095 	entry = ring_buffer_event_data(event);
3096 	entry->ip = last_info->ip;
3097 	entry->parent_ip = last_info->parent_ip;
3098 	entry->count = last_info->count;
3099 	func_repeats_set_delta_ts(entry, delta);
3100 
3101 	__buffer_unlock_commit(buffer, event);
3102 }
3103 
3104 /* created for use with alloc_percpu */
3105 struct trace_buffer_struct {
3106 	int nesting;
3107 	char buffer[4][TRACE_BUF_SIZE];
3108 };
3109 
3110 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3111 
3112 /*
3113  * This allows for lockless recording.  If we're nested too deeply, then
3114  * this returns NULL.
3115  */
3116 static char *get_trace_buf(void)
3117 {
3118 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3119 
3120 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3121 		return NULL;
3122 
3123 	buffer->nesting++;
3124 
3125 	/* Interrupts must see nesting incremented before we use the buffer */
3126 	barrier();
3127 	return &buffer->buffer[buffer->nesting - 1][0];
3128 }
3129 
3130 static void put_trace_buf(void)
3131 {
3132 	/* Don't let the decrement of nesting leak before this */
3133 	barrier();
3134 	this_cpu_dec(trace_percpu_buffer->nesting);
3135 }
3136 
3137 static int alloc_percpu_trace_buffer(void)
3138 {
3139 	struct trace_buffer_struct __percpu *buffers;
3140 
3141 	if (trace_percpu_buffer)
3142 		return 0;
3143 
3144 	buffers = alloc_percpu(struct trace_buffer_struct);
3145 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3146 		return -ENOMEM;
3147 
3148 	trace_percpu_buffer = buffers;
3149 	return 0;
3150 }
3151 
3152 static int buffers_allocated;
3153 
3154 void trace_printk_init_buffers(void)
3155 {
3156 	if (buffers_allocated)
3157 		return;
3158 
3159 	if (alloc_percpu_trace_buffer())
3160 		return;
3161 
3162 	/* trace_printk() is for debug use only. Don't use it in production. */
3163 
3164 	pr_warn("\n");
3165 	pr_warn("**********************************************************\n");
3166 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3167 	pr_warn("**                                                      **\n");
3168 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3169 	pr_warn("**                                                      **\n");
3170 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3171 	pr_warn("** unsafe for production use.                           **\n");
3172 	pr_warn("**                                                      **\n");
3173 	pr_warn("** If you see this message and you are not debugging    **\n");
3174 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3175 	pr_warn("**                                                      **\n");
3176 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3177 	pr_warn("**********************************************************\n");
3178 
3179 	/* Expand the buffers to set size */
3180 	tracing_update_buffers(&global_trace);
3181 
3182 	buffers_allocated = 1;
3183 
3184 	/*
3185 	 * trace_printk_init_buffers() can be called by modules.
3186 	 * If that happens, then we need to start cmdline recording
3187 	 * directly here. If the global_trace.buffer is already
3188 	 * allocated here, then this was called by module code.
3189 	 */
3190 	if (global_trace.array_buffer.buffer)
3191 		tracing_start_cmdline_record();
3192 }
3193 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3194 
3195 void trace_printk_start_comm(void)
3196 {
3197 	/* Start tracing comms if trace printk is set */
3198 	if (!buffers_allocated)
3199 		return;
3200 	tracing_start_cmdline_record();
3201 }
3202 
3203 static void trace_printk_start_stop_comm(int enabled)
3204 {
3205 	if (!buffers_allocated)
3206 		return;
3207 
3208 	if (enabled)
3209 		tracing_start_cmdline_record();
3210 	else
3211 		tracing_stop_cmdline_record();
3212 }
3213 
3214 /**
3215  * trace_vbprintk - write binary msg to tracing buffer
3216  * @ip:    The address of the caller
3217  * @fmt:   The string format to write to the buffer
3218  * @args:  Arguments for @fmt
3219  */
3220 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3221 {
3222 	struct trace_event_call *call = &event_bprint;
3223 	struct ring_buffer_event *event;
3224 	struct trace_buffer *buffer;
3225 	struct trace_array *tr = &global_trace;
3226 	struct bprint_entry *entry;
3227 	unsigned int trace_ctx;
3228 	char *tbuffer;
3229 	int len = 0, size;
3230 
3231 	if (unlikely(tracing_selftest_running || tracing_disabled))
3232 		return 0;
3233 
3234 	/* Don't pollute graph traces with trace_vprintk internals */
3235 	pause_graph_tracing();
3236 
3237 	trace_ctx = tracing_gen_ctx();
3238 	preempt_disable_notrace();
3239 
3240 	tbuffer = get_trace_buf();
3241 	if (!tbuffer) {
3242 		len = 0;
3243 		goto out_nobuffer;
3244 	}
3245 
3246 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3247 
3248 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3249 		goto out_put;
3250 
3251 	size = sizeof(*entry) + sizeof(u32) * len;
3252 	buffer = tr->array_buffer.buffer;
3253 	ring_buffer_nest_start(buffer);
3254 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3255 					    trace_ctx);
3256 	if (!event)
3257 		goto out;
3258 	entry = ring_buffer_event_data(event);
3259 	entry->ip			= ip;
3260 	entry->fmt			= fmt;
3261 
3262 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3263 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3264 		__buffer_unlock_commit(buffer, event);
3265 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3266 	}
3267 
3268 out:
3269 	ring_buffer_nest_end(buffer);
3270 out_put:
3271 	put_trace_buf();
3272 
3273 out_nobuffer:
3274 	preempt_enable_notrace();
3275 	unpause_graph_tracing();
3276 
3277 	return len;
3278 }
3279 EXPORT_SYMBOL_GPL(trace_vbprintk);
3280 
3281 __printf(3, 0)
3282 static int
3283 __trace_array_vprintk(struct trace_buffer *buffer,
3284 		      unsigned long ip, const char *fmt, va_list args)
3285 {
3286 	struct trace_event_call *call = &event_print;
3287 	struct ring_buffer_event *event;
3288 	int len = 0, size;
3289 	struct print_entry *entry;
3290 	unsigned int trace_ctx;
3291 	char *tbuffer;
3292 
3293 	if (tracing_disabled)
3294 		return 0;
3295 
3296 	/* Don't pollute graph traces with trace_vprintk internals */
3297 	pause_graph_tracing();
3298 
3299 	trace_ctx = tracing_gen_ctx();
3300 	preempt_disable_notrace();
3301 
3303 	tbuffer = get_trace_buf();
3304 	if (!tbuffer) {
3305 		len = 0;
3306 		goto out_nobuffer;
3307 	}
3308 
3309 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3310 
3311 	size = sizeof(*entry) + len + 1;
3312 	ring_buffer_nest_start(buffer);
3313 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3314 					    trace_ctx);
3315 	if (!event)
3316 		goto out;
3317 	entry = ring_buffer_event_data(event);
3318 	entry->ip = ip;
3319 
3320 	memcpy(&entry->buf, tbuffer, len + 1);
3321 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3322 		__buffer_unlock_commit(buffer, event);
3323 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3324 	}
3325 
3326 out:
3327 	ring_buffer_nest_end(buffer);
3328 	put_trace_buf();
3329 
3330 out_nobuffer:
3331 	preempt_enable_notrace();
3332 	unpause_graph_tracing();
3333 
3334 	return len;
3335 }
3336 
3337 __printf(3, 0)
3338 int trace_array_vprintk(struct trace_array *tr,
3339 			unsigned long ip, const char *fmt, va_list args)
3340 {
3341 	if (tracing_selftest_running && tr == &global_trace)
3342 		return 0;
3343 
3344 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3345 }
3346 
3347 /**
3348  * trace_array_printk - Print a message to a specific instance
3349  * @tr: The instance trace_array descriptor
3350  * @ip: The instruction pointer that this is called from.
3351  * @fmt: The format to print (printf format)
3352  *
3353  * If a subsystem sets up its own instance, it has the right to printk
3354  * strings into its tracing instance buffer using this function. Note,
3355  * this function will not write into the top level buffer (use
3356  * trace_printk() for that), as the top level buffer should only
3357  * contain events that can be individually disabled. trace_printk()
3358  * is only used for debugging a kernel, and should never be
3359  * incorporated into normal use.
3360  *
3361  * trace_array_printk() can be used, as it will not add noise to the
3362  * top level tracing buffer.
3363  *
3364  * Note, trace_array_init_printk() must be called on @tr before this
3365  * can be used.
3366  */
3367 __printf(3, 0)
3368 int trace_array_printk(struct trace_array *tr,
3369 		       unsigned long ip, const char *fmt, ...)
3370 {
3371 	int ret;
3372 	va_list ap;
3373 
3374 	if (!tr)
3375 		return -ENOENT;
3376 
3377 	/* This is only allowed for created instances */
3378 	if (tr == &global_trace)
3379 		return 0;
3380 
3381 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3382 		return 0;
3383 
3384 	va_start(ap, fmt);
3385 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3386 	va_end(ap);
3387 	return ret;
3388 }
3389 EXPORT_SYMBOL_GPL(trace_array_printk);
3390 
3391 /**
3392  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3393  * @tr: The trace array to initialize the buffers for
3394  *
3395  * As trace_array_printk() only writes into instances, calls to it are OK
3396  * to have in the kernel (unlike trace_printk()). This needs to be called
3397  * before trace_array_printk() can be used on a trace_array.
3398  */
3399 int trace_array_init_printk(struct trace_array *tr)
3400 {
3401 	if (!tr)
3402 		return -ENOENT;
3403 
3404 	/* This is only allowed for created instances */
3405 	if (tr == &global_trace)
3406 		return -EINVAL;
3407 
3408 	return alloc_percpu_trace_buffer();
3409 }
3410 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3411 
3412 __printf(3, 4)
3413 int trace_array_printk_buf(struct trace_buffer *buffer,
3414 			   unsigned long ip, const char *fmt, ...)
3415 {
3416 	int ret;
3417 	va_list ap;
3418 
3419 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3420 		return 0;
3421 
3422 	va_start(ap, fmt);
3423 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3424 	va_end(ap);
3425 	return ret;
3426 }
3427 
3428 __printf(2, 0)
3429 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3430 {
3431 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3432 }
3433 EXPORT_SYMBOL_GPL(trace_vprintk);
3434 
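/*
 * Move the iterator one entry forward: bump the logical index and, when a
 * per-CPU ring buffer iterator exists, advance that as well.
 */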
3435 static void trace_iterator_increment(struct trace_iterator *iter)
3436 {
3437 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3438 
3439 	iter->idx++;
3440 	if (buf_iter)
3441 		ring_buffer_iter_advance(buf_iter);
3442 }
3443 
3444 static struct trace_entry *
3445 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3446 		unsigned long *lost_events)
3447 {
3448 	struct ring_buffer_event *event;
3449 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3450 
3451 	if (buf_iter) {
3452 		event = ring_buffer_iter_peek(buf_iter, ts);
3453 		if (lost_events)
3454 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3455 				(unsigned long)-1 : 0;
3456 	} else {
3457 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3458 					 lost_events);
3459 	}
3460 
3461 	if (event) {
3462 		iter->ent_size = ring_buffer_event_length(event);
3463 		return ring_buffer_event_data(event);
3464 	}
3465 	iter->ent_size = 0;
3466 	return NULL;
3467 }
3468 
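/*
 * Find the oldest pending entry across the CPUs covered by the iterator
 * (or just iter->cpu_file for a per-CPU trace file). Returns the entry and
 * reports its CPU, timestamp and lost-event count through the out
 * parameters, without consuming it.
 */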
3469 static struct trace_entry *
3470 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3471 		  unsigned long *missing_events, u64 *ent_ts)
3472 {
3473 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3474 	struct trace_entry *ent, *next = NULL;
3475 	unsigned long lost_events = 0, next_lost = 0;
3476 	int cpu_file = iter->cpu_file;
3477 	u64 next_ts = 0, ts;
3478 	int next_cpu = -1;
3479 	int next_size = 0;
3480 	int cpu;
3481 
3482 	/*
3483 	 * If we are in a per_cpu trace file, don't bother iterating over
3484 	 * all CPUs; peek directly at that CPU.
3485 	 */
3486 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3487 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3488 			return NULL;
3489 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3490 		if (ent_cpu)
3491 			*ent_cpu = cpu_file;
3492 
3493 		return ent;
3494 	}
3495 
3496 	for_each_tracing_cpu(cpu) {
3497 
3498 		if (ring_buffer_empty_cpu(buffer, cpu))
3499 			continue;
3500 
3501 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3502 
3503 		/*
3504 		 * Pick the entry with the smallest timestamp:
3505 		 */
3506 		if (ent && (!next || ts < next_ts)) {
3507 			next = ent;
3508 			next_cpu = cpu;
3509 			next_ts = ts;
3510 			next_lost = lost_events;
3511 			next_size = iter->ent_size;
3512 		}
3513 	}
3514 
3515 	iter->ent_size = next_size;
3516 
3517 	if (ent_cpu)
3518 		*ent_cpu = next_cpu;
3519 
3520 	if (ent_ts)
3521 		*ent_ts = next_ts;
3522 
3523 	if (missing_events)
3524 		*missing_events = next_lost;
3525 
3526 	return next;
3527 }
3528 
3529 #define STATIC_FMT_BUF_SIZE	128
3530 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3531 
3532 char *trace_iter_expand_format(struct trace_iterator *iter)
3533 {
3534 	char *tmp;
3535 
3536 	/*
3537 	 * iter->tr is NULL when used with tp_printk, which makes
3538 	 * this get called where it is not safe to call krealloc().
3539 	 */
3540 	if (!iter->tr || iter->fmt == static_fmt_buf)
3541 		return NULL;
3542 
3543 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3544 		       GFP_KERNEL);
3545 	if (tmp) {
3546 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3547 		iter->fmt = tmp;
3548 	}
3549 
3550 	return tmp;
3551 }
3552 
3553 /* Returns true if the string is safe to dereference from an event */
3554 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3555 			   bool star, int len)
3556 {
3557 	unsigned long addr = (unsigned long)str;
3558 	struct trace_event *trace_event;
3559 	struct trace_event_call *event;
3560 
3561 	/* Ignore strings with no length */
3562 	if (star && !len)
3563 		return true;
3564 
3565 	/* OK if part of the event data */
3566 	if ((addr >= (unsigned long)iter->ent) &&
3567 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3568 		return true;
3569 
3570 	/* OK if part of the temp seq buffer */
3571 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3572 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3573 		return true;
3574 
3575 	/* Core rodata can not be freed */
3576 	if (is_kernel_rodata(addr))
3577 		return true;
3578 
3579 	if (trace_is_tracepoint_string(str))
3580 		return true;
3581 
3582 	/*
3583 	 * Now this could be a module event, referencing core module
3584 	 * data, which is OK.
3585 	 */
3586 	if (!iter->ent)
3587 		return false;
3588 
3589 	trace_event = ftrace_find_event(iter->ent->type);
3590 	if (!trace_event)
3591 		return false;
3592 
3593 	event = container_of(trace_event, struct trace_event_call, event);
3594 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3595 		return false;
3596 
3597 	/* Would rather have rodata, but this will suffice */
3598 	if (within_module_core(addr, event->module))
3599 		return true;
3600 
3601 	return false;
3602 }
3603 
3604 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3605 
3606 static int test_can_verify_check(const char *fmt, ...)
3607 {
3608 	char buf[16];
3609 	va_list ap;
3610 	int ret;
3611 
3612 	/*
3613 	 * The verifier depends on vsnprintf() modifying the va_list
3614 	 * passed to it, where it is sent as a reference. Some architectures
3615 	 * (like x86_32) pass it by value, which means that vsnprintf()
3616 	 * does not modify the va_list passed to it, and the verifier
3617 	 * would then need to be able to understand all the values that
3618 	 * vsnprintf can use. If it is passed by value, then the verifier
3619 	 * is disabled.
3620 	 */
3621 	va_start(ap, fmt);
3622 	vsnprintf(buf, 16, "%d", ap);
3623 	ret = va_arg(ap, int);
3624 	va_end(ap);
3625 
3626 	return ret;
3627 }
3628 
3629 static void test_can_verify(void)
3630 {
3631 	if (!test_can_verify_check("%d %d", 0, 1)) {
3632 		pr_info("trace event string verifier disabled\n");
3633 		static_branch_inc(&trace_no_verify);
3634 	}
3635 }
3636 
3637 /**
3638  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3639  * @iter: The iterator that holds the seq buffer and the event being printed
3640  * @fmt: The format used to print the event
3641  * @ap: The va_list holding the data to print from @fmt.
3642  *
3643  * This writes the data into the @iter->seq buffer using the data from
3644  * @fmt and @ap. If the format has a %s, then the source of the string
3645  * is examined to make sure it is safe to print, otherwise it will
3646  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3647  * pointer.
3648  */
3649 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3650 			 va_list ap)
3651 {
3652 	const char *p = fmt;
3653 	const char *str;
3654 	int i, j;
3655 
3656 	if (WARN_ON_ONCE(!fmt))
3657 		return;
3658 
3659 	if (static_branch_unlikely(&trace_no_verify))
3660 		goto print;
3661 
3662 	/* Don't bother checking when doing a ftrace_dump() */
3663 	if (iter->fmt == static_fmt_buf)
3664 		goto print;
3665 
3666 	while (*p) {
3667 		bool star = false;
3668 		int len = 0;
3669 
3670 		j = 0;
3671 
3672 		/* We only care about %s and variants */
3673 		for (i = 0; p[i]; i++) {
3674 			if (i + 1 >= iter->fmt_size) {
3675 				/*
3676 				 * If we can't expand the copy buffer,
3677 				 * just print it.
3678 				 */
3679 				if (!trace_iter_expand_format(iter))
3680 					goto print;
3681 			}
3682 
3683 			if (p[i] == '\\' && p[i+1]) {
3684 				i++;
3685 				continue;
3686 			}
3687 			if (p[i] == '%') {
3688 				/* Need to test cases like %08.*s */
3689 				for (j = 1; p[i+j]; j++) {
3690 					if (isdigit(p[i+j]) ||
3691 					    p[i+j] == '.')
3692 						continue;
3693 					if (p[i+j] == '*') {
3694 						star = true;
3695 						continue;
3696 					}
3697 					break;
3698 				}
3699 				if (p[i+j] == 's')
3700 					break;
3701 				star = false;
3702 			}
3703 			j = 0;
3704 		}
3705 		/* If no %s found then just print normally */
3706 		if (!p[i])
3707 			break;
3708 
3709 		/* Copy up to the %s, and print that */
3710 		strncpy(iter->fmt, p, i);
3711 		iter->fmt[i] = '\0';
3712 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3713 
3714 		/*
3715 		 * If iter->seq is full, the above call no longer guarantees
3716 		 * that ap is in sync with fmt processing, and further calls
3717 		 * to va_arg() can return wrong positional arguments.
3718 		 *
3719 		 * Ensure that ap is no longer used in this case.
3720 		 */
3721 		if (iter->seq.full) {
3722 			p = "";
3723 			break;
3724 		}
3725 
3726 		if (star)
3727 			len = va_arg(ap, int);
3728 
3729 		/* The ap now points to the string data of the %s */
3730 		str = va_arg(ap, const char *);
3731 
3732 		/*
3733 		 * If you hit this warning, it is likely that the
3734 		 * trace event in question used %s on a string that
3735 		 * was saved at the time of the event, but may not be
3736 		 * around when the trace is read. Use __string(),
3737 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3738 		 * instead. See samples/trace_events/trace-events-sample.h
3739 		 * for reference.
3740 		 */
3741 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3742 			      "fmt: '%s' current_buffer: '%s'",
3743 			      fmt, seq_buf_str(&iter->seq.seq))) {
3744 			int ret;
3745 
3746 			/* Try to safely read the string */
3747 			if (star) {
3748 				if (len + 1 > iter->fmt_size)
3749 					len = iter->fmt_size - 1;
3750 				if (len < 0)
3751 					len = 0;
3752 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3753 				iter->fmt[len] = 0;
3754 				star = false;
3755 			} else {
3756 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3757 								  iter->fmt_size);
3758 			}
3759 			if (ret < 0)
3760 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3761 			else
3762 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3763 						 str, iter->fmt);
3764 			str = "[UNSAFE-MEMORY]";
3765 			strcpy(iter->fmt, "%s");
3766 		} else {
3767 			strncpy(iter->fmt, p + i, j + 1);
3768 			iter->fmt[j+1] = '\0';
3769 		}
3770 		if (star)
3771 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3772 		else
3773 			trace_seq_printf(&iter->seq, iter->fmt, str);
3774 
3775 		p += i + j + 1;
3776 	}
3777  print:
3778 	if (*p)
3779 		trace_seq_vprintf(&iter->seq, p, ap);
3780 }
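
/*
 * A minimal sketch of the event definition pattern that the warning in
 * trace_check_vprintf() points at.  The event and field names here are
 * hypothetical; see samples/trace_events/trace-events-sample.h for the
 * canonical example (the exact __assign_str() signature differs between
 * kernel versions).
 *
 *	TRACE_EVENT(foo_string,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * __string()/__assign_str() copy the string into the ring buffer at the
 * time of the event, so __get_str() in TP_printk() never dereferences a
 * pointer that may be gone by the time the trace is read.
 */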
3781 
3782 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3783 {
3784 	const char *p, *new_fmt;
3785 	char *q;
3786 
3787 	if (WARN_ON_ONCE(!fmt))
3788 		return fmt;
3789 
3790 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3791 		return fmt;
3792 
3793 	p = fmt;
3794 	new_fmt = q = iter->fmt;
3795 	while (*p) {
3796 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3797 			if (!trace_iter_expand_format(iter))
3798 				return fmt;
3799 
3800 			q += iter->fmt - new_fmt;
3801 			new_fmt = iter->fmt;
3802 		}
3803 
3804 		*q++ = *p++;
3805 
3806 		/* Replace %p with %px */
3807 		if (p[-1] == '%') {
3808 			if (p[0] == '%') {
3809 				*q++ = *p++;
3810 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3811 				*q++ = *p++;
3812 				*q++ = 'x';
3813 			}
3814 		}
3815 	}
3816 	*q = '\0';
3817 
3818 	return new_fmt;
3819 }
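
/*
 * A worked example of the rewrite above.  With the hash-ptr option
 * cleared, a format string such as
 *
 *	"ptr=%p pct=100%% ip=%pI4"
 *
 * is copied into iter->fmt as
 *
 *	"ptr=%px pct=100%% ip=%pI4"
 *
 * Only a bare %p is expanded to %px; "%%" is copied through untouched,
 * and extended pointer formats like %pI4 are skipped because the
 * isalnum() check sees the following format character.
 */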
3820 
3821 #define STATIC_TEMP_BUF_SIZE	128
3822 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3823 
3824 /* Find the next real entry, without updating the iterator itself */
3825 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3826 					  int *ent_cpu, u64 *ent_ts)
3827 {
3828 	/* __find_next_entry will reset ent_size */
3829 	int ent_size = iter->ent_size;
3830 	struct trace_entry *entry;
3831 
3832 	/*
3833 	 * If called from ftrace_dump(), then the iter->temp buffer
3834 	 * will be the static_temp_buf and not created from kmalloc.
3835 	 * If the entry size is greater than the buffer, we cannot
3836 	 * save it. Just return NULL in that case. This is only
3837 	 * used to add markers when two consecutive events' time
3838 	 * stamps have a large delta. See trace_print_lat_context().
3839 	 */
3840 	if (iter->temp == static_temp_buf &&
3841 	    STATIC_TEMP_BUF_SIZE < ent_size)
3842 		return NULL;
3843 
3844 	/*
3845 	 * The __find_next_entry() may call peek_next_entry(), which may
3846 	 * call ring_buffer_peek() that may make the contents of iter->ent
3847 	 * undefined. Need to copy iter->ent now.
3848 	 */
3849 	if (iter->ent && iter->ent != iter->temp) {
3850 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3851 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3852 			void *temp;
3853 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3854 			if (!temp)
3855 				return NULL;
3856 			kfree(iter->temp);
3857 			iter->temp = temp;
3858 			iter->temp_size = iter->ent_size;
3859 		}
3860 		memcpy(iter->temp, iter->ent, iter->ent_size);
3861 		iter->ent = iter->temp;
3862 	}
3863 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3864 	/* Put back the original ent_size */
3865 	iter->ent_size = ent_size;
3866 
3867 	return entry;
3868 }
3869 
3870 /* Find the next real entry, and increment the iterator to the next entry */
3871 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3872 {
3873 	iter->ent = __find_next_entry(iter, &iter->cpu,
3874 				      &iter->lost_events, &iter->ts);
3875 
3876 	if (iter->ent)
3877 		trace_iterator_increment(iter);
3878 
3879 	return iter->ent ? iter : NULL;
3880 }
3881 
3882 static void trace_consume(struct trace_iterator *iter)
3883 {
3884 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3885 			    &iter->lost_events);
3886 }
3887 
3888 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3889 {
3890 	struct trace_iterator *iter = m->private;
3891 	int i = (int)*pos;
3892 	void *ent;
3893 
3894 	WARN_ON_ONCE(iter->leftover);
3895 
3896 	(*pos)++;
3897 
3898 	/* can't go backwards */
3899 	if (iter->idx > i)
3900 		return NULL;
3901 
3902 	if (iter->idx < 0)
3903 		ent = trace_find_next_entry_inc(iter);
3904 	else
3905 		ent = iter;
3906 
3907 	while (ent && iter->idx < i)
3908 		ent = trace_find_next_entry_inc(iter);
3909 
3910 	iter->pos = *pos;
3911 
3912 	return ent;
3913 }
3914 
3915 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3916 {
3917 	struct ring_buffer_iter *buf_iter;
3918 	unsigned long entries = 0;
3919 	u64 ts;
3920 
3921 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3922 
3923 	buf_iter = trace_buffer_iter(iter, cpu);
3924 	if (!buf_iter)
3925 		return;
3926 
3927 	ring_buffer_iter_reset(buf_iter);
3928 
3929 	/*
3930 	 * With the max latency tracers, we can have the case where a
3931 	 * reset never took place on a CPU. This is evident from the
3932 	 * timestamp being before the start of the buffer.
3933 	 */
3934 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3935 		if (ts >= iter->array_buffer->time_start)
3936 			break;
3937 		entries++;
3938 		ring_buffer_iter_advance(buf_iter);
3939 	}
3940 
3941 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3942 }
3943 
3944 /*
3945  * The current tracer is copied into the iterator to avoid taking
3946  * a global lock all around.
3947  */
3948 static void *s_start(struct seq_file *m, loff_t *pos)
3949 {
3950 	struct trace_iterator *iter = m->private;
3951 	struct trace_array *tr = iter->tr;
3952 	int cpu_file = iter->cpu_file;
3953 	void *p = NULL;
3954 	loff_t l = 0;
3955 	int cpu;
3956 
3957 	mutex_lock(&trace_types_lock);
3958 	if (unlikely(tr->current_trace != iter->trace)) {
3959 		/* Close iter->trace before switching to the new current tracer */
3960 		if (iter->trace->close)
3961 			iter->trace->close(iter);
3962 		iter->trace = tr->current_trace;
3963 		/* Reopen the new current tracer */
3964 		if (iter->trace->open)
3965 			iter->trace->open(iter);
3966 	}
3967 	mutex_unlock(&trace_types_lock);
3968 
3969 #ifdef CONFIG_TRACER_MAX_TRACE
3970 	if (iter->snapshot && iter->trace->use_max_tr)
3971 		return ERR_PTR(-EBUSY);
3972 #endif
3973 
3974 	if (*pos != iter->pos) {
3975 		iter->ent = NULL;
3976 		iter->cpu = 0;
3977 		iter->idx = -1;
3978 
3979 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3980 			for_each_tracing_cpu(cpu)
3981 				tracing_iter_reset(iter, cpu);
3982 		} else
3983 			tracing_iter_reset(iter, cpu_file);
3984 
3985 		iter->leftover = 0;
3986 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3987 			;
3988 
3989 	} else {
3990 		/*
3991 		 * If we overflowed the seq_file before, then we want
3992 		 * to just reuse the trace_seq buffer again.
3993 		 */
3994 		if (iter->leftover)
3995 			p = iter;
3996 		else {
3997 			l = *pos - 1;
3998 			p = s_next(m, p, &l);
3999 		}
4000 	}
4001 
4002 	trace_event_read_lock();
4003 	trace_access_lock(cpu_file);
4004 	return p;
4005 }
4006 
4007 static void s_stop(struct seq_file *m, void *p)
4008 {
4009 	struct trace_iterator *iter = m->private;
4010 
4011 #ifdef CONFIG_TRACER_MAX_TRACE
4012 	if (iter->snapshot && iter->trace->use_max_tr)
4013 		return;
4014 #endif
4015 
4016 	trace_access_unlock(iter->cpu_file);
4017 	trace_event_read_unlock();
4018 }
4019 
4020 static void
4021 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4022 		      unsigned long *entries, int cpu)
4023 {
4024 	unsigned long count;
4025 
4026 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4027 	/*
4028 	 * If this buffer has skipped entries, then we hold all
4029 	 * entries for the trace and we need to ignore the
4030 	 * ones before the time stamp.
4031 	 */
4032 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4033 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4034 		/* total is the same as the entries */
4035 		*total = count;
4036 	} else
4037 		*total = count +
4038 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4039 	*entries = count;
4040 }
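
/*
 * A small worked example of the accounting above, with made-up numbers:
 * if a CPU buffer currently holds 1000 entries and has overwritten 200
 * older ones, then *entries = 1000 and *total = 1200.  If instead the
 * latency tracer marked 50 of those entries as skipped (recorded before
 * the buffer's time_start), both *entries and *total become 950, since
 * the skipped path ignores the overrun count.
 */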
4041 
4042 static void
4043 get_total_entries(struct array_buffer *buf,
4044 		  unsigned long *total, unsigned long *entries)
4045 {
4046 	unsigned long t, e;
4047 	int cpu;
4048 
4049 	*total = 0;
4050 	*entries = 0;
4051 
4052 	for_each_tracing_cpu(cpu) {
4053 		get_total_entries_cpu(buf, &t, &e, cpu);
4054 		*total += t;
4055 		*entries += e;
4056 	}
4057 }
4058 
4059 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4060 {
4061 	unsigned long total, entries;
4062 
4063 	if (!tr)
4064 		tr = &global_trace;
4065 
4066 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4067 
4068 	return entries;
4069 }
4070 
4071 unsigned long trace_total_entries(struct trace_array *tr)
4072 {
4073 	unsigned long total, entries;
4074 
4075 	if (!tr)
4076 		tr = &global_trace;
4077 
4078 	get_total_entries(&tr->array_buffer, &total, &entries);
4079 
4080 	return entries;
4081 }
4082 
4083 static void print_lat_help_header(struct seq_file *m)
4084 {
4085 	seq_puts(m, "#                    _------=> CPU#            \n"
4086 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4087 		    "#                  | / _----=> need-resched    \n"
4088 		    "#                  || / _---=> hardirq/softirq \n"
4089 		    "#                  ||| / _--=> preempt-depth   \n"
4090 		    "#                  |||| / _-=> migrate-disable \n"
4091 		    "#                  ||||| /     delay           \n"
4092 		    "#  cmd     pid     |||||| time  |   caller     \n"
4093 		    "#     \\   /        ||||||  \\    |    /       \n");
4094 }
4095 
4096 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4097 {
4098 	unsigned long total;
4099 	unsigned long entries;
4100 
4101 	get_total_entries(buf, &total, &entries);
4102 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4103 		   entries, total, num_online_cpus());
4104 	seq_puts(m, "#\n");
4105 }
4106 
4107 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4108 				   unsigned int flags)
4109 {
4110 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4111 
4112 	print_event_info(buf, m);
4113 
4114 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4115 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4116 }
4117 
4118 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4119 				       unsigned int flags)
4120 {
4121 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4122 	static const char space[] = "            ";
4123 	int prec = tgid ? 12 : 2;
4124 
4125 	print_event_info(buf, m);
4126 
4127 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4128 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4129 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4130 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4131 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4132 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4133 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4134 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4135 }
4136 
4137 void
4138 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4139 {
4140 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4141 	struct array_buffer *buf = iter->array_buffer;
4142 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4143 	struct tracer *type = iter->trace;
4144 	unsigned long entries;
4145 	unsigned long total;
4146 	const char *name = type->name;
4147 
4148 	get_total_entries(buf, &total, &entries);
4149 
4150 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4151 		   name, init_utsname()->release);
4152 	seq_puts(m, "# -----------------------------------"
4153 		 "---------------------------------\n");
4154 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4155 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4156 		   nsecs_to_usecs(data->saved_latency),
4157 		   entries,
4158 		   total,
4159 		   buf->cpu,
4160 		   preempt_model_none()      ? "server" :
4161 		   preempt_model_voluntary() ? "desktop" :
4162 		   preempt_model_full()      ? "preempt" :
4163 		   preempt_model_rt()        ? "preempt_rt" :
4164 		   "unknown",
4165 		   /* These are reserved for later use */
4166 		   0, 0, 0, 0);
4167 #ifdef CONFIG_SMP
4168 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4169 #else
4170 	seq_puts(m, ")\n");
4171 #endif
4172 	seq_puts(m, "#    -----------------\n");
4173 	seq_printf(m, "#    | task: %.16s-%d "
4174 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4175 		   data->comm, data->pid,
4176 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4177 		   data->policy, data->rt_priority);
4178 	seq_puts(m, "#    -----------------\n");
4179 
4180 	if (data->critical_start) {
4181 		seq_puts(m, "#  => started at: ");
4182 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4183 		trace_print_seq(m, &iter->seq);
4184 		seq_puts(m, "\n#  => ended at:   ");
4185 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4186 		trace_print_seq(m, &iter->seq);
4187 		seq_puts(m, "\n#\n");
4188 	}
4189 
4190 	seq_puts(m, "#\n");
4191 }
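
/*
 * Pieced together from the format strings above, the latency header
 * comes out roughly like this (tracer name and values are illustrative):
 *
 *	# irqsoff latency trace v1.1.5 on 6.8.0
 *	# --------------------------------------------------------------------
 *	# latency: 259 us, #4/4, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
 *	#    -----------------
 *	#    | task: ps-6143 (uid:0 nice:0 policy:0 rt_prio:0)
 *	#    -----------------
 *	#  => started at: ...
 *	#  => ended at:   ...
 *	#
 */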
4192 
4193 static void test_cpu_buff_start(struct trace_iterator *iter)
4194 {
4195 	struct trace_seq *s = &iter->seq;
4196 	struct trace_array *tr = iter->tr;
4197 
4198 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4199 		return;
4200 
4201 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4202 		return;
4203 
4204 	if (cpumask_available(iter->started) &&
4205 	    cpumask_test_cpu(iter->cpu, iter->started))
4206 		return;
4207 
4208 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4209 		return;
4210 
4211 	if (cpumask_available(iter->started))
4212 		cpumask_set_cpu(iter->cpu, iter->started);
4213 
4214 	/* Don't print started cpu buffer for the first entry of the trace */
4215 	if (iter->idx > 1)
4216 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4217 				iter->cpu);
4218 }
4219 
4220 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4221 {
4222 	struct trace_array *tr = iter->tr;
4223 	struct trace_seq *s = &iter->seq;
4224 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4225 	struct trace_entry *entry;
4226 	struct trace_event *event;
4227 
4228 	entry = iter->ent;
4229 
4230 	test_cpu_buff_start(iter);
4231 
4232 	event = ftrace_find_event(entry->type);
4233 
4234 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4235 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4236 			trace_print_lat_context(iter);
4237 		else
4238 			trace_print_context(iter);
4239 	}
4240 
4241 	if (trace_seq_has_overflowed(s))
4242 		return TRACE_TYPE_PARTIAL_LINE;
4243 
4244 	if (event) {
4245 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4246 			return print_event_fields(iter, event);
4247 		return event->funcs->trace(iter, sym_flags, event);
4248 	}
4249 
4250 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4251 
4252 	return trace_handle_return(s);
4253 }
4254 
4255 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4256 {
4257 	struct trace_array *tr = iter->tr;
4258 	struct trace_seq *s = &iter->seq;
4259 	struct trace_entry *entry;
4260 	struct trace_event *event;
4261 
4262 	entry = iter->ent;
4263 
4264 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4265 		trace_seq_printf(s, "%d %d %llu ",
4266 				 entry->pid, iter->cpu, iter->ts);
4267 
4268 	if (trace_seq_has_overflowed(s))
4269 		return TRACE_TYPE_PARTIAL_LINE;
4270 
4271 	event = ftrace_find_event(entry->type);
4272 	if (event)
4273 		return event->funcs->raw(iter, 0, event);
4274 
4275 	trace_seq_printf(s, "%d ?\n", entry->type);
4276 
4277 	return trace_handle_return(s);
4278 }
4279 
4280 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4281 {
4282 	struct trace_array *tr = iter->tr;
4283 	struct trace_seq *s = &iter->seq;
4284 	unsigned char newline = '\n';
4285 	struct trace_entry *entry;
4286 	struct trace_event *event;
4287 
4288 	entry = iter->ent;
4289 
4290 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4291 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4292 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4293 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4294 		if (trace_seq_has_overflowed(s))
4295 			return TRACE_TYPE_PARTIAL_LINE;
4296 	}
4297 
4298 	event = ftrace_find_event(entry->type);
4299 	if (event) {
4300 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4301 		if (ret != TRACE_TYPE_HANDLED)
4302 			return ret;
4303 	}
4304 
4305 	SEQ_PUT_FIELD(s, newline);
4306 
4307 	return trace_handle_return(s);
4308 }
4309 
4310 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4311 {
4312 	struct trace_array *tr = iter->tr;
4313 	struct trace_seq *s = &iter->seq;
4314 	struct trace_entry *entry;
4315 	struct trace_event *event;
4316 
4317 	entry = iter->ent;
4318 
4319 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4320 		SEQ_PUT_FIELD(s, entry->pid);
4321 		SEQ_PUT_FIELD(s, iter->cpu);
4322 		SEQ_PUT_FIELD(s, iter->ts);
4323 		if (trace_seq_has_overflowed(s))
4324 			return TRACE_TYPE_PARTIAL_LINE;
4325 	}
4326 
4327 	event = ftrace_find_event(entry->type);
4328 	return event ? event->funcs->binary(iter, 0, event) :
4329 		TRACE_TYPE_HANDLED;
4330 }
4331 
4332 int trace_empty(struct trace_iterator *iter)
4333 {
4334 	struct ring_buffer_iter *buf_iter;
4335 	int cpu;
4336 
4337 	/* If we are looking at one CPU buffer, only check that one */
4338 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4339 		cpu = iter->cpu_file;
4340 		buf_iter = trace_buffer_iter(iter, cpu);
4341 		if (buf_iter) {
4342 			if (!ring_buffer_iter_empty(buf_iter))
4343 				return 0;
4344 		} else {
4345 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4346 				return 0;
4347 		}
4348 		return 1;
4349 	}
4350 
4351 	for_each_tracing_cpu(cpu) {
4352 		buf_iter = trace_buffer_iter(iter, cpu);
4353 		if (buf_iter) {
4354 			if (!ring_buffer_iter_empty(buf_iter))
4355 				return 0;
4356 		} else {
4357 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4358 				return 0;
4359 		}
4360 	}
4361 
4362 	return 1;
4363 }
4364 
4365 /*  Called with trace_event_read_lock() held. */
4366 enum print_line_t print_trace_line(struct trace_iterator *iter)
4367 {
4368 	struct trace_array *tr = iter->tr;
4369 	unsigned long trace_flags = tr->trace_flags;
4370 	enum print_line_t ret;
4371 
4372 	if (iter->lost_events) {
4373 		if (iter->lost_events == (unsigned long)-1)
4374 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4375 					 iter->cpu);
4376 		else
4377 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4378 					 iter->cpu, iter->lost_events);
4379 		if (trace_seq_has_overflowed(&iter->seq))
4380 			return TRACE_TYPE_PARTIAL_LINE;
4381 	}
4382 
4383 	if (iter->trace && iter->trace->print_line) {
4384 		ret = iter->trace->print_line(iter);
4385 		if (ret != TRACE_TYPE_UNHANDLED)
4386 			return ret;
4387 	}
4388 
4389 	if (iter->ent->type == TRACE_BPUTS &&
4390 			trace_flags & TRACE_ITER_PRINTK &&
4391 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4392 		return trace_print_bputs_msg_only(iter);
4393 
4394 	if (iter->ent->type == TRACE_BPRINT &&
4395 			trace_flags & TRACE_ITER_PRINTK &&
4396 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4397 		return trace_print_bprintk_msg_only(iter);
4398 
4399 	if (iter->ent->type == TRACE_PRINT &&
4400 			trace_flags & TRACE_ITER_PRINTK &&
4401 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4402 		return trace_print_printk_msg_only(iter);
4403 
4404 	if (trace_flags & TRACE_ITER_BIN)
4405 		return print_bin_fmt(iter);
4406 
4407 	if (trace_flags & TRACE_ITER_HEX)
4408 		return print_hex_fmt(iter);
4409 
4410 	if (trace_flags & TRACE_ITER_RAW)
4411 		return print_raw_fmt(iter);
4412 
4413 	return print_trace_fmt(iter);
4414 }
4415 
4416 void trace_latency_header(struct seq_file *m)
4417 {
4418 	struct trace_iterator *iter = m->private;
4419 	struct trace_array *tr = iter->tr;
4420 
4421 	/* print nothing if the buffers are empty */
4422 	if (trace_empty(iter))
4423 		return;
4424 
4425 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4426 		print_trace_header(m, iter);
4427 
4428 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4429 		print_lat_help_header(m);
4430 }
4431 
4432 void trace_default_header(struct seq_file *m)
4433 {
4434 	struct trace_iterator *iter = m->private;
4435 	struct trace_array *tr = iter->tr;
4436 	unsigned long trace_flags = tr->trace_flags;
4437 
4438 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4439 		return;
4440 
4441 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4442 		/* print nothing if the buffers are empty */
4443 		if (trace_empty(iter))
4444 			return;
4445 		print_trace_header(m, iter);
4446 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4447 			print_lat_help_header(m);
4448 	} else {
4449 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4450 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4451 				print_func_help_header_irq(iter->array_buffer,
4452 							   m, trace_flags);
4453 			else
4454 				print_func_help_header(iter->array_buffer, m,
4455 						       trace_flags);
4456 		}
4457 	}
4458 }
4459 
4460 static void test_ftrace_alive(struct seq_file *m)
4461 {
4462 	if (!ftrace_is_dead())
4463 		return;
4464 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4465 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4466 }
4467 
4468 #ifdef CONFIG_TRACER_MAX_TRACE
4469 static void show_snapshot_main_help(struct seq_file *m)
4470 {
4471 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4472 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4473 		    "#                      Takes a snapshot of the main buffer.\n"
4474 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4475 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4476 		    "#                       is not a '0' or '1')\n");
4477 }
4478 
4479 static void show_snapshot_percpu_help(struct seq_file *m)
4480 {
4481 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4482 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4483 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4484 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4485 #else
4486 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4487 		    "#                     Must use main snapshot file to allocate.\n");
4488 #endif
4489 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4490 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4491 		    "#                       is not a '0' or '1')\n");
4492 }
4493 
4494 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4495 {
4496 	if (iter->tr->allocated_snapshot)
4497 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4498 	else
4499 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4500 
4501 	seq_puts(m, "# Snapshot commands:\n");
4502 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4503 		show_snapshot_main_help(m);
4504 	else
4505 		show_snapshot_percpu_help(m);
4506 }
4507 #else
4508 /* Should never be called */
4509 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4510 #endif
4511 
4512 static int s_show(struct seq_file *m, void *v)
4513 {
4514 	struct trace_iterator *iter = v;
4515 	int ret;
4516 
4517 	if (iter->ent == NULL) {
4518 		if (iter->tr) {
4519 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4520 			seq_puts(m, "#\n");
4521 			test_ftrace_alive(m);
4522 		}
4523 		if (iter->snapshot && trace_empty(iter))
4524 			print_snapshot_help(m, iter);
4525 		else if (iter->trace && iter->trace->print_header)
4526 			iter->trace->print_header(m);
4527 		else
4528 			trace_default_header(m);
4529 
4530 	} else if (iter->leftover) {
4531 		/*
4532 		 * If we filled the seq_file buffer earlier, we
4533 		 * want to just show it now.
4534 		 */
4535 		ret = trace_print_seq(m, &iter->seq);
4536 
4537 		/* ret should this time be zero, but you never know */
4538 		iter->leftover = ret;
4539 
4540 	} else {
4541 		ret = print_trace_line(iter);
4542 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4543 			iter->seq.full = 0;
4544 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4545 		}
4546 		ret = trace_print_seq(m, &iter->seq);
4547 		/*
4548 		 * If we overflow the seq_file buffer, then it will
4549 		 * ask us for this data again at start up.
4550 		 * Use that instead.
4551 		 *  ret is 0 if seq_file write succeeded.
4552 		 *        -1 otherwise.
4553 		 */
4554 		iter->leftover = ret;
4555 	}
4556 
4557 	return 0;
4558 }
4559 
4560 /*
4561  * Should be used after trace_array_get(), trace_types_lock
4562  * ensures that i_cdev was already initialized.
4563  */
4564 static inline int tracing_get_cpu(struct inode *inode)
4565 {
4566 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4567 		return (long)inode->i_cdev - 1;
4568 	return RING_BUFFER_ALL_CPUS;
4569 }
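
/*
 * trace_create_cpu_file() (not shown here) stashes "cpu + 1" in i_cdev
 * so that a NULL i_cdev can still mean "no specific CPU".  For a
 * per_cpu/cpu2/trace file, for example, i_cdev would be (void *)3 and
 * the helper above decodes it back to CPU 2; files without a CPU
 * component fall through to RING_BUFFER_ALL_CPUS.
 */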
4570 
4571 static const struct seq_operations tracer_seq_ops = {
4572 	.start		= s_start,
4573 	.next		= s_next,
4574 	.stop		= s_stop,
4575 	.show		= s_show,
4576 };
4577 
4578 /*
4579  * Note, as iter itself can be allocated and freed in different
4580  * ways, this function is only used to free its content, and not
4581  * the iterator itself. The only requirement on all the allocations
4582  * is that they zero all fields (kzalloc), as freeing works with
4583  * either allocated content or NULL.
4584  */
4585 static void free_trace_iter_content(struct trace_iterator *iter)
4586 {
4587 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4588 	if (iter->fmt != static_fmt_buf)
4589 		kfree(iter->fmt);
4590 
4591 	kfree(iter->temp);
4592 	kfree(iter->buffer_iter);
4593 	mutex_destroy(&iter->mutex);
4594 	free_cpumask_var(iter->started);
4595 }
4596 
4597 static struct trace_iterator *
4598 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4599 {
4600 	struct trace_array *tr = inode->i_private;
4601 	struct trace_iterator *iter;
4602 	int cpu;
4603 
4604 	if (tracing_disabled)
4605 		return ERR_PTR(-ENODEV);
4606 
4607 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4608 	if (!iter)
4609 		return ERR_PTR(-ENOMEM);
4610 
4611 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4612 				    GFP_KERNEL);
4613 	if (!iter->buffer_iter)
4614 		goto release;
4615 
4616 	/*
4617 	 * trace_find_next_entry() may need to save off iter->ent.
4618 	 * It will place it into the iter->temp buffer. As most
4619 	 * events are less than 128 bytes, allocate a buffer of that size.
4620 	 * If one is greater, then trace_find_next_entry() will
4621 	 * allocate a new buffer to adjust for the bigger iter->ent.
4622 	 * It's not critical if it fails to get allocated here.
4623 	 */
4624 	iter->temp = kmalloc(128, GFP_KERNEL);
4625 	if (iter->temp)
4626 		iter->temp_size = 128;
4627 
4628 	/*
4629 	 * trace_event_printf() may need to modify the given format
4630 	 * string to replace %p with %px so that it shows the real address
4631 	 * instead of a hash value. However, that is only needed for event
4632 	 * tracing; other tracers may not need it. Defer the allocation
4633 	 * until it is needed.
4634 	 */
4635 	iter->fmt = NULL;
4636 	iter->fmt_size = 0;
4637 
4638 	mutex_lock(&trace_types_lock);
4639 	iter->trace = tr->current_trace;
4640 
4641 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4642 		goto fail;
4643 
4644 	iter->tr = tr;
4645 
4646 #ifdef CONFIG_TRACER_MAX_TRACE
4647 	/* Currently only the top directory has a snapshot */
4648 	if (tr->current_trace->print_max || snapshot)
4649 		iter->array_buffer = &tr->max_buffer;
4650 	else
4651 #endif
4652 		iter->array_buffer = &tr->array_buffer;
4653 	iter->snapshot = snapshot;
4654 	iter->pos = -1;
4655 	iter->cpu_file = tracing_get_cpu(inode);
4656 	mutex_init(&iter->mutex);
4657 
4658 	/* Notify the tracer early, before we stop tracing. */
4659 	if (iter->trace->open)
4660 		iter->trace->open(iter);
4661 
4662 	/* Annotate start of buffers if we had overruns */
4663 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4664 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4665 
4666 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4667 	if (trace_clocks[tr->clock_id].in_ns)
4668 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4669 
4670 	/*
4671 	 * If pause-on-trace is enabled, then stop the trace while
4672 	 * dumping, unless this is the "snapshot" file
4673 	 */
4674 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4675 		tracing_stop_tr(tr);
4676 
4677 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4678 		for_each_tracing_cpu(cpu) {
4679 			iter->buffer_iter[cpu] =
4680 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4681 							 cpu, GFP_KERNEL);
4682 		}
4683 		ring_buffer_read_prepare_sync();
4684 		for_each_tracing_cpu(cpu) {
4685 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4686 			tracing_iter_reset(iter, cpu);
4687 		}
4688 	} else {
4689 		cpu = iter->cpu_file;
4690 		iter->buffer_iter[cpu] =
4691 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4692 						 cpu, GFP_KERNEL);
4693 		ring_buffer_read_prepare_sync();
4694 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4695 		tracing_iter_reset(iter, cpu);
4696 	}
4697 
4698 	mutex_unlock(&trace_types_lock);
4699 
4700 	return iter;
4701 
4702  fail:
4703 	mutex_unlock(&trace_types_lock);
4704 	free_trace_iter_content(iter);
4705 release:
4706 	seq_release_private(inode, file);
4707 	return ERR_PTR(-ENOMEM);
4708 }
4709 
4710 int tracing_open_generic(struct inode *inode, struct file *filp)
4711 {
4712 	int ret;
4713 
4714 	ret = tracing_check_open_get_tr(NULL);
4715 	if (ret)
4716 		return ret;
4717 
4718 	filp->private_data = inode->i_private;
4719 	return 0;
4720 }
4721 
4722 bool tracing_is_disabled(void)
4723 {
4724 	return (tracing_disabled) ? true : false;
4725 }
4726 
4727 /*
4728  * Open and update trace_array ref count.
4729  * Must have the current trace_array passed to it.
4730  */
4731 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4732 {
4733 	struct trace_array *tr = inode->i_private;
4734 	int ret;
4735 
4736 	ret = tracing_check_open_get_tr(tr);
4737 	if (ret)
4738 		return ret;
4739 
4740 	filp->private_data = inode->i_private;
4741 
4742 	return 0;
4743 }
4744 
4745 /*
4746  * The private pointer of the inode is the trace_event_file.
4747  * Update the tr ref count associated to it.
4748  */
4749 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4750 {
4751 	struct trace_event_file *file = inode->i_private;
4752 	int ret;
4753 
4754 	ret = tracing_check_open_get_tr(file->tr);
4755 	if (ret)
4756 		return ret;
4757 
4758 	mutex_lock(&event_mutex);
4759 
4760 	/* Fail if the file is marked for removal */
4761 	if (file->flags & EVENT_FILE_FL_FREED) {
4762 		trace_array_put(file->tr);
4763 		ret = -ENODEV;
4764 	} else {
4765 		event_file_get(file);
4766 	}
4767 
4768 	mutex_unlock(&event_mutex);
4769 	if (ret)
4770 		return ret;
4771 
4772 	filp->private_data = inode->i_private;
4773 
4774 	return 0;
4775 }
4776 
4777 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4778 {
4779 	struct trace_event_file *file = inode->i_private;
4780 
4781 	trace_array_put(file->tr);
4782 	event_file_put(file);
4783 
4784 	return 0;
4785 }
4786 
4787 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4788 {
4789 	tracing_release_file_tr(inode, filp);
4790 	return single_release(inode, filp);
4791 }
4792 
4793 static int tracing_mark_open(struct inode *inode, struct file *filp)
4794 {
4795 	stream_open(inode, filp);
4796 	return tracing_open_generic_tr(inode, filp);
4797 }
4798 
4799 static int tracing_release(struct inode *inode, struct file *file)
4800 {
4801 	struct trace_array *tr = inode->i_private;
4802 	struct seq_file *m = file->private_data;
4803 	struct trace_iterator *iter;
4804 	int cpu;
4805 
4806 	if (!(file->f_mode & FMODE_READ)) {
4807 		trace_array_put(tr);
4808 		return 0;
4809 	}
4810 
4811 	/* Writes do not use seq_file */
4812 	iter = m->private;
4813 	mutex_lock(&trace_types_lock);
4814 
4815 	for_each_tracing_cpu(cpu) {
4816 		if (iter->buffer_iter[cpu])
4817 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4818 	}
4819 
4820 	if (iter->trace && iter->trace->close)
4821 		iter->trace->close(iter);
4822 
4823 	if (!iter->snapshot && tr->stop_count)
4824 		/* reenable tracing if it was previously enabled */
4825 		tracing_start_tr(tr);
4826 
4827 	__trace_array_put(tr);
4828 
4829 	mutex_unlock(&trace_types_lock);
4830 
4831 	free_trace_iter_content(iter);
4832 	seq_release_private(inode, file);
4833 
4834 	return 0;
4835 }
4836 
4837 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4838 {
4839 	struct trace_array *tr = inode->i_private;
4840 
4841 	trace_array_put(tr);
4842 	return 0;
4843 }
4844 
4845 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4846 {
4847 	struct trace_array *tr = inode->i_private;
4848 
4849 	trace_array_put(tr);
4850 
4851 	return single_release(inode, file);
4852 }
4853 
4854 static int tracing_open(struct inode *inode, struct file *file)
4855 {
4856 	struct trace_array *tr = inode->i_private;
4857 	struct trace_iterator *iter;
4858 	int ret;
4859 
4860 	ret = tracing_check_open_get_tr(tr);
4861 	if (ret)
4862 		return ret;
4863 
4864 	/* If this file was open for write, then erase contents */
4865 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4866 		int cpu = tracing_get_cpu(inode);
4867 		struct array_buffer *trace_buf = &tr->array_buffer;
4868 
4869 #ifdef CONFIG_TRACER_MAX_TRACE
4870 		if (tr->current_trace->print_max)
4871 			trace_buf = &tr->max_buffer;
4872 #endif
4873 
4874 		if (cpu == RING_BUFFER_ALL_CPUS)
4875 			tracing_reset_online_cpus(trace_buf);
4876 		else
4877 			tracing_reset_cpu(trace_buf, cpu);
4878 	}
4879 
4880 	if (file->f_mode & FMODE_READ) {
4881 		iter = __tracing_open(inode, file, false);
4882 		if (IS_ERR(iter))
4883 			ret = PTR_ERR(iter);
4884 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4885 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4886 	}
4887 
4888 	if (ret < 0)
4889 		trace_array_put(tr);
4890 
4891 	return ret;
4892 }
4893 
4894 /*
4895  * Some tracers are not suitable for instance buffers.
4896  * A tracer is always available for the global array (toplevel)
4897  * or if it explicitly states that it is.
4898  */
4899 static bool
4900 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4901 {
4902 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4903 }
4904 
4905 /* Find the next tracer that this trace array may use */
4906 static struct tracer *
4907 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4908 {
4909 	while (t && !trace_ok_for_array(t, tr))
4910 		t = t->next;
4911 
4912 	return t;
4913 }
4914 
4915 static void *
4916 t_next(struct seq_file *m, void *v, loff_t *pos)
4917 {
4918 	struct trace_array *tr = m->private;
4919 	struct tracer *t = v;
4920 
4921 	(*pos)++;
4922 
4923 	if (t)
4924 		t = get_tracer_for_array(tr, t->next);
4925 
4926 	return t;
4927 }
4928 
4929 static void *t_start(struct seq_file *m, loff_t *pos)
4930 {
4931 	struct trace_array *tr = m->private;
4932 	struct tracer *t;
4933 	loff_t l = 0;
4934 
4935 	mutex_lock(&trace_types_lock);
4936 
4937 	t = get_tracer_for_array(tr, trace_types);
4938 	for (; t && l < *pos; t = t_next(m, t, &l))
4939 			;
4940 
4941 	return t;
4942 }
4943 
4944 static void t_stop(struct seq_file *m, void *p)
4945 {
4946 	mutex_unlock(&trace_types_lock);
4947 }
4948 
4949 static int t_show(struct seq_file *m, void *v)
4950 {
4951 	struct tracer *t = v;
4952 
4953 	if (!t)
4954 		return 0;
4955 
4956 	seq_puts(m, t->name);
4957 	if (t->next)
4958 		seq_putc(m, ' ');
4959 	else
4960 		seq_putc(m, '\n');
4961 
4962 	return 0;
4963 }
4964 
4965 static const struct seq_operations show_traces_seq_ops = {
4966 	.start		= t_start,
4967 	.next		= t_next,
4968 	.stop		= t_stop,
4969 	.show		= t_show,
4970 };
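
/*
 * The t_*() callbacks above back the available_tracers file.  A rough
 * sketch of what a read might return (the exact list depends on which
 * tracers are built in and allowed for the instance):
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	function_graph function nop
 *
 * t_show() separates the names with spaces and terminates the list
 * with a newline.
 */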
4971 
4972 static int show_traces_open(struct inode *inode, struct file *file)
4973 {
4974 	struct trace_array *tr = inode->i_private;
4975 	struct seq_file *m;
4976 	int ret;
4977 
4978 	ret = tracing_check_open_get_tr(tr);
4979 	if (ret)
4980 		return ret;
4981 
4982 	ret = seq_open(file, &show_traces_seq_ops);
4983 	if (ret) {
4984 		trace_array_put(tr);
4985 		return ret;
4986 	}
4987 
4988 	m = file->private_data;
4989 	m->private = tr;
4990 
4991 	return 0;
4992 }
4993 
4994 static int show_traces_release(struct inode *inode, struct file *file)
4995 {
4996 	struct trace_array *tr = inode->i_private;
4997 
4998 	trace_array_put(tr);
4999 	return seq_release(inode, file);
5000 }
5001 
5002 static ssize_t
5003 tracing_write_stub(struct file *filp, const char __user *ubuf,
5004 		   size_t count, loff_t *ppos)
5005 {
5006 	return count;
5007 }
5008 
5009 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5010 {
5011 	int ret;
5012 
5013 	if (file->f_mode & FMODE_READ)
5014 		ret = seq_lseek(file, offset, whence);
5015 	else
5016 		file->f_pos = ret = 0;
5017 
5018 	return ret;
5019 }
5020 
5021 static const struct file_operations tracing_fops = {
5022 	.open		= tracing_open,
5023 	.read		= seq_read,
5024 	.read_iter	= seq_read_iter,
5025 	.splice_read	= copy_splice_read,
5026 	.write		= tracing_write_stub,
5027 	.llseek		= tracing_lseek,
5028 	.release	= tracing_release,
5029 };
5030 
5031 static const struct file_operations show_traces_fops = {
5032 	.open		= show_traces_open,
5033 	.read		= seq_read,
5034 	.llseek		= seq_lseek,
5035 	.release	= show_traces_release,
5036 };
5037 
5038 static ssize_t
5039 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5040 		     size_t count, loff_t *ppos)
5041 {
5042 	struct trace_array *tr = file_inode(filp)->i_private;
5043 	char *mask_str;
5044 	int len;
5045 
5046 	len = snprintf(NULL, 0, "%*pb\n",
5047 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5048 	mask_str = kmalloc(len, GFP_KERNEL);
5049 	if (!mask_str)
5050 		return -ENOMEM;
5051 
5052 	len = snprintf(mask_str, len, "%*pb\n",
5053 		       cpumask_pr_args(tr->tracing_cpumask));
5054 	if (len >= count) {
5055 		count = -EINVAL;
5056 		goto out_err;
5057 	}
5058 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5059 
5060 out_err:
5061 	kfree(mask_str);
5062 
5063 	return count;
5064 }
5065 
5066 int tracing_set_cpumask(struct trace_array *tr,
5067 			cpumask_var_t tracing_cpumask_new)
5068 {
5069 	int cpu;
5070 
5071 	if (!tr)
5072 		return -EINVAL;
5073 
5074 	local_irq_disable();
5075 	arch_spin_lock(&tr->max_lock);
5076 	for_each_tracing_cpu(cpu) {
5077 		/*
5078 		 * Increase/decrease the disabled counter if we are
5079 		 * about to flip a bit in the cpumask:
5080 		 */
5081 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5082 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5083 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5084 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5085 #ifdef CONFIG_TRACER_MAX_TRACE
5086 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5087 #endif
5088 		}
5089 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5090 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5091 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5092 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5093 #ifdef CONFIG_TRACER_MAX_TRACE
5094 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5095 #endif
5096 		}
5097 	}
5098 	arch_spin_unlock(&tr->max_lock);
5099 	local_irq_enable();
5100 
5101 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5102 
5103 	return 0;
5104 }
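
/*
 * A hedged usage sketch: tracing_set_cpumask() is what ends up
 * servicing a write such as
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * which limits recording to CPUs 0 and 1.  The loop above disables the
 * ring buffers (and bumps the disabled counter) for CPUs leaving the
 * mask and re-enables them for CPUs entering it.
 */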
5105 
5106 static ssize_t
5107 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5108 		      size_t count, loff_t *ppos)
5109 {
5110 	struct trace_array *tr = file_inode(filp)->i_private;
5111 	cpumask_var_t tracing_cpumask_new;
5112 	int err;
5113 
5114 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5115 		return -ENOMEM;
5116 
5117 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5118 	if (err)
5119 		goto err_free;
5120 
5121 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5122 	if (err)
5123 		goto err_free;
5124 
5125 	free_cpumask_var(tracing_cpumask_new);
5126 
5127 	return count;
5128 
5129 err_free:
5130 	free_cpumask_var(tracing_cpumask_new);
5131 
5132 	return err;
5133 }
5134 
5135 static const struct file_operations tracing_cpumask_fops = {
5136 	.open		= tracing_open_generic_tr,
5137 	.read		= tracing_cpumask_read,
5138 	.write		= tracing_cpumask_write,
5139 	.release	= tracing_release_generic_tr,
5140 	.llseek		= generic_file_llseek,
5141 };
5142 
5143 static int tracing_trace_options_show(struct seq_file *m, void *v)
5144 {
5145 	struct tracer_opt *trace_opts;
5146 	struct trace_array *tr = m->private;
5147 	u32 tracer_flags;
5148 	int i;
5149 
5150 	mutex_lock(&trace_types_lock);
5151 	tracer_flags = tr->current_trace->flags->val;
5152 	trace_opts = tr->current_trace->flags->opts;
5153 
5154 	for (i = 0; trace_options[i]; i++) {
5155 		if (tr->trace_flags & (1 << i))
5156 			seq_printf(m, "%s\n", trace_options[i]);
5157 		else
5158 			seq_printf(m, "no%s\n", trace_options[i]);
5159 	}
5160 
5161 	for (i = 0; trace_opts[i].name; i++) {
5162 		if (tracer_flags & trace_opts[i].bit)
5163 			seq_printf(m, "%s\n", trace_opts[i].name);
5164 		else
5165 			seq_printf(m, "no%s\n", trace_opts[i].name);
5166 	}
5167 	mutex_unlock(&trace_types_lock);
5168 
5169 	return 0;
5170 }
5171 
5172 static int __set_tracer_option(struct trace_array *tr,
5173 			       struct tracer_flags *tracer_flags,
5174 			       struct tracer_opt *opts, int neg)
5175 {
5176 	struct tracer *trace = tracer_flags->trace;
5177 	int ret;
5178 
5179 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5180 	if (ret)
5181 		return ret;
5182 
5183 	if (neg)
5184 		tracer_flags->val &= ~opts->bit;
5185 	else
5186 		tracer_flags->val |= opts->bit;
5187 	return 0;
5188 }
5189 
5190 /* Try to assign a tracer specific option */
5191 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5192 {
5193 	struct tracer *trace = tr->current_trace;
5194 	struct tracer_flags *tracer_flags = trace->flags;
5195 	struct tracer_opt *opts = NULL;
5196 	int i;
5197 
5198 	for (i = 0; tracer_flags->opts[i].name; i++) {
5199 		opts = &tracer_flags->opts[i];
5200 
5201 		if (strcmp(cmp, opts->name) == 0)
5202 			return __set_tracer_option(tr, trace->flags, opts, neg);
5203 	}
5204 
5205 	return -EINVAL;
5206 }
5207 
5208 /* Some tracers require overwrite to stay enabled */
5209 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5210 {
5211 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5212 		return -1;
5213 
5214 	return 0;
5215 }
5216 
5217 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5218 {
5219 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5220 	    (mask == TRACE_ITER_RECORD_CMD))
5221 		lockdep_assert_held(&event_mutex);
5222 
5223 	/* do nothing if flag is already set */
5224 	if (!!(tr->trace_flags & mask) == !!enabled)
5225 		return 0;
5226 
5227 	/* Give the tracer a chance to approve the change */
5228 	if (tr->current_trace->flag_changed)
5229 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5230 			return -EINVAL;
5231 
5232 	if (enabled)
5233 		tr->trace_flags |= mask;
5234 	else
5235 		tr->trace_flags &= ~mask;
5236 
5237 	if (mask == TRACE_ITER_RECORD_CMD)
5238 		trace_event_enable_cmd_record(enabled);
5239 
5240 	if (mask == TRACE_ITER_RECORD_TGID) {
5241 
5242 		if (trace_alloc_tgid_map() < 0) {
5243 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5244 			return -ENOMEM;
5245 		}
5246 
5247 		trace_event_enable_tgid_record(enabled);
5248 	}
5249 
5250 	if (mask == TRACE_ITER_EVENT_FORK)
5251 		trace_event_follow_fork(tr, enabled);
5252 
5253 	if (mask == TRACE_ITER_FUNC_FORK)
5254 		ftrace_pid_follow_fork(tr, enabled);
5255 
5256 	if (mask == TRACE_ITER_OVERWRITE) {
5257 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5258 #ifdef CONFIG_TRACER_MAX_TRACE
5259 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5260 #endif
5261 	}
5262 
5263 	if (mask == TRACE_ITER_PRINTK) {
5264 		trace_printk_start_stop_comm(enabled);
5265 		trace_printk_control(enabled);
5266 	}
5267 
5268 	return 0;
5269 }
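
/*
 * A hedged example of how this is reached from user space.  Writing
 *
 *	# echo nooverwrite > /sys/kernel/tracing/trace_options
 *
 * ends up in set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 0), which lets
 * the current tracer veto the change via ->flag_changed() before the
 * ring buffers are switched from overwrite to "stop when full" mode.
 */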
5270 
5271 int trace_set_options(struct trace_array *tr, char *option)
5272 {
5273 	char *cmp;
5274 	int neg = 0;
5275 	int ret;
5276 	size_t orig_len = strlen(option);
5277 	int len;
5278 
5279 	cmp = strstrip(option);
5280 
5281 	len = str_has_prefix(cmp, "no");
5282 	if (len)
5283 		neg = 1;
5284 
5285 	cmp += len;
5286 
5287 	mutex_lock(&event_mutex);
5288 	mutex_lock(&trace_types_lock);
5289 
5290 	ret = match_string(trace_options, -1, cmp);
5291 	/* If no option could be set, test the specific tracer options */
5292 	if (ret < 0)
5293 		ret = set_tracer_option(tr, cmp, neg);
5294 	else
5295 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5296 
5297 	mutex_unlock(&trace_types_lock);
5298 	mutex_unlock(&event_mutex);
5299 
5300 	/*
5301 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5302 	 * turn it back into a space.
5303 	 */
5304 	if (orig_len > strlen(option))
5305 		option[strlen(option)] = ' ';
5306 
5307 	return ret;
5308 }
5309 
5310 static void __init apply_trace_boot_options(void)
5311 {
5312 	char *buf = trace_boot_options_buf;
5313 	char *option;
5314 
5315 	while (true) {
5316 		option = strsep(&buf, ",");
5317 
5318 		if (!option)
5319 			break;
5320 
5321 		if (*option)
5322 			trace_set_options(&global_trace, option);
5323 
5324 		/* Put back the comma to allow this to be called again */
5325 		if (buf)
5326 			*(buf - 1) = ',';
5327 	}
5328 }
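
/*
 * apply_trace_boot_options() walks the buffer filled from the
 * "trace_options=" kernel command line, for example
 *
 *	trace_options=sym-addr,noirq-info,stacktrace
 *
 * Each comma-separated token is passed to trace_set_options() exactly
 * as if it had been written to the trace_options file; the comma that
 * strsep() turned into '\0' is then restored so the buffer can be
 * walked again later.
 */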
5329 
5330 static ssize_t
5331 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5332 			size_t cnt, loff_t *ppos)
5333 {
5334 	struct seq_file *m = filp->private_data;
5335 	struct trace_array *tr = m->private;
5336 	char buf[64];
5337 	int ret;
5338 
5339 	if (cnt >= sizeof(buf))
5340 		return -EINVAL;
5341 
5342 	if (copy_from_user(buf, ubuf, cnt))
5343 		return -EFAULT;
5344 
5345 	buf[cnt] = 0;
5346 
5347 	ret = trace_set_options(tr, buf);
5348 	if (ret < 0)
5349 		return ret;
5350 
5351 	*ppos += cnt;
5352 
5353 	return cnt;
5354 }
5355 
5356 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5357 {
5358 	struct trace_array *tr = inode->i_private;
5359 	int ret;
5360 
5361 	ret = tracing_check_open_get_tr(tr);
5362 	if (ret)
5363 		return ret;
5364 
5365 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5366 	if (ret < 0)
5367 		trace_array_put(tr);
5368 
5369 	return ret;
5370 }
5371 
5372 static const struct file_operations tracing_iter_fops = {
5373 	.open		= tracing_trace_options_open,
5374 	.read		= seq_read,
5375 	.llseek		= seq_lseek,
5376 	.release	= tracing_single_release_tr,
5377 	.write		= tracing_trace_options_write,
5378 };
5379 
5380 static const char readme_msg[] =
5381 	"tracing mini-HOWTO:\n\n"
5382 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5383 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5384 	" Important files:\n"
5385 	"  trace\t\t\t- The static contents of the buffer\n"
5386 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5387 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5388 	"  current_tracer\t- function and latency tracers\n"
5389 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5390 	"  error_log\t- error log for failed commands (that support it)\n"
5391 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5392 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5393 	"  trace_clock\t\t- change the clock used to order events\n"
5394 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5395 	"      global:   Synced across CPUs but slows tracing down.\n"
5396 	"     counter:   Not a clock, but just an increment\n"
5397 	"      uptime:   Jiffy counter from time of boot\n"
5398 	"        perf:   Same clock that perf events use\n"
5399 #ifdef CONFIG_X86_64
5400 	"     x86-tsc:   TSC cycle counter\n"
5401 #endif
5402 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5403 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5404 	"    absolute:   Absolute (standalone) timestamp\n"
5405 	"\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
5406 	"\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
5407 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5408 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5409 	"\t\t\t  Remove sub-buffer with rmdir\n"
5410 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5411 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5412 	"\t\t\t  option name\n"
5413 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5414 #ifdef CONFIG_DYNAMIC_FTRACE
5415 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5416 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5417 	"\t\t\t  functions\n"
5418 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5419 	"\t     modules: Can select a group via module\n"
5420 	"\t      Format: :mod:<module-name>\n"
5421 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5422 	"\t    triggers: a command to perform when function is hit\n"
5423 	"\t      Format: <function>:<trigger>[:count]\n"
5424 	"\t     trigger: traceon, traceoff\n"
5425 	"\t\t      enable_event:<system>:<event>\n"
5426 	"\t\t      disable_event:<system>:<event>\n"
5427 #ifdef CONFIG_STACKTRACE
5428 	"\t\t      stacktrace\n"
5429 #endif
5430 #ifdef CONFIG_TRACER_SNAPSHOT
5431 	"\t\t      snapshot\n"
5432 #endif
5433 	"\t\t      dump\n"
5434 	"\t\t      cpudump\n"
5435 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5436 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5437 	"\t     The first one will disable tracing every time do_fault is hit\n"
5438 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5439 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5440 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5441 	"\t       the counter will not decrement. It only decrements when the\n"
5442 	"\t       trigger did work\n"
5443 	"\t     To remove a trigger without a count:\n"
5444 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5445 	"\t     To remove a trigger with a count:\n"
5446 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5447 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5448 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5449 	"\t    modules: Can select a group via module command :mod:\n"
5450 	"\t    Does not accept triggers\n"
5451 #endif /* CONFIG_DYNAMIC_FTRACE */
5452 #ifdef CONFIG_FUNCTION_TRACER
5453 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5454 	"\t\t    (function)\n"
5455 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5456 	"\t\t    (function)\n"
5457 #endif
5458 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5459 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5460 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5461 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5462 #endif
5463 #ifdef CONFIG_TRACER_SNAPSHOT
5464 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5465 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5466 	"\t\t\t  information\n"
5467 #endif
5468 #ifdef CONFIG_STACK_TRACER
5469 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5470 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5471 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5472 	"\t\t\t  new trace)\n"
5473 #ifdef CONFIG_DYNAMIC_FTRACE
5474 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5475 	"\t\t\t  traces\n"
5476 #endif
5477 #endif /* CONFIG_STACK_TRACER */
5478 #ifdef CONFIG_DYNAMIC_EVENTS
5479 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5480 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5481 #endif
5482 #ifdef CONFIG_KPROBE_EVENTS
5483 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5484 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5485 #endif
5486 #ifdef CONFIG_UPROBE_EVENTS
5487 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5488 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5489 #endif
5490 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5491     defined(CONFIG_FPROBE_EVENTS)
5492 	"\t  accepts: event-definitions (one definition per line)\n"
5493 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5494 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5495 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5496 #endif
5497 #ifdef CONFIG_FPROBE_EVENTS
5498 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5499 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5500 #endif
5501 #ifdef CONFIG_HIST_TRIGGERS
5502 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5503 #endif
5504 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5505 	"\t           -:[<group>/][<event>]\n"
5506 #ifdef CONFIG_KPROBE_EVENTS
5507 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5508   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5509 #endif
5510 #ifdef CONFIG_UPROBE_EVENTS
5511   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5512 #endif
5513 	"\t     args: <name>=fetcharg[:type]\n"
5514 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5515 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5516 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5517 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5518 	"\t           <argname>[->field[->field|.field...]],\n"
5519 #endif
5520 #else
5521 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5522 #endif
5523 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5524 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5525 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5526 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5527 	"\t           symstr, <type>\\[<array-size>\\]\n"
5528 #ifdef CONFIG_HIST_TRIGGERS
5529 	"\t    field: <stype> <name>;\n"
5530 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5531 	"\t           [unsigned] char/int/long\n"
5532 #endif
5533 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5534 	"\t            of the <attached-group>/<attached-event>.\n"
5535 #endif
5536 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5537 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5538 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5539 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5540 	"\t\t\t  events\n"
5541 	"      filter\t\t- If set, only events passing filter are traced\n"
5542 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5543 	"\t\t\t  <event>:\n"
5544 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5545 	"      filter\t\t- If set, only events passing filter are traced\n"
5546 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5547 	"\t    Format: <trigger>[:count][if <filter>]\n"
5548 	"\t   trigger: traceon, traceoff\n"
5549 	"\t            enable_event:<system>:<event>\n"
5550 	"\t            disable_event:<system>:<event>\n"
5551 #ifdef CONFIG_HIST_TRIGGERS
5552 	"\t            enable_hist:<system>:<event>\n"
5553 	"\t            disable_hist:<system>:<event>\n"
5554 #endif
5555 #ifdef CONFIG_STACKTRACE
5556 	"\t\t    stacktrace\n"
5557 #endif
5558 #ifdef CONFIG_TRACER_SNAPSHOT
5559 	"\t\t    snapshot\n"
5560 #endif
5561 #ifdef CONFIG_HIST_TRIGGERS
5562 	"\t\t    hist (see below)\n"
5563 #endif
5564 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5565 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5566 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5567 	"\t                  events/block/block_unplug/trigger\n"
5568 	"\t   The first disables tracing every time block_unplug is hit.\n"
5569 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5570 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5571 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5572 	"\t   Like function triggers, the counter is only decremented if it\n"
5573 	"\t    enabled or disabled tracing.\n"
5574 	"\t   To remove a trigger without a count:\n"
5575 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5576 	"\t   To remove a trigger with a count:\n"
5577 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5578 	"\t   Filters can be ignored when removing a trigger.\n"
5579 #ifdef CONFIG_HIST_TRIGGERS
5580 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5581 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5582 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5583 	"\t            [:values=<field1[,field2,...]>]\n"
5584 	"\t            [:sort=<field1[,field2,...]>]\n"
5585 	"\t            [:size=#entries]\n"
5586 	"\t            [:pause][:continue][:clear]\n"
5587 	"\t            [:name=histname1]\n"
5588 	"\t            [:nohitcount]\n"
5589 	"\t            [:<handler>.<action>]\n"
5590 	"\t            [if <filter>]\n\n"
5591 	"\t    Note, special fields can be used as well:\n"
5592 	"\t            common_timestamp - to record current timestamp\n"
5593 	"\t            common_cpu - to record the CPU the event happened on\n"
5594 	"\n"
5595 	"\t    A hist trigger variable can be:\n"
5596 	"\t        - a reference to a field e.g. x=common_timestamp,\n"
5597 	"\t        - a reference to another variable e.g. y=$x,\n"
5598 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5599 	"\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5600 	"\n"
5601 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5602 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5603 	"\t    variable reference, field or numeric literal.\n"
5604 	"\n"
5605 	"\t    When a matching event is hit, an entry is added to a hash\n"
5606 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5607 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5608 	"\t    correspond to fields in the event's format description.  Keys\n"
5609 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5610 	"\t    Compound keys consisting of up to two fields can be specified\n"
5611 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5612 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5613 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5614 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5615 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5616 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5617 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5618 	"\t    its histogram data will be shared with other triggers of the\n"
5619 	"\t    same name, and trigger hits will update this common data.\n\n"
5620 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5621 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5622 	"\t    triggers attached to an event, there will be a table for each\n"
5623 	"\t    trigger in the output.  The table displayed for a named\n"
5624 	"\t    trigger will be the same as any other instance having the\n"
5625 	"\t    same name.  The default format used to display a given field\n"
5626 	"\t    can be modified by appending any of the following modifiers\n"
5627 	"\t    to the field name, as applicable:\n\n"
5628 	"\t            .hex        display a number as a hex value\n"
5629 	"\t            .sym        display an address as a symbol\n"
5630 	"\t            .sym-offset display an address as a symbol and offset\n"
5631 	"\t            .execname   display a common_pid as a program name\n"
5632 	"\t            .syscall    display a syscall id as a syscall name\n"
5633 	"\t            .log2       display log2 value rather than raw number\n"
5634 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5635 	"\t            .usecs      display a common_timestamp in microseconds\n"
5636 	"\t            .percent    display a number as a percentage value\n"
5637 	"\t            .graph      display a bar-graph of a value\n\n"
5638 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5639 	"\t    trigger or to start a hist trigger but not log any events\n"
5640 	"\t    until told to do so.  'continue' can be used to start or\n"
5641 	"\t    restart a paused hist trigger.\n\n"
5642 	"\t    The 'clear' parameter will clear the contents of a running\n"
5643 	"\t    hist trigger and leave its current paused/active state\n"
5644 	"\t    unchanged.\n\n"
5645 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5646 	"\t    raw hitcount in the histogram.\n\n"
5647 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5648 	"\t    have one event conditionally start and stop another event's\n"
5649 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5650 	"\t    the enable_event and disable_event triggers.\n\n"
5651 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5652 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5653 	"\t        <handler>.<action>\n\n"
5654 	"\t    The available handlers are:\n\n"
5655 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5656 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5657 	"\t        onchange(var)            - invoke action if var changes\n\n"
5658 	"\t    The available actions are:\n\n"
5659 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5660 	"\t        save(field,...)                      - save current event fields\n"
5661 #ifdef CONFIG_TRACER_SNAPSHOT
5662 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5663 #endif
5664 #ifdef CONFIG_SYNTH_EVENTS
5665 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5666 	"\t  Write into this file to define/undefine new synthetic events.\n"
5667 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5668 #endif
5669 #endif
5670 ;
5671 
5672 static ssize_t
5673 tracing_readme_read(struct file *filp, char __user *ubuf,
5674 		       size_t cnt, loff_t *ppos)
5675 {
5676 	return simple_read_from_buffer(ubuf, cnt, ppos,
5677 					readme_msg, strlen(readme_msg));
5678 }
5679 
5680 static const struct file_operations tracing_readme_fops = {
5681 	.open		= tracing_open_generic,
5682 	.read		= tracing_readme_read,
5683 	.llseek		= generic_file_llseek,
5684 };
5685 
5686 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5687 static union trace_eval_map_item *
5688 update_eval_map(union trace_eval_map_item *ptr)
5689 {
5690 	if (!ptr->map.eval_string) {
5691 		if (ptr->tail.next) {
5692 			ptr = ptr->tail.next;
5693 			/* Set ptr to the next real item (skip head) */
5694 			ptr++;
5695 		} else
5696 			return NULL;
5697 	}
5698 	return ptr;
5699 }
5700 
5701 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5702 {
5703 	union trace_eval_map_item *ptr = v;
5704 
5705 	/*
5706 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5707 	 * This really should never happen.
5708 	 */
5709 	(*pos)++;
5710 	ptr = update_eval_map(ptr);
5711 	if (WARN_ON_ONCE(!ptr))
5712 		return NULL;
5713 
5714 	ptr++;
5715 	ptr = update_eval_map(ptr);
5716 
5717 	return ptr;
5718 }
5719 
5720 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5721 {
5722 	union trace_eval_map_item *v;
5723 	loff_t l = 0;
5724 
5725 	mutex_lock(&trace_eval_mutex);
5726 
5727 	v = trace_eval_maps;
5728 	if (v)
5729 		v++;
5730 
5731 	while (v && l < *pos) {
5732 		v = eval_map_next(m, v, &l);
5733 	}
5734 
5735 	return v;
5736 }
5737 
5738 static void eval_map_stop(struct seq_file *m, void *v)
5739 {
5740 	mutex_unlock(&trace_eval_mutex);
5741 }
5742 
5743 static int eval_map_show(struct seq_file *m, void *v)
5744 {
5745 	union trace_eval_map_item *ptr = v;
5746 
5747 	seq_printf(m, "%s %ld (%s)\n",
5748 		   ptr->map.eval_string, ptr->map.eval_value,
5749 		   ptr->map.system);
5750 
5751 	return 0;
5752 }
5753 
5754 static const struct seq_operations tracing_eval_map_seq_ops = {
5755 	.start		= eval_map_start,
5756 	.next		= eval_map_next,
5757 	.stop		= eval_map_stop,
5758 	.show		= eval_map_show,
5759 };
5760 
5761 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5762 {
5763 	int ret;
5764 
5765 	ret = tracing_check_open_get_tr(NULL);
5766 	if (ret)
5767 		return ret;
5768 
5769 	return seq_open(filp, &tracing_eval_map_seq_ops);
5770 }
5771 
5772 static const struct file_operations tracing_eval_map_fops = {
5773 	.open		= tracing_eval_map_open,
5774 	.read		= seq_read,
5775 	.llseek		= seq_lseek,
5776 	.release	= seq_release,
5777 };
5778 
5779 static inline union trace_eval_map_item *
5780 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5781 {
5782 	/* Return tail of array given the head */
5783 	return ptr + ptr->head.length + 1;
5784 }
5785 
5786 static void
5787 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5788 			   int len)
5789 {
5790 	struct trace_eval_map **stop;
5791 	struct trace_eval_map **map;
5792 	union trace_eval_map_item *map_array;
5793 	union trace_eval_map_item *ptr;
5794 
5795 	stop = start + len;
5796 
5797 	/*
5798 	 * The trace_eval_maps contains the map plus a head and tail item,
5799 	 * where the head holds the module and length of array, and the
5800 	 * tail holds a pointer to the next list.
5801 	 */
5802 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5803 	if (!map_array) {
5804 		pr_warn("Unable to allocate trace eval mapping\n");
5805 		return;
5806 	}
5807 
5808 	mutex_lock(&trace_eval_mutex);
5809 
5810 	if (!trace_eval_maps)
5811 		trace_eval_maps = map_array;
5812 	else {
5813 		ptr = trace_eval_maps;
5814 		for (;;) {
5815 			ptr = trace_eval_jmp_to_tail(ptr);
5816 			if (!ptr->tail.next)
5817 				break;
5818 			ptr = ptr->tail.next;
5819 
5820 		}
5821 		ptr->tail.next = map_array;
5822 	}
5823 	map_array->head.mod = mod;
5824 	map_array->head.length = len;
5825 	map_array++;
5826 
5827 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5828 		map_array->map = **map;
5829 		map_array++;
5830 	}
5831 	memset(map_array, 0, sizeof(*map_array));
5832 
5833 	mutex_unlock(&trace_eval_mutex);
5834 }
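
/*
 * Illustrative layout (editor's sketch, not functional code) of one block
 * appended by trace_insert_eval_map_file() above.  The head and tail are
 * the two extra items allocated around the @len maps; the zeroed tail is
 * also the link that chains to the next module's block:
 *
 *   [ head: mod, length = len ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() relies on this layout: head + length + 1 is
 * always the tail item.
 */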
5835 
5836 static void trace_create_eval_file(struct dentry *d_tracer)
5837 {
5838 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5839 			  NULL, &tracing_eval_map_fops);
5840 }
5841 
5842 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5843 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5844 static inline void trace_insert_eval_map_file(struct module *mod,
5845 			      struct trace_eval_map **start, int len) { }
5846 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5847 
5848 static void trace_insert_eval_map(struct module *mod,
5849 				  struct trace_eval_map **start, int len)
5850 {
5851 	struct trace_eval_map **map;
5852 
5853 	if (len <= 0)
5854 		return;
5855 
5856 	map = start;
5857 
5858 	trace_event_eval_update(map, len);
5859 
5860 	trace_insert_eval_map_file(mod, start, len);
5861 }
5862 
5863 static ssize_t
5864 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5865 		       size_t cnt, loff_t *ppos)
5866 {
5867 	struct trace_array *tr = filp->private_data;
5868 	char buf[MAX_TRACER_SIZE+2];
5869 	int r;
5870 
5871 	mutex_lock(&trace_types_lock);
5872 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5873 	mutex_unlock(&trace_types_lock);
5874 
5875 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5876 }
5877 
5878 int tracer_init(struct tracer *t, struct trace_array *tr)
5879 {
5880 	tracing_reset_online_cpus(&tr->array_buffer);
5881 	return t->init(tr);
5882 }
5883 
5884 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5885 {
5886 	int cpu;
5887 
5888 	for_each_tracing_cpu(cpu)
5889 		per_cpu_ptr(buf->data, cpu)->entries = val;
5890 }
5891 
5892 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5893 {
5894 	if (cpu == RING_BUFFER_ALL_CPUS) {
5895 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5896 	} else {
5897 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5898 	}
5899 }
5900 
5901 #ifdef CONFIG_TRACER_MAX_TRACE
5902 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5903 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5904 					struct array_buffer *size_buf, int cpu_id)
5905 {
5906 	int cpu, ret = 0;
5907 
5908 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5909 		for_each_tracing_cpu(cpu) {
5910 			ret = ring_buffer_resize(trace_buf->buffer,
5911 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5912 			if (ret < 0)
5913 				break;
5914 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5915 				per_cpu_ptr(size_buf->data, cpu)->entries;
5916 		}
5917 	} else {
5918 		ret = ring_buffer_resize(trace_buf->buffer,
5919 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5920 		if (ret == 0)
5921 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5922 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5923 	}
5924 
5925 	return ret;
5926 }
5927 #endif /* CONFIG_TRACER_MAX_TRACE */
5928 
5929 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5930 					unsigned long size, int cpu)
5931 {
5932 	int ret;
5933 
5934 	/*
5935 	 * If kernel or user changes the size of the ring buffer
5936 	 * we use the size that was given, and we can forget about
5937 	 * expanding it later.
5938 	 */
5939 	trace_set_ring_buffer_expanded(tr);
5940 
5941 	/* May be called before buffers are initialized */
5942 	if (!tr->array_buffer.buffer)
5943 		return 0;
5944 
5945 	/* Do not allow tracing while resizing ring buffer */
5946 	tracing_stop_tr(tr);
5947 
5948 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5949 	if (ret < 0)
5950 		goto out_start;
5951 
5952 #ifdef CONFIG_TRACER_MAX_TRACE
5953 	if (!tr->allocated_snapshot)
5954 		goto out;
5955 
5956 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5957 	if (ret < 0) {
5958 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5959 						     &tr->array_buffer, cpu);
5960 		if (r < 0) {
5961 			/*
5962 			 * AARGH! We are left with a differently
5963 			 * sized max buffer!!!!
5964 			 * The max buffer is our "snapshot" buffer.
5965 			 * When a tracer needs a snapshot (one of the
5966 			 * latency tracers), it swaps the max buffer
5967 			 * with the saved snapshot. We succeeded in
5968 			 * updating the size of the main buffer, but failed
5969 			 * to update the size of the max buffer. But when we
5970 			 * tried to reset the main buffer to its original
5971 			 * size, we failed there too. This is very unlikely
5972 			 * to happen, but if it does, warn and kill all
5973 			 * tracing.
5974 			 */
5975 			WARN_ON(1);
5976 			tracing_disabled = 1;
5977 		}
5978 		goto out_start;
5979 	}
5980 
5981 	update_buffer_entries(&tr->max_buffer, cpu);
5982 
5983  out:
5984 #endif /* CONFIG_TRACER_MAX_TRACE */
5985 
5986 	update_buffer_entries(&tr->array_buffer, cpu);
5987  out_start:
5988 	tracing_start_tr(tr);
5989 	return ret;
5990 }
5991 
5992 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5993 				  unsigned long size, int cpu_id)
5994 {
5995 	int ret;
5996 
5997 	mutex_lock(&trace_types_lock);
5998 
5999 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6000 		/* make sure this cpu is enabled in the mask */
6001 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6002 			ret = -EINVAL;
6003 			goto out;
6004 		}
6005 	}
6006 
6007 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6008 	if (ret < 0)
6009 		ret = -ENOMEM;
6010 
6011 out:
6012 	mutex_unlock(&trace_types_lock);
6013 
6014 	return ret;
6015 }
6016 
6017 
6018 /**
6019  * tracing_update_buffers - used by tracing facility to expand ring buffers
6020  * @tr: The tracing instance
6021  *
6022  * To save memory when tracing is never used on a system that has it
6023  * configured in, the ring buffers are set to a minimum size. But once
6024  * a user starts to use the tracing facility, they need to grow
6025  * to their default size.
6026  *
6027  * This function is to be called when a tracer is about to be used.
6028  */
6029 int tracing_update_buffers(struct trace_array *tr)
6030 {
6031 	int ret = 0;
6032 
6033 	mutex_lock(&trace_types_lock);
6034 	if (!tr->ring_buffer_expanded)
6035 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6036 						RING_BUFFER_ALL_CPUS);
6037 	mutex_unlock(&trace_types_lock);
6038 
6039 	return ret;
6040 }
6041 
6042 struct trace_option_dentry;
6043 
6044 static void
6045 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6046 
6047 /*
6048  * Used to clear out the tracer before deletion of an instance.
6049  * Must have trace_types_lock held.
6050  */
6051 static void tracing_set_nop(struct trace_array *tr)
6052 {
6053 	if (tr->current_trace == &nop_trace)
6054 		return;
6055 
6056 	tr->current_trace->enabled--;
6057 
6058 	if (tr->current_trace->reset)
6059 		tr->current_trace->reset(tr);
6060 
6061 	tr->current_trace = &nop_trace;
6062 }
6063 
6064 static bool tracer_options_updated;
6065 
6066 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6067 {
6068 	/* Only enable if the directory has been created already. */
6069 	if (!tr->dir)
6070 		return;
6071 
6072 	/* Only create trace option files after update_tracer_options finishes */
6073 	if (!tracer_options_updated)
6074 		return;
6075 
6076 	create_trace_option_files(tr, t);
6077 }
6078 
6079 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6080 {
6081 	struct tracer *t;
6082 #ifdef CONFIG_TRACER_MAX_TRACE
6083 	bool had_max_tr;
6084 #endif
6085 	int ret = 0;
6086 
6087 	mutex_lock(&trace_types_lock);
6088 
6089 	if (!tr->ring_buffer_expanded) {
6090 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6091 						RING_BUFFER_ALL_CPUS);
6092 		if (ret < 0)
6093 			goto out;
6094 		ret = 0;
6095 	}
6096 
6097 	for (t = trace_types; t; t = t->next) {
6098 		if (strcmp(t->name, buf) == 0)
6099 			break;
6100 	}
6101 	if (!t) {
6102 		ret = -EINVAL;
6103 		goto out;
6104 	}
6105 	if (t == tr->current_trace)
6106 		goto out;
6107 
6108 #ifdef CONFIG_TRACER_SNAPSHOT
6109 	if (t->use_max_tr) {
6110 		local_irq_disable();
6111 		arch_spin_lock(&tr->max_lock);
6112 		if (tr->cond_snapshot)
6113 			ret = -EBUSY;
6114 		arch_spin_unlock(&tr->max_lock);
6115 		local_irq_enable();
6116 		if (ret)
6117 			goto out;
6118 	}
6119 #endif
6120 	/* Some tracers won't work on kernel command line */
6121 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6122 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6123 			t->name);
6124 		goto out;
6125 	}
6126 
6127 	/* Some tracers are only allowed for the top level buffer */
6128 	if (!trace_ok_for_array(t, tr)) {
6129 		ret = -EINVAL;
6130 		goto out;
6131 	}
6132 
6133 	/* If trace pipe files are being read, we can't change the tracer */
6134 	if (tr->trace_ref) {
6135 		ret = -EBUSY;
6136 		goto out;
6137 	}
6138 
6139 	trace_branch_disable();
6140 
6141 	tr->current_trace->enabled--;
6142 
6143 	if (tr->current_trace->reset)
6144 		tr->current_trace->reset(tr);
6145 
6146 #ifdef CONFIG_TRACER_MAX_TRACE
6147 	had_max_tr = tr->current_trace->use_max_tr;
6148 
6149 	/* Current trace needs to be nop_trace before synchronize_rcu */
6150 	tr->current_trace = &nop_trace;
6151 
6152 	if (had_max_tr && !t->use_max_tr) {
6153 		/*
6154 		 * We need to make sure that the update_max_tr sees that
6155 		 * current_trace changed to nop_trace to keep it from
6156 		 * swapping the buffers after we resize it.
6157 		 * update_max_tr() is called with interrupts disabled,
6158 		 * so a synchronize_rcu() is sufficient.
6159 		 */
6160 		synchronize_rcu();
6161 		free_snapshot(tr);
6162 		tracing_disarm_snapshot(tr);
6163 	}
6164 
6165 	if (!had_max_tr && t->use_max_tr) {
6166 		ret = tracing_arm_snapshot_locked(tr);
6167 		if (ret)
6168 			goto out;
6169 	}
6170 #else
6171 	tr->current_trace = &nop_trace;
6172 #endif
6173 
6174 	if (t->init) {
6175 		ret = tracer_init(t, tr);
6176 		if (ret) {
6177 #ifdef CONFIG_TRACER_MAX_TRACE
6178 			if (t->use_max_tr)
6179 				tracing_disarm_snapshot(tr);
6180 #endif
6181 			goto out;
6182 		}
6183 	}
6184 
6185 	tr->current_trace = t;
6186 	tr->current_trace->enabled++;
6187 	trace_branch_enable(tr);
6188  out:
6189 	mutex_unlock(&trace_types_lock);
6190 
6191 	return ret;
6192 }
6193 
6194 static ssize_t
6195 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6196 			size_t cnt, loff_t *ppos)
6197 {
6198 	struct trace_array *tr = filp->private_data;
6199 	char buf[MAX_TRACER_SIZE+1];
6200 	char *name;
6201 	size_t ret;
6202 	int err;
6203 
6204 	ret = cnt;
6205 
6206 	if (cnt > MAX_TRACER_SIZE)
6207 		cnt = MAX_TRACER_SIZE;
6208 
6209 	if (copy_from_user(buf, ubuf, cnt))
6210 		return -EFAULT;
6211 
6212 	buf[cnt] = 0;
6213 
6214 	name = strim(buf);
6215 
6216 	err = tracing_set_tracer(tr, name);
6217 	if (err)
6218 		return err;
6219 
6220 	*ppos += ret;
6221 
6222 	return ret;
6223 }
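
/*
 * Example usage (assuming this write handler backs the tracefs
 * "current_tracer" file, as wired up elsewhere in this file):
 *
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *   # cat /sys/kernel/tracing/current_tracer
 *
 * Unknown tracer names fail with -EINVAL, and the tracer cannot be
 * changed while trace_pipe readers hold a reference (-EBUSY).
 */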
6224 
6225 static ssize_t
6226 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6227 		   size_t cnt, loff_t *ppos)
6228 {
6229 	char buf[64];
6230 	int r;
6231 
6232 	r = snprintf(buf, sizeof(buf), "%ld\n",
6233 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6234 	if (r > sizeof(buf))
6235 		r = sizeof(buf);
6236 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6237 }
6238 
6239 static ssize_t
6240 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6241 		    size_t cnt, loff_t *ppos)
6242 {
6243 	unsigned long val;
6244 	int ret;
6245 
6246 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6247 	if (ret)
6248 		return ret;
6249 
6250 	*ptr = val * 1000;
6251 
6252 	return cnt;
6253 }
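
/*
 * Note: the two helpers above expose nanosecond-resolution values in
 * microseconds.  The read side converts with nsecs_to_usecs() and the
 * write side multiplies the user value by 1000, so writing "500" stores
 * 500000 ns.
 */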
6254 
6255 static ssize_t
6256 tracing_thresh_read(struct file *filp, char __user *ubuf,
6257 		    size_t cnt, loff_t *ppos)
6258 {
6259 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6260 }
6261 
6262 static ssize_t
6263 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6264 		     size_t cnt, loff_t *ppos)
6265 {
6266 	struct trace_array *tr = filp->private_data;
6267 	int ret;
6268 
6269 	mutex_lock(&trace_types_lock);
6270 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6271 	if (ret < 0)
6272 		goto out;
6273 
6274 	if (tr->current_trace->update_thresh) {
6275 		ret = tr->current_trace->update_thresh(tr);
6276 		if (ret < 0)
6277 			goto out;
6278 	}
6279 
6280 	ret = cnt;
6281 out:
6282 	mutex_unlock(&trace_types_lock);
6283 
6284 	return ret;
6285 }
6286 
6287 #ifdef CONFIG_TRACER_MAX_TRACE
6288 
6289 static ssize_t
6290 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6291 		     size_t cnt, loff_t *ppos)
6292 {
6293 	struct trace_array *tr = filp->private_data;
6294 
6295 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6296 }
6297 
6298 static ssize_t
6299 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6300 		      size_t cnt, loff_t *ppos)
6301 {
6302 	struct trace_array *tr = filp->private_data;
6303 
6304 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6305 }
6306 
6307 #endif
6308 
6309 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6310 {
6311 	if (cpu == RING_BUFFER_ALL_CPUS) {
6312 		if (cpumask_empty(tr->pipe_cpumask)) {
6313 			cpumask_setall(tr->pipe_cpumask);
6314 			return 0;
6315 		}
6316 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6317 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6318 		return 0;
6319 	}
6320 	return -EBUSY;
6321 }
6322 
6323 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6324 {
6325 	if (cpu == RING_BUFFER_ALL_CPUS) {
6326 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6327 		cpumask_clear(tr->pipe_cpumask);
6328 	} else {
6329 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6330 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6331 	}
6332 }
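
/*
 * The pipe_cpumask managed by the two helpers above makes trace_pipe
 * readers exclusive: either a single "all CPUs" reader claims every bit,
 * or at most one per-CPU reader may be open per CPU; any conflicting
 * open gets -EBUSY from open_pipe_on_cpu().
 */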
6333 
6334 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6335 {
6336 	struct trace_array *tr = inode->i_private;
6337 	struct trace_iterator *iter;
6338 	int cpu;
6339 	int ret;
6340 
6341 	ret = tracing_check_open_get_tr(tr);
6342 	if (ret)
6343 		return ret;
6344 
6345 	mutex_lock(&trace_types_lock);
6346 	cpu = tracing_get_cpu(inode);
6347 	ret = open_pipe_on_cpu(tr, cpu);
6348 	if (ret)
6349 		goto fail_pipe_on_cpu;
6350 
6351 	/* create a buffer to store the information to pass to userspace */
6352 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6353 	if (!iter) {
6354 		ret = -ENOMEM;
6355 		goto fail_alloc_iter;
6356 	}
6357 
6358 	trace_seq_init(&iter->seq);
6359 	iter->trace = tr->current_trace;
6360 
6361 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6362 		ret = -ENOMEM;
6363 		goto fail;
6364 	}
6365 
6366 	/* trace pipe does not show start of buffer */
6367 	cpumask_setall(iter->started);
6368 
6369 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6370 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6371 
6372 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6373 	if (trace_clocks[tr->clock_id].in_ns)
6374 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6375 
6376 	iter->tr = tr;
6377 	iter->array_buffer = &tr->array_buffer;
6378 	iter->cpu_file = cpu;
6379 	mutex_init(&iter->mutex);
6380 	filp->private_data = iter;
6381 
6382 	if (iter->trace->pipe_open)
6383 		iter->trace->pipe_open(iter);
6384 
6385 	nonseekable_open(inode, filp);
6386 
6387 	tr->trace_ref++;
6388 
6389 	mutex_unlock(&trace_types_lock);
6390 	return ret;
6391 
6392 fail:
6393 	kfree(iter);
6394 fail_alloc_iter:
6395 	close_pipe_on_cpu(tr, cpu);
6396 fail_pipe_on_cpu:
6397 	__trace_array_put(tr);
6398 	mutex_unlock(&trace_types_lock);
6399 	return ret;
6400 }
6401 
6402 static int tracing_release_pipe(struct inode *inode, struct file *file)
6403 {
6404 	struct trace_iterator *iter = file->private_data;
6405 	struct trace_array *tr = inode->i_private;
6406 
6407 	mutex_lock(&trace_types_lock);
6408 
6409 	tr->trace_ref--;
6410 
6411 	if (iter->trace->pipe_close)
6412 		iter->trace->pipe_close(iter);
6413 	close_pipe_on_cpu(tr, iter->cpu_file);
6414 	mutex_unlock(&trace_types_lock);
6415 
6416 	free_trace_iter_content(iter);
6417 	kfree(iter);
6418 
6419 	trace_array_put(tr);
6420 
6421 	return 0;
6422 }
6423 
6424 static __poll_t
6425 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6426 {
6427 	struct trace_array *tr = iter->tr;
6428 
6429 	/* Iterators are static; they should either be filled or empty */
6430 	if (trace_buffer_iter(iter, iter->cpu_file))
6431 		return EPOLLIN | EPOLLRDNORM;
6432 
6433 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6434 		/*
6435 		 * Always select as readable when in blocking mode
6436 		 */
6437 		return EPOLLIN | EPOLLRDNORM;
6438 	else
6439 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6440 					     filp, poll_table, iter->tr->buffer_percent);
6441 }
6442 
6443 static __poll_t
6444 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6445 {
6446 	struct trace_iterator *iter = filp->private_data;
6447 
6448 	return trace_poll(iter, filp, poll_table);
6449 }
6450 
6451 /* Must be called with iter->mutex held. */
6452 static int tracing_wait_pipe(struct file *filp)
6453 {
6454 	struct trace_iterator *iter = filp->private_data;
6455 	int ret;
6456 
6457 	while (trace_empty(iter)) {
6458 
6459 		if ((filp->f_flags & O_NONBLOCK)) {
6460 			return -EAGAIN;
6461 		}
6462 
6463 		/*
6464 		 * We block until we read something and tracing is disabled.
6465 		 * We still block if tracing is disabled, but we have never
6466 		 * read anything. This allows a user to cat this file, and
6467 		 * then enable tracing. But after we have read something,
6468 		 * we give an EOF when tracing is again disabled.
6469 		 *
6470 		 * iter->pos will be 0 if we haven't read anything.
6471 		 */
6472 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6473 			break;
6474 
6475 		mutex_unlock(&iter->mutex);
6476 
6477 		ret = wait_on_pipe(iter, 0);
6478 
6479 		mutex_lock(&iter->mutex);
6480 
6481 		if (ret)
6482 			return ret;
6483 	}
6484 
6485 	return 1;
6486 }
6487 
6488 /*
6489  * Consumer reader.
6490  */
6491 static ssize_t
6492 tracing_read_pipe(struct file *filp, char __user *ubuf,
6493 		  size_t cnt, loff_t *ppos)
6494 {
6495 	struct trace_iterator *iter = filp->private_data;
6496 	ssize_t sret;
6497 
6498 	/*
6499 	 * Avoid more than one consumer on a single file descriptor
6500 	 * This is just a matter of trace coherency; the ring buffer itself
6501 	 * is protected.
6502 	 */
6503 	mutex_lock(&iter->mutex);
6504 
6505 	/* return any leftover data */
6506 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6507 	if (sret != -EBUSY)
6508 		goto out;
6509 
6510 	trace_seq_init(&iter->seq);
6511 
6512 	if (iter->trace->read) {
6513 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6514 		if (sret)
6515 			goto out;
6516 	}
6517 
6518 waitagain:
6519 	sret = tracing_wait_pipe(filp);
6520 	if (sret <= 0)
6521 		goto out;
6522 
6523 	/* stop when tracing is finished */
6524 	if (trace_empty(iter)) {
6525 		sret = 0;
6526 		goto out;
6527 	}
6528 
6529 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6530 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6531 
6532 	/* reset all but tr, trace, and overruns */
6533 	trace_iterator_reset(iter);
6534 	cpumask_clear(iter->started);
6535 	trace_seq_init(&iter->seq);
6536 
6537 	trace_event_read_lock();
6538 	trace_access_lock(iter->cpu_file);
6539 	while (trace_find_next_entry_inc(iter) != NULL) {
6540 		enum print_line_t ret;
6541 		int save_len = iter->seq.seq.len;
6542 
6543 		ret = print_trace_line(iter);
6544 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6545 			/*
6546 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6547 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6548 			 * In this case, we need to consume it, otherwise the loop will peek
6549 			 * at this event next time, resulting in an infinite loop.
6550 			 */
6551 			if (save_len == 0) {
6552 				iter->seq.full = 0;
6553 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6554 				trace_consume(iter);
6555 				break;
6556 			}
6557 
6558 			/* In other cases, don't print partial lines */
6559 			iter->seq.seq.len = save_len;
6560 			break;
6561 		}
6562 		if (ret != TRACE_TYPE_NO_CONSUME)
6563 			trace_consume(iter);
6564 
6565 		if (trace_seq_used(&iter->seq) >= cnt)
6566 			break;
6567 
6568 		/*
6569 		 * The full flag being set means we reached the trace_seq buffer
6570 		 * size and should have left via the partial-line condition above.
6571 		 * One of the trace_seq_* functions is not used properly.
6572 		 */
6573 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6574 			  iter->ent->type);
6575 	}
6576 	trace_access_unlock(iter->cpu_file);
6577 	trace_event_read_unlock();
6578 
6579 	/* Now copy what we have to the user */
6580 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6581 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6582 		trace_seq_init(&iter->seq);
6583 
6584 	/*
6585 	 * If there was nothing to send to user, in spite of consuming trace
6586 	 * entries, go back to wait for more entries.
6587 	 */
6588 	if (sret == -EBUSY)
6589 		goto waitagain;
6590 
6591 out:
6592 	mutex_unlock(&iter->mutex);
6593 
6594 	return sret;
6595 }
6596 
6597 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6598 				     unsigned int idx)
6599 {
6600 	__free_page(spd->pages[idx]);
6601 }
6602 
6603 static size_t
6604 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6605 {
6606 	size_t count;
6607 	int save_len;
6608 	int ret;
6609 
6610 	/* Seq buffer is page-sized, exactly what we need. */
6611 	for (;;) {
6612 		save_len = iter->seq.seq.len;
6613 		ret = print_trace_line(iter);
6614 
6615 		if (trace_seq_has_overflowed(&iter->seq)) {
6616 			iter->seq.seq.len = save_len;
6617 			break;
6618 		}
6619 
6620 		/*
6621 		 * This should not be hit, because it should only
6622 		 * be set if the iter->seq overflowed. But check it
6623 		 * anyway to be safe.
6624 		 */
6625 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6626 			iter->seq.seq.len = save_len;
6627 			break;
6628 		}
6629 
6630 		count = trace_seq_used(&iter->seq) - save_len;
6631 		if (rem < count) {
6632 			rem = 0;
6633 			iter->seq.seq.len = save_len;
6634 			break;
6635 		}
6636 
6637 		if (ret != TRACE_TYPE_NO_CONSUME)
6638 			trace_consume(iter);
6639 		rem -= count;
6640 		if (!trace_find_next_entry_inc(iter))	{
6641 			rem = 0;
6642 			iter->ent = NULL;
6643 			break;
6644 		}
6645 	}
6646 
6647 	return rem;
6648 }
6649 
6650 static ssize_t tracing_splice_read_pipe(struct file *filp,
6651 					loff_t *ppos,
6652 					struct pipe_inode_info *pipe,
6653 					size_t len,
6654 					unsigned int flags)
6655 {
6656 	struct page *pages_def[PIPE_DEF_BUFFERS];
6657 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6658 	struct trace_iterator *iter = filp->private_data;
6659 	struct splice_pipe_desc spd = {
6660 		.pages		= pages_def,
6661 		.partial	= partial_def,
6662 		.nr_pages	= 0, /* This gets updated below. */
6663 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6664 		.ops		= &default_pipe_buf_ops,
6665 		.spd_release	= tracing_spd_release_pipe,
6666 	};
6667 	ssize_t ret;
6668 	size_t rem;
6669 	unsigned int i;
6670 
6671 	if (splice_grow_spd(pipe, &spd))
6672 		return -ENOMEM;
6673 
6674 	mutex_lock(&iter->mutex);
6675 
6676 	if (iter->trace->splice_read) {
6677 		ret = iter->trace->splice_read(iter, filp,
6678 					       ppos, pipe, len, flags);
6679 		if (ret)
6680 			goto out_err;
6681 	}
6682 
6683 	ret = tracing_wait_pipe(filp);
6684 	if (ret <= 0)
6685 		goto out_err;
6686 
6687 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6688 		ret = -EFAULT;
6689 		goto out_err;
6690 	}
6691 
6692 	trace_event_read_lock();
6693 	trace_access_lock(iter->cpu_file);
6694 
6695 	/* Fill as many pages as possible. */
6696 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6697 		spd.pages[i] = alloc_page(GFP_KERNEL);
6698 		if (!spd.pages[i])
6699 			break;
6700 
6701 		rem = tracing_fill_pipe_page(rem, iter);
6702 
6703 		/* Copy the data into the page, so we can start over. */
6704 		ret = trace_seq_to_buffer(&iter->seq,
6705 					  page_address(spd.pages[i]),
6706 					  trace_seq_used(&iter->seq));
6707 		if (ret < 0) {
6708 			__free_page(spd.pages[i]);
6709 			break;
6710 		}
6711 		spd.partial[i].offset = 0;
6712 		spd.partial[i].len = trace_seq_used(&iter->seq);
6713 
6714 		trace_seq_init(&iter->seq);
6715 	}
6716 
6717 	trace_access_unlock(iter->cpu_file);
6718 	trace_event_read_unlock();
6719 	mutex_unlock(&iter->mutex);
6720 
6721 	spd.nr_pages = i;
6722 
6723 	if (i)
6724 		ret = splice_to_pipe(pipe, &spd);
6725 	else
6726 		ret = 0;
6727 out:
6728 	splice_shrink_spd(&spd);
6729 	return ret;
6730 
6731 out_err:
6732 	mutex_unlock(&iter->mutex);
6733 	goto out;
6734 }
6735 
6736 static ssize_t
6737 tracing_entries_read(struct file *filp, char __user *ubuf,
6738 		     size_t cnt, loff_t *ppos)
6739 {
6740 	struct inode *inode = file_inode(filp);
6741 	struct trace_array *tr = inode->i_private;
6742 	int cpu = tracing_get_cpu(inode);
6743 	char buf[64];
6744 	int r = 0;
6745 	ssize_t ret;
6746 
6747 	mutex_lock(&trace_types_lock);
6748 
6749 	if (cpu == RING_BUFFER_ALL_CPUS) {
6750 		int cpu, buf_size_same;
6751 		unsigned long size;
6752 
6753 		size = 0;
6754 		buf_size_same = 1;
6755 		/* check if all cpu sizes are the same */
6756 		for_each_tracing_cpu(cpu) {
6757 			/* fill in the size from the first enabled cpu */
6758 			if (size == 0)
6759 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6760 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6761 				buf_size_same = 0;
6762 				break;
6763 			}
6764 		}
6765 
6766 		if (buf_size_same) {
6767 			if (!tr->ring_buffer_expanded)
6768 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6769 					    size >> 10,
6770 					    trace_buf_size >> 10);
6771 			else
6772 				r = sprintf(buf, "%lu\n", size >> 10);
6773 		} else
6774 			r = sprintf(buf, "X\n");
6775 	} else
6776 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6777 
6778 	mutex_unlock(&trace_types_lock);
6779 
6780 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6781 	return ret;
6782 }
6783 
6784 static ssize_t
6785 tracing_entries_write(struct file *filp, const char __user *ubuf,
6786 		      size_t cnt, loff_t *ppos)
6787 {
6788 	struct inode *inode = file_inode(filp);
6789 	struct trace_array *tr = inode->i_private;
6790 	unsigned long val;
6791 	int ret;
6792 
6793 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6794 	if (ret)
6795 		return ret;
6796 
6797 	/* must have at least 1 entry */
6798 	if (!val)
6799 		return -EINVAL;
6800 
6801 	/* value is in KB */
6802 	val <<= 10;
6803 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6804 	if (ret < 0)
6805 		return ret;
6806 
6807 	*ppos += cnt;
6808 
6809 	return cnt;
6810 }
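
/*
 * Example usage (assuming these handlers back the tracefs
 * "buffer_size_kb" files, as created elsewhere in this file):
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb              (all CPUs)
 *   # echo 1024 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb (one CPU)
 *
 * Values are in KiB per CPU; reads show "(expanded: N)" until the buffers
 * have been grown from their boot-time minimum.
 */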
6811 
6812 static ssize_t
6813 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6814 				size_t cnt, loff_t *ppos)
6815 {
6816 	struct trace_array *tr = filp->private_data;
6817 	char buf[64];
6818 	int r, cpu;
6819 	unsigned long size = 0, expanded_size = 0;
6820 
6821 	mutex_lock(&trace_types_lock);
6822 	for_each_tracing_cpu(cpu) {
6823 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6824 		if (!tr->ring_buffer_expanded)
6825 			expanded_size += trace_buf_size >> 10;
6826 	}
6827 	if (tr->ring_buffer_expanded)
6828 		r = sprintf(buf, "%lu\n", size);
6829 	else
6830 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6831 	mutex_unlock(&trace_types_lock);
6832 
6833 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6834 }
6835 
6836 static ssize_t
6837 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6838 			  size_t cnt, loff_t *ppos)
6839 {
6840 	/*
6841 	 * There is no need to read what the user has written; this function
6842 	 * exists just so that using "echo" on this file does not return an error.
6843 	 */
6844 
6845 	*ppos += cnt;
6846 
6847 	return cnt;
6848 }
6849 
6850 static int
6851 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6852 {
6853 	struct trace_array *tr = inode->i_private;
6854 
6855 	/* disable tracing ? */
6856 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6857 		tracer_tracing_off(tr);
6858 	/* resize the ring buffer to 0 */
6859 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6860 
6861 	trace_array_put(tr);
6862 
6863 	return 0;
6864 }
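
/*
 * Example usage (assuming these handlers back the tracefs "free_buffer"
 * file, as created elsewhere in this file).  It is the final close of the
 * file that actually frees the buffer, so e.g.
 *
 *   # echo > /sys/kernel/tracing/free_buffer
 *
 * shrinks the ring buffer to zero (turning tracing off first if
 * TRACE_ITER_STOP_ON_FREE is set).
 */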
6865 
6866 #define TRACE_MARKER_MAX_SIZE		4096
6867 
6868 static ssize_t
6869 tracing_mark_write(struct file *filp, const char __user *ubuf,
6870 					size_t cnt, loff_t *fpos)
6871 {
6872 	struct trace_array *tr = filp->private_data;
6873 	struct ring_buffer_event *event;
6874 	enum event_trigger_type tt = ETT_NONE;
6875 	struct trace_buffer *buffer;
6876 	struct print_entry *entry;
6877 	int meta_size;
6878 	ssize_t written;
6879 	size_t size;
6880 	int len;
6881 
6882 /* Used in tracing_mark_raw_write() as well */
6883 #define FAULTED_STR "<faulted>"
6884 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6885 
6886 	if (tracing_disabled)
6887 		return -EINVAL;
6888 
6889 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6890 		return -EINVAL;
6891 
6892 	if ((ssize_t)cnt < 0)
6893 		return -EINVAL;
6894 
6895 	if (cnt > TRACE_MARKER_MAX_SIZE)
6896 		cnt = TRACE_MARKER_MAX_SIZE;
6897 
6898 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6899  again:
6900 	size = cnt + meta_size;
6901 
6902 	/* If less than "<faulted>", then make sure we can still add that */
6903 	if (cnt < FAULTED_SIZE)
6904 		size += FAULTED_SIZE - cnt;
6905 
6906 	buffer = tr->array_buffer.buffer;
6907 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6908 					    tracing_gen_ctx());
6909 	if (unlikely(!event)) {
6910 		/*
6911 		 * If the size was greater than what was allowed, then
6912 		 * make it smaller and try again.
6913 		 */
6914 		if (size > ring_buffer_max_event_size(buffer)) {
6915 			/* With cnt < FAULTED_SIZE, size should never exceed the max */
6916 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
6917 				return -EBADF;
6918 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
6919 			/* The above should only happen once */
6920 			if (WARN_ON_ONCE(cnt + meta_size == size))
6921 				return -EBADF;
6922 			goto again;
6923 		}
6924 
6925 		/* Ring buffer disabled, return as if not open for write */
6926 		return -EBADF;
6927 	}
6928 
6929 	entry = ring_buffer_event_data(event);
6930 	entry->ip = _THIS_IP_;
6931 
6932 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6933 	if (len) {
6934 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6935 		cnt = FAULTED_SIZE;
6936 		written = -EFAULT;
6937 	} else
6938 		written = cnt;
6939 
6940 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6941 		/* do not add \n before testing triggers, but add \0 */
6942 		entry->buf[cnt] = '\0';
6943 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6944 	}
6945 
6946 	if (entry->buf[cnt - 1] != '\n') {
6947 		entry->buf[cnt] = '\n';
6948 		entry->buf[cnt + 1] = '\0';
6949 	} else
6950 		entry->buf[cnt] = '\0';
6951 
6952 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6953 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6954 	__buffer_unlock_commit(buffer, event);
6955 
6956 	if (tt)
6957 		event_triggers_post_call(tr->trace_marker_file, tt);
6958 
6959 	return written;
6960 }
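
/*
 * Example usage (assuming this is the write handler for the tracefs
 * "trace_marker" file, as wired up elsewhere in this file):
 *
 *   # echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * The string is recorded as a TRACE_PRINT event.  Writes larger than
 * TRACE_MARKER_MAX_SIZE are truncated, and a faulting copy stores the
 * "<faulted>" marker instead of the user data.
 */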
6961 
6962 static ssize_t
6963 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6964 					size_t cnt, loff_t *fpos)
6965 {
6966 	struct trace_array *tr = filp->private_data;
6967 	struct ring_buffer_event *event;
6968 	struct trace_buffer *buffer;
6969 	struct raw_data_entry *entry;
6970 	ssize_t written;
6971 	int size;
6972 	int len;
6973 
6974 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6975 
6976 	if (tracing_disabled)
6977 		return -EINVAL;
6978 
6979 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6980 		return -EINVAL;
6981 
6982 	/* The marker must at least have a tag id */
6983 	if (cnt < sizeof(unsigned int))
6984 		return -EINVAL;
6985 
6986 	size = sizeof(*entry) + cnt;
6987 	if (cnt < FAULT_SIZE_ID)
6988 		size += FAULT_SIZE_ID - cnt;
6989 
6990 	buffer = tr->array_buffer.buffer;
6991 
6992 	if (size > ring_buffer_max_event_size(buffer))
6993 		return -EINVAL;
6994 
6995 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6996 					    tracing_gen_ctx());
6997 	if (!event)
6998 		/* Ring buffer disabled, return as if not open for write */
6999 		return -EBADF;
7000 
7001 	entry = ring_buffer_event_data(event);
7002 
7003 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7004 	if (len) {
7005 		entry->id = -1;
7006 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7007 		written = -EFAULT;
7008 	} else
7009 		written = cnt;
7010 
7011 	__buffer_unlock_commit(buffer, event);
7012 
7013 	return written;
7014 }
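
/*
 * Unlike tracing_mark_write() above, the raw variant expects binary data:
 * the payload must begin with an 'unsigned int' tag id (stored in
 * entry->id), followed by arbitrary bytes recorded verbatim as a
 * TRACE_RAW_DATA event.
 */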
7015 
7016 static int tracing_clock_show(struct seq_file *m, void *v)
7017 {
7018 	struct trace_array *tr = m->private;
7019 	int i;
7020 
7021 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7022 		seq_printf(m,
7023 			"%s%s%s%s", i ? " " : "",
7024 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7025 			i == tr->clock_id ? "]" : "");
7026 	seq_putc(m, '\n');
7027 
7028 	return 0;
7029 }
7030 
7031 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7032 {
7033 	int i;
7034 
7035 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7036 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7037 			break;
7038 	}
7039 	if (i == ARRAY_SIZE(trace_clocks))
7040 		return -EINVAL;
7041 
7042 	mutex_lock(&trace_types_lock);
7043 
7044 	tr->clock_id = i;
7045 
7046 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7047 
7048 	/*
7049 	 * New clock may not be consistent with the previous clock.
7050 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7051 	 */
7052 	tracing_reset_online_cpus(&tr->array_buffer);
7053 
7054 #ifdef CONFIG_TRACER_MAX_TRACE
7055 	if (tr->max_buffer.buffer)
7056 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7057 	tracing_reset_online_cpus(&tr->max_buffer);
7058 #endif
7059 
7060 	mutex_unlock(&trace_types_lock);
7061 
7062 	return 0;
7063 }
7064 
7065 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7066 				   size_t cnt, loff_t *fpos)
7067 {
7068 	struct seq_file *m = filp->private_data;
7069 	struct trace_array *tr = m->private;
7070 	char buf[64];
7071 	const char *clockstr;
7072 	int ret;
7073 
7074 	if (cnt >= sizeof(buf))
7075 		return -EINVAL;
7076 
7077 	if (copy_from_user(buf, ubuf, cnt))
7078 		return -EFAULT;
7079 
7080 	buf[cnt] = 0;
7081 
7082 	clockstr = strstrip(buf);
7083 
7084 	ret = tracing_set_clock(tr, clockstr);
7085 	if (ret)
7086 		return ret;
7087 
7088 	*fpos += cnt;
7089 
7090 	return cnt;
7091 }
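
/*
 * Example usage (assuming these handlers back the tracefs "trace_clock"
 * file, as created elsewhere in this file):
 *
 *   # cat /sys/kernel/tracing/trace_clock    (current clock shown in [])
 *   # echo mono > /sys/kernel/tracing/trace_clock
 *
 * Switching clocks resets the buffers, because timestamps taken with
 * different clocks are not comparable (see tracing_set_clock() above).
 */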
7092 
7093 static int tracing_clock_open(struct inode *inode, struct file *file)
7094 {
7095 	struct trace_array *tr = inode->i_private;
7096 	int ret;
7097 
7098 	ret = tracing_check_open_get_tr(tr);
7099 	if (ret)
7100 		return ret;
7101 
7102 	ret = single_open(file, tracing_clock_show, inode->i_private);
7103 	if (ret < 0)
7104 		trace_array_put(tr);
7105 
7106 	return ret;
7107 }
7108 
7109 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7110 {
7111 	struct trace_array *tr = m->private;
7112 
7113 	mutex_lock(&trace_types_lock);
7114 
7115 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7116 		seq_puts(m, "delta [absolute]\n");
7117 	else
7118 		seq_puts(m, "[delta] absolute\n");
7119 
7120 	mutex_unlock(&trace_types_lock);
7121 
7122 	return 0;
7123 }
7124 
7125 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7126 {
7127 	struct trace_array *tr = inode->i_private;
7128 	int ret;
7129 
7130 	ret = tracing_check_open_get_tr(tr);
7131 	if (ret)
7132 		return ret;
7133 
7134 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7135 	if (ret < 0)
7136 		trace_array_put(tr);
7137 
7138 	return ret;
7139 }
7140 
7141 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7142 {
7143 	if (rbe == this_cpu_read(trace_buffered_event))
7144 		return ring_buffer_time_stamp(buffer);
7145 
7146 	return ring_buffer_event_time_stamp(buffer, rbe);
7147 }
7148 
7149 /*
7150  * Set or disable using the per CPU trace_buffered_event when possible.
7151  */
7152 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7153 {
7154 	int ret = 0;
7155 
7156 	mutex_lock(&trace_types_lock);
7157 
7158 	if (set && tr->no_filter_buffering_ref++)
7159 		goto out;
7160 
7161 	if (!set) {
7162 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7163 			ret = -EINVAL;
7164 			goto out;
7165 		}
7166 
7167 		--tr->no_filter_buffering_ref;
7168 	}
7169  out:
7170 	mutex_unlock(&trace_types_lock);
7171 
7172 	return ret;
7173 }
7174 
7175 struct ftrace_buffer_info {
7176 	struct trace_iterator	iter;
7177 	void			*spare;
7178 	unsigned int		spare_cpu;
7179 	unsigned int		spare_size;
7180 	unsigned int		read;
7181 };
7182 
7183 #ifdef CONFIG_TRACER_SNAPSHOT
7184 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7185 {
7186 	struct trace_array *tr = inode->i_private;
7187 	struct trace_iterator *iter;
7188 	struct seq_file *m;
7189 	int ret;
7190 
7191 	ret = tracing_check_open_get_tr(tr);
7192 	if (ret)
7193 		return ret;
7194 
7195 	if (file->f_mode & FMODE_READ) {
7196 		iter = __tracing_open(inode, file, true);
7197 		if (IS_ERR(iter))
7198 			ret = PTR_ERR(iter);
7199 	} else {
7200 		/* Writes still need the seq_file to hold the private data */
7201 		ret = -ENOMEM;
7202 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7203 		if (!m)
7204 			goto out;
7205 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7206 		if (!iter) {
7207 			kfree(m);
7208 			goto out;
7209 		}
7210 		ret = 0;
7211 
7212 		iter->tr = tr;
7213 		iter->array_buffer = &tr->max_buffer;
7214 		iter->cpu_file = tracing_get_cpu(inode);
7215 		m->private = iter;
7216 		file->private_data = m;
7217 	}
7218 out:
7219 	if (ret < 0)
7220 		trace_array_put(tr);
7221 
7222 	return ret;
7223 }
7224 
7225 static void tracing_swap_cpu_buffer(void *tr)
7226 {
7227 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7228 }
7229 
7230 static ssize_t
7231 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7232 		       loff_t *ppos)
7233 {
7234 	struct seq_file *m = filp->private_data;
7235 	struct trace_iterator *iter = m->private;
7236 	struct trace_array *tr = iter->tr;
7237 	unsigned long val;
7238 	int ret;
7239 
7240 	ret = tracing_update_buffers(tr);
7241 	if (ret < 0)
7242 		return ret;
7243 
7244 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7245 	if (ret)
7246 		return ret;
7247 
7248 	mutex_lock(&trace_types_lock);
7249 
7250 	if (tr->current_trace->use_max_tr) {
7251 		ret = -EBUSY;
7252 		goto out;
7253 	}
7254 
7255 	local_irq_disable();
7256 	arch_spin_lock(&tr->max_lock);
7257 	if (tr->cond_snapshot)
7258 		ret = -EBUSY;
7259 	arch_spin_unlock(&tr->max_lock);
7260 	local_irq_enable();
7261 	if (ret)
7262 		goto out;
7263 
7264 	switch (val) {
7265 	case 0:
7266 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7267 			ret = -EINVAL;
7268 			break;
7269 		}
7270 		if (tr->allocated_snapshot)
7271 			free_snapshot(tr);
7272 		break;
7273 	case 1:
7274 /* Only allow per-cpu swap if the ring buffer supports it */
7275 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7276 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7277 			ret = -EINVAL;
7278 			break;
7279 		}
7280 #endif
7281 		if (tr->allocated_snapshot)
7282 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7283 					&tr->array_buffer, iter->cpu_file);
7284 
7285 		ret = tracing_arm_snapshot_locked(tr);
7286 		if (ret)
7287 			break;
7288 
7289 		/* Now, we're going to swap */
7290 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7291 			local_irq_disable();
7292 			update_max_tr(tr, current, smp_processor_id(), NULL);
7293 			local_irq_enable();
7294 		} else {
7295 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7296 						 (void *)tr, 1);
7297 		}
7298 		tracing_disarm_snapshot(tr);
7299 		break;
7300 	default:
7301 		if (tr->allocated_snapshot) {
7302 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7303 				tracing_reset_online_cpus(&tr->max_buffer);
7304 			else
7305 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7306 		}
7307 		break;
7308 	}
7309 
7310 	if (ret >= 0) {
7311 		*ppos += cnt;
7312 		ret = cnt;
7313 	}
7314 out:
7315 	mutex_unlock(&trace_types_lock);
7316 	return ret;
7317 }
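
/*
 * Summary of the values accepted by the snapshot write handler above:
 *   0 - free the snapshot (max) buffer; only valid for the whole buffer,
 *       not a per-CPU file
 *   1 - allocate the snapshot buffer if needed and swap it with the live
 *       buffer (per-CPU swap only if the ring buffer supports it)
 *   * - any other value clears the snapshot buffer contents
 */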
7318 
7319 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7320 {
7321 	struct seq_file *m = file->private_data;
7322 	int ret;
7323 
7324 	ret = tracing_release(inode, file);
7325 
7326 	if (file->f_mode & FMODE_READ)
7327 		return ret;
7328 
7329 	/* If write only, the seq_file is just a stub */
7330 	if (m)
7331 		kfree(m->private);
7332 	kfree(m);
7333 
7334 	return 0;
7335 }
7336 
7337 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7338 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7339 				    size_t count, loff_t *ppos);
7340 static int tracing_buffers_release(struct inode *inode, struct file *file);
7341 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7342 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7343 
7344 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7345 {
7346 	struct ftrace_buffer_info *info;
7347 	int ret;
7348 
7349 	/* The following checks for tracefs lockdown */
7350 	ret = tracing_buffers_open(inode, filp);
7351 	if (ret < 0)
7352 		return ret;
7353 
7354 	info = filp->private_data;
7355 
7356 	if (info->iter.trace->use_max_tr) {
7357 		tracing_buffers_release(inode, filp);
7358 		return -EBUSY;
7359 	}
7360 
7361 	info->iter.snapshot = true;
7362 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7363 
7364 	return ret;
7365 }
7366 
7367 #endif /* CONFIG_TRACER_SNAPSHOT */
7368 
7369 
7370 static const struct file_operations tracing_thresh_fops = {
7371 	.open		= tracing_open_generic,
7372 	.read		= tracing_thresh_read,
7373 	.write		= tracing_thresh_write,
7374 	.llseek		= generic_file_llseek,
7375 };
7376 
7377 #ifdef CONFIG_TRACER_MAX_TRACE
7378 static const struct file_operations tracing_max_lat_fops = {
7379 	.open		= tracing_open_generic_tr,
7380 	.read		= tracing_max_lat_read,
7381 	.write		= tracing_max_lat_write,
7382 	.llseek		= generic_file_llseek,
7383 	.release	= tracing_release_generic_tr,
7384 };
7385 #endif
7386 
7387 static const struct file_operations set_tracer_fops = {
7388 	.open		= tracing_open_generic_tr,
7389 	.read		= tracing_set_trace_read,
7390 	.write		= tracing_set_trace_write,
7391 	.llseek		= generic_file_llseek,
7392 	.release	= tracing_release_generic_tr,
7393 };
7394 
7395 static const struct file_operations tracing_pipe_fops = {
7396 	.open		= tracing_open_pipe,
7397 	.poll		= tracing_poll_pipe,
7398 	.read		= tracing_read_pipe,
7399 	.splice_read	= tracing_splice_read_pipe,
7400 	.release	= tracing_release_pipe,
7401 	.llseek		= no_llseek,
7402 };
7403 
7404 static const struct file_operations tracing_entries_fops = {
7405 	.open		= tracing_open_generic_tr,
7406 	.read		= tracing_entries_read,
7407 	.write		= tracing_entries_write,
7408 	.llseek		= generic_file_llseek,
7409 	.release	= tracing_release_generic_tr,
7410 };
7411 
7412 static const struct file_operations tracing_total_entries_fops = {
7413 	.open		= tracing_open_generic_tr,
7414 	.read		= tracing_total_entries_read,
7415 	.llseek		= generic_file_llseek,
7416 	.release	= tracing_release_generic_tr,
7417 };
7418 
7419 static const struct file_operations tracing_free_buffer_fops = {
7420 	.open		= tracing_open_generic_tr,
7421 	.write		= tracing_free_buffer_write,
7422 	.release	= tracing_free_buffer_release,
7423 };
7424 
7425 static const struct file_operations tracing_mark_fops = {
7426 	.open		= tracing_mark_open,
7427 	.write		= tracing_mark_write,
7428 	.release	= tracing_release_generic_tr,
7429 };
7430 
7431 static const struct file_operations tracing_mark_raw_fops = {
7432 	.open		= tracing_mark_open,
7433 	.write		= tracing_mark_raw_write,
7434 	.release	= tracing_release_generic_tr,
7435 };
7436 
7437 static const struct file_operations trace_clock_fops = {
7438 	.open		= tracing_clock_open,
7439 	.read		= seq_read,
7440 	.llseek		= seq_lseek,
7441 	.release	= tracing_single_release_tr,
7442 	.write		= tracing_clock_write,
7443 };
7444 
7445 static const struct file_operations trace_time_stamp_mode_fops = {
7446 	.open		= tracing_time_stamp_mode_open,
7447 	.read		= seq_read,
7448 	.llseek		= seq_lseek,
7449 	.release	= tracing_single_release_tr,
7450 };
7451 
7452 #ifdef CONFIG_TRACER_SNAPSHOT
7453 static const struct file_operations snapshot_fops = {
7454 	.open		= tracing_snapshot_open,
7455 	.read		= seq_read,
7456 	.write		= tracing_snapshot_write,
7457 	.llseek		= tracing_lseek,
7458 	.release	= tracing_snapshot_release,
7459 };
7460 
7461 static const struct file_operations snapshot_raw_fops = {
7462 	.open		= snapshot_raw_open,
7463 	.read		= tracing_buffers_read,
7464 	.release	= tracing_buffers_release,
7465 	.splice_read	= tracing_buffers_splice_read,
7466 	.llseek		= no_llseek,
7467 };
7468 
7469 #endif /* CONFIG_TRACER_SNAPSHOT */
7470 
7471 /*
7472  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7473  * @filp: The active open file structure
7474  * @ubuf: The user space buffer containing the value to write
7475  * @cnt: The number of bytes to read from the buffer
7476  * @ppos: The current "file" position
7477  *
7478  * This function implements the write interface for a struct trace_min_max_param.
7479  * The filp->private_data must point to a trace_min_max_param structure that
7480  * defines where to write the value, the min and the max acceptable values,
7481  * and a lock to protect the write.
7482  */
7483 static ssize_t
7484 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7485 {
7486 	struct trace_min_max_param *param = filp->private_data;
7487 	u64 val;
7488 	int err;
7489 
7490 	if (!param)
7491 		return -EFAULT;
7492 
7493 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7494 	if (err)
7495 		return err;
7496 
7497 	if (param->lock)
7498 		mutex_lock(param->lock);
7499 
7500 	if (param->min && val < *param->min)
7501 		err = -EINVAL;
7502 
7503 	if (param->max && val > *param->max)
7504 		err = -EINVAL;
7505 
7506 	if (!err)
7507 		*param->val = val;
7508 
7509 	if (param->lock)
7510 		mutex_unlock(param->lock);
7511 
7512 	if (err)
7513 		return err;
7514 
7515 	return cnt;
7516 }
7517 
7518 /*
7519  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7520  * @filp: The active open file structure
7521  * @ubuf: The userspace provided buffer to read value into
7522  * @cnt: The maximum number of bytes to read
7523  * @ppos: The current "file" position
7524  *
7525  * This function implements the read interface for a struct trace_min_max_param.
7526  * The filp->private_data must point to a trace_min_max_param struct with valid
7527  * data.
7528  */
7529 static ssize_t
7530 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7531 {
7532 	struct trace_min_max_param *param = filp->private_data;
7533 	char buf[U64_STR_SIZE];
7534 	int len;
7535 	u64 val;
7536 
7537 	if (!param)
7538 		return -EFAULT;
7539 
7540 	val = *param->val;
7541 
7542 	if (cnt > sizeof(buf))
7543 		cnt = sizeof(buf);
7544 
7545 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7546 
7547 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7548 }
7549 
7550 const struct file_operations trace_min_max_fops = {
7551 	.open		= tracing_open_generic,
7552 	.read		= trace_min_max_read,
7553 	.write		= trace_min_max_write,
7554 };
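/*
 * Illustrative sketch (hypothetical names) of how a tracefs file can be
 * wired to trace_min_max_fops.  The trace_min_max_param supplies the
 * value to update plus optional bounds and an optional lock:
 *
 *	static DEFINE_MUTEX(example_mutex);
 *	static u64 example_val;
 *	static u64 example_min = 1, example_max = 1000;
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_mutex,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example", TRACE_MODE_WRITE, parent,
 *			  &example_param, &trace_min_max_fops);
 */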
7555 
7556 #define TRACING_LOG_ERRS_MAX	8
7557 #define TRACING_LOG_LOC_MAX	128
7558 
7559 #define CMD_PREFIX "  Command: "
7560 
7561 struct err_info {
7562 	const char	**errs;	/* ptr to loc-specific array of err strings */
7563 	u8		type;	/* index into errs -> specific err string */
7564 	u16		pos;	/* caret position */
7565 	u64		ts;
7566 };
7567 
7568 struct tracing_log_err {
7569 	struct list_head	list;
7570 	struct err_info		info;
7571 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7572 	char			*cmd;                     /* what caused err */
7573 };
7574 
7575 static DEFINE_MUTEX(tracing_err_log_lock);
7576 
7577 static struct tracing_log_err *alloc_tracing_log_err(int len)
7578 {
7579 	struct tracing_log_err *err;
7580 
7581 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7582 	if (!err)
7583 		return ERR_PTR(-ENOMEM);
7584 
7585 	err->cmd = kzalloc(len, GFP_KERNEL);
7586 	if (!err->cmd) {
7587 		kfree(err);
7588 		return ERR_PTR(-ENOMEM);
7589 	}
7590 
7591 	return err;
7592 }
7593 
7594 static void free_tracing_log_err(struct tracing_log_err *err)
7595 {
7596 	kfree(err->cmd);
7597 	kfree(err);
7598 }
7599 
7600 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7601 						   int len)
7602 {
7603 	struct tracing_log_err *err;
7604 	char *cmd;
7605 
7606 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7607 		err = alloc_tracing_log_err(len);
7608 		if (PTR_ERR(err) != -ENOMEM)
7609 			tr->n_err_log_entries++;
7610 
7611 		return err;
7612 	}
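	/* The log is full: recycle the oldest entry and give it a new cmd buffer */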
7613 	cmd = kzalloc(len, GFP_KERNEL);
7614 	if (!cmd)
7615 		return ERR_PTR(-ENOMEM);
7616 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7617 	kfree(err->cmd);
7618 	err->cmd = cmd;
7619 	list_del(&err->list);
7620 
7621 	return err;
7622 }
7623 
7624 /**
7625  * err_pos - find the position of a string within a command for error careting
7626  * @cmd: The tracing command that caused the error
7627  * @str: The string to position the caret at within @cmd
7628  *
7629  * Finds the position of the first occurrence of @str within @cmd.  The
7630  * return value can be passed to tracing_log_err() for caret placement
7631  * within @cmd.
7632  *
7633  * Returns the index within @cmd of the first occurrence of @str or 0
7634  * if @str was not found.
7635  */
7636 unsigned int err_pos(char *cmd, const char *str)
7637 {
7638 	char *found;
7639 
7640 	if (WARN_ON(!strlen(cmd)))
7641 		return 0;
7642 
7643 	found = strstr(cmd, str);
7644 	if (found)
7645 		return found - cmd;
7646 
7647 	return 0;
7648 }
7649 
7650 /**
7651  * tracing_log_err - write an error to the tracing error log
7652  * @tr: The associated trace array for the error (NULL for top level array)
7653  * @loc: A string describing where the error occurred
7654  * @cmd: The tracing command that caused the error
7655  * @errs: The array of loc-specific static error strings
7656  * @type: The index into errs[], which produces the specific static err string
7657  * @pos: The position the caret should be placed in the cmd
7658  *
7659  * Writes an error into tracing/error_log of the form:
7660  *
7661  * <loc>: error: <text>
7662  *   Command: <cmd>
7663  *              ^
7664  *
7665  * tracing/error_log is a small log file containing the last
7666  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7667  * unless there has been a tracing error, and the error log can be
7668  * cleared and have its memory freed by writing the empty string in
7669  * truncation mode to it, i.e. echo > tracing/error_log.
7670  *
7671  * NOTE: the @errs array along with the @type param are used to
7672  * produce a static error string - this string is not copied and saved
7673  * when the error is logged - only a pointer to it is saved.  See
7674  * existing callers for examples of how static strings are typically
7675  * defined for tracing_log_err(); an illustrative sketch also follows below.
7676  */
7677 void tracing_log_err(struct trace_array *tr,
7678 		     const char *loc, const char *cmd,
7679 		     const char **errs, u8 type, u16 pos)
7680 {
7681 	struct tracing_log_err *err;
7682 	int len = 0;
7683 
7684 	if (!tr)
7685 		tr = &global_trace;
7686 
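	/*
	 * Room for "\n" CMD_PREFIX "<cmd>\n" plus a terminating NUL; the
	 * sizeof()s of the string literals already count their own NULs,
	 * so this slightly over-allocates, which is harmless.
	 */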
7687 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7688 
7689 	mutex_lock(&tracing_err_log_lock);
7690 	err = get_tracing_log_err(tr, len);
7691 	if (PTR_ERR(err) == -ENOMEM) {
7692 		mutex_unlock(&tracing_err_log_lock);
7693 		return;
7694 	}
7695 
7696 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7697 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7698 
7699 	err->info.errs = errs;
7700 	err->info.type = type;
7701 	err->info.pos = pos;
7702 	err->info.ts = local_clock();
7703 
7704 	list_add_tail(&err->list, &tr->err_log);
7705 	mutex_unlock(&tracing_err_log_lock);
7706 }
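/*
 * Illustrative (hypothetical) sketch of a caller.  Only pointers into the
 * errs[] array are stored, so the strings must have static storage
 * duration:
 *
 *	static const char *example_errs[] = { "Field not found", "Bad format" };
 *
 *	tracing_log_err(tr, "example: location", cmd_str, example_errs,
 *			0, err_pos(cmd_str, "field"));
 */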
7707 
7708 static void clear_tracing_err_log(struct trace_array *tr)
7709 {
7710 	struct tracing_log_err *err, *next;
7711 
7712 	mutex_lock(&tracing_err_log_lock);
7713 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7714 		list_del(&err->list);
7715 		free_tracing_log_err(err);
7716 	}
7717 
7718 	tr->n_err_log_entries = 0;
7719 	mutex_unlock(&tracing_err_log_lock);
7720 }
7721 
7722 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7723 {
7724 	struct trace_array *tr = m->private;
7725 
7726 	mutex_lock(&tracing_err_log_lock);
7727 
7728 	return seq_list_start(&tr->err_log, *pos);
7729 }
7730 
7731 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7732 {
7733 	struct trace_array *tr = m->private;
7734 
7735 	return seq_list_next(v, &tr->err_log, pos);
7736 }
7737 
7738 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7739 {
7740 	mutex_unlock(&tracing_err_log_lock);
7741 }
7742 
7743 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7744 {
7745 	u16 i;
7746 
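	/* Indent past the "  Command: " prefix, then put the caret under column pos */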
7747 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7748 		seq_putc(m, ' ');
7749 	for (i = 0; i < pos; i++)
7750 		seq_putc(m, ' ');
7751 	seq_puts(m, "^\n");
7752 }
7753 
7754 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7755 {
7756 	struct tracing_log_err *err = v;
7757 
7758 	if (err) {
7759 		const char *err_text = err->info.errs[err->info.type];
7760 		u64 sec = err->info.ts;
7761 		u32 nsec;
7762 
7763 		nsec = do_div(sec, NSEC_PER_SEC);
7764 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7765 			   err->loc, err_text);
7766 		seq_printf(m, "%s", err->cmd);
7767 		tracing_err_log_show_pos(m, err->info.pos);
7768 	}
7769 
7770 	return 0;
7771 }
7772 
7773 static const struct seq_operations tracing_err_log_seq_ops = {
7774 	.start  = tracing_err_log_seq_start,
7775 	.next   = tracing_err_log_seq_next,
7776 	.stop   = tracing_err_log_seq_stop,
7777 	.show   = tracing_err_log_seq_show
7778 };
7779 
7780 static int tracing_err_log_open(struct inode *inode, struct file *file)
7781 {
7782 	struct trace_array *tr = inode->i_private;
7783 	int ret = 0;
7784 
7785 	ret = tracing_check_open_get_tr(tr);
7786 	if (ret)
7787 		return ret;
7788 
7789 	/* If this file was opened for write, then erase contents */
7790 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7791 		clear_tracing_err_log(tr);
7792 
7793 	if (file->f_mode & FMODE_READ) {
7794 		ret = seq_open(file, &tracing_err_log_seq_ops);
7795 		if (!ret) {
7796 			struct seq_file *m = file->private_data;
7797 			m->private = tr;
7798 		} else {
7799 			trace_array_put(tr);
7800 		}
7801 	}
7802 	return ret;
7803 }
7804 
7805 static ssize_t tracing_err_log_write(struct file *file,
7806 				     const char __user *buffer,
7807 				     size_t count, loff_t *ppos)
7808 {
7809 	return count;
7810 }
7811 
7812 static int tracing_err_log_release(struct inode *inode, struct file *file)
7813 {
7814 	struct trace_array *tr = inode->i_private;
7815 
7816 	trace_array_put(tr);
7817 
7818 	if (file->f_mode & FMODE_READ)
7819 		seq_release(inode, file);
7820 
7821 	return 0;
7822 }
7823 
7824 static const struct file_operations tracing_err_log_fops = {
7825 	.open           = tracing_err_log_open,
7826 	.write		= tracing_err_log_write,
7827 	.read           = seq_read,
7828 	.llseek         = tracing_lseek,
7829 	.release        = tracing_err_log_release,
7830 };
7831 
7832 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7833 {
7834 	struct trace_array *tr = inode->i_private;
7835 	struct ftrace_buffer_info *info;
7836 	int ret;
7837 
7838 	ret = tracing_check_open_get_tr(tr);
7839 	if (ret)
7840 		return ret;
7841 
7842 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7843 	if (!info) {
7844 		trace_array_put(tr);
7845 		return -ENOMEM;
7846 	}
7847 
7848 	mutex_lock(&trace_types_lock);
7849 
7850 	info->iter.tr		= tr;
7851 	info->iter.cpu_file	= tracing_get_cpu(inode);
7852 	info->iter.trace	= tr->current_trace;
7853 	info->iter.array_buffer = &tr->array_buffer;
7854 	info->spare		= NULL;
7855 	/* Force reading ring buffer for first read */
7856 	info->read		= (unsigned int)-1;
7857 
7858 	filp->private_data = info;
7859 
7860 	tr->trace_ref++;
7861 
7862 	mutex_unlock(&trace_types_lock);
7863 
7864 	ret = nonseekable_open(inode, filp);
7865 	if (ret < 0)
7866 		trace_array_put(tr);
7867 
7868 	return ret;
7869 }
7870 
7871 static __poll_t
7872 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7873 {
7874 	struct ftrace_buffer_info *info = filp->private_data;
7875 	struct trace_iterator *iter = &info->iter;
7876 
7877 	return trace_poll(iter, filp, poll_table);
7878 }
7879 
7880 static ssize_t
7881 tracing_buffers_read(struct file *filp, char __user *ubuf,
7882 		     size_t count, loff_t *ppos)
7883 {
7884 	struct ftrace_buffer_info *info = filp->private_data;
7885 	struct trace_iterator *iter = &info->iter;
7886 	void *trace_data;
7887 	int page_size;
7888 	ssize_t ret = 0;
7889 	ssize_t size;
7890 
7891 	if (!count)
7892 		return 0;
7893 
7894 #ifdef CONFIG_TRACER_MAX_TRACE
7895 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7896 		return -EBUSY;
7897 #endif
7898 
7899 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7900 
7901 	/* Make sure the spare matches the current sub buffer size */
7902 	if (info->spare) {
7903 		if (page_size != info->spare_size) {
7904 			ring_buffer_free_read_page(iter->array_buffer->buffer,
7905 						   info->spare_cpu, info->spare);
7906 			info->spare = NULL;
7907 		}
7908 	}
7909 
7910 	if (!info->spare) {
7911 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7912 							  iter->cpu_file);
7913 		if (IS_ERR(info->spare)) {
7914 			ret = PTR_ERR(info->spare);
7915 			info->spare = NULL;
7916 		} else {
7917 			info->spare_cpu = iter->cpu_file;
7918 			info->spare_size = page_size;
7919 		}
7920 	}
7921 	if (!info->spare)
7922 		return ret;
7923 
7924 	/* Do we have previous read data to read? */
7925 	if (info->read < page_size)
7926 		goto read;
7927 
7928  again:
7929 	trace_access_lock(iter->cpu_file);
7930 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7931 				    info->spare,
7932 				    count,
7933 				    iter->cpu_file, 0);
7934 	trace_access_unlock(iter->cpu_file);
7935 
7936 	if (ret < 0) {
7937 		if (trace_empty(iter)) {
7938 			if ((filp->f_flags & O_NONBLOCK))
7939 				return -EAGAIN;
7940 
7941 			ret = wait_on_pipe(iter, 0);
7942 			if (ret)
7943 				return ret;
7944 
7945 			goto again;
7946 		}
7947 		return 0;
7948 	}
7949 
7950 	info->read = 0;
7951  read:
7952 	size = page_size - info->read;
7953 	if (size > count)
7954 		size = count;
7955 	trace_data = ring_buffer_read_page_data(info->spare);
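	/*
	 * copy_to_user() returns the number of bytes it could NOT copy:
	 * fail only if nothing was copied, otherwise return the partial
	 * amount that was.
	 */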
7956 	ret = copy_to_user(ubuf, trace_data + info->read, size);
7957 	if (ret == size)
7958 		return -EFAULT;
7959 
7960 	size -= ret;
7961 
7962 	*ppos += size;
7963 	info->read += size;
7964 
7965 	return size;
7966 }
7967 
7968 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7969 {
7970 	struct ftrace_buffer_info *info = file->private_data;
7971 	struct trace_iterator *iter = &info->iter;
7972 
7973 	iter->closed = true;
7974 	/* Make sure the waiters see the new wait_index */
7975 	(void)atomic_fetch_inc_release(&iter->wait_index);
7976 
7977 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7978 
7979 	return 0;
7980 }
7981 
7982 static int tracing_buffers_release(struct inode *inode, struct file *file)
7983 {
7984 	struct ftrace_buffer_info *info = file->private_data;
7985 	struct trace_iterator *iter = &info->iter;
7986 
7987 	mutex_lock(&trace_types_lock);
7988 
7989 	iter->tr->trace_ref--;
7990 
7991 	__trace_array_put(iter->tr);
7992 
7993 	if (info->spare)
7994 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7995 					   info->spare_cpu, info->spare);
7996 	kvfree(info);
7997 
7998 	mutex_unlock(&trace_types_lock);
7999 
8000 	return 0;
8001 }
8002 
8003 struct buffer_ref {
8004 	struct trace_buffer	*buffer;
8005 	void			*page;
8006 	int			cpu;
8007 	refcount_t		refcount;
8008 };
8009 
8010 static void buffer_ref_release(struct buffer_ref *ref)
8011 {
8012 	if (!refcount_dec_and_test(&ref->refcount))
8013 		return;
8014 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8015 	kfree(ref);
8016 }
8017 
8018 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8019 				    struct pipe_buffer *buf)
8020 {
8021 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8022 
8023 	buffer_ref_release(ref);
8024 	buf->private = 0;
8025 }
8026 
8027 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8028 				struct pipe_buffer *buf)
8029 {
8030 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8031 
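	/* Refuse to take another reference if the count is getting close to overflow */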
8032 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8033 		return false;
8034 
8035 	refcount_inc(&ref->refcount);
8036 	return true;
8037 }
8038 
8039 /* Pipe buffer operations for a buffer. */
8040 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8041 	.release		= buffer_pipe_buf_release,
8042 	.get			= buffer_pipe_buf_get,
8043 };
8044 
8045 /*
8046  * Callback from splice_to_pipe(), if we need to release some pages
8047  * at the end of the spd in case we errored out while filling the pipe.
8048  */
8049 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8050 {
8051 	struct buffer_ref *ref =
8052 		(struct buffer_ref *)spd->partial[i].private;
8053 
8054 	buffer_ref_release(ref);
8055 	spd->partial[i].private = 0;
8056 }
8057 
8058 static ssize_t
8059 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8060 			    struct pipe_inode_info *pipe, size_t len,
8061 			    unsigned int flags)
8062 {
8063 	struct ftrace_buffer_info *info = file->private_data;
8064 	struct trace_iterator *iter = &info->iter;
8065 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8066 	struct page *pages_def[PIPE_DEF_BUFFERS];
8067 	struct splice_pipe_desc spd = {
8068 		.pages		= pages_def,
8069 		.partial	= partial_def,
8070 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8071 		.ops		= &buffer_pipe_buf_ops,
8072 		.spd_release	= buffer_spd_release,
8073 	};
8074 	struct buffer_ref *ref;
8075 	bool woken = false;
8076 	int page_size;
8077 	int entries, i;
8078 	ssize_t ret = 0;
8079 
8080 #ifdef CONFIG_TRACER_MAX_TRACE
8081 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8082 		return -EBUSY;
8083 #endif
8084 
8085 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
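	/*
	 * Splice works in whole sub-buffers: the offset must be sub-buffer
	 * aligned and the length is rounded down to a multiple of the
	 * sub-buffer size (but must cover at least one sub-buffer).
	 */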
8086 	if (*ppos & (page_size - 1))
8087 		return -EINVAL;
8088 
8089 	if (len & (page_size - 1)) {
8090 		if (len < page_size)
8091 			return -EINVAL;
8092 		len &= (~(page_size - 1));
8093 	}
8094 
8095 	if (splice_grow_spd(pipe, &spd))
8096 		return -ENOMEM;
8097 
8098  again:
8099 	trace_access_lock(iter->cpu_file);
8100 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8101 
8102 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8103 		struct page *page;
8104 		int r;
8105 
8106 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8107 		if (!ref) {
8108 			ret = -ENOMEM;
8109 			break;
8110 		}
8111 
8112 		refcount_set(&ref->refcount, 1);
8113 		ref->buffer = iter->array_buffer->buffer;
8114 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8115 		if (IS_ERR(ref->page)) {
8116 			ret = PTR_ERR(ref->page);
8117 			ref->page = NULL;
8118 			kfree(ref);
8119 			break;
8120 		}
8121 		ref->cpu = iter->cpu_file;
8122 
8123 		r = ring_buffer_read_page(ref->buffer, ref->page,
8124 					  len, iter->cpu_file, 1);
8125 		if (r < 0) {
8126 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8127 						   ref->page);
8128 			kfree(ref);
8129 			break;
8130 		}
8131 
8132 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8133 
8134 		spd.pages[i] = page;
8135 		spd.partial[i].len = page_size;
8136 		spd.partial[i].offset = 0;
8137 		spd.partial[i].private = (unsigned long)ref;
8138 		spd.nr_pages++;
8139 		*ppos += page_size;
8140 
8141 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8142 	}
8143 
8144 	trace_access_unlock(iter->cpu_file);
8145 	spd.nr_pages = i;
8146 
8147 	/* did we read anything? */
8148 	if (!spd.nr_pages) {
8149 
8150 		if (ret)
8151 			goto out;
8152 
8153 		if (woken)
8154 			goto out;
8155 
8156 		ret = -EAGAIN;
8157 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8158 			goto out;
8159 
8160 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8161 		if (ret)
8162 			goto out;
8163 
8164 		/* No need to wait after waking up when tracing is off */
8165 		if (!tracer_tracing_is_on(iter->tr))
8166 			goto out;
8167 
8168 		/* Iterate one more time to collect any new data, then exit */
8169 		woken = true;
8170 
8171 		goto again;
8172 	}
8173 
8174 	ret = splice_to_pipe(pipe, &spd);
8175 out:
8176 	splice_shrink_spd(&spd);
8177 
8178 	return ret;
8179 }
8180 
8181 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8182 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8183 {
8184 	struct ftrace_buffer_info *info = file->private_data;
8185 	struct trace_iterator *iter = &info->iter;
8186 
8187 	if (cmd)
8188 		return -ENOIOCTLCMD;
8189 
8190 	mutex_lock(&trace_types_lock);
8191 
8192 	/* Make sure the waiters see the new wait_index */
8193 	(void)atomic_fetch_inc_release(&iter->wait_index);
8194 
8195 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8196 
8197 	mutex_unlock(&trace_types_lock);
8198 	return 0;
8199 }
8200 
8201 static const struct file_operations tracing_buffers_fops = {
8202 	.open		= tracing_buffers_open,
8203 	.read		= tracing_buffers_read,
8204 	.poll		= tracing_buffers_poll,
8205 	.release	= tracing_buffers_release,
8206 	.flush		= tracing_buffers_flush,
8207 	.splice_read	= tracing_buffers_splice_read,
8208 	.unlocked_ioctl = tracing_buffers_ioctl,
8209 	.llseek		= no_llseek,
8210 };
8211 
8212 static ssize_t
8213 tracing_stats_read(struct file *filp, char __user *ubuf,
8214 		   size_t count, loff_t *ppos)
8215 {
8216 	struct inode *inode = file_inode(filp);
8217 	struct trace_array *tr = inode->i_private;
8218 	struct array_buffer *trace_buf = &tr->array_buffer;
8219 	int cpu = tracing_get_cpu(inode);
8220 	struct trace_seq *s;
8221 	unsigned long cnt;
8222 	unsigned long long t;
8223 	unsigned long usec_rem;
8224 
8225 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8226 	if (!s)
8227 		return -ENOMEM;
8228 
8229 	trace_seq_init(s);
8230 
8231 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8232 	trace_seq_printf(s, "entries: %ld\n", cnt);
8233 
8234 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8235 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8236 
8237 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8238 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8239 
8240 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8241 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8242 
8243 	if (trace_clocks[tr->clock_id].in_ns) {
8244 		/* local or global for trace_clock */
8245 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8246 		usec_rem = do_div(t, USEC_PER_SEC);
8247 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8248 								t, usec_rem);
8249 
8250 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8251 		usec_rem = do_div(t, USEC_PER_SEC);
8252 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8253 	} else {
8254 		/* counter or tsc mode for trace_clock */
8255 		trace_seq_printf(s, "oldest event ts: %llu\n",
8256 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8257 
8258 		trace_seq_printf(s, "now ts: %llu\n",
8259 				ring_buffer_time_stamp(trace_buf->buffer));
8260 	}
8261 
8262 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8263 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8264 
8265 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8266 	trace_seq_printf(s, "read events: %ld\n", cnt);
8267 
8268 	count = simple_read_from_buffer(ubuf, count, ppos,
8269 					s->buffer, trace_seq_used(s));
8270 
8271 	kfree(s);
8272 
8273 	return count;
8274 }
8275 
8276 static const struct file_operations tracing_stats_fops = {
8277 	.open		= tracing_open_generic_tr,
8278 	.read		= tracing_stats_read,
8279 	.llseek		= generic_file_llseek,
8280 	.release	= tracing_release_generic_tr,
8281 };
8282 
8283 #ifdef CONFIG_DYNAMIC_FTRACE
8284 
8285 static ssize_t
8286 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8287 		  size_t cnt, loff_t *ppos)
8288 {
8289 	ssize_t ret;
8290 	char *buf;
8291 	int r;
8292 
8293 	/* 256 should be plenty to hold the amount needed */
8294 	buf = kmalloc(256, GFP_KERNEL);
8295 	if (!buf)
8296 		return -ENOMEM;
8297 
8298 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8299 		      ftrace_update_tot_cnt,
8300 		      ftrace_number_of_pages,
8301 		      ftrace_number_of_groups);
8302 
8303 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8304 	kfree(buf);
8305 	return ret;
8306 }
8307 
8308 static const struct file_operations tracing_dyn_info_fops = {
8309 	.open		= tracing_open_generic,
8310 	.read		= tracing_read_dyn_info,
8311 	.llseek		= generic_file_llseek,
8312 };
8313 #endif /* CONFIG_DYNAMIC_FTRACE */
8314 
8315 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8316 static void
8317 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8318 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8319 		void *data)
8320 {
8321 	tracing_snapshot_instance(tr);
8322 }
8323 
8324 static void
8325 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8326 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8327 		      void *data)
8328 {
8329 	struct ftrace_func_mapper *mapper = data;
8330 	long *count = NULL;
8331 
8332 	if (mapper)
8333 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8334 
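	/*
	 * A count set up via "snapshot:N" limits how many snapshots are
	 * taken; no count means a snapshot on every hit.
	 */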
8335 	if (count) {
8336 
8337 		if (*count <= 0)
8338 			return;
8339 
8340 		(*count)--;
8341 	}
8342 
8343 	tracing_snapshot_instance(tr);
8344 }
8345 
8346 static int
8347 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8348 		      struct ftrace_probe_ops *ops, void *data)
8349 {
8350 	struct ftrace_func_mapper *mapper = data;
8351 	long *count = NULL;
8352 
8353 	seq_printf(m, "%ps:", (void *)ip);
8354 
8355 	seq_puts(m, "snapshot");
8356 
8357 	if (mapper)
8358 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8359 
8360 	if (count)
8361 		seq_printf(m, ":count=%ld\n", *count);
8362 	else
8363 		seq_puts(m, ":unlimited\n");
8364 
8365 	return 0;
8366 }
8367 
8368 static int
8369 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8370 		     unsigned long ip, void *init_data, void **data)
8371 {
8372 	struct ftrace_func_mapper *mapper = *data;
8373 
8374 	if (!mapper) {
8375 		mapper = allocate_ftrace_func_mapper();
8376 		if (!mapper)
8377 			return -ENOMEM;
8378 		*data = mapper;
8379 	}
8380 
8381 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8382 }
8383 
8384 static void
8385 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8386 		     unsigned long ip, void *data)
8387 {
8388 	struct ftrace_func_mapper *mapper = data;
8389 
8390 	if (!ip) {
8391 		if (!mapper)
8392 			return;
8393 		free_ftrace_func_mapper(mapper, NULL);
8394 		return;
8395 	}
8396 
8397 	ftrace_func_mapper_remove_ip(mapper, ip);
8398 }
8399 
8400 static struct ftrace_probe_ops snapshot_probe_ops = {
8401 	.func			= ftrace_snapshot,
8402 	.print			= ftrace_snapshot_print,
8403 };
8404 
8405 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8406 	.func			= ftrace_count_snapshot,
8407 	.print			= ftrace_snapshot_print,
8408 	.init			= ftrace_snapshot_init,
8409 	.free			= ftrace_snapshot_free,
8410 };
8411 
8412 static int
8413 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8414 			       char *glob, char *cmd, char *param, int enable)
8415 {
8416 	struct ftrace_probe_ops *ops;
8417 	void *count = (void *)-1;
8418 	char *number;
8419 	int ret;
8420 
8421 	if (!tr)
8422 		return -ENODEV;
8423 
8424 	/* hash funcs only work with set_ftrace_filter */
8425 	if (!enable)
8426 		return -EINVAL;
8427 
8428 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8429 
8430 	if (glob[0] == '!') {
8431 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8432 		if (!ret)
8433 			tracing_disarm_snapshot(tr);
8434 
8435 		return ret;
8436 	}
8437 
8438 	if (!param)
8439 		goto out_reg;
8440 
8441 	number = strsep(&param, ":");
8442 
8443 	if (!strlen(number))
8444 		goto out_reg;
8445 
8446 	/*
8447 	 * We use the callback data field (which is a pointer)
8448 	 * as our counter.
8449 	 */
8450 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8451 	if (ret)
8452 		return ret;
8453 
8454  out_reg:
8455 	ret = tracing_arm_snapshot(tr);
8456 	if (ret < 0)
8457 		goto out;
8458 
8459 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8460 	if (ret < 0)
8461 		tracing_disarm_snapshot(tr);
8462  out:
8463 	return ret < 0 ? ret : 0;
8464 }
8465 
8466 static struct ftrace_func_command ftrace_snapshot_cmd = {
8467 	.name			= "snapshot",
8468 	.func			= ftrace_trace_snapshot_callback,
8469 };
8470 
8471 static __init int register_snapshot_cmd(void)
8472 {
8473 	return register_ftrace_command(&ftrace_snapshot_cmd);
8474 }
8475 #else
8476 static inline __init int register_snapshot_cmd(void) { return 0; }
8477 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8478 
8479 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8480 {
8481 	if (WARN_ON(!tr->dir))
8482 		return ERR_PTR(-ENODEV);
8483 
8484 	/* Top directory uses NULL as the parent */
8485 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8486 		return NULL;
8487 
8488 	/* All sub buffers have a descriptor */
8489 	return tr->dir;
8490 }
8491 
8492 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8493 {
8494 	struct dentry *d_tracer;
8495 
8496 	if (tr->percpu_dir)
8497 		return tr->percpu_dir;
8498 
8499 	d_tracer = tracing_get_dentry(tr);
8500 	if (IS_ERR(d_tracer))
8501 		return NULL;
8502 
8503 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8504 
8505 	MEM_FAIL(!tr->percpu_dir,
8506 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8507 
8508 	return tr->percpu_dir;
8509 }
8510 
8511 static struct dentry *
8512 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8513 		      void *data, long cpu, const struct file_operations *fops)
8514 {
8515 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8516 
8517 	if (ret) /* See tracing_get_cpu() */
8518 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8519 	return ret;
8520 }
8521 
8522 static void
8523 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8524 {
8525 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8526 	struct dentry *d_cpu;
8527 	char cpu_dir[30]; /* 30 characters should be more than enough */
8528 
8529 	if (!d_percpu)
8530 		return;
8531 
8532 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8533 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8534 	if (!d_cpu) {
8535 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8536 		return;
8537 	}
8538 
8539 	/* per cpu trace_pipe */
8540 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8541 				tr, cpu, &tracing_pipe_fops);
8542 
8543 	/* per cpu trace */
8544 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8545 				tr, cpu, &tracing_fops);
8546 
8547 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8548 				tr, cpu, &tracing_buffers_fops);
8549 
8550 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8551 				tr, cpu, &tracing_stats_fops);
8552 
8553 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8554 				tr, cpu, &tracing_entries_fops);
8555 
8556 #ifdef CONFIG_TRACER_SNAPSHOT
8557 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8558 				tr, cpu, &snapshot_fops);
8559 
8560 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8561 				tr, cpu, &snapshot_raw_fops);
8562 #endif
8563 }
8564 
8565 #ifdef CONFIG_FTRACE_SELFTEST
8566 /* Let selftest have access to static functions in this file */
8567 #include "trace_selftest.c"
8568 #endif
8569 
8570 static ssize_t
8571 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8572 			loff_t *ppos)
8573 {
8574 	struct trace_option_dentry *topt = filp->private_data;
8575 	char *buf;
8576 
8577 	if (topt->flags->val & topt->opt->bit)
8578 		buf = "1\n";
8579 	else
8580 		buf = "0\n";
8581 
8582 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8583 }
8584 
8585 static ssize_t
8586 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8587 			 loff_t *ppos)
8588 {
8589 	struct trace_option_dentry *topt = filp->private_data;
8590 	unsigned long val;
8591 	int ret;
8592 
8593 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8594 	if (ret)
8595 		return ret;
8596 
8597 	if (val != 0 && val != 1)
8598 		return -EINVAL;
8599 
8600 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8601 		mutex_lock(&trace_types_lock);
8602 		ret = __set_tracer_option(topt->tr, topt->flags,
8603 					  topt->opt, !val);
8604 		mutex_unlock(&trace_types_lock);
8605 		if (ret)
8606 			return ret;
8607 	}
8608 
8609 	*ppos += cnt;
8610 
8611 	return cnt;
8612 }
8613 
8614 static int tracing_open_options(struct inode *inode, struct file *filp)
8615 {
8616 	struct trace_option_dentry *topt = inode->i_private;
8617 	int ret;
8618 
8619 	ret = tracing_check_open_get_tr(topt->tr);
8620 	if (ret)
8621 		return ret;
8622 
8623 	filp->private_data = inode->i_private;
8624 	return 0;
8625 }
8626 
8627 static int tracing_release_options(struct inode *inode, struct file *file)
8628 {
8629 	struct trace_option_dentry *topt = file->private_data;
8630 
8631 	trace_array_put(topt->tr);
8632 	return 0;
8633 }
8634 
8635 static const struct file_operations trace_options_fops = {
8636 	.open = tracing_open_options,
8637 	.read = trace_options_read,
8638 	.write = trace_options_write,
8639 	.llseek	= generic_file_llseek,
8640 	.release = tracing_release_options,
8641 };
8642 
8643 /*
8644  * In order to pass in both the trace_array descriptor as well as the index
8645  * to the flag that the trace option file represents, the trace_array
8646  * has a character array of trace_flags_index[], which holds the index
8647  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8648  * The address of this character array is passed to the flag option file
8649  * read/write callbacks.
8650  *
8651  * In order to extract both the index and the trace_array descriptor,
8652  * get_tr_index() uses the following algorithm.
8653  *
8654  *   idx = *ptr;
8655  *
8656  * Since the pointer points at an entry whose value equals its own index
8657  * (remember index[1] == 1), dereferencing it yields that index.
8658  *
8659  * Then, to get the trace_array descriptor, subtract that index from
8660  * the pointer to land on the start of the index array itself.
8661  *
8662  *   ptr - idx == &index[0]
8663  *
8664  * Then a simple container_of() from that pointer gets us to the
8665  * trace_array descriptor.
8666  */
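/*
 * For example (illustrative): if data points at tr->trace_flags_index[3],
 * then *pindex becomes 3 and (data - 3) lands on &tr->trace_flags_index[0],
 * from which container_of() recovers the enclosing trace_array.
 */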
8667 static void get_tr_index(void *data, struct trace_array **ptr,
8668 			 unsigned int *pindex)
8669 {
8670 	*pindex = *(unsigned char *)data;
8671 
8672 	*ptr = container_of(data - *pindex, struct trace_array,
8673 			    trace_flags_index);
8674 }
8675 
8676 static ssize_t
8677 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8678 			loff_t *ppos)
8679 {
8680 	void *tr_index = filp->private_data;
8681 	struct trace_array *tr;
8682 	unsigned int index;
8683 	char *buf;
8684 
8685 	get_tr_index(tr_index, &tr, &index);
8686 
8687 	if (tr->trace_flags & (1 << index))
8688 		buf = "1\n";
8689 	else
8690 		buf = "0\n";
8691 
8692 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8693 }
8694 
8695 static ssize_t
8696 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8697 			 loff_t *ppos)
8698 {
8699 	void *tr_index = filp->private_data;
8700 	struct trace_array *tr;
8701 	unsigned int index;
8702 	unsigned long val;
8703 	int ret;
8704 
8705 	get_tr_index(tr_index, &tr, &index);
8706 
8707 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8708 	if (ret)
8709 		return ret;
8710 
8711 	if (val != 0 && val != 1)
8712 		return -EINVAL;
8713 
8714 	mutex_lock(&event_mutex);
8715 	mutex_lock(&trace_types_lock);
8716 	ret = set_tracer_flag(tr, 1 << index, val);
8717 	mutex_unlock(&trace_types_lock);
8718 	mutex_unlock(&event_mutex);
8719 
8720 	if (ret < 0)
8721 		return ret;
8722 
8723 	*ppos += cnt;
8724 
8725 	return cnt;
8726 }
8727 
8728 static const struct file_operations trace_options_core_fops = {
8729 	.open = tracing_open_generic,
8730 	.read = trace_options_core_read,
8731 	.write = trace_options_core_write,
8732 	.llseek = generic_file_llseek,
8733 };
8734 
8735 struct dentry *trace_create_file(const char *name,
8736 				 umode_t mode,
8737 				 struct dentry *parent,
8738 				 void *data,
8739 				 const struct file_operations *fops)
8740 {
8741 	struct dentry *ret;
8742 
8743 	ret = tracefs_create_file(name, mode, parent, data, fops);
8744 	if (!ret)
8745 		pr_warn("Could not create tracefs '%s' entry\n", name);
8746 
8747 	return ret;
8748 }
8749 
8750 
8751 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8752 {
8753 	struct dentry *d_tracer;
8754 
8755 	if (tr->options)
8756 		return tr->options;
8757 
8758 	d_tracer = tracing_get_dentry(tr);
8759 	if (IS_ERR(d_tracer))
8760 		return NULL;
8761 
8762 	tr->options = tracefs_create_dir("options", d_tracer);
8763 	if (!tr->options) {
8764 		pr_warn("Could not create tracefs directory 'options'\n");
8765 		return NULL;
8766 	}
8767 
8768 	return tr->options;
8769 }
8770 
8771 static void
8772 create_trace_option_file(struct trace_array *tr,
8773 			 struct trace_option_dentry *topt,
8774 			 struct tracer_flags *flags,
8775 			 struct tracer_opt *opt)
8776 {
8777 	struct dentry *t_options;
8778 
8779 	t_options = trace_options_init_dentry(tr);
8780 	if (!t_options)
8781 		return;
8782 
8783 	topt->flags = flags;
8784 	topt->opt = opt;
8785 	topt->tr = tr;
8786 
8787 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8788 					t_options, topt, &trace_options_fops);
8789 
8790 }
8791 
8792 static void
8793 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8794 {
8795 	struct trace_option_dentry *topts;
8796 	struct trace_options *tr_topts;
8797 	struct tracer_flags *flags;
8798 	struct tracer_opt *opts;
8799 	int cnt;
8800 	int i;
8801 
8802 	if (!tracer)
8803 		return;
8804 
8805 	flags = tracer->flags;
8806 
8807 	if (!flags || !flags->opts)
8808 		return;
8809 
8810 	/*
8811 	 * If this is an instance, only create flags for tracers
8812 	 * the instance may have.
8813 	 */
8814 	if (!trace_ok_for_array(tracer, tr))
8815 		return;
8816 
8817 	for (i = 0; i < tr->nr_topts; i++) {
8818 		/* Make sure there are no duplicate flags. */
8819 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8820 			return;
8821 	}
8822 
8823 	opts = flags->opts;
8824 
8825 	for (cnt = 0; opts[cnt].name; cnt++)
8826 		;
8827 
8828 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8829 	if (!topts)
8830 		return;
8831 
8832 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8833 			    GFP_KERNEL);
8834 	if (!tr_topts) {
8835 		kfree(topts);
8836 		return;
8837 	}
8838 
8839 	tr->topts = tr_topts;
8840 	tr->topts[tr->nr_topts].tracer = tracer;
8841 	tr->topts[tr->nr_topts].topts = topts;
8842 	tr->nr_topts++;
8843 
8844 	for (cnt = 0; opts[cnt].name; cnt++) {
8845 		create_trace_option_file(tr, &topts[cnt], flags,
8846 					 &opts[cnt]);
8847 		MEM_FAIL(topts[cnt].entry == NULL,
8848 			  "Failed to create trace option: %s",
8849 			  opts[cnt].name);
8850 	}
8851 }
8852 
8853 static struct dentry *
8854 create_trace_option_core_file(struct trace_array *tr,
8855 			      const char *option, long index)
8856 {
8857 	struct dentry *t_options;
8858 
8859 	t_options = trace_options_init_dentry(tr);
8860 	if (!t_options)
8861 		return NULL;
8862 
8863 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8864 				 (void *)&tr->trace_flags_index[index],
8865 				 &trace_options_core_fops);
8866 }
8867 
8868 static void create_trace_options_dir(struct trace_array *tr)
8869 {
8870 	struct dentry *t_options;
8871 	bool top_level = tr == &global_trace;
8872 	int i;
8873 
8874 	t_options = trace_options_init_dentry(tr);
8875 	if (!t_options)
8876 		return;
8877 
8878 	for (i = 0; trace_options[i]; i++) {
8879 		if (top_level ||
8880 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8881 			create_trace_option_core_file(tr, trace_options[i], i);
8882 	}
8883 }
8884 
8885 static ssize_t
8886 rb_simple_read(struct file *filp, char __user *ubuf,
8887 	       size_t cnt, loff_t *ppos)
8888 {
8889 	struct trace_array *tr = filp->private_data;
8890 	char buf[64];
8891 	int r;
8892 
8893 	r = tracer_tracing_is_on(tr);
8894 	r = sprintf(buf, "%d\n", r);
8895 
8896 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8897 }
8898 
8899 static ssize_t
8900 rb_simple_write(struct file *filp, const char __user *ubuf,
8901 		size_t cnt, loff_t *ppos)
8902 {
8903 	struct trace_array *tr = filp->private_data;
8904 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8905 	unsigned long val;
8906 	int ret;
8907 
8908 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8909 	if (ret)
8910 		return ret;
8911 
8912 	if (buffer) {
8913 		mutex_lock(&trace_types_lock);
8914 		if (!!val == tracer_tracing_is_on(tr)) {
8915 			val = 0; /* do nothing */
8916 		} else if (val) {
8917 			tracer_tracing_on(tr);
8918 			if (tr->current_trace->start)
8919 				tr->current_trace->start(tr);
8920 		} else {
8921 			tracer_tracing_off(tr);
8922 			if (tr->current_trace->stop)
8923 				tr->current_trace->stop(tr);
8924 			/* Wake up any waiters */
8925 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
8926 		}
8927 		mutex_unlock(&trace_types_lock);
8928 	}
8929 
8930 	(*ppos)++;
8931 
8932 	return cnt;
8933 }
8934 
8935 static const struct file_operations rb_simple_fops = {
8936 	.open		= tracing_open_generic_tr,
8937 	.read		= rb_simple_read,
8938 	.write		= rb_simple_write,
8939 	.release	= tracing_release_generic_tr,
8940 	.llseek		= default_llseek,
8941 };
8942 
8943 static ssize_t
8944 buffer_percent_read(struct file *filp, char __user *ubuf,
8945 		    size_t cnt, loff_t *ppos)
8946 {
8947 	struct trace_array *tr = filp->private_data;
8948 	char buf[64];
8949 	int r;
8950 
8951 	r = tr->buffer_percent;
8952 	r = sprintf(buf, "%d\n", r);
8953 
8954 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8955 }
8956 
8957 static ssize_t
8958 buffer_percent_write(struct file *filp, const char __user *ubuf,
8959 		     size_t cnt, loff_t *ppos)
8960 {
8961 	struct trace_array *tr = filp->private_data;
8962 	unsigned long val;
8963 	int ret;
8964 
8965 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8966 	if (ret)
8967 		return ret;
8968 
8969 	if (val > 100)
8970 		return -EINVAL;
8971 
8972 	tr->buffer_percent = val;
8973 
8974 	(*ppos)++;
8975 
8976 	return cnt;
8977 }
8978 
8979 static const struct file_operations buffer_percent_fops = {
8980 	.open		= tracing_open_generic_tr,
8981 	.read		= buffer_percent_read,
8982 	.write		= buffer_percent_write,
8983 	.release	= tracing_release_generic_tr,
8984 	.llseek		= default_llseek,
8985 };
8986 
8987 static ssize_t
8988 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8989 {
8990 	struct trace_array *tr = filp->private_data;
8991 	size_t size;
8992 	char buf[64];
8993 	int order;
8994 	int r;
8995 
8996 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
8997 	size = (PAGE_SIZE << order) / 1024;
8998 
8999 	r = sprintf(buf, "%zd\n", size);
9000 
9001 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9002 }
9003 
9004 static ssize_t
9005 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9006 			 size_t cnt, loff_t *ppos)
9007 {
9008 	struct trace_array *tr = filp->private_data;
9009 	unsigned long val;
9010 	int old_order;
9011 	int order;
9012 	int pages;
9013 	int ret;
9014 
9015 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9016 	if (ret)
9017 		return ret;
9018 
9019 	val *= 1024; /* value passed in is in KB */
9020 
9021 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9022 	order = fls(pages - 1);
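	/*
	 * e.g. with 4K pages, writing 8 (KB) gives pages = 2 and order = 1
	 * (a two page sub-buffer); sizes round up to the next power-of-two
	 * number of pages.
	 */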
9023 
9024 	/* limit between 1 and 128 system pages */
9025 	if (order < 0 || order > 7)
9026 		return -EINVAL;
9027 
9028 	/* Do not allow tracing while changing the order of the ring buffer */
9029 	tracing_stop_tr(tr);
9030 
9031 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9032 	if (old_order == order)
9033 		goto out;
9034 
9035 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9036 	if (ret)
9037 		goto out;
9038 
9039 #ifdef CONFIG_TRACER_MAX_TRACE
9040 
9041 	if (!tr->allocated_snapshot)
9042 		goto out_max;
9043 
9044 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9045 	if (ret) {
9046 		/* Put back the old order */
9047 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9048 		if (WARN_ON_ONCE(cnt)) {
9049 			/*
9050 			 * AARGH! We are left with different orders!
9051 			 * The max buffer is our "snapshot" buffer.
9052 			 * When a tracer needs a snapshot (one of the
9053 			 * latency tracers), it swaps the max buffer
9054 			 * with the saved snapshot. We succeeded in
9055 			 * updating the order of the main buffer, but failed
9056 			 * to update the order of the max buffer. But when we
9057 			 * tried to reset the main buffer to its original
9058 			 * order, we failed there too. This is very unlikely to
9059 			 * happen, but if it does, warn and kill all
9060 			 * tracing.
9061 			 */
9062 			tracing_disabled = 1;
9063 		}
9064 		goto out;
9065 	}
9066  out_max:
9067 #endif
9068 	(*ppos)++;
9069  out:
9070 	if (ret)
9071 		cnt = ret;
9072 	tracing_start_tr(tr);
9073 	return cnt;
9074 }
9075 
9076 static const struct file_operations buffer_subbuf_size_fops = {
9077 	.open		= tracing_open_generic_tr,
9078 	.read		= buffer_subbuf_size_read,
9079 	.write		= buffer_subbuf_size_write,
9080 	.release	= tracing_release_generic_tr,
9081 	.llseek		= default_llseek,
9082 };
9083 
9084 static struct dentry *trace_instance_dir;
9085 
9086 static void
9087 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9088 
9089 static int
9090 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9091 {
9092 	enum ring_buffer_flags rb_flags;
9093 
9094 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9095 
9096 	buf->tr = tr;
9097 
9098 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9099 	if (!buf->buffer)
9100 		return -ENOMEM;
9101 
9102 	buf->data = alloc_percpu(struct trace_array_cpu);
9103 	if (!buf->data) {
9104 		ring_buffer_free(buf->buffer);
9105 		buf->buffer = NULL;
9106 		return -ENOMEM;
9107 	}
9108 
9109 	/* Allocate the first page for all buffers */
9110 	set_buffer_entries(&tr->array_buffer,
9111 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9112 
9113 	return 0;
9114 }
9115 
9116 static void free_trace_buffer(struct array_buffer *buf)
9117 {
9118 	if (buf->buffer) {
9119 		ring_buffer_free(buf->buffer);
9120 		buf->buffer = NULL;
9121 		free_percpu(buf->data);
9122 		buf->data = NULL;
9123 	}
9124 }
9125 
9126 static int allocate_trace_buffers(struct trace_array *tr, int size)
9127 {
9128 	int ret;
9129 
9130 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9131 	if (ret)
9132 		return ret;
9133 
9134 #ifdef CONFIG_TRACER_MAX_TRACE
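	/*
	 * Allocate the snapshot (max) buffer minimally unless a snapshot
	 * was requested at boot; it is sized to match the main buffer
	 * when a snapshot is later enabled.
	 */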
9135 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9136 				    allocate_snapshot ? size : 1);
9137 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9138 		free_trace_buffer(&tr->array_buffer);
9139 		return -ENOMEM;
9140 	}
9141 	tr->allocated_snapshot = allocate_snapshot;
9142 
9143 	allocate_snapshot = false;
9144 #endif
9145 
9146 	return 0;
9147 }
9148 
9149 static void free_trace_buffers(struct trace_array *tr)
9150 {
9151 	if (!tr)
9152 		return;
9153 
9154 	free_trace_buffer(&tr->array_buffer);
9155 
9156 #ifdef CONFIG_TRACER_MAX_TRACE
9157 	free_trace_buffer(&tr->max_buffer);
9158 #endif
9159 }
9160 
9161 static void init_trace_flags_index(struct trace_array *tr)
9162 {
9163 	int i;
9164 
9165 	/* Used by the trace options files */
9166 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9167 		tr->trace_flags_index[i] = i;
9168 }
9169 
9170 static void __update_tracer_options(struct trace_array *tr)
9171 {
9172 	struct tracer *t;
9173 
9174 	for (t = trace_types; t; t = t->next)
9175 		add_tracer_options(tr, t);
9176 }
9177 
9178 static void update_tracer_options(struct trace_array *tr)
9179 {
9180 	mutex_lock(&trace_types_lock);
9181 	tracer_options_updated = true;
9182 	__update_tracer_options(tr);
9183 	mutex_unlock(&trace_types_lock);
9184 }
9185 
9186 /* Must have trace_types_lock held */
9187 struct trace_array *trace_array_find(const char *instance)
9188 {
9189 	struct trace_array *tr, *found = NULL;
9190 
9191 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9192 		if (tr->name && strcmp(tr->name, instance) == 0) {
9193 			found = tr;
9194 			break;
9195 		}
9196 	}
9197 
9198 	return found;
9199 }
9200 
9201 struct trace_array *trace_array_find_get(const char *instance)
9202 {
9203 	struct trace_array *tr;
9204 
9205 	mutex_lock(&trace_types_lock);
9206 	tr = trace_array_find(instance);
9207 	if (tr)
9208 		tr->ref++;
9209 	mutex_unlock(&trace_types_lock);
9210 
9211 	return tr;
9212 }
9213 
9214 static int trace_array_create_dir(struct trace_array *tr)
9215 {
9216 	int ret;
9217 
9218 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9219 	if (!tr->dir)
9220 		return -EINVAL;
9221 
9222 	ret = event_trace_add_tracer(tr->dir, tr);
9223 	if (ret) {
9224 		tracefs_remove(tr->dir);
9225 		return ret;
9226 	}
9227 
9228 	init_tracer_tracefs(tr, tr->dir);
9229 	__update_tracer_options(tr);
9230 
9231 	return ret;
9232 }
9233 
9234 static struct trace_array *
9235 trace_array_create_systems(const char *name, const char *systems)
9236 {
9237 	struct trace_array *tr;
9238 	int ret;
9239 
9240 	ret = -ENOMEM;
9241 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9242 	if (!tr)
9243 		return ERR_PTR(ret);
9244 
9245 	tr->name = kstrdup(name, GFP_KERNEL);
9246 	if (!tr->name)
9247 		goto out_free_tr;
9248 
9249 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9250 		goto out_free_tr;
9251 
9252 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9253 		goto out_free_tr;
9254 
9255 	if (systems) {
9256 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9257 		if (!tr->system_names)
9258 			goto out_free_tr;
9259 	}
9260 
9261 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9262 
9263 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9264 
9265 	raw_spin_lock_init(&tr->start_lock);
9266 
9267 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9268 #ifdef CONFIG_TRACER_MAX_TRACE
9269 	spin_lock_init(&tr->snapshot_trigger_lock);
9270 #endif
9271 	tr->current_trace = &nop_trace;
9272 
9273 	INIT_LIST_HEAD(&tr->systems);
9274 	INIT_LIST_HEAD(&tr->events);
9275 	INIT_LIST_HEAD(&tr->hist_vars);
9276 	INIT_LIST_HEAD(&tr->err_log);
9277 
9278 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9279 		goto out_free_tr;
9280 
9281 	/* The ring buffer is expanded by default */
9282 	trace_set_ring_buffer_expanded(tr);
9283 
9284 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9285 		goto out_free_tr;
9286 
9287 	ftrace_init_trace_array(tr);
9288 
9289 	init_trace_flags_index(tr);
9290 
9291 	if (trace_instance_dir) {
9292 		ret = trace_array_create_dir(tr);
9293 		if (ret)
9294 			goto out_free_tr;
9295 	} else
9296 		__trace_early_add_events(tr);
9297 
9298 	list_add(&tr->list, &ftrace_trace_arrays);
9299 
9300 	tr->ref++;
9301 
9302 	return tr;
9303 
9304  out_free_tr:
9305 	ftrace_free_ftrace_ops(tr);
9306 	free_trace_buffers(tr);
9307 	free_cpumask_var(tr->pipe_cpumask);
9308 	free_cpumask_var(tr->tracing_cpumask);
9309 	kfree_const(tr->system_names);
9310 	kfree(tr->name);
9311 	kfree(tr);
9312 
9313 	return ERR_PTR(ret);
9314 }
9315 
9316 static struct trace_array *trace_array_create(const char *name)
9317 {
9318 	return trace_array_create_systems(name, NULL);
9319 }
9320 
9321 static int instance_mkdir(const char *name)
9322 {
9323 	struct trace_array *tr;
9324 	int ret;
9325 
9326 	mutex_lock(&event_mutex);
9327 	mutex_lock(&trace_types_lock);
9328 
9329 	ret = -EEXIST;
9330 	if (trace_array_find(name))
9331 		goto out_unlock;
9332 
9333 	tr = trace_array_create(name);
9334 
9335 	ret = PTR_ERR_OR_ZERO(tr);
9336 
9337 out_unlock:
9338 	mutex_unlock(&trace_types_lock);
9339 	mutex_unlock(&event_mutex);
9340 	return ret;
9341 }
9342 
9343 /**
9344  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9345  * @name: The name of the trace array to be looked up/created.
9346  * @systems: A list of systems to create event directories for (NULL for all)
9347  *
9348  * Returns a pointer to the trace array with the given name, or
9349  * NULL if it cannot be created.
9350  *
9351  * NOTE: This function increments the reference counter associated with the
9352  * trace array returned. This makes sure it cannot be freed while in use.
9353  * Use trace_array_put() once the trace array is no longer needed.
9354  * If the trace_array is to be freed, trace_array_destroy() needs to
9355  * be called after the trace_array_put(), or simply let user space delete
9356  * it from the tracefs instances directory. But until the
9357  * trace_array_put() is called, user space cannot delete it.
9358  *
9359  */
9360 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9361 {
9362 	struct trace_array *tr;
9363 
9364 	mutex_lock(&event_mutex);
9365 	mutex_lock(&trace_types_lock);
9366 
9367 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9368 		if (tr->name && strcmp(tr->name, name) == 0)
9369 			goto out_unlock;
9370 	}
9371 
9372 	tr = trace_array_create_systems(name, systems);
9373 
9374 	if (IS_ERR(tr))
9375 		tr = NULL;
9376 out_unlock:
9377 	if (tr)
9378 		tr->ref++;
9379 
9380 	mutex_unlock(&trace_types_lock);
9381 	mutex_unlock(&event_mutex);
9382 	return tr;
9383 }
9384 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9385 
9386 static int __remove_instance(struct trace_array *tr)
9387 {
9388 	int i;
9389 
9390 	/* Reference counter for a newly created trace array = 1. */
9391 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9392 		return -EBUSY;
9393 
9394 	list_del(&tr->list);
9395 
9396 	/* Disable all the flags that were enabled coming in */
9397 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9398 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9399 			set_tracer_flag(tr, 1 << i, 0);
9400 	}
9401 
9402 	tracing_set_nop(tr);
9403 	clear_ftrace_function_probes(tr);
9404 	event_trace_del_tracer(tr);
9405 	ftrace_clear_pids(tr);
9406 	ftrace_destroy_function_files(tr);
9407 	tracefs_remove(tr->dir);
9408 	free_percpu(tr->last_func_repeats);
9409 	free_trace_buffers(tr);
9410 	clear_tracing_err_log(tr);
9411 
9412 	for (i = 0; i < tr->nr_topts; i++) {
9413 		kfree(tr->topts[i].topts);
9414 	}
9415 	kfree(tr->topts);
9416 
9417 	free_cpumask_var(tr->pipe_cpumask);
9418 	free_cpumask_var(tr->tracing_cpumask);
9419 	kfree_const(tr->system_names);
9420 	kfree(tr->name);
9421 	kfree(tr);
9422 
9423 	return 0;
9424 }
9425 
9426 int trace_array_destroy(struct trace_array *this_tr)
9427 {
9428 	struct trace_array *tr;
9429 	int ret;
9430 
9431 	if (!this_tr)
9432 		return -EINVAL;
9433 
9434 	mutex_lock(&event_mutex);
9435 	mutex_lock(&trace_types_lock);
9436 
9437 	ret = -ENODEV;
9438 
9439 	/* Make sure the trace array exists before destroying it. */
9440 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9441 		if (tr == this_tr) {
9442 			ret = __remove_instance(tr);
9443 			break;
9444 		}
9445 	}
9446 
9447 	mutex_unlock(&trace_types_lock);
9448 	mutex_unlock(&event_mutex);
9449 
9450 	return ret;
9451 }
9452 EXPORT_SYMBOL_GPL(trace_array_destroy);
9453 
9454 static int instance_rmdir(const char *name)
9455 {
9456 	struct trace_array *tr;
9457 	int ret;
9458 
9459 	mutex_lock(&event_mutex);
9460 	mutex_lock(&trace_types_lock);
9461 
9462 	ret = -ENODEV;
9463 	tr = trace_array_find(name);
9464 	if (tr)
9465 		ret = __remove_instance(tr);
9466 
9467 	mutex_unlock(&trace_types_lock);
9468 	mutex_unlock(&event_mutex);
9469 
9470 	return ret;
9471 }
9472 
9473 static __init void create_trace_instances(struct dentry *d_tracer)
9474 {
9475 	struct trace_array *tr;
9476 
9477 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9478 							 instance_mkdir,
9479 							 instance_rmdir);
9480 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9481 		return;
9482 
9483 	mutex_lock(&event_mutex);
9484 	mutex_lock(&trace_types_lock);
9485 
9486 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9487 		if (!tr->name)
9488 			continue;
9489 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9490 			     "Failed to create instance directory\n"))
9491 			break;
9492 	}
9493 
9494 	mutex_unlock(&trace_types_lock);
9495 	mutex_unlock(&event_mutex);
9496 }
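
/*
 * Editor's note: the "instances" directory created above is driven from
 * user space; a sketch of its use (paths assume the usual tracefs mount
 * point, and "foo" is an illustrative name):
 *
 *	mkdir /sys/kernel/tracing/instances/foo	  # calls instance_mkdir()
 *	rmdir /sys/kernel/tracing/instances/foo	  # calls instance_rmdir()
 *
 * The rmdir fails with -EBUSY while the instance still holds readers or an
 * extra kernel reference (see __remove_instance() above).
 */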
9497 
9498 static void
9499 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9500 {
9501 	int cpu;
9502 
9503 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9504 			tr, &show_traces_fops);
9505 
9506 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9507 			tr, &set_tracer_fops);
9508 
9509 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9510 			  tr, &tracing_cpumask_fops);
9511 
9512 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9513 			  tr, &tracing_iter_fops);
9514 
9515 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9516 			  tr, &tracing_fops);
9517 
9518 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9519 			  tr, &tracing_pipe_fops);
9520 
9521 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9522 			  tr, &tracing_entries_fops);
9523 
9524 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9525 			  tr, &tracing_total_entries_fops);
9526 
9527 	trace_create_file("free_buffer", 0200, d_tracer,
9528 			  tr, &tracing_free_buffer_fops);
9529 
9530 	trace_create_file("trace_marker", 0220, d_tracer,
9531 			  tr, &tracing_mark_fops);
9532 
9533 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9534 
9535 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9536 			  tr, &tracing_mark_raw_fops);
9537 
9538 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9539 			  &trace_clock_fops);
9540 
9541 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9542 			  tr, &rb_simple_fops);
9543 
9544 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9545 			  &trace_time_stamp_mode_fops);
9546 
9547 	tr->buffer_percent = 50;
9548 
9549 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9550 			tr, &buffer_percent_fops);
9551 
9552 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9553 			  tr, &buffer_subbuf_size_fops);
9554 
9555 	create_trace_options_dir(tr);
9556 
9557 #ifdef CONFIG_TRACER_MAX_TRACE
9558 	trace_create_maxlat_file(tr, d_tracer);
9559 #endif
9560 
9561 	if (ftrace_create_function_files(tr, d_tracer))
9562 		MEM_FAIL(1, "Could not allocate function filter files");
9563 
9564 #ifdef CONFIG_TRACER_SNAPSHOT
9565 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9566 			  tr, &snapshot_fops);
9567 #endif
9568 
9569 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9570 			  tr, &tracing_err_log_fops);
9571 
9572 	for_each_tracing_cpu(cpu)
9573 		tracing_init_tracefs_percpu(tr, cpu);
9574 
9575 	ftrace_init_tracefs(tr, d_tracer);
9576 }
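
/*
 * Editor's note: a short user-space sketch of the per-instance control
 * files created above (the instance name "foo" is illustrative):
 *
 *	cd /sys/kernel/tracing/instances/foo
 *	echo 0 > tracing_on		# rb_simple_fops
 *	echo 4096 > buffer_size_kb	# tracing_entries_fops
 *	echo 1 > tracing_on
 *	cat trace_pipe			# tracing_pipe_fops (consuming read)
 */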
9577 
9578 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9579 {
9580 	struct vfsmount *mnt;
9581 	struct file_system_type *type;
9582 
9583 	/*
9584 	 * To maintain backward compatibility for tools that mount
9585 	 * debugfs to get to the tracing facility, tracefs is automatically
9586 	 * mounted to the debugfs/tracing directory.
9587 	 */
9588 	type = get_fs_type("tracefs");
9589 	if (!type)
9590 		return NULL;
9591 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9592 	put_filesystem(type);
9593 	if (IS_ERR(mnt))
9594 		return NULL;
9595 	mntget(mnt);
9596 
9597 	return mnt;
9598 }
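
/*
 * Editor's note: a sketch of what the automount above provides for legacy
 * tooling (assuming debugfs is mounted in its usual location):
 *
 *	mount -t debugfs nodev /sys/kernel/debug
 *	ls /sys/kernel/debug/tracing	# steps into the automount point
 *
 * Walking into debugfs/tracing triggers trace_automount(), which mounts a
 * tracefs submount there, so older tools keep working without mounting
 * tracefs themselves.
 */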
9599 
9600 /**
9601  * tracing_init_dentry - initialize top level trace array
9602  *
9603  * This is called when creating files or directories in the tracing
9604  * directory. It is called via fs_initcall() by any of the boot up code.
9605  * Returns 0 on success, or a negative error code on failure.
9606  */
9607 int tracing_init_dentry(void)
9608 {
9609 	struct trace_array *tr = &global_trace;
9610 
9611 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9612 		pr_warn("Tracing disabled due to lockdown\n");
9613 		return -EPERM;
9614 	}
9615 
9616 	/* The top level trace array uses NULL as parent */
9617 	if (tr->dir)
9618 		return 0;
9619 
9620 	if (WARN_ON(!tracefs_initialized()))
9621 		return -ENODEV;
9622 
9623 	/*
9624 	 * As there may still be users that expect the tracing
9625 	 * files to exist in debugfs/tracing, we must automount
9626 	 * the tracefs file system there, so older tools still
9627 	 * work with the newer kernel.
9628 	 */
9629 	tr->dir = debugfs_create_automount("tracing", NULL,
9630 					   trace_automount, NULL);
9631 
9632 	return 0;
9633 }
9634 
9635 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9636 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9637 
9638 static struct workqueue_struct *eval_map_wq __initdata;
9639 static struct work_struct eval_map_work __initdata;
9640 static struct work_struct tracerfs_init_work __initdata;
9641 
9642 static void __init eval_map_work_func(struct work_struct *work)
9643 {
9644 	int len;
9645 
9646 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9647 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9648 }
9649 
9650 static int __init trace_eval_init(void)
9651 {
9652 	INIT_WORK(&eval_map_work, eval_map_work_func);
9653 
9654 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9655 	if (!eval_map_wq) {
9656 		pr_err("Unable to allocate eval_map_wq\n");
9657 		/* Fall back to doing the work synchronously */
9658 		eval_map_work_func(&eval_map_work);
9659 		return -ENOMEM;
9660 	}
9661 
9662 	queue_work(eval_map_wq, &eval_map_work);
9663 	return 0;
9664 }
9665 
9666 subsys_initcall(trace_eval_init);
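
/*
 * Editor's note: the eval maps inserted above come from TRACE_DEFINE_ENUM()
 * and TRACE_DEFINE_SIZEOF() uses in trace event headers. A sketch of how an
 * event header feeds this table (the enum and event names are illustrative):
 *
 *	TRACE_DEFINE_ENUM(MY_STATE_RUNNING);
 *
 *	TRACE_EVENT(my_event, ...,
 *		TP_printk("state=%s",
 *			  __print_symbolic(__entry->state,
 *					   { MY_STATE_RUNNING, "running" }))
 *	);
 *
 * The map lets user space resolve MY_STATE_RUNNING in the event's print
 * format to its numeric value.
 */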
9667 
9668 static int __init trace_eval_sync(void)
9669 {
9670 	/* Make sure the eval map updates are finished */
9671 	if (eval_map_wq)
9672 		destroy_workqueue(eval_map_wq);
9673 	return 0;
9674 }
9675 
9676 late_initcall_sync(trace_eval_sync);
9677 
9678 
9679 #ifdef CONFIG_MODULES
9680 static void trace_module_add_evals(struct module *mod)
9681 {
9682 	if (!mod->num_trace_evals)
9683 		return;
9684 
9685 	/*
9686 	 * Modules with bad taint do not have events created, do
9687 	 * not bother with enums either.
9688 	 */
9689 	if (trace_module_has_bad_taint(mod))
9690 		return;
9691 
9692 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9693 }
9694 
9695 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9696 static void trace_module_remove_evals(struct module *mod)
9697 {
9698 	union trace_eval_map_item *map;
9699 	union trace_eval_map_item **last = &trace_eval_maps;
9700 
9701 	if (!mod->num_trace_evals)
9702 		return;
9703 
9704 	mutex_lock(&trace_eval_mutex);
9705 
9706 	map = trace_eval_maps;
9707 
9708 	while (map) {
9709 		if (map->head.mod == mod)
9710 			break;
9711 		map = trace_eval_jmp_to_tail(map);
9712 		last = &map->tail.next;
9713 		map = map->tail.next;
9714 	}
9715 	if (!map)
9716 		goto out;
9717 
9718 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9719 	kfree(map);
9720  out:
9721 	mutex_unlock(&trace_eval_mutex);
9722 }
9723 #else
9724 static inline void trace_module_remove_evals(struct module *mod) { }
9725 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9726 
9727 static int trace_module_notify(struct notifier_block *self,
9728 			       unsigned long val, void *data)
9729 {
9730 	struct module *mod = data;
9731 
9732 	switch (val) {
9733 	case MODULE_STATE_COMING:
9734 		trace_module_add_evals(mod);
9735 		break;
9736 	case MODULE_STATE_GOING:
9737 		trace_module_remove_evals(mod);
9738 		break;
9739 	}
9740 
9741 	return NOTIFY_OK;
9742 }
9743 
9744 static struct notifier_block trace_module_nb = {
9745 	.notifier_call = trace_module_notify,
9746 	.priority = 0,
9747 };
9748 #endif /* CONFIG_MODULES */
9749 
9750 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9751 {
9752 
9753 	event_trace_init();
9754 
9755 	init_tracer_tracefs(&global_trace, NULL);
9756 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9757 
9758 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9759 			&global_trace, &tracing_thresh_fops);
9760 
9761 	trace_create_file("README", TRACE_MODE_READ, NULL,
9762 			NULL, &tracing_readme_fops);
9763 
9764 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9765 			NULL, &tracing_saved_cmdlines_fops);
9766 
9767 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9768 			  NULL, &tracing_saved_cmdlines_size_fops);
9769 
9770 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9771 			NULL, &tracing_saved_tgids_fops);
9772 
9773 	trace_create_eval_file(NULL);
9774 
9775 #ifdef CONFIG_MODULES
9776 	register_module_notifier(&trace_module_nb);
9777 #endif
9778 
9779 #ifdef CONFIG_DYNAMIC_FTRACE
9780 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9781 			NULL, &tracing_dyn_info_fops);
9782 #endif
9783 
9784 	create_trace_instances(NULL);
9785 
9786 	update_tracer_options(&global_trace);
9787 }
9788 
9789 static __init int tracer_init_tracefs(void)
9790 {
9791 	int ret;
9792 
9793 	trace_access_lock_init();
9794 
9795 	ret = tracing_init_dentry();
9796 	if (ret)
9797 		return 0;
9798 
9799 	if (eval_map_wq) {
9800 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9801 		queue_work(eval_map_wq, &tracerfs_init_work);
9802 	} else {
9803 		tracer_init_tracefs_work_func(NULL);
9804 	}
9805 
9806 	rv_init_interface();
9807 
9808 	return 0;
9809 }
9810 
9811 fs_initcall(tracer_init_tracefs);
9812 
9813 static int trace_die_panic_handler(struct notifier_block *self,
9814 				unsigned long ev, void *unused);
9815 
9816 static struct notifier_block trace_panic_notifier = {
9817 	.notifier_call = trace_die_panic_handler,
9818 	.priority = INT_MAX - 1,
9819 };
9820 
9821 static struct notifier_block trace_die_notifier = {
9822 	.notifier_call = trace_die_panic_handler,
9823 	.priority = INT_MAX - 1,
9824 };
9825 
9826 /*
9827  * The idea is to execute the following die/panic callback early, in order
9828  * to avoid showing irrelevant information in the trace (like other panic
9829  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9830  * warnings get disabled (to prevent potential log flooding).
9831  */
9832 static int trace_die_panic_handler(struct notifier_block *self,
9833 				unsigned long ev, void *unused)
9834 {
9835 	if (!ftrace_dump_on_oops)
9836 		return NOTIFY_DONE;
9837 
9838 	/* The die notifier requires DIE_OOPS to trigger */
9839 	if (self == &trace_die_notifier && ev != DIE_OOPS)
9840 		return NOTIFY_DONE;
9841 
9842 	ftrace_dump(ftrace_dump_on_oops);
9843 
9844 	return NOTIFY_DONE;
9845 }
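
/*
 * Editor's note: a sketch of how the handler above is armed. Setting
 * ftrace_dump_on_oops to 1 dumps every CPU's buffer (DUMP_ALL); passing
 * "ftrace_dump_on_oops=orig_cpu" (value 2) dumps only the CPU that
 * triggered the oops (DUMP_ORIG):
 *
 *	ftrace_dump_on_oops				# boot command line
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	# at run time
 *
 * When it is left at 0, the handler returns NOTIFY_DONE without dumping.
 */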
9846 
9847 /*
9848  * printk() is limited to a maximum of 1024 characters; we really don't
9849  * need it that big. Nothing should be printing 1000 characters anyway.
9850  */
9851 #define TRACE_MAX_PRINT		1000
9852 
9853 /*
9854  * Define here KERN_TRACE so that we have one place to modify
9855  * it if we decide to change what log level the ftrace dump
9856  * should be at.
9857  */
9858 #define KERN_TRACE		KERN_EMERG
9859 
9860 void
9861 trace_printk_seq(struct trace_seq *s)
9862 {
9863 	/* Probably should print a warning here. */
9864 	if (s->seq.len >= TRACE_MAX_PRINT)
9865 		s->seq.len = TRACE_MAX_PRINT;
9866 
9867 	/*
9868 	 * More paranoid code. Although the buffer size is set to
9869 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9870 	 * an extra layer of protection.
9871 	 */
9872 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9873 		s->seq.len = s->seq.size - 1;
9874 
9875 	/* should already be NUL terminated, but we are paranoid. */
9876 	s->buffer[s->seq.len] = 0;
9877 
9878 	printk(KERN_TRACE "%s", s->buffer);
9879 
9880 	trace_seq_init(s);
9881 }
9882 
9883 void trace_init_global_iter(struct trace_iterator *iter)
9884 {
9885 	iter->tr = &global_trace;
9886 	iter->trace = iter->tr->current_trace;
9887 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9888 	iter->array_buffer = &global_trace.array_buffer;
9889 
9890 	if (iter->trace && iter->trace->open)
9891 		iter->trace->open(iter);
9892 
9893 	/* Annotate start of buffers if we had overruns */
9894 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9895 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9896 
9897 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9898 	if (trace_clocks[iter->tr->clock_id].in_ns)
9899 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9900 
9901 	/* Cannot use kmalloc() for iter.temp and iter.fmt (may run in panic context) */
9902 	iter->temp = static_temp_buf;
9903 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
9904 	iter->fmt = static_fmt_buf;
9905 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
9906 }
9907 
9908 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9909 {
9910 	/* use static because iter can be a bit big for the stack */
9911 	static struct trace_iterator iter;
9912 	static atomic_t dump_running;
9913 	struct trace_array *tr = &global_trace;
9914 	unsigned int old_userobj;
9915 	unsigned long flags;
9916 	int cnt = 0, cpu;
9917 
9918 	/* Only allow one dump user at a time. */
9919 	if (atomic_inc_return(&dump_running) != 1) {
9920 		atomic_dec(&dump_running);
9921 		return;
9922 	}
9923 
9924 	/*
9925 	 * Always turn off tracing when we dump.
9926 	 * We don't need to show trace output of what happens
9927 	 * between multiple crashes.
9928 	 *
9929 	 * If the user does a sysrq-z, then they can re-enable
9930 	 * tracing with echo 1 > tracing_on.
9931 	 */
9932 	tracing_off();
9933 
9934 	local_irq_save(flags);
9935 
9936 	/* Simulate the iterator */
9937 	trace_init_global_iter(&iter);
9938 
9939 	for_each_tracing_cpu(cpu) {
9940 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9941 	}
9942 
9943 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9944 
9945 	/* don't look at user memory in panic mode */
9946 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9947 
9948 	switch (oops_dump_mode) {
9949 	case DUMP_ALL:
9950 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9951 		break;
9952 	case DUMP_ORIG:
9953 		iter.cpu_file = raw_smp_processor_id();
9954 		break;
9955 	case DUMP_NONE:
9956 		goto out_enable;
9957 	default:
9958 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9959 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9960 	}
9961 
9962 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9963 
9964 	/* Did function tracer already get disabled? */
9965 	if (ftrace_is_dead()) {
9966 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9967 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9968 	}
9969 
9970 	/*
9971 	 * We need to stop all tracing on all CPUs to read
9972 	 * the next buffer. This is a bit expensive, but it is
9973 	 * not done often. We fill in all that we can read,
9974 	 * and then release the locks again.
9975 	 */
9976 
9977 	while (!trace_empty(&iter)) {
9978 
9979 		if (!cnt)
9980 			printk(KERN_TRACE "---------------------------------\n");
9981 
9982 		cnt++;
9983 
9984 		trace_iterator_reset(&iter);
9985 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9986 
9987 		if (trace_find_next_entry_inc(&iter) != NULL) {
9988 			int ret;
9989 
9990 			ret = print_trace_line(&iter);
9991 			if (ret != TRACE_TYPE_NO_CONSUME)
9992 				trace_consume(&iter);
9993 		}
9994 		touch_nmi_watchdog();
9995 
9996 		trace_printk_seq(&iter.seq);
9997 	}
9998 
9999 	if (!cnt)
10000 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10001 	else
10002 		printk(KERN_TRACE "---------------------------------\n");
10003 
10004  out_enable:
10005 	tr->trace_flags |= old_userobj;
10006 
10007 	for_each_tracing_cpu(cpu) {
10008 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10009 	}
10010 	atomic_dec(&dump_running);
10011 	local_irq_restore(flags);
10012 }
10013 EXPORT_SYMBOL_GPL(ftrace_dump);
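
/*
 * Editor's note: besides the die/panic path, ftrace_dump() is exported and
 * can be called directly from debugging code; a sketch:
 *
 *	ftrace_dump(DUMP_ALL);		dumps every CPU's buffer to the console
 *	ftrace_dump(DUMP_ORIG);		dumps only the current CPU's buffer
 *
 * The dump turns tracing off (see tracing_off() above); it has to be
 * re-enabled explicitly, e.g. with "echo 1 > tracing_on".
 */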
10014 
10015 #define WRITE_BUFSIZE  4096
10016 
10017 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10018 				size_t count, loff_t *ppos,
10019 				int (*createfn)(const char *))
10020 {
10021 	char *kbuf, *buf, *tmp;
10022 	int ret = 0;
10023 	size_t done = 0;
10024 	size_t size;
10025 
10026 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10027 	if (!kbuf)
10028 		return -ENOMEM;
10029 
10030 	while (done < count) {
10031 		size = count - done;
10032 
10033 		if (size >= WRITE_BUFSIZE)
10034 			size = WRITE_BUFSIZE - 1;
10035 
10036 		if (copy_from_user(kbuf, buffer + done, size)) {
10037 			ret = -EFAULT;
10038 			goto out;
10039 		}
10040 		kbuf[size] = '\0';
10041 		buf = kbuf;
10042 		do {
10043 			tmp = strchr(buf, '\n');
10044 			if (tmp) {
10045 				*tmp = '\0';
10046 				size = tmp - buf + 1;
10047 			} else {
10048 				size = strlen(buf);
10049 				if (done + size < count) {
10050 					if (buf != kbuf)
10051 						break;
10052 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10053 					pr_warn("Line length is too long: Should be less than %d\n",
10054 						WRITE_BUFSIZE - 2);
10055 					ret = -EINVAL;
10056 					goto out;
10057 				}
10058 			}
10059 			done += size;
10060 
10061 			/* Remove comments */
10062 			tmp = strchr(buf, '#');
10063 
10064 			if (tmp)
10065 				*tmp = '\0';
10066 
10067 			ret = createfn(buf);
10068 			if (ret)
10069 				goto out;
10070 			buf += size;
10071 
10072 		} while (done < count);
10073 	}
10074 	ret = done;
10075 
10076 out:
10077 	kfree(kbuf);
10078 
10079 	return ret;
10080 }
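
/*
 * Editor's note: trace_parse_run_command() backs command-style tracefs
 * files such as kprobe_events. Each newline-separated command is handed to
 * createfn() with any '#' comment stripped; a sketch (the probe name
 * "my_probe" and the probed symbol are illustrative):
 *
 *	echo 'p:my_probe do_sys_open	# comment is dropped' > kprobe_events
 *	echo '-:my_probe' >> kprobe_events
 *
 * A single write may carry several commands, as long as each line fits in
 * WRITE_BUFSIZE - 2 bytes.
 */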
10081 
10082 #ifdef CONFIG_TRACER_MAX_TRACE
10083 __init static bool tr_needs_alloc_snapshot(const char *name)
10084 {
10085 	char *test;
10086 	int len = strlen(name);
10087 	bool ret;
10088 
10089 	if (!boot_snapshot_index)
10090 		return false;
10091 
10092 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10093 	    boot_snapshot_info[len] == '\t')
10094 		return true;
10095 
10096 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10097 	if (!test)
10098 		return false;
10099 
10100 	sprintf(test, "\t%s\t", name);
10101 	ret = strstr(boot_snapshot_info, test) != NULL;
10102 	kfree(test);
10103 	return ret;
10104 }
10105 
10106 __init static void do_allocate_snapshot(const char *name)
10107 {
10108 	if (!tr_needs_alloc_snapshot(name))
10109 		return;
10110 
10111 	/*
10112 	 * When allocate_snapshot is set, the next call to
10113 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10114 	 * will allocate the snapshot buffer. That will also clear
10115 	 * this flag.
10116 	 */
10117 	allocate_snapshot = true;
10118 }
10119 #else
10120 static inline void do_allocate_snapshot(const char *name) { }
10121 #endif
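
/*
 * Editor's note: a sketch of the boot command line that feeds
 * boot_snapshot_info (the instance name "foo" is illustrative); names are
 * stored tab-separated, which is what tr_needs_alloc_snapshot() matches
 * against:
 *
 *	trace_instance=foo,sched:sched_switch ftrace_boot_snapshot=foo
 *
 * With that, the "foo" instance gets a snapshot buffer allocated when it is
 * created during early boot.
 */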
10122 
10123 __init static void enable_instances(void)
10124 {
10125 	struct trace_array *tr;
10126 	char *curr_str;
10127 	char *str;
10128 	char *tok;
10129 
10130 	/* A tab is always appended */
10131 	boot_instance_info[boot_instance_index - 1] = '\0';
10132 	str = boot_instance_info;
10133 
10134 	while ((curr_str = strsep(&str, "\t"))) {
10135 
10136 		tok = strsep(&curr_str, ",");
10137 
10138 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10139 			do_allocate_snapshot(tok);
10140 
10141 		tr = trace_array_get_by_name(tok, NULL);
10142 		if (!tr) {
10143 			pr_warn("Failed to create instance buffer %s\n", tok);
10144 			continue;
10145 		}
10146 		/* Allow user space to delete it */
10147 		trace_array_put(tr);
10148 
10149 		while ((tok = strsep(&curr_str, ","))) {
10150 			early_enable_events(tr, tok, true);
10151 		}
10152 	}
10153 }
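
/*
 * Editor's note: a sketch of the "trace_instance=" boot parameter parsed
 * above; the first comma-separated token is the instance name and the
 * remaining tokens are events to enable in it (names are illustrative):
 *
 *	trace_instance=foo,sched:sched_switch,irq:irq_handler_entry
 *
 * This creates the "foo" instance during early boot and enables the listed
 * events via early_enable_events().
 */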
10154 
10155 __init static int tracer_alloc_buffers(void)
10156 {
10157 	int ring_buf_size;
10158 	int ret = -ENOMEM;
10159 
10160 
10161 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10162 		pr_warn("Tracing disabled due to lockdown\n");
10163 		return -EPERM;
10164 	}
10165 
10166 	/*
10167 	 * Make sure we don't accidentally add more trace options
10168 	 * than we have bits for.
10169 	 */
10170 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10171 
10172 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10173 		goto out;
10174 
10175 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10176 		goto out_free_buffer_mask;
10177 
10178 	/* Only allocate trace_printk buffers if a trace_printk exists */
10179 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10180 		/* Must be called before global_trace.buffer is allocated */
10181 		trace_printk_init_buffers();
10182 
10183 	/* To save memory, keep the ring buffer size to its minimum */
10184 	if (global_trace.ring_buffer_expanded)
10185 		ring_buf_size = trace_buf_size;
10186 	else
10187 		ring_buf_size = 1;
10188 
10189 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10190 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10191 
10192 	raw_spin_lock_init(&global_trace.start_lock);
10193 
10194 	/*
10195 	 * The prepare callback allocates some memory for the ring buffer. We
10196 	 * don't free the buffer if the CPU goes down. If we were to free
10197 	 * the buffer, then the user would lose any trace that was in the
10198 	 * buffer. The memory will be removed once the "instance" is removed.
10199 	 */
10200 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10201 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10202 				      NULL);
10203 	if (ret < 0)
10204 		goto out_free_cpumask;
10205 	/* Used for event triggers */
10206 	ret = -ENOMEM;
10207 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10208 	if (!temp_buffer)
10209 		goto out_rm_hp_state;
10210 
10211 	if (trace_create_savedcmd() < 0)
10212 		goto out_free_temp_buffer;
10213 
10214 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10215 		goto out_free_savedcmd;
10216 
10217 	/* TODO: make the number of buffers hot pluggable with CPUs */
10218 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10219 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10220 		goto out_free_pipe_cpumask;
10221 	}
10222 	if (global_trace.buffer_disabled)
10223 		tracing_off();
10224 
10225 	if (trace_boot_clock) {
10226 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10227 		if (ret < 0)
10228 			pr_warn("Trace clock %s not defined, going back to default\n",
10229 				trace_boot_clock);
10230 	}
10231 
10232 	/*
10233 	 * register_tracer() might reference current_trace, so it
10234 	 * needs to be set before we register anything. This is
10235 	 * just a bootstrap of current_trace anyway.
10236 	 */
10237 	global_trace.current_trace = &nop_trace;
10238 
10239 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10240 #ifdef CONFIG_TRACER_MAX_TRACE
10241 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10242 #endif
10243 	ftrace_init_global_array_ops(&global_trace);
10244 
10245 	init_trace_flags_index(&global_trace);
10246 
10247 	register_tracer(&nop_trace);
10248 
10249 	/* Function tracing may start here (via kernel command line) */
10250 	init_function_trace();
10251 
10252 	/* All seems OK, enable tracing */
10253 	tracing_disabled = 0;
10254 
10255 	atomic_notifier_chain_register(&panic_notifier_list,
10256 				       &trace_panic_notifier);
10257 
10258 	register_die_notifier(&trace_die_notifier);
10259 
10260 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10261 
10262 	INIT_LIST_HEAD(&global_trace.systems);
10263 	INIT_LIST_HEAD(&global_trace.events);
10264 	INIT_LIST_HEAD(&global_trace.hist_vars);
10265 	INIT_LIST_HEAD(&global_trace.err_log);
10266 	list_add(&global_trace.list, &ftrace_trace_arrays);
10267 
10268 	apply_trace_boot_options();
10269 
10270 	register_snapshot_cmd();
10271 
10272 	test_can_verify();
10273 
10274 	return 0;
10275 
10276 out_free_pipe_cpumask:
10277 	free_cpumask_var(global_trace.pipe_cpumask);
10278 out_free_savedcmd:
10279 	trace_free_saved_cmdlines_buffer();
10280 out_free_temp_buffer:
10281 	ring_buffer_free(temp_buffer);
10282 out_rm_hp_state:
10283 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10284 out_free_cpumask:
10285 	free_cpumask_var(global_trace.tracing_cpumask);
10286 out_free_buffer_mask:
10287 	free_cpumask_var(tracing_buffer_mask);
10288 out:
10289 	return ret;
10290 }
10291 
10292 void __init ftrace_boot_snapshot(void)
10293 {
10294 #ifdef CONFIG_TRACER_MAX_TRACE
10295 	struct trace_array *tr;
10296 
10297 	if (!snapshot_at_boot)
10298 		return;
10299 
10300 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10301 		if (!tr->allocated_snapshot)
10302 			continue;
10303 
10304 		tracing_snapshot_instance(tr);
10305 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10306 	}
10307 #endif
10308 }
10309 
10310 void __init early_trace_init(void)
10311 {
10312 	if (tracepoint_printk) {
10313 		tracepoint_print_iter =
10314 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10315 		if (MEM_FAIL(!tracepoint_print_iter,
10316 			     "Failed to allocate trace iterator\n"))
10317 			tracepoint_printk = 0;
10318 		else
10319 			static_key_enable(&tracepoint_printk_key.key);
10320 	}
10321 	tracer_alloc_buffers();
10322 
10323 	init_events();
10324 }
10325 
10326 void __init trace_init(void)
10327 {
10328 	trace_event_init();
10329 
10330 	if (boot_instance_index)
10331 		enable_instances();
10332 }
10333 
10334 __init static void clear_boot_tracer(void)
10335 {
10336 	/*
10337 	 * The default bootup tracer string lives in an init section.
10338 	 * This function is called at late_initcall time. If we did not
10339 	 * find the boot tracer by then, clear it out, to prevent a
10340 	 * later registration from accessing the buffer that is
10341 	 * about to be freed.
10342 	 */
10343 	if (!default_bootup_tracer)
10344 		return;
10345 
10346 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10347 	       default_bootup_tracer);
10348 	default_bootup_tracer = NULL;
10349 }
10350 
10351 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10352 __init static void tracing_set_default_clock(void)
10353 {
10354 	/* sched_clock_stable() is determined in late_initcall */
10355 	if (!trace_boot_clock && !sched_clock_stable()) {
10356 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10357 			pr_warn("Can not set tracing clock due to lockdown\n");
10358 			return;
10359 		}
10360 
10361 		printk(KERN_WARNING
10362 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10363 		       "If you want to keep using the local clock, then add:\n"
10364 		       "  \"trace_clock=local\"\n"
10365 		       "on the kernel command line\n");
10366 		tracing_set_clock(&global_trace, "global");
10367 	}
10368 }
10369 #else
10370 static inline void tracing_set_default_clock(void) { }
10371 #endif
10372 
10373 __init static int late_trace_init(void)
10374 {
10375 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10376 		static_key_disable(&tracepoint_printk_key.key);
10377 		tracepoint_printk = 0;
10378 	}
10379 
10380 	tracing_set_default_clock();
10381 	clear_boot_tracer();
10382 	return 0;
10383 }
10384 
10385 late_initcall_sync(late_trace_init);
10386