xref: /linux/kernel/trace/trace.c (revision 1c9f8dff62d85ce00b0e99f774a84bd783af7cac)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62 
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest will look into the ring buffer to count the
67  * entries inserted during the selftest, although concurrent
68  * insertions into the ring buffer, such as trace_printk, could occur
69  * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72 
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78 
79 void __init disable_tracing_selftest(const char *reason)
80 {
81 	if (!tracing_selftest_disabled) {
82 		tracing_selftest_disabled = true;
83 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
84 	}
85 }
86 #else
87 #define tracing_selftest_running	0
88 #define tracing_selftest_disabled	0
89 #endif
90 
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96 
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99 	{ }
100 };
101 
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105 	return 0;
106 }
107 
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurs.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114 
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 but will turn to zero if the initialization
118  * of the tracer is successful. But that is the only place that sets
119  * this back to zero.
120  */
121 static int tracing_disabled = 1;
122 
123 cpumask_var_t __read_mostly	tracing_buffer_mask;
124 
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputting them to a
132  * serial console.
133  *
134  * It is off by default, but you can enable it either by specifying
135  * "ftrace_dump_on_oops" on the kernel command line, or by setting
136  * /proc/sys/kernel/ftrace_dump_on_oops.
137  * Set it to 1 to dump the buffers of all CPUs.
138  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
139  */
140 
141 enum ftrace_dump_mode ftrace_dump_on_oops;
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145 
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149 	struct module			*mod;
150 	unsigned long			length;
151 };
152 
153 union trace_eval_map_item;
154 
155 struct trace_eval_map_tail {
156 	/*
157 	 * "end" points to NULL as it must be different
158 	 * from "mod" or "eval_string"
159 	 */
160 	union trace_eval_map_item	*next;
161 	const char			*end;	/* points to NULL */
162 };
163 
164 static DEFINE_MUTEX(trace_eval_mutex);
165 
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174 	struct trace_eval_map		map;
175 	struct trace_eval_map_head	head;
176 	struct trace_eval_map_tail	tail;
177 };
178 
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181 
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184 				   struct trace_buffer *buffer,
185 				   unsigned int trace_ctx);
186 
187 #define MAX_TRACER_SIZE		100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190 
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193 
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196 
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199 
200 static int __init set_cmdline_ftrace(char *str)
201 {
202 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203 	default_bootup_tracer = bootup_tracer_buf;
204 	/* We are using ftrace early, expand it */
205 	ring_buffer_expanded = true;
206 	return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209 
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
213 		ftrace_dump_on_oops = DUMP_ALL;
214 		return 1;
215 	}
216 
217 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218 		ftrace_dump_on_oops = DUMP_ORIG;
219 		return 1;
220 	}
221 
222 	return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
225 
226 static int __init stop_trace_on_warning(char *str)
227 {
228 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229 		__disable_trace_on_warning = 1;
230 	return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233 
234 static int __init boot_alloc_snapshot(char *str)
235 {
236 	char *slot = boot_snapshot_info + boot_snapshot_index;
237 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238 	int ret;
239 
240 	if (str[0] == '=') {
241 		str++;
242 		if (strlen(str) >= left)
243 			return -1;
244 
245 		ret = snprintf(slot, left, "%s\t", str);
246 		boot_snapshot_index += ret;
247 	} else {
248 		allocate_snapshot = true;
249 		/* We also need the main ring buffer expanded */
250 		ring_buffer_expanded = true;
251 	}
252 	return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255 
256 
257 static int __init boot_snapshot(char *str)
258 {
259 	snapshot_at_boot = true;
260 	boot_alloc_snapshot(str);
261 	return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264 
265 
266 static int __init boot_instance(char *str)
267 {
268 	char *slot = boot_instance_info + boot_instance_index;
269 	int left = sizeof(boot_instance_info) - boot_instance_index;
270 	int ret;
271 
272 	if (strlen(str) >= left)
273 		return -1;
274 
275 	ret = snprintf(slot, left, "%s\t", str);
276 	boot_instance_index += ret;
277 
278 	return 1;
279 }
280 __setup("trace_instance=", boot_instance);
281 
282 
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284 
285 static int __init set_trace_boot_options(char *str)
286 {
287 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288 	return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291 
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294 
295 static int __init set_trace_boot_clock(char *str)
296 {
297 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298 	trace_boot_clock = trace_boot_clock_buf;
299 	return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302 
303 static int __init set_tracepoint_printk(char *str)
304 {
305 	/* Ignore the "tp_printk_stop_on_boot" param */
306 	if (*str == '_')
307 		return 0;
308 
309 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310 		tracepoint_printk = 1;
311 	return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314 
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317 	tracepoint_printk_stop_on_boot = true;
318 	return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321 
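/*
 * Convert nanoseconds to microseconds, rounding to the nearest
 * microsecond (e.g. ns2usecs(1500) == 2). do_div() is used because a
 * plain 64-bit division is not available on all 32-bit architectures.
 */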
322 unsigned long long ns2usecs(u64 nsec)
323 {
324 	nsec += 500;
325 	do_div(nsec, 1000);
326 	return nsec;
327 }
328 
329 static void
330 trace_process_export(struct trace_export *export,
331 	       struct ring_buffer_event *event, int flag)
332 {
333 	struct trace_entry *entry;
334 	unsigned int size = 0;
335 
336 	if (export->flags & flag) {
337 		entry = ring_buffer_event_data(event);
338 		size = ring_buffer_event_length(event);
339 		export->write(export, entry, size);
340 	}
341 }
342 
343 static DEFINE_MUTEX(ftrace_export_lock);
344 
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346 
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350 
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353 	if (export->flags & TRACE_EXPORT_FUNCTION)
354 		static_branch_inc(&trace_function_exports_enabled);
355 
356 	if (export->flags & TRACE_EXPORT_EVENT)
357 		static_branch_inc(&trace_event_exports_enabled);
358 
359 	if (export->flags & TRACE_EXPORT_MARKER)
360 		static_branch_inc(&trace_marker_exports_enabled);
361 }
362 
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365 	if (export->flags & TRACE_EXPORT_FUNCTION)
366 		static_branch_dec(&trace_function_exports_enabled);
367 
368 	if (export->flags & TRACE_EXPORT_EVENT)
369 		static_branch_dec(&trace_event_exports_enabled);
370 
371 	if (export->flags & TRACE_EXPORT_MARKER)
372 		static_branch_dec(&trace_marker_exports_enabled);
373 }
374 
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377 	struct trace_export *export;
378 
379 	preempt_disable_notrace();
380 
381 	export = rcu_dereference_raw_check(ftrace_exports_list);
382 	while (export) {
383 		trace_process_export(export, event, flag);
384 		export = rcu_dereference_raw_check(export->next);
385 	}
386 
387 	preempt_enable_notrace();
388 }
389 
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393 	rcu_assign_pointer(export->next, *list);
394 	/*
395 	 * We are adding the export to the list, but another
396 	 * CPU might be walking that list. We need to make sure
397 	 * the export->next pointer is valid before another CPU sees
398 	 * the export pointer added to the list.
399 	 */
400 	rcu_assign_pointer(*list, export);
401 }
402 
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406 	struct trace_export **p;
407 
408 	for (p = list; *p != NULL; p = &(*p)->next)
409 		if (*p == export)
410 			break;
411 
412 	if (*p != export)
413 		return -1;
414 
415 	rcu_assign_pointer(*p, (*p)->next);
416 
417 	return 0;
418 }
419 
420 static inline void
421 add_ftrace_export(struct trace_export **list, struct trace_export *export)
422 {
423 	ftrace_exports_enable(export);
424 
425 	add_trace_export(list, export);
426 }
427 
428 static inline int
429 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
430 {
431 	int ret;
432 
433 	ret = rm_trace_export(list, export);
434 	ftrace_exports_disable(export);
435 
436 	return ret;
437 }
438 
439 int register_ftrace_export(struct trace_export *export)
440 {
441 	if (WARN_ON_ONCE(!export->write))
442 		return -1;
443 
444 	mutex_lock(&ftrace_export_lock);
445 
446 	add_ftrace_export(&ftrace_exports_list, export);
447 
448 	mutex_unlock(&ftrace_export_lock);
449 
450 	return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453 
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456 	int ret;
457 
458 	mutex_lock(&ftrace_export_lock);
459 
460 	ret = rm_ftrace_export(&ftrace_exports_list, export);
461 
462 	mutex_unlock(&ftrace_export_lock);
463 
464 	return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
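
/*
 * Editor's sketch (hypothetical names, not part of this file): a module
 * can mirror trace data to another transport by registering a
 * trace_export. The write() callback is invoked in tracing context with
 * preemption disabled, so it must not sleep.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// push "entry"/"size" out over a non-sleeping transport
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	// module init:	register_ftrace_export(&my_export);
 *	// module exit:	unregister_ftrace_export(&my_export);
 */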
467 
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS						\
470 	(FUNCTION_DEFAULT_FLAGS |					\
471 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
472 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
473 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
474 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
475 	 TRACE_ITER_HASH_PTR)
476 
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
479 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
480 
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484 
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490 	.trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492 
493 LIST_HEAD(ftrace_trace_arrays);
494 
495 int trace_array_get(struct trace_array *this_tr)
496 {
497 	struct trace_array *tr;
498 	int ret = -ENODEV;
499 
500 	mutex_lock(&trace_types_lock);
501 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502 		if (tr == this_tr) {
503 			tr->ref++;
504 			ret = 0;
505 			break;
506 		}
507 	}
508 	mutex_unlock(&trace_types_lock);
509 
510 	return ret;
511 }
512 
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515 	WARN_ON(!this_tr->ref);
516 	this_tr->ref--;
517 }
518 
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530 	if (!this_tr)
531 		return;
532 
533 	mutex_lock(&trace_types_lock);
534 	__trace_array_put(this_tr);
535 	mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
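
/*
 * Editor's sketch: a reference obtained with trace_array_get_by_name()
 * (declared in <linux/trace.h>) is dropped with trace_array_put() once
 * the caller is done with the instance ("my_instance" is only an
 * example name):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		// use tr, e.g. with trace_array_printk()
 *		trace_array_put(tr);
 *	}
 */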
538 
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541 	int ret;
542 
543 	ret = security_locked_down(LOCKDOWN_TRACEFS);
544 	if (ret)
545 		return ret;
546 
547 	if (tracing_disabled)
548 		return -ENODEV;
549 
550 	if (tr && trace_array_get(tr) < 0)
551 		return -ENODEV;
552 
553 	return 0;
554 }
555 
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557 			      struct trace_buffer *buffer,
558 			      struct ring_buffer_event *event)
559 {
560 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561 	    !filter_match_preds(call->filter, rec)) {
562 		__trace_event_discard_commit(buffer, event);
563 		return 1;
564 	}
565 
566 	return 0;
567 }
568 
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579 	return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581 
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594 		       struct trace_pid_list *filtered_no_pids,
595 		       struct task_struct *task)
596 {
597 	/*
598 	 * If filtered_no_pids is not empty, and the task's pid is listed
599 	 * in filtered_no_pids, then return true.
600 	 * Otherwise, if filtered_pids is empty, that means we can
601 	 * trace all tasks. If it has content, then only trace pids
602 	 * within filtered_pids.
603 	 */
604 
605 	return (filtered_pids &&
606 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
607 		(filtered_no_pids &&
608 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
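
/*
 * For example: with filtered_pids = {1, 2} and filtered_no_pids = NULL,
 * a task with pid 3 is ignored (true) while pid 2 is traced (false).
 * With filtered_pids = NULL and filtered_no_pids = {7}, only pid 7 is
 * ignored.
 */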
610 
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * If adding a task, if @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624 				  struct task_struct *self,
625 				  struct task_struct *task)
626 {
627 	if (!pid_list)
628 		return;
629 
630 	/* For forks, we only add if the forking task is listed */
631 	if (self) {
632 		if (!trace_find_filtered_pid(pid_list, self->pid))
633 			return;
634 	}
635 
636 	/* "self" is set for forks, and NULL for exits */
637 	if (self)
638 		trace_pid_list_set(pid_list, task->pid);
639 	else
640 		trace_pid_list_clear(pid_list, task->pid);
641 }
642 
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (actual pid + 1, so that zero can be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657 	long pid = (unsigned long)v;
658 	unsigned int next;
659 
660 	(*pos)++;
661 
662 	/* pid already is +1 of the actual previous bit */
663 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
664 		return NULL;
665 
666 	pid = next;
667 
668 	/* Return pid + 1 to allow zero to be represented */
669 	return (void *)(pid + 1);
670 }
671 
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685 	unsigned long pid;
686 	unsigned int first;
687 	loff_t l = 0;
688 
689 	if (trace_pid_list_first(pid_list, &first) < 0)
690 		return NULL;
691 
692 	pid = first;
693 
694 	/* Return pid + 1 so that zero can be the exit value */
695 	for (pid++; pid && l < *pos;
696 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697 		;
698 	return (void *)pid;
699 }
700 
701 /**
702  * trace_pid_show - show the current pid in seq_file processing
703  * @m: The seq_file structure to write into
704  * @v: A void pointer of the pid (+1) value to display
705  *
706  * Can be directly used by seq_file operations to display the current
707  * pid value.
708  */
709 int trace_pid_show(struct seq_file *m, void *v)
710 {
711 	unsigned long pid = (unsigned long)v - 1;
712 
713 	seq_printf(m, "%lu\n", pid);
714 	return 0;
715 }
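
/*
 * Editor's note: trace_pid_start(), trace_pid_next() and trace_pid_show()
 * are meant to back the seq_file operations of a pid-list file.
 * trace_pid_show() can be used directly as the ->show callback, while
 * ->start and ->next are normally thin wrappers that first look up the
 * pid_list (under RCU) from the seq_file's private data and then call
 * the helpers above. See the set_event_pid handling in trace_events.c
 * for a real user.
 */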
716 
717 /* 127 bytes per parsed pid string (plus the terminating NUL = 128) should be much more than enough */
718 #define PID_BUF_SIZE		127
719 
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721 		    struct trace_pid_list **new_pid_list,
722 		    const char __user *ubuf, size_t cnt)
723 {
724 	struct trace_pid_list *pid_list;
725 	struct trace_parser parser;
726 	unsigned long val;
727 	int nr_pids = 0;
728 	ssize_t read = 0;
729 	ssize_t ret;
730 	loff_t pos;
731 	pid_t pid;
732 
733 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734 		return -ENOMEM;
735 
736 	/*
737 	 * Always create a new array. The write is an all-or-nothing
738 	 * operation: when the user adds new pids, a new array is built,
739 	 * and if the operation fails, then the current list is
740 	 * not modified.
741 	 */
742 	pid_list = trace_pid_list_alloc();
743 	if (!pid_list) {
744 		trace_parser_put(&parser);
745 		return -ENOMEM;
746 	}
747 
748 	if (filtered_pids) {
749 		/* copy the current bits to the new max */
750 		ret = trace_pid_list_first(filtered_pids, &pid);
751 		while (!ret) {
752 			trace_pid_list_set(pid_list, pid);
753 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754 			nr_pids++;
755 		}
756 	}
757 
758 	ret = 0;
759 	while (cnt > 0) {
760 
761 		pos = 0;
762 
763 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
764 		if (ret < 0)
765 			break;
766 
767 		read += ret;
768 		ubuf += ret;
769 		cnt -= ret;
770 
771 		if (!trace_parser_loaded(&parser))
772 			break;
773 
774 		ret = -EINVAL;
775 		if (kstrtoul(parser.buffer, 0, &val))
776 			break;
777 
778 		pid = (pid_t)val;
779 
780 		if (trace_pid_list_set(pid_list, pid) < 0) {
781 			ret = -1;
782 			break;
783 		}
784 		nr_pids++;
785 
786 		trace_parser_clear(&parser);
787 		ret = 0;
788 	}
789 	trace_parser_put(&parser);
790 
791 	if (ret < 0) {
792 		trace_pid_list_free(pid_list);
793 		return ret;
794 	}
795 
796 	if (!nr_pids) {
797 		/* Cleared the list of pids */
798 		trace_pid_list_free(pid_list);
799 		pid_list = NULL;
800 	}
801 
802 	*new_pid_list = pid_list;
803 
804 	return read;
805 }
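
/*
 * The buffer parsed above is a whitespace-separated list of pids, as
 * written by the user into files such as set_event_pid, e.g.:
 *
 *	echo "12 57 1024" > set_event_pid
 *
 * If no pids end up in the new list, it is freed and *new_pid_list is
 * set to NULL.
 */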
806 
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809 	u64 ts;
810 
811 	/* Early boot up does not have a buffer yet */
812 	if (!buf->buffer)
813 		return trace_clock_local();
814 
815 	ts = ring_buffer_time_stamp(buf->buffer);
816 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817 
818 	return ts;
819 }
820 
821 u64 ftrace_now(int cpu)
822 {
823 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825 
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows if the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled", which is meant for fast paths such as
831  * the irqsoff tracer, but it may be inaccurate due to races. If you
832  * need to know the accurate state, use tracing_is_on(), which is a little
833  * slower but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837 	/*
838 	 * For quick access (irqsoff uses this in fast path), just
839 	 * return the mirror variable of the state of the ring buffer.
840 	 * It's a little racy, but we don't really care.
841 	 */
842 	smp_rmb();
843 	return !global_trace.buffer_disabled;
844 }
845 
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to a low number of 16384.
852  * If a dump on oops happens, it is much appreciated not to have
853  * to wait for all that output. In any case, this is configurable
854  * at both boot time and run time.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
857 
858 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859 
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer		*trace_types __read_mostly;
862 
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867 
868 /*
869  * Serialize access to the ring buffer.
870  *
871  * The ring buffer serializes readers, but that is only low-level protection.
872  * The validity of the events (returned by ring_buffer_peek() etc.)
873  * is not protected by the ring buffer.
874  *
875  * The content of events may become garbage if we allow another process to
876  * consume these events concurrently:
877  *   A) the page of the consumed events may become a normal page
878  *      (not a reader page) in the ring buffer, and this page will be
879  *      rewritten by the events producer.
880  *   B) the page of the consumed events may become a page for splice_read,
881  *      and this page will be returned to the system.
882  *
883  * These primitives allow multiple processes to access different cpu ring
884  * buffers concurrently.
885  *
886  * These primitives don't distinguish read-only and read-consume access.
887  * Multiple read-only accesses are also serialized.
888  */
889 
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893 
894 static inline void trace_access_lock(int cpu)
895 {
896 	if (cpu == RING_BUFFER_ALL_CPUS) {
897 		/* gain it for accessing the whole ring buffer. */
898 		down_write(&all_cpu_access_lock);
899 	} else {
900 		/* gain it for accessing a cpu ring buffer. */
901 
902 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903 		down_read(&all_cpu_access_lock);
904 
905 		/* Secondly block other access to this @cpu ring buffer. */
906 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
907 	}
908 }
909 
910 static inline void trace_access_unlock(int cpu)
911 {
912 	if (cpu == RING_BUFFER_ALL_CPUS) {
913 		up_write(&all_cpu_access_lock);
914 	} else {
915 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916 		up_read(&all_cpu_access_lock);
917 	}
918 }
919 
920 static inline void trace_access_lock_init(void)
921 {
922 	int cpu;
923 
924 	for_each_possible_cpu(cpu)
925 		mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927 
928 #else
929 
930 static DEFINE_MUTEX(access_lock);
931 
932 static inline void trace_access_lock(int cpu)
933 {
934 	(void)cpu;
935 	mutex_lock(&access_lock);
936 }
937 
938 static inline void trace_access_unlock(int cpu)
939 {
940 	(void)cpu;
941 	mutex_unlock(&access_lock);
942 }
943 
944 static inline void trace_access_lock_init(void)
945 {
946 }
947 
948 #endif
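
/*
 * Editor's sketch of the intended pairing for a reader:
 *
 *	trace_access_lock(cpu);
 *	// read or consume events of that cpu buffer
 *	trace_access_unlock(cpu);
 *
 * where "cpu" is either a specific CPU or RING_BUFFER_ALL_CPUS.
 */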
949 
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952 				 unsigned int trace_ctx,
953 				 int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955 				      struct trace_buffer *buffer,
956 				      unsigned int trace_ctx,
957 				      int skip, struct pt_regs *regs);
958 
959 #else
960 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
961 					unsigned int trace_ctx,
962 					int skip, struct pt_regs *regs)
963 {
964 }
965 static inline void ftrace_trace_stack(struct trace_array *tr,
966 				      struct trace_buffer *buffer,
967 				      unsigned long trace_ctx,
968 				      int skip, struct pt_regs *regs)
969 {
970 }
971 
972 #endif
973 
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976 		  int type, unsigned int trace_ctx)
977 {
978 	struct trace_entry *ent = ring_buffer_event_data(event);
979 
980 	tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982 
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985 			  int type,
986 			  unsigned long len,
987 			  unsigned int trace_ctx)
988 {
989 	struct ring_buffer_event *event;
990 
991 	event = ring_buffer_lock_reserve(buffer, len);
992 	if (event != NULL)
993 		trace_event_setup(event, type, trace_ctx);
994 
995 	return event;
996 }
997 
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000 	if (tr->array_buffer.buffer)
1001 		ring_buffer_record_on(tr->array_buffer.buffer);
1002 	/*
1003 	 * This flag is looked at when buffers haven't been allocated
1004 	 * yet, or by some tracers (like irqsoff), that just want to
1005 	 * know if the ring buffer has been disabled, but it can handle
1006 	 * races where it gets disabled while we still do a record.
1007 	 * As the check is in the fast path of the tracers, it is more
1008 	 * important to be fast than accurate.
1009 	 */
1010 	tr->buffer_disabled = 0;
1011 	/* Make the flag seen by readers */
1012 	smp_wmb();
1013 }
1014 
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023 	tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
1026 
1027 
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031 	__this_cpu_write(trace_taskinfo_save, true);
1032 
1033 	/* If this is the temp buffer, we need to commit fully */
1034 	if (this_cpu_read(trace_buffered_event) == event) {
1035 		/* Length is in event->array[0] */
1036 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037 		/* Release the temp buffer */
1038 		this_cpu_dec(trace_buffered_event_cnt);
1039 		/* ring_buffer_unlock_commit() enables preemption */
1040 		preempt_enable_notrace();
1041 	} else
1042 		ring_buffer_unlock_commit(buffer);
1043 }
1044 
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046 		       const char *str, int size)
1047 {
1048 	struct ring_buffer_event *event;
1049 	struct trace_buffer *buffer;
1050 	struct print_entry *entry;
1051 	unsigned int trace_ctx;
1052 	int alloc;
1053 
1054 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055 		return 0;
1056 
1057 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1058 		return 0;
1059 
1060 	if (unlikely(tracing_disabled))
1061 		return 0;
1062 
1063 	alloc = sizeof(*entry) + size + 2; /* possible '\n' plus terminating '\0' */
1064 
1065 	trace_ctx = tracing_gen_ctx();
1066 	buffer = tr->array_buffer.buffer;
1067 	ring_buffer_nest_start(buffer);
1068 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1069 					    trace_ctx);
1070 	if (!event) {
1071 		size = 0;
1072 		goto out;
1073 	}
1074 
1075 	entry = ring_buffer_event_data(event);
1076 	entry->ip = ip;
1077 
1078 	memcpy(&entry->buf, str, size);
1079 
1080 	/* Add a newline if necessary */
1081 	if (entry->buf[size - 1] != '\n') {
1082 		entry->buf[size] = '\n';
1083 		entry->buf[size + 1] = '\0';
1084 	} else
1085 		entry->buf[size] = '\0';
1086 
1087 	__buffer_unlock_commit(buffer, event);
1088 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1089  out:
1090 	ring_buffer_nest_end(buffer);
1091 	return size;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_array_puts);
1094 
1095 /**
1096  * __trace_puts - write a constant string into the trace buffer.
1097  * @ip:	   The address of the caller
1098  * @str:   The constant string to write
1099  * @size:  The size of the string.
1100  */
1101 int __trace_puts(unsigned long ip, const char *str, int size)
1102 {
1103 	return __trace_array_puts(&global_trace, ip, str, size);
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_puts);
1106 
1107 /**
1108  * __trace_bputs - write the pointer to a constant string into trace buffer
1109  * @ip:	   The address of the caller
1110  * @str:   The constant string whose pointer is written to the buffer
1111  */
1112 int __trace_bputs(unsigned long ip, const char *str)
1113 {
1114 	struct ring_buffer_event *event;
1115 	struct trace_buffer *buffer;
1116 	struct bputs_entry *entry;
1117 	unsigned int trace_ctx;
1118 	int size = sizeof(struct bputs_entry);
1119 	int ret = 0;
1120 
1121 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1122 		return 0;
1123 
1124 	if (unlikely(tracing_selftest_running || tracing_disabled))
1125 		return 0;
1126 
1127 	trace_ctx = tracing_gen_ctx();
1128 	buffer = global_trace.array_buffer.buffer;
1129 
1130 	ring_buffer_nest_start(buffer);
1131 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1132 					    trace_ctx);
1133 	if (!event)
1134 		goto out;
1135 
1136 	entry = ring_buffer_event_data(event);
1137 	entry->ip			= ip;
1138 	entry->str			= str;
1139 
1140 	__buffer_unlock_commit(buffer, event);
1141 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142 
1143 	ret = 1;
1144  out:
1145 	ring_buffer_nest_end(buffer);
1146 	return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(__trace_bputs);
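
/*
 * Editor's note: __trace_puts() and __trace_bputs() above are normally
 * reached through the trace_puts() macro, which picks one of them
 * depending on whether its argument is a string literal, e.g.:
 *
 *	trace_puts("hit the slow path\n");
 */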
1149 
1150 #ifdef CONFIG_TRACER_SNAPSHOT
1151 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1152 					   void *cond_data)
1153 {
1154 	struct tracer *tracer = tr->current_trace;
1155 	unsigned long flags;
1156 
1157 	if (in_nmi()) {
1158 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1159 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1160 		return;
1161 	}
1162 
1163 	if (!tr->allocated_snapshot) {
1164 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1165 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1166 		tracer_tracing_off(tr);
1167 		return;
1168 	}
1169 
1170 	/* Note, the snapshot cannot be used when the tracer itself uses it */
1171 	if (tracer->use_max_tr) {
1172 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1173 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174 		return;
1175 	}
1176 
1177 	local_irq_save(flags);
1178 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1179 	local_irq_restore(flags);
1180 }
1181 
1182 void tracing_snapshot_instance(struct trace_array *tr)
1183 {
1184 	tracing_snapshot_instance_cond(tr, NULL);
1185 }
1186 
1187 /**
1188  * tracing_snapshot - take a snapshot of the current buffer.
1189  *
1190  * This causes a swap between the snapshot buffer and the current live
1191  * tracing buffer. You can use this to take snapshots of the live
1192  * trace when some condition is triggered, but continue to trace.
1193  *
1194  * Note, make sure to allocate the snapshot with either
1195  * a tracing_snapshot_alloc(), or by doing it manually
1196  * with: echo 1 > /sys/kernel/tracing/snapshot
1197  *
1198  * If the snapshot buffer is not allocated, it will stop tracing.
1199  * Basically making a permanent snapshot.
1200  */
1201 void tracing_snapshot(void)
1202 {
1203 	struct trace_array *tr = &global_trace;
1204 
1205 	tracing_snapshot_instance(tr);
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot);
1208 
1209 /**
1210  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1211  * @tr:		The tracing instance to snapshot
1212  * @cond_data:	The data to be tested conditionally, and possibly saved
1213  *
1214  * This is the same as tracing_snapshot() except that the snapshot is
1215  * conditional - the snapshot will only happen if the
1216  * cond_snapshot.update() implementation receiving the cond_data
1217  * returns true, which means that the trace array's cond_snapshot
1218  * update() operation used the cond_data to determine whether the
1219  * snapshot should be taken, and if it was, presumably saved it along
1220  * with the snapshot.
1221  */
1222 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1223 {
1224 	tracing_snapshot_instance_cond(tr, cond_data);
1225 }
1226 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1227 
1228 /**
1229  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1230  * @tr:		The tracing instance
1231  *
1232  * When the user enables a conditional snapshot using
1233  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1234  * with the snapshot.  This accessor is used to retrieve it.
1235  *
1236  * Should not be called from cond_snapshot.update(), since it takes
1237  * the tr->max_lock lock, which the code calling
1238  * cond_snapshot.update() has already done.
1239  *
1240  * Returns the cond_data associated with the trace array's snapshot.
1241  */
1242 void *tracing_cond_snapshot_data(struct trace_array *tr)
1243 {
1244 	void *cond_data = NULL;
1245 
1246 	local_irq_disable();
1247 	arch_spin_lock(&tr->max_lock);
1248 
1249 	if (tr->cond_snapshot)
1250 		cond_data = tr->cond_snapshot->cond_data;
1251 
1252 	arch_spin_unlock(&tr->max_lock);
1253 	local_irq_enable();
1254 
1255 	return cond_data;
1256 }
1257 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1258 
1259 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1260 					struct array_buffer *size_buf, int cpu_id);
1261 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1262 
1263 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 {
1265 	int ret;
1266 
1267 	if (!tr->allocated_snapshot) {
1268 
1269 		/* allocate spare buffer */
1270 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1271 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272 		if (ret < 0)
1273 			return ret;
1274 
1275 		tr->allocated_snapshot = true;
1276 	}
1277 
1278 	return 0;
1279 }
1280 
1281 static void free_snapshot(struct trace_array *tr)
1282 {
1283 	/*
1284 	 * We don't free the ring buffer. Instead, we resize it because
1285 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1286 	 * we want to preserve it.
1287 	 */
1288 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1289 	set_buffer_entries(&tr->max_buffer, 1);
1290 	tracing_reset_online_cpus(&tr->max_buffer);
1291 	tr->allocated_snapshot = false;
1292 }
1293 
1294 /**
1295  * tracing_alloc_snapshot - allocate snapshot buffer.
1296  *
1297  * This only allocates the snapshot buffer if it isn't already
1298  * allocated - it doesn't also take a snapshot.
1299  *
1300  * This is meant to be used in cases where the snapshot buffer needs
1301  * to be set up for events that can't sleep but need to be able to
1302  * trigger a snapshot.
1303  */
1304 int tracing_alloc_snapshot(void)
1305 {
1306 	struct trace_array *tr = &global_trace;
1307 	int ret;
1308 
1309 	ret = tracing_alloc_snapshot_instance(tr);
1310 	WARN_ON(ret < 0);
1311 
1312 	return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1315 
1316 /**
1317  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1318  *
1319  * This is similar to tracing_snapshot(), but it will allocate the
1320  * snapshot buffer if it isn't already allocated. Use this only
1321  * where it is safe to sleep, as the allocation may sleep.
1322  *
1323  * This causes a swap between the snapshot buffer and the current live
1324  * tracing buffer. You can use this to take snapshots of the live
1325  * trace when some condition is triggered, but continue to trace.
1326  */
1327 void tracing_snapshot_alloc(void)
1328 {
1329 	int ret;
1330 
1331 	ret = tracing_alloc_snapshot();
1332 	if (ret < 0)
1333 		return;
1334 
1335 	tracing_snapshot();
1336 }
1337 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
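
/*
 * Editor's sketch: a typical pattern is to allocate the snapshot buffer
 * once from a context that may sleep, and then trigger snapshots from
 * hot or atomic paths (but not from NMI context):
 *
 *	// setup (may sleep):
 *	ret = tracing_alloc_snapshot();
 *
 *	// later, when the interesting condition hits:
 *	if (!ret)
 *		tracing_snapshot();
 */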
1338 
1339 /**
1340  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1341  * @tr:		The tracing instance
1342  * @cond_data:	User data to associate with the snapshot
1343  * @update:	Implementation of the cond_snapshot update function
1344  *
1345  * Check whether the conditional snapshot for the given instance has
1346  * already been enabled, or if the current tracer is already using a
1347  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1348  * save the cond_data and update function inside.
1349  *
1350  * Returns 0 if successful, error otherwise.
1351  */
1352 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1353 				 cond_update_fn_t update)
1354 {
1355 	struct cond_snapshot *cond_snapshot;
1356 	int ret = 0;
1357 
1358 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359 	if (!cond_snapshot)
1360 		return -ENOMEM;
1361 
1362 	cond_snapshot->cond_data = cond_data;
1363 	cond_snapshot->update = update;
1364 
1365 	mutex_lock(&trace_types_lock);
1366 
1367 	ret = tracing_alloc_snapshot_instance(tr);
1368 	if (ret)
1369 		goto fail_unlock;
1370 
1371 	if (tr->current_trace->use_max_tr) {
1372 		ret = -EBUSY;
1373 		goto fail_unlock;
1374 	}
1375 
1376 	/*
1377 	 * The cond_snapshot can only change to NULL without the
1378 	 * trace_types_lock. We don't care if we race with it going
1379 	 * to NULL, but we want to make sure that it's not set to
1380 	 * something other than NULL when we get here, which we can
1381 	 * do safely with only holding the trace_types_lock and not
1382 	 * having to take the max_lock.
1383 	 */
1384 	if (tr->cond_snapshot) {
1385 		ret = -EBUSY;
1386 		goto fail_unlock;
1387 	}
1388 
1389 	local_irq_disable();
1390 	arch_spin_lock(&tr->max_lock);
1391 	tr->cond_snapshot = cond_snapshot;
1392 	arch_spin_unlock(&tr->max_lock);
1393 	local_irq_enable();
1394 
1395 	mutex_unlock(&trace_types_lock);
1396 
1397 	return ret;
1398 
1399  fail_unlock:
1400 	mutex_unlock(&trace_types_lock);
1401 	kfree(cond_snapshot);
1402 	return ret;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
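
/*
 * Editor's sketch of a conditional snapshot user (hypothetical names):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		// called from update_max_tr() with tr->max_lock held;
 *		// return true to actually take the snapshot
 *		return *(bool *)cond_data;
 *	}
 *
 *	// enable:  tracing_snapshot_cond_enable(tr, &my_flag, my_update);
 *	// trigger: tracing_snapshot_cond(tr, &my_flag);
 *	// disable: tracing_snapshot_cond_disable(tr);
 */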
1405 
1406 /**
1407  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1408  * @tr:		The tracing instance
1409  *
1410  * Check whether the conditional snapshot for the given instance is
1411  * enabled; if so, free the cond_snapshot associated with it,
1412  * otherwise return -EINVAL.
1413  *
1414  * Returns 0 if successful, error otherwise.
1415  */
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418 	int ret = 0;
1419 
1420 	local_irq_disable();
1421 	arch_spin_lock(&tr->max_lock);
1422 
1423 	if (!tr->cond_snapshot)
1424 		ret = -EINVAL;
1425 	else {
1426 		kfree(tr->cond_snapshot);
1427 		tr->cond_snapshot = NULL;
1428 	}
1429 
1430 	arch_spin_unlock(&tr->max_lock);
1431 	local_irq_enable();
1432 
1433 	return ret;
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1436 #else
1437 void tracing_snapshot(void)
1438 {
1439 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1440 }
1441 EXPORT_SYMBOL_GPL(tracing_snapshot);
1442 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1443 {
1444 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1447 int tracing_alloc_snapshot(void)
1448 {
1449 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1450 	return -ENODEV;
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1453 void tracing_snapshot_alloc(void)
1454 {
1455 	/* Give warning */
1456 	tracing_snapshot();
1457 }
1458 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1459 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 {
1461 	return NULL;
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1464 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 {
1466 	return -ENODEV;
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1469 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 {
1471 	return false;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1474 #define free_snapshot(tr)	do { } while (0)
1475 #endif /* CONFIG_TRACER_SNAPSHOT */
1476 
1477 void tracer_tracing_off(struct trace_array *tr)
1478 {
1479 	if (tr->array_buffer.buffer)
1480 		ring_buffer_record_off(tr->array_buffer.buffer);
1481 	/*
1482 	 * This flag is looked at when buffers haven't been allocated
1483 	 * yet, or by some tracers (like irqsoff), that just want to
1484 	 * know if the ring buffer has been disabled, but it can handle
1485 	 * races where it gets disabled while we still do a record.
1486 	 * As the check is in the fast path of the tracers, it is more
1487 	 * important to be fast than accurate.
1488 	 */
1489 	tr->buffer_disabled = 1;
1490 	/* Make the flag seen by readers */
1491 	smp_wmb();
1492 }
1493 
1494 /**
1495  * tracing_off - turn off tracing buffers
1496  *
1497  * This function stops the tracing buffers from recording data.
1498  * It does not disable any overhead the tracers themselves may
1499  * be causing. This function simply causes all recording to
1500  * the ring buffers to fail.
1501  */
1502 void tracing_off(void)
1503 {
1504 	tracer_tracing_off(&global_trace);
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_off);
1507 
1508 void disable_trace_on_warning(void)
1509 {
1510 	if (__disable_trace_on_warning) {
1511 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1512 			"Disabling tracing due to warning\n");
1513 		tracing_off();
1514 	}
1515 }
1516 
1517 /**
1518  * tracer_tracing_is_on - show real state of ring buffer enabled
1519  * @tr : the trace array to know if ring buffer is enabled
1520  *
1521  * Shows real state of the ring buffer if it is enabled or not.
1522  */
1523 bool tracer_tracing_is_on(struct trace_array *tr)
1524 {
1525 	if (tr->array_buffer.buffer)
1526 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1527 	return !tr->buffer_disabled;
1528 }
1529 
1530 /**
1531  * tracing_is_on - show state of ring buffers enabled
1532  */
1533 int tracing_is_on(void)
1534 {
1535 	return tracer_tracing_is_on(&global_trace);
1536 }
1537 EXPORT_SYMBOL_GPL(tracing_is_on);
1538 
1539 static int __init set_buf_size(char *str)
1540 {
1541 	unsigned long buf_size;
1542 
1543 	if (!str)
1544 		return 0;
1545 	buf_size = memparse(str, &str);
1546 	/*
1547 	 * nr_entries can not be zero and the startup
1548 	 * tests require some buffer space. Therefore
1549 	 * ensure we have at least 4096 bytes of buffer.
1550 	 */
1551 	trace_buf_size = max(4096UL, buf_size);
1552 	return 1;
1553 }
1554 __setup("trace_buf_size=", set_buf_size);
1555 
1556 static int __init set_tracing_thresh(char *str)
1557 {
1558 	unsigned long threshold;
1559 	int ret;
1560 
1561 	if (!str)
1562 		return 0;
1563 	ret = kstrtoul(str, 0, &threshold);
1564 	if (ret < 0)
1565 		return 0;
1566 	tracing_thresh = threshold * 1000;
1567 	return 1;
1568 }
1569 __setup("tracing_thresh=", set_tracing_thresh);
1570 
1571 unsigned long nsecs_to_usecs(unsigned long nsecs)
1572 {
1573 	return nsecs / 1000;
1574 }
1575 
1576 /*
1577  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1578  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1579  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1580  * of strings in the order that the evals (enum) were defined.
1581  */
1582 #undef C
1583 #define C(a, b) b
1584 
1585 /* These must match the bit positions in trace_iterator_flags */
1586 static const char *trace_options[] = {
1587 	TRACE_FLAGS
1588 	NULL
1589 };
1590 
1591 static struct {
1592 	u64 (*func)(void);
1593 	const char *name;
1594 	int in_ns;		/* is this clock in nanoseconds? */
1595 } trace_clocks[] = {
1596 	{ trace_clock_local,		"local",	1 },
1597 	{ trace_clock_global,		"global",	1 },
1598 	{ trace_clock_counter,		"counter",	0 },
1599 	{ trace_clock_jiffies,		"uptime",	0 },
1600 	{ trace_clock,			"perf",		1 },
1601 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1602 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1603 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1604 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1605 	ARCH_TRACE_CLOCKS
1606 };
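
/*
 * The clock used by an instance can be selected at boot with the
 * "trace_clock=" parameter handled earlier in this file, or at run time
 * by writing one of the names above to the tracefs "trace_clock" file:
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 */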
1607 
1608 bool trace_clock_in_ns(struct trace_array *tr)
1609 {
1610 	if (trace_clocks[tr->clock_id].in_ns)
1611 		return true;
1612 
1613 	return false;
1614 }
1615 
1616 /*
1617  * trace_parser_get_init - gets the buffer for trace parser
1618  */
1619 int trace_parser_get_init(struct trace_parser *parser, int size)
1620 {
1621 	memset(parser, 0, sizeof(*parser));
1622 
1623 	parser->buffer = kmalloc(size, GFP_KERNEL);
1624 	if (!parser->buffer)
1625 		return 1;
1626 
1627 	parser->size = size;
1628 	return 0;
1629 }
1630 
1631 /*
1632  * trace_parser_put - frees the buffer for trace parser
1633  */
1634 void trace_parser_put(struct trace_parser *parser)
1635 {
1636 	kfree(parser->buffer);
1637 	parser->buffer = NULL;
1638 }
1639 
1640 /*
1641  * trace_get_user - reads the user input string separated by space
1642  * (matched by isspace(ch))
1643  *
1644  * For each string found the 'struct trace_parser' is updated,
1645  * and the function returns.
1646  *
1647  * Returns number of bytes read.
1648  *
1649  * See kernel/trace/trace.h for 'struct trace_parser' details.
1650  */
1651 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1652 	size_t cnt, loff_t *ppos)
1653 {
1654 	char ch;
1655 	size_t read = 0;
1656 	ssize_t ret;
1657 
1658 	if (!*ppos)
1659 		trace_parser_clear(parser);
1660 
1661 	ret = get_user(ch, ubuf++);
1662 	if (ret)
1663 		goto out;
1664 
1665 	read++;
1666 	cnt--;
1667 
1668 	/*
1669 	 * The parser is not finished with the last write,
1670 	 * continue reading the user input without skipping spaces.
1671 	 */
1672 	if (!parser->cont) {
1673 		/* skip white space */
1674 		while (cnt && isspace(ch)) {
1675 			ret = get_user(ch, ubuf++);
1676 			if (ret)
1677 				goto out;
1678 			read++;
1679 			cnt--;
1680 		}
1681 
1682 		parser->idx = 0;
1683 
1684 		/* only spaces were written */
1685 		if (isspace(ch) || !ch) {
1686 			*ppos += read;
1687 			ret = read;
1688 			goto out;
1689 		}
1690 	}
1691 
1692 	/* read the non-space input */
1693 	while (cnt && !isspace(ch) && ch) {
1694 		if (parser->idx < parser->size - 1)
1695 			parser->buffer[parser->idx++] = ch;
1696 		else {
1697 			ret = -EINVAL;
1698 			goto out;
1699 		}
1700 		ret = get_user(ch, ubuf++);
1701 		if (ret)
1702 			goto out;
1703 		read++;
1704 		cnt--;
1705 	}
1706 
1707 	/* We either got finished input or we have to wait for another call. */
1708 	if (isspace(ch) || !ch) {
1709 		parser->buffer[parser->idx] = 0;
1710 		parser->cont = false;
1711 	} else if (parser->idx < parser->size - 1) {
1712 		parser->cont = true;
1713 		parser->buffer[parser->idx++] = ch;
1714 		/* Make sure the parsed string always terminates with '\0'. */
1715 		parser->buffer[parser->idx] = 0;
1716 	} else {
1717 		ret = -EINVAL;
1718 		goto out;
1719 	}
1720 
1721 	*ppos += read;
1722 	ret = read;
1723 
1724 out:
1725 	return ret;
1726 }
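
/*
 * Editor's sketch of the usual calling pattern (see trace_pid_write()
 * above for a real user): the caller loops until the user buffer is
 * exhausted, handling one whitespace-separated token per successful
 * call:
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		// parser.buffer now holds one NUL-terminated token
 *		trace_parser_clear(&parser);
 *	}
 */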
1727 
1728 /* TODO add a seq_buf_to_buffer() */
1729 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 {
1731 	int len;
1732 
1733 	if (trace_seq_used(s) <= s->seq.readpos)
1734 		return -EBUSY;
1735 
1736 	len = trace_seq_used(s) - s->seq.readpos;
1737 	if (cnt > len)
1738 		cnt = len;
1739 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1740 
1741 	s->seq.readpos += cnt;
1742 	return cnt;
1743 }
1744 
1745 unsigned long __read_mostly	tracing_thresh;
1746 
1747 #ifdef CONFIG_TRACER_MAX_TRACE
1748 static const struct file_operations tracing_max_lat_fops;
1749 
1750 #ifdef LATENCY_FS_NOTIFY
1751 
1752 static struct workqueue_struct *fsnotify_wq;
1753 
1754 static void latency_fsnotify_workfn(struct work_struct *work)
1755 {
1756 	struct trace_array *tr = container_of(work, struct trace_array,
1757 					      fsnotify_work);
1758 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1759 }
1760 
1761 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1762 {
1763 	struct trace_array *tr = container_of(iwork, struct trace_array,
1764 					      fsnotify_irqwork);
1765 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1766 }
1767 
1768 static void trace_create_maxlat_file(struct trace_array *tr,
1769 				     struct dentry *d_tracer)
1770 {
1771 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1772 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1773 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1774 					      TRACE_MODE_WRITE,
1775 					      d_tracer, &tr->max_latency,
1776 					      &tracing_max_lat_fops);
1777 }
1778 
1779 __init static int latency_fsnotify_init(void)
1780 {
1781 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1782 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1783 	if (!fsnotify_wq) {
1784 		pr_err("Unable to allocate tr_max_lat_wq\n");
1785 		return -ENOMEM;
1786 	}
1787 	return 0;
1788 }
1789 
1790 late_initcall_sync(latency_fsnotify_init);
1791 
1792 void latency_fsnotify(struct trace_array *tr)
1793 {
1794 	if (!fsnotify_wq)
1795 		return;
1796 	/*
1797 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1798 	 * possible that we are called from __schedule() or do_idle(), which
1799 	 * could cause a deadlock.
1800 	 */
1801 	irq_work_queue(&tr->fsnotify_irqwork);
1802 }
1803 
1804 #else /* !LATENCY_FS_NOTIFY */
1805 
1806 #define trace_create_maxlat_file(tr, d_tracer)				\
1807 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1808 			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1809 
1810 #endif
1811 
1812 /*
1813  * Copy the new maximum trace into the separate maximum-trace
1814  * structure. (this way the maximum trace is permanently saved,
1815  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1816  */
1817 static void
1818 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819 {
1820 	struct array_buffer *trace_buf = &tr->array_buffer;
1821 	struct array_buffer *max_buf = &tr->max_buffer;
1822 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1823 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1824 
1825 	max_buf->cpu = cpu;
1826 	max_buf->time_start = data->preempt_timestamp;
1827 
1828 	max_data->saved_latency = tr->max_latency;
1829 	max_data->critical_start = data->critical_start;
1830 	max_data->critical_end = data->critical_end;
1831 
1832 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1833 	max_data->pid = tsk->pid;
1834 	/*
1835 	 * If tsk == current, then use current_uid(), as that does not use
1836 	 * RCU. The irq tracer can be called out of RCU scope.
1837 	 */
1838 	if (tsk == current)
1839 		max_data->uid = current_uid();
1840 	else
1841 		max_data->uid = task_uid(tsk);
1842 
1843 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1844 	max_data->policy = tsk->policy;
1845 	max_data->rt_priority = tsk->rt_priority;
1846 
1847 	/* record this tasks comm */
1848 	tracing_record_cmdline(tsk);
1849 	latency_fsnotify(tr);
1850 }
1851 
1852 /**
1853  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1854  * @tr: tracer
1855  * @tsk: the task with the latency
1856  * @cpu: The cpu that initiated the trace.
1857  * @cond_data: User data associated with a conditional snapshot
1858  *
1859  * Flip the buffers between the @tr and the max_tr and record information
1860  * about which task was the cause of this latency.
1861  */
1862 void
1863 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1864 	      void *cond_data)
1865 {
1866 	if (tr->stop_count)
1867 		return;
1868 
1869 	WARN_ON_ONCE(!irqs_disabled());
1870 
1871 	if (!tr->allocated_snapshot) {
1872 		/* Only the nop tracer should hit this when disabling */
1873 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874 		return;
1875 	}
1876 
1877 	arch_spin_lock(&tr->max_lock);
1878 
1879 	/* Inherit the recordable setting from array_buffer */
1880 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1881 		ring_buffer_record_on(tr->max_buffer.buffer);
1882 	else
1883 		ring_buffer_record_off(tr->max_buffer.buffer);
1884 
1885 #ifdef CONFIG_TRACER_SNAPSHOT
1886 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1887 		arch_spin_unlock(&tr->max_lock);
1888 		return;
1889 	}
1890 #endif
1891 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1892 
1893 	__update_max_tr(tr, tsk, cpu);
1894 
1895 	arch_spin_unlock(&tr->max_lock);
1896 }
1897 
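/*
 * Illustrative sketch, not part of this file: roughly how a latency tracer
 * is expected to call update_max_tr() when it observes a new maximum.
 * The names probe_latency() and "latency" below are hypothetical; real
 * callers live in the latency tracers (e.g. the wakeup tracer).
 *
 *	static void probe_latency(struct trace_array *tr, int cpu, u64 latency)
 *	{
 *		if (latency <= tr->max_latency)
 *			return;
 *
 *		// Interrupts must already be disabled here.
 *		tr->max_latency = latency;
 *		update_max_tr(tr, current, cpu, NULL);
 *	}
 */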
1898 /**
1899  * update_max_tr_single - only copy one trace over, and reset the rest
1900  * @tr: trace array whose max-latency snapshot is being updated
1901  * @tsk: task with the latency
1902  * @cpu: the cpu of the buffer to copy.
1903  *
1904  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1905  */
1906 void
1907 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1908 {
1909 	int ret;
1910 
1911 	if (tr->stop_count)
1912 		return;
1913 
1914 	WARN_ON_ONCE(!irqs_disabled());
1915 	if (!tr->allocated_snapshot) {
1916 		/* Only the nop tracer should hit this when disabling */
1917 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1918 		return;
1919 	}
1920 
1921 	arch_spin_lock(&tr->max_lock);
1922 
1923 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1924 
1925 	if (ret == -EBUSY) {
1926 		/*
1927 		 * We failed to swap the buffer due to a commit taking
1928 		 * We failed to swap the buffer either because a commit was
1929 		 * taking place on this CPU, or because a resize is in
1930 		 * progress. We fail to record, but we reset the max trace
1931 		 * buffer (no one writes directly to it) and flag that it
1932 		 * failed.
1933 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1934 			"Failed to swap buffers due to commit or resize in progress\n");
1935 	}
1936 
1937 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1938 
1939 	__update_max_tr(tr, tsk, cpu);
1940 	arch_spin_unlock(&tr->max_lock);
1941 }
1942 
1943 #endif /* CONFIG_TRACER_MAX_TRACE */
1944 
1945 static int wait_on_pipe(struct trace_iterator *iter, int full)
1946 {
1947 	/* Iterators are static, they should be filled or empty */
1948 	if (trace_buffer_iter(iter, iter->cpu_file))
1949 		return 0;
1950 
1951 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1952 				full);
1953 }
1954 
1955 #ifdef CONFIG_FTRACE_STARTUP_TEST
1956 static bool selftests_can_run;
1957 
1958 struct trace_selftests {
1959 	struct list_head		list;
1960 	struct tracer			*type;
1961 };
1962 
1963 static LIST_HEAD(postponed_selftests);
1964 
1965 static int save_selftest(struct tracer *type)
1966 {
1967 	struct trace_selftests *selftest;
1968 
1969 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1970 	if (!selftest)
1971 		return -ENOMEM;
1972 
1973 	selftest->type = type;
1974 	list_add(&selftest->list, &postponed_selftests);
1975 	return 0;
1976 }
1977 
1978 static int run_tracer_selftest(struct tracer *type)
1979 {
1980 	struct trace_array *tr = &global_trace;
1981 	struct tracer *saved_tracer = tr->current_trace;
1982 	int ret;
1983 
1984 	if (!type->selftest || tracing_selftest_disabled)
1985 		return 0;
1986 
1987 	/*
1988 	 * If a tracer registers early in boot up (before scheduling is
1989 	 * initialized and such), then do not run its selftests yet.
1990 	 * Instead, run them a little later in the boot process.
1991 	 */
1992 	if (!selftests_can_run)
1993 		return save_selftest(type);
1994 
1995 	if (!tracing_is_on()) {
1996 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1997 			type->name);
1998 		return 0;
1999 	}
2000 
2001 	/*
2002 	 * Run a selftest on this tracer.
2003 	 * Here we reset the trace buffer, and set the current
2004 	 * tracer to be this tracer. The tracer can then run some
2005 	 * internal tracing to verify that everything is in order.
2006 	 * If we fail, we do not register this tracer.
2007 	 */
2008 	tracing_reset_online_cpus(&tr->array_buffer);
2009 
2010 	tr->current_trace = type;
2011 
2012 #ifdef CONFIG_TRACER_MAX_TRACE
2013 	if (type->use_max_tr) {
2014 		/* If we expanded the buffers, make sure the max is expanded too */
2015 		if (ring_buffer_expanded)
2016 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2017 					   RING_BUFFER_ALL_CPUS);
2018 		tr->allocated_snapshot = true;
2019 	}
2020 #endif
2021 
2022 	/* the test is responsible for initializing and enabling */
2023 	pr_info("Testing tracer %s: ", type->name);
2024 	ret = type->selftest(type, tr);
2025 	/* the test is responsible for resetting too */
2026 	tr->current_trace = saved_tracer;
2027 	if (ret) {
2028 		printk(KERN_CONT "FAILED!\n");
2029 		/* Add the warning after printing 'FAILED' */
2030 		WARN_ON(1);
2031 		return -1;
2032 	}
2033 	/* Only reset on passing, to avoid touching corrupted buffers */
2034 	tracing_reset_online_cpus(&tr->array_buffer);
2035 
2036 #ifdef CONFIG_TRACER_MAX_TRACE
2037 	if (type->use_max_tr) {
2038 		tr->allocated_snapshot = false;
2039 
2040 		/* Shrink the max buffer again */
2041 		if (ring_buffer_expanded)
2042 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2043 					   RING_BUFFER_ALL_CPUS);
2044 	}
2045 #endif
2046 
2047 	printk(KERN_CONT "PASSED\n");
2048 	return 0;
2049 }
2050 
2051 static int do_run_tracer_selftest(struct tracer *type)
2052 {
2053 	int ret;
2054 
2055 	/*
2056 	 * Tests can take a long time, especially if they are run one after the
2057 	 * other, as does happen during bootup when all the tracers are
2058 	 * registered. This could cause the soft lockup watchdog to trigger.
2059 	 */
2060 	cond_resched();
2061 
2062 	tracing_selftest_running = true;
2063 	ret = run_tracer_selftest(type);
2064 	tracing_selftest_running = false;
2065 
2066 	return ret;
2067 }
2068 
2069 static __init int init_trace_selftests(void)
2070 {
2071 	struct trace_selftests *p, *n;
2072 	struct tracer *t, **last;
2073 	int ret;
2074 
2075 	selftests_can_run = true;
2076 
2077 	mutex_lock(&trace_types_lock);
2078 
2079 	if (list_empty(&postponed_selftests))
2080 		goto out;
2081 
2082 	pr_info("Running postponed tracer tests:\n");
2083 
2084 	tracing_selftest_running = true;
2085 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2086 		/* This loop can take minutes when sanitizers are enabled, so
2087 		 * let's make sure we allow RCU processing.
2088 		 */
2089 		cond_resched();
2090 		ret = run_tracer_selftest(p->type);
2091 		/* If the test fails, then warn and remove from available_tracers */
2092 		if (ret < 0) {
2093 			WARN(1, "tracer: %s failed selftest, disabling\n",
2094 			     p->type->name);
2095 			last = &trace_types;
2096 			for (t = trace_types; t; t = t->next) {
2097 				if (t == p->type) {
2098 					*last = t->next;
2099 					break;
2100 				}
2101 				last = &t->next;
2102 			}
2103 		}
2104 		list_del(&p->list);
2105 		kfree(p);
2106 	}
2107 	tracing_selftest_running = false;
2108 
2109  out:
2110 	mutex_unlock(&trace_types_lock);
2111 
2112 	return 0;
2113 }
2114 core_initcall(init_trace_selftests);
2115 #else
2116 static inline int run_tracer_selftest(struct tracer *type)
2117 {
2118 	return 0;
2119 }
2120 static inline int do_run_tracer_selftest(struct tracer *type)
2121 {
2122 	return 0;
2123 }
2124 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2125 
2126 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2127 
2128 static void __init apply_trace_boot_options(void);
2129 
2130 /**
2131  * register_tracer - register a tracer with the ftrace system.
2132  * @type: the plugin for the tracer
2133  *
2134  * Register a new plugin tracer.
2135  */
2136 int __init register_tracer(struct tracer *type)
2137 {
2138 	struct tracer *t;
2139 	int ret = 0;
2140 
2141 	if (!type->name) {
2142 		pr_info("Tracer must have a name\n");
2143 		return -1;
2144 	}
2145 
2146 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2147 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2148 		return -1;
2149 	}
2150 
2151 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2152 		pr_warn("Can not register tracer %s due to lockdown\n",
2153 			   type->name);
2154 		return -EPERM;
2155 	}
2156 
2157 	mutex_lock(&trace_types_lock);
2158 
2159 	for (t = trace_types; t; t = t->next) {
2160 		if (strcmp(type->name, t->name) == 0) {
2161 			/* already found */
2162 			pr_info("Tracer %s already registered\n",
2163 				type->name);
2164 			ret = -1;
2165 			goto out;
2166 		}
2167 	}
2168 
2169 	if (!type->set_flag)
2170 		type->set_flag = &dummy_set_flag;
2171 	if (!type->flags) {
2172 		/* allocate a dummy tracer_flags */
2173 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2174 		if (!type->flags) {
2175 			ret = -ENOMEM;
2176 			goto out;
2177 		}
2178 		type->flags->val = 0;
2179 		type->flags->opts = dummy_tracer_opt;
2180 	} else
2181 	} else if (!type->flags->opts) {
2182 		type->flags->opts = dummy_tracer_opt;
2183 	}
2184 	/* store the tracer for __set_tracer_option */
2185 	type->flags->trace = type;
2186 
2187 	ret = do_run_tracer_selftest(type);
2188 	if (ret < 0)
2189 		goto out;
2190 
2191 	type->next = trace_types;
2192 	trace_types = type;
2193 	add_tracer_options(&global_trace, type);
2194 
2195  out:
2196 	mutex_unlock(&trace_types_lock);
2197 
2198 	if (ret || !default_bootup_tracer)
2199 		goto out_unlock;
2200 
2201 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2202 		goto out_unlock;
2203 
2204 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2205 	/* Do we want this tracer to start on bootup? */
2206 	tracing_set_tracer(&global_trace, type->name);
2207 	default_bootup_tracer = NULL;
2208 
2209 	apply_trace_boot_options();
2210 
2211 	/* Disable other selftests, since running this tracer will break them. */
2212 	disable_tracing_selftest("running a tracer");
2213 
2214  out_unlock:
2215 	return ret;
2216 }
2217 
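/*
 * Illustrative sketch, not part of this file: a minimal tracer as it would
 * typically be registered from init code. The names my_tracer_init(),
 * my_tracer and init_my_tracer() are hypothetical; real tracers live in
 * their own trace_*.c files and usually fill in more callbacks.
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */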
2218 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2219 {
2220 	struct trace_buffer *buffer = buf->buffer;
2221 
2222 	if (!buffer)
2223 		return;
2224 
2225 	ring_buffer_record_disable(buffer);
2226 
2227 	/* Make sure all commits have finished */
2228 	synchronize_rcu();
2229 	ring_buffer_reset_cpu(buffer, cpu);
2230 
2231 	ring_buffer_record_enable(buffer);
2232 }
2233 
2234 void tracing_reset_online_cpus(struct array_buffer *buf)
2235 {
2236 	struct trace_buffer *buffer = buf->buffer;
2237 
2238 	if (!buffer)
2239 		return;
2240 
2241 	ring_buffer_record_disable(buffer);
2242 
2243 	/* Make sure all commits have finished */
2244 	synchronize_rcu();
2245 
2246 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2247 
2248 	ring_buffer_reset_online_cpus(buffer);
2249 
2250 	ring_buffer_record_enable(buffer);
2251 }
2252 
2253 /* Must have trace_types_lock held */
2254 void tracing_reset_all_online_cpus_unlocked(void)
2255 {
2256 	struct trace_array *tr;
2257 
2258 	lockdep_assert_held(&trace_types_lock);
2259 
2260 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2261 		if (!tr->clear_trace)
2262 			continue;
2263 		tr->clear_trace = false;
2264 		tracing_reset_online_cpus(&tr->array_buffer);
2265 #ifdef CONFIG_TRACER_MAX_TRACE
2266 		tracing_reset_online_cpus(&tr->max_buffer);
2267 #endif
2268 	}
2269 }
2270 
2271 void tracing_reset_all_online_cpus(void)
2272 {
2273 	mutex_lock(&trace_types_lock);
2274 	tracing_reset_all_online_cpus_unlocked();
2275 	mutex_unlock(&trace_types_lock);
2276 }
2277 
2278 /*
2279  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2280  * is the tgid last observed corresponding to pid=i.
2281  */
2282 static int *tgid_map;
2283 
2284 /* The maximum valid index into tgid_map. */
2285 static size_t tgid_map_max;
2286 
2287 #define SAVED_CMDLINES_DEFAULT 128
2288 #define NO_CMDLINE_MAP UINT_MAX
2289 /*
2290  * Preemption must be disabled before acquiring trace_cmdline_lock.
2291  * The various trace_arrays' max_lock must be acquired in a context
2292 	 * where interrupts are disabled.
2293  */
2294 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2295 struct saved_cmdlines_buffer {
2296 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2297 	unsigned *map_cmdline_to_pid;
2298 	unsigned cmdline_num;
2299 	int cmdline_idx;
2300 	char *saved_cmdlines;
2301 };
2302 static struct saved_cmdlines_buffer *savedcmd;
2303 
2304 static inline char *get_saved_cmdlines(int idx)
2305 {
2306 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2307 }
2308 
2309 static inline void set_cmdline(int idx, const char *cmdline)
2310 {
2311 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2312 }
2313 
2314 static int allocate_cmdlines_buffer(unsigned int val,
2315 				    struct saved_cmdlines_buffer *s)
2316 {
2317 	s->map_cmdline_to_pid = kmalloc_array(val,
2318 					      sizeof(*s->map_cmdline_to_pid),
2319 					      GFP_KERNEL);
2320 	if (!s->map_cmdline_to_pid)
2321 		return -ENOMEM;
2322 
2323 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2324 	if (!s->saved_cmdlines) {
2325 		kfree(s->map_cmdline_to_pid);
2326 		return -ENOMEM;
2327 	}
2328 
2329 	s->cmdline_idx = 0;
2330 	s->cmdline_num = val;
2331 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2332 	       sizeof(s->map_pid_to_cmdline));
2333 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2334 	       val * sizeof(*s->map_cmdline_to_pid));
2335 
2336 	return 0;
2337 }
2338 
2339 static int trace_create_savedcmd(void)
2340 {
2341 	int ret;
2342 
2343 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2344 	if (!savedcmd)
2345 		return -ENOMEM;
2346 
2347 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2348 	if (ret < 0) {
2349 		kfree(savedcmd);
2350 		savedcmd = NULL;
2351 		return -ENOMEM;
2352 	}
2353 
2354 	return 0;
2355 }
2356 
2357 int is_tracing_stopped(void)
2358 {
2359 	return global_trace.stop_count;
2360 }
2361 
2362 /**
2363  * tracing_start - quick start of the tracer
2364  *
2365  * If tracing is enabled but was stopped by tracing_stop,
2366  * this will start the tracer back up.
2367  */
2368 void tracing_start(void)
2369 {
2370 	struct trace_buffer *buffer;
2371 	unsigned long flags;
2372 
2373 	if (tracing_disabled)
2374 		return;
2375 
2376 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2377 	if (--global_trace.stop_count) {
2378 		if (global_trace.stop_count < 0) {
2379 			/* Someone screwed up their debugging */
2380 			WARN_ON_ONCE(1);
2381 			global_trace.stop_count = 0;
2382 		}
2383 		goto out;
2384 	}
2385 
2386 	/* Prevent the buffers from switching */
2387 	arch_spin_lock(&global_trace.max_lock);
2388 
2389 	buffer = global_trace.array_buffer.buffer;
2390 	if (buffer)
2391 		ring_buffer_record_enable(buffer);
2392 
2393 #ifdef CONFIG_TRACER_MAX_TRACE
2394 	buffer = global_trace.max_buffer.buffer;
2395 	if (buffer)
2396 		ring_buffer_record_enable(buffer);
2397 #endif
2398 
2399 	arch_spin_unlock(&global_trace.max_lock);
2400 
2401  out:
2402 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2403 }
2404 
2405 static void tracing_start_tr(struct trace_array *tr)
2406 {
2407 	struct trace_buffer *buffer;
2408 	unsigned long flags;
2409 
2410 	if (tracing_disabled)
2411 		return;
2412 
2413 	/* If global, we need to also start the max tracer */
2414 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2415 		return tracing_start();
2416 
2417 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2418 
2419 	if (--tr->stop_count) {
2420 		if (tr->stop_count < 0) {
2421 			/* Someone screwed up their debugging */
2422 			WARN_ON_ONCE(1);
2423 			tr->stop_count = 0;
2424 		}
2425 		goto out;
2426 	}
2427 
2428 	buffer = tr->array_buffer.buffer;
2429 	if (buffer)
2430 		ring_buffer_record_enable(buffer);
2431 
2432  out:
2433 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2434 }
2435 
2436 /**
2437  * tracing_stop - quick stop of the tracer
2438  *
2439  * Lightweight way to stop tracing. Use in conjunction with
2440  * tracing_start.
2441  */
2442 void tracing_stop(void)
2443 {
2444 	struct trace_buffer *buffer;
2445 	unsigned long flags;
2446 
2447 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2448 	if (global_trace.stop_count++)
2449 		goto out;
2450 
2451 	/* Prevent the buffers from switching */
2452 	arch_spin_lock(&global_trace.max_lock);
2453 
2454 	buffer = global_trace.array_buffer.buffer;
2455 	if (buffer)
2456 		ring_buffer_record_disable(buffer);
2457 
2458 #ifdef CONFIG_TRACER_MAX_TRACE
2459 	buffer = global_trace.max_buffer.buffer;
2460 	if (buffer)
2461 		ring_buffer_record_disable(buffer);
2462 #endif
2463 
2464 	arch_spin_unlock(&global_trace.max_lock);
2465 
2466  out:
2467 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2468 }
2469 
2470 static void tracing_stop_tr(struct trace_array *tr)
2471 {
2472 	struct trace_buffer *buffer;
2473 	unsigned long flags;
2474 
2475 	/* If global, we need to also stop the max tracer */
2476 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2477 		return tracing_stop();
2478 
2479 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2480 	if (tr->stop_count++)
2481 		goto out;
2482 
2483 	buffer = tr->array_buffer.buffer;
2484 	if (buffer)
2485 		ring_buffer_record_disable(buffer);
2486 
2487  out:
2488 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2489 }
2490 
2491 static int trace_save_cmdline(struct task_struct *tsk)
2492 {
2493 	unsigned tpid, idx;
2494 
2495 	/* treat recording of idle task as a success */
2496 	if (!tsk->pid)
2497 		return 1;
2498 
2499 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2500 
2501 	/*
2502 	 * It's not the end of the world if we don't get
2503 	 * the lock, but we also don't want to spin
2504 	 * nor do we want to disable interrupts,
2505 	 * so if we miss here, then better luck next time.
2506 	 *
2507 	 * This is called from within the scheduler and from wakeups, so
2508 	 * interrupts had better be disabled and the run queue lock held.
2509 	 */
2510 	lockdep_assert_preemption_disabled();
2511 	if (!arch_spin_trylock(&trace_cmdline_lock))
2512 		return 0;
2513 
2514 	idx = savedcmd->map_pid_to_cmdline[tpid];
2515 	if (idx == NO_CMDLINE_MAP) {
2516 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2517 
2518 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2519 		savedcmd->cmdline_idx = idx;
2520 	}
2521 
2522 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2523 	set_cmdline(idx, tsk->comm);
2524 
2525 	arch_spin_unlock(&trace_cmdline_lock);
2526 
2527 	return 1;
2528 }
2529 
2530 static void __trace_find_cmdline(int pid, char comm[])
2531 {
2532 	unsigned map;
2533 	int tpid;
2534 
2535 	if (!pid) {
2536 		strcpy(comm, "<idle>");
2537 		return;
2538 	}
2539 
2540 	if (WARN_ON_ONCE(pid < 0)) {
2541 		strcpy(comm, "<XXX>");
2542 		return;
2543 	}
2544 
2545 	tpid = pid & (PID_MAX_DEFAULT - 1);
2546 	map = savedcmd->map_pid_to_cmdline[tpid];
2547 	if (map != NO_CMDLINE_MAP) {
2548 		tpid = savedcmd->map_cmdline_to_pid[map];
2549 		if (tpid == pid) {
2550 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2551 			return;
2552 		}
2553 	}
2554 	strcpy(comm, "<...>");
2555 }
2556 
2557 void trace_find_cmdline(int pid, char comm[])
2558 {
2559 	preempt_disable();
2560 	arch_spin_lock(&trace_cmdline_lock);
2561 
2562 	__trace_find_cmdline(pid, comm);
2563 
2564 	arch_spin_unlock(&trace_cmdline_lock);
2565 	preempt_enable();
2566 }
2567 
2568 static int *trace_find_tgid_ptr(int pid)
2569 {
2570 	/*
2571 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2572 	 * if we observe a non-NULL tgid_map then we also observe the correct
2573 	 * tgid_map_max.
2574 	 */
2575 	int *map = smp_load_acquire(&tgid_map);
2576 
2577 	if (unlikely(!map || pid > tgid_map_max))
2578 		return NULL;
2579 
2580 	return &map[pid];
2581 }
2582 
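/*
 * Illustrative sketch, not part of this file: the publish side that the
 * smp_load_acquire() above pairs with. set_tracer_flag() sizes the map
 * first and only then publishes the pointer, roughly ("new_max" and "map"
 * are placeholders for the values it computes):
 *
 *	tgid_map_max = new_max;			// sized before publication
 *	smp_store_release(&tgid_map, map);
 *
 * so a reader that observes a non-NULL tgid_map also observes a valid
 * tgid_map_max.
 */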
2583 int trace_find_tgid(int pid)
2584 {
2585 	int *ptr = trace_find_tgid_ptr(pid);
2586 
2587 	return ptr ? *ptr : 0;
2588 }
2589 
2590 static int trace_save_tgid(struct task_struct *tsk)
2591 {
2592 	int *ptr;
2593 
2594 	/* treat recording of idle task as a success */
2595 	if (!tsk->pid)
2596 		return 1;
2597 
2598 	ptr = trace_find_tgid_ptr(tsk->pid);
2599 	if (!ptr)
2600 		return 0;
2601 
2602 	*ptr = tsk->tgid;
2603 	return 1;
2604 }
2605 
2606 static bool tracing_record_taskinfo_skip(int flags)
2607 {
2608 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2609 		return true;
2610 	if (!__this_cpu_read(trace_taskinfo_save))
2611 		return true;
2612 	return false;
2613 }
2614 
2615 /**
2616  * tracing_record_taskinfo - record the task info of a task
2617  *
2618  * @task:  task to record
2619  * @flags: TRACE_RECORD_CMDLINE for recording comm
2620  *         TRACE_RECORD_TGID for recording tgid
2621  */
2622 void tracing_record_taskinfo(struct task_struct *task, int flags)
2623 {
2624 	bool done;
2625 
2626 	if (tracing_record_taskinfo_skip(flags))
2627 		return;
2628 
2629 	/*
2630 	 * Record as much task information as possible. If some fail, continue
2631 	 * to try to record the others.
2632 	 */
2633 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2634 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2635 
2636 	/* If recording any information failed, try again soon. */
2637 	if (!done)
2638 		return;
2639 
2640 	__this_cpu_write(trace_taskinfo_save, false);
2641 }
2642 
2643 /**
2644  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2645  *
2646  * @prev: previous task during sched_switch
2647  * @next: next task during sched_switch
2648  * @flags: TRACE_RECORD_CMDLINE for recording comm
2649  *         TRACE_RECORD_TGID for recording tgid
2650  */
2651 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2652 					  struct task_struct *next, int flags)
2653 {
2654 	bool done;
2655 
2656 	if (tracing_record_taskinfo_skip(flags))
2657 		return;
2658 
2659 	/*
2660 	 * Record as much task information as possible. If some fail, continue
2661 	 * to try to record the others.
2662 	 */
2663 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2664 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2665 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2666 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2667 
2668 	/* If recording any information failed, try again soon. */
2669 	if (!done)
2670 		return;
2671 
2672 	__this_cpu_write(trace_taskinfo_save, false);
2673 }
2674 
2675 /* Helpers to record a specific task information */
2676 void tracing_record_cmdline(struct task_struct *task)
2677 {
2678 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2679 }
2680 
2681 void tracing_record_tgid(struct task_struct *task)
2682 {
2683 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2684 }
2685 
2686 /*
2687  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2688  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2689  * simplifies those functions and keeps them in sync.
2690  */
2691 enum print_line_t trace_handle_return(struct trace_seq *s)
2692 {
2693 	return trace_seq_has_overflowed(s) ?
2694 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2695 }
2696 EXPORT_SYMBOL_GPL(trace_handle_return);
2697 
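/*
 * Illustrative sketch, not part of this file: the typical shape of an event
 * output handler that uses trace_handle_return(). my_output() and the text
 * it prints are hypothetical; real handlers live in trace_output.c.
 *
 *	static enum print_line_t my_output(struct trace_iterator *iter,
 *					   int flags, struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "my event: %d\n", 0);
 *		return trace_handle_return(s);
 *	}
 */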
2698 static unsigned short migration_disable_value(void)
2699 {
2700 #if defined(CONFIG_SMP)
2701 	return current->migration_disabled;
2702 #else
2703 	return 0;
2704 #endif
2705 }
2706 
2707 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2708 {
2709 	unsigned int trace_flags = irqs_status;
2710 	unsigned int pc;
2711 
2712 	pc = preempt_count();
2713 
2714 	if (pc & NMI_MASK)
2715 		trace_flags |= TRACE_FLAG_NMI;
2716 	if (pc & HARDIRQ_MASK)
2717 		trace_flags |= TRACE_FLAG_HARDIRQ;
2718 	if (in_serving_softirq())
2719 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2720 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2721 		trace_flags |= TRACE_FLAG_BH_OFF;
2722 
2723 	if (tif_need_resched())
2724 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2725 	if (test_preempt_need_resched())
2726 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2727 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2728 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2729 }
2730 
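/*
 * Illustrative note, not part of this file: reading the code above (this is
 * not a new ABI guarantee), the returned context word is packed as:
 *
 *	bits  0- 3: preemption-disable depth, capped at 0xf
 *	bits  4- 7: migration-disable depth, capped at 0xf
 *	bits 16+  : TRACE_FLAG_* bits (the irqs-off status passed in, plus
 *		    NMI/hardirq/softirq/BH-off/need-resched flags)
 *
 * so, for example, a caller that has done one preempt_disable() while in
 * hard-irq context gets TRACE_FLAG_HARDIRQ in the upper half and 0x1 in
 * the low nibble.
 */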
2731 struct ring_buffer_event *
2732 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2733 			  int type,
2734 			  unsigned long len,
2735 			  unsigned int trace_ctx)
2736 {
2737 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2738 }
2739 
2740 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2741 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2742 static int trace_buffered_event_ref;
2743 
2744 /**
2745  * trace_buffered_event_enable - enable buffering events
2746  *
2747  * When events are being filtered, it is quicker to use a temporary
2748  * buffer to write the event data into if there is a good chance
2749  * that it will not be committed. Discarding from the ring buffer
2750  * is not as fast as committing, and is much slower than copying
2751  * the data and then committing it.
2752  *
2753  * When an event is to be filtered, per-CPU buffers are allocated to
2754  * write the event data into. If the event is then filtered and
2755  * discarded, it is simply dropped; otherwise, the entire data is
2756  * committed in one shot.
2757  */
2758 void trace_buffered_event_enable(void)
2759 {
2760 	struct ring_buffer_event *event;
2761 	struct page *page;
2762 	int cpu;
2763 
2764 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2765 
2766 	if (trace_buffered_event_ref++)
2767 		return;
2768 
2769 	for_each_tracing_cpu(cpu) {
2770 		page = alloc_pages_node(cpu_to_node(cpu),
2771 					GFP_KERNEL | __GFP_NORETRY, 0);
2772 		if (!page)
2773 			goto failed;
2774 
2775 		event = page_address(page);
2776 		memset(event, 0, sizeof(*event));
2777 
2778 		per_cpu(trace_buffered_event, cpu) = event;
2779 
2780 		preempt_disable();
2781 		if (cpu == smp_processor_id() &&
2782 		    __this_cpu_read(trace_buffered_event) !=
2783 		    per_cpu(trace_buffered_event, cpu))
2784 			WARN_ON_ONCE(1);
2785 		preempt_enable();
2786 	}
2787 
2788 	return;
2789  failed:
2790 	trace_buffered_event_disable();
2791 }
2792 
2793 static void enable_trace_buffered_event(void *data)
2794 {
2795 	/* Probably not needed, but do it anyway */
2796 	smp_rmb();
2797 	this_cpu_dec(trace_buffered_event_cnt);
2798 }
2799 
2800 static void disable_trace_buffered_event(void *data)
2801 {
2802 	this_cpu_inc(trace_buffered_event_cnt);
2803 }
2804 
2805 /**
2806  * trace_buffered_event_disable - disable buffering events
2807  *
2808  * When a filter is removed, it is faster to not use the buffered
2809  * events, and to commit directly into the ring buffer. Free up
2810  * the temp buffers when there are no more users. This requires
2811  * special synchronization with current events.
2812  */
2813 void trace_buffered_event_disable(void)
2814 {
2815 	int cpu;
2816 
2817 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2818 
2819 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2820 		return;
2821 
2822 	if (--trace_buffered_event_ref)
2823 		return;
2824 
2825 	preempt_disable();
2826 	/* For each CPU, set the buffer as used. */
2827 	smp_call_function_many(tracing_buffer_mask,
2828 			       disable_trace_buffered_event, NULL, 1);
2829 	preempt_enable();
2830 
2831 	/* Wait for all current users to finish */
2832 	synchronize_rcu();
2833 
2834 	for_each_tracing_cpu(cpu) {
2835 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2836 		per_cpu(trace_buffered_event, cpu) = NULL;
2837 	}
2838 	/*
2839 	 * Make sure trace_buffered_event is NULL before clearing
2840 	 * trace_buffered_event_cnt.
2841 	 */
2842 	smp_wmb();
2843 
2844 	preempt_disable();
2845 	/* Do the work on each cpu */
2846 	smp_call_function_many(tracing_buffer_mask,
2847 			       enable_trace_buffered_event, NULL, 1);
2848 	preempt_enable();
2849 }
2850 
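/*
 * Illustrative sketch, not part of this file: how a caller pairs the two
 * helpers above around the lifetime of an event filter (the event filter
 * code does this while holding event_mutex, which both helpers assert).
 * my_attach_filter() and my_detach_filter() are hypothetical wrappers.
 *
 *	static void my_attach_filter(void)
 *	{
 *		mutex_lock(&event_mutex);
 *		trace_buffered_event_enable();
 *		// ... install the filter on the event file(s) ...
 *		mutex_unlock(&event_mutex);
 *	}
 *
 *	static void my_detach_filter(void)
 *	{
 *		mutex_lock(&event_mutex);
 *		// ... remove the filter ...
 *		trace_buffered_event_disable();
 *		mutex_unlock(&event_mutex);
 *	}
 */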
2851 static struct trace_buffer *temp_buffer;
2852 
2853 struct ring_buffer_event *
2854 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2855 			  struct trace_event_file *trace_file,
2856 			  int type, unsigned long len,
2857 			  unsigned int trace_ctx)
2858 {
2859 	struct ring_buffer_event *entry;
2860 	struct trace_array *tr = trace_file->tr;
2861 	int val;
2862 
2863 	*current_rb = tr->array_buffer.buffer;
2864 
2865 	if (!tr->no_filter_buffering_ref &&
2866 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2867 		preempt_disable_notrace();
2868 		/*
2869 		 * Filtering is on, so try to use the per cpu buffer first.
2870 		 * This buffer will simulate a ring_buffer_event,
2871 		 * where the type_len is zero and the array[0] will
2872 		 * hold the full length.
2873 		 * (see include/linux/ring_buffer.h for details on
2874 		 *  how the ring_buffer_event is structured).
2875 		 *
2876 		 * Using a temp buffer during filtering and copying it
2877 		 * on a matched filter is quicker than writing directly
2878 		 * into the ring buffer and then discarding it when
2879 		 * it doesn't match. That is because the discard
2880 		 * requires several atomic operations to get right.
2881 		 * Copying on match and doing nothing on a failed match
2882 		 * Copying on a match and doing nothing on a failed match
2883 		 * is still quicker than skipping the copy on a match but
2884 		 * having to discard out of the ring buffer on a failed match.
2885 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2886 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2887 
2888 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2889 
2890 			/*
2891 			 * Preemption is disabled, but interrupts and NMIs
2892 			 * can still come in now. If that happens after
2893 			 * the above increment, then it will have to go
2894 			 * back to the old method of allocating the event
2895 			 * on the ring buffer, and if the filter fails, it
2896 			 * will have to call ring_buffer_discard_commit()
2897 			 * to remove it.
2898 			 *
2899 			 * Need to also check the unlikely case that the
2900 			 * length is bigger than the temp buffer size.
2901 			 * If that happens, then the reserve is pretty much
2902 			 * guaranteed to fail, as the ring buffer currently
2903 			 * only allows events less than a page. But that may
2904 			 * change in the future, so let the ring buffer reserve
2905 			 * handle the failure in that case.
2906 			 */
2907 			if (val == 1 && likely(len <= max_len)) {
2908 				trace_event_setup(entry, type, trace_ctx);
2909 				entry->array[0] = len;
2910 				/* Return with preemption disabled */
2911 				return entry;
2912 			}
2913 			this_cpu_dec(trace_buffered_event_cnt);
2914 		}
2915 		/* __trace_buffer_lock_reserve() disables preemption */
2916 		preempt_enable_notrace();
2917 	}
2918 
2919 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2920 					    trace_ctx);
2921 	/*
2922 	 * If tracing is off, but we have triggers enabled
2923 	 * If tracing is off but we have triggers enabled,
2924 	 * we still need to look at the event data. Use the temp_buffer
2925 	 * to store the trace event for the trigger to use. It's recursion
2926 	 * safe and will not be recorded anywhere.
2927 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2928 		*current_rb = temp_buffer;
2929 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2930 						    trace_ctx);
2931 	}
2932 	return entry;
2933 }
2934 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2935 
2936 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2937 static DEFINE_MUTEX(tracepoint_printk_mutex);
2938 
2939 static void output_printk(struct trace_event_buffer *fbuffer)
2940 {
2941 	struct trace_event_call *event_call;
2942 	struct trace_event_file *file;
2943 	struct trace_event *event;
2944 	unsigned long flags;
2945 	struct trace_iterator *iter = tracepoint_print_iter;
2946 
2947 	/* We should never get here if iter is NULL */
2948 	if (WARN_ON_ONCE(!iter))
2949 		return;
2950 
2951 	event_call = fbuffer->trace_file->event_call;
2952 	if (!event_call || !event_call->event.funcs ||
2953 	    !event_call->event.funcs->trace)
2954 		return;
2955 
2956 	file = fbuffer->trace_file;
2957 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2958 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2959 	     !filter_match_preds(file->filter, fbuffer->entry)))
2960 		return;
2961 
2962 	event = &fbuffer->trace_file->event_call->event;
2963 
2964 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2965 	trace_seq_init(&iter->seq);
2966 	iter->ent = fbuffer->entry;
2967 	event_call->event.funcs->trace(iter, 0, event);
2968 	trace_seq_putc(&iter->seq, 0);
2969 	printk("%s", iter->seq.buffer);
2970 
2971 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2972 }
2973 
2974 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2975 			     void *buffer, size_t *lenp,
2976 			     loff_t *ppos)
2977 {
2978 	int save_tracepoint_printk;
2979 	int ret;
2980 
2981 	mutex_lock(&tracepoint_printk_mutex);
2982 	save_tracepoint_printk = tracepoint_printk;
2983 
2984 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2985 
2986 	/*
2987 	 * This will force exiting early, as tracepoint_printk
2988 	 * is always zero when tracepoint_print_iter is not allocated.
2989 	 */
2990 	if (!tracepoint_print_iter)
2991 		tracepoint_printk = 0;
2992 
2993 	if (save_tracepoint_printk == tracepoint_printk)
2994 		goto out;
2995 
2996 	if (tracepoint_printk)
2997 		static_key_enable(&tracepoint_printk_key.key);
2998 	else
2999 		static_key_disable(&tracepoint_printk_key.key);
3000 
3001  out:
3002 	mutex_unlock(&tracepoint_printk_mutex);
3003 
3004 	return ret;
3005 }
3006 
3007 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3008 {
3009 	enum event_trigger_type tt = ETT_NONE;
3010 	struct trace_event_file *file = fbuffer->trace_file;
3011 
3012 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3013 			fbuffer->entry, &tt))
3014 		goto discard;
3015 
3016 	if (static_key_false(&tracepoint_printk_key.key))
3017 		output_printk(fbuffer);
3018 
3019 	if (static_branch_unlikely(&trace_event_exports_enabled))
3020 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3021 
3022 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3023 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3024 
3025 discard:
3026 	if (tt)
3027 		event_triggers_post_call(file, tt);
3028 
3029 }
3030 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3031 
3032 /*
3033  * Skip 3:
3034  *
3035  *   trace_buffer_unlock_commit_regs()
3036  *   trace_event_buffer_commit()
3037  *   trace_event_raw_event_xxx()
3038  */
3039 # define STACK_SKIP 3
3040 
3041 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3042 				     struct trace_buffer *buffer,
3043 				     struct ring_buffer_event *event,
3044 				     unsigned int trace_ctx,
3045 				     struct pt_regs *regs)
3046 {
3047 	__buffer_unlock_commit(buffer, event);
3048 
3049 	/*
3050 	 * If regs is not set, then skip the necessary functions.
3051 	 * Note, we can still get here via blktrace, wakeup tracer
3052 	 * and mmiotrace, but that's ok if they lose a function or
3053 	 * two. They are not that meaningful.
3054 	 */
3055 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3056 	ftrace_trace_userstack(tr, buffer, trace_ctx);
3057 }
3058 
3059 /*
3060  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3061  */
3062 void
3063 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3064 				   struct ring_buffer_event *event)
3065 {
3066 	__buffer_unlock_commit(buffer, event);
3067 }
3068 
3069 void
3070 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3071 	       parent_ip, unsigned int trace_ctx)
3072 {
3073 	struct trace_event_call *call = &event_function;
3074 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3075 	struct ring_buffer_event *event;
3076 	struct ftrace_entry *entry;
3077 
3078 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3079 					    trace_ctx);
3080 	if (!event)
3081 		return;
3082 	entry	= ring_buffer_event_data(event);
3083 	entry->ip			= ip;
3084 	entry->parent_ip		= parent_ip;
3085 
3086 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3087 		if (static_branch_unlikely(&trace_function_exports_enabled))
3088 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3089 		__buffer_unlock_commit(buffer, event);
3090 	}
3091 }
3092 
3093 #ifdef CONFIG_STACKTRACE
3094 
3095 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3096 #define FTRACE_KSTACK_NESTING	4
3097 
3098 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3099 
3100 struct ftrace_stack {
3101 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3102 };
3103 
3104 
3105 struct ftrace_stacks {
3106 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3107 };
3108 
3109 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3110 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3111 
3112 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3113 				 unsigned int trace_ctx,
3114 				 int skip, struct pt_regs *regs)
3115 {
3116 	struct trace_event_call *call = &event_kernel_stack;
3117 	struct ring_buffer_event *event;
3118 	unsigned int size, nr_entries;
3119 	struct ftrace_stack *fstack;
3120 	struct stack_entry *entry;
3121 	int stackidx;
3122 	void *ptr;
3123 
3124 	/*
3125 	 * Add one for this function and the call to stack_trace_save().
3126 	 * If regs is set, then these functions will not be in the way.
3127 	 */
3128 #ifndef CONFIG_UNWINDER_ORC
3129 	if (!regs)
3130 		skip++;
3131 #endif
3132 
3133 	preempt_disable_notrace();
3134 
3135 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3136 
3137 	/* This should never happen. If it does, yell once and skip */
3138 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3139 		goto out;
3140 
3141 	/*
3142 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3143 	 * interrupt will either see the value pre increment or post
3144 	 * increment. If the interrupt happens pre increment it will have
3145 	 * restored the counter when it returns.  We just need a barrier to
3146 	 * keep gcc from moving things around.
3147 	 */
3148 	barrier();
3149 
3150 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3151 	size = ARRAY_SIZE(fstack->calls);
3152 
3153 	if (regs) {
3154 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3155 						   size, skip);
3156 	} else {
3157 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3158 	}
3159 
3160 	size = nr_entries * sizeof(unsigned long);
3161 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3162 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3163 				    trace_ctx);
3164 	if (!event)
3165 		goto out;
3166 	ptr = ring_buffer_event_data(event);
3167 	entry = ptr;
3168 
3169 	/*
3170 	 * For backward compatibility reasons, the entry->caller is an
3171 	 * array of 8 slots to store the stack. This is also exported
3172 	 * to user space. The amount allocated on the ring buffer actually
3173 	 * holds enough for the stack specified by nr_entries. This will
3174 	 * go into the location of entry->caller. Due to string fortifiers
3175 	 * checking the size of the destination of memcpy() it triggers
3176 	 * when it detects that size is greater than 8. To hide this from
3177 	 * the fortifiers, we use "ptr" and pointer arithmetic to assign caller.
3178 	 *
3179 	 * The below is really just:
3180 	 *   memcpy(&entry->caller, fstack->calls, size);
3181 	 */
3182 	ptr += offsetof(typeof(*entry), caller);
3183 	memcpy(ptr, fstack->calls, size);
3184 
3185 	entry->size = nr_entries;
3186 
3187 	if (!call_filter_check_discard(call, entry, buffer, event))
3188 		__buffer_unlock_commit(buffer, event);
3189 
3190  out:
3191 	/* Again, don't let gcc optimize things here */
3192 	barrier();
3193 	__this_cpu_dec(ftrace_stack_reserve);
3194 	preempt_enable_notrace();
3195 
3196 }
3197 
3198 static inline void ftrace_trace_stack(struct trace_array *tr,
3199 				      struct trace_buffer *buffer,
3200 				      unsigned int trace_ctx,
3201 				      int skip, struct pt_regs *regs)
3202 {
3203 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3204 		return;
3205 
3206 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3207 }
3208 
3209 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3210 		   int skip)
3211 {
3212 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3213 
3214 	if (rcu_is_watching()) {
3215 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3216 		return;
3217 	}
3218 
3219 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3220 		return;
3221 
3222 	/*
3223 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3224 	 * but if the above rcu_is_watching() failed, then the NMI
3225 	 * triggered someplace critical, and ct_irq_enter() should
3226 	 * not be called from NMI.
3227 	 */
3228 	if (unlikely(in_nmi()))
3229 		return;
3230 
3231 	ct_irq_enter_irqson();
3232 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3233 	ct_irq_exit_irqson();
3234 }
3235 
3236 /**
3237  * trace_dump_stack - record a stack back trace in the trace buffer
3238  * @skip: Number of functions to skip (helper handlers)
3239  */
3240 void trace_dump_stack(int skip)
3241 {
3242 	if (tracing_disabled || tracing_selftest_running)
3243 		return;
3244 
3245 #ifndef CONFIG_UNWINDER_ORC
3246 	/* Skip 1 to skip this function. */
3247 	skip++;
3248 #endif
3249 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3250 			     tracing_gen_ctx(), skip, NULL);
3251 }
3252 EXPORT_SYMBOL_GPL(trace_dump_stack);
3253 
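/*
 * Illustrative sketch, not part of this file: trace_dump_stack() is meant
 * as an ad-hoc debugging aid, typically dropped into a code path under
 * investigation and removed again. "suspicious_condition" is hypothetical.
 *
 *	if (suspicious_condition)
 *		trace_dump_stack(0);	// record who got us here
 */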
3254 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3255 static DEFINE_PER_CPU(int, user_stack_count);
3256 
3257 static void
3258 ftrace_trace_userstack(struct trace_array *tr,
3259 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3260 {
3261 	struct trace_event_call *call = &event_user_stack;
3262 	struct ring_buffer_event *event;
3263 	struct userstack_entry *entry;
3264 
3265 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3266 		return;
3267 
3268 	/*
3269 	 * NMIs cannot handle page faults, even with fixups.
3270 	 * Saving the user stack can (and often does) fault.
3271 	 */
3272 	if (unlikely(in_nmi()))
3273 		return;
3274 
3275 	/*
3276 	 * prevent recursion, since the user stack tracing may
3277 	 * trigger other kernel events.
3278 	 */
3279 	preempt_disable();
3280 	if (__this_cpu_read(user_stack_count))
3281 		goto out;
3282 
3283 	__this_cpu_inc(user_stack_count);
3284 
3285 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3286 					    sizeof(*entry), trace_ctx);
3287 	if (!event)
3288 		goto out_drop_count;
3289 	entry	= ring_buffer_event_data(event);
3290 
3291 	entry->tgid		= current->tgid;
3292 	memset(&entry->caller, 0, sizeof(entry->caller));
3293 
3294 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3295 	if (!call_filter_check_discard(call, entry, buffer, event))
3296 		__buffer_unlock_commit(buffer, event);
3297 
3298  out_drop_count:
3299 	__this_cpu_dec(user_stack_count);
3300  out:
3301 	preempt_enable();
3302 }
3303 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3304 static void ftrace_trace_userstack(struct trace_array *tr,
3305 				   struct trace_buffer *buffer,
3306 				   unsigned int trace_ctx)
3307 {
3308 }
3309 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3310 
3311 #endif /* CONFIG_STACKTRACE */
3312 
3313 static inline void
3314 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3315 			  unsigned long long delta)
3316 {
3317 	entry->bottom_delta_ts = delta & U32_MAX;
3318 	entry->top_delta_ts = (delta >> 32);
3319 }
3320 
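/*
 * Illustrative note, not part of this file: the split above is undone on
 * the output side by recombining the two halves, roughly:
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 *
 * The two 32-bit fields presumably exist only to keep the ring buffer
 * entry layout compact.
 */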
3321 void trace_last_func_repeats(struct trace_array *tr,
3322 			     struct trace_func_repeats *last_info,
3323 			     unsigned int trace_ctx)
3324 {
3325 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3326 	struct func_repeats_entry *entry;
3327 	struct ring_buffer_event *event;
3328 	u64 delta;
3329 
3330 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3331 					    sizeof(*entry), trace_ctx);
3332 	if (!event)
3333 		return;
3334 
3335 	delta = ring_buffer_event_time_stamp(buffer, event) -
3336 		last_info->ts_last_call;
3337 
3338 	entry = ring_buffer_event_data(event);
3339 	entry->ip = last_info->ip;
3340 	entry->parent_ip = last_info->parent_ip;
3341 	entry->count = last_info->count;
3342 	func_repeats_set_delta_ts(entry, delta);
3343 
3344 	__buffer_unlock_commit(buffer, event);
3345 }
3346 
3347 /* created for use with alloc_percpu */
3348 struct trace_buffer_struct {
3349 	int nesting;
3350 	char buffer[4][TRACE_BUF_SIZE];
3351 };
3352 
3353 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3354 
3355 /*
3356  * This allows for lockless recording.  If we're nested too deeply, then
3357  * this returns NULL.
3358  */
3359 static char *get_trace_buf(void)
3360 {
3361 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3362 
3363 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3364 		return NULL;
3365 
3366 	buffer->nesting++;
3367 
3368 	/* Interrupts must see nesting incremented before we use the buffer */
3369 	barrier();
3370 	return &buffer->buffer[buffer->nesting - 1][0];
3371 }
3372 
3373 static void put_trace_buf(void)
3374 {
3375 	/* Don't let the decrement of nesting leak before this */
3376 	barrier();
3377 	this_cpu_dec(trace_percpu_buffer->nesting);
3378 }
3379 
3380 static int alloc_percpu_trace_buffer(void)
3381 {
3382 	struct trace_buffer_struct __percpu *buffers;
3383 
3384 	if (trace_percpu_buffer)
3385 		return 0;
3386 
3387 	buffers = alloc_percpu(struct trace_buffer_struct);
3388 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3389 		return -ENOMEM;
3390 
3391 	trace_percpu_buffer = buffers;
3392 	return 0;
3393 }
3394 
3395 static int buffers_allocated;
3396 
3397 void trace_printk_init_buffers(void)
3398 {
3399 	if (buffers_allocated)
3400 		return;
3401 
3402 	if (alloc_percpu_trace_buffer())
3403 		return;
3404 
3405 	/* trace_printk() is for debug use only. Don't use it in production. */
3406 
3407 	pr_warn("\n");
3408 	pr_warn("**********************************************************\n");
3409 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3410 	pr_warn("**                                                      **\n");
3411 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3412 	pr_warn("**                                                      **\n");
3413 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3414 	pr_warn("** unsafe for production use.                           **\n");
3415 	pr_warn("**                                                      **\n");
3416 	pr_warn("** If you see this message and you are not debugging    **\n");
3417 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3418 	pr_warn("**                                                      **\n");
3419 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3420 	pr_warn("**********************************************************\n");
3421 
3422 	/* Expand the buffers to set size */
3423 	tracing_update_buffers();
3424 
3425 	buffers_allocated = 1;
3426 
3427 	/*
3428 	 * trace_printk_init_buffers() can be called by modules.
3429 	 * If that happens, then we need to start cmdline recording
3430 	 * directly here. If global_trace.array_buffer.buffer is already
3431 	 * allocated here, then this was called by module code.
3432 	 */
3433 	if (global_trace.array_buffer.buffer)
3434 		tracing_start_cmdline_record();
3435 }
3436 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3437 
3438 void trace_printk_start_comm(void)
3439 {
3440 	/* Start tracing comms if trace printk is set */
3441 	if (!buffers_allocated)
3442 		return;
3443 	tracing_start_cmdline_record();
3444 }
3445 
3446 static void trace_printk_start_stop_comm(int enabled)
3447 {
3448 	if (!buffers_allocated)
3449 		return;
3450 
3451 	if (enabled)
3452 		tracing_start_cmdline_record();
3453 	else
3454 		tracing_stop_cmdline_record();
3455 }
3456 
3457 /**
3458  * trace_vbprintk - write binary msg to tracing buffer
3459  * @ip:    The address of the caller
3460  * @fmt:   The string format to write to the buffer
3461  * @args:  Arguments for @fmt
3462  */
3463 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3464 {
3465 	struct trace_event_call *call = &event_bprint;
3466 	struct ring_buffer_event *event;
3467 	struct trace_buffer *buffer;
3468 	struct trace_array *tr = &global_trace;
3469 	struct bprint_entry *entry;
3470 	unsigned int trace_ctx;
3471 	char *tbuffer;
3472 	int len = 0, size;
3473 
3474 	if (unlikely(tracing_selftest_running || tracing_disabled))
3475 		return 0;
3476 
3477 	/* Don't pollute graph traces with trace_vprintk internals */
3478 	pause_graph_tracing();
3479 
3480 	trace_ctx = tracing_gen_ctx();
3481 	preempt_disable_notrace();
3482 
3483 	tbuffer = get_trace_buf();
3484 	if (!tbuffer) {
3485 		len = 0;
3486 		goto out_nobuffer;
3487 	}
3488 
3489 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3490 
3491 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3492 		goto out_put;
3493 
3494 	size = sizeof(*entry) + sizeof(u32) * len;
3495 	buffer = tr->array_buffer.buffer;
3496 	ring_buffer_nest_start(buffer);
3497 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3498 					    trace_ctx);
3499 	if (!event)
3500 		goto out;
3501 	entry = ring_buffer_event_data(event);
3502 	entry->ip			= ip;
3503 	entry->fmt			= fmt;
3504 
3505 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3506 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3507 		__buffer_unlock_commit(buffer, event);
3508 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3509 	}
3510 
3511 out:
3512 	ring_buffer_nest_end(buffer);
3513 out_put:
3514 	put_trace_buf();
3515 
3516 out_nobuffer:
3517 	preempt_enable_notrace();
3518 	unpause_graph_tracing();
3519 
3520 	return len;
3521 }
3522 EXPORT_SYMBOL_GPL(trace_vbprintk);
3523 
3524 __printf(3, 0)
3525 static int
3526 __trace_array_vprintk(struct trace_buffer *buffer,
3527 		      unsigned long ip, const char *fmt, va_list args)
3528 {
3529 	struct trace_event_call *call = &event_print;
3530 	struct ring_buffer_event *event;
3531 	int len = 0, size;
3532 	struct print_entry *entry;
3533 	unsigned int trace_ctx;
3534 	char *tbuffer;
3535 
3536 	if (tracing_disabled)
3537 		return 0;
3538 
3539 	/* Don't pollute graph traces with trace_vprintk internals */
3540 	pause_graph_tracing();
3541 
3542 	trace_ctx = tracing_gen_ctx();
3543 	preempt_disable_notrace();
3544 
3545 
3546 	tbuffer = get_trace_buf();
3547 	if (!tbuffer) {
3548 		len = 0;
3549 		goto out_nobuffer;
3550 	}
3551 
3552 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3553 
3554 	size = sizeof(*entry) + len + 1;
3555 	ring_buffer_nest_start(buffer);
3556 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3557 					    trace_ctx);
3558 	if (!event)
3559 		goto out;
3560 	entry = ring_buffer_event_data(event);
3561 	entry->ip = ip;
3562 
3563 	memcpy(&entry->buf, tbuffer, len + 1);
3564 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3565 		__buffer_unlock_commit(buffer, event);
3566 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3567 	}
3568 
3569 out:
3570 	ring_buffer_nest_end(buffer);
3571 	put_trace_buf();
3572 
3573 out_nobuffer:
3574 	preempt_enable_notrace();
3575 	unpause_graph_tracing();
3576 
3577 	return len;
3578 }
3579 
3580 __printf(3, 0)
3581 int trace_array_vprintk(struct trace_array *tr,
3582 			unsigned long ip, const char *fmt, va_list args)
3583 {
3584 	if (tracing_selftest_running && tr == &global_trace)
3585 		return 0;
3586 
3587 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3588 }
3589 
3590 /**
3591  * trace_array_printk - Print a message to a specific instance
3592  * @tr: The instance trace_array descriptor
3593  * @ip: The instruction pointer that this is called from.
3594  * @fmt: The format to print (printf format)
3595  *
3596  * If a subsystem sets up its own instance, it has the right to
3597  * printk strings into its tracing instance buffer using this
3598  * function. Note, this function will not write into the top level
3599  * buffer (use trace_printk() for that), as the top level buffer
3600  * should only contain events that can be individually disabled.
3601  * trace_printk() is only meant for debugging a kernel, and should
3602  * never be incorporated in normal use.
3603  *
3604  * trace_array_printk() can be used, as it will not add noise to the
3605  * top level tracing buffer.
3606  *
3607  * Note, trace_array_init_printk() must be called on @tr before this
3608  * can be used.
3609  */
3610 __printf(3, 0)
3611 int trace_array_printk(struct trace_array *tr,
3612 		       unsigned long ip, const char *fmt, ...)
3613 {
3614 	int ret;
3615 	va_list ap;
3616 
3617 	if (!tr)
3618 		return -ENOENT;
3619 
3620 	/* This is only allowed for created instances */
3621 	if (tr == &global_trace)
3622 		return 0;
3623 
3624 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3625 		return 0;
3626 
3627 	va_start(ap, fmt);
3628 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3629 	va_end(ap);
3630 	return ret;
3631 }
3632 EXPORT_SYMBOL_GPL(trace_array_printk);
3633 
3634 /**
3635  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3636  * @tr: The trace array to initialize the buffers for
3637  *
3638  * As trace_array_printk() only writes into instances, they are OK to
3639  * have in the kernel (unlike trace_printk()). This needs to be called
3640  * before trace_array_printk() can be used on a trace_array.
3641  */
3642 int trace_array_init_printk(struct trace_array *tr)
3643 {
3644 	if (!tr)
3645 		return -ENOENT;
3646 
3647 	/* This is only allowed for created instances */
3648 	if (tr == &global_trace)
3649 		return -EINVAL;
3650 
3651 	return alloc_percpu_trace_buffer();
3652 }
3653 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3654 
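/*
 * Illustrative sketch, not part of this file: the intended calling sequence
 * for a subsystem-owned instance @tr (obtained elsewhere, e.g. via
 * trace_array_get_by_name()); error handling trimmed for brevity.
 *
 *	if (trace_array_init_printk(tr))
 *		return -ENOMEM;
 *
 *	trace_array_printk(tr, _THIS_IP_, "hello: %d\n", 42);
 */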
3655 __printf(3, 4)
3656 int trace_array_printk_buf(struct trace_buffer *buffer,
3657 			   unsigned long ip, const char *fmt, ...)
3658 {
3659 	int ret;
3660 	va_list ap;
3661 
3662 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3663 		return 0;
3664 
3665 	va_start(ap, fmt);
3666 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3667 	va_end(ap);
3668 	return ret;
3669 }
3670 
3671 __printf(2, 0)
3672 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3673 {
3674 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3675 }
3676 EXPORT_SYMBOL_GPL(trace_vprintk);
3677 
3678 static void trace_iterator_increment(struct trace_iterator *iter)
3679 {
3680 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3681 
3682 	iter->idx++;
3683 	if (buf_iter)
3684 		ring_buffer_iter_advance(buf_iter);
3685 }
3686 
3687 static struct trace_entry *
3688 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3689 		unsigned long *lost_events)
3690 {
3691 	struct ring_buffer_event *event;
3692 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3693 
3694 	if (buf_iter) {
3695 		event = ring_buffer_iter_peek(buf_iter, ts);
3696 		if (lost_events)
3697 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3698 				(unsigned long)-1 : 0;
3699 	} else {
3700 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3701 					 lost_events);
3702 	}
3703 
3704 	if (event) {
3705 		iter->ent_size = ring_buffer_event_length(event);
3706 		return ring_buffer_event_data(event);
3707 	}
3708 	iter->ent_size = 0;
3709 	return NULL;
3710 }
3711 
3712 static struct trace_entry *
3713 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3714 		  unsigned long *missing_events, u64 *ent_ts)
3715 {
3716 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3717 	struct trace_entry *ent, *next = NULL;
3718 	unsigned long lost_events = 0, next_lost = 0;
3719 	int cpu_file = iter->cpu_file;
3720 	u64 next_ts = 0, ts;
3721 	int next_cpu = -1;
3722 	int next_size = 0;
3723 	int cpu;
3724 
3725 	/*
3726 	 * If we are in a per_cpu trace file, don't bother iterating over
3727 	 * all CPUs; peek at that one CPU directly.
3728 	 */
3729 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3730 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3731 			return NULL;
3732 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3733 		if (ent_cpu)
3734 			*ent_cpu = cpu_file;
3735 
3736 		return ent;
3737 	}
3738 
3739 	for_each_tracing_cpu(cpu) {
3740 
3741 		if (ring_buffer_empty_cpu(buffer, cpu))
3742 			continue;
3743 
3744 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3745 
3746 		/*
3747 		 * Pick the entry with the smallest timestamp:
3748 		 */
3749 		if (ent && (!next || ts < next_ts)) {
3750 			next = ent;
3751 			next_cpu = cpu;
3752 			next_ts = ts;
3753 			next_lost = lost_events;
3754 			next_size = iter->ent_size;
3755 		}
3756 	}
3757 
3758 	iter->ent_size = next_size;
3759 
3760 	if (ent_cpu)
3761 		*ent_cpu = next_cpu;
3762 
3763 	if (ent_ts)
3764 		*ent_ts = next_ts;
3765 
3766 	if (missing_events)
3767 		*missing_events = next_lost;
3768 
3769 	return next;
3770 }
3771 
3772 #define STATIC_FMT_BUF_SIZE	128
3773 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3774 
3775 char *trace_iter_expand_format(struct trace_iterator *iter)
3776 {
3777 	char *tmp;
3778 
3779 	/*
3780 	 * iter->tr is NULL when used with tp_printk, in which case this
3781 	 * can get called from a context where krealloc() is not safe.
3782 	 */
3783 	if (!iter->tr || iter->fmt == static_fmt_buf)
3784 		return NULL;
3785 
3786 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3787 		       GFP_KERNEL);
3788 	if (tmp) {
3789 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3790 		iter->fmt = tmp;
3791 	}
3792 
3793 	return tmp;
3794 }
3795 
3796 /* Returns true if the string is safe to dereference from an event */
3797 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3798 			   bool star, int len)
3799 {
3800 	unsigned long addr = (unsigned long)str;
3801 	struct trace_event *trace_event;
3802 	struct trace_event_call *event;
3803 
3804 	/* Ignore strings with no length */
3805 	if (star && !len)
3806 		return true;
3807 
3808 	/* OK if part of the event data */
3809 	if ((addr >= (unsigned long)iter->ent) &&
3810 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3811 		return true;
3812 
3813 	/* OK if part of the temp seq buffer */
3814 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3815 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3816 		return true;
3817 
3818 	/* Core rodata can not be freed */
3819 	if (is_kernel_rodata(addr))
3820 		return true;
3821 
3822 	if (trace_is_tracepoint_string(str))
3823 		return true;
3824 
3825 	/*
3826 	 * Now this could be a module event, referencing core module
3827 	 * data, which is OK.
3828 	 */
3829 	if (!iter->ent)
3830 		return false;
3831 
3832 	trace_event = ftrace_find_event(iter->ent->type);
3833 	if (!trace_event)
3834 		return false;
3835 
3836 	event = container_of(trace_event, struct trace_event_call, event);
3837 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3838 		return false;
3839 
3840 	/* Would rather have rodata, but this will suffice */
3841 	if (within_module_core(addr, event->module))
3842 		return true;
3843 
3844 	return false;
3845 }
3846 
3847 static const char *show_buffer(struct trace_seq *s)
3848 {
3849 	struct seq_buf *seq = &s->seq;
3850 
3851 	seq_buf_terminate(seq);
3852 
3853 	return seq->buffer;
3854 }
3855 
3856 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3857 
3858 static int test_can_verify_check(const char *fmt, ...)
3859 {
3860 	char buf[16];
3861 	va_list ap;
3862 	int ret;
3863 
3864 	/*
3865 	 * The verifier depends on vsnprintf() modifying the va_list that is
3866 	 * passed to it, which only happens when the va_list is passed by
3867 	 * reference. Some architectures (like x86_32) pass it by value, in
3868 	 * which case vsnprintf() does not modify the caller's va_list and
3869 	 * the verifier would need to understand every value that
3870 	 * vsnprintf() can consume. If the va_list is passed by value, the
3871 	 * verifier is therefore disabled.
3872 	 */
3873 	va_start(ap, fmt);
3874 	vsnprintf(buf, 16, "%d", ap);
3875 	ret = va_arg(ap, int);
3876 	va_end(ap);
3877 
3878 	return ret;
3879 }
3880 
3881 static void test_can_verify(void)
3882 {
3883 	if (!test_can_verify_check("%d %d", 0, 1)) {
3884 		pr_info("trace event string verifier disabled\n");
3885 		static_branch_inc(&trace_no_verify);
3886 	}
3887 }
3888 
3889 /**
3890  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3891  * @iter: The iterator that holds the seq buffer and the event being printed
3892  * @fmt: The format used to print the event
3893  * @ap: The va_list holding the data to print from @fmt.
3894  *
3895  * This writes the data into the @iter->seq buffer using the data from
3896  * @fmt and @ap. If the format has a %s, then the source of the string
3897  * is examined to make sure it is safe to print, otherwise it will
3898  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3899  * pointer.
3900  */
3901 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3902 			 va_list ap)
3903 {
3904 	const char *p = fmt;
3905 	const char *str;
3906 	int i, j;
3907 
3908 	if (WARN_ON_ONCE(!fmt))
3909 		return;
3910 
3911 	if (static_branch_unlikely(&trace_no_verify))
3912 		goto print;
3913 
3914 	/* Don't bother checking when doing a ftrace_dump() */
3915 	if (iter->fmt == static_fmt_buf)
3916 		goto print;
3917 
3918 	while (*p) {
3919 		bool star = false;
3920 		int len = 0;
3921 
3922 		j = 0;
3923 
3924 		/* We only care about %s and variants */
3925 		for (i = 0; p[i]; i++) {
3926 			if (i + 1 >= iter->fmt_size) {
3927 				/*
3928 				 * If we can't expand the copy buffer,
3929 				 * just print it.
3930 				 */
3931 				if (!trace_iter_expand_format(iter))
3932 					goto print;
3933 			}
3934 
3935 			if (p[i] == '\\' && p[i+1]) {
3936 				i++;
3937 				continue;
3938 			}
3939 			if (p[i] == '%') {
3940 				/* Need to test cases like %08.*s */
3941 				for (j = 1; p[i+j]; j++) {
3942 					if (isdigit(p[i+j]) ||
3943 					    p[i+j] == '.')
3944 						continue;
3945 					if (p[i+j] == '*') {
3946 						star = true;
3947 						continue;
3948 					}
3949 					break;
3950 				}
3951 				if (p[i+j] == 's')
3952 					break;
3953 				star = false;
3954 			}
3955 			j = 0;
3956 		}
3957 		/* If no %s found then just print normally */
3958 		if (!p[i])
3959 			break;
3960 
3961 		/* Copy up to the %s, and print that */
3962 		strncpy(iter->fmt, p, i);
3963 		iter->fmt[i] = '\0';
3964 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3965 
3966 		/*
3967 		 * If iter->seq is full, the above call no longer guarantees
3968 		 * that ap is in sync with fmt processing, and further calls
3969 		 * to va_arg() can return wrong positional arguments.
3970 		 *
3971 		 * Ensure that ap is no longer used in this case.
3972 		 */
3973 		if (iter->seq.full) {
3974 			p = "";
3975 			break;
3976 		}
3977 
3978 		if (star)
3979 			len = va_arg(ap, int);
3980 
3981 		/* The ap now points to the string data of the %s */
3982 		str = va_arg(ap, const char *);
3983 
3984 		/*
3985 		 * If you hit this warning, it is likely that the
3986 		 * trace event in question used %s on a string that
3987 		 * was saved at the time of the event, but may not be
3988 		 * around when the trace is read. Use __string(),
3989 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3990 		 * instead. See samples/trace_events/trace-events-sample.h
3991 		 * for reference.
3992 		 */
3993 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3994 			      "fmt: '%s' current_buffer: '%s'",
3995 			      fmt, show_buffer(&iter->seq))) {
3996 			int ret;
3997 
3998 			/* Try to safely read the string */
3999 			if (star) {
4000 				if (len + 1 > iter->fmt_size)
4001 					len = iter->fmt_size - 1;
4002 				if (len < 0)
4003 					len = 0;
4004 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
4005 				iter->fmt[len] = 0;
4006 				star = false;
4007 			} else {
4008 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
4009 								  iter->fmt_size);
4010 			}
4011 			if (ret < 0)
4012 				trace_seq_printf(&iter->seq, "(0x%px)", str);
4013 			else
4014 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
4015 						 str, iter->fmt);
4016 			str = "[UNSAFE-MEMORY]";
4017 			strcpy(iter->fmt, "%s");
4018 		} else {
4019 			strncpy(iter->fmt, p + i, j + 1);
4020 			iter->fmt[j+1] = '\0';
4021 		}
4022 		if (star)
4023 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
4024 		else
4025 			trace_seq_printf(&iter->seq, iter->fmt, str);
4026 
4027 		p += i + j + 1;
4028 	}
4029  print:
4030 	if (*p)
4031 		trace_seq_vprintf(&iter->seq, p, ap);
4032 }
4033 
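/*
 * Editorial sketch (abridged; the usual TRACE_SYSTEM/define_trace.h
 * boilerplate of a trace header is omitted): the fix suggested by the
 * warning above is to copy the string into the event record with
 * __string()/__assign_str(), so that trace_safe_str() finds it inside the
 * ring buffer entry. The event name "sample_open" is hypothetical; see
 * samples/trace_events/trace-events-sample.h for the full pattern.
 */
TRACE_EVENT(sample_open,

	TP_PROTO(const char *filename),

	TP_ARGS(filename),

	TP_STRUCT__entry(
		__string(filename, filename)	/* space reserved in the entry */
	),

	TP_fast_assign(
		__assign_str(filename, filename); /* copied at event time */
	),

	/* __get_str() points into the entry itself, never at freed memory */
	TP_printk("file=%s", __get_str(filename))
);
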
4034 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4035 {
4036 	const char *p, *new_fmt;
4037 	char *q;
4038 
4039 	if (WARN_ON_ONCE(!fmt))
4040 		return fmt;
4041 
4042 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4043 		return fmt;
4044 
4045 	p = fmt;
4046 	new_fmt = q = iter->fmt;
4047 	while (*p) {
4048 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4049 			if (!trace_iter_expand_format(iter))
4050 				return fmt;
4051 
4052 			q += iter->fmt - new_fmt;
4053 			new_fmt = iter->fmt;
4054 		}
4055 
4056 		*q++ = *p++;
4057 
4058 		/* Replace %p with %px */
4059 		if (p[-1] == '%') {
4060 			if (p[0] == '%') {
4061 				*q++ = *p++;
4062 			} else if (p[0] == 'p' && !isalnum(p[1])) {
4063 				*q++ = *p++;
4064 				*q++ = 'x';
4065 			}
4066 		}
4067 	}
4068 	*q = '\0';
4069 
4070 	return new_fmt;
4071 }
4072 
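/*
 * Editorial example: with the "hash-ptr" trace option cleared, a format
 * such as
 *
 *	"request %p vector %d (%d%%)"
 *
 * is rewritten by trace_event_format() above into
 *
 *	"request %px vector %d (%d%%)"
 *
 * so the unhashed pointer value is printed. "%%" and extended specifiers
 * like "%pK" or "%ps" (an alphanumeric follows the 'p') are left unchanged.
 */
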
4073 #define STATIC_TEMP_BUF_SIZE	128
4074 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4075 
4076 /* Find the next real entry, without updating the iterator itself */
4077 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4078 					  int *ent_cpu, u64 *ent_ts)
4079 {
4080 	/* __find_next_entry will reset ent_size */
4081 	int ent_size = iter->ent_size;
4082 	struct trace_entry *entry;
4083 
4084 	/*
4085 	 * If called from ftrace_dump(), then the iter->temp buffer
4086 	 * will be the static_temp_buf and not created from kmalloc.
4087 	 * If the entry size is greater than the buffer, we cannot
4088 	 * save it. Just return NULL in that case. This is only
4089 	 * used to add markers when two consecutive events' time
4090 	 * stamps have a large delta. See trace_print_lat_context().
4091 	 */
4092 	if (iter->temp == static_temp_buf &&
4093 	    STATIC_TEMP_BUF_SIZE < ent_size)
4094 		return NULL;
4095 
4096 	/*
4097 	 * The __find_next_entry() may call peek_next_entry(), which may
4098 	 * call ring_buffer_peek() that may make the contents of iter->ent
4099 	 * undefined. Need to copy iter->ent now.
4100 	 */
4101 	if (iter->ent && iter->ent != iter->temp) {
4102 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4103 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4104 			void *temp;
4105 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4106 			if (!temp)
4107 				return NULL;
4108 			kfree(iter->temp);
4109 			iter->temp = temp;
4110 			iter->temp_size = iter->ent_size;
4111 		}
4112 		memcpy(iter->temp, iter->ent, iter->ent_size);
4113 		iter->ent = iter->temp;
4114 	}
4115 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4116 	/* Put back the original ent_size */
4117 	iter->ent_size = ent_size;
4118 
4119 	return entry;
4120 }
4121 
4122 /* Find the next real entry, and increment the iterator to the next entry */
4123 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4124 {
4125 	iter->ent = __find_next_entry(iter, &iter->cpu,
4126 				      &iter->lost_events, &iter->ts);
4127 
4128 	if (iter->ent)
4129 		trace_iterator_increment(iter);
4130 
4131 	return iter->ent ? iter : NULL;
4132 }
4133 
4134 static void trace_consume(struct trace_iterator *iter)
4135 {
4136 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4137 			    &iter->lost_events);
4138 }
4139 
4140 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4141 {
4142 	struct trace_iterator *iter = m->private;
4143 	int i = (int)*pos;
4144 	void *ent;
4145 
4146 	WARN_ON_ONCE(iter->leftover);
4147 
4148 	(*pos)++;
4149 
4150 	/* can't go backwards */
4151 	if (iter->idx > i)
4152 		return NULL;
4153 
4154 	if (iter->idx < 0)
4155 		ent = trace_find_next_entry_inc(iter);
4156 	else
4157 		ent = iter;
4158 
4159 	while (ent && iter->idx < i)
4160 		ent = trace_find_next_entry_inc(iter);
4161 
4162 	iter->pos = *pos;
4163 
4164 	return ent;
4165 }
4166 
4167 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4168 {
4169 	struct ring_buffer_iter *buf_iter;
4170 	unsigned long entries = 0;
4171 	u64 ts;
4172 
4173 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4174 
4175 	buf_iter = trace_buffer_iter(iter, cpu);
4176 	if (!buf_iter)
4177 		return;
4178 
4179 	ring_buffer_iter_reset(buf_iter);
4180 
4181 	/*
4182 	 * With the max latency tracers, it is possible that a reset never
4183 	 * took place on a CPU. This is evident when an entry's timestamp
4184 	 * is before the start of the buffer.
4185 	 */
4186 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4187 		if (ts >= iter->array_buffer->time_start)
4188 			break;
4189 		entries++;
4190 		ring_buffer_iter_advance(buf_iter);
4191 	}
4192 
4193 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4194 }
4195 
4196 /*
4197  * The current tracer is copied to avoid taking a global lock
4198  * all around.
4199  */
4200 static void *s_start(struct seq_file *m, loff_t *pos)
4201 {
4202 	struct trace_iterator *iter = m->private;
4203 	struct trace_array *tr = iter->tr;
4204 	int cpu_file = iter->cpu_file;
4205 	void *p = NULL;
4206 	loff_t l = 0;
4207 	int cpu;
4208 
4209 	/*
4210 	 * Copy the tracer to avoid using a global lock all around.
4211 	 * iter->trace is a copy of current_trace, so the name pointer
4212 	 * may be compared instead of using strcmp(), as iter->trace->name
4213 	 * will point to the same string as current_trace->name.
4214 	 */
4215 	mutex_lock(&trace_types_lock);
4216 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
4217 		/* Close iter->trace before switching to the new current tracer */
4218 		if (iter->trace->close)
4219 			iter->trace->close(iter);
4220 		*iter->trace = *tr->current_trace;
4221 		/* Reopen the new current tracer */
4222 		if (iter->trace->open)
4223 			iter->trace->open(iter);
4224 	}
4225 	mutex_unlock(&trace_types_lock);
4226 
4227 #ifdef CONFIG_TRACER_MAX_TRACE
4228 	if (iter->snapshot && iter->trace->use_max_tr)
4229 		return ERR_PTR(-EBUSY);
4230 #endif
4231 
4232 	if (*pos != iter->pos) {
4233 		iter->ent = NULL;
4234 		iter->cpu = 0;
4235 		iter->idx = -1;
4236 
4237 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4238 			for_each_tracing_cpu(cpu)
4239 				tracing_iter_reset(iter, cpu);
4240 		} else
4241 			tracing_iter_reset(iter, cpu_file);
4242 
4243 		iter->leftover = 0;
4244 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4245 			;
4246 
4247 	} else {
4248 		/*
4249 		 * If we overflowed the seq_file before, then we want
4250 		 * to just reuse the trace_seq buffer again.
4251 		 */
4252 		if (iter->leftover)
4253 			p = iter;
4254 		else {
4255 			l = *pos - 1;
4256 			p = s_next(m, p, &l);
4257 		}
4258 	}
4259 
4260 	trace_event_read_lock();
4261 	trace_access_lock(cpu_file);
4262 	return p;
4263 }
4264 
4265 static void s_stop(struct seq_file *m, void *p)
4266 {
4267 	struct trace_iterator *iter = m->private;
4268 
4269 #ifdef CONFIG_TRACER_MAX_TRACE
4270 	if (iter->snapshot && iter->trace->use_max_tr)
4271 		return;
4272 #endif
4273 
4274 	trace_access_unlock(iter->cpu_file);
4275 	trace_event_read_unlock();
4276 }
4277 
4278 static void
4279 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4280 		      unsigned long *entries, int cpu)
4281 {
4282 	unsigned long count;
4283 
4284 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4285 	/*
4286 	 * If this buffer has skipped entries, then we hold all
4287 	 * entries for the trace and we need to ignore the
4288 	 * ones before the time stamp.
4289 	 */
4290 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4291 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4292 		/* total is the same as the entries */
4293 		*total = count;
4294 	} else
4295 		*total = count +
4296 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4297 	*entries = count;
4298 }
4299 
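/*
 * Editorial example: if a CPU wrote 1000 events and the ring buffer
 * overwrote 200 of them (and none were consumed), ring_buffer_entries_cpu()
 * reports the 800 still present, so *entries == 800 and *total == 1000.
 * When skipped_entries is set (the latency tracers skip entries recorded
 * before the buffer's time_start), those are subtracted and *total is
 * reported equal to *entries.
 */
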
4300 static void
4301 get_total_entries(struct array_buffer *buf,
4302 		  unsigned long *total, unsigned long *entries)
4303 {
4304 	unsigned long t, e;
4305 	int cpu;
4306 
4307 	*total = 0;
4308 	*entries = 0;
4309 
4310 	for_each_tracing_cpu(cpu) {
4311 		get_total_entries_cpu(buf, &t, &e, cpu);
4312 		*total += t;
4313 		*entries += e;
4314 	}
4315 }
4316 
4317 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4318 {
4319 	unsigned long total, entries;
4320 
4321 	if (!tr)
4322 		tr = &global_trace;
4323 
4324 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4325 
4326 	return entries;
4327 }
4328 
4329 unsigned long trace_total_entries(struct trace_array *tr)
4330 {
4331 	unsigned long total, entries;
4332 
4333 	if (!tr)
4334 		tr = &global_trace;
4335 
4336 	get_total_entries(&tr->array_buffer, &total, &entries);
4337 
4338 	return entries;
4339 }
4340 
4341 static void print_lat_help_header(struct seq_file *m)
4342 {
4343 	seq_puts(m, "#                    _------=> CPU#            \n"
4344 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4345 		    "#                  | / _----=> need-resched    \n"
4346 		    "#                  || / _---=> hardirq/softirq \n"
4347 		    "#                  ||| / _--=> preempt-depth   \n"
4348 		    "#                  |||| / _-=> migrate-disable \n"
4349 		    "#                  ||||| /     delay           \n"
4350 		    "#  cmd     pid     |||||| time  |   caller     \n"
4351 		    "#     \\   /        ||||||  \\    |    /       \n");
4352 }
4353 
4354 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4355 {
4356 	unsigned long total;
4357 	unsigned long entries;
4358 
4359 	get_total_entries(buf, &total, &entries);
4360 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4361 		   entries, total, num_online_cpus());
4362 	seq_puts(m, "#\n");
4363 }
4364 
4365 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4366 				   unsigned int flags)
4367 {
4368 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4369 
4370 	print_event_info(buf, m);
4371 
4372 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4373 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4374 }
4375 
4376 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4377 				       unsigned int flags)
4378 {
4379 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4380 	static const char space[] = "            ";
4381 	int prec = tgid ? 12 : 2;
4382 
4383 	print_event_info(buf, m);
4384 
4385 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4386 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4387 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4388 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4389 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4390 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4391 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4392 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4393 }
4394 
4395 void
4396 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4397 {
4398 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4399 	struct array_buffer *buf = iter->array_buffer;
4400 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4401 	struct tracer *type = iter->trace;
4402 	unsigned long entries;
4403 	unsigned long total;
4404 	const char *name = type->name;
4405 
4406 	get_total_entries(buf, &total, &entries);
4407 
4408 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4409 		   name, UTS_RELEASE);
4410 	seq_puts(m, "# -----------------------------------"
4411 		 "---------------------------------\n");
4412 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4413 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4414 		   nsecs_to_usecs(data->saved_latency),
4415 		   entries,
4416 		   total,
4417 		   buf->cpu,
4418 		   preempt_model_none()      ? "server" :
4419 		   preempt_model_voluntary() ? "desktop" :
4420 		   preempt_model_full()      ? "preempt" :
4421 		   preempt_model_rt()        ? "preempt_rt" :
4422 		   "unknown",
4423 		   /* These are reserved for later use */
4424 		   0, 0, 0, 0);
4425 #ifdef CONFIG_SMP
4426 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4427 #else
4428 	seq_puts(m, ")\n");
4429 #endif
4430 	seq_puts(m, "#    -----------------\n");
4431 	seq_printf(m, "#    | task: %.16s-%d "
4432 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4433 		   data->comm, data->pid,
4434 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4435 		   data->policy, data->rt_priority);
4436 	seq_puts(m, "#    -----------------\n");
4437 
4438 	if (data->critical_start) {
4439 		seq_puts(m, "#  => started at: ");
4440 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4441 		trace_print_seq(m, &iter->seq);
4442 		seq_puts(m, "\n#  => ended at:   ");
4443 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4444 		trace_print_seq(m, &iter->seq);
4445 		seq_puts(m, "\n#\n");
4446 	}
4447 
4448 	seq_puts(m, "#\n");
4449 }
4450 
4451 static void test_cpu_buff_start(struct trace_iterator *iter)
4452 {
4453 	struct trace_seq *s = &iter->seq;
4454 	struct trace_array *tr = iter->tr;
4455 
4456 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4457 		return;
4458 
4459 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4460 		return;
4461 
4462 	if (cpumask_available(iter->started) &&
4463 	    cpumask_test_cpu(iter->cpu, iter->started))
4464 		return;
4465 
4466 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4467 		return;
4468 
4469 	if (cpumask_available(iter->started))
4470 		cpumask_set_cpu(iter->cpu, iter->started);
4471 
4472 	/* Don't print started cpu buffer for the first entry of the trace */
4473 	if (iter->idx > 1)
4474 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4475 				iter->cpu);
4476 }
4477 
4478 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4479 {
4480 	struct trace_array *tr = iter->tr;
4481 	struct trace_seq *s = &iter->seq;
4482 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4483 	struct trace_entry *entry;
4484 	struct trace_event *event;
4485 
4486 	entry = iter->ent;
4487 
4488 	test_cpu_buff_start(iter);
4489 
4490 	event = ftrace_find_event(entry->type);
4491 
4492 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4493 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4494 			trace_print_lat_context(iter);
4495 		else
4496 			trace_print_context(iter);
4497 	}
4498 
4499 	if (trace_seq_has_overflowed(s))
4500 		return TRACE_TYPE_PARTIAL_LINE;
4501 
4502 	if (event) {
4503 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4504 			return print_event_fields(iter, event);
4505 		return event->funcs->trace(iter, sym_flags, event);
4506 	}
4507 
4508 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4509 
4510 	return trace_handle_return(s);
4511 }
4512 
4513 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4514 {
4515 	struct trace_array *tr = iter->tr;
4516 	struct trace_seq *s = &iter->seq;
4517 	struct trace_entry *entry;
4518 	struct trace_event *event;
4519 
4520 	entry = iter->ent;
4521 
4522 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4523 		trace_seq_printf(s, "%d %d %llu ",
4524 				 entry->pid, iter->cpu, iter->ts);
4525 
4526 	if (trace_seq_has_overflowed(s))
4527 		return TRACE_TYPE_PARTIAL_LINE;
4528 
4529 	event = ftrace_find_event(entry->type);
4530 	if (event)
4531 		return event->funcs->raw(iter, 0, event);
4532 
4533 	trace_seq_printf(s, "%d ?\n", entry->type);
4534 
4535 	return trace_handle_return(s);
4536 }
4537 
4538 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4539 {
4540 	struct trace_array *tr = iter->tr;
4541 	struct trace_seq *s = &iter->seq;
4542 	unsigned char newline = '\n';
4543 	struct trace_entry *entry;
4544 	struct trace_event *event;
4545 
4546 	entry = iter->ent;
4547 
4548 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4549 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4550 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4551 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4552 		if (trace_seq_has_overflowed(s))
4553 			return TRACE_TYPE_PARTIAL_LINE;
4554 	}
4555 
4556 	event = ftrace_find_event(entry->type);
4557 	if (event) {
4558 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4559 		if (ret != TRACE_TYPE_HANDLED)
4560 			return ret;
4561 	}
4562 
4563 	SEQ_PUT_FIELD(s, newline);
4564 
4565 	return trace_handle_return(s);
4566 }
4567 
4568 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4569 {
4570 	struct trace_array *tr = iter->tr;
4571 	struct trace_seq *s = &iter->seq;
4572 	struct trace_entry *entry;
4573 	struct trace_event *event;
4574 
4575 	entry = iter->ent;
4576 
4577 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4578 		SEQ_PUT_FIELD(s, entry->pid);
4579 		SEQ_PUT_FIELD(s, iter->cpu);
4580 		SEQ_PUT_FIELD(s, iter->ts);
4581 		if (trace_seq_has_overflowed(s))
4582 			return TRACE_TYPE_PARTIAL_LINE;
4583 	}
4584 
4585 	event = ftrace_find_event(entry->type);
4586 	return event ? event->funcs->binary(iter, 0, event) :
4587 		TRACE_TYPE_HANDLED;
4588 }
4589 
4590 int trace_empty(struct trace_iterator *iter)
4591 {
4592 	struct ring_buffer_iter *buf_iter;
4593 	int cpu;
4594 
4595 	/* If we are looking at one CPU buffer, only check that one */
4596 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4597 		cpu = iter->cpu_file;
4598 		buf_iter = trace_buffer_iter(iter, cpu);
4599 		if (buf_iter) {
4600 			if (!ring_buffer_iter_empty(buf_iter))
4601 				return 0;
4602 		} else {
4603 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4604 				return 0;
4605 		}
4606 		return 1;
4607 	}
4608 
4609 	for_each_tracing_cpu(cpu) {
4610 		buf_iter = trace_buffer_iter(iter, cpu);
4611 		if (buf_iter) {
4612 			if (!ring_buffer_iter_empty(buf_iter))
4613 				return 0;
4614 		} else {
4615 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4616 				return 0;
4617 		}
4618 	}
4619 
4620 	return 1;
4621 }
4622 
4623 /*  Called with trace_event_read_lock() held. */
4624 enum print_line_t print_trace_line(struct trace_iterator *iter)
4625 {
4626 	struct trace_array *tr = iter->tr;
4627 	unsigned long trace_flags = tr->trace_flags;
4628 	enum print_line_t ret;
4629 
4630 	if (iter->lost_events) {
4631 		if (iter->lost_events == (unsigned long)-1)
4632 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4633 					 iter->cpu);
4634 		else
4635 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4636 					 iter->cpu, iter->lost_events);
4637 		if (trace_seq_has_overflowed(&iter->seq))
4638 			return TRACE_TYPE_PARTIAL_LINE;
4639 	}
4640 
4641 	if (iter->trace && iter->trace->print_line) {
4642 		ret = iter->trace->print_line(iter);
4643 		if (ret != TRACE_TYPE_UNHANDLED)
4644 			return ret;
4645 	}
4646 
4647 	if (iter->ent->type == TRACE_BPUTS &&
4648 			trace_flags & TRACE_ITER_PRINTK &&
4649 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4650 		return trace_print_bputs_msg_only(iter);
4651 
4652 	if (iter->ent->type == TRACE_BPRINT &&
4653 			trace_flags & TRACE_ITER_PRINTK &&
4654 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4655 		return trace_print_bprintk_msg_only(iter);
4656 
4657 	if (iter->ent->type == TRACE_PRINT &&
4658 			trace_flags & TRACE_ITER_PRINTK &&
4659 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4660 		return trace_print_printk_msg_only(iter);
4661 
4662 	if (trace_flags & TRACE_ITER_BIN)
4663 		return print_bin_fmt(iter);
4664 
4665 	if (trace_flags & TRACE_ITER_HEX)
4666 		return print_hex_fmt(iter);
4667 
4668 	if (trace_flags & TRACE_ITER_RAW)
4669 		return print_raw_fmt(iter);
4670 
4671 	return print_trace_fmt(iter);
4672 }
4673 
4674 void trace_latency_header(struct seq_file *m)
4675 {
4676 	struct trace_iterator *iter = m->private;
4677 	struct trace_array *tr = iter->tr;
4678 
4679 	/* print nothing if the buffers are empty */
4680 	if (trace_empty(iter))
4681 		return;
4682 
4683 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4684 		print_trace_header(m, iter);
4685 
4686 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4687 		print_lat_help_header(m);
4688 }
4689 
4690 void trace_default_header(struct seq_file *m)
4691 {
4692 	struct trace_iterator *iter = m->private;
4693 	struct trace_array *tr = iter->tr;
4694 	unsigned long trace_flags = tr->trace_flags;
4695 
4696 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4697 		return;
4698 
4699 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4700 		/* print nothing if the buffers are empty */
4701 		if (trace_empty(iter))
4702 			return;
4703 		print_trace_header(m, iter);
4704 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4705 			print_lat_help_header(m);
4706 	} else {
4707 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4708 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4709 				print_func_help_header_irq(iter->array_buffer,
4710 							   m, trace_flags);
4711 			else
4712 				print_func_help_header(iter->array_buffer, m,
4713 						       trace_flags);
4714 		}
4715 	}
4716 }
4717 
4718 static void test_ftrace_alive(struct seq_file *m)
4719 {
4720 	if (!ftrace_is_dead())
4721 		return;
4722 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4723 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4724 }
4725 
4726 #ifdef CONFIG_TRACER_MAX_TRACE
4727 static void show_snapshot_main_help(struct seq_file *m)
4728 {
4729 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4730 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4731 		    "#                      Takes a snapshot of the main buffer.\n"
4732 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4733 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4733 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4734 		    "#                       is not a '0' or '1')\n");
4735 }
4736 
4737 static void show_snapshot_percpu_help(struct seq_file *m)
4738 {
4739 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4740 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4741 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4742 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4743 #else
4744 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4745 		    "#                     Must use main snapshot file to allocate.\n");
4746 #endif
4747 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4748 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4749 		    "#                       is not a '0' or '1')\n");
4750 }
4751 
4752 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4753 {
4754 	if (iter->tr->allocated_snapshot)
4755 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4756 	else
4757 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4758 
4759 	seq_puts(m, "# Snapshot commands:\n");
4760 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4761 		show_snapshot_main_help(m);
4762 	else
4763 		show_snapshot_percpu_help(m);
4764 }
4765 #else
4766 /* Should never be called */
4767 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4768 #endif
4769 
4770 static int s_show(struct seq_file *m, void *v)
4771 {
4772 	struct trace_iterator *iter = v;
4773 	int ret;
4774 
4775 	if (iter->ent == NULL) {
4776 		if (iter->tr) {
4777 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4778 			seq_puts(m, "#\n");
4779 			test_ftrace_alive(m);
4780 		}
4781 		if (iter->snapshot && trace_empty(iter))
4782 			print_snapshot_help(m, iter);
4783 		else if (iter->trace && iter->trace->print_header)
4784 			iter->trace->print_header(m);
4785 		else
4786 			trace_default_header(m);
4787 
4788 	} else if (iter->leftover) {
4789 		/*
4790 		 * If we filled the seq_file buffer earlier, we
4791 		 * want to just show it now.
4792 		 */
4793 		ret = trace_print_seq(m, &iter->seq);
4794 
4795 		/* ret should this time be zero, but you never know */
4796 		iter->leftover = ret;
4797 
4798 	} else {
4799 		print_trace_line(iter);
4800 		ret = trace_print_seq(m, &iter->seq);
4801 		/*
4802 		 * If we overflow the seq_file buffer, then it will
4803 		 * ask us for this data again at start up.
4804 		 * Use that instead.
4805 		 *  ret is 0 if seq_file write succeeded.
4806 		 *        -1 otherwise.
4807 		 */
4808 		iter->leftover = ret;
4809 	}
4810 
4811 	return 0;
4812 }
4813 
4814 /*
4815  * Should be used after trace_array_get(), trace_types_lock
4816  * ensures that i_cdev was already initialized.
4817  */
4818 static inline int tracing_get_cpu(struct inode *inode)
4819 {
4820 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4821 		return (long)inode->i_cdev - 1;
4822 	return RING_BUFFER_ALL_CPUS;
4823 }
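
/*
 * Editorial note: trace_create_cpu_file() stores "cpu + 1" in i_cdev, so a
 * per-cpu file decodes back to its CPU number above, while files created
 * without a CPU leave i_cdev NULL and select RING_BUFFER_ALL_CPUS.
 */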
4824 
4825 static const struct seq_operations tracer_seq_ops = {
4826 	.start		= s_start,
4827 	.next		= s_next,
4828 	.stop		= s_stop,
4829 	.show		= s_show,
4830 };
4831 
4832 static struct trace_iterator *
4833 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4834 {
4835 	struct trace_array *tr = inode->i_private;
4836 	struct trace_iterator *iter;
4837 	int cpu;
4838 
4839 	if (tracing_disabled)
4840 		return ERR_PTR(-ENODEV);
4841 
4842 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4843 	if (!iter)
4844 		return ERR_PTR(-ENOMEM);
4845 
4846 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4847 				    GFP_KERNEL);
4848 	if (!iter->buffer_iter)
4849 		goto release;
4850 
4851 	/*
4852 	 * trace_find_next_entry() may need to save off iter->ent.
4853 	 * It will place it into the iter->temp buffer. As most
4854 	 * events are less than 128 bytes, allocate a buffer of that size.
4855 	 * If one is greater, then trace_find_next_entry() will
4856 	 * allocate a new buffer to adjust for the bigger iter->ent.
4857 	 * It's not critical if it fails to get allocated here.
4858 	 */
4859 	iter->temp = kmalloc(128, GFP_KERNEL);
4860 	if (iter->temp)
4861 		iter->temp_size = 128;
4862 
4863 	/*
4864 	 * trace_event_printf() may need to modify the given format
4865 	 * string to replace %p with %px so that it shows the real address
4866 	 * instead of a hash value. However, that is only needed for event
4867 	 * tracing; other tracers may not need it. Defer the allocation
4868 	 * until it is needed.
4869 	 */
4870 	iter->fmt = NULL;
4871 	iter->fmt_size = 0;
4872 
4873 	/*
4874 	 * We make a copy of the current tracer to avoid concurrent
4875 	 * changes on it while we are reading.
4876 	 */
4877 	mutex_lock(&trace_types_lock);
4878 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4879 	if (!iter->trace)
4880 		goto fail;
4881 
4882 	*iter->trace = *tr->current_trace;
4883 
4884 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4885 		goto fail;
4886 
4887 	iter->tr = tr;
4888 
4889 #ifdef CONFIG_TRACER_MAX_TRACE
4890 	/* Currently only the top directory has a snapshot */
4891 	if (tr->current_trace->print_max || snapshot)
4892 		iter->array_buffer = &tr->max_buffer;
4893 	else
4894 #endif
4895 		iter->array_buffer = &tr->array_buffer;
4896 	iter->snapshot = snapshot;
4897 	iter->pos = -1;
4898 	iter->cpu_file = tracing_get_cpu(inode);
4899 	mutex_init(&iter->mutex);
4900 
4901 	/* Notify the tracer early; before we stop tracing. */
4902 	if (iter->trace->open)
4903 		iter->trace->open(iter);
4904 
4905 	/* Annotate start of buffers if we had overruns */
4906 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4907 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4908 
4909 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4910 	if (trace_clocks[tr->clock_id].in_ns)
4911 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4912 
4913 	/*
4914 	 * If pause-on-trace is enabled, then stop the trace while
4915 	 * dumping, unless this is the "snapshot" file
4916 	 */
4917 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4918 		tracing_stop_tr(tr);
4919 
4920 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4921 		for_each_tracing_cpu(cpu) {
4922 			iter->buffer_iter[cpu] =
4923 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4924 							 cpu, GFP_KERNEL);
4925 		}
4926 		ring_buffer_read_prepare_sync();
4927 		for_each_tracing_cpu(cpu) {
4928 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4929 			tracing_iter_reset(iter, cpu);
4930 		}
4931 	} else {
4932 		cpu = iter->cpu_file;
4933 		iter->buffer_iter[cpu] =
4934 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4935 						 cpu, GFP_KERNEL);
4936 		ring_buffer_read_prepare_sync();
4937 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4938 		tracing_iter_reset(iter, cpu);
4939 	}
4940 
4941 	mutex_unlock(&trace_types_lock);
4942 
4943 	return iter;
4944 
4945  fail:
4946 	mutex_unlock(&trace_types_lock);
4947 	kfree(iter->trace);
4948 	kfree(iter->temp);
4949 	kfree(iter->buffer_iter);
4950 release:
4951 	seq_release_private(inode, file);
4952 	return ERR_PTR(-ENOMEM);
4953 }
4954 
4955 int tracing_open_generic(struct inode *inode, struct file *filp)
4956 {
4957 	int ret;
4958 
4959 	ret = tracing_check_open_get_tr(NULL);
4960 	if (ret)
4961 		return ret;
4962 
4963 	filp->private_data = inode->i_private;
4964 	return 0;
4965 }
4966 
4967 bool tracing_is_disabled(void)
4968 {
4969 	return (tracing_disabled) ? true: false;
4970 	return (tracing_disabled) ? true : false;
4971 
4972 /*
4973  * Open and update trace_array ref count.
4974  * Must have the current trace_array passed to it.
4975  */
4976 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4977 {
4978 	struct trace_array *tr = inode->i_private;
4979 	int ret;
4980 
4981 	ret = tracing_check_open_get_tr(tr);
4982 	if (ret)
4983 		return ret;
4984 
4985 	filp->private_data = inode->i_private;
4986 
4987 	return 0;
4988 }
4989 
4990 static int tracing_mark_open(struct inode *inode, struct file *filp)
4991 {
4992 	stream_open(inode, filp);
4993 	return tracing_open_generic_tr(inode, filp);
4994 }
4995 
4996 static int tracing_release(struct inode *inode, struct file *file)
4997 {
4998 	struct trace_array *tr = inode->i_private;
4999 	struct seq_file *m = file->private_data;
5000 	struct trace_iterator *iter;
5001 	int cpu;
5002 
5003 	if (!(file->f_mode & FMODE_READ)) {
5004 		trace_array_put(tr);
5005 		return 0;
5006 	}
5007 
5008 	/* Writes do not use seq_file */
5009 	iter = m->private;
5010 	mutex_lock(&trace_types_lock);
5011 
5012 	for_each_tracing_cpu(cpu) {
5013 		if (iter->buffer_iter[cpu])
5014 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
5015 	}
5016 
5017 	if (iter->trace && iter->trace->close)
5018 		iter->trace->close(iter);
5019 
5020 	if (!iter->snapshot && tr->stop_count)
5021 		/* reenable tracing if it was previously enabled */
5022 		tracing_start_tr(tr);
5023 
5024 	__trace_array_put(tr);
5025 
5026 	mutex_unlock(&trace_types_lock);
5027 
5028 	mutex_destroy(&iter->mutex);
5029 	free_cpumask_var(iter->started);
5030 	kfree(iter->fmt);
5031 	kfree(iter->temp);
5032 	kfree(iter->trace);
5033 	kfree(iter->buffer_iter);
5034 	seq_release_private(inode, file);
5035 
5036 	return 0;
5037 }
5038 
5039 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5040 {
5041 	struct trace_array *tr = inode->i_private;
5042 
5043 	trace_array_put(tr);
5044 	return 0;
5045 }
5046 
5047 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5048 {
5049 	struct trace_array *tr = inode->i_private;
5050 
5051 	trace_array_put(tr);
5052 
5053 	return single_release(inode, file);
5054 }
5055 
5056 static int tracing_open(struct inode *inode, struct file *file)
5057 {
5058 	struct trace_array *tr = inode->i_private;
5059 	struct trace_iterator *iter;
5060 	int ret;
5061 
5062 	ret = tracing_check_open_get_tr(tr);
5063 	if (ret)
5064 		return ret;
5065 
5066 	/* If this file was open for write, then erase contents */
5067 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5068 		int cpu = tracing_get_cpu(inode);
5069 		struct array_buffer *trace_buf = &tr->array_buffer;
5070 
5071 #ifdef CONFIG_TRACER_MAX_TRACE
5072 		if (tr->current_trace->print_max)
5073 			trace_buf = &tr->max_buffer;
5074 #endif
5075 
5076 		if (cpu == RING_BUFFER_ALL_CPUS)
5077 			tracing_reset_online_cpus(trace_buf);
5078 		else
5079 			tracing_reset_cpu(trace_buf, cpu);
5080 	}
5081 
5082 	if (file->f_mode & FMODE_READ) {
5083 		iter = __tracing_open(inode, file, false);
5084 		if (IS_ERR(iter))
5085 			ret = PTR_ERR(iter);
5086 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5087 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5088 	}
5089 
5090 	if (ret < 0)
5091 		trace_array_put(tr);
5092 
5093 	return ret;
5094 }
5095 
5096 /*
5097  * Some tracers are not suitable for instance buffers.
5098  * A tracer is always available for the global (top-level) array,
5099  * and for an instance only if it explicitly allows it.
5100  */
5101 static bool
5102 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5103 {
5104 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5105 }
5106 
5107 /* Find the next tracer that this trace array may use */
5108 static struct tracer *
5109 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5110 {
5111 	while (t && !trace_ok_for_array(t, tr))
5112 		t = t->next;
5113 
5114 	return t;
5115 }
5116 
5117 static void *
5118 t_next(struct seq_file *m, void *v, loff_t *pos)
5119 {
5120 	struct trace_array *tr = m->private;
5121 	struct tracer *t = v;
5122 
5123 	(*pos)++;
5124 
5125 	if (t)
5126 		t = get_tracer_for_array(tr, t->next);
5127 
5128 	return t;
5129 }
5130 
5131 static void *t_start(struct seq_file *m, loff_t *pos)
5132 {
5133 	struct trace_array *tr = m->private;
5134 	struct tracer *t;
5135 	loff_t l = 0;
5136 
5137 	mutex_lock(&trace_types_lock);
5138 
5139 	t = get_tracer_for_array(tr, trace_types);
5140 	for (; t && l < *pos; t = t_next(m, t, &l))
5141 			;
5142 
5143 	return t;
5144 }
5145 
5146 static void t_stop(struct seq_file *m, void *p)
5147 {
5148 	mutex_unlock(&trace_types_lock);
5149 }
5150 
5151 static int t_show(struct seq_file *m, void *v)
5152 {
5153 	struct tracer *t = v;
5154 
5155 	if (!t)
5156 		return 0;
5157 
5158 	seq_puts(m, t->name);
5159 	if (t->next)
5160 		seq_putc(m, ' ');
5161 	else
5162 		seq_putc(m, '\n');
5163 
5164 	return 0;
5165 }
5166 
5167 static const struct seq_operations show_traces_seq_ops = {
5168 	.start		= t_start,
5169 	.next		= t_next,
5170 	.stop		= t_stop,
5171 	.show		= t_show,
5172 };
5173 
5174 static int show_traces_open(struct inode *inode, struct file *file)
5175 {
5176 	struct trace_array *tr = inode->i_private;
5177 	struct seq_file *m;
5178 	int ret;
5179 
5180 	ret = tracing_check_open_get_tr(tr);
5181 	if (ret)
5182 		return ret;
5183 
5184 	ret = seq_open(file, &show_traces_seq_ops);
5185 	if (ret) {
5186 		trace_array_put(tr);
5187 		return ret;
5188 	}
5189 
5190 	m = file->private_data;
5191 	m->private = tr;
5192 
5193 	return 0;
5194 }
5195 
5196 static int show_traces_release(struct inode *inode, struct file *file)
5197 {
5198 	struct trace_array *tr = inode->i_private;
5199 
5200 	trace_array_put(tr);
5201 	return seq_release(inode, file);
5202 }
5203 
5204 static ssize_t
5205 tracing_write_stub(struct file *filp, const char __user *ubuf,
5206 		   size_t count, loff_t *ppos)
5207 {
5208 	return count;
5209 }
5210 
5211 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5212 {
5213 	int ret;
5214 
5215 	if (file->f_mode & FMODE_READ)
5216 		ret = seq_lseek(file, offset, whence);
5217 	else
5218 		file->f_pos = ret = 0;
5219 
5220 	return ret;
5221 }
5222 
5223 static const struct file_operations tracing_fops = {
5224 	.open		= tracing_open,
5225 	.read		= seq_read,
5226 	.read_iter	= seq_read_iter,
5227 	.splice_read	= copy_splice_read,
5228 	.write		= tracing_write_stub,
5229 	.llseek		= tracing_lseek,
5230 	.release	= tracing_release,
5231 };
5232 
5233 static const struct file_operations show_traces_fops = {
5234 	.open		= show_traces_open,
5235 	.read		= seq_read,
5236 	.llseek		= seq_lseek,
5237 	.release	= show_traces_release,
5238 };
5239 
5240 static ssize_t
5241 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5242 		     size_t count, loff_t *ppos)
5243 {
5244 	struct trace_array *tr = file_inode(filp)->i_private;
5245 	char *mask_str;
5246 	int len;
5247 
5248 	len = snprintf(NULL, 0, "%*pb\n",
5249 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5250 	mask_str = kmalloc(len, GFP_KERNEL);
5251 	if (!mask_str)
5252 		return -ENOMEM;
5253 
5254 	len = snprintf(mask_str, len, "%*pb\n",
5255 		       cpumask_pr_args(tr->tracing_cpumask));
5256 	if (len >= count) {
5257 		count = -EINVAL;
5258 		goto out_err;
5259 	}
5260 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5261 
5262 out_err:
5263 	kfree(mask_str);
5264 
5265 	return count;
5266 }
5267 
5268 int tracing_set_cpumask(struct trace_array *tr,
5269 			cpumask_var_t tracing_cpumask_new)
5270 {
5271 	int cpu;
5272 
5273 	if (!tr)
5274 		return -EINVAL;
5275 
5276 	local_irq_disable();
5277 	arch_spin_lock(&tr->max_lock);
5278 	for_each_tracing_cpu(cpu) {
5279 		/*
5280 		 * Increase/decrease the disabled counter if we are
5281 		 * about to flip a bit in the cpumask:
5282 		 */
5283 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5284 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5285 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5286 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5287 #ifdef CONFIG_TRACER_MAX_TRACE
5288 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5289 #endif
5290 		}
5291 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5292 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5293 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5294 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5295 #ifdef CONFIG_TRACER_MAX_TRACE
5296 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5297 #endif
5298 		}
5299 	}
5300 	arch_spin_unlock(&tr->max_lock);
5301 	local_irq_enable();
5302 
5303 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5304 
5305 	return 0;
5306 }
5307 
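/*
 * Editorial example: from user space the mask is written as a hex cpumask,
 * e.g. (assuming tracefs is mounted at /sys/kernel/tracing)
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * restricts tracing to CPUs 0 and 1. tracing_cpumask_write() below parses
 * the text with cpumask_parse_user() and hands the result to
 * tracing_set_cpumask().
 */
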
5308 static ssize_t
5309 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5310 		      size_t count, loff_t *ppos)
5311 {
5312 	struct trace_array *tr = file_inode(filp)->i_private;
5313 	cpumask_var_t tracing_cpumask_new;
5314 	int err;
5315 
5316 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5317 		return -ENOMEM;
5318 
5319 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5320 	if (err)
5321 		goto err_free;
5322 
5323 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5324 	if (err)
5325 		goto err_free;
5326 
5327 	free_cpumask_var(tracing_cpumask_new);
5328 
5329 	return count;
5330 
5331 err_free:
5332 	free_cpumask_var(tracing_cpumask_new);
5333 
5334 	return err;
5335 }
5336 
5337 static const struct file_operations tracing_cpumask_fops = {
5338 	.open		= tracing_open_generic_tr,
5339 	.read		= tracing_cpumask_read,
5340 	.write		= tracing_cpumask_write,
5341 	.release	= tracing_release_generic_tr,
5342 	.llseek		= generic_file_llseek,
5343 };
5344 
5345 static int tracing_trace_options_show(struct seq_file *m, void *v)
5346 {
5347 	struct tracer_opt *trace_opts;
5348 	struct trace_array *tr = m->private;
5349 	u32 tracer_flags;
5350 	int i;
5351 
5352 	mutex_lock(&trace_types_lock);
5353 	tracer_flags = tr->current_trace->flags->val;
5354 	trace_opts = tr->current_trace->flags->opts;
5355 
5356 	for (i = 0; trace_options[i]; i++) {
5357 		if (tr->trace_flags & (1 << i))
5358 			seq_printf(m, "%s\n", trace_options[i]);
5359 		else
5360 			seq_printf(m, "no%s\n", trace_options[i]);
5361 	}
5362 
5363 	for (i = 0; trace_opts[i].name; i++) {
5364 		if (tracer_flags & trace_opts[i].bit)
5365 			seq_printf(m, "%s\n", trace_opts[i].name);
5366 		else
5367 			seq_printf(m, "no%s\n", trace_opts[i].name);
5368 	}
5369 	mutex_unlock(&trace_types_lock);
5370 
5371 	return 0;
5372 }
5373 
5374 static int __set_tracer_option(struct trace_array *tr,
5375 			       struct tracer_flags *tracer_flags,
5376 			       struct tracer_opt *opts, int neg)
5377 {
5378 	struct tracer *trace = tracer_flags->trace;
5379 	int ret;
5380 
5381 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5382 	if (ret)
5383 		return ret;
5384 
5385 	if (neg)
5386 		tracer_flags->val &= ~opts->bit;
5387 	else
5388 		tracer_flags->val |= opts->bit;
5389 	return 0;
5390 }
5391 
5392 /* Try to assign a tracer specific option */
5393 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5394 {
5395 	struct tracer *trace = tr->current_trace;
5396 	struct tracer_flags *tracer_flags = trace->flags;
5397 	struct tracer_opt *opts = NULL;
5398 	int i;
5399 
5400 	for (i = 0; tracer_flags->opts[i].name; i++) {
5401 		opts = &tracer_flags->opts[i];
5402 
5403 		if (strcmp(cmp, opts->name) == 0)
5404 			return __set_tracer_option(tr, trace->flags, opts, neg);
5405 	}
5406 
5407 	return -EINVAL;
5408 }
5409 
5410 /* Some tracers require overwrite to stay enabled */
5411 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5412 {
5413 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5414 		return -1;
5415 
5416 	return 0;
5417 }
5418 
5419 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5420 {
5421 	int *map;
5422 
5423 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5424 	    (mask == TRACE_ITER_RECORD_CMD))
5425 		lockdep_assert_held(&event_mutex);
5426 
5427 	/* do nothing if flag is already set */
5428 	if (!!(tr->trace_flags & mask) == !!enabled)
5429 		return 0;
5430 
5431 	/* Give the tracer a chance to approve the change */
5432 	if (tr->current_trace->flag_changed)
5433 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5434 			return -EINVAL;
5435 
5436 	if (enabled)
5437 		tr->trace_flags |= mask;
5438 	else
5439 		tr->trace_flags &= ~mask;
5440 
5441 	if (mask == TRACE_ITER_RECORD_CMD)
5442 		trace_event_enable_cmd_record(enabled);
5443 
5444 	if (mask == TRACE_ITER_RECORD_TGID) {
5445 		if (!tgid_map) {
5446 			tgid_map_max = pid_max;
5447 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5448 				       GFP_KERNEL);
5449 
5450 			/*
5451 			 * Pairs with smp_load_acquire() in
5452 			 * trace_find_tgid_ptr() to ensure that if it observes
5453 			 * the tgid_map we just allocated then it also observes
5454 			 * the corresponding tgid_map_max value.
5455 			 */
5456 			smp_store_release(&tgid_map, map);
5457 		}
5458 		if (!tgid_map) {
5459 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5460 			return -ENOMEM;
5461 		}
5462 
5463 		trace_event_enable_tgid_record(enabled);
5464 	}
5465 
5466 	if (mask == TRACE_ITER_EVENT_FORK)
5467 		trace_event_follow_fork(tr, enabled);
5468 
5469 	if (mask == TRACE_ITER_FUNC_FORK)
5470 		ftrace_pid_follow_fork(tr, enabled);
5471 
5472 	if (mask == TRACE_ITER_OVERWRITE) {
5473 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5474 #ifdef CONFIG_TRACER_MAX_TRACE
5475 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5476 #endif
5477 	}
5478 
5479 	if (mask == TRACE_ITER_PRINTK) {
5480 		trace_printk_start_stop_comm(enabled);
5481 		trace_printk_control(enabled);
5482 	}
5483 
5484 	return 0;
5485 }
5486 
5487 int trace_set_options(struct trace_array *tr, char *option)
5488 {
5489 	char *cmp;
5490 	int neg = 0;
5491 	int ret;
5492 	size_t orig_len = strlen(option);
5493 	int len;
5494 
5495 	cmp = strstrip(option);
5496 
5497 	len = str_has_prefix(cmp, "no");
5498 	if (len)
5499 		neg = 1;
5500 
5501 	cmp += len;
5502 
5503 	mutex_lock(&event_mutex);
5504 	mutex_lock(&trace_types_lock);
5505 
5506 	ret = match_string(trace_options, -1, cmp);
5507 	/* If no option could be set, test the specific tracer options */
5508 	if (ret < 0)
5509 		ret = set_tracer_option(tr, cmp, neg);
5510 	else
5511 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5512 
5513 	mutex_unlock(&trace_types_lock);
5514 	mutex_unlock(&event_mutex);
5515 
5516 	/*
5517 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5518 	 * turn it back into a space.
5519 	 */
5520 	if (orig_len > strlen(option))
5521 		option[strlen(option)] = ' ';
5522 
5523 	return ret;
5524 }
5525 
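/*
 * Editorial example: the same parser backs both the trace_options file and
 * the "trace_options=" boot parameter handled below, e.g.
 *
 *	# echo noprint-parent > /sys/kernel/tracing/trace_options
 *	# echo sym-addr > /sys/kernel/tracing/trace_options
 *
 * where a "no" prefix clears a flag and a bare name sets it; names that are
 * not global flags are tried as tracer-specific options via
 * set_tracer_option().
 */
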
5526 static void __init apply_trace_boot_options(void)
5527 {
5528 	char *buf = trace_boot_options_buf;
5529 	char *option;
5530 
5531 	while (true) {
5532 		option = strsep(&buf, ",");
5533 
5534 		if (!option)
5535 			break;
5536 
5537 		if (*option)
5538 			trace_set_options(&global_trace, option);
5539 
5540 		/* Put back the comma to allow this to be called again */
5541 		if (buf)
5542 			*(buf - 1) = ',';
5543 	}
5544 }
5545 
5546 static ssize_t
5547 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5548 			size_t cnt, loff_t *ppos)
5549 {
5550 	struct seq_file *m = filp->private_data;
5551 	struct trace_array *tr = m->private;
5552 	char buf[64];
5553 	int ret;
5554 
5555 	if (cnt >= sizeof(buf))
5556 		return -EINVAL;
5557 
5558 	if (copy_from_user(buf, ubuf, cnt))
5559 		return -EFAULT;
5560 
5561 	buf[cnt] = 0;
5562 
5563 	ret = trace_set_options(tr, buf);
5564 	if (ret < 0)
5565 		return ret;
5566 
5567 	*ppos += cnt;
5568 
5569 	return cnt;
5570 }
5571 
5572 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5573 {
5574 	struct trace_array *tr = inode->i_private;
5575 	int ret;
5576 
5577 	ret = tracing_check_open_get_tr(tr);
5578 	if (ret)
5579 		return ret;
5580 
5581 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5582 	if (ret < 0)
5583 		trace_array_put(tr);
5584 
5585 	return ret;
5586 }
5587 
5588 static const struct file_operations tracing_iter_fops = {
5589 	.open		= tracing_trace_options_open,
5590 	.read		= seq_read,
5591 	.llseek		= seq_lseek,
5592 	.release	= tracing_single_release_tr,
5593 	.write		= tracing_trace_options_write,
5594 };
5595 
5596 static const char readme_msg[] =
5597 	"tracing mini-HOWTO:\n\n"
5598 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5599 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5600 	" Important files:\n"
5601 	"  trace\t\t\t- The static contents of the buffer\n"
5602 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5603 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5604 	"  current_tracer\t- function and latency tracers\n"
5605 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5606 	"  error_log\t- error log for failed commands (that support it)\n"
5607 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5608 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5609 	"  trace_clock\t\t- change the clock used to order events\n"
5610 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5611 	"      global:   Synced across CPUs but slows tracing down.\n"
5612 	"     counter:   Not a clock, but just an increment\n"
5613 	"      uptime:   Jiffy counter from time of boot\n"
5614 	"        perf:   Same clock that perf events use\n"
5615 #ifdef CONFIG_X86_64
5616 	"     x86-tsc:   TSC cycle counter\n"
5617 #endif
5618 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5619 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5620 	"    absolute:   Absolute (standalone) timestamp\n"
5621 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5622 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5623 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5624 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5625 	"\t\t\t  Remove sub-buffer with rmdir\n"
5626 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5627 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5628 	"\t\t\t  option name\n"
5629 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5630 #ifdef CONFIG_DYNAMIC_FTRACE
5631 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5632 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5633 	"\t\t\t  functions\n"
5634 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5635 	"\t     modules: Can select a group via module\n"
5636 	"\t      Format: :mod:<module-name>\n"
5637 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5638 	"\t    triggers: a command to perform when function is hit\n"
5639 	"\t      Format: <function>:<trigger>[:count]\n"
5640 	"\t     trigger: traceon, traceoff\n"
5641 	"\t\t      enable_event:<system>:<event>\n"
5642 	"\t\t      disable_event:<system>:<event>\n"
5643 #ifdef CONFIG_STACKTRACE
5644 	"\t\t      stacktrace\n"
5645 #endif
5646 #ifdef CONFIG_TRACER_SNAPSHOT
5647 	"\t\t      snapshot\n"
5648 #endif
5649 	"\t\t      dump\n"
5650 	"\t\t      cpudump\n"
5651 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5652 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5653 	"\t     The first one will disable tracing every time do_fault is hit\n"
5654 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5655 	"\t       The first time do trap is hit and it disables tracing, the\n"
5656 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5657 	"\t       the counter will not decrement. It only decrements when the\n"
5658 	"\t       trigger did work\n"
5659 	"\t     To remove trigger without count:\n"
5660 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5661 	"\t     To remove trigger with a count:\n"
5662 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5663 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5664 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5665 	"\t    modules: Can select a group via module command :mod:\n"
5666 	"\t    Does not accept triggers\n"
5667 #endif /* CONFIG_DYNAMIC_FTRACE */
5668 #ifdef CONFIG_FUNCTION_TRACER
5669 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5670 	"\t\t    (function)\n"
5671 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5672 	"\t\t    (function)\n"
5673 #endif
5674 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5675 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5676 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5677 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5678 #endif
5679 #ifdef CONFIG_TRACER_SNAPSHOT
5680 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5681 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5682 	"\t\t\t  information\n"
5683 #endif
5684 #ifdef CONFIG_STACK_TRACER
5685 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5686 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5687 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5688 	"\t\t\t  new trace)\n"
5689 #ifdef CONFIG_DYNAMIC_FTRACE
5690 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5691 	"\t\t\t  traces\n"
5692 #endif
5693 #endif /* CONFIG_STACK_TRACER */
5694 #ifdef CONFIG_DYNAMIC_EVENTS
5695 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5696 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5697 #endif
5698 #ifdef CONFIG_KPROBE_EVENTS
5699 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5700 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5701 #endif
5702 #ifdef CONFIG_UPROBE_EVENTS
5703 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5704 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5705 #endif
5706 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5707     defined(CONFIG_FPROBE_EVENTS)
5708 	"\t  accepts: event-definitions (one definition per line)\n"
5709 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5710 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5711 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5712 #endif
5713 #ifdef CONFIG_FPROBE_EVENTS
5714 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5715 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5716 #endif
5717 #ifdef CONFIG_HIST_TRIGGERS
5718 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5719 #endif
5720 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5721 	"\t           -:[<group>/][<event>]\n"
5722 #ifdef CONFIG_KPROBE_EVENTS
5723 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5724   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5725 #endif
5726 #ifdef CONFIG_UPROBE_EVENTS
5727   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5728 #endif
5729 	"\t     args: <name>=fetcharg[:type]\n"
5730 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5731 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5732 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5733 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>, <argname>\n"
5734 #else
5735 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5736 #endif
5737 #else
5738 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5739 #endif
5740 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5741 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5742 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5743 	"\t           symstr, <type>\\[<array-size>\\]\n"
5744 #ifdef CONFIG_HIST_TRIGGERS
5745 	"\t    field: <stype> <name>;\n"
5746 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5747 	"\t           [unsigned] char/int/long\n"
5748 #endif
5749 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5750 	"\t            of the <attached-group>/<attached-event>.\n"
5751 #endif
5752 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5753 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5754 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5755 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5756 	"\t\t\t  events\n"
5757 	"      filter\t\t- If set, only events passing filter are traced\n"
5758 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5759 	"\t\t\t  <event>:\n"
5760 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5761 	"      filter\t\t- If set, only events passing filter are traced\n"
5762 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5763 	"\t    Format: <trigger>[:count][if <filter>]\n"
5764 	"\t   trigger: traceon, traceoff\n"
5765 	"\t            enable_event:<system>:<event>\n"
5766 	"\t            disable_event:<system>:<event>\n"
5767 #ifdef CONFIG_HIST_TRIGGERS
5768 	"\t            enable_hist:<system>:<event>\n"
5769 	"\t            disable_hist:<system>:<event>\n"
5770 #endif
5771 #ifdef CONFIG_STACKTRACE
5772 	"\t\t    stacktrace\n"
5773 #endif
5774 #ifdef CONFIG_TRACER_SNAPSHOT
5775 	"\t\t    snapshot\n"
5776 #endif
5777 #ifdef CONFIG_HIST_TRIGGERS
5778 	"\t\t    hist (see below)\n"
5779 #endif
5780 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5781 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5782 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5783 	"\t                  events/block/block_unplug/trigger\n"
5784 	"\t   The first disables tracing every time block_unplug is hit.\n"
5785 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5786 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5787 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5788 	"\t   Like function triggers, the counter is only decremented if it\n"
5789 	"\t    enabled or disabled tracing.\n"
5790 	"\t   To remove a trigger without a count:\n"
5791 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5792 	"\t   To remove a trigger with a count:\n"
5793 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5794 	"\t   Filters can be ignored when removing a trigger.\n"
5795 #ifdef CONFIG_HIST_TRIGGERS
5796 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5797 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5798 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5799 	"\t            [:values=<field1[,field2,...]>]\n"
5800 	"\t            [:sort=<field1[,field2,...]>]\n"
5801 	"\t            [:size=#entries]\n"
5802 	"\t            [:pause][:continue][:clear]\n"
5803 	"\t            [:name=histname1]\n"
5804 	"\t            [:nohitcount]\n"
5805 	"\t            [:<handler>.<action>]\n"
5806 	"\t            [if <filter>]\n\n"
5807 	"\t    Note, special fields can be used as well:\n"
5808 	"\t            common_timestamp - to record current timestamp\n"
5809 	"\t            common_cpu - to record the CPU the event happened on\n"
5810 	"\n"
5811 	"\t    A hist trigger variable can be:\n"
5812 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5813 	"\t        - a reference to another variable e.g. y=$x,\n"
5814 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5815 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5816 	"\n"
5817 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5818 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5819 	"\t    variable reference, field or numeric literal.\n"
5820 	"\n"
5821 	"\t    When a matching event is hit, an entry is added to a hash\n"
5822 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5823 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5824 	"\t    correspond to fields in the event's format description.  Keys\n"
5825 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5826 	"\t    Compound keys consisting of up to two fields can be specified\n"
5827 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5828 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5829 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5830 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5831 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5832 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5833 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5834 	"\t    its histogram data will be shared with other triggers of the\n"
5835 	"\t    same name, and trigger hits will update this common data.\n\n"
5836 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5837 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5838 	"\t    triggers attached to an event, there will be a table for each\n"
5839 	"\t    trigger in the output.  The table displayed for a named\n"
5840 	"\t    trigger will be the same as any other instance having the\n"
5841 	"\t    same name.  The default format used to display a given field\n"
5842 	"\t    can be modified by appending any of the following modifiers\n"
5843 	"\t    to the field name, as applicable:\n\n"
5844 	"\t            .hex        display a number as a hex value\n"
5845 	"\t            .sym        display an address as a symbol\n"
5846 	"\t            .sym-offset display an address as a symbol and offset\n"
5847 	"\t            .execname   display a common_pid as a program name\n"
5848 	"\t            .syscall    display a syscall id as a syscall name\n"
5849 	"\t            .log2       display log2 value rather than raw number\n"
5850 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5851 	"\t            .usecs      display a common_timestamp in microseconds\n"
5852 	"\t            .percent    display a number of percentage value\n"
5853 	"\t            .graph      display a bar-graph of a value\n\n"
5854 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5855 	"\t    trigger or to start a hist trigger but not log any events\n"
5856 	"\t    until told to do so.  'continue' can be used to start or\n"
5857 	"\t    restart a paused hist trigger.\n\n"
5858 	"\t    The 'clear' parameter will clear the contents of a running\n"
5859 	"\t    hist trigger and leave its current paused/active state\n"
5860 	"\t    unchanged.\n\n"
5861 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5862 	"\t    raw hitcount in the histogram.\n\n"
5863 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5864 	"\t    have one event conditionally start and stop another event's\n"
5865 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5866 	"\t    the enable_event and disable_event triggers.\n\n"
5867 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5868 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5869 	"\t        <handler>.<action>\n\n"
5870 	"\t    The available handlers are:\n\n"
5871 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5872 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5873 	"\t        onchange(var)            - invoke action if var changes\n\n"
5874 	"\t    The available actions are:\n\n"
5875 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5876 	"\t        save(field,...)                      - save current event fields\n"
5877 #ifdef CONFIG_TRACER_SNAPSHOT
5878 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5879 #endif
5880 #ifdef CONFIG_SYNTH_EVENTS
5881 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5882 	"\t  Write into this file to define/undefine new synthetic events.\n"
5883 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5884 #endif
5885 #endif
5886 ;
5887 
5888 static ssize_t
5889 tracing_readme_read(struct file *filp, char __user *ubuf,
5890 		       size_t cnt, loff_t *ppos)
5891 {
5892 	return simple_read_from_buffer(ubuf, cnt, ppos,
5893 					readme_msg, strlen(readme_msg));
5894 }
5895 
5896 static const struct file_operations tracing_readme_fops = {
5897 	.open		= tracing_open_generic,
5898 	.read		= tracing_readme_read,
5899 	.llseek		= generic_file_llseek,
5900 };
5901 
5902 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5903 {
5904 	int pid = ++(*pos);
5905 
5906 	return trace_find_tgid_ptr(pid);
5907 }
5908 
5909 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5910 {
5911 	int pid = *pos;
5912 
5913 	return trace_find_tgid_ptr(pid);
5914 }
5915 
5916 static void saved_tgids_stop(struct seq_file *m, void *v)
5917 {
5918 }
5919 
5920 static int saved_tgids_show(struct seq_file *m, void *v)
5921 {
5922 	int *entry = (int *)v;
5923 	int pid = entry - tgid_map;
5924 	int tgid = *entry;
5925 
5926 	if (tgid == 0)
5927 		return SEQ_SKIP;
5928 
5929 	seq_printf(m, "%d %d\n", pid, tgid);
5930 	return 0;
5931 }
5932 
5933 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5934 	.start		= saved_tgids_start,
5935 	.stop		= saved_tgids_stop,
5936 	.next		= saved_tgids_next,
5937 	.show		= saved_tgids_show,
5938 };
5939 
5940 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5941 {
5942 	int ret;
5943 
5944 	ret = tracing_check_open_get_tr(NULL);
5945 	if (ret)
5946 		return ret;
5947 
5948 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5949 }
5950 
5951 
5952 static const struct file_operations tracing_saved_tgids_fops = {
5953 	.open		= tracing_saved_tgids_open,
5954 	.read		= seq_read,
5955 	.llseek		= seq_lseek,
5956 	.release	= seq_release,
5957 };
5958 
5959 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5960 {
5961 	unsigned int *ptr = v;
5962 
5963 	if (*pos || m->count)
5964 		ptr++;
5965 
5966 	(*pos)++;
5967 
5968 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5969 	     ptr++) {
5970 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5971 			continue;
5972 
5973 		return ptr;
5974 	}
5975 
5976 	return NULL;
5977 }
5978 
5979 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5980 {
5981 	void *v;
5982 	loff_t l = 0;
5983 
5984 	preempt_disable();
5985 	arch_spin_lock(&trace_cmdline_lock);
5986 
5987 	v = &savedcmd->map_cmdline_to_pid[0];
5988 	while (l <= *pos) {
5989 		v = saved_cmdlines_next(m, v, &l);
5990 		if (!v)
5991 			return NULL;
5992 	}
5993 
5994 	return v;
5995 }
5996 
5997 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5998 {
5999 	arch_spin_unlock(&trace_cmdline_lock);
6000 	preempt_enable();
6001 }
6002 
6003 static int saved_cmdlines_show(struct seq_file *m, void *v)
6004 {
6005 	char buf[TASK_COMM_LEN];
6006 	unsigned int *pid = v;
6007 
6008 	__trace_find_cmdline(*pid, buf);
6009 	seq_printf(m, "%d %s\n", *pid, buf);
6010 	return 0;
6011 }
6012 
6013 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6014 	.start		= saved_cmdlines_start,
6015 	.next		= saved_cmdlines_next,
6016 	.stop		= saved_cmdlines_stop,
6017 	.show		= saved_cmdlines_show,
6018 };
6019 
6020 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6021 {
6022 	int ret;
6023 
6024 	ret = tracing_check_open_get_tr(NULL);
6025 	if (ret)
6026 		return ret;
6027 
6028 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6029 }
6030 
6031 static const struct file_operations tracing_saved_cmdlines_fops = {
6032 	.open		= tracing_saved_cmdlines_open,
6033 	.read		= seq_read,
6034 	.llseek		= seq_lseek,
6035 	.release	= seq_release,
6036 };
6037 
6038 static ssize_t
6039 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6040 				 size_t cnt, loff_t *ppos)
6041 {
6042 	char buf[64];
6043 	int r;
6044 
6045 	preempt_disable();
6046 	arch_spin_lock(&trace_cmdline_lock);
6047 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6048 	arch_spin_unlock(&trace_cmdline_lock);
6049 	preempt_enable();
6050 
6051 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6052 }
6053 
6054 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6055 {
6056 	kfree(s->saved_cmdlines);
6057 	kfree(s->map_cmdline_to_pid);
6058 	kfree(s);
6059 }
6060 
6061 static int tracing_resize_saved_cmdlines(unsigned int val)
6062 {
6063 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
6064 
6065 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6066 	if (!s)
6067 		return -ENOMEM;
6068 
6069 	if (allocate_cmdlines_buffer(val, s) < 0) {
6070 		kfree(s);
6071 		return -ENOMEM;
6072 	}
6073 
6074 	preempt_disable();
6075 	arch_spin_lock(&trace_cmdline_lock);
6076 	savedcmd_temp = savedcmd;
6077 	savedcmd = s;
6078 	arch_spin_unlock(&trace_cmdline_lock);
6079 	preempt_enable();
6080 	free_saved_cmdlines_buffer(savedcmd_temp);
6081 
6082 	return 0;
6083 }
6084 
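/*
 * For example (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   # echo 4096 > saved_cmdlines_size
 *
 * grows the saved comm/pid cache to 4096 entries. The old buffer is swapped
 * out under trace_cmdline_lock and freed afterwards.
 */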
6085 static ssize_t
6086 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6087 				  size_t cnt, loff_t *ppos)
6088 {
6089 	unsigned long val;
6090 	int ret;
6091 
6092 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6093 	if (ret)
6094 		return ret;
6095 
	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6097 	if (!val || val > PID_MAX_DEFAULT)
6098 		return -EINVAL;
6099 
6100 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6101 	if (ret < 0)
6102 		return ret;
6103 
6104 	*ppos += cnt;
6105 
6106 	return cnt;
6107 }
6108 
6109 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6110 	.open		= tracing_open_generic,
6111 	.read		= tracing_saved_cmdlines_size_read,
6112 	.write		= tracing_saved_cmdlines_size_write,
6113 };
6114 
6115 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6116 static union trace_eval_map_item *
6117 update_eval_map(union trace_eval_map_item *ptr)
6118 {
6119 	if (!ptr->map.eval_string) {
6120 		if (ptr->tail.next) {
6121 			ptr = ptr->tail.next;
6122 			/* Set ptr to the next real item (skip head) */
6123 			ptr++;
6124 		} else
6125 			return NULL;
6126 	}
6127 	return ptr;
6128 }
6129 
6130 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6131 {
6132 	union trace_eval_map_item *ptr = v;
6133 
6134 	/*
6135 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6136 	 * This really should never happen.
6137 	 */
6138 	(*pos)++;
6139 	ptr = update_eval_map(ptr);
6140 	if (WARN_ON_ONCE(!ptr))
6141 		return NULL;
6142 
6143 	ptr++;
6144 	ptr = update_eval_map(ptr);
6145 
6146 	return ptr;
6147 }
6148 
6149 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6150 {
6151 	union trace_eval_map_item *v;
6152 	loff_t l = 0;
6153 
6154 	mutex_lock(&trace_eval_mutex);
6155 
6156 	v = trace_eval_maps;
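	/* The first array slot is a head item; skip ahead to the first real map. */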
6157 	if (v)
6158 		v++;
6159 
6160 	while (v && l < *pos) {
6161 		v = eval_map_next(m, v, &l);
6162 	}
6163 
6164 	return v;
6165 }
6166 
6167 static void eval_map_stop(struct seq_file *m, void *v)
6168 {
6169 	mutex_unlock(&trace_eval_mutex);
6170 }
6171 
6172 static int eval_map_show(struct seq_file *m, void *v)
6173 {
6174 	union trace_eval_map_item *ptr = v;
6175 
6176 	seq_printf(m, "%s %ld (%s)\n",
6177 		   ptr->map.eval_string, ptr->map.eval_value,
6178 		   ptr->map.system);
6179 
6180 	return 0;
6181 }
6182 
6183 static const struct seq_operations tracing_eval_map_seq_ops = {
6184 	.start		= eval_map_start,
6185 	.next		= eval_map_next,
6186 	.stop		= eval_map_stop,
6187 	.show		= eval_map_show,
6188 };
6189 
6190 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6191 {
6192 	int ret;
6193 
6194 	ret = tracing_check_open_get_tr(NULL);
6195 	if (ret)
6196 		return ret;
6197 
6198 	return seq_open(filp, &tracing_eval_map_seq_ops);
6199 }
6200 
6201 static const struct file_operations tracing_eval_map_fops = {
6202 	.open		= tracing_eval_map_open,
6203 	.read		= seq_read,
6204 	.llseek		= seq_lseek,
6205 	.release	= seq_release,
6206 };
6207 
6208 static inline union trace_eval_map_item *
6209 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6210 {
6211 	/* Return tail of array given the head */
6212 	return ptr + ptr->head.length + 1;
6213 }
6214 
6215 static void
6216 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6217 			   int len)
6218 {
6219 	struct trace_eval_map **stop;
6220 	struct trace_eval_map **map;
6221 	union trace_eval_map_item *map_array;
6222 	union trace_eval_map_item *ptr;
6223 
6224 	stop = start + len;
6225 
6226 	/*
6227 	 * The trace_eval_maps contains the map plus a head and tail item,
6228 	 * where the head holds the module and length of array, and the
6229 	 * tail holds a pointer to the next list.
6230 	 */
6231 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6232 	if (!map_array) {
6233 		pr_warn("Unable to allocate trace eval mapping\n");
6234 		return;
6235 	}
6236 
6237 	mutex_lock(&trace_eval_mutex);
6238 
6239 	if (!trace_eval_maps)
6240 		trace_eval_maps = map_array;
6241 	else {
6242 		ptr = trace_eval_maps;
6243 		for (;;) {
6244 			ptr = trace_eval_jmp_to_tail(ptr);
6245 			if (!ptr->tail.next)
6246 				break;
6247 			ptr = ptr->tail.next;
6248 
6249 		}
6250 		ptr->tail.next = map_array;
6251 	}
6252 	map_array->head.mod = mod;
6253 	map_array->head.length = len;
6254 	map_array++;
6255 
6256 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6257 		map_array->map = **map;
6258 		map_array++;
6259 	}
6260 	memset(map_array, 0, sizeof(*map_array));
6261 
6262 	mutex_unlock(&trace_eval_mutex);
6263 }
6264 
6265 static void trace_create_eval_file(struct dentry *d_tracer)
6266 {
6267 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6268 			  NULL, &tracing_eval_map_fops);
6269 }
6270 
6271 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6272 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6273 static inline void trace_insert_eval_map_file(struct module *mod,
6274 			      struct trace_eval_map **start, int len) { }
6275 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6276 
6277 static void trace_insert_eval_map(struct module *mod,
6278 				  struct trace_eval_map **start, int len)
6279 {
6280 	struct trace_eval_map **map;
6281 
6282 	if (len <= 0)
6283 		return;
6284 
6285 	map = start;
6286 
6287 	trace_event_eval_update(map, len);
6288 
6289 	trace_insert_eval_map_file(mod, start, len);
6290 }
6291 
6292 static ssize_t
6293 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6294 		       size_t cnt, loff_t *ppos)
6295 {
6296 	struct trace_array *tr = filp->private_data;
6297 	char buf[MAX_TRACER_SIZE+2];
6298 	int r;
6299 
6300 	mutex_lock(&trace_types_lock);
6301 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6302 	mutex_unlock(&trace_types_lock);
6303 
6304 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6305 }
6306 
6307 int tracer_init(struct tracer *t, struct trace_array *tr)
6308 {
6309 	tracing_reset_online_cpus(&tr->array_buffer);
6310 	return t->init(tr);
6311 }
6312 
6313 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6314 {
6315 	int cpu;
6316 
6317 	for_each_tracing_cpu(cpu)
6318 		per_cpu_ptr(buf->data, cpu)->entries = val;
6319 }
6320 
6321 #ifdef CONFIG_TRACER_MAX_TRACE
/* resize @trace_buf's buffer to the size of @size_buf's entries */
6323 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6324 					struct array_buffer *size_buf, int cpu_id)
6325 {
6326 	int cpu, ret = 0;
6327 
6328 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6329 		for_each_tracing_cpu(cpu) {
6330 			ret = ring_buffer_resize(trace_buf->buffer,
6331 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6332 			if (ret < 0)
6333 				break;
6334 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6335 				per_cpu_ptr(size_buf->data, cpu)->entries;
6336 		}
6337 	} else {
6338 		ret = ring_buffer_resize(trace_buf->buffer,
6339 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6340 		if (ret == 0)
6341 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6342 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6343 	}
6344 
6345 	return ret;
6346 }
6347 #endif /* CONFIG_TRACER_MAX_TRACE */
6348 
6349 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6350 					unsigned long size, int cpu)
6351 {
6352 	int ret;
6353 
6354 	/*
	 * If the kernel or the user changes the size of the ring buffer,
	 * we use the size that was given, and we can forget about
6357 	 * expanding it later.
6358 	 */
6359 	ring_buffer_expanded = true;
6360 
6361 	/* May be called before buffers are initialized */
6362 	if (!tr->array_buffer.buffer)
6363 		return 0;
6364 
6365 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6366 	if (ret < 0)
6367 		return ret;
6368 
6369 #ifdef CONFIG_TRACER_MAX_TRACE
6370 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6371 	    !tr->current_trace->use_max_tr)
6372 		goto out;
6373 
6374 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6375 	if (ret < 0) {
6376 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6377 						     &tr->array_buffer, cpu);
6378 		if (r < 0) {
6379 			/*
6380 			 * AARGH! We are left with different
6381 			 * size max buffer!!!!
6382 			 * The max buffer is our "snapshot" buffer.
6383 			 * When a tracer needs a snapshot (one of the
6384 			 * latency tracers), it swaps the max buffer
			 * with the saved snapshot. We succeeded in updating
			 * the size of the main buffer, but failed to update
			 * the size of the max buffer. And when we tried
6388 			 * to reset the main buffer to the original size, we
6389 			 * failed there too. This is very unlikely to
6390 			 * happen, but if it does, warn and kill all
6391 			 * tracing.
6392 			 */
6393 			WARN_ON(1);
6394 			tracing_disabled = 1;
6395 		}
6396 		return ret;
6397 	}
6398 
6399 	if (cpu == RING_BUFFER_ALL_CPUS)
6400 		set_buffer_entries(&tr->max_buffer, size);
6401 	else
6402 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6403 
6404  out:
6405 #endif /* CONFIG_TRACER_MAX_TRACE */
6406 
6407 	if (cpu == RING_BUFFER_ALL_CPUS)
6408 		set_buffer_entries(&tr->array_buffer, size);
6409 	else
6410 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6411 
6412 	return ret;
6413 }
6414 
6415 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6416 				  unsigned long size, int cpu_id)
6417 {
6418 	int ret;
6419 
6420 	mutex_lock(&trace_types_lock);
6421 
6422 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
		/* make sure this cpu is enabled in the mask */
6424 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6425 			ret = -EINVAL;
6426 			goto out;
6427 		}
6428 	}
6429 
6430 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6431 	if (ret < 0)
6432 		ret = -ENOMEM;
6433 
6434 out:
6435 	mutex_unlock(&trace_types_lock);
6436 
6437 	return ret;
6438 }
6439 
6440 
6441 /**
6442  * tracing_update_buffers - used by tracing facility to expand ring buffers
6443  *
 * To save memory when tracing is configured in but never used, the ring
 * buffers are set to a minimum size. Once a user starts to use the
 * tracing facility, they need to grow to their default size.
6448  *
6449  * This function is to be called when a tracer is about to be used.
6450  */
6451 int tracing_update_buffers(void)
6452 {
6453 	int ret = 0;
6454 
6455 	mutex_lock(&trace_types_lock);
6456 	if (!ring_buffer_expanded)
6457 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6458 						RING_BUFFER_ALL_CPUS);
6459 	mutex_unlock(&trace_types_lock);
6460 
6461 	return ret;
6462 }
6463 
6464 struct trace_option_dentry;
6465 
6466 static void
6467 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6468 
6469 /*
6470  * Used to clear out the tracer before deletion of an instance.
6471  * Must have trace_types_lock held.
6472  */
6473 static void tracing_set_nop(struct trace_array *tr)
6474 {
6475 	if (tr->current_trace == &nop_trace)
6476 		return;
6477 
6478 	tr->current_trace->enabled--;
6479 
6480 	if (tr->current_trace->reset)
6481 		tr->current_trace->reset(tr);
6482 
6483 	tr->current_trace = &nop_trace;
6484 }
6485 
6486 static bool tracer_options_updated;
6487 
6488 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6489 {
6490 	/* Only enable if the directory has been created already. */
6491 	if (!tr->dir)
6492 		return;
6493 
	/* Only create trace option files after update_tracer_options has finished */
6495 	if (!tracer_options_updated)
6496 		return;
6497 
6498 	create_trace_option_files(tr, t);
6499 }
6500 
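/*
 * Switch the current tracer of @tr, as done for example when user space runs
 * (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   # echo function_graph > current_tracer
 *
 * On first use the ring buffer is expanded to its full size, and the previous
 * tracer is reset before the new one is initialized.
 */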
6501 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6502 {
6503 	struct tracer *t;
6504 #ifdef CONFIG_TRACER_MAX_TRACE
6505 	bool had_max_tr;
6506 #endif
6507 	int ret = 0;
6508 
6509 	mutex_lock(&trace_types_lock);
6510 
6511 	if (!ring_buffer_expanded) {
6512 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6513 						RING_BUFFER_ALL_CPUS);
6514 		if (ret < 0)
6515 			goto out;
6516 		ret = 0;
6517 	}
6518 
6519 	for (t = trace_types; t; t = t->next) {
6520 		if (strcmp(t->name, buf) == 0)
6521 			break;
6522 	}
6523 	if (!t) {
6524 		ret = -EINVAL;
6525 		goto out;
6526 	}
6527 	if (t == tr->current_trace)
6528 		goto out;
6529 
6530 #ifdef CONFIG_TRACER_SNAPSHOT
6531 	if (t->use_max_tr) {
6532 		local_irq_disable();
6533 		arch_spin_lock(&tr->max_lock);
6534 		if (tr->cond_snapshot)
6535 			ret = -EBUSY;
6536 		arch_spin_unlock(&tr->max_lock);
6537 		local_irq_enable();
6538 		if (ret)
6539 			goto out;
6540 	}
6541 #endif
	/* Some tracers won't work on the kernel command line */
6543 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6544 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6545 			t->name);
6546 		goto out;
6547 	}
6548 
6549 	/* Some tracers are only allowed for the top level buffer */
6550 	if (!trace_ok_for_array(t, tr)) {
6551 		ret = -EINVAL;
6552 		goto out;
6553 	}
6554 
6555 	/* If trace pipe files are being read, we can't change the tracer */
6556 	if (tr->trace_ref) {
6557 		ret = -EBUSY;
6558 		goto out;
6559 	}
6560 
6561 	trace_branch_disable();
6562 
6563 	tr->current_trace->enabled--;
6564 
6565 	if (tr->current_trace->reset)
6566 		tr->current_trace->reset(tr);
6567 
6568 #ifdef CONFIG_TRACER_MAX_TRACE
6569 	had_max_tr = tr->current_trace->use_max_tr;
6570 
6571 	/* Current trace needs to be nop_trace before synchronize_rcu */
6572 	tr->current_trace = &nop_trace;
6573 
6574 	if (had_max_tr && !t->use_max_tr) {
6575 		/*
6576 		 * We need to make sure that the update_max_tr sees that
6577 		 * current_trace changed to nop_trace to keep it from
6578 		 * swapping the buffers after we resize it.
		 * The update_max_tr is called with interrupts disabled,
		 * so a synchronize_rcu() is sufficient.
6581 		 */
6582 		synchronize_rcu();
6583 		free_snapshot(tr);
6584 	}
6585 
6586 	if (t->use_max_tr && !tr->allocated_snapshot) {
6587 		ret = tracing_alloc_snapshot_instance(tr);
6588 		if (ret < 0)
6589 			goto out;
6590 	}
6591 #else
6592 	tr->current_trace = &nop_trace;
6593 #endif
6594 
6595 	if (t->init) {
6596 		ret = tracer_init(t, tr);
6597 		if (ret)
6598 			goto out;
6599 	}
6600 
6601 	tr->current_trace = t;
6602 	tr->current_trace->enabled++;
6603 	trace_branch_enable(tr);
6604  out:
6605 	mutex_unlock(&trace_types_lock);
6606 
6607 	return ret;
6608 }
6609 
6610 static ssize_t
6611 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6612 			size_t cnt, loff_t *ppos)
6613 {
6614 	struct trace_array *tr = filp->private_data;
6615 	char buf[MAX_TRACER_SIZE+1];
6616 	char *name;
6617 	size_t ret;
6618 	int err;
6619 
6620 	ret = cnt;
6621 
6622 	if (cnt > MAX_TRACER_SIZE)
6623 		cnt = MAX_TRACER_SIZE;
6624 
6625 	if (copy_from_user(buf, ubuf, cnt))
6626 		return -EFAULT;
6627 
6628 	buf[cnt] = 0;
6629 
6630 	name = strim(buf);
6631 
6632 	err = tracing_set_tracer(tr, name);
6633 	if (err)
6634 		return err;
6635 
6636 	*ppos += ret;
6637 
6638 	return ret;
6639 }
6640 
6641 static ssize_t
6642 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6643 		   size_t cnt, loff_t *ppos)
6644 {
6645 	char buf[64];
6646 	int r;
6647 
6648 	r = snprintf(buf, sizeof(buf), "%ld\n",
6649 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6650 	if (r > sizeof(buf))
6651 		r = sizeof(buf);
6652 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6653 }
6654 
6655 static ssize_t
6656 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6657 		    size_t cnt, loff_t *ppos)
6658 {
6659 	unsigned long val;
6660 	int ret;
6661 
6662 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6663 	if (ret)
6664 		return ret;
6665 
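	/* The value is given in microseconds; store it in nanoseconds. */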
6666 	*ptr = val * 1000;
6667 
6668 	return cnt;
6669 }
6670 
6671 static ssize_t
6672 tracing_thresh_read(struct file *filp, char __user *ubuf,
6673 		    size_t cnt, loff_t *ppos)
6674 {
6675 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6676 }
6677 
6678 static ssize_t
6679 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6680 		     size_t cnt, loff_t *ppos)
6681 {
6682 	struct trace_array *tr = filp->private_data;
6683 	int ret;
6684 
6685 	mutex_lock(&trace_types_lock);
6686 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6687 	if (ret < 0)
6688 		goto out;
6689 
6690 	if (tr->current_trace->update_thresh) {
6691 		ret = tr->current_trace->update_thresh(tr);
6692 		if (ret < 0)
6693 			goto out;
6694 	}
6695 
6696 	ret = cnt;
6697 out:
6698 	mutex_unlock(&trace_types_lock);
6699 
6700 	return ret;
6701 }
6702 
6703 #ifdef CONFIG_TRACER_MAX_TRACE
6704 
6705 static ssize_t
6706 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6707 		     size_t cnt, loff_t *ppos)
6708 {
6709 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6710 }
6711 
6712 static ssize_t
6713 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6714 		      size_t cnt, loff_t *ppos)
6715 {
6716 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6717 }
6718 
6719 #endif
6720 
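/*
 * The pipe_cpumask tracks which consuming trace_pipe readers are open: a
 * per-CPU reader claims that CPU's bit, while a reader of the top-level
 * file may only open when no bits are set, and then claims them all. This
 * keeps a global consumer and per-CPU consumers mutually exclusive.
 */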
6721 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6722 {
6723 	if (cpu == RING_BUFFER_ALL_CPUS) {
6724 		if (cpumask_empty(tr->pipe_cpumask)) {
6725 			cpumask_setall(tr->pipe_cpumask);
6726 			return 0;
6727 		}
6728 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6729 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6730 		return 0;
6731 	}
6732 	return -EBUSY;
6733 }
6734 
6735 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6736 {
6737 	if (cpu == RING_BUFFER_ALL_CPUS) {
6738 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6739 		cpumask_clear(tr->pipe_cpumask);
6740 	} else {
6741 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6742 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6743 	}
6744 }
6745 
6746 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6747 {
6748 	struct trace_array *tr = inode->i_private;
6749 	struct trace_iterator *iter;
6750 	int cpu;
6751 	int ret;
6752 
6753 	ret = tracing_check_open_get_tr(tr);
6754 	if (ret)
6755 		return ret;
6756 
6757 	mutex_lock(&trace_types_lock);
6758 	cpu = tracing_get_cpu(inode);
6759 	ret = open_pipe_on_cpu(tr, cpu);
6760 	if (ret)
6761 		goto fail_pipe_on_cpu;
6762 
6763 	/* create a buffer to store the information to pass to userspace */
6764 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6765 	if (!iter) {
6766 		ret = -ENOMEM;
6767 		goto fail_alloc_iter;
6768 	}
6769 
6770 	trace_seq_init(&iter->seq);
6771 	iter->trace = tr->current_trace;
6772 
6773 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6774 		ret = -ENOMEM;
6775 		goto fail;
6776 	}
6777 
6778 	/* trace pipe does not show start of buffer */
6779 	cpumask_setall(iter->started);
6780 
6781 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6782 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6783 
6784 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6785 	if (trace_clocks[tr->clock_id].in_ns)
6786 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6787 
6788 	iter->tr = tr;
6789 	iter->array_buffer = &tr->array_buffer;
6790 	iter->cpu_file = cpu;
6791 	mutex_init(&iter->mutex);
6792 	filp->private_data = iter;
6793 
6794 	if (iter->trace->pipe_open)
6795 		iter->trace->pipe_open(iter);
6796 
6797 	nonseekable_open(inode, filp);
6798 
6799 	tr->trace_ref++;
6800 
6801 	mutex_unlock(&trace_types_lock);
6802 	return ret;
6803 
6804 fail:
6805 	kfree(iter);
6806 fail_alloc_iter:
6807 	close_pipe_on_cpu(tr, cpu);
6808 fail_pipe_on_cpu:
6809 	__trace_array_put(tr);
6810 	mutex_unlock(&trace_types_lock);
6811 	return ret;
6812 }
6813 
6814 static int tracing_release_pipe(struct inode *inode, struct file *file)
6815 {
6816 	struct trace_iterator *iter = file->private_data;
6817 	struct trace_array *tr = inode->i_private;
6818 
6819 	mutex_lock(&trace_types_lock);
6820 
6821 	tr->trace_ref--;
6822 
6823 	if (iter->trace->pipe_close)
6824 		iter->trace->pipe_close(iter);
6825 	close_pipe_on_cpu(tr, iter->cpu_file);
6826 	mutex_unlock(&trace_types_lock);
6827 
6828 	free_cpumask_var(iter->started);
6829 	kfree(iter->fmt);
6830 	kfree(iter->temp);
6831 	mutex_destroy(&iter->mutex);
6832 	kfree(iter);
6833 
6834 	trace_array_put(tr);
6835 
6836 	return 0;
6837 }
6838 
6839 static __poll_t
6840 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6841 {
6842 	struct trace_array *tr = iter->tr;
6843 
6844 	/* Iterators are static, they should be filled or empty */
6845 	if (trace_buffer_iter(iter, iter->cpu_file))
6846 		return EPOLLIN | EPOLLRDNORM;
6847 
6848 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6849 		/*
6850 		 * Always select as readable when in blocking mode
6851 		 */
6852 		return EPOLLIN | EPOLLRDNORM;
6853 	else
6854 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6855 					     filp, poll_table, iter->tr->buffer_percent);
6856 }
6857 
6858 static __poll_t
6859 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6860 {
6861 	struct trace_iterator *iter = filp->private_data;
6862 
6863 	return trace_poll(iter, filp, poll_table);
6864 }
6865 
6866 /* Must be called with iter->mutex held. */
6867 static int tracing_wait_pipe(struct file *filp)
6868 {
6869 	struct trace_iterator *iter = filp->private_data;
6870 	int ret;
6871 
6872 	while (trace_empty(iter)) {
6873 
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
6877 
6878 		/*
		 * We only return EOF if tracing is disabled and we have
		 * already read something. We still block if tracing is
		 * disabled but we have never read anything. This allows a
		 * user to cat this file, and then enable tracing. But after
		 * we have read something, we give an EOF when tracing is
		 * again disabled.
6884 		 *
6885 		 * iter->pos will be 0 if we haven't read anything.
6886 		 */
6887 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6888 			break;
6889 
6890 		mutex_unlock(&iter->mutex);
6891 
6892 		ret = wait_on_pipe(iter, 0);
6893 
6894 		mutex_lock(&iter->mutex);
6895 
6896 		if (ret)
6897 			return ret;
6898 	}
6899 
6900 	return 1;
6901 }
6902 
6903 /*
6904  * Consumer reader.
6905  */
6906 static ssize_t
6907 tracing_read_pipe(struct file *filp, char __user *ubuf,
6908 		  size_t cnt, loff_t *ppos)
6909 {
6910 	struct trace_iterator *iter = filp->private_data;
6911 	ssize_t sret;
6912 
6913 	/*
	 * Avoid more than one consumer on a single file descriptor.
	 * This is just a matter of trace coherency; the ring buffer itself
	 * is protected.
6917 	 */
6918 	mutex_lock(&iter->mutex);
6919 
6920 	/* return any leftover data */
6921 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6922 	if (sret != -EBUSY)
6923 		goto out;
6924 
6925 	trace_seq_init(&iter->seq);
6926 
6927 	if (iter->trace->read) {
6928 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6929 		if (sret)
6930 			goto out;
6931 	}
6932 
6933 waitagain:
6934 	sret = tracing_wait_pipe(filp);
6935 	if (sret <= 0)
6936 		goto out;
6937 
6938 	/* stop when tracing is finished */
6939 	if (trace_empty(iter)) {
6940 		sret = 0;
6941 		goto out;
6942 	}
6943 
6944 	if (cnt >= PAGE_SIZE)
6945 		cnt = PAGE_SIZE - 1;
6946 
6947 	/* reset all but tr, trace, and overruns */
6948 	trace_iterator_reset(iter);
6949 	cpumask_clear(iter->started);
6950 	trace_seq_init(&iter->seq);
6951 
6952 	trace_event_read_lock();
6953 	trace_access_lock(iter->cpu_file);
6954 	while (trace_find_next_entry_inc(iter) != NULL) {
6955 		enum print_line_t ret;
6956 		int save_len = iter->seq.seq.len;
6957 
6958 		ret = print_trace_line(iter);
6959 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6960 			/*
			 * If one print_trace_line() fills the entire trace_seq in one shot,
			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
			 * In this case, we need to consume it; otherwise, the loop will peek
			 * at this event next time, resulting in an infinite loop.
6965 			 */
6966 			if (save_len == 0) {
6967 				iter->seq.full = 0;
6968 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6969 				trace_consume(iter);
6970 				break;
6971 			}
6972 
6973 			/* In other cases, don't print partial lines */
6974 			iter->seq.seq.len = save_len;
6975 			break;
6976 		}
6977 		if (ret != TRACE_TYPE_NO_CONSUME)
6978 			trace_consume(iter);
6979 
6980 		if (trace_seq_used(&iter->seq) >= cnt)
6981 			break;
6982 
6983 		/*
6984 		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should have left via the partial output condition above.
6986 		 * One of the trace_seq_* functions is not used properly.
6987 		 */
6988 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6989 			  iter->ent->type);
6990 	}
6991 	trace_access_unlock(iter->cpu_file);
6992 	trace_event_read_unlock();
6993 
6994 	/* Now copy what we have to the user */
6995 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6996 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6997 		trace_seq_init(&iter->seq);
6998 
6999 	/*
7000 	 * If there was nothing to send to user, in spite of consuming trace
7001 	 * entries, go back to wait for more entries.
7002 	 */
7003 	if (sret == -EBUSY)
7004 		goto waitagain;
7005 
7006 out:
7007 	mutex_unlock(&iter->mutex);
7008 
7009 	return sret;
7010 }
7011 
7012 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7013 				     unsigned int idx)
7014 {
7015 	__free_page(spd->pages[idx]);
7016 }
7017 
7018 static size_t
7019 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7020 {
7021 	size_t count;
7022 	int save_len;
7023 	int ret;
7024 
7025 	/* Seq buffer is page-sized, exactly what we need. */
7026 	for (;;) {
7027 		save_len = iter->seq.seq.len;
7028 		ret = print_trace_line(iter);
7029 
7030 		if (trace_seq_has_overflowed(&iter->seq)) {
7031 			iter->seq.seq.len = save_len;
7032 			break;
7033 		}
7034 
7035 		/*
7036 		 * This should not be hit, because it should only
7037 		 * be set if the iter->seq overflowed. But check it
7038 		 * anyway to be safe.
7039 		 */
7040 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
7041 			iter->seq.seq.len = save_len;
7042 			break;
7043 		}
7044 
7045 		count = trace_seq_used(&iter->seq) - save_len;
7046 		if (rem < count) {
7047 			rem = 0;
7048 			iter->seq.seq.len = save_len;
7049 			break;
7050 		}
7051 
7052 		if (ret != TRACE_TYPE_NO_CONSUME)
7053 			trace_consume(iter);
7054 		rem -= count;
		if (!trace_find_next_entry_inc(iter)) {
7056 			rem = 0;
7057 			iter->ent = NULL;
7058 			break;
7059 		}
7060 	}
7061 
7062 	return rem;
7063 }
7064 
7065 static ssize_t tracing_splice_read_pipe(struct file *filp,
7066 					loff_t *ppos,
7067 					struct pipe_inode_info *pipe,
7068 					size_t len,
7069 					unsigned int flags)
7070 {
7071 	struct page *pages_def[PIPE_DEF_BUFFERS];
7072 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7073 	struct trace_iterator *iter = filp->private_data;
7074 	struct splice_pipe_desc spd = {
7075 		.pages		= pages_def,
7076 		.partial	= partial_def,
7077 		.nr_pages	= 0, /* This gets updated below. */
7078 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7079 		.ops		= &default_pipe_buf_ops,
7080 		.spd_release	= tracing_spd_release_pipe,
7081 	};
7082 	ssize_t ret;
7083 	size_t rem;
7084 	unsigned int i;
7085 
7086 	if (splice_grow_spd(pipe, &spd))
7087 		return -ENOMEM;
7088 
7089 	mutex_lock(&iter->mutex);
7090 
7091 	if (iter->trace->splice_read) {
7092 		ret = iter->trace->splice_read(iter, filp,
7093 					       ppos, pipe, len, flags);
7094 		if (ret)
7095 			goto out_err;
7096 	}
7097 
7098 	ret = tracing_wait_pipe(filp);
7099 	if (ret <= 0)
7100 		goto out_err;
7101 
7102 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7103 		ret = -EFAULT;
7104 		goto out_err;
7105 	}
7106 
7107 	trace_event_read_lock();
7108 	trace_access_lock(iter->cpu_file);
7109 
7110 	/* Fill as many pages as possible. */
7111 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7112 		spd.pages[i] = alloc_page(GFP_KERNEL);
7113 		if (!spd.pages[i])
7114 			break;
7115 
7116 		rem = tracing_fill_pipe_page(rem, iter);
7117 
7118 		/* Copy the data into the page, so we can start over. */
7119 		ret = trace_seq_to_buffer(&iter->seq,
7120 					  page_address(spd.pages[i]),
7121 					  trace_seq_used(&iter->seq));
7122 		if (ret < 0) {
7123 			__free_page(spd.pages[i]);
7124 			break;
7125 		}
7126 		spd.partial[i].offset = 0;
7127 		spd.partial[i].len = trace_seq_used(&iter->seq);
7128 
7129 		trace_seq_init(&iter->seq);
7130 	}
7131 
7132 	trace_access_unlock(iter->cpu_file);
7133 	trace_event_read_unlock();
7134 	mutex_unlock(&iter->mutex);
7135 
7136 	spd.nr_pages = i;
7137 
7138 	if (i)
7139 		ret = splice_to_pipe(pipe, &spd);
7140 	else
7141 		ret = 0;
7142 out:
7143 	splice_shrink_spd(&spd);
7144 	return ret;
7145 
7146 out_err:
7147 	mutex_unlock(&iter->mutex);
7148 	goto out;
7149 }
7150 
7151 static ssize_t
7152 tracing_entries_read(struct file *filp, char __user *ubuf,
7153 		     size_t cnt, loff_t *ppos)
7154 {
7155 	struct inode *inode = file_inode(filp);
7156 	struct trace_array *tr = inode->i_private;
7157 	int cpu = tracing_get_cpu(inode);
7158 	char buf[64];
7159 	int r = 0;
7160 	ssize_t ret;
7161 
7162 	mutex_lock(&trace_types_lock);
7163 
7164 	if (cpu == RING_BUFFER_ALL_CPUS) {
7165 		int cpu, buf_size_same;
7166 		unsigned long size;
7167 
7168 		size = 0;
7169 		buf_size_same = 1;
		/* check if all cpu sizes are the same */
		for_each_tracing_cpu(cpu) {
			/* fill in the size from the first enabled cpu */
7173 			if (size == 0)
7174 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7175 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7176 				buf_size_same = 0;
7177 				break;
7178 			}
7179 		}
7180 
7181 		if (buf_size_same) {
7182 			if (!ring_buffer_expanded)
7183 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7184 					    size >> 10,
7185 					    trace_buf_size >> 10);
7186 			else
7187 				r = sprintf(buf, "%lu\n", size >> 10);
7188 		} else
7189 			r = sprintf(buf, "X\n");
7190 	} else
7191 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7192 
7193 	mutex_unlock(&trace_types_lock);
7194 
7195 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7196 	return ret;
7197 }
7198 
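/*
 * Handles writes to buffer_size_kb, e.g. (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *   # echo 1024 > buffer_size_kb
 *
 * resizes every per-CPU buffer to 1024 KB, while writing to a per-CPU
 * instance of this file (per_cpu/cpuN/buffer_size_kb) resizes only that
 * CPU's buffer.
 */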
7199 static ssize_t
7200 tracing_entries_write(struct file *filp, const char __user *ubuf,
7201 		      size_t cnt, loff_t *ppos)
7202 {
7203 	struct inode *inode = file_inode(filp);
7204 	struct trace_array *tr = inode->i_private;
7205 	unsigned long val;
7206 	int ret;
7207 
7208 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7209 	if (ret)
7210 		return ret;
7211 
7212 	/* must have at least 1 entry */
7213 	if (!val)
7214 		return -EINVAL;
7215 
7216 	/* value is in KB */
7217 	val <<= 10;
7218 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7219 	if (ret < 0)
7220 		return ret;
7221 
7222 	*ppos += cnt;
7223 
7224 	return cnt;
7225 }
7226 
7227 static ssize_t
7228 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7229 				size_t cnt, loff_t *ppos)
7230 {
7231 	struct trace_array *tr = filp->private_data;
7232 	char buf[64];
7233 	int r, cpu;
7234 	unsigned long size = 0, expanded_size = 0;
7235 
7236 	mutex_lock(&trace_types_lock);
7237 	for_each_tracing_cpu(cpu) {
7238 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7239 		if (!ring_buffer_expanded)
7240 			expanded_size += trace_buf_size >> 10;
7241 	}
7242 	if (ring_buffer_expanded)
7243 		r = sprintf(buf, "%lu\n", size);
7244 	else
7245 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7246 	mutex_unlock(&trace_types_lock);
7247 
7248 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7249 }
7250 
7251 static ssize_t
7252 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7253 			  size_t cnt, loff_t *ppos)
7254 {
7255 	/*
	 * There is no need to read what the user has written; this function
	 * just makes sure that there is no error when "echo" is used.
7258 	 */
7259 
7260 	*ppos += cnt;
7261 
7262 	return cnt;
7263 }
7264 
7265 static int
7266 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7267 {
7268 	struct trace_array *tr = inode->i_private;
7269 
7270 	/* disable tracing ? */
7271 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7272 		tracer_tracing_off(tr);
7273 	/* resize the ring buffer to 0 */
7274 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7275 
7276 	trace_array_put(tr);
7277 
7278 	return 0;
7279 }
7280 
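/*
 * Handles writes to the trace_marker file, e.g.:
 *
 *   # echo 'hit the slow path' > trace_marker
 *
 * which injects the string as a TRACE_PRINT entry into the ring buffer so
 * user-space annotations appear inline with kernel events.
 */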
7281 static ssize_t
7282 tracing_mark_write(struct file *filp, const char __user *ubuf,
7283 					size_t cnt, loff_t *fpos)
7284 {
7285 	struct trace_array *tr = filp->private_data;
7286 	struct ring_buffer_event *event;
7287 	enum event_trigger_type tt = ETT_NONE;
7288 	struct trace_buffer *buffer;
7289 	struct print_entry *entry;
7290 	ssize_t written;
7291 	int size;
7292 	int len;
7293 
7294 /* Used in tracing_mark_raw_write() as well */
7295 #define FAULTED_STR "<faulted>"
7296 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7297 
7298 	if (tracing_disabled)
7299 		return -EINVAL;
7300 
7301 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7302 		return -EINVAL;
7303 
7304 	if (cnt > TRACE_BUF_SIZE)
7305 		cnt = TRACE_BUF_SIZE;
7306 
7307 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7308 
7309 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7310 
7311 	/* If less than "<faulted>", then make sure we can still add that */
7312 	if (cnt < FAULTED_SIZE)
7313 		size += FAULTED_SIZE - cnt;
7314 
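	/*
	 * For example, a 3-byte write still reserves sizeof(*entry) +
	 * FAULTED_SIZE + 2 bytes, so that "<faulted>" plus the trailing
	 * '\n' and '\0' always fit if the copy from user space faults.
	 */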
7315 	buffer = tr->array_buffer.buffer;
7316 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7317 					    tracing_gen_ctx());
7318 	if (unlikely(!event))
7319 		/* Ring buffer disabled, return as if not open for write */
7320 		return -EBADF;
7321 
7322 	entry = ring_buffer_event_data(event);
7323 	entry->ip = _THIS_IP_;
7324 
7325 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7326 	if (len) {
7327 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7328 		cnt = FAULTED_SIZE;
7329 		written = -EFAULT;
7330 	} else
7331 		written = cnt;
7332 
7333 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7334 		/* do not add \n before testing triggers, but add \0 */
7335 		entry->buf[cnt] = '\0';
7336 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7337 	}
7338 
7339 	if (entry->buf[cnt - 1] != '\n') {
7340 		entry->buf[cnt] = '\n';
7341 		entry->buf[cnt + 1] = '\0';
7342 	} else
7343 		entry->buf[cnt] = '\0';
7344 
7345 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7346 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7347 	__buffer_unlock_commit(buffer, event);
7348 
7349 	if (tt)
7350 		event_triggers_post_call(tr->trace_marker_file, tt);
7351 
7352 	return written;
7353 }
7354 
7355 /* Limit it for now to 3K (including tag) */
7356 #define RAW_DATA_MAX_SIZE (1024*3)
7357 
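/*
 * Handles writes to trace_marker_raw. The payload is opaque binary data that
 * must start with an 'unsigned int' tag id (stored in entry->id); the rest is
 * copied verbatim into the ring buffer as a TRACE_RAW_DATA event.
 */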
7358 static ssize_t
7359 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7360 					size_t cnt, loff_t *fpos)
7361 {
7362 	struct trace_array *tr = filp->private_data;
7363 	struct ring_buffer_event *event;
7364 	struct trace_buffer *buffer;
7365 	struct raw_data_entry *entry;
7366 	ssize_t written;
7367 	int size;
7368 	int len;
7369 
7370 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7371 
7372 	if (tracing_disabled)
7373 		return -EINVAL;
7374 
7375 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7376 		return -EINVAL;
7377 
7378 	/* The marker must at least have a tag id */
7379 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7380 		return -EINVAL;
7381 
7382 	if (cnt > TRACE_BUF_SIZE)
7383 		cnt = TRACE_BUF_SIZE;
7384 
7385 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7386 
7387 	size = sizeof(*entry) + cnt;
7388 	if (cnt < FAULT_SIZE_ID)
7389 		size += FAULT_SIZE_ID - cnt;
7390 
7391 	buffer = tr->array_buffer.buffer;
7392 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7393 					    tracing_gen_ctx());
7394 	if (!event)
7395 		/* Ring buffer disabled, return as if not open for write */
7396 		return -EBADF;
7397 
7398 	entry = ring_buffer_event_data(event);
7399 
7400 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7401 	if (len) {
7402 		entry->id = -1;
7403 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7404 		written = -EFAULT;
7405 	} else
7406 		written = cnt;
7407 
7408 	__buffer_unlock_commit(buffer, event);
7409 
7410 	return written;
7411 }
7412 
7413 static int tracing_clock_show(struct seq_file *m, void *v)
7414 {
7415 	struct trace_array *tr = m->private;
7416 	int i;
7417 
7418 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7419 		seq_printf(m,
7420 			"%s%s%s%s", i ? " " : "",
7421 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7422 			i == tr->clock_id ? "]" : "");
7423 	seq_putc(m, '\n');
7424 
7425 	return 0;
7426 }
7427 
7428 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7429 {
7430 	int i;
7431 
7432 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7433 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7434 			break;
7435 	}
7436 	if (i == ARRAY_SIZE(trace_clocks))
7437 		return -EINVAL;
7438 
7439 	mutex_lock(&trace_types_lock);
7440 
7441 	tr->clock_id = i;
7442 
7443 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7444 
7445 	/*
	 * The new clock may not be consistent with the previous clock.
7447 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7448 	 */
7449 	tracing_reset_online_cpus(&tr->array_buffer);
7450 
7451 #ifdef CONFIG_TRACER_MAX_TRACE
7452 	if (tr->max_buffer.buffer)
7453 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7454 	tracing_reset_online_cpus(&tr->max_buffer);
7455 #endif
7456 
7457 	mutex_unlock(&trace_types_lock);
7458 
7459 	return 0;
7460 }
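
/*
 * Editor's note: an illustrative sketch of driving this from user space
 * through the "trace_clock" tracefs file (the path below assumes the usual
 * tracefs mount point).  Reading the file lists the available clocks with
 * the active one in brackets, as printed by tracing_clock_show(); writing
 * a clock name switches to it and, as noted above, resets the ring buffer:
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "global", 6);
 *		close(fd);
 *	}
 */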
7461 
7462 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7463 				   size_t cnt, loff_t *fpos)
7464 {
7465 	struct seq_file *m = filp->private_data;
7466 	struct trace_array *tr = m->private;
7467 	char buf[64];
7468 	const char *clockstr;
7469 	int ret;
7470 
7471 	if (cnt >= sizeof(buf))
7472 		return -EINVAL;
7473 
7474 	if (copy_from_user(buf, ubuf, cnt))
7475 		return -EFAULT;
7476 
7477 	buf[cnt] = 0;
7478 
7479 	clockstr = strstrip(buf);
7480 
7481 	ret = tracing_set_clock(tr, clockstr);
7482 	if (ret)
7483 		return ret;
7484 
7485 	*fpos += cnt;
7486 
7487 	return cnt;
7488 }
7489 
7490 static int tracing_clock_open(struct inode *inode, struct file *file)
7491 {
7492 	struct trace_array *tr = inode->i_private;
7493 	int ret;
7494 
7495 	ret = tracing_check_open_get_tr(tr);
7496 	if (ret)
7497 		return ret;
7498 
7499 	ret = single_open(file, tracing_clock_show, inode->i_private);
7500 	if (ret < 0)
7501 		trace_array_put(tr);
7502 
7503 	return ret;
7504 }
7505 
7506 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7507 {
7508 	struct trace_array *tr = m->private;
7509 
7510 	mutex_lock(&trace_types_lock);
7511 
7512 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7513 		seq_puts(m, "delta [absolute]\n");
7514 	else
7515 		seq_puts(m, "[delta] absolute\n");
7516 
7517 	mutex_unlock(&trace_types_lock);
7518 
7519 	return 0;
7520 }
7521 
7522 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7523 {
7524 	struct trace_array *tr = inode->i_private;
7525 	int ret;
7526 
7527 	ret = tracing_check_open_get_tr(tr);
7528 	if (ret)
7529 		return ret;
7530 
7531 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7532 	if (ret < 0)
7533 		trace_array_put(tr);
7534 
7535 	return ret;
7536 }
7537 
7538 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7539 {
7540 	if (rbe == this_cpu_read(trace_buffered_event))
7541 		return ring_buffer_time_stamp(buffer);
7542 
7543 	return ring_buffer_event_time_stamp(buffer, rbe);
7544 }
7545 
7546 /*
7547  * Set or disable using the per CPU trace_buffered_event when possible.
7548  */
7549 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7550 {
7551 	int ret = 0;
7552 
7553 	mutex_lock(&trace_types_lock);
7554 
7555 	if (set && tr->no_filter_buffering_ref++)
7556 		goto out;
7557 
7558 	if (!set) {
7559 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7560 			ret = -EINVAL;
7561 			goto out;
7562 		}
7563 
7564 		--tr->no_filter_buffering_ref;
7565 	}
7566  out:
7567 	mutex_unlock(&trace_types_lock);
7568 
7569 	return ret;
7570 }
7571 
7572 struct ftrace_buffer_info {
7573 	struct trace_iterator	iter;
7574 	void			*spare;
7575 	unsigned int		spare_cpu;
7576 	unsigned int		read;
7577 };
7578 
7579 #ifdef CONFIG_TRACER_SNAPSHOT
7580 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7581 {
7582 	struct trace_array *tr = inode->i_private;
7583 	struct trace_iterator *iter;
7584 	struct seq_file *m;
7585 	int ret;
7586 
7587 	ret = tracing_check_open_get_tr(tr);
7588 	if (ret)
7589 		return ret;
7590 
7591 	if (file->f_mode & FMODE_READ) {
7592 		iter = __tracing_open(inode, file, true);
7593 		if (IS_ERR(iter))
7594 			ret = PTR_ERR(iter);
7595 	} else {
7596 		/* Writes still need the seq_file to hold the private data */
7597 		ret = -ENOMEM;
7598 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7599 		if (!m)
7600 			goto out;
7601 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7602 		if (!iter) {
7603 			kfree(m);
7604 			goto out;
7605 		}
7606 		ret = 0;
7607 
7608 		iter->tr = tr;
7609 		iter->array_buffer = &tr->max_buffer;
7610 		iter->cpu_file = tracing_get_cpu(inode);
7611 		m->private = iter;
7612 		file->private_data = m;
7613 	}
7614 out:
7615 	if (ret < 0)
7616 		trace_array_put(tr);
7617 
7618 	return ret;
7619 }
7620 
7621 static ssize_t
7622 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7623 		       loff_t *ppos)
7624 {
7625 	struct seq_file *m = filp->private_data;
7626 	struct trace_iterator *iter = m->private;
7627 	struct trace_array *tr = iter->tr;
7628 	unsigned long val;
7629 	int ret;
7630 
7631 	ret = tracing_update_buffers();
7632 	if (ret < 0)
7633 		return ret;
7634 
7635 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7636 	if (ret)
7637 		return ret;
7638 
7639 	mutex_lock(&trace_types_lock);
7640 
7641 	if (tr->current_trace->use_max_tr) {
7642 		ret = -EBUSY;
7643 		goto out;
7644 	}
7645 
7646 	local_irq_disable();
7647 	arch_spin_lock(&tr->max_lock);
7648 	if (tr->cond_snapshot)
7649 		ret = -EBUSY;
7650 	arch_spin_unlock(&tr->max_lock);
7651 	local_irq_enable();
7652 	if (ret)
7653 		goto out;
7654 
7655 	switch (val) {
7656 	case 0:
7657 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7658 			ret = -EINVAL;
7659 			break;
7660 		}
7661 		if (tr->allocated_snapshot)
7662 			free_snapshot(tr);
7663 		break;
7664 	case 1:
7665 /* Only allow per-cpu swap if the ring buffer supports it */
7666 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7667 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7668 			ret = -EINVAL;
7669 			break;
7670 		}
7671 #endif
7672 		if (tr->allocated_snapshot)
7673 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7674 					&tr->array_buffer, iter->cpu_file);
7675 		else
7676 			ret = tracing_alloc_snapshot_instance(tr);
7677 		if (ret < 0)
7678 			break;
7679 		local_irq_disable();
7680 		/* Now, we're going to swap */
7681 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7682 			update_max_tr(tr, current, smp_processor_id(), NULL);
7683 		else
7684 			update_max_tr_single(tr, current, iter->cpu_file);
7685 		local_irq_enable();
7686 		break;
7687 	default:
7688 		if (tr->allocated_snapshot) {
7689 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7690 				tracing_reset_online_cpus(&tr->max_buffer);
7691 			else
7692 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7693 		}
7694 		break;
7695 	}
7696 
7697 	if (ret >= 0) {
7698 		*ppos += cnt;
7699 		ret = cnt;
7700 	}
7701 out:
7702 	mutex_unlock(&trace_types_lock);
7703 	return ret;
7704 }
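
/*
 * Editor's note: the value written above selects the action: 0 frees the
 * snapshot buffer (all-CPUs file only), 1 allocates it if needed and swaps
 * it with the live buffer, and any other value just clears the snapshot
 * buffer's contents.  An illustrative userspace sketch that takes a
 * snapshot (tracefs path assumed):
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 */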
7705 
7706 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7707 {
7708 	struct seq_file *m = file->private_data;
7709 	int ret;
7710 
7711 	ret = tracing_release(inode, file);
7712 
7713 	if (file->f_mode & FMODE_READ)
7714 		return ret;
7715 
7716 	/* If write only, the seq_file is just a stub */
7717 	if (m)
7718 		kfree(m->private);
7719 	kfree(m);
7720 
7721 	return 0;
7722 }
7723 
7724 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7725 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7726 				    size_t count, loff_t *ppos);
7727 static int tracing_buffers_release(struct inode *inode, struct file *file);
7728 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7729 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7730 
7731 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7732 {
7733 	struct ftrace_buffer_info *info;
7734 	int ret;
7735 
7736 	/* The following checks for tracefs lockdown */
7737 	ret = tracing_buffers_open(inode, filp);
7738 	if (ret < 0)
7739 		return ret;
7740 
7741 	info = filp->private_data;
7742 
7743 	if (info->iter.trace->use_max_tr) {
7744 		tracing_buffers_release(inode, filp);
7745 		return -EBUSY;
7746 	}
7747 
7748 	info->iter.snapshot = true;
7749 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7750 
7751 	return ret;
7752 }
7753 
7754 #endif /* CONFIG_TRACER_SNAPSHOT */
7755 
7756 
7757 static const struct file_operations tracing_thresh_fops = {
7758 	.open		= tracing_open_generic,
7759 	.read		= tracing_thresh_read,
7760 	.write		= tracing_thresh_write,
7761 	.llseek		= generic_file_llseek,
7762 };
7763 
7764 #ifdef CONFIG_TRACER_MAX_TRACE
7765 static const struct file_operations tracing_max_lat_fops = {
7766 	.open		= tracing_open_generic,
7767 	.read		= tracing_max_lat_read,
7768 	.write		= tracing_max_lat_write,
7769 	.llseek		= generic_file_llseek,
7770 };
7771 #endif
7772 
7773 static const struct file_operations set_tracer_fops = {
7774 	.open		= tracing_open_generic,
7775 	.read		= tracing_set_trace_read,
7776 	.write		= tracing_set_trace_write,
7777 	.llseek		= generic_file_llseek,
7778 };
7779 
7780 static const struct file_operations tracing_pipe_fops = {
7781 	.open		= tracing_open_pipe,
7782 	.poll		= tracing_poll_pipe,
7783 	.read		= tracing_read_pipe,
7784 	.splice_read	= tracing_splice_read_pipe,
7785 	.release	= tracing_release_pipe,
7786 	.llseek		= no_llseek,
7787 };
7788 
7789 static const struct file_operations tracing_entries_fops = {
7790 	.open		= tracing_open_generic_tr,
7791 	.read		= tracing_entries_read,
7792 	.write		= tracing_entries_write,
7793 	.llseek		= generic_file_llseek,
7794 	.release	= tracing_release_generic_tr,
7795 };
7796 
7797 static const struct file_operations tracing_total_entries_fops = {
7798 	.open		= tracing_open_generic_tr,
7799 	.read		= tracing_total_entries_read,
7800 	.llseek		= generic_file_llseek,
7801 	.release	= tracing_release_generic_tr,
7802 };
7803 
7804 static const struct file_operations tracing_free_buffer_fops = {
7805 	.open		= tracing_open_generic_tr,
7806 	.write		= tracing_free_buffer_write,
7807 	.release	= tracing_free_buffer_release,
7808 };
7809 
7810 static const struct file_operations tracing_mark_fops = {
7811 	.open		= tracing_mark_open,
7812 	.write		= tracing_mark_write,
7813 	.release	= tracing_release_generic_tr,
7814 };
7815 
7816 static const struct file_operations tracing_mark_raw_fops = {
7817 	.open		= tracing_mark_open,
7818 	.write		= tracing_mark_raw_write,
7819 	.release	= tracing_release_generic_tr,
7820 };
7821 
7822 static const struct file_operations trace_clock_fops = {
7823 	.open		= tracing_clock_open,
7824 	.read		= seq_read,
7825 	.llseek		= seq_lseek,
7826 	.release	= tracing_single_release_tr,
7827 	.write		= tracing_clock_write,
7828 };
7829 
7830 static const struct file_operations trace_time_stamp_mode_fops = {
7831 	.open		= tracing_time_stamp_mode_open,
7832 	.read		= seq_read,
7833 	.llseek		= seq_lseek,
7834 	.release	= tracing_single_release_tr,
7835 };
7836 
7837 #ifdef CONFIG_TRACER_SNAPSHOT
7838 static const struct file_operations snapshot_fops = {
7839 	.open		= tracing_snapshot_open,
7840 	.read		= seq_read,
7841 	.write		= tracing_snapshot_write,
7842 	.llseek		= tracing_lseek,
7843 	.release	= tracing_snapshot_release,
7844 };
7845 
7846 static const struct file_operations snapshot_raw_fops = {
7847 	.open		= snapshot_raw_open,
7848 	.read		= tracing_buffers_read,
7849 	.release	= tracing_buffers_release,
7850 	.splice_read	= tracing_buffers_splice_read,
7851 	.llseek		= no_llseek,
7852 };
7853 
7854 #endif /* CONFIG_TRACER_SNAPSHOT */
7855 
7856 /*
7857  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7858  * @filp: The active open file structure
7859  * @ubuf: The userspace provided buffer holding the value to write
7860  * @cnt: The maximum number of bytes to read
7861  * @ppos: The current "file" position
7862  *
7863  * This function implements the write interface for a struct trace_min_max_param.
7864  * The filp->private_data must point to a trace_min_max_param structure that
7865  * defines where to write the value, the min and the max acceptable values,
7866  * and a lock to protect the write.
7867  */
7868 static ssize_t
7869 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7870 {
7871 	struct trace_min_max_param *param = filp->private_data;
7872 	u64 val;
7873 	int err;
7874 
7875 	if (!param)
7876 		return -EFAULT;
7877 
7878 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7879 	if (err)
7880 		return err;
7881 
7882 	if (param->lock)
7883 		mutex_lock(param->lock);
7884 
7885 	if (param->min && val < *param->min)
7886 		err = -EINVAL;
7887 
7888 	if (param->max && val > *param->max)
7889 		err = -EINVAL;
7890 
7891 	if (!err)
7892 		*param->val = val;
7893 
7894 	if (param->lock)
7895 		mutex_unlock(param->lock);
7896 
7897 	if (err)
7898 		return err;
7899 
7900 	return cnt;
7901 }
7902 
7903 /*
7904  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7905  * @filp: The active open file structure
7906  * @ubuf: The userspace provided buffer to read value into
7907  * @cnt: The maximum number of bytes to read
7908  * @ppos: The current "file" position
7909  *
7910  * This function implements the read interface for a struct trace_min_max_param.
7911  * The filp->private_data must point to a trace_min_max_param struct with valid
7912  * data.
7913  */
7914 static ssize_t
7915 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7916 {
7917 	struct trace_min_max_param *param = filp->private_data;
7918 	char buf[U64_STR_SIZE];
7919 	int len;
7920 	u64 val;
7921 
7922 	if (!param)
7923 		return -EFAULT;
7924 
7925 	val = *param->val;
7926 
7927 	if (cnt > sizeof(buf))
7928 		cnt = sizeof(buf);
7929 
7930 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7931 
7932 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7933 }
7934 
7935 const struct file_operations trace_min_max_fops = {
7936 	.open		= tracing_open_generic,
7937 	.read		= trace_min_max_read,
7938 	.write		= trace_min_max_write,
7939 };
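
/*
 * Editor's note: a minimal sketch of how a tracer might wire up one of
 * these files.  The knob name, variables and parent dentry below are
 * hypothetical; the pointer members (lock, val, min, max) are the ones
 * trace_min_max_write()/read() dereference above:
 *
 *	static u64 my_val, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent_dentry,
 *			  &my_param, &trace_min_max_fops);
 */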
7940 
7941 #define TRACING_LOG_ERRS_MAX	8
7942 #define TRACING_LOG_LOC_MAX	128
7943 
7944 #define CMD_PREFIX "  Command: "
7945 
7946 struct err_info {
7947 	const char	**errs;	/* ptr to loc-specific array of err strings */
7948 	u8		type;	/* index into errs -> specific err string */
7949 	u16		pos;	/* caret position */
7950 	u64		ts;
7951 };
7952 
7953 struct tracing_log_err {
7954 	struct list_head	list;
7955 	struct err_info		info;
7956 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7957 	char			*cmd;                     /* what caused err */
7958 };
7959 
7960 static DEFINE_MUTEX(tracing_err_log_lock);
7961 
7962 static struct tracing_log_err *alloc_tracing_log_err(int len)
7963 {
7964 	struct tracing_log_err *err;
7965 
7966 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7967 	if (!err)
7968 		return ERR_PTR(-ENOMEM);
7969 
7970 	err->cmd = kzalloc(len, GFP_KERNEL);
7971 	if (!err->cmd) {
7972 		kfree(err);
7973 		return ERR_PTR(-ENOMEM);
7974 	}
7975 
7976 	return err;
7977 }
7978 
7979 static void free_tracing_log_err(struct tracing_log_err *err)
7980 {
7981 	kfree(err->cmd);
7982 	kfree(err);
7983 }
7984 
7985 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7986 						   int len)
7987 {
7988 	struct tracing_log_err *err;
7989 	char *cmd;
7990 
7991 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7992 		err = alloc_tracing_log_err(len);
7993 		if (PTR_ERR(err) != -ENOMEM)
7994 			tr->n_err_log_entries++;
7995 
7996 		return err;
7997 	}
7998 	cmd = kzalloc(len, GFP_KERNEL);
7999 	if (!cmd)
8000 		return ERR_PTR(-ENOMEM);
8001 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8002 	kfree(err->cmd);
8003 	err->cmd = cmd;
8004 	list_del(&err->list);
8005 
8006 	return err;
8007 }
8008 
8009 /**
8010  * err_pos - find the position of a string within a command for error careting
8011  * @cmd: The tracing command that caused the error
8012  * @str: The string to position the caret at within @cmd
8013  *
8014  * Finds the position of the first occurrence of @str within @cmd.  The
8015  * return value can be passed to tracing_log_err() for caret placement
8016  * within @cmd.
8017  *
8018  * Returns the index within @cmd of the first occurrence of @str or 0
8019  * if @str was not found.
8020  */
8021 unsigned int err_pos(char *cmd, const char *str)
8022 {
8023 	char *found;
8024 
8025 	if (WARN_ON(!strlen(cmd)))
8026 		return 0;
8027 
8028 	found = strstr(cmd, str);
8029 	if (found)
8030 		return found - cmd;
8031 
8032 	return 0;
8033 }
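
/*
 * Editor's note: illustrative only.  For a failed command such as
 * "hist:keys=bogus", err_pos(cmd, "bogus") returns 10, which a caller can
 * pass as @pos to tracing_log_err() below so that the error_log caret is
 * printed under the offending token.
 */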
8034 
8035 /**
8036  * tracing_log_err - write an error to the tracing error log
8037  * @tr: The associated trace array for the error (NULL for top level array)
8038  * @loc: A string describing where the error occurred
8039  * @cmd: The tracing command that caused the error
8040  * @errs: The array of loc-specific static error strings
8041  * @type: The index into errs[], which produces the specific static err string
8042  * @pos: The position the caret should be placed in the cmd
8043  *
8044  * Writes an error into tracing/error_log of the form:
8045  *
8046  * <loc>: error: <text>
8047  *   Command: <cmd>
8048  *              ^
8049  *
8050  * tracing/error_log is a small log file containing the last
8051  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8052  * unless there has been a tracing error, and the error log can be
8053  * cleared and have its memory freed by writing the empty string in
8054  * truncation mode to it, i.e. echo > tracing/error_log.
8055  *
8056  * NOTE: the @errs array along with the @type param are used to
8057  * produce a static error string - this string is not copied and saved
8058  * when the error is logged - only a pointer to it is saved.  See
8059  * existing callers for examples of how static strings are typically
8060  * defined for use with tracing_log_err().
8061  */
8062 void tracing_log_err(struct trace_array *tr,
8063 		     const char *loc, const char *cmd,
8064 		     const char **errs, u8 type, u16 pos)
8065 {
8066 	struct tracing_log_err *err;
8067 	int len = 0;
8068 
8069 	if (!tr)
8070 		tr = &global_trace;
8071 
8072 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8073 
8074 	mutex_lock(&tracing_err_log_lock);
8075 	err = get_tracing_log_err(tr, len);
8076 	if (PTR_ERR(err) == -ENOMEM) {
8077 		mutex_unlock(&tracing_err_log_lock);
8078 		return;
8079 	}
8080 
8081 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8082 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8083 
8084 	err->info.errs = errs;
8085 	err->info.type = type;
8086 	err->info.pos = pos;
8087 	err->info.ts = local_clock();
8088 
8089 	list_add_tail(&err->list, &tr->err_log);
8090 	mutex_unlock(&tracing_err_log_lock);
8091 }
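
/*
 * Editor's note: an illustrative sketch of a call site and the resulting
 * entry (the location, error-string array, index and message below are
 * hypothetical):
 *
 *	tracing_log_err(tr, "trace_events_hist", cmd, my_err_text,
 *			MY_ERR_BAD_KEY, err_pos(cmd, "bogus"));
 *
 * would appear in tracing/error_log roughly as:
 *
 *	[  123.456789] trace_events_hist: error: Bad key
 *	  Command: hist:keys=bogus
 *	                     ^
 */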
8092 
8093 static void clear_tracing_err_log(struct trace_array *tr)
8094 {
8095 	struct tracing_log_err *err, *next;
8096 
8097 	mutex_lock(&tracing_err_log_lock);
8098 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8099 		list_del(&err->list);
8100 		free_tracing_log_err(err);
8101 	}
8102 
8103 	tr->n_err_log_entries = 0;
8104 	mutex_unlock(&tracing_err_log_lock);
8105 }
8106 
8107 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8108 {
8109 	struct trace_array *tr = m->private;
8110 
8111 	mutex_lock(&tracing_err_log_lock);
8112 
8113 	return seq_list_start(&tr->err_log, *pos);
8114 }
8115 
8116 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8117 {
8118 	struct trace_array *tr = m->private;
8119 
8120 	return seq_list_next(v, &tr->err_log, pos);
8121 }
8122 
8123 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8124 {
8125 	mutex_unlock(&tracing_err_log_lock);
8126 }
8127 
8128 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8129 {
8130 	u16 i;
8131 
8132 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8133 		seq_putc(m, ' ');
8134 	for (i = 0; i < pos; i++)
8135 		seq_putc(m, ' ');
8136 	seq_puts(m, "^\n");
8137 }
8138 
8139 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8140 {
8141 	struct tracing_log_err *err = v;
8142 
8143 	if (err) {
8144 		const char *err_text = err->info.errs[err->info.type];
8145 		u64 sec = err->info.ts;
8146 		u32 nsec;
8147 
8148 		nsec = do_div(sec, NSEC_PER_SEC);
8149 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8150 			   err->loc, err_text);
8151 		seq_printf(m, "%s", err->cmd);
8152 		tracing_err_log_show_pos(m, err->info.pos);
8153 	}
8154 
8155 	return 0;
8156 }
8157 
8158 static const struct seq_operations tracing_err_log_seq_ops = {
8159 	.start  = tracing_err_log_seq_start,
8160 	.next   = tracing_err_log_seq_next,
8161 	.stop   = tracing_err_log_seq_stop,
8162 	.show   = tracing_err_log_seq_show
8163 };
8164 
8165 static int tracing_err_log_open(struct inode *inode, struct file *file)
8166 {
8167 	struct trace_array *tr = inode->i_private;
8168 	int ret = 0;
8169 
8170 	ret = tracing_check_open_get_tr(tr);
8171 	if (ret)
8172 		return ret;
8173 
8174 	/* If this file was opened for write, then erase contents */
8175 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8176 		clear_tracing_err_log(tr);
8177 
8178 	if (file->f_mode & FMODE_READ) {
8179 		ret = seq_open(file, &tracing_err_log_seq_ops);
8180 		if (!ret) {
8181 			struct seq_file *m = file->private_data;
8182 			m->private = tr;
8183 		} else {
8184 			trace_array_put(tr);
8185 		}
8186 	}
8187 	return ret;
8188 }
8189 
8190 static ssize_t tracing_err_log_write(struct file *file,
8191 				     const char __user *buffer,
8192 				     size_t count, loff_t *ppos)
8193 {
8194 	return count;
8195 }
8196 
8197 static int tracing_err_log_release(struct inode *inode, struct file *file)
8198 {
8199 	struct trace_array *tr = inode->i_private;
8200 
8201 	trace_array_put(tr);
8202 
8203 	if (file->f_mode & FMODE_READ)
8204 		seq_release(inode, file);
8205 
8206 	return 0;
8207 }
8208 
8209 static const struct file_operations tracing_err_log_fops = {
8210 	.open           = tracing_err_log_open,
8211 	.write		= tracing_err_log_write,
8212 	.read           = seq_read,
8213 	.llseek         = tracing_lseek,
8214 	.release        = tracing_err_log_release,
8215 };
8216 
8217 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8218 {
8219 	struct trace_array *tr = inode->i_private;
8220 	struct ftrace_buffer_info *info;
8221 	int ret;
8222 
8223 	ret = tracing_check_open_get_tr(tr);
8224 	if (ret)
8225 		return ret;
8226 
8227 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8228 	if (!info) {
8229 		trace_array_put(tr);
8230 		return -ENOMEM;
8231 	}
8232 
8233 	mutex_lock(&trace_types_lock);
8234 
8235 	info->iter.tr		= tr;
8236 	info->iter.cpu_file	= tracing_get_cpu(inode);
8237 	info->iter.trace	= tr->current_trace;
8238 	info->iter.array_buffer = &tr->array_buffer;
8239 	info->spare		= NULL;
8240 	/* Force reading ring buffer for first read */
8241 	info->read		= (unsigned int)-1;
8242 
8243 	filp->private_data = info;
8244 
8245 	tr->trace_ref++;
8246 
8247 	mutex_unlock(&trace_types_lock);
8248 
8249 	ret = nonseekable_open(inode, filp);
8250 	if (ret < 0)
8251 		trace_array_put(tr);
8252 
8253 	return ret;
8254 }
8255 
8256 static __poll_t
8257 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8258 {
8259 	struct ftrace_buffer_info *info = filp->private_data;
8260 	struct trace_iterator *iter = &info->iter;
8261 
8262 	return trace_poll(iter, filp, poll_table);
8263 }
8264 
8265 static ssize_t
8266 tracing_buffers_read(struct file *filp, char __user *ubuf,
8267 		     size_t count, loff_t *ppos)
8268 {
8269 	struct ftrace_buffer_info *info = filp->private_data;
8270 	struct trace_iterator *iter = &info->iter;
8271 	ssize_t ret = 0;
8272 	ssize_t size;
8273 
8274 	if (!count)
8275 		return 0;
8276 
8277 #ifdef CONFIG_TRACER_MAX_TRACE
8278 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8279 		return -EBUSY;
8280 #endif
8281 
8282 	if (!info->spare) {
8283 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8284 							  iter->cpu_file);
8285 		if (IS_ERR(info->spare)) {
8286 			ret = PTR_ERR(info->spare);
8287 			info->spare = NULL;
8288 		} else {
8289 			info->spare_cpu = iter->cpu_file;
8290 		}
8291 	}
8292 	if (!info->spare)
8293 		return ret;
8294 
8295 	/* Do we have previous read data to read? */
8296 	if (info->read < PAGE_SIZE)
8297 		goto read;
8298 
8299  again:
8300 	trace_access_lock(iter->cpu_file);
8301 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8302 				    &info->spare,
8303 				    count,
8304 				    iter->cpu_file, 0);
8305 	trace_access_unlock(iter->cpu_file);
8306 
8307 	if (ret < 0) {
8308 		if (trace_empty(iter)) {
8309 			if ((filp->f_flags & O_NONBLOCK))
8310 				return -EAGAIN;
8311 
8312 			ret = wait_on_pipe(iter, 0);
8313 			if (ret)
8314 				return ret;
8315 
8316 			goto again;
8317 		}
8318 		return 0;
8319 	}
8320 
8321 	info->read = 0;
8322  read:
8323 	size = PAGE_SIZE - info->read;
8324 	if (size > count)
8325 		size = count;
8326 
8327 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8328 	if (ret == size)
8329 		return -EFAULT;
8330 
8331 	size -= ret;
8332 
8333 	*ppos += size;
8334 	info->read += size;
8335 
8336 	return size;
8337 }
8338 
8339 static int tracing_buffers_release(struct inode *inode, struct file *file)
8340 {
8341 	struct ftrace_buffer_info *info = file->private_data;
8342 	struct trace_iterator *iter = &info->iter;
8343 
8344 	mutex_lock(&trace_types_lock);
8345 
8346 	iter->tr->trace_ref--;
8347 
8348 	__trace_array_put(iter->tr);
8349 
8350 	iter->wait_index++;
8351 	/* Make sure the waiters see the new wait_index */
8352 	smp_wmb();
8353 
8354 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8355 
8356 	if (info->spare)
8357 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8358 					   info->spare_cpu, info->spare);
8359 	kvfree(info);
8360 
8361 	mutex_unlock(&trace_types_lock);
8362 
8363 	return 0;
8364 }
8365 
8366 struct buffer_ref {
8367 	struct trace_buffer	*buffer;
8368 	void			*page;
8369 	int			cpu;
8370 	refcount_t		refcount;
8371 };
8372 
8373 static void buffer_ref_release(struct buffer_ref *ref)
8374 {
8375 	if (!refcount_dec_and_test(&ref->refcount))
8376 		return;
8377 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8378 	kfree(ref);
8379 }
8380 
8381 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8382 				    struct pipe_buffer *buf)
8383 {
8384 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8385 
8386 	buffer_ref_release(ref);
8387 	buf->private = 0;
8388 }
8389 
8390 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8391 				struct pipe_buffer *buf)
8392 {
8393 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8394 
8395 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8396 		return false;
8397 
8398 	refcount_inc(&ref->refcount);
8399 	return true;
8400 }
8401 
8402 /* Pipe buffer operations for a buffer. */
8403 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8404 	.release		= buffer_pipe_buf_release,
8405 	.get			= buffer_pipe_buf_get,
8406 };
8407 
8408 /*
8409  * Callback from splice_to_pipe(), if we need to release some pages
8410  * at the end of the spd in case we errored out while filling the pipe.
8411  */
8412 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8413 {
8414 	struct buffer_ref *ref =
8415 		(struct buffer_ref *)spd->partial[i].private;
8416 
8417 	buffer_ref_release(ref);
8418 	spd->partial[i].private = 0;
8419 }
8420 
8421 static ssize_t
8422 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8423 			    struct pipe_inode_info *pipe, size_t len,
8424 			    unsigned int flags)
8425 {
8426 	struct ftrace_buffer_info *info = file->private_data;
8427 	struct trace_iterator *iter = &info->iter;
8428 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8429 	struct page *pages_def[PIPE_DEF_BUFFERS];
8430 	struct splice_pipe_desc spd = {
8431 		.pages		= pages_def,
8432 		.partial	= partial_def,
8433 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8434 		.ops		= &buffer_pipe_buf_ops,
8435 		.spd_release	= buffer_spd_release,
8436 	};
8437 	struct buffer_ref *ref;
8438 	int entries, i;
8439 	ssize_t ret = 0;
8440 
8441 #ifdef CONFIG_TRACER_MAX_TRACE
8442 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8443 		return -EBUSY;
8444 #endif
8445 
8446 	if (*ppos & (PAGE_SIZE - 1))
8447 		return -EINVAL;
8448 
8449 	if (len & (PAGE_SIZE - 1)) {
8450 		if (len < PAGE_SIZE)
8451 			return -EINVAL;
8452 		len &= PAGE_MASK;
8453 	}
8454 
8455 	if (splice_grow_spd(pipe, &spd))
8456 		return -ENOMEM;
8457 
8458  again:
8459 	trace_access_lock(iter->cpu_file);
8460 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8461 
8462 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8463 		struct page *page;
8464 		int r;
8465 
8466 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8467 		if (!ref) {
8468 			ret = -ENOMEM;
8469 			break;
8470 		}
8471 
8472 		refcount_set(&ref->refcount, 1);
8473 		ref->buffer = iter->array_buffer->buffer;
8474 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8475 		if (IS_ERR(ref->page)) {
8476 			ret = PTR_ERR(ref->page);
8477 			ref->page = NULL;
8478 			kfree(ref);
8479 			break;
8480 		}
8481 		ref->cpu = iter->cpu_file;
8482 
8483 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8484 					  len, iter->cpu_file, 1);
8485 		if (r < 0) {
8486 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8487 						   ref->page);
8488 			kfree(ref);
8489 			break;
8490 		}
8491 
8492 		page = virt_to_page(ref->page);
8493 
8494 		spd.pages[i] = page;
8495 		spd.partial[i].len = PAGE_SIZE;
8496 		spd.partial[i].offset = 0;
8497 		spd.partial[i].private = (unsigned long)ref;
8498 		spd.nr_pages++;
8499 		*ppos += PAGE_SIZE;
8500 
8501 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8502 	}
8503 
8504 	trace_access_unlock(iter->cpu_file);
8505 	spd.nr_pages = i;
8506 
8507 	/* did we read anything? */
8508 	if (!spd.nr_pages) {
8509 		long wait_index;
8510 
8511 		if (ret)
8512 			goto out;
8513 
8514 		ret = -EAGAIN;
8515 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8516 			goto out;
8517 
8518 		wait_index = READ_ONCE(iter->wait_index);
8519 
8520 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8521 		if (ret)
8522 			goto out;
8523 
8524 		/* No need to wait after waking up when tracing is off */
8525 		if (!tracer_tracing_is_on(iter->tr))
8526 			goto out;
8527 
8528 		/* Make sure we see the new wait_index */
8529 		smp_rmb();
8530 		if (wait_index != iter->wait_index)
8531 			goto out;
8532 
8533 		goto again;
8534 	}
8535 
8536 	ret = splice_to_pipe(pipe, &spd);
8537 out:
8538 	splice_shrink_spd(&spd);
8539 
8540 	return ret;
8541 }
8542 
8543 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8544 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8545 {
8546 	struct ftrace_buffer_info *info = file->private_data;
8547 	struct trace_iterator *iter = &info->iter;
8548 
8549 	if (cmd)
8550 		return -ENOIOCTLCMD;
8551 
8552 	mutex_lock(&trace_types_lock);
8553 
8554 	iter->wait_index++;
8555 	/* Make sure the waiters see the new wait_index */
8556 	smp_wmb();
8557 
8558 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8559 
8560 	mutex_unlock(&trace_types_lock);
8561 	return 0;
8562 }
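
/*
 * Editor's note: illustrative only.  A thread blocked reading or splicing
 * from a per-cpu trace_pipe_raw file can be released by another thread
 * issuing the cmd-0 ioctl on the same file descriptor:
 *
 *	ioctl(fd, 0);
 */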
8563 
8564 static const struct file_operations tracing_buffers_fops = {
8565 	.open		= tracing_buffers_open,
8566 	.read		= tracing_buffers_read,
8567 	.poll		= tracing_buffers_poll,
8568 	.release	= tracing_buffers_release,
8569 	.splice_read	= tracing_buffers_splice_read,
8570 	.unlocked_ioctl = tracing_buffers_ioctl,
8571 	.llseek		= no_llseek,
8572 };
8573 
8574 static ssize_t
8575 tracing_stats_read(struct file *filp, char __user *ubuf,
8576 		   size_t count, loff_t *ppos)
8577 {
8578 	struct inode *inode = file_inode(filp);
8579 	struct trace_array *tr = inode->i_private;
8580 	struct array_buffer *trace_buf = &tr->array_buffer;
8581 	int cpu = tracing_get_cpu(inode);
8582 	struct trace_seq *s;
8583 	unsigned long cnt;
8584 	unsigned long long t;
8585 	unsigned long usec_rem;
8586 
8587 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8588 	if (!s)
8589 		return -ENOMEM;
8590 
8591 	trace_seq_init(s);
8592 
8593 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8594 	trace_seq_printf(s, "entries: %ld\n", cnt);
8595 
8596 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8597 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8598 
8599 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8600 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8601 
8602 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8603 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8604 
8605 	if (trace_clocks[tr->clock_id].in_ns) {
8606 		/* local or global for trace_clock */
8607 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8608 		usec_rem = do_div(t, USEC_PER_SEC);
8609 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8610 								t, usec_rem);
8611 
8612 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8613 		usec_rem = do_div(t, USEC_PER_SEC);
8614 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8615 	} else {
8616 		/* counter or tsc mode for trace_clock */
8617 		trace_seq_printf(s, "oldest event ts: %llu\n",
8618 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8619 
8620 		trace_seq_printf(s, "now ts: %llu\n",
8621 				ring_buffer_time_stamp(trace_buf->buffer));
8622 	}
8623 
8624 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8625 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8626 
8627 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8628 	trace_seq_printf(s, "read events: %ld\n", cnt);
8629 
8630 	count = simple_read_from_buffer(ubuf, count, ppos,
8631 					s->buffer, trace_seq_used(s));
8632 
8633 	kfree(s);
8634 
8635 	return count;
8636 }
8637 
8638 static const struct file_operations tracing_stats_fops = {
8639 	.open		= tracing_open_generic_tr,
8640 	.read		= tracing_stats_read,
8641 	.llseek		= generic_file_llseek,
8642 	.release	= tracing_release_generic_tr,
8643 };
8644 
8645 #ifdef CONFIG_DYNAMIC_FTRACE
8646 
8647 static ssize_t
8648 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8649 		  size_t cnt, loff_t *ppos)
8650 {
8651 	ssize_t ret;
8652 	char *buf;
8653 	int r;
8654 
8655 	/* 256 should be plenty to hold the amount needed */
8656 	buf = kmalloc(256, GFP_KERNEL);
8657 	if (!buf)
8658 		return -ENOMEM;
8659 
8660 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8661 		      ftrace_update_tot_cnt,
8662 		      ftrace_number_of_pages,
8663 		      ftrace_number_of_groups);
8664 
8665 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8666 	kfree(buf);
8667 	return ret;
8668 }
8669 
8670 static const struct file_operations tracing_dyn_info_fops = {
8671 	.open		= tracing_open_generic,
8672 	.read		= tracing_read_dyn_info,
8673 	.llseek		= generic_file_llseek,
8674 };
8675 #endif /* CONFIG_DYNAMIC_FTRACE */
8676 
8677 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8678 static void
8679 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8680 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8681 		void *data)
8682 {
8683 	tracing_snapshot_instance(tr);
8684 }
8685 
8686 static void
8687 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8688 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8689 		      void *data)
8690 {
8691 	struct ftrace_func_mapper *mapper = data;
8692 	long *count = NULL;
8693 
8694 	if (mapper)
8695 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8696 
8697 	if (count) {
8698 
8699 		if (*count <= 0)
8700 			return;
8701 
8702 		(*count)--;
8703 	}
8704 
8705 	tracing_snapshot_instance(tr);
8706 }
8707 
8708 static int
8709 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8710 		      struct ftrace_probe_ops *ops, void *data)
8711 {
8712 	struct ftrace_func_mapper *mapper = data;
8713 	long *count = NULL;
8714 
8715 	seq_printf(m, "%ps:", (void *)ip);
8716 
8717 	seq_puts(m, "snapshot");
8718 
8719 	if (mapper)
8720 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8721 
8722 	if (count)
8723 		seq_printf(m, ":count=%ld\n", *count);
8724 	else
8725 		seq_puts(m, ":unlimited\n");
8726 
8727 	return 0;
8728 }
8729 
8730 static int
8731 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8732 		     unsigned long ip, void *init_data, void **data)
8733 {
8734 	struct ftrace_func_mapper *mapper = *data;
8735 
8736 	if (!mapper) {
8737 		mapper = allocate_ftrace_func_mapper();
8738 		if (!mapper)
8739 			return -ENOMEM;
8740 		*data = mapper;
8741 	}
8742 
8743 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8744 }
8745 
8746 static void
8747 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8748 		     unsigned long ip, void *data)
8749 {
8750 	struct ftrace_func_mapper *mapper = data;
8751 
8752 	if (!ip) {
8753 		if (!mapper)
8754 			return;
8755 		free_ftrace_func_mapper(mapper, NULL);
8756 		return;
8757 	}
8758 
8759 	ftrace_func_mapper_remove_ip(mapper, ip);
8760 }
8761 
8762 static struct ftrace_probe_ops snapshot_probe_ops = {
8763 	.func			= ftrace_snapshot,
8764 	.print			= ftrace_snapshot_print,
8765 };
8766 
8767 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8768 	.func			= ftrace_count_snapshot,
8769 	.print			= ftrace_snapshot_print,
8770 	.init			= ftrace_snapshot_init,
8771 	.free			= ftrace_snapshot_free,
8772 };
8773 
8774 static int
8775 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8776 			       char *glob, char *cmd, char *param, int enable)
8777 {
8778 	struct ftrace_probe_ops *ops;
8779 	void *count = (void *)-1;
8780 	char *number;
8781 	int ret;
8782 
8783 	if (!tr)
8784 		return -ENODEV;
8785 
8786 	/* hash funcs only work with set_ftrace_filter */
8787 	if (!enable)
8788 		return -EINVAL;
8789 
8790 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8791 
8792 	if (glob[0] == '!')
8793 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8794 
8795 	if (!param)
8796 		goto out_reg;
8797 
8798 	number = strsep(&param, ":");
8799 
8800 	if (!strlen(number))
8801 		goto out_reg;
8802 
8803 	/*
8804 	 * We use the callback data field (which is a pointer)
8805 	 * as our counter.
8806 	 */
8807 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8808 	if (ret)
8809 		return ret;
8810 
8811  out_reg:
8812 	ret = tracing_alloc_snapshot_instance(tr);
8813 	if (ret < 0)
8814 		goto out;
8815 
8816 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8817 
8818  out:
8819 	return ret < 0 ? ret : 0;
8820 }
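
/*
 * Editor's note: this callback implements the "snapshot" command of
 * set_ftrace_filter.  Illustrative usage (function name chosen as an
 * example):
 *
 *	echo 'schedule:snapshot:1' > set_ftrace_filter
 *	echo '!schedule:snapshot'  > set_ftrace_filter
 *
 * The first line arms a probe that takes one snapshot the next time
 * schedule() is traced; the second removes it again (the '!' prefix is
 * handled above via unregister_ftrace_function_probe_func()).
 */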
8821 
8822 static struct ftrace_func_command ftrace_snapshot_cmd = {
8823 	.name			= "snapshot",
8824 	.func			= ftrace_trace_snapshot_callback,
8825 };
8826 
8827 static __init int register_snapshot_cmd(void)
8828 {
8829 	return register_ftrace_command(&ftrace_snapshot_cmd);
8830 }
8831 #else
8832 static inline __init int register_snapshot_cmd(void) { return 0; }
8833 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8834 
8835 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8836 {
8837 	if (WARN_ON(!tr->dir))
8838 		return ERR_PTR(-ENODEV);
8839 
8840 	/* Top directory uses NULL as the parent */
8841 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8842 		return NULL;
8843 
8844 	/* All sub buffers have a descriptor */
8845 	return tr->dir;
8846 }
8847 
8848 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8849 {
8850 	struct dentry *d_tracer;
8851 
8852 	if (tr->percpu_dir)
8853 		return tr->percpu_dir;
8854 
8855 	d_tracer = tracing_get_dentry(tr);
8856 	if (IS_ERR(d_tracer))
8857 		return NULL;
8858 
8859 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8860 
8861 	MEM_FAIL(!tr->percpu_dir,
8862 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8863 
8864 	return tr->percpu_dir;
8865 }
8866 
8867 static struct dentry *
8868 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8869 		      void *data, long cpu, const struct file_operations *fops)
8870 {
8871 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8872 
8873 	if (ret) /* See tracing_get_cpu() */
8874 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8875 	return ret;
8876 }
8877 
8878 static void
8879 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8880 {
8881 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8882 	struct dentry *d_cpu;
8883 	char cpu_dir[30]; /* 30 characters should be more than enough */
8884 
8885 	if (!d_percpu)
8886 		return;
8887 
8888 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8889 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8890 	if (!d_cpu) {
8891 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8892 		return;
8893 	}
8894 
8895 	/* per cpu trace_pipe */
8896 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8897 				tr, cpu, &tracing_pipe_fops);
8898 
8899 	/* per cpu trace */
8900 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8901 				tr, cpu, &tracing_fops);
8902 
8903 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8904 				tr, cpu, &tracing_buffers_fops);
8905 
8906 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8907 				tr, cpu, &tracing_stats_fops);
8908 
8909 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8910 				tr, cpu, &tracing_entries_fops);
8911 
8912 #ifdef CONFIG_TRACER_SNAPSHOT
8913 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8914 				tr, cpu, &snapshot_fops);
8915 
8916 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8917 				tr, cpu, &snapshot_raw_fops);
8918 #endif
8919 }
8920 
8921 #ifdef CONFIG_FTRACE_SELFTEST
8922 /* Let selftest have access to static functions in this file */
8923 #include "trace_selftest.c"
8924 #endif
8925 
8926 static ssize_t
8927 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8928 			loff_t *ppos)
8929 {
8930 	struct trace_option_dentry *topt = filp->private_data;
8931 	char *buf;
8932 
8933 	if (topt->flags->val & topt->opt->bit)
8934 		buf = "1\n";
8935 	else
8936 		buf = "0\n";
8937 
8938 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8939 }
8940 
8941 static ssize_t
8942 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8943 			 loff_t *ppos)
8944 {
8945 	struct trace_option_dentry *topt = filp->private_data;
8946 	unsigned long val;
8947 	int ret;
8948 
8949 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8950 	if (ret)
8951 		return ret;
8952 
8953 	if (val != 0 && val != 1)
8954 		return -EINVAL;
8955 
8956 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8957 		mutex_lock(&trace_types_lock);
8958 		ret = __set_tracer_option(topt->tr, topt->flags,
8959 					  topt->opt, !val);
8960 		mutex_unlock(&trace_types_lock);
8961 		if (ret)
8962 			return ret;
8963 	}
8964 
8965 	*ppos += cnt;
8966 
8967 	return cnt;
8968 }
8969 
8970 
8971 static const struct file_operations trace_options_fops = {
8972 	.open = tracing_open_generic,
8973 	.read = trace_options_read,
8974 	.write = trace_options_write,
8975 	.llseek	= generic_file_llseek,
8976 };
8977 
8978 /*
8979  * In order to pass in both the trace_array descriptor and the index
8980  * to the flag that the trace option file represents, the trace_array
8981  * has a character array of trace_flags_index[], which holds the index
8982  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8983  * The address of this character array is passed to the flag option file
8984  * read/write callbacks.
8985  *
8986  * In order to extract both the index and the trace_array descriptor,
8987  * get_tr_index() uses the following algorithm.
8988  *
8989  *   idx = *ptr;
8990  *
8991  * As the pointer itself points at the index entry (and, remember,
8992  * index[1] == 1), dereferencing it yields the flag's bit index.
8993  *
8994  * Then, by subtracting that index from the pointer, we get back to the
8995  * start of the index array:
8996  *
8997  *   ptr - idx == &index[0]
8998  *
8999  * Then a simple container_of() from that pointer gets us to the
9000  * trace_array descriptor.
9001  */
9002 static void get_tr_index(void *data, struct trace_array **ptr,
9003 			 unsigned int *pindex)
9004 {
9005 	*pindex = *(unsigned char *)data;
9006 
9007 	*ptr = container_of(data - *pindex, struct trace_array,
9008 			    trace_flags_index);
9009 }
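
/*
 * Editor's note: a concrete walk-through of the scheme described above,
 * using a hypothetical caller:
 *
 *	struct trace_array *tr;
 *	unsigned int idx;
 *	void *data = &some_tr->trace_flags_index[5];
 *
 *	get_tr_index(data, &tr, &idx);
 *
 * yields idx == 5 and, because data - 5 == &some_tr->trace_flags_index[0],
 * the container_of() hands back tr == some_tr.
 */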
9010 
9011 static ssize_t
9012 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9013 			loff_t *ppos)
9014 {
9015 	void *tr_index = filp->private_data;
9016 	struct trace_array *tr;
9017 	unsigned int index;
9018 	char *buf;
9019 
9020 	get_tr_index(tr_index, &tr, &index);
9021 
9022 	if (tr->trace_flags & (1 << index))
9023 		buf = "1\n";
9024 	else
9025 		buf = "0\n";
9026 
9027 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9028 }
9029 
9030 static ssize_t
9031 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9032 			 loff_t *ppos)
9033 {
9034 	void *tr_index = filp->private_data;
9035 	struct trace_array *tr;
9036 	unsigned int index;
9037 	unsigned long val;
9038 	int ret;
9039 
9040 	get_tr_index(tr_index, &tr, &index);
9041 
9042 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9043 	if (ret)
9044 		return ret;
9045 
9046 	if (val != 0 && val != 1)
9047 		return -EINVAL;
9048 
9049 	mutex_lock(&event_mutex);
9050 	mutex_lock(&trace_types_lock);
9051 	ret = set_tracer_flag(tr, 1 << index, val);
9052 	mutex_unlock(&trace_types_lock);
9053 	mutex_unlock(&event_mutex);
9054 
9055 	if (ret < 0)
9056 		return ret;
9057 
9058 	*ppos += cnt;
9059 
9060 	return cnt;
9061 }
9062 
9063 static const struct file_operations trace_options_core_fops = {
9064 	.open = tracing_open_generic,
9065 	.read = trace_options_core_read,
9066 	.write = trace_options_core_write,
9067 	.llseek = generic_file_llseek,
9068 };
9069 
9070 struct dentry *trace_create_file(const char *name,
9071 				 umode_t mode,
9072 				 struct dentry *parent,
9073 				 void *data,
9074 				 const struct file_operations *fops)
9075 {
9076 	struct dentry *ret;
9077 
9078 	ret = tracefs_create_file(name, mode, parent, data, fops);
9079 	if (!ret)
9080 		pr_warn("Could not create tracefs '%s' entry\n", name);
9081 
9082 	return ret;
9083 }
9084 
9085 
9086 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9087 {
9088 	struct dentry *d_tracer;
9089 
9090 	if (tr->options)
9091 		return tr->options;
9092 
9093 	d_tracer = tracing_get_dentry(tr);
9094 	if (IS_ERR(d_tracer))
9095 		return NULL;
9096 
9097 	tr->options = tracefs_create_dir("options", d_tracer);
9098 	if (!tr->options) {
9099 		pr_warn("Could not create tracefs directory 'options'\n");
9100 		return NULL;
9101 	}
9102 
9103 	return tr->options;
9104 }
9105 
9106 static void
9107 create_trace_option_file(struct trace_array *tr,
9108 			 struct trace_option_dentry *topt,
9109 			 struct tracer_flags *flags,
9110 			 struct tracer_opt *opt)
9111 {
9112 	struct dentry *t_options;
9113 
9114 	t_options = trace_options_init_dentry(tr);
9115 	if (!t_options)
9116 		return;
9117 
9118 	topt->flags = flags;
9119 	topt->opt = opt;
9120 	topt->tr = tr;
9121 
9122 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9123 					t_options, topt, &trace_options_fops);
9124 
9125 }
9126 
9127 static void
9128 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9129 {
9130 	struct trace_option_dentry *topts;
9131 	struct trace_options *tr_topts;
9132 	struct tracer_flags *flags;
9133 	struct tracer_opt *opts;
9134 	int cnt;
9135 	int i;
9136 
9137 	if (!tracer)
9138 		return;
9139 
9140 	flags = tracer->flags;
9141 
9142 	if (!flags || !flags->opts)
9143 		return;
9144 
9145 	/*
9146 	 * If this is an instance, only create flags for tracers
9147 	 * the instance may have.
9148 	 */
9149 	if (!trace_ok_for_array(tracer, tr))
9150 		return;
9151 
9152 	for (i = 0; i < tr->nr_topts; i++) {
9153 		/* Make sure there are no duplicate flags. */
9154 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9155 			return;
9156 	}
9157 
9158 	opts = flags->opts;
9159 
9160 	for (cnt = 0; opts[cnt].name; cnt++)
9161 		;
9162 
9163 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9164 	if (!topts)
9165 		return;
9166 
9167 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9168 			    GFP_KERNEL);
9169 	if (!tr_topts) {
9170 		kfree(topts);
9171 		return;
9172 	}
9173 
9174 	tr->topts = tr_topts;
9175 	tr->topts[tr->nr_topts].tracer = tracer;
9176 	tr->topts[tr->nr_topts].topts = topts;
9177 	tr->nr_topts++;
9178 
9179 	for (cnt = 0; opts[cnt].name; cnt++) {
9180 		create_trace_option_file(tr, &topts[cnt], flags,
9181 					 &opts[cnt]);
9182 		MEM_FAIL(topts[cnt].entry == NULL,
9183 			  "Failed to create trace option: %s",
9184 			  opts[cnt].name);
9185 	}
9186 }
9187 
9188 static struct dentry *
9189 create_trace_option_core_file(struct trace_array *tr,
9190 			      const char *option, long index)
9191 {
9192 	struct dentry *t_options;
9193 
9194 	t_options = trace_options_init_dentry(tr);
9195 	if (!t_options)
9196 		return NULL;
9197 
9198 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9199 				 (void *)&tr->trace_flags_index[index],
9200 				 &trace_options_core_fops);
9201 }
9202 
9203 static void create_trace_options_dir(struct trace_array *tr)
9204 {
9205 	struct dentry *t_options;
9206 	bool top_level = tr == &global_trace;
9207 	int i;
9208 
9209 	t_options = trace_options_init_dentry(tr);
9210 	if (!t_options)
9211 		return;
9212 
9213 	for (i = 0; trace_options[i]; i++) {
9214 		if (top_level ||
9215 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9216 			create_trace_option_core_file(tr, trace_options[i], i);
9217 	}
9218 }
9219 
9220 static ssize_t
9221 rb_simple_read(struct file *filp, char __user *ubuf,
9222 	       size_t cnt, loff_t *ppos)
9223 {
9224 	struct trace_array *tr = filp->private_data;
9225 	char buf[64];
9226 	int r;
9227 
9228 	r = tracer_tracing_is_on(tr);
9229 	r = sprintf(buf, "%d\n", r);
9230 
9231 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9232 }
9233 
9234 static ssize_t
9235 rb_simple_write(struct file *filp, const char __user *ubuf,
9236 		size_t cnt, loff_t *ppos)
9237 {
9238 	struct trace_array *tr = filp->private_data;
9239 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9240 	unsigned long val;
9241 	int ret;
9242 
9243 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9244 	if (ret)
9245 		return ret;
9246 
9247 	if (buffer) {
9248 		mutex_lock(&trace_types_lock);
9249 		if (!!val == tracer_tracing_is_on(tr)) {
9250 			val = 0; /* do nothing */
9251 		} else if (val) {
9252 			tracer_tracing_on(tr);
9253 			if (tr->current_trace->start)
9254 				tr->current_trace->start(tr);
9255 		} else {
9256 			tracer_tracing_off(tr);
9257 			if (tr->current_trace->stop)
9258 				tr->current_trace->stop(tr);
9259 			/* Wake up any waiters */
9260 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9261 		}
9262 		mutex_unlock(&trace_types_lock);
9263 	}
9264 
9265 	(*ppos)++;
9266 
9267 	return cnt;
9268 }
9269 
9270 static const struct file_operations rb_simple_fops = {
9271 	.open		= tracing_open_generic_tr,
9272 	.read		= rb_simple_read,
9273 	.write		= rb_simple_write,
9274 	.release	= tracing_release_generic_tr,
9275 	.llseek		= default_llseek,
9276 };
9277 
9278 static ssize_t
9279 buffer_percent_read(struct file *filp, char __user *ubuf,
9280 		    size_t cnt, loff_t *ppos)
9281 {
9282 	struct trace_array *tr = filp->private_data;
9283 	char buf[64];
9284 	int r;
9285 
9286 	r = tr->buffer_percent;
9287 	r = sprintf(buf, "%d\n", r);
9288 
9289 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9290 }
9291 
9292 static ssize_t
9293 buffer_percent_write(struct file *filp, const char __user *ubuf,
9294 		     size_t cnt, loff_t *ppos)
9295 {
9296 	struct trace_array *tr = filp->private_data;
9297 	unsigned long val;
9298 	int ret;
9299 
9300 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9301 	if (ret)
9302 		return ret;
9303 
9304 	if (val > 100)
9305 		return -EINVAL;
9306 
9307 	tr->buffer_percent = val;
9308 
9309 	(*ppos)++;
9310 
9311 	return cnt;
9312 }
9313 
9314 static const struct file_operations buffer_percent_fops = {
9315 	.open		= tracing_open_generic_tr,
9316 	.read		= buffer_percent_read,
9317 	.write		= buffer_percent_write,
9318 	.release	= tracing_release_generic_tr,
9319 	.llseek		= default_llseek,
9320 };
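
/*
 * Editor's note: buffer_percent controls how full the ring buffer must be
 * before blocked readers of trace_pipe_raw are woken (see the
 * wait_on_pipe(iter, iter->tr->buffer_percent) call in
 * tracing_buffers_splice_read() above); 0 wakes them as soon as any data
 * is available.  Illustrative userspace sketch (tracefs path assumed):
 *
 *	int fd = open("/sys/kernel/tracing/buffer_percent", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "50", 2);
 *		close(fd);
 *	}
 */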
9321 
9322 static struct dentry *trace_instance_dir;
9323 
9324 static void
9325 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9326 
9327 static int
9328 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9329 {
9330 	enum ring_buffer_flags rb_flags;
9331 
9332 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9333 
9334 	buf->tr = tr;
9335 
9336 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9337 	if (!buf->buffer)
9338 		return -ENOMEM;
9339 
9340 	buf->data = alloc_percpu(struct trace_array_cpu);
9341 	if (!buf->data) {
9342 		ring_buffer_free(buf->buffer);
9343 		buf->buffer = NULL;
9344 		return -ENOMEM;
9345 	}
9346 
9347 	/* Allocate the first page for all buffers */
9348 	set_buffer_entries(&tr->array_buffer,
9349 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9350 
9351 	return 0;
9352 }
9353 
9354 static void free_trace_buffer(struct array_buffer *buf)
9355 {
9356 	if (buf->buffer) {
9357 		ring_buffer_free(buf->buffer);
9358 		buf->buffer = NULL;
9359 		free_percpu(buf->data);
9360 		buf->data = NULL;
9361 	}
9362 }
9363 
9364 static int allocate_trace_buffers(struct trace_array *tr, int size)
9365 {
9366 	int ret;
9367 
9368 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9369 	if (ret)
9370 		return ret;
9371 
9372 #ifdef CONFIG_TRACER_MAX_TRACE
9373 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9374 				    allocate_snapshot ? size : 1);
9375 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9376 		free_trace_buffer(&tr->array_buffer);
9377 		return -ENOMEM;
9378 	}
9379 	tr->allocated_snapshot = allocate_snapshot;
9380 
9381 	allocate_snapshot = false;
9382 #endif
9383 
9384 	return 0;
9385 }
9386 
9387 static void free_trace_buffers(struct trace_array *tr)
9388 {
9389 	if (!tr)
9390 		return;
9391 
9392 	free_trace_buffer(&tr->array_buffer);
9393 
9394 #ifdef CONFIG_TRACER_MAX_TRACE
9395 	free_trace_buffer(&tr->max_buffer);
9396 #endif
9397 }
9398 
9399 static void init_trace_flags_index(struct trace_array *tr)
9400 {
9401 	int i;
9402 
9403 	/* Used by the trace options files */
9404 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9405 		tr->trace_flags_index[i] = i;
9406 }
9407 
9408 static void __update_tracer_options(struct trace_array *tr)
9409 {
9410 	struct tracer *t;
9411 
9412 	for (t = trace_types; t; t = t->next)
9413 		add_tracer_options(tr, t);
9414 }
9415 
9416 static void update_tracer_options(struct trace_array *tr)
9417 {
9418 	mutex_lock(&trace_types_lock);
9419 	tracer_options_updated = true;
9420 	__update_tracer_options(tr);
9421 	mutex_unlock(&trace_types_lock);
9422 }
9423 
9424 /* Must have trace_types_lock held */
9425 struct trace_array *trace_array_find(const char *instance)
9426 {
9427 	struct trace_array *tr, *found = NULL;
9428 
9429 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9430 		if (tr->name && strcmp(tr->name, instance) == 0) {
9431 			found = tr;
9432 			break;
9433 		}
9434 	}
9435 
9436 	return found;
9437 }
9438 
9439 struct trace_array *trace_array_find_get(const char *instance)
9440 {
9441 	struct trace_array *tr;
9442 
9443 	mutex_lock(&trace_types_lock);
9444 	tr = trace_array_find(instance);
9445 	if (tr)
9446 		tr->ref++;
9447 	mutex_unlock(&trace_types_lock);
9448 
9449 	return tr;
9450 }
9451 
9452 static int trace_array_create_dir(struct trace_array *tr)
9453 {
9454 	int ret;
9455 
9456 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9457 	if (!tr->dir)
9458 		return -EINVAL;
9459 
9460 	ret = event_trace_add_tracer(tr->dir, tr);
9461 	if (ret) {
9462 		tracefs_remove(tr->dir);
9463 		return ret;
9464 	}
9465 
9466 	init_tracer_tracefs(tr, tr->dir);
9467 	__update_tracer_options(tr);
9468 
9469 	return ret;
9470 }
9471 
9472 static struct trace_array *trace_array_create(const char *name)
9473 {
9474 	struct trace_array *tr;
9475 	int ret;
9476 
9477 	ret = -ENOMEM;
9478 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9479 	if (!tr)
9480 		return ERR_PTR(ret);
9481 
9482 	tr->name = kstrdup(name, GFP_KERNEL);
9483 	if (!tr->name)
9484 		goto out_free_tr;
9485 
9486 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9487 		goto out_free_tr;
9488 
9489 	if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9490 		goto out_free_tr;
9491 
9492 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9493 
9494 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9495 
9496 	raw_spin_lock_init(&tr->start_lock);
9497 
9498 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9499 
9500 	tr->current_trace = &nop_trace;
9501 
9502 	INIT_LIST_HEAD(&tr->systems);
9503 	INIT_LIST_HEAD(&tr->events);
9504 	INIT_LIST_HEAD(&tr->hist_vars);
9505 	INIT_LIST_HEAD(&tr->err_log);
9506 
9507 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9508 		goto out_free_tr;
9509 
9510 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9511 		goto out_free_tr;
9512 
9513 	ftrace_init_trace_array(tr);
9514 
9515 	init_trace_flags_index(tr);
9516 
9517 	if (trace_instance_dir) {
9518 		ret = trace_array_create_dir(tr);
9519 		if (ret)
9520 			goto out_free_tr;
9521 	} else
9522 		__trace_early_add_events(tr);
9523 
9524 	list_add(&tr->list, &ftrace_trace_arrays);
9525 
9526 	tr->ref++;
9527 
9528 	return tr;
9529 
9530  out_free_tr:
9531 	ftrace_free_ftrace_ops(tr);
9532 	free_trace_buffers(tr);
9533 	free_cpumask_var(tr->pipe_cpumask);
9534 	free_cpumask_var(tr->tracing_cpumask);
9535 	kfree(tr->name);
9536 	kfree(tr);
9537 
9538 	return ERR_PTR(ret);
9539 }
9540 
9541 static int instance_mkdir(const char *name)
9542 {
9543 	struct trace_array *tr;
9544 	int ret;
9545 
9546 	mutex_lock(&event_mutex);
9547 	mutex_lock(&trace_types_lock);
9548 
9549 	ret = -EEXIST;
9550 	if (trace_array_find(name))
9551 		goto out_unlock;
9552 
9553 	tr = trace_array_create(name);
9554 
9555 	ret = PTR_ERR_OR_ZERO(tr);
9556 
9557 out_unlock:
9558 	mutex_unlock(&trace_types_lock);
9559 	mutex_unlock(&event_mutex);
9560 	return ret;
9561 }
9562 
9563 /**
9564  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9565  * @name: The name of the trace array to be looked up/created.
9566  *
9567  * Returns a pointer to the trace array with the given name, or
9568  * NULL if it cannot be created.
9569  *
9570  * NOTE: This function increments the reference counter associated with the
9571  * trace array returned. This makes sure it cannot be freed while in use.
9572  * Use trace_array_put() once the trace array is no longer needed.
9573  * If the trace_array is to be freed, trace_array_destroy() needs to
9574  * be called after the trace_array_put(), or simply let user space delete
9575  * it from the tracefs instances directory. But until the
9576  * trace_array_put() is called, user space cannot delete it.
9577  *
9578  */
9579 struct trace_array *trace_array_get_by_name(const char *name)
9580 {
9581 	struct trace_array *tr;
9582 
9583 	mutex_lock(&event_mutex);
9584 	mutex_lock(&trace_types_lock);
9585 
9586 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9587 		if (tr->name && strcmp(tr->name, name) == 0)
9588 			goto out_unlock;
9589 	}
9590 
9591 	tr = trace_array_create(name);
9592 
9593 	if (IS_ERR(tr))
9594 		tr = NULL;
9595 out_unlock:
9596 	if (tr)
9597 		tr->ref++;
9598 
9599 	mutex_unlock(&trace_types_lock);
9600 	mutex_unlock(&event_mutex);
9601 	return tr;
9602 }
9603 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
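
/*
 * Illustrative sketch (not built here) of how a kernel module uses the
 * instance API exported above; the instance name "my_instance" and the
 * chosen sched events are example values only. The calls themselves
 * (trace_array_get_by_name(), trace_array_set_clr_event(),
 * trace_array_printk(), trace_array_put(), trace_array_destroy()) are the
 * exported interfaces, and the put/destroy ordering follows the kernel-doc
 * of trace_array_get_by_name():
 *
 *	#include <linux/trace.h>
 *
 *	static struct trace_array *my_tr;
 *
 *	static int __init my_mod_init(void)
 *	{
 *		my_tr = trace_array_get_by_name("my_instance");
 *		if (!my_tr)
 *			return -ENOMEM;
 *
 *		trace_array_set_clr_event(my_tr, "sched", "sched_switch", true);
 *		trace_array_printk(my_tr, _THIS_IP_, "instance ready\n");
 *		return 0;
 *	}
 *
 *	static void __exit my_mod_exit(void)
 *	{
 *		trace_array_put(my_tr);
 *		trace_array_destroy(my_tr);
 *	}
 */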
9604 
9605 static int __remove_instance(struct trace_array *tr)
9606 {
9607 	int i;
9608 
9609 	/* Reference counter for a newly created trace array = 1. */
9610 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9611 		return -EBUSY;
9612 
9613 	list_del(&tr->list);
9614 
9615 	/* Disable all the flags that were enabled coming in */
9616 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9617 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9618 			set_tracer_flag(tr, 1 << i, 0);
9619 	}
9620 
9621 	tracing_set_nop(tr);
9622 	clear_ftrace_function_probes(tr);
9623 	event_trace_del_tracer(tr);
9624 	ftrace_clear_pids(tr);
9625 	ftrace_destroy_function_files(tr);
9626 	tracefs_remove(tr->dir);
9627 	free_percpu(tr->last_func_repeats);
9628 	free_trace_buffers(tr);
9629 	clear_tracing_err_log(tr);
9630 
9631 	for (i = 0; i < tr->nr_topts; i++) {
9632 		kfree(tr->topts[i].topts);
9633 	}
9634 	kfree(tr->topts);
9635 
9636 	free_cpumask_var(tr->pipe_cpumask);
9637 	free_cpumask_var(tr->tracing_cpumask);
9638 	kfree(tr->name);
9639 	kfree(tr);
9640 
9641 	return 0;
9642 }
9643 
9644 int trace_array_destroy(struct trace_array *this_tr)
9645 {
9646 	struct trace_array *tr;
9647 	int ret;
9648 
9649 	if (!this_tr)
9650 		return -EINVAL;
9651 
9652 	mutex_lock(&event_mutex);
9653 	mutex_lock(&trace_types_lock);
9654 
9655 	ret = -ENODEV;
9656 
9657 	/* Make sure the trace array exists before destroying it. */
9658 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9659 		if (tr == this_tr) {
9660 			ret = __remove_instance(tr);
9661 			break;
9662 		}
9663 	}
9664 
9665 	mutex_unlock(&trace_types_lock);
9666 	mutex_unlock(&event_mutex);
9667 
9668 	return ret;
9669 }
9670 EXPORT_SYMBOL_GPL(trace_array_destroy);
9671 
9672 static int instance_rmdir(const char *name)
9673 {
9674 	struct trace_array *tr;
9675 	int ret;
9676 
9677 	mutex_lock(&event_mutex);
9678 	mutex_lock(&trace_types_lock);
9679 
9680 	ret = -ENODEV;
9681 	tr = trace_array_find(name);
9682 	if (tr)
9683 		ret = __remove_instance(tr);
9684 
9685 	mutex_unlock(&trace_types_lock);
9686 	mutex_unlock(&event_mutex);
9687 
9688 	return ret;
9689 }
9690 
9691 static __init void create_trace_instances(struct dentry *d_tracer)
9692 {
9693 	struct trace_array *tr;
9694 
9695 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9696 							 instance_mkdir,
9697 							 instance_rmdir);
9698 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9699 		return;
9700 
9701 	mutex_lock(&event_mutex);
9702 	mutex_lock(&trace_types_lock);
9703 
9704 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9705 		if (!tr->name)
9706 			continue;
9707 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9708 			     "Failed to create instance directory\n"))
9709 			break;
9710 	}
9711 
9712 	mutex_unlock(&trace_types_lock);
9713 	mutex_unlock(&event_mutex);
9714 }
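
/*
 * For reference: the "instances" directory registered above is what user
 * space drives with plain mkdir/rmdir, e.g. (assuming tracefs is mounted
 * at the usual /sys/kernel/tracing location):
 *
 *	mkdir /sys/kernel/tracing/instances/foo   ->  instance_mkdir("foo")
 *	rmdir /sys/kernel/tracing/instances/foo   ->  instance_rmdir("foo")
 */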
9715 
9716 static void
9717 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9718 {
9719 	struct trace_event_file *file;
9720 	int cpu;
9721 
9722 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9723 			tr, &show_traces_fops);
9724 
9725 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9726 			tr, &set_tracer_fops);
9727 
9728 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9729 			  tr, &tracing_cpumask_fops);
9730 
9731 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9732 			  tr, &tracing_iter_fops);
9733 
9734 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9735 			  tr, &tracing_fops);
9736 
9737 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9738 			  tr, &tracing_pipe_fops);
9739 
9740 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9741 			  tr, &tracing_entries_fops);
9742 
9743 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9744 			  tr, &tracing_total_entries_fops);
9745 
9746 	trace_create_file("free_buffer", 0200, d_tracer,
9747 			  tr, &tracing_free_buffer_fops);
9748 
9749 	trace_create_file("trace_marker", 0220, d_tracer,
9750 			  tr, &tracing_mark_fops);
9751 
9752 	file = __find_event_file(tr, "ftrace", "print");
9753 	if (file && file->dir)
9754 		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9755 				  file, &event_trigger_fops);
9756 	tr->trace_marker_file = file;
9757 
9758 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9759 			  tr, &tracing_mark_raw_fops);
9760 
9761 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9762 			  &trace_clock_fops);
9763 
9764 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9765 			  tr, &rb_simple_fops);
9766 
9767 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9768 			  &trace_time_stamp_mode_fops);
9769 
9770 	tr->buffer_percent = 50;
9771 
9772 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9773 			tr, &buffer_percent_fops);
9774 
9775 	create_trace_options_dir(tr);
9776 
9777 #ifdef CONFIG_TRACER_MAX_TRACE
9778 	trace_create_maxlat_file(tr, d_tracer);
9779 #endif
9780 
9781 	if (ftrace_create_function_files(tr, d_tracer))
9782 		MEM_FAIL(1, "Could not allocate function filter files");
9783 
9784 #ifdef CONFIG_TRACER_SNAPSHOT
9785 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9786 			  tr, &snapshot_fops);
9787 #endif
9788 
9789 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9790 			  tr, &tracing_err_log_fops);
9791 
9792 	for_each_tracing_cpu(cpu)
9793 		tracing_init_tracefs_percpu(tr, cpu);
9794 
9795 	ftrace_init_tracefs(tr, d_tracer);
9796 }
9797 
9798 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9799 {
9800 	struct vfsmount *mnt;
9801 	struct file_system_type *type;
9802 
9803 	/*
9804 	 * To maintain backward compatibility for tools that mount
9805 	 * debugfs to get to the tracing facility, tracefs is automatically
9806 	 * mounted to the debugfs/tracing directory.
9807 	 */
9808 	type = get_fs_type("tracefs");
9809 	if (!type)
9810 		return NULL;
9811 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9812 	put_filesystem(type);
9813 	if (IS_ERR(mnt))
9814 		return NULL;
9815 	mntget(mnt);
9816 
9817 	return mnt;
9818 }
9819 
9820 /**
9821  * tracing_init_dentry - initialize top level trace array
9822  *
9823  * This is called when creating files or directories in the tracing
9824  * directory. It is called via fs_initcall() by any of the boot up code
9825  * and expects to return the dentry of the top level tracing directory.
9826  */
9827 int tracing_init_dentry(void)
9828 {
9829 	struct trace_array *tr = &global_trace;
9830 
9831 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9832 		pr_warn("Tracing disabled due to lockdown\n");
9833 		return -EPERM;
9834 	}
9835 
9836 	/* The top level trace array uses NULL as parent */
9837 	if (tr->dir)
9838 		return 0;
9839 
9840 	if (WARN_ON(!tracefs_initialized()))
9841 		return -ENODEV;
9842 
9843 	/*
9844 	 * As there may still be users that expect the tracing
9845 	 * files to exist in debugfs/tracing, we must automount
9846 	 * the tracefs file system there, so older tools still
9847 	 * work with the newer kernel.
9848 	 */
9849 	tr->dir = debugfs_create_automount("tracing", NULL,
9850 					   trace_automount, NULL);
9851 
9852 	return 0;
9853 }
9854 
9855 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9856 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9857 
9858 static struct workqueue_struct *eval_map_wq __initdata;
9859 static struct work_struct eval_map_work __initdata;
9860 static struct work_struct tracerfs_init_work __initdata;
9861 
9862 static void __init eval_map_work_func(struct work_struct *work)
9863 {
9864 	int len;
9865 
9866 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9867 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9868 }
9869 
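/*
 * Inserting the built-in eval (enum) maps is deferred to a workqueue
 * rather than done inline at subsys_initcall time, presumably so that a
 * large number of maps does not slow the boot path; trace_eval_sync()
 * below makes sure the work has completed before late init finishes.
 */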
9870 static int __init trace_eval_init(void)
9871 {
9872 	INIT_WORK(&eval_map_work, eval_map_work_func);
9873 
9874 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9875 	if (!eval_map_wq) {
9876 		pr_err("Unable to allocate eval_map_wq\n");
9877 		/* Do work here */
9878 		eval_map_work_func(&eval_map_work);
9879 		return -ENOMEM;
9880 	}
9881 
9882 	queue_work(eval_map_wq, &eval_map_work);
9883 	return 0;
9884 }
9885 
9886 subsys_initcall(trace_eval_init);
9887 
9888 static int __init trace_eval_sync(void)
9889 {
9890 	/* Make sure the eval map updates are finished */
9891 	if (eval_map_wq)
9892 		destroy_workqueue(eval_map_wq);
9893 	return 0;
9894 }
9895 
9896 late_initcall_sync(trace_eval_sync);
9897
9899 #ifdef CONFIG_MODULES
9900 static void trace_module_add_evals(struct module *mod)
9901 {
9902 	if (!mod->num_trace_evals)
9903 		return;
9904 
9905 	/*
9906 	 * Modules with bad taint do not have events created, so do
9907 	 * not bother with their enums (eval maps) either.
9908 	 */
9909 	if (trace_module_has_bad_taint(mod))
9910 		return;
9911 
9912 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9913 }
9914 
9915 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9916 static void trace_module_remove_evals(struct module *mod)
9917 {
9918 	union trace_eval_map_item *map;
9919 	union trace_eval_map_item **last = &trace_eval_maps;
9920 
9921 	if (!mod->num_trace_evals)
9922 		return;
9923 
9924 	mutex_lock(&trace_eval_mutex);
9925 
9926 	map = trace_eval_maps;
9927 
9928 	while (map) {
9929 		if (map->head.mod == mod)
9930 			break;
9931 		map = trace_eval_jmp_to_tail(map);
9932 		last = &map->tail.next;
9933 		map = map->tail.next;
9934 	}
9935 	if (!map)
9936 		goto out;
9937 
9938 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9939 	kfree(map);
9940  out:
9941 	mutex_unlock(&trace_eval_mutex);
9942 }
9943 #else
9944 static inline void trace_module_remove_evals(struct module *mod) { }
9945 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9946 
9947 static int trace_module_notify(struct notifier_block *self,
9948 			       unsigned long val, void *data)
9949 {
9950 	struct module *mod = data;
9951 
9952 	switch (val) {
9953 	case MODULE_STATE_COMING:
9954 		trace_module_add_evals(mod);
9955 		break;
9956 	case MODULE_STATE_GOING:
9957 		trace_module_remove_evals(mod);
9958 		break;
9959 	}
9960 
9961 	return NOTIFY_OK;
9962 }
9963 
9964 static struct notifier_block trace_module_nb = {
9965 	.notifier_call = trace_module_notify,
9966 	.priority = 0,
9967 };
9968 #endif /* CONFIG_MODULES */
9969 
9970 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9971 {
9973 	event_trace_init();
9974 
9975 	init_tracer_tracefs(&global_trace, NULL);
9976 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9977 
9978 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9979 			&global_trace, &tracing_thresh_fops);
9980 
9981 	trace_create_file("README", TRACE_MODE_READ, NULL,
9982 			NULL, &tracing_readme_fops);
9983 
9984 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9985 			NULL, &tracing_saved_cmdlines_fops);
9986 
9987 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9988 			  NULL, &tracing_saved_cmdlines_size_fops);
9989 
9990 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9991 			NULL, &tracing_saved_tgids_fops);
9992 
9993 	trace_create_eval_file(NULL);
9994 
9995 #ifdef CONFIG_MODULES
9996 	register_module_notifier(&trace_module_nb);
9997 #endif
9998 
9999 #ifdef CONFIG_DYNAMIC_FTRACE
10000 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10001 			NULL, &tracing_dyn_info_fops);
10002 #endif
10003 
10004 	create_trace_instances(NULL);
10005 
10006 	update_tracer_options(&global_trace);
10007 }
10008 
10009 static __init int tracer_init_tracefs(void)
10010 {
10011 	int ret;
10012 
10013 	trace_access_lock_init();
10014 
10015 	ret = tracing_init_dentry();
10016 	if (ret)
10017 		return 0;
10018 
10019 	if (eval_map_wq) {
10020 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10021 		queue_work(eval_map_wq, &tracerfs_init_work);
10022 	} else {
10023 		tracer_init_tracefs_work_func(NULL);
10024 	}
10025 
10026 	rv_init_interface();
10027 
10028 	return 0;
10029 }
10030 
10031 fs_initcall(tracer_init_tracefs);
10032 
10033 static int trace_die_panic_handler(struct notifier_block *self,
10034 				unsigned long ev, void *unused);
10035 
10036 static struct notifier_block trace_panic_notifier = {
10037 	.notifier_call = trace_die_panic_handler,
10038 	.priority = INT_MAX - 1,
10039 };
10040 
10041 static struct notifier_block trace_die_notifier = {
10042 	.notifier_call = trace_die_panic_handler,
10043 	.priority = INT_MAX - 1,
10044 };
10045 
10046 /*
10047  * The idea is to execute the following die/panic callback early, in order
10048  * to avoid showing irrelevant information in the trace (like other panic
10049  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10050  * warnings get disabled (to prevent potential log flooding).
10051  */
10052 static int trace_die_panic_handler(struct notifier_block *self,
10053 				unsigned long ev, void *unused)
10054 {
10055 	if (!ftrace_dump_on_oops)
10056 		return NOTIFY_DONE;
10057 
10058 	/* The die notifier requires DIE_OOPS to trigger */
10059 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10060 		return NOTIFY_DONE;
10061 
10062 	ftrace_dump(ftrace_dump_on_oops);
10063 
10064 	return NOTIFY_DONE;
10065 }
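
/*
 * Note: ftrace_dump_on_oops is normally set either on the kernel command
 * line ("ftrace_dump_on_oops" or "ftrace_dump_on_oops=orig_cpu") or at
 * run time through the kernel.ftrace_dump_on_oops sysctl; both feed the
 * value tested in the handler above.
 */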
10066 
10067 /*
10068  * printk is set to a max of 1024; we really don't need it that big.
10069  * Nothing should be printing 1000 characters anyway.
10070  */
10071 #define TRACE_MAX_PRINT		1000
10072 
10073 /*
10074  * Define here KERN_TRACE so that we have one place to modify
10075  * it if we decide to change what log level the ftrace dump
10076  * should be at.
10077  */
10078 #define KERN_TRACE		KERN_EMERG
10079 
10080 void
10081 trace_printk_seq(struct trace_seq *s)
10082 {
10083 	/* Probably should print a warning here. */
10084 	if (s->seq.len >= TRACE_MAX_PRINT)
10085 		s->seq.len = TRACE_MAX_PRINT;
10086 
10087 	/*
10088 	 * More paranoid code. Although the buffer size is set to
10089 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10090 	 * an extra layer of protection.
10091 	 */
10092 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10093 		s->seq.len = s->seq.size - 1;
10094 
10095 	/* should be zero terminated, but we are paranoid. */
10096 	s->buffer[s->seq.len] = 0;
10097 
10098 	printk(KERN_TRACE "%s", s->buffer);
10099 
10100 	trace_seq_init(s);
10101 }
10102 
10103 void trace_init_global_iter(struct trace_iterator *iter)
10104 {
10105 	iter->tr = &global_trace;
10106 	iter->trace = iter->tr->current_trace;
10107 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10108 	iter->array_buffer = &global_trace.array_buffer;
10109 
10110 	if (iter->trace && iter->trace->open)
10111 		iter->trace->open(iter);
10112 
10113 	/* Annotate start of buffers if we had overruns */
10114 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10115 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10116 
10117 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10118 	if (trace_clocks[iter->tr->clock_id].in_ns)
10119 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10120 
10121 	/* Cannot use kmalloc for iter.temp and iter.fmt */
10122 	iter->temp = static_temp_buf;
10123 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10124 	iter->fmt = static_fmt_buf;
10125 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10126 }
10127 
10128 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10129 {
10130 	/* use static because iter can be a bit big for the stack */
10131 	static struct trace_iterator iter;
10132 	static atomic_t dump_running;
10133 	struct trace_array *tr = &global_trace;
10134 	unsigned int old_userobj;
10135 	unsigned long flags;
10136 	int cnt = 0, cpu;
10137 
10138 	/* Only allow one dump user at a time. */
10139 	if (atomic_inc_return(&dump_running) != 1) {
10140 		atomic_dec(&dump_running);
10141 		return;
10142 	}
10143 
10144 	/*
10145 	 * Always turn off tracing when we dump.
10146 	 * We don't need to show trace output of what happens
10147 	 * between multiple crashes.
10148 	 *
10149 	 * If the user does a sysrq-z, then they can re-enable
10150 	 * tracing with echo 1 > tracing_on.
10151 	 */
10152 	tracing_off();
10153 
10154 	local_irq_save(flags);
10155 
10156 	/* Simulate the iterator */
10157 	trace_init_global_iter(&iter);
10158 
10159 	for_each_tracing_cpu(cpu) {
10160 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10161 	}
10162 
10163 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10164 
10165 	/* don't look at user memory in panic mode */
10166 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10167 
10168 	switch (oops_dump_mode) {
10169 	case DUMP_ALL:
10170 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10171 		break;
10172 	case DUMP_ORIG:
10173 		iter.cpu_file = raw_smp_processor_id();
10174 		break;
10175 	case DUMP_NONE:
10176 		goto out_enable;
10177 	default:
10178 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10179 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10180 	}
10181 
10182 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
10183 
10184 	/* Did function tracer already get disabled? */
10185 	if (ftrace_is_dead()) {
10186 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10187 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10188 	}
10189 
10190 	/*
10191 	 * We need to stop all tracing on all CPUs to read
10192 	 * the next buffer. This is a bit expensive, but is
10193 	 * not done often. We fill all that we can read,
10194 	 * and then release the locks again.
10195 	 */
10196 
10197 	while (!trace_empty(&iter)) {
10198 
10199 		if (!cnt)
10200 			printk(KERN_TRACE "---------------------------------\n");
10201 
10202 		cnt++;
10203 
10204 		trace_iterator_reset(&iter);
10205 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10206 
10207 		if (trace_find_next_entry_inc(&iter) != NULL) {
10208 			int ret;
10209 
10210 			ret = print_trace_line(&iter);
10211 			if (ret != TRACE_TYPE_NO_CONSUME)
10212 				trace_consume(&iter);
10213 		}
10214 		touch_nmi_watchdog();
10215 
10216 		trace_printk_seq(&iter.seq);
10217 	}
10218 
10219 	if (!cnt)
10220 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10221 	else
10222 		printk(KERN_TRACE "---------------------------------\n");
10223 
10224  out_enable:
10225 	tr->trace_flags |= old_userobj;
10226 
10227 	for_each_tracing_cpu(cpu) {
10228 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10229 	}
10230 	atomic_dec(&dump_running);
10231 	local_irq_restore(flags);
10232 }
10233 EXPORT_SYMBOL_GPL(ftrace_dump);
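
/*
 * Minimal usage sketch (hypothetical caller): kernel code that wants the
 * trace buffers spilled to the console on a fatal error can call
 *
 *	ftrace_dump(DUMP_ALL);
 *
 * which dumps the buffers of every CPU; passing DUMP_ORIG instead dumps
 * only the CPU that hit the problem, matching the oops_dump_mode handling
 * above.
 */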
10234 
10235 #define WRITE_BUFSIZE  4096
10236 
10237 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10238 				size_t count, loff_t *ppos,
10239 				int (*createfn)(const char *))
10240 {
10241 	char *kbuf, *buf, *tmp;
10242 	int ret = 0;
10243 	size_t done = 0;
10244 	size_t size;
10245 
10246 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10247 	if (!kbuf)
10248 		return -ENOMEM;
10249 
10250 	while (done < count) {
10251 		size = count - done;
10252 
10253 		if (size >= WRITE_BUFSIZE)
10254 			size = WRITE_BUFSIZE - 1;
10255 
10256 		if (copy_from_user(kbuf, buffer + done, size)) {
10257 			ret = -EFAULT;
10258 			goto out;
10259 		}
10260 		kbuf[size] = '\0';
10261 		buf = kbuf;
10262 		do {
10263 			tmp = strchr(buf, '\n');
10264 			if (tmp) {
10265 				*tmp = '\0';
10266 				size = tmp - buf + 1;
10267 			} else {
10268 				size = strlen(buf);
10269 				if (done + size < count) {
10270 					if (buf != kbuf)
10271 						break;
10272 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10273 					pr_warn("Line length is too long: Should be less than %d\n",
10274 						WRITE_BUFSIZE - 2);
10275 					ret = -EINVAL;
10276 					goto out;
10277 				}
10278 			}
10279 			done += size;
10280 
10281 			/* Remove comments */
10282 			tmp = strchr(buf, '#');
10283 
10284 			if (tmp)
10285 				*tmp = '\0';
10286 
10287 			ret = createfn(buf);
10288 			if (ret)
10289 				goto out;
10290 			buf += size;
10291 
10292 		} while (done < count);
10293 	}
10294 	ret = done;
10295 
10296 out:
10297 	kfree(kbuf);
10298 
10299 	return ret;
10300 }
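
/*
 * Illustrative sketch of how the helper above is normally wired up: a
 * tracefs write handler forwards the user buffer and supplies a createfn
 * that is handed one newline-terminated, '#'-comment-stripped command at
 * a time. The names below are hypothetical, not an existing user:
 *
 *	static int my_create_cmd(const char *raw_command)
 *	{
 *		pr_debug("got command: %s\n", raw_command);
 *		return 0;
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *buffer,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       my_create_cmd);
 *	}
 */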
10301 
10302 #ifdef CONFIG_TRACER_MAX_TRACE
10303 __init static bool tr_needs_alloc_snapshot(const char *name)
10304 {
10305 	char *test;
10306 	int len = strlen(name);
10307 	bool ret;
10308 
10309 	if (!boot_snapshot_index)
10310 		return false;
10311 
10312 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10313 	    boot_snapshot_info[len] == '\t')
10314 		return true;
10315 
10316 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10317 	if (!test)
10318 		return false;
10319 
10320 	sprintf(test, "\t%s\t", name);
10321 	ret = strstr(boot_snapshot_info, test) == NULL;
10322 	kfree(test);
10323 	return ret;
10324 }
10325 
10326 __init static void do_allocate_snapshot(const char *name)
10327 {
10328 	if (!tr_needs_alloc_snapshot(name))
10329 		return;
10330 
10331 	/*
10332 	 * When allocate_snapshot is set, the next call to
10333 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10334 	 * will allocate the snapshot buffer. That will also clear
10335 	 * this flag.
10336 	 */
10337 	allocate_snapshot = true;
10338 }
10339 #else
10340 static inline void do_allocate_snapshot(const char *name) { }
10341 #endif
10342 
10343 __init static void enable_instances(void)
10344 {
10345 	struct trace_array *tr;
10346 	char *curr_str;
10347 	char *str;
10348 	char *tok;
10349 
10350 	/* A tab is always appended */
10351 	boot_instance_info[boot_instance_index - 1] = '\0';
10352 	str = boot_instance_info;
10353 
10354 	while ((curr_str = strsep(&str, "\t"))) {
10355 
10356 		tok = strsep(&curr_str, ",");
10357 
10358 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10359 			do_allocate_snapshot(tok);
10360 
10361 		tr = trace_array_get_by_name(tok);
10362 		if (!tr) {
10363 			pr_warn("Failed to create instance buffer %s\n", tok);
10364 			continue;
10365 		}
10366 		/* Allow user space to delete it */
10367 		trace_array_put(tr);
10368 
10369 		while ((tok = strsep(&curr_str, ","))) {
10370 			early_enable_events(tr, tok, true);
10371 		}
10372 	}
10373 }
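
/*
 * The string parsed above is built from the "trace_instance=" boot
 * parameter, with each instance recorded as "name,event,event,..."
 * followed by a tab. For example (hypothetical values):
 *
 *	trace_instance=foo,sched:sched_switch,sched:sched_waking
 *
 * creates the instance "foo" at early boot and enables those two events
 * in it.
 */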
10374 
10375 __init static int tracer_alloc_buffers(void)
10376 {
10377 	int ring_buf_size;
10378 	int ret = -ENOMEM;
10379
10381 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10382 		pr_warn("Tracing disabled due to lockdown\n");
10383 		return -EPERM;
10384 	}
10385 
10386 	/*
10387 	 * Make sure we don't accidentally add more trace options
10388 	 * than we have bits for.
10389 	 */
10390 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10391 
10392 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10393 		goto out;
10394 
10395 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10396 		goto out_free_buffer_mask;
10397 
10398 	/* Only allocate trace_printk buffers if a trace_printk exists */
10399 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10400 		/* Must be called before global_trace.buffer is allocated */
10401 		trace_printk_init_buffers();
10402 
10403 	/* To save memory, keep the ring buffer size to its minimum */
10404 	if (ring_buffer_expanded)
10405 		ring_buf_size = trace_buf_size;
10406 	else
10407 		ring_buf_size = 1;
10408 
10409 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10410 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10411 
10412 	raw_spin_lock_init(&global_trace.start_lock);
10413 
10414 	/*
10415 	 * The prepare callback allocates some memory for the ring buffer. We
10416 	 * don't free the buffer if the CPU goes down. If we were to free
10417 	 * the buffer, then the user would lose any trace that was in the
10418 	 * buffer. The memory will be removed once the "instance" is removed.
10419 	 */
10420 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10421 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10422 				      NULL);
10423 	if (ret < 0)
10424 		goto out_free_cpumask;
10425 	/* Used for event triggers */
10426 	ret = -ENOMEM;
10427 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10428 	if (!temp_buffer)
10429 		goto out_rm_hp_state;
10430 
10431 	if (trace_create_savedcmd() < 0)
10432 		goto out_free_temp_buffer;
10433 
10434 	if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10435 		goto out_free_savedcmd;
10436 
10437 	/* TODO: make the number of buffers hot pluggable with CPUs */
10438 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10439 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10440 		goto out_free_pipe_cpumask;
10441 	}
10442 	if (global_trace.buffer_disabled)
10443 		tracing_off();
10444 
10445 	if (trace_boot_clock) {
10446 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10447 		if (ret < 0)
10448 			pr_warn("Trace clock %s not defined, going back to default\n",
10449 				trace_boot_clock);
10450 	}
10451 
10452 	/*
10453 	 * register_tracer() might reference current_trace, so it
10454 	 * needs to be set before we register anything. This is
10455 	 * just a bootstrap of current_trace anyway.
10456 	 */
10457 	global_trace.current_trace = &nop_trace;
10458 
10459 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10460 
10461 	ftrace_init_global_array_ops(&global_trace);
10462 
10463 	init_trace_flags_index(&global_trace);
10464 
10465 	register_tracer(&nop_trace);
10466 
10467 	/* Function tracing may start here (via kernel command line) */
10468 	init_function_trace();
10469 
10470 	/* All seems OK, enable tracing */
10471 	tracing_disabled = 0;
10472 
10473 	atomic_notifier_chain_register(&panic_notifier_list,
10474 				       &trace_panic_notifier);
10475 
10476 	register_die_notifier(&trace_die_notifier);
10477 
10478 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10479 
10480 	INIT_LIST_HEAD(&global_trace.systems);
10481 	INIT_LIST_HEAD(&global_trace.events);
10482 	INIT_LIST_HEAD(&global_trace.hist_vars);
10483 	INIT_LIST_HEAD(&global_trace.err_log);
10484 	list_add(&global_trace.list, &ftrace_trace_arrays);
10485 
10486 	apply_trace_boot_options();
10487 
10488 	register_snapshot_cmd();
10489 
10490 	test_can_verify();
10491 
10492 	return 0;
10493 
10494 out_free_pipe_cpumask:
10495 	free_cpumask_var(global_trace.pipe_cpumask);
10496 out_free_savedcmd:
10497 	free_saved_cmdlines_buffer(savedcmd);
10498 out_free_temp_buffer:
10499 	ring_buffer_free(temp_buffer);
10500 out_rm_hp_state:
10501 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10502 out_free_cpumask:
10503 	free_cpumask_var(global_trace.tracing_cpumask);
10504 out_free_buffer_mask:
10505 	free_cpumask_var(tracing_buffer_mask);
10506 out:
10507 	return ret;
10508 }
10509 
10510 void __init ftrace_boot_snapshot(void)
10511 {
10512 #ifdef CONFIG_TRACER_MAX_TRACE
10513 	struct trace_array *tr;
10514 
10515 	if (!snapshot_at_boot)
10516 		return;
10517 
10518 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10519 		if (!tr->allocated_snapshot)
10520 			continue;
10521 
10522 		tracing_snapshot_instance(tr);
10523 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10524 	}
10525 #endif
10526 }
10527 
10528 void __init early_trace_init(void)
10529 {
10530 	if (tracepoint_printk) {
10531 		tracepoint_print_iter =
10532 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10533 		if (MEM_FAIL(!tracepoint_print_iter,
10534 			     "Failed to allocate trace iterator\n"))
10535 			tracepoint_printk = 0;
10536 		else
10537 			static_key_enable(&tracepoint_printk_key.key);
10538 	}
10539 	tracer_alloc_buffers();
10540 
10541 	init_events();
10542 }
10543 
10544 void __init trace_init(void)
10545 {
10546 	trace_event_init();
10547 
10548 	if (boot_instance_index)
10549 		enable_instances();
10550 }
10551 
10552 __init static void clear_boot_tracer(void)
10553 {
10554 	/*
10555 	 * The default bootup tracer name points into an init section
10556 	 * buffer. This function is called from a late initcall. If the
10557 	 * boot tracer was never registered, clear the pointer to prevent
10558 	 * a later registration from accessing the buffer that is
10559 	 * about to be freed.
10560 	 */
10561 	if (!default_bootup_tracer)
10562 		return;
10563 
10564 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10565 	       default_bootup_tracer);
10566 	default_bootup_tracer = NULL;
10567 }
10568 
10569 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10570 __init static void tracing_set_default_clock(void)
10571 {
10572 	/* sched_clock_stable() is determined in late_initcall */
10573 	if (!trace_boot_clock && !sched_clock_stable()) {
10574 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10575 			pr_warn("Can not set tracing clock due to lockdown\n");
10576 			return;
10577 		}
10578 
10579 		printk(KERN_WARNING
10580 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10581 		       "If you want to keep using the local clock, then add:\n"
10582 		       "  \"trace_clock=local\"\n"
10583 		       "on the kernel command line\n");
10584 		tracing_set_clock(&global_trace, "global");
10585 	}
10586 }
10587 #else
10588 static inline void tracing_set_default_clock(void) { }
10589 #endif
10590 
10591 __init static int late_trace_init(void)
10592 {
10593 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10594 		static_key_disable(&tracepoint_printk_key.key);
10595 		tracepoint_printk = 0;
10596 	}
10597 
10598 	tracing_set_default_clock();
10599 	clear_boot_tracer();
10600 	return 0;
10601 }
10602 
10603 late_initcall_sync(late_trace_init);
10604