xref: /linux/kernel/trace/trace.c (revision 9f7861c56b51b84d30114e7fea9d744a9d5ba9b7)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring-buffer to count the
61  * entries inserted during the selftest, although concurrent
62  * insertions into the ring-buffer, such as trace_printk(), could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66 
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72 
73 void __init disable_tracing_selftest(const char *reason)
74 {
75 	if (!tracing_selftest_disabled) {
76 		tracing_selftest_disabled = true;
77 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
78 	}
79 }
80 #else
81 #define tracing_selftest_running	0
82 #define tracing_selftest_disabled	0
83 #endif
84 
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will be set back to zero if the
112  * initialization of the tracer is successful. That is the only
113  * place that sets it back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it by either specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  */
134 
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136 
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139 
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 	struct module			*mod;
144 	unsigned long			length;
145 };
146 
147 union trace_eval_map_item;
148 
149 struct trace_eval_map_tail {
150 	/*
151 	 * "end" is first and points to NULL as it must be different
152 	 * from "mod" or "eval_string"
153 	 */
154 	union trace_eval_map_item	*next;
155 	const char			*end;	/* points to NULL */
156 };
157 
158 static DEFINE_MUTEX(trace_eval_mutex);
159 
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168 	struct trace_eval_map		map;
169 	struct trace_eval_map_head	head;
170 	struct trace_eval_map_tail	tail;
171 };
172 
173 static union trace_eval_map_item *trace_eval_maps;
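/*
 * Illustrative layout (editorial sketch, not in the original source) of
 * one saved block, as described by the comment above:
 *
 *	trace_eval_maps --> [ head    ]  .length = N, .mod = module (or NULL)
 *	                    [ map 0   ]
 *	                    [ ...     ]
 *	                    [ map N-1 ]
 *	                    [ tail    ]  .next --> head of the next saved
 *	                                          block, or NULL
 */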
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175 
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 				   struct trace_buffer *buffer,
179 				   unsigned int trace_ctx);
180 
181 #define MAX_TRACER_SIZE		100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184 
185 static bool allocate_snapshot;
186 static bool snapshot_at_boot;
187 
188 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
189 static int boot_instance_index;
190 
191 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_snapshot_index;
193 
194 static int __init set_cmdline_ftrace(char *str)
195 {
196 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
197 	default_bootup_tracer = bootup_tracer_buf;
198 	/* We are using ftrace early, expand it */
199 	trace_set_ring_buffer_expanded(NULL);
200 	return 1;
201 }
202 __setup("ftrace=", set_cmdline_ftrace);
203 
204 static int __init set_ftrace_dump_on_oops(char *str)
205 {
206 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
207 		ftrace_dump_on_oops = DUMP_ALL;
208 		return 1;
209 	}
210 
211 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
212 		ftrace_dump_on_oops = DUMP_ORIG;
213 		return 1;
214 	}
215 
216 	return 0;
217 }
218 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
219 
220 static int __init stop_trace_on_warning(char *str)
221 {
222 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
223 		__disable_trace_on_warning = 1;
224 	return 1;
225 }
226 __setup("traceoff_on_warning", stop_trace_on_warning);
227 
228 static int __init boot_alloc_snapshot(char *str)
229 {
230 	char *slot = boot_snapshot_info + boot_snapshot_index;
231 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
232 	int ret;
233 
234 	if (str[0] == '=') {
235 		str++;
236 		if (strlen(str) >= left)
237 			return -1;
238 
239 		ret = snprintf(slot, left, "%s\t", str);
240 		boot_snapshot_index += ret;
241 	} else {
242 		allocate_snapshot = true;
243 		/* We also need the main ring buffer expanded */
244 		trace_set_ring_buffer_expanded(NULL);
245 	}
246 	return 1;
247 }
248 __setup("alloc_snapshot", boot_alloc_snapshot);
249 
250 
251 static int __init boot_snapshot(char *str)
252 {
253 	snapshot_at_boot = true;
254 	boot_alloc_snapshot(str);
255 	return 1;
256 }
257 __setup("ftrace_boot_snapshot", boot_snapshot);
258 
259 
260 static int __init boot_instance(char *str)
261 {
262 	char *slot = boot_instance_info + boot_instance_index;
263 	int left = sizeof(boot_instance_info) - boot_instance_index;
264 	int ret;
265 
266 	if (strlen(str) >= left)
267 		return -1;
268 
269 	ret = snprintf(slot, left, "%s\t", str);
270 	boot_instance_index += ret;
271 
272 	return 1;
273 }
274 __setup("trace_instance=", boot_instance);
275 
276 
277 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
278 
279 static int __init set_trace_boot_options(char *str)
280 {
281 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
282 	return 1;
283 }
284 __setup("trace_options=", set_trace_boot_options);
285 
286 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
287 static char *trace_boot_clock __initdata;
288 
289 static int __init set_trace_boot_clock(char *str)
290 {
291 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
292 	trace_boot_clock = trace_boot_clock_buf;
293 	return 1;
294 }
295 __setup("trace_clock=", set_trace_boot_clock);
296 
297 static int __init set_tracepoint_printk(char *str)
298 {
299 	/* Ignore the "tp_printk_stop_on_boot" param */
300 	if (*str == '_')
301 		return 0;
302 
303 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
304 		tracepoint_printk = 1;
305 	return 1;
306 }
307 __setup("tp_printk", set_tracepoint_printk);
308 
309 static int __init set_tracepoint_printk_stop(char *str)
310 {
311 	tracepoint_printk_stop_on_boot = true;
312 	return 1;
313 }
314 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
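/*
 * Illustrative example (not part of the original file): the parameters
 * handled by the __setup() calls above might appear on a kernel command
 * line as, for instance:
 *
 *	ftrace=function_graph ftrace_dump_on_oops=orig_cpu
 *	traceoff_on_warning trace_options=sym-addr trace_clock=global
 *	trace_instance=foo alloc_snapshot tp_printk
 *
 * Each handler consumes only the text after its own "=" (if any); the
 * instance and snapshot strings are buffered here and processed later
 * during boot.
 */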
315 
316 unsigned long long ns2usecs(u64 nsec)
317 {
318 	nsec += 500;
319 	do_div(nsec, 1000);
320 	return nsec;
321 }
322 
323 static void
324 trace_process_export(struct trace_export *export,
325 	       struct ring_buffer_event *event, int flag)
326 {
327 	struct trace_entry *entry;
328 	unsigned int size = 0;
329 
330 	if (export->flags & flag) {
331 		entry = ring_buffer_event_data(event);
332 		size = ring_buffer_event_length(event);
333 		export->write(export, entry, size);
334 	}
335 }
336 
337 static DEFINE_MUTEX(ftrace_export_lock);
338 
339 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
340 
341 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
342 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
344 
345 static inline void ftrace_exports_enable(struct trace_export *export)
346 {
347 	if (export->flags & TRACE_EXPORT_FUNCTION)
348 		static_branch_inc(&trace_function_exports_enabled);
349 
350 	if (export->flags & TRACE_EXPORT_EVENT)
351 		static_branch_inc(&trace_event_exports_enabled);
352 
353 	if (export->flags & TRACE_EXPORT_MARKER)
354 		static_branch_inc(&trace_marker_exports_enabled);
355 }
356 
357 static inline void ftrace_exports_disable(struct trace_export *export)
358 {
359 	if (export->flags & TRACE_EXPORT_FUNCTION)
360 		static_branch_dec(&trace_function_exports_enabled);
361 
362 	if (export->flags & TRACE_EXPORT_EVENT)
363 		static_branch_dec(&trace_event_exports_enabled);
364 
365 	if (export->flags & TRACE_EXPORT_MARKER)
366 		static_branch_dec(&trace_marker_exports_enabled);
367 }
368 
369 static void ftrace_exports(struct ring_buffer_event *event, int flag)
370 {
371 	struct trace_export *export;
372 
373 	preempt_disable_notrace();
374 
375 	export = rcu_dereference_raw_check(ftrace_exports_list);
376 	while (export) {
377 		trace_process_export(export, event, flag);
378 		export = rcu_dereference_raw_check(export->next);
379 	}
380 
381 	preempt_enable_notrace();
382 }
383 
384 static inline void
385 add_trace_export(struct trace_export **list, struct trace_export *export)
386 {
387 	rcu_assign_pointer(export->next, *list);
388 	/*
389 	 * We are inserting the export into the list, but another
390 	 * CPU might be walking that list. We need to make sure
391 	 * the export->next pointer is valid before another CPU sees
392 	 * the export pointer linked into the list.
393 	 */
394 	rcu_assign_pointer(*list, export);
395 }
396 
397 static inline int
398 rm_trace_export(struct trace_export **list, struct trace_export *export)
399 {
400 	struct trace_export **p;
401 
402 	for (p = list; *p != NULL; p = &(*p)->next)
403 		if (*p == export)
404 			break;
405 
406 	if (*p != export)
407 		return -1;
408 
409 	rcu_assign_pointer(*p, (*p)->next);
410 
411 	return 0;
412 }
413 
414 static inline void
415 add_ftrace_export(struct trace_export **list, struct trace_export *export)
416 {
417 	ftrace_exports_enable(export);
418 
419 	add_trace_export(list, export);
420 }
421 
422 static inline int
423 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
424 {
425 	int ret;
426 
427 	ret = rm_trace_export(list, export);
428 	ftrace_exports_disable(export);
429 
430 	return ret;
431 }
432 
433 int register_ftrace_export(struct trace_export *export)
434 {
435 	if (WARN_ON_ONCE(!export->write))
436 		return -1;
437 
438 	mutex_lock(&ftrace_export_lock);
439 
440 	add_ftrace_export(&ftrace_exports_list, export);
441 
442 	mutex_unlock(&ftrace_export_lock);
443 
444 	return 0;
445 }
446 EXPORT_SYMBOL_GPL(register_ftrace_export);
447 
448 int unregister_ftrace_export(struct trace_export *export)
449 {
450 	int ret;
451 
452 	mutex_lock(&ftrace_export_lock);
453 
454 	ret = rm_ftrace_export(&ftrace_exports_list, export);
455 
456 	mutex_unlock(&ftrace_export_lock);
457 
458 	return ret;
459 }
460 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
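/*
 * Illustrative sketch (not part of the original file): how an external
 * module might hook into the export list managed above.  The callback
 * body and names are hypothetical; struct trace_export, the flag bits
 * and the register/unregister calls come from <linux/trace.h>.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		// forward the raw entry, e.g. to a firmware log buffer
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *		.flags = TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */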
461 
462 /* trace_flags holds trace_options default values */
463 #define TRACE_DEFAULT_FLAGS						\
464 	(FUNCTION_DEFAULT_FLAGS |					\
465 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
466 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
467 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
468 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
469 	 TRACE_ITER_HASH_PTR)
470 
471 /* trace_options that are only supported by global_trace */
472 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
473 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
474 
475 /* trace_flags that are default zero for instances */
476 #define ZEROED_TRACE_FLAGS \
477 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
478 
479 /*
480  * The global_trace is the descriptor that holds the top-level tracing
481  * buffers for the live tracing.
482  */
483 static struct trace_array global_trace = {
484 	.trace_flags = TRACE_DEFAULT_FLAGS,
485 };
486 
487 void trace_set_ring_buffer_expanded(struct trace_array *tr)
488 {
489 	if (!tr)
490 		tr = &global_trace;
491 	tr->ring_buffer_expanded = true;
492 }
493 
494 LIST_HEAD(ftrace_trace_arrays);
495 
496 int trace_array_get(struct trace_array *this_tr)
497 {
498 	struct trace_array *tr;
499 	int ret = -ENODEV;
500 
501 	mutex_lock(&trace_types_lock);
502 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503 		if (tr == this_tr) {
504 			tr->ref++;
505 			ret = 0;
506 			break;
507 		}
508 	}
509 	mutex_unlock(&trace_types_lock);
510 
511 	return ret;
512 }
513 
514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516 	WARN_ON(!this_tr->ref);
517 	this_tr->ref--;
518 }
519 
520 /**
521  * trace_array_put - Decrement the reference counter for this trace array.
522  * @this_tr : pointer to the trace array
523  *
524  * NOTE: Use this when we no longer need the trace array returned by
525  * trace_array_get_by_name(). This ensures the trace array can be later
526  * destroyed.
527  *
528  */
529 void trace_array_put(struct trace_array *this_tr)
530 {
531 	if (!this_tr)
532 		return;
533 
534 	mutex_lock(&trace_types_lock);
535 	__trace_array_put(this_tr);
536 	mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
539 
540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542 	int ret;
543 
544 	ret = security_locked_down(LOCKDOWN_TRACEFS);
545 	if (ret)
546 		return ret;
547 
548 	if (tracing_disabled)
549 		return -ENODEV;
550 
551 	if (tr && trace_array_get(tr) < 0)
552 		return -ENODEV;
553 
554 	return 0;
555 }
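/*
 * Illustrative sketch (not part of the original file): the ->open() and
 * ->release() handlers for tracefs files later in this file pair these
 * helpers roughly like this (names simplified):
 *
 *	static int example_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *
 *		filp->private_data = tr;
 *		return 0;
 *	}
 *
 *	static int example_release(struct inode *inode, struct file *filp)
 *	{
 *		trace_array_put(inode->i_private);
 *		return 0;
 *	}
 */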
556 
557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558 			      struct trace_buffer *buffer,
559 			      struct ring_buffer_event *event)
560 {
561 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562 	    !filter_match_preds(call->filter, rec)) {
563 		__trace_event_discard_commit(buffer, event);
564 		return 1;
565 	}
566 
567 	return 0;
568 }
569 
570 /**
571  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572  * @filtered_pids: The list of pids to check
573  * @search_pid: The PID to find in @filtered_pids
574  *
575  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576  */
577 bool
578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580 	return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582 
583 /**
584  * trace_ignore_this_task - should a task be ignored for tracing
585  * @filtered_pids: The list of pids to check
586  * @filtered_no_pids: The list of pids not to be traced
587  * @task: The task that should be ignored if not filtered
588  *
589  * Checks if @task should be traced or not from @filtered_pids.
590  * Returns true if @task should *NOT* be traced.
591  * Returns false if @task should be traced.
592  */
593 bool
594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595 		       struct trace_pid_list *filtered_no_pids,
596 		       struct task_struct *task)
597 {
598 	/*
599 	 * If filtered_no_pids is not empty, and the task's pid is listed
600 	 * in filtered_no_pids, then return true.
601 	 * Otherwise, if filtered_pids is empty, that means we can
602 	 * trace all tasks. If it has content, then only trace pids
603 	 * within filtered_pids.
604 	 */
605 
606 	return (filtered_pids &&
607 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
608 		(filtered_no_pids &&
609 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
611 
612 /**
613  * trace_filter_add_remove_task - Add or remove a task from a pid_list
614  * @pid_list: The list to modify
615  * @self: The current task for fork or NULL for exit
616  * @task: The task to add or remove
617  *
618  * If adding a task, if @self is defined, the task is only added if @self
619  * is also included in @pid_list. This happens on fork and tasks should
620  * only be added when the parent is listed. If @self is NULL, then the
621  * @task pid will be removed from the list, which would happen on exit
622  * of a task.
623  */
624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625 				  struct task_struct *self,
626 				  struct task_struct *task)
627 {
628 	if (!pid_list)
629 		return;
630 
631 	/* For forks, we only add if the forking task is listed */
632 	if (self) {
633 		if (!trace_find_filtered_pid(pid_list, self->pid))
634 			return;
635 	}
636 
637 	/* "self" is set for forks, and NULL for exits */
638 	if (self)
639 		trace_pid_list_set(pid_list, task->pid);
640 	else
641 		trace_pid_list_clear(pid_list, task->pid);
642 }
643 
644 /**
645  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646  * @pid_list: The pid list to show
647  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
648  * @pos: The position of the file
649  *
650  * This is used by the seq_file "next" operation to iterate the pids
651  * listed in a trace_pid_list structure.
652  *
653  * Returns the pid+1 as we want to display pid of zero, but NULL would
654  * stop the iteration.
655  */
656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658 	long pid = (unsigned long)v;
659 	unsigned int next;
660 
661 	(*pos)++;
662 
663 	/* pid already is +1 of the actual previous bit */
664 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
665 		return NULL;
666 
667 	pid = next;
668 
669 	/* Return pid + 1 to allow zero to be represented */
670 	return (void *)(pid + 1);
671 }
672 
673 /**
674  * trace_pid_start - Used for seq_file to start reading pid lists
675  * @pid_list: The pid list to show
676  * @pos: The position of the file
677  *
678  * This is used by seq_file "start" operation to start the iteration
679  * of listing pids.
680  *
681  * Returns the pid+1 as we want to display pid of zero, but NULL would
682  * stop the iteration.
683  */
684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686 	unsigned long pid;
687 	unsigned int first;
688 	loff_t l = 0;
689 
690 	if (trace_pid_list_first(pid_list, &first) < 0)
691 		return NULL;
692 
693 	pid = first;
694 
695 	/* Return pid + 1 so that zero can be the exit value */
696 	for (pid++; pid && l < *pos;
697 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698 		;
699 	return (void *)pid;
700 }
701 
702 /**
703  * trace_pid_show - show the current pid in seq_file processing
704  * @m: The seq_file structure to write into
705  * @v: A void pointer of the pid (+1) value to display
706  *
707  * Can be directly used by seq_file operations to display the current
708  * pid value.
709  */
710 int trace_pid_show(struct seq_file *m, void *v)
711 {
712 	unsigned long pid = (unsigned long)v - 1;
713 
714 	seq_printf(m, "%lu\n", pid);
715 	return 0;
716 }
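/*
 * Illustrative sketch (not part of the original file): wiring the three
 * helpers above into a seq_file.  Real users (the pid-filter files in
 * trace_events.c and ftrace.c) look the pid list up from the trace
 * array under RCU; the static list and wrapper names here are
 * hypothetical.
 *
 *	static struct trace_pid_list *example_pids;
 *
 *	static void *example_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(example_pids, pos);
 *	}
 *
 *	static void *example_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(example_pids, v, pos);
 *	}
 *
 *	static void example_stop(struct seq_file *m, void *v) { }
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= example_start,
 *		.next	= example_next,
 *		.stop	= example_stop,
 *		.show	= trace_pid_show,
 *	};
 */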
717 
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE		127
720 
721 int trace_pid_write(struct trace_pid_list *filtered_pids,
722 		    struct trace_pid_list **new_pid_list,
723 		    const char __user *ubuf, size_t cnt)
724 {
725 	struct trace_pid_list *pid_list;
726 	struct trace_parser parser;
727 	unsigned long val;
728 	int nr_pids = 0;
729 	ssize_t read = 0;
730 	ssize_t ret;
731 	loff_t pos;
732 	pid_t pid;
733 
734 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735 		return -ENOMEM;
736 
737 	/*
738 	 * Always create a new array. The write is an all-or-nothing
739 	 * operation: new pids from the user are applied to a fresh
740 	 * array, so if the operation fails the current list is
741 	 * not modified.
742 	 */
743 	pid_list = trace_pid_list_alloc();
744 	if (!pid_list) {
745 		trace_parser_put(&parser);
746 		return -ENOMEM;
747 	}
748 
749 	if (filtered_pids) {
750 		/* copy the current bits to the new max */
751 		ret = trace_pid_list_first(filtered_pids, &pid);
752 		while (!ret) {
753 			trace_pid_list_set(pid_list, pid);
754 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755 			nr_pids++;
756 		}
757 	}
758 
759 	ret = 0;
760 	while (cnt > 0) {
761 
762 		pos = 0;
763 
764 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
765 		if (ret < 0)
766 			break;
767 
768 		read += ret;
769 		ubuf += ret;
770 		cnt -= ret;
771 
772 		if (!trace_parser_loaded(&parser))
773 			break;
774 
775 		ret = -EINVAL;
776 		if (kstrtoul(parser.buffer, 0, &val))
777 			break;
778 
779 		pid = (pid_t)val;
780 
781 		if (trace_pid_list_set(pid_list, pid) < 0) {
782 			ret = -1;
783 			break;
784 		}
785 		nr_pids++;
786 
787 		trace_parser_clear(&parser);
788 		ret = 0;
789 	}
790 	trace_parser_put(&parser);
791 
792 	if (ret < 0) {
793 		trace_pid_list_free(pid_list);
794 		return ret;
795 	}
796 
797 	if (!nr_pids) {
798 		/* Cleared the list of pids */
799 		trace_pid_list_free(pid_list);
800 		pid_list = NULL;
801 	}
802 
803 	*new_pid_list = pid_list;
804 
805 	return read;
806 }
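/*
 * Illustrative sketch (not part of the original file): how a write
 * handler for a pid-filter file typically consumes trace_pid_write().
 * The field name and locking context are simplified; the real callers
 * live in trace_events.c and ftrace.c.
 *
 *	struct trace_pid_list *filtered_pids, *pid_list;
 *	ssize_t ret;
 *
 *	filtered_pids = rcu_dereference_protected(tr->filtered_pids,
 *					lockdep_is_held(&event_mutex));
 *
 *	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(tr->filtered_pids, pid_list);
 *	synchronize_rcu();
 *	trace_pid_list_free(filtered_pids);
 *
 *	return ret;	// number of bytes consumed
 */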
807 
808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810 	u64 ts;
811 
812 	/* Early boot up does not have a buffer yet */
813 	if (!buf->buffer)
814 		return trace_clock_local();
815 
816 	ts = ring_buffer_time_stamp(buf->buffer);
817 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818 
819 	return ts;
820 }
821 
822 u64 ftrace_now(int cpu)
823 {
824 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826 
827 /**
828  * tracing_is_enabled - Show if global_trace has been enabled
829  *
830  * Shows if the global trace has been enabled or not. It uses the
831  * mirror flag "buffer_disabled" so it can be used in fast paths such as
832  * by the irqsoff tracer. But it may be inaccurate due to races. If you
833  * need to know the accurate state, use tracing_is_on() which is a little
834  * slower, but accurate.
835  */
836 int tracing_is_enabled(void)
837 {
838 	/*
839 	 * For quick access (irqsoff uses this in fast path), just
840 	 * return the mirror variable of the state of the ring buffer.
841 	 * It's a little racy, but we don't really care.
842 	 */
843 	smp_rmb();
844 	return !global_trace.buffer_disabled;
845 }
846 
847 /*
848  * trace_buf_size is the size in bytes that is allocated
849  * for a buffer. Note, the number of bytes is always rounded
850  * to page size.
851  *
852  * This number is purposely set to a low default of 16384 entries
853  * (88 bytes each). If a dump on oops happens, it is much appreciated
854  * not to have to wait for all that output. Anyway, this is
855  * configurable at both boot time and run time.
856  */
857 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
858 
859 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860 
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer		*trace_types __read_mostly;
863 
864 /*
865  * trace_types_lock is used to protect the trace_types list.
866  */
867 DEFINE_MUTEX(trace_types_lock);
868 
869 /*
870  * Serialize access to the ring buffer.
871  *
872  * The ring buffer serializes readers, but that is only low-level protection.
873  * The validity of the events (returned by ring_buffer_peek(), etc.)
874  * is not protected by the ring buffer.
875  *
876  * The content of events may become garbage if we allow other processes to
877  * consume these events concurrently:
878  *   A) the page of the consumed events may become a normal page
879  *      (not a reader page) in the ring buffer, and this page will be
880  *      rewritten by the events producer.
881  *   B) the page of the consumed events may become a page for splice_read,
882  *      and this page will be returned to the system.
883  *
884  * These primitives allow multiple processes to access different per-cpu
885  * ring buffers concurrently.
886  *
887  * These primitives don't distinguish read-only and read-consume access.
888  * Multiple read-only accesses are also serialized.
889  */
890 
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894 
895 static inline void trace_access_lock(int cpu)
896 {
897 	if (cpu == RING_BUFFER_ALL_CPUS) {
898 		/* gain it for accessing the whole ring buffer. */
899 		down_write(&all_cpu_access_lock);
900 	} else {
901 		/* gain it for accessing a cpu ring buffer. */
902 
903 		/* First, block other trace_access_lock(RING_BUFFER_ALL_CPUS) callers. */
904 		down_read(&all_cpu_access_lock);
905 
906 		/* Second, block other access to this @cpu ring buffer. */
907 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
908 	}
909 }
910 
911 static inline void trace_access_unlock(int cpu)
912 {
913 	if (cpu == RING_BUFFER_ALL_CPUS) {
914 		up_write(&all_cpu_access_lock);
915 	} else {
916 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917 		up_read(&all_cpu_access_lock);
918 	}
919 }
920 
921 static inline void trace_access_lock_init(void)
922 {
923 	int cpu;
924 
925 	for_each_possible_cpu(cpu)
926 		mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928 
929 #else
930 
931 static DEFINE_MUTEX(access_lock);
932 
933 static inline void trace_access_lock(int cpu)
934 {
935 	(void)cpu;
936 	mutex_lock(&access_lock);
937 }
938 
939 static inline void trace_access_unlock(int cpu)
940 {
941 	(void)cpu;
942 	mutex_unlock(&access_lock);
943 }
944 
945 static inline void trace_access_lock_init(void)
946 {
947 }
948 
949 #endif
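/*
 * Illustrative usage (not part of the original file): the consuming
 * read paths later in this file bracket their ring-buffer access with
 * these helpers, roughly:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	trace_access_unlock(cpu);
 *
 * A reader of one CPU buffer thus excludes only other readers of that
 * CPU and any RING_BUFFER_ALL_CPUS reader (which takes the rwsem for
 * writing), not readers of other CPUs.
 */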
950 
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953 				 unsigned int trace_ctx,
954 				 int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956 				      struct trace_buffer *buffer,
957 				      unsigned int trace_ctx,
958 				      int skip, struct pt_regs *regs);
959 
960 #else
961 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962 					unsigned int trace_ctx,
963 					int skip, struct pt_regs *regs)
964 {
965 }
966 static inline void ftrace_trace_stack(struct trace_array *tr,
967 				      struct trace_buffer *buffer,
968 				      unsigned long trace_ctx,
969 				      int skip, struct pt_regs *regs)
970 {
971 }
972 
973 #endif
974 
975 static __always_inline void
976 trace_event_setup(struct ring_buffer_event *event,
977 		  int type, unsigned int trace_ctx)
978 {
979 	struct trace_entry *ent = ring_buffer_event_data(event);
980 
981 	tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983 
984 static __always_inline struct ring_buffer_event *
985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986 			  int type,
987 			  unsigned long len,
988 			  unsigned int trace_ctx)
989 {
990 	struct ring_buffer_event *event;
991 
992 	event = ring_buffer_lock_reserve(buffer, len);
993 	if (event != NULL)
994 		trace_event_setup(event, type, trace_ctx);
995 
996 	return event;
997 }
998 
999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001 	if (tr->array_buffer.buffer)
1002 		ring_buffer_record_on(tr->array_buffer.buffer);
1003 	/*
1004 	 * This flag is looked at when buffers haven't been allocated
1005 	 * yet, or by some tracers (like irqsoff), that just want to
1006 	 * know if the ring buffer has been disabled, but it can handle
1007 	 * races where it gets disabled but we still do a record.
1008 	 * As the check is in the fast path of the tracers, it is more
1009 	 * important to be fast than accurate.
1010 	 */
1011 	tr->buffer_disabled = 0;
1012 	/* Make the flag seen by readers */
1013 	smp_wmb();
1014 }
1015 
1016 /**
1017  * tracing_on - enable tracing buffers
1018  *
1019  * This function enables tracing buffers that may have been
1020  * disabled with tracing_off.
1021  */
1022 void tracing_on(void)
1023 {
1024 	tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027 
1028 
1029 static __always_inline void
1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032 	__this_cpu_write(trace_taskinfo_save, true);
1033 
1034 	/* If this is the temp buffer, we need to commit fully */
1035 	if (this_cpu_read(trace_buffered_event) == event) {
1036 		/* Length is in event->array[0] */
1037 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038 		/* Release the temp buffer */
1039 		this_cpu_dec(trace_buffered_event_cnt);
1040 		/* ring_buffer_unlock_commit() enables preemption */
1041 		preempt_enable_notrace();
1042 	} else
1043 		ring_buffer_unlock_commit(buffer);
1044 }
1045 
1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047 		       const char *str, int size)
1048 {
1049 	struct ring_buffer_event *event;
1050 	struct trace_buffer *buffer;
1051 	struct print_entry *entry;
1052 	unsigned int trace_ctx;
1053 	int alloc;
1054 
1055 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056 		return 0;
1057 
1058 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1059 		return 0;
1060 
1061 	if (unlikely(tracing_disabled))
1062 		return 0;
1063 
1064 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065 
1066 	trace_ctx = tracing_gen_ctx();
1067 	buffer = tr->array_buffer.buffer;
1068 	ring_buffer_nest_start(buffer);
1069 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070 					    trace_ctx);
1071 	if (!event) {
1072 		size = 0;
1073 		goto out;
1074 	}
1075 
1076 	entry = ring_buffer_event_data(event);
1077 	entry->ip = ip;
1078 
1079 	memcpy(&entry->buf, str, size);
1080 
1081 	/* Add a newline if necessary */
1082 	if (entry->buf[size - 1] != '\n') {
1083 		entry->buf[size] = '\n';
1084 		entry->buf[size + 1] = '\0';
1085 	} else
1086 		entry->buf[size] = '\0';
1087 
1088 	__buffer_unlock_commit(buffer, event);
1089 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090  out:
1091 	ring_buffer_nest_end(buffer);
1092 	return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095 
1096 /**
1097  * __trace_puts - write a constant string into the trace buffer.
1098  * @ip:	   The address of the caller
1099  * @str:   The constant string to write
1100  * @size:  The size of the string.
1101  */
1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104 	return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
1107 
1108 /**
1109  * __trace_bputs - write the pointer to a constant string into trace buffer
1110  * @ip:	   The address of the caller
1111  * @str:   The constant string to write to the buffer
1112  */
1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115 	struct ring_buffer_event *event;
1116 	struct trace_buffer *buffer;
1117 	struct bputs_entry *entry;
1118 	unsigned int trace_ctx;
1119 	int size = sizeof(struct bputs_entry);
1120 	int ret = 0;
1121 
1122 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123 		return 0;
1124 
1125 	if (unlikely(tracing_selftest_running || tracing_disabled))
1126 		return 0;
1127 
1128 	trace_ctx = tracing_gen_ctx();
1129 	buffer = global_trace.array_buffer.buffer;
1130 
1131 	ring_buffer_nest_start(buffer);
1132 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133 					    trace_ctx);
1134 	if (!event)
1135 		goto out;
1136 
1137 	entry = ring_buffer_event_data(event);
1138 	entry->ip			= ip;
1139 	entry->str			= str;
1140 
1141 	__buffer_unlock_commit(buffer, event);
1142 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143 
1144 	ret = 1;
1145  out:
1146 	ring_buffer_nest_end(buffer);
1147 	return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
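/*
 * Illustrative note (not part of the original file): callers normally
 * reach the two functions above through the trace_puts() macro, which
 * uses __builtin_constant_p() to pick __trace_bputs() for build-time
 * constant strings (recording only the pointer) and __trace_puts()
 * otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */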
1150 
1151 #ifdef CONFIG_TRACER_SNAPSHOT
1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153 					   void *cond_data)
1154 {
1155 	struct tracer *tracer = tr->current_trace;
1156 	unsigned long flags;
1157 
1158 	if (in_nmi()) {
1159 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161 		return;
1162 	}
1163 
1164 	if (!tr->allocated_snapshot) {
1165 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167 		tracer_tracing_off(tr);
1168 		return;
1169 	}
1170 
1171 	/* Note, the snapshot cannot be used when the tracer itself uses it */
1172 	if (tracer->use_max_tr) {
1173 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175 		return;
1176 	}
1177 
1178 	local_irq_save(flags);
1179 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1180 	local_irq_restore(flags);
1181 }
1182 
1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185 	tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187 
1188 /**
1189  * tracing_snapshot - take a snapshot of the current buffer.
1190  *
1191  * This causes a swap between the snapshot buffer and the current live
1192  * tracing buffer. You can use this to take snapshots of the live
1193  * trace when some condition is triggered, but continue to trace.
1194  *
1195  * Note, make sure to allocate the snapshot with either
1196  * a tracing_snapshot_alloc(), or by doing it manually
1197  * with: echo 1 > /sys/kernel/tracing/snapshot
1198  *
1199  * If the snapshot buffer is not allocated, it will stop tracing.
1200  * Basically making a permanent snapshot.
1201  */
1202 void tracing_snapshot(void)
1203 {
1204 	struct trace_array *tr = &global_trace;
1205 
1206 	tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
1209 
1210 /**
1211  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212  * @tr:		The tracing instance to snapshot
1213  * @cond_data:	The data to be tested conditionally, and possibly saved
1214  *
1215  * This is the same as tracing_snapshot() except that the snapshot is
1216  * conditional - the snapshot will only happen if the
1217  * cond_snapshot.update() implementation receiving the cond_data
1218  * returns true, which means that the trace array's cond_snapshot
1219  * update() operation used the cond_data to determine whether the
1220  * snapshot should be taken, and if it was, presumably saved it along
1221  * with the snapshot.
1222  */
1223 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224 {
1225 	tracing_snapshot_instance_cond(tr, cond_data);
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228 
1229 /**
1230  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231  * @tr:		The tracing instance
1232  *
1233  * When the user enables a conditional snapshot using
1234  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235  * with the snapshot.  This accessor is used to retrieve it.
1236  *
1237  * Should not be called from cond_snapshot.update(), since it takes
1238  * the tr->max_lock lock, which the code calling
1239  * cond_snapshot.update() has already done.
1240  *
1241  * Returns the cond_data associated with the trace array's snapshot.
1242  */
1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245 	void *cond_data = NULL;
1246 
1247 	local_irq_disable();
1248 	arch_spin_lock(&tr->max_lock);
1249 
1250 	if (tr->cond_snapshot)
1251 		cond_data = tr->cond_snapshot->cond_data;
1252 
1253 	arch_spin_unlock(&tr->max_lock);
1254 	local_irq_enable();
1255 
1256 	return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259 
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261 					struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263 
1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266 	int ret;
1267 
1268 	if (!tr->allocated_snapshot) {
1269 
1270 		/* allocate spare buffer */
1271 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1272 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1273 		if (ret < 0)
1274 			return ret;
1275 
1276 		tr->allocated_snapshot = true;
1277 	}
1278 
1279 	return 0;
1280 }
1281 
1282 static void free_snapshot(struct trace_array *tr)
1283 {
1284 	/*
1285 	 * We don't free the ring buffer; instead, we resize it because
1286 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1287 	 * we want to preserve it.
1288 	 */
1289 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1290 	set_buffer_entries(&tr->max_buffer, 1);
1291 	tracing_reset_online_cpus(&tr->max_buffer);
1292 	tr->allocated_snapshot = false;
1293 }
1294 
1295 /**
1296  * tracing_alloc_snapshot - allocate snapshot buffer.
1297  *
1298  * This only allocates the snapshot buffer if it isn't already
1299  * allocated - it doesn't also take a snapshot.
1300  *
1301  * This is meant to be used in cases where the snapshot buffer needs
1302  * to be set up for events that can't sleep but need to be able to
1303  * trigger a snapshot.
1304  */
1305 int tracing_alloc_snapshot(void)
1306 {
1307 	struct trace_array *tr = &global_trace;
1308 	int ret;
1309 
1310 	ret = tracing_alloc_snapshot_instance(tr);
1311 	WARN_ON(ret < 0);
1312 
1313 	return ret;
1314 }
1315 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1316 
1317 /**
1318  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1319  *
1320  * This is similar to tracing_snapshot(), but it will allocate the
1321  * snapshot buffer if it isn't already allocated. Use this only
1322  * where it is safe to sleep, as the allocation may sleep.
1323  *
1324  * This causes a swap between the snapshot buffer and the current live
1325  * tracing buffer. You can use this to take snapshots of the live
1326  * trace when some condition is triggered, but continue to trace.
1327  */
1328 void tracing_snapshot_alloc(void)
1329 {
1330 	int ret;
1331 
1332 	ret = tracing_alloc_snapshot();
1333 	if (ret < 0)
1334 		return;
1335 
1336 	tracing_snapshot();
1337 }
1338 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
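/*
 * Illustrative usage (not part of the original file) of the snapshot
 * API exported above; the surrounding condition is hypothetical:
 *
 *	// once, from a context that is allowed to sleep:
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 *	// later, from tracing-safe context (but not NMI):
 *	if (unlikely(interesting_condition))
 *		tracing_snapshot();
 *
 * Or simply call tracing_snapshot_alloc() from a sleepable context to
 * do both at once.
 */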
1339 
1340 /**
1341  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1342  * @tr:		The tracing instance
1343  * @cond_data:	User data to associate with the snapshot
1344  * @update:	Implementation of the cond_snapshot update function
1345  *
1346  * Check whether the conditional snapshot for the given instance has
1347  * already been enabled, or if the current tracer is already using a
1348  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1349  * save the cond_data and update function inside.
1350  *
1351  * Returns 0 if successful, error otherwise.
1352  */
1353 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1354 				 cond_update_fn_t update)
1355 {
1356 	struct cond_snapshot *cond_snapshot;
1357 	int ret = 0;
1358 
1359 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1360 	if (!cond_snapshot)
1361 		return -ENOMEM;
1362 
1363 	cond_snapshot->cond_data = cond_data;
1364 	cond_snapshot->update = update;
1365 
1366 	mutex_lock(&trace_types_lock);
1367 
1368 	ret = tracing_alloc_snapshot_instance(tr);
1369 	if (ret)
1370 		goto fail_unlock;
1371 
1372 	if (tr->current_trace->use_max_tr) {
1373 		ret = -EBUSY;
1374 		goto fail_unlock;
1375 	}
1376 
1377 	/*
1378 	 * The cond_snapshot can only change to NULL without the
1379 	 * trace_types_lock. We don't care if we race with it going
1380 	 * to NULL, but we want to make sure that it's not set to
1381 	 * something other than NULL when we get here, which we can
1382 	 * do safely with only holding the trace_types_lock and not
1383 	 * having to take the max_lock.
1384 	 */
1385 	if (tr->cond_snapshot) {
1386 		ret = -EBUSY;
1387 		goto fail_unlock;
1388 	}
1389 
1390 	local_irq_disable();
1391 	arch_spin_lock(&tr->max_lock);
1392 	tr->cond_snapshot = cond_snapshot;
1393 	arch_spin_unlock(&tr->max_lock);
1394 	local_irq_enable();
1395 
1396 	mutex_unlock(&trace_types_lock);
1397 
1398 	return ret;
1399 
1400  fail_unlock:
1401 	mutex_unlock(&trace_types_lock);
1402 	kfree(cond_snapshot);
1403 	return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1406 
1407 /**
1408  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1409  * @tr:		The tracing instance
1410  *
1411  * Check whether the conditional snapshot for the given instance is
1412  * enabled; if so, free the cond_snapshot associated with it,
1413  * otherwise return -EINVAL.
1414  *
1415  * Returns 0 if successful, error otherwise.
1416  */
1417 int tracing_snapshot_cond_disable(struct trace_array *tr)
1418 {
1419 	int ret = 0;
1420 
1421 	local_irq_disable();
1422 	arch_spin_lock(&tr->max_lock);
1423 
1424 	if (!tr->cond_snapshot)
1425 		ret = -EINVAL;
1426 	else {
1427 		kfree(tr->cond_snapshot);
1428 		tr->cond_snapshot = NULL;
1429 	}
1430 
1431 	arch_spin_unlock(&tr->max_lock);
1432 	local_irq_enable();
1433 
1434 	return ret;
1435 }
1436 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
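/*
 * Illustrative sketch (not part of the original file): how a user of
 * the conditional snapshot API (the histogram snapshot action is the
 * in-tree user) wires up an update callback.  The callback body and
 * "my_ctx" are hypothetical.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		// return true only when the snapshot should be taken
 *		return ((struct my_ctx *)cond_data)->armed;
 *	}
 *
 *	err = tracing_snapshot_cond_enable(tr, &ctx, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &ctx);	// swaps buffers only if
 *						// my_update() returns true
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */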
1437 #else
1438 void tracing_snapshot(void)
1439 {
1440 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1441 }
1442 EXPORT_SYMBOL_GPL(tracing_snapshot);
1443 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1444 {
1445 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1448 int tracing_alloc_snapshot(void)
1449 {
1450 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1451 	return -ENODEV;
1452 }
1453 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1454 void tracing_snapshot_alloc(void)
1455 {
1456 	/* Give warning */
1457 	tracing_snapshot();
1458 }
1459 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1460 void *tracing_cond_snapshot_data(struct trace_array *tr)
1461 {
1462 	return NULL;
1463 }
1464 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1465 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1466 {
1467 	return -ENODEV;
1468 }
1469 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1470 int tracing_snapshot_cond_disable(struct trace_array *tr)
1471 {
1472 	return false;
1473 }
1474 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1475 #define free_snapshot(tr)	do { } while (0)
1476 #endif /* CONFIG_TRACER_SNAPSHOT */
1477 
1478 void tracer_tracing_off(struct trace_array *tr)
1479 {
1480 	if (tr->array_buffer.buffer)
1481 		ring_buffer_record_off(tr->array_buffer.buffer);
1482 	/*
1483 	 * This flag is looked at when buffers haven't been allocated
1484 	 * yet, or by some tracers (like irqsoff), that just want to
1485 	 * know if the ring buffer has been disabled, but it can handle
1486 	 * races where it gets disabled but we still do a record.
1487 	 * As the check is in the fast path of the tracers, it is more
1488 	 * important to be fast than accurate.
1489 	 */
1490 	tr->buffer_disabled = 1;
1491 	/* Make the flag seen by readers */
1492 	smp_wmb();
1493 }
1494 
1495 /**
1496  * tracing_off - turn off tracing buffers
1497  *
1498  * This function stops the tracing buffers from recording data.
1499  * It does not disable any overhead the tracers themselves may
1500  * be causing. This function simply causes all recording to
1501  * the ring buffers to fail.
1502  */
1503 void tracing_off(void)
1504 {
1505 	tracer_tracing_off(&global_trace);
1506 }
1507 EXPORT_SYMBOL_GPL(tracing_off);
1508 
1509 void disable_trace_on_warning(void)
1510 {
1511 	if (__disable_trace_on_warning) {
1512 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1513 			"Disabling tracing due to warning\n");
1514 		tracing_off();
1515 	}
1516 }
1517 
1518 /**
1519  * tracer_tracing_is_on - show real state of ring buffer enabled
1520  * @tr : the trace array to know if ring buffer is enabled
1521  *
1522  * Shows real state of the ring buffer if it is enabled or not.
1523  */
1524 bool tracer_tracing_is_on(struct trace_array *tr)
1525 {
1526 	if (tr->array_buffer.buffer)
1527 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1528 	return !tr->buffer_disabled;
1529 }
1530 
1531 /**
1532  * tracing_is_on - show state of ring buffers enabled
1533  */
1534 int tracing_is_on(void)
1535 {
1536 	return tracer_tracing_is_on(&global_trace);
1537 }
1538 EXPORT_SYMBOL_GPL(tracing_is_on);
1539 
1540 static int __init set_buf_size(char *str)
1541 {
1542 	unsigned long buf_size;
1543 
1544 	if (!str)
1545 		return 0;
1546 	buf_size = memparse(str, &str);
1547 	/*
1548 	 * nr_entries cannot be zero and the startup
1549 	 * tests require some buffer space. Therefore
1550 	 * ensure we have at least 4096 bytes of buffer.
1551 	 */
1552 	trace_buf_size = max(4096UL, buf_size);
1553 	return 1;
1554 }
1555 __setup("trace_buf_size=", set_buf_size);
1556 
1557 static int __init set_tracing_thresh(char *str)
1558 {
1559 	unsigned long threshold;
1560 	int ret;
1561 
1562 	if (!str)
1563 		return 0;
1564 	ret = kstrtoul(str, 0, &threshold);
1565 	if (ret < 0)
1566 		return 0;
1567 	tracing_thresh = threshold * 1000;
1568 	return 1;
1569 }
1570 __setup("tracing_thresh=", set_tracing_thresh);
1571 
1572 unsigned long nsecs_to_usecs(unsigned long nsecs)
1573 {
1574 	return nsecs / 1000;
1575 }
1576 
1577 /*
1578  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1579  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1580  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1581  * of strings in the order that the evals (enum) were defined.
1582  */
1583 #undef C
1584 #define C(a, b) b
1585 
1586 /* These must match the bit positions in trace_iterator_flags */
1587 static const char *trace_options[] = {
1588 	TRACE_FLAGS
1589 	NULL
1590 };
1591 
1592 static struct {
1593 	u64 (*func)(void);
1594 	const char *name;
1595 	int in_ns;		/* is this clock in nanoseconds? */
1596 } trace_clocks[] = {
1597 	{ trace_clock_local,		"local",	1 },
1598 	{ trace_clock_global,		"global",	1 },
1599 	{ trace_clock_counter,		"counter",	0 },
1600 	{ trace_clock_jiffies,		"uptime",	0 },
1601 	{ trace_clock,			"perf",		1 },
1602 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1603 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1604 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1605 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1606 	ARCH_TRACE_CLOCKS
1607 };
1608 
1609 bool trace_clock_in_ns(struct trace_array *tr)
1610 {
1611 	if (trace_clocks[tr->clock_id].in_ns)
1612 		return true;
1613 
1614 	return false;
1615 }
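/*
 * Illustrative usage (not part of the original file): the clock for an
 * instance is selected through its "trace_clock" file, e.g.:
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot tai
 *	# echo mono > /sys/kernel/tracing/trace_clock
 *
 * Only clocks with .in_ns set report nanosecond timestamps, which is
 * what trace_clock_in_ns() above checks; "counter" and "uptime" do not.
 */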
1616 
1617 /*
1618  * trace_parser_get_init - gets the buffer for trace parser
1619  */
1620 int trace_parser_get_init(struct trace_parser *parser, int size)
1621 {
1622 	memset(parser, 0, sizeof(*parser));
1623 
1624 	parser->buffer = kmalloc(size, GFP_KERNEL);
1625 	if (!parser->buffer)
1626 		return 1;
1627 
1628 	parser->size = size;
1629 	return 0;
1630 }
1631 
1632 /*
1633  * trace_parser_put - frees the buffer for trace parser
1634  */
1635 void trace_parser_put(struct trace_parser *parser)
1636 {
1637 	kfree(parser->buffer);
1638 	parser->buffer = NULL;
1639 }
1640 
1641 /*
1642  * trace_get_user - reads the user input string separated by space
1643  * (matched by isspace(ch))
1644  *
1645  * For each string found the 'struct trace_parser' is updated,
1646  * and the function returns.
1647  *
1648  * Returns number of bytes read.
1649  *
1650  * See kernel/trace/trace.h for 'struct trace_parser' details.
1651  */
1652 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1653 	size_t cnt, loff_t *ppos)
1654 {
1655 	char ch;
1656 	size_t read = 0;
1657 	ssize_t ret;
1658 
1659 	if (!*ppos)
1660 		trace_parser_clear(parser);
1661 
1662 	ret = get_user(ch, ubuf++);
1663 	if (ret)
1664 		goto out;
1665 
1666 	read++;
1667 	cnt--;
1668 
1669 	/*
1670 	 * If the parser is not finished with the last write,
1671 	 * continue reading the user input without skipping spaces.
1672 	 */
1673 	if (!parser->cont) {
1674 		/* skip white space */
1675 		while (cnt && isspace(ch)) {
1676 			ret = get_user(ch, ubuf++);
1677 			if (ret)
1678 				goto out;
1679 			read++;
1680 			cnt--;
1681 		}
1682 
1683 		parser->idx = 0;
1684 
1685 		/* only spaces were written */
1686 		if (isspace(ch) || !ch) {
1687 			*ppos += read;
1688 			ret = read;
1689 			goto out;
1690 		}
1691 	}
1692 
1693 	/* read the non-space input */
1694 	while (cnt && !isspace(ch) && ch) {
1695 		if (parser->idx < parser->size - 1)
1696 			parser->buffer[parser->idx++] = ch;
1697 		else {
1698 			ret = -EINVAL;
1699 			goto out;
1700 		}
1701 		ret = get_user(ch, ubuf++);
1702 		if (ret)
1703 			goto out;
1704 		read++;
1705 		cnt--;
1706 	}
1707 
1708 	/* We either got finished input or we have to wait for another call. */
1709 	if (isspace(ch) || !ch) {
1710 		parser->buffer[parser->idx] = 0;
1711 		parser->cont = false;
1712 	} else if (parser->idx < parser->size - 1) {
1713 		parser->cont = true;
1714 		parser->buffer[parser->idx++] = ch;
1715 		/* Make sure the parsed string always terminates with '\0'. */
1716 		parser->buffer[parser->idx] = 0;
1717 	} else {
1718 		ret = -EINVAL;
1719 		goto out;
1720 	}
1721 
1722 	*ppos += read;
1723 	ret = read;
1724 
1725 out:
1726 	return ret;
1727 }
1728 
1729 /* TODO add a seq_buf_to_buffer() */
1730 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1731 {
1732 	int len;
1733 
1734 	if (trace_seq_used(s) <= s->readpos)
1735 		return -EBUSY;
1736 
1737 	len = trace_seq_used(s) - s->readpos;
1738 	if (cnt > len)
1739 		cnt = len;
1740 	memcpy(buf, s->buffer + s->readpos, cnt);
1741 
1742 	s->readpos += cnt;
1743 	return cnt;
1744 }
1745 
1746 unsigned long __read_mostly	tracing_thresh;
1747 
1748 #ifdef CONFIG_TRACER_MAX_TRACE
1749 static const struct file_operations tracing_max_lat_fops;
1750 
1751 #ifdef LATENCY_FS_NOTIFY
1752 
1753 static struct workqueue_struct *fsnotify_wq;
1754 
1755 static void latency_fsnotify_workfn(struct work_struct *work)
1756 {
1757 	struct trace_array *tr = container_of(work, struct trace_array,
1758 					      fsnotify_work);
1759 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1760 }
1761 
1762 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1763 {
1764 	struct trace_array *tr = container_of(iwork, struct trace_array,
1765 					      fsnotify_irqwork);
1766 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1767 }
1768 
1769 static void trace_create_maxlat_file(struct trace_array *tr,
1770 				     struct dentry *d_tracer)
1771 {
1772 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1773 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1774 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1775 					      TRACE_MODE_WRITE,
1776 					      d_tracer, tr,
1777 					      &tracing_max_lat_fops);
1778 }
1779 
1780 __init static int latency_fsnotify_init(void)
1781 {
1782 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1783 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1784 	if (!fsnotify_wq) {
1785 		pr_err("Unable to allocate tr_max_lat_wq\n");
1786 		return -ENOMEM;
1787 	}
1788 	return 0;
1789 }
1790 
1791 late_initcall_sync(latency_fsnotify_init);
1792 
1793 void latency_fsnotify(struct trace_array *tr)
1794 {
1795 	if (!fsnotify_wq)
1796 		return;
1797 	/*
1798 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1799 	 * possible that we are called from __schedule() or do_idle(), which
1800 	 * could cause a deadlock.
1801 	 */
1802 	irq_work_queue(&tr->fsnotify_irqwork);
1803 }
1804 
1805 #else /* !LATENCY_FS_NOTIFY */
1806 
1807 #define trace_create_maxlat_file(tr, d_tracer)				\
1808 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1809 			  d_tracer, tr, &tracing_max_lat_fops)
1810 
1811 #endif
1812 
1813 /*
1814  * Copy the new maximum trace into the separate maximum-trace
1815  * structure. (this way the maximum trace is permanently saved,
1816  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1817  */
1818 static void
1819 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1820 {
1821 	struct array_buffer *trace_buf = &tr->array_buffer;
1822 	struct array_buffer *max_buf = &tr->max_buffer;
1823 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1824 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1825 
1826 	max_buf->cpu = cpu;
1827 	max_buf->time_start = data->preempt_timestamp;
1828 
1829 	max_data->saved_latency = tr->max_latency;
1830 	max_data->critical_start = data->critical_start;
1831 	max_data->critical_end = data->critical_end;
1832 
1833 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1834 	max_data->pid = tsk->pid;
1835 	/*
1836 	 * If tsk == current, then use current_uid(), as that does not use
1837 	 * RCU. The irq tracer can be called out of RCU scope.
1838 	 */
1839 	if (tsk == current)
1840 		max_data->uid = current_uid();
1841 	else
1842 		max_data->uid = task_uid(tsk);
1843 
1844 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1845 	max_data->policy = tsk->policy;
1846 	max_data->rt_priority = tsk->rt_priority;
1847 
1848 	/* record this tasks comm */
1849 	tracing_record_cmdline(tsk);
1850 	latency_fsnotify(tr);
1851 }
1852 
1853 /**
1854  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1855  * @tr: tracer
1856  * @tsk: the task with the latency
1857  * @cpu: The cpu that initiated the trace.
1858  * @cond_data: User data associated with a conditional snapshot
1859  *
1860  * Flip the buffers between the @tr and the max_tr and record information
1861  * about which task was the cause of this latency.
1862  */
1863 void
1864 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1865 	      void *cond_data)
1866 {
1867 	if (tr->stop_count)
1868 		return;
1869 
1870 	WARN_ON_ONCE(!irqs_disabled());
1871 
1872 	if (!tr->allocated_snapshot) {
1873 		/* Only the nop tracer should hit this when disabling */
1874 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875 		return;
1876 	}
1877 
1878 	arch_spin_lock(&tr->max_lock);
1879 
1880 	/* Inherit the recordable setting from array_buffer */
1881 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1882 		ring_buffer_record_on(tr->max_buffer.buffer);
1883 	else
1884 		ring_buffer_record_off(tr->max_buffer.buffer);
1885 
1886 #ifdef CONFIG_TRACER_SNAPSHOT
1887 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1888 		arch_spin_unlock(&tr->max_lock);
1889 		return;
1890 	}
1891 #endif
1892 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1893 
1894 	__update_max_tr(tr, tsk, cpu);
1895 
1896 	arch_spin_unlock(&tr->max_lock);
1897 
1898 	/* Any waiters on the old snapshot buffer need to wake up */
1899 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1900 }
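
/*
 * Illustrative sketch, not code from this file: how a latency tracer is
 * expected to drive update_max_tr(). check_timing() and "delta" are
 * hypothetical names; tr->max_latency, data->preempt_timestamp,
 * ftrace_now() and update_max_tr() are the real interfaces used here.
 *
 *	static void check_timing(struct trace_array *tr,
 *				 struct trace_array_cpu *data, int cpu)
 *	{
 *		u64 delta = ftrace_now(cpu) - data->preempt_timestamp;
 *
 *		if (delta <= tr->max_latency)
 *			return;
 *
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, cpu, NULL);
 *	}
 */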
1901 
1902 /**
1903  * update_max_tr_single - only copy one trace over, and reset the rest
1904  * @tr: tracer
1905  * @tsk: task with the latency
1906  * @cpu: the cpu of the buffer to copy.
1907  *
1908  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1909  */
1910 void
1911 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1912 {
1913 	int ret;
1914 
1915 	if (tr->stop_count)
1916 		return;
1917 
1918 	WARN_ON_ONCE(!irqs_disabled());
1919 	if (!tr->allocated_snapshot) {
1920 		/* Only the nop tracer should hit this when disabling */
1921 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1922 		return;
1923 	}
1924 
1925 	arch_spin_lock(&tr->max_lock);
1926 
1927 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1928 
1929 	if (ret == -EBUSY) {
1930 		/*
1931 		 * We failed to swap the buffer due to a commit taking
1932 		 * place on this CPU. We fail to record, but we reset
1933 		 * the max trace buffer (no one writes directly to it)
1934 		 * and flag that it failed.
1935 		 * Another reason is that a resize is in progress.
1936 		 */
1937 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1938 			"Failed to swap buffers due to commit or resize in progress\n");
1939 	}
1940 
1941 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1942 
1943 	__update_max_tr(tr, tsk, cpu);
1944 	arch_spin_unlock(&tr->max_lock);
1945 }
1946 
1947 #endif /* CONFIG_TRACER_MAX_TRACE */
1948 
1949 static int wait_on_pipe(struct trace_iterator *iter, int full)
1950 {
1951 	int ret;
1952 
1953 	/* Iterators are static, they should be filled or empty */
1954 	if (trace_buffer_iter(iter, iter->cpu_file))
1955 		return 0;
1956 
1957 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1958 
1959 #ifdef CONFIG_TRACER_MAX_TRACE
1960 	/*
1961 	 * Make sure this is still the snapshot buffer, as if a snapshot were
1962 	 * to happen, this would now be the main buffer.
1963 	 */
1964 	if (iter->snapshot)
1965 		iter->array_buffer = &iter->tr->max_buffer;
1966 #endif
1967 	return ret;
1968 }
1969 
1970 #ifdef CONFIG_FTRACE_STARTUP_TEST
1971 static bool selftests_can_run;
1972 
1973 struct trace_selftests {
1974 	struct list_head		list;
1975 	struct tracer			*type;
1976 };
1977 
1978 static LIST_HEAD(postponed_selftests);
1979 
1980 static int save_selftest(struct tracer *type)
1981 {
1982 	struct trace_selftests *selftest;
1983 
1984 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1985 	if (!selftest)
1986 		return -ENOMEM;
1987 
1988 	selftest->type = type;
1989 	list_add(&selftest->list, &postponed_selftests);
1990 	return 0;
1991 }
1992 
1993 static int run_tracer_selftest(struct tracer *type)
1994 {
1995 	struct trace_array *tr = &global_trace;
1996 	struct tracer *saved_tracer = tr->current_trace;
1997 	int ret;
1998 
1999 	if (!type->selftest || tracing_selftest_disabled)
2000 		return 0;
2001 
2002 	/*
2003 	 * If a tracer registers early in boot up (before scheduling is
2004 	 * initialized and such), then do not run its selftests yet.
2005 	 * Instead, run it a little later in the boot process.
2006 	 */
2007 	if (!selftests_can_run)
2008 		return save_selftest(type);
2009 
2010 	if (!tracing_is_on()) {
2011 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2012 			type->name);
2013 		return 0;
2014 	}
2015 
2016 	/*
2017 	 * Run a selftest on this tracer.
2018 	 * Here we reset the trace buffer, and set the current
2019 	 * tracer to be this tracer. The tracer can then run some
2020 	 * internal tracing to verify that everything is in order.
2021 	 * If we fail, we do not register this tracer.
2022 	 */
2023 	tracing_reset_online_cpus(&tr->array_buffer);
2024 
2025 	tr->current_trace = type;
2026 
2027 #ifdef CONFIG_TRACER_MAX_TRACE
2028 	if (type->use_max_tr) {
2029 		/* If we expanded the buffers, make sure the max is expanded too */
2030 		if (tr->ring_buffer_expanded)
2031 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2032 					   RING_BUFFER_ALL_CPUS);
2033 		tr->allocated_snapshot = true;
2034 	}
2035 #endif
2036 
2037 	/* the test is responsible for initializing and enabling */
2038 	pr_info("Testing tracer %s: ", type->name);
2039 	ret = type->selftest(type, tr);
2040 	/* the test is responsible for resetting too */
2041 	tr->current_trace = saved_tracer;
2042 	if (ret) {
2043 		printk(KERN_CONT "FAILED!\n");
2044 		/* Add the warning after printing 'FAILED' */
2045 		WARN_ON(1);
2046 		return -1;
2047 	}
2048 	/* Only reset on passing, to avoid touching corrupted buffers */
2049 	tracing_reset_online_cpus(&tr->array_buffer);
2050 
2051 #ifdef CONFIG_TRACER_MAX_TRACE
2052 	if (type->use_max_tr) {
2053 		tr->allocated_snapshot = false;
2054 
2055 		/* Shrink the max buffer again */
2056 		if (tr->ring_buffer_expanded)
2057 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2058 					   RING_BUFFER_ALL_CPUS);
2059 	}
2060 #endif
2061 
2062 	printk(KERN_CONT "PASSED\n");
2063 	return 0;
2064 }
2065 
2066 static int do_run_tracer_selftest(struct tracer *type)
2067 {
2068 	int ret;
2069 
2070 	/*
2071 	 * Tests can take a long time, especially if they are run one after the
2072 	 * other, as does happen during bootup when all the tracers are
2073 	 * registered. This could cause the soft lockup watchdog to trigger.
2074 	 */
2075 	cond_resched();
2076 
2077 	tracing_selftest_running = true;
2078 	ret = run_tracer_selftest(type);
2079 	tracing_selftest_running = false;
2080 
2081 	return ret;
2082 }
2083 
2084 static __init int init_trace_selftests(void)
2085 {
2086 	struct trace_selftests *p, *n;
2087 	struct tracer *t, **last;
2088 	int ret;
2089 
2090 	selftests_can_run = true;
2091 
2092 	mutex_lock(&trace_types_lock);
2093 
2094 	if (list_empty(&postponed_selftests))
2095 		goto out;
2096 
2097 	pr_info("Running postponed tracer tests:\n");
2098 
2099 	tracing_selftest_running = true;
2100 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2101 		/* This loop can take minutes when sanitizers are enabled, so
2102 		 * let's make sure we allow RCU processing.
2103 		 */
2104 		cond_resched();
2105 		ret = run_tracer_selftest(p->type);
2106 		/* If the test fails, then warn and remove from available_tracers */
2107 		if (ret < 0) {
2108 			WARN(1, "tracer: %s failed selftest, disabling\n",
2109 			     p->type->name);
2110 			last = &trace_types;
2111 			for (t = trace_types; t; t = t->next) {
2112 				if (t == p->type) {
2113 					*last = t->next;
2114 					break;
2115 				}
2116 				last = &t->next;
2117 			}
2118 		}
2119 		list_del(&p->list);
2120 		kfree(p);
2121 	}
2122 	tracing_selftest_running = false;
2123 
2124  out:
2125 	mutex_unlock(&trace_types_lock);
2126 
2127 	return 0;
2128 }
2129 core_initcall(init_trace_selftests);
2130 #else
2131 static inline int run_tracer_selftest(struct tracer *type)
2132 {
2133 	return 0;
2134 }
2135 static inline int do_run_tracer_selftest(struct tracer *type)
2136 {
2137 	return 0;
2138 }
2139 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2140 
2141 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2142 
2143 static void __init apply_trace_boot_options(void);
2144 
2145 /**
2146  * register_tracer - register a tracer with the ftrace system.
2147  * @type: the plugin for the tracer
2148  *
2149  * Register a new plugin tracer.
2150  */
2151 int __init register_tracer(struct tracer *type)
2152 {
2153 	struct tracer *t;
2154 	int ret = 0;
2155 
2156 	if (!type->name) {
2157 		pr_info("Tracer must have a name\n");
2158 		return -1;
2159 	}
2160 
2161 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2162 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2163 		return -1;
2164 	}
2165 
2166 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2167 		pr_warn("Can not register tracer %s due to lockdown\n",
2168 			   type->name);
2169 		return -EPERM;
2170 	}
2171 
2172 	mutex_lock(&trace_types_lock);
2173 
2174 	for (t = trace_types; t; t = t->next) {
2175 		if (strcmp(type->name, t->name) == 0) {
2176 			/* already found */
2177 			pr_info("Tracer %s already registered\n",
2178 				type->name);
2179 			ret = -1;
2180 			goto out;
2181 		}
2182 	}
2183 
2184 	if (!type->set_flag)
2185 		type->set_flag = &dummy_set_flag;
2186 	if (!type->flags) {
2187 		/* allocate a dummy tracer_flags */
2188 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2189 		if (!type->flags) {
2190 			ret = -ENOMEM;
2191 			goto out;
2192 		}
2193 		type->flags->val = 0;
2194 		type->flags->opts = dummy_tracer_opt;
2195 	} else if (!type->flags->opts)
2196 		type->flags->opts = dummy_tracer_opt;
2198 
2199 	/* store the tracer for __set_tracer_option */
2200 	type->flags->trace = type;
2201 
2202 	ret = do_run_tracer_selftest(type);
2203 	if (ret < 0)
2204 		goto out;
2205 
2206 	type->next = trace_types;
2207 	trace_types = type;
2208 	add_tracer_options(&global_trace, type);
2209 
2210  out:
2211 	mutex_unlock(&trace_types_lock);
2212 
2213 	if (ret || !default_bootup_tracer)
2214 		goto out_unlock;
2215 
2216 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2217 		goto out_unlock;
2218 
2219 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2220 	/* Do we want this tracer to start on bootup? */
2221 	tracing_set_tracer(&global_trace, type->name);
2222 	default_bootup_tracer = NULL;
2223 
2224 	apply_trace_boot_options();
2225 
2226 	/* Disable other selftests, since running this tracer will break them. */
2227 	disable_tracing_selftest("running a tracer");
2228 
2229  out_unlock:
2230 	return ret;
2231 }
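
/*
 * Illustrative sketch, not code from this file: registering a minimal
 * tracer in the style of the built-in ones. The "mytrace" name and the
 * my_tracer_* callbacks are hypothetical; register_tracer() is __init,
 * so it has to be called from boot-time initialization.
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "mytrace",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */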
2232 
2233 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2234 {
2235 	struct trace_buffer *buffer = buf->buffer;
2236 
2237 	if (!buffer)
2238 		return;
2239 
2240 	ring_buffer_record_disable(buffer);
2241 
2242 	/* Make sure all commits have finished */
2243 	synchronize_rcu();
2244 	ring_buffer_reset_cpu(buffer, cpu);
2245 
2246 	ring_buffer_record_enable(buffer);
2247 }
2248 
2249 void tracing_reset_online_cpus(struct array_buffer *buf)
2250 {
2251 	struct trace_buffer *buffer = buf->buffer;
2252 
2253 	if (!buffer)
2254 		return;
2255 
2256 	ring_buffer_record_disable(buffer);
2257 
2258 	/* Make sure all commits have finished */
2259 	synchronize_rcu();
2260 
2261 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2262 
2263 	ring_buffer_reset_online_cpus(buffer);
2264 
2265 	ring_buffer_record_enable(buffer);
2266 }
2267 
2268 /* Must have trace_types_lock held */
2269 void tracing_reset_all_online_cpus_unlocked(void)
2270 {
2271 	struct trace_array *tr;
2272 
2273 	lockdep_assert_held(&trace_types_lock);
2274 
2275 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2276 		if (!tr->clear_trace)
2277 			continue;
2278 		tr->clear_trace = false;
2279 		tracing_reset_online_cpus(&tr->array_buffer);
2280 #ifdef CONFIG_TRACER_MAX_TRACE
2281 		tracing_reset_online_cpus(&tr->max_buffer);
2282 #endif
2283 	}
2284 }
2285 
2286 void tracing_reset_all_online_cpus(void)
2287 {
2288 	mutex_lock(&trace_types_lock);
2289 	tracing_reset_all_online_cpus_unlocked();
2290 	mutex_unlock(&trace_types_lock);
2291 }
2292 
2293 /*
2294  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2295  * is the tgid last observed corresponding to pid=i.
2296  */
2297 static int *tgid_map;
2298 
2299 /* The maximum valid index into tgid_map. */
2300 static size_t tgid_map_max;
2301 
2302 #define SAVED_CMDLINES_DEFAULT 128
2303 #define NO_CMDLINE_MAP UINT_MAX
2304 /*
2305  * Preemption must be disabled before acquiring trace_cmdline_lock.
2306  * The various trace_arrays' max_lock must be acquired in a context
2307  * where interrupts are disabled.
2308  */
2309 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2310 struct saved_cmdlines_buffer {
2311 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2312 	unsigned *map_cmdline_to_pid;
2313 	unsigned cmdline_num;
2314 	int cmdline_idx;
2315 	char *saved_cmdlines;
2316 };
2317 static struct saved_cmdlines_buffer *savedcmd;
2318 
2319 static inline char *get_saved_cmdlines(int idx)
2320 {
2321 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2322 }
2323 
2324 static inline void set_cmdline(int idx, const char *cmdline)
2325 {
2326 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2327 }
2328 
2329 static int allocate_cmdlines_buffer(unsigned int val,
2330 				    struct saved_cmdlines_buffer *s)
2331 {
2332 	s->map_cmdline_to_pid = kmalloc_array(val,
2333 					      sizeof(*s->map_cmdline_to_pid),
2334 					      GFP_KERNEL);
2335 	if (!s->map_cmdline_to_pid)
2336 		return -ENOMEM;
2337 
2338 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2339 	if (!s->saved_cmdlines) {
2340 		kfree(s->map_cmdline_to_pid);
2341 		return -ENOMEM;
2342 	}
2343 
2344 	s->cmdline_idx = 0;
2345 	s->cmdline_num = val;
2346 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2347 	       sizeof(s->map_pid_to_cmdline));
2348 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2349 	       val * sizeof(*s->map_cmdline_to_pid));
2350 
2351 	return 0;
2352 }
2353 
2354 static int trace_create_savedcmd(void)
2355 {
2356 	int ret;
2357 
2358 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2359 	if (!savedcmd)
2360 		return -ENOMEM;
2361 
2362 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2363 	if (ret < 0) {
2364 		kfree(savedcmd);
2365 		savedcmd = NULL;
2366 		return -ENOMEM;
2367 	}
2368 
2369 	return 0;
2370 }
2371 
2372 int is_tracing_stopped(void)
2373 {
2374 	return global_trace.stop_count;
2375 }
2376 
2377 static void tracing_start_tr(struct trace_array *tr)
2378 {
2379 	struct trace_buffer *buffer;
2380 	unsigned long flags;
2381 
2382 	if (tracing_disabled)
2383 		return;
2384 
2385 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2386 	if (--tr->stop_count) {
2387 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2388 			/* Someone screwed up their debugging */
2389 			tr->stop_count = 0;
2390 		}
2391 		goto out;
2392 	}
2393 
2394 	/* Prevent the buffers from switching */
2395 	arch_spin_lock(&tr->max_lock);
2396 
2397 	buffer = tr->array_buffer.buffer;
2398 	if (buffer)
2399 		ring_buffer_record_enable(buffer);
2400 
2401 #ifdef CONFIG_TRACER_MAX_TRACE
2402 	buffer = tr->max_buffer.buffer;
2403 	if (buffer)
2404 		ring_buffer_record_enable(buffer);
2405 #endif
2406 
2407 	arch_spin_unlock(&tr->max_lock);
2408 
2409  out:
2410 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2411 }
2412 
2413 /**
2414  * tracing_start - quick start of the tracer
2415  *
2416  * If tracing is enabled but was stopped by tracing_stop,
2417  * this will start the tracer back up.
2418  */
2419 void tracing_start(void)
2421 {
2422 	return tracing_start_tr(&global_trace);
2423 }
2424 
2425 static void tracing_stop_tr(struct trace_array *tr)
2426 {
2427 	struct trace_buffer *buffer;
2428 	unsigned long flags;
2429 
2430 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2431 	if (tr->stop_count++)
2432 		goto out;
2433 
2434 	/* Prevent the buffers from switching */
2435 	arch_spin_lock(&tr->max_lock);
2436 
2437 	buffer = tr->array_buffer.buffer;
2438 	if (buffer)
2439 		ring_buffer_record_disable(buffer);
2440 
2441 #ifdef CONFIG_TRACER_MAX_TRACE
2442 	buffer = tr->max_buffer.buffer;
2443 	if (buffer)
2444 		ring_buffer_record_disable(buffer);
2445 #endif
2446 
2447 	arch_spin_unlock(&tr->max_lock);
2448 
2449  out:
2450 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2451 }
2452 
2453 /**
2454  * tracing_stop - quick stop of the tracer
2455  *
2456  * Lightweight way to stop tracing. Use in conjunction with
2457  * tracing_start.
2458  */
2459 void tracing_stop(void)
2460 {
2461 	return tracing_stop_tr(&global_trace);
2462 }
2463 
2464 static int trace_save_cmdline(struct task_struct *tsk)
2465 {
2466 	unsigned tpid, idx;
2467 
2468 	/* treat recording of idle task as a success */
2469 	if (!tsk->pid)
2470 		return 1;
2471 
2472 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2473 
2474 	/*
2475 	 * It's not the end of the world if we don't get
2476 	 * the lock, but we also don't want to spin
2477 	 * nor do we want to disable interrupts,
2478 	 * so if we miss here, then better luck next time.
2479 	 *
2480 	 * This is called within the scheduler and wakeup paths, so interrupts
2481 	 * had better be disabled and the run queue lock held.
2482 	 */
2483 	lockdep_assert_preemption_disabled();
2484 	if (!arch_spin_trylock(&trace_cmdline_lock))
2485 		return 0;
2486 
2487 	idx = savedcmd->map_pid_to_cmdline[tpid];
2488 	if (idx == NO_CMDLINE_MAP) {
2489 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2490 
2491 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2492 		savedcmd->cmdline_idx = idx;
2493 	}
2494 
2495 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2496 	set_cmdline(idx, tsk->comm);
2497 
2498 	arch_spin_unlock(&trace_cmdline_lock);
2499 
2500 	return 1;
2501 }
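
/*
 * Worked example of the mapping above, assuming the default of 128 saved
 * cmdlines: a task with pid 4097 maps to tpid 4097 (pids below
 * PID_MAX_DEFAULT map to themselves) and claims the next slot in the
 * ring, say idx 5, so that afterwards:
 *
 *	savedcmd->map_pid_to_cmdline[4097] == 5
 *	savedcmd->map_cmdline_to_pid[5]    == 4097
 *	get_saved_cmdlines(5)              points at that task's comm
 *
 * __trace_find_cmdline() below checks the reverse mapping, so a slot
 * that has since been recycled for another pid is reported as "<...>"
 * rather than as the wrong comm.
 */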
2502 
2503 static void __trace_find_cmdline(int pid, char comm[])
2504 {
2505 	unsigned map;
2506 	int tpid;
2507 
2508 	if (!pid) {
2509 		strcpy(comm, "<idle>");
2510 		return;
2511 	}
2512 
2513 	if (WARN_ON_ONCE(pid < 0)) {
2514 		strcpy(comm, "<XXX>");
2515 		return;
2516 	}
2517 
2518 	tpid = pid & (PID_MAX_DEFAULT - 1);
2519 	map = savedcmd->map_pid_to_cmdline[tpid];
2520 	if (map != NO_CMDLINE_MAP) {
2521 		tpid = savedcmd->map_cmdline_to_pid[map];
2522 		if (tpid == pid) {
2523 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2524 			return;
2525 		}
2526 	}
2527 	strcpy(comm, "<...>");
2528 }
2529 
2530 void trace_find_cmdline(int pid, char comm[])
2531 {
2532 	preempt_disable();
2533 	arch_spin_lock(&trace_cmdline_lock);
2534 
2535 	__trace_find_cmdline(pid, comm);
2536 
2537 	arch_spin_unlock(&trace_cmdline_lock);
2538 	preempt_enable();
2539 }
2540 
2541 static int *trace_find_tgid_ptr(int pid)
2542 {
2543 	/*
2544 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2545 	 * if we observe a non-NULL tgid_map then we also observe the correct
2546 	 * tgid_map_max.
2547 	 */
2548 	int *map = smp_load_acquire(&tgid_map);
2549 
2550 	if (unlikely(!map || pid > tgid_map_max))
2551 		return NULL;
2552 
2553 	return &map[pid];
2554 }
2555 
2556 int trace_find_tgid(int pid)
2557 {
2558 	int *ptr = trace_find_tgid_ptr(pid);
2559 
2560 	return ptr ? *ptr : 0;
2561 }
2562 
2563 static int trace_save_tgid(struct task_struct *tsk)
2564 {
2565 	int *ptr;
2566 
2567 	/* treat recording of idle task as a success */
2568 	if (!tsk->pid)
2569 		return 1;
2570 
2571 	ptr = trace_find_tgid_ptr(tsk->pid);
2572 	if (!ptr)
2573 		return 0;
2574 
2575 	*ptr = tsk->tgid;
2576 	return 1;
2577 }
2578 
2579 static bool tracing_record_taskinfo_skip(int flags)
2580 {
2581 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2582 		return true;
2583 	if (!__this_cpu_read(trace_taskinfo_save))
2584 		return true;
2585 	return false;
2586 }
2587 
2588 /**
2589  * tracing_record_taskinfo - record the task info of a task
2590  *
2591  * @task:  task to record
2592  * @flags: TRACE_RECORD_CMDLINE for recording comm
2593  *         TRACE_RECORD_TGID for recording tgid
2594  */
2595 void tracing_record_taskinfo(struct task_struct *task, int flags)
2596 {
2597 	bool done;
2598 
2599 	if (tracing_record_taskinfo_skip(flags))
2600 		return;
2601 
2602 	/*
2603 	 * Record as much task information as possible. If some fail, continue
2604 	 * to try to record the others.
2605 	 */
2606 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2607 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2608 
2609 	/* If recording any information failed, retry again soon. */
2610 	if (!done)
2611 		return;
2612 
2613 	__this_cpu_write(trace_taskinfo_save, false);
2614 }
2615 
2616 /**
2617  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2618  *
2619  * @prev: previous task during sched_switch
2620  * @next: next task during sched_switch
2621  * @flags: TRACE_RECORD_CMDLINE for recording comm
2622  *         TRACE_RECORD_TGID for recording tgid
2623  */
2624 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2625 					  struct task_struct *next, int flags)
2626 {
2627 	bool done;
2628 
2629 	if (tracing_record_taskinfo_skip(flags))
2630 		return;
2631 
2632 	/*
2633 	 * Record as much task information as possible. If some fail, continue
2634 	 * to try to record the others.
2635 	 */
2636 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2637 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2638 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2639 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2640 
2641 	/* If recording any information failed, retry again soon. */
2642 	if (!done)
2643 		return;
2644 
2645 	__this_cpu_write(trace_taskinfo_save, false);
2646 }
2647 
2648 /* Helpers to record a specific task information */
2649 void tracing_record_cmdline(struct task_struct *task)
2650 {
2651 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2652 }
2653 
2654 void tracing_record_tgid(struct task_struct *task)
2655 {
2656 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2657 }
2658 
2659 /*
2660  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2661  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2662  * simplifies those functions and keeps them in sync.
2663  */
2664 enum print_line_t trace_handle_return(struct trace_seq *s)
2665 {
2666 	return trace_seq_has_overflowed(s) ?
2667 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2668 }
2669 EXPORT_SYMBOL_GPL(trace_handle_return);
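
/*
 * Illustrative sketch, not code from this file: the usual pattern in an
 * event's output callback. The my_event_trace() name and the format
 * string are hypothetical; the helpers are the real ones.
 *
 *	static enum print_line_t my_event_trace(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "my_event: %d\n", 42);
 *		return trace_handle_return(s);
 *	}
 */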
2670 
2671 static unsigned short migration_disable_value(void)
2672 {
2673 #if defined(CONFIG_SMP)
2674 	return current->migration_disabled;
2675 #else
2676 	return 0;
2677 #endif
2678 }
2679 
2680 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2681 {
2682 	unsigned int trace_flags = irqs_status;
2683 	unsigned int pc;
2684 
2685 	pc = preempt_count();
2686 
2687 	if (pc & NMI_MASK)
2688 		trace_flags |= TRACE_FLAG_NMI;
2689 	if (pc & HARDIRQ_MASK)
2690 		trace_flags |= TRACE_FLAG_HARDIRQ;
2691 	if (in_serving_softirq())
2692 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2693 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2694 		trace_flags |= TRACE_FLAG_BH_OFF;
2695 
2696 	if (tif_need_resched())
2697 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2698 	if (test_preempt_need_resched())
2699 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2700 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2701 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2702 }
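
/*
 * For reference, the trace_ctx word built above packs its fields as
 * follows (read directly from the return expression; the 0xf clamps cap
 * the depth counters at 15):
 *
 *	bits  0- 3: preemption-disable depth
 *	bits  4- 7: migration-disable depth
 *	bits  8-15: unused
 *	bits 16-31: TRACE_FLAG_* bits (NMI, hardirq, softirq, resched, ...)
 */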
2703 
2704 struct ring_buffer_event *
2705 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2706 			  int type,
2707 			  unsigned long len,
2708 			  unsigned int trace_ctx)
2709 {
2710 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2711 }
2712 
2713 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2714 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2715 static int trace_buffered_event_ref;
2716 
2717 /**
2718  * trace_buffered_event_enable - enable buffering events
2719  *
2720  * When events are being filtered, it is quicker to use a temporary
2721  * buffer to write the event data into if there's a good chance
2722  * that it will not be committed. Discarding an event from the ring
2723  * buffer is not as fast as committing one, and is much slower than
2724  * copying the data and then committing it.
2725  *
2726  * When an event is to be filtered, allocate per cpu buffers to
2727  * write the event data into, and if the event is filtered and discarded
2728  * it is simply dropped, otherwise, the entire data is to be committed
2729  * in one shot.
2730  */
2731 void trace_buffered_event_enable(void)
2732 {
2733 	struct ring_buffer_event *event;
2734 	struct page *page;
2735 	int cpu;
2736 
2737 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2738 
2739 	if (trace_buffered_event_ref++)
2740 		return;
2741 
2742 	for_each_tracing_cpu(cpu) {
2743 		page = alloc_pages_node(cpu_to_node(cpu),
2744 					GFP_KERNEL | __GFP_NORETRY, 0);
2745 		/* This is just an optimization and can handle failures */
2746 		if (!page) {
2747 			pr_err("Failed to allocate event buffer\n");
2748 			break;
2749 		}
2750 
2751 		event = page_address(page);
2752 		memset(event, 0, sizeof(*event));
2753 
2754 		per_cpu(trace_buffered_event, cpu) = event;
2755 
2756 		preempt_disable();
2757 		if (cpu == smp_processor_id() &&
2758 		    __this_cpu_read(trace_buffered_event) !=
2759 		    per_cpu(trace_buffered_event, cpu))
2760 			WARN_ON_ONCE(1);
2761 		preempt_enable();
2762 	}
2763 }
2764 
2765 static void enable_trace_buffered_event(void *data)
2766 {
2767 	/* Probably not needed, but do it anyway */
2768 	smp_rmb();
2769 	this_cpu_dec(trace_buffered_event_cnt);
2770 }
2771 
2772 static void disable_trace_buffered_event(void *data)
2773 {
2774 	this_cpu_inc(trace_buffered_event_cnt);
2775 }
2776 
2777 /**
2778  * trace_buffered_event_disable - disable buffering events
2779  *
2780  * When a filter is removed, it is faster to not use the buffered
2781  * events, and to commit directly into the ring buffer. Free up
2782  * the temp buffers when there are no more users. This requires
2783  * special synchronization with current events.
2784  */
2785 void trace_buffered_event_disable(void)
2786 {
2787 	int cpu;
2788 
2789 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2790 
2791 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2792 		return;
2793 
2794 	if (--trace_buffered_event_ref)
2795 		return;
2796 
2797 	/* For each CPU, set the buffer as used. */
2798 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2799 			 NULL, true);
2800 
2801 	/* Wait for all current users to finish */
2802 	synchronize_rcu();
2803 
2804 	for_each_tracing_cpu(cpu) {
2805 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2806 		per_cpu(trace_buffered_event, cpu) = NULL;
2807 	}
2808 
2809 	/*
2810 	 * Wait for all CPUs that may have started checking whether they can use
2811 	 * their event buffer only after the previous synchronize_rcu() call,
2812 	 * while still holding a valid pointer read from trace_buffered_event.
2813 	 * They must not see the cleared trace_buffered_event_cnt, or they could
2814 	 * wrongly decide to use the pointed-to buffer, which has now been freed.
2815 	 */
2816 	synchronize_rcu();
2817 
2818 	/* For each CPU, relinquish the buffer */
2819 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2820 			 true);
2821 }
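
/*
 * Illustrative sketch, not code from this file: the enable/disable pair
 * is reference counted and is meant to bracket the lifetime of an event
 * filter, roughly as the event filter code does. install_my_filter() and
 * remove_my_filter() are hypothetical stand-ins for the filter setup;
 * holding event_mutex is required by the WARN_ON_ONCE() checks above.
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	install_my_filter(file);
 *	mutex_unlock(&event_mutex);
 *
 *	...
 *
 *	mutex_lock(&event_mutex);
 *	remove_my_filter(file);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */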
2822 
2823 static struct trace_buffer *temp_buffer;
2824 
2825 struct ring_buffer_event *
2826 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2827 			  struct trace_event_file *trace_file,
2828 			  int type, unsigned long len,
2829 			  unsigned int trace_ctx)
2830 {
2831 	struct ring_buffer_event *entry;
2832 	struct trace_array *tr = trace_file->tr;
2833 	int val;
2834 
2835 	*current_rb = tr->array_buffer.buffer;
2836 
2837 	if (!tr->no_filter_buffering_ref &&
2838 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2839 		preempt_disable_notrace();
2840 		/*
2841 		 * Filtering is on, so try to use the per cpu buffer first.
2842 		 * This buffer will simulate a ring_buffer_event,
2843 		 * where the type_len is zero and the array[0] will
2844 		 * hold the full length.
2845 		 * (see include/linux/ring_buffer.h for details on
2846 		 *  how the ring_buffer_event is structured).
2847 		 *
2848 		 * Using a temp buffer during filtering and copying it
2849 		 * on a matched filter is quicker than writing directly
2850 		 * into the ring buffer and then discarding it when
2851 		 * it doesn't match. That is because the discard
2852 		 * requires several atomic operations to get right.
2853 		 * Copying on a match and doing nothing on a failed match
2854 		 * is still quicker than writing directly into the ring
2855 		 * buffer and having to discard it on a failed match.
2856 		 */
2857 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2858 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2859 
2860 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2861 
2862 			/*
2863 			 * Preemption is disabled, but interrupts and NMIs
2864 			 * can still come in now. If that happens after
2865 			 * the above increment, then it will have to go
2866 			 * back to the old method of allocating the event
2867 			 * on the ring buffer, and if the filter fails, it
2868 			 * will have to call ring_buffer_discard_commit()
2869 			 * to remove it.
2870 			 *
2871 			 * Need to also check the unlikely case that the
2872 			 * length is bigger than the temp buffer size.
2873 			 * If that happens, then the reserve is pretty much
2874 			 * guaranteed to fail, as the ring buffer currently
2875 			 * only allows events less than a page. But that may
2876 			 * change in the future, so let the ring buffer reserve
2877 			 * handle the failure in that case.
2878 			 */
2879 			if (val == 1 && likely(len <= max_len)) {
2880 				trace_event_setup(entry, type, trace_ctx);
2881 				entry->array[0] = len;
2882 				/* Return with preemption disabled */
2883 				return entry;
2884 			}
2885 			this_cpu_dec(trace_buffered_event_cnt);
2886 		}
2887 		/* __trace_buffer_lock_reserve() disables preemption */
2888 		preempt_enable_notrace();
2889 	}
2890 
2891 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2892 					    trace_ctx);
2893 	/*
2894 	 * If tracing is off, but we have triggers enabled
2895 	 * we still need to look at the event data. Use the temp_buffer
2896 	 * to store the trace event for the trigger to use. It's recursion
2897 	 * safe and will not be recorded anywhere.
2898 	 */
2899 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2900 		*current_rb = temp_buffer;
2901 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2902 						    trace_ctx);
2903 	}
2904 	return entry;
2905 }
2906 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
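
/*
 * Illustrative sketch, not code from this file: the reserve/fill/commit
 * sequence that the generated trace_event_raw_event_*() handlers follow.
 * The my_entry type and its "value" field are hypothetical;
 * trace_event_buffer_reserve() lives in trace_events.c and ends up in
 * trace_event_buffer_lock_reserve() above, and the commit is
 * trace_event_buffer_commit() below.
 *
 *	struct trace_event_buffer fbuffer;
 *	struct my_entry *entry;
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->value = value;
 *	trace_event_buffer_commit(&fbuffer);
 */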
2907 
2908 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2909 static DEFINE_MUTEX(tracepoint_printk_mutex);
2910 
2911 static void output_printk(struct trace_event_buffer *fbuffer)
2912 {
2913 	struct trace_event_call *event_call;
2914 	struct trace_event_file *file;
2915 	struct trace_event *event;
2916 	unsigned long flags;
2917 	struct trace_iterator *iter = tracepoint_print_iter;
2918 
2919 	/* We should never get here if iter is NULL */
2920 	if (WARN_ON_ONCE(!iter))
2921 		return;
2922 
2923 	event_call = fbuffer->trace_file->event_call;
2924 	if (!event_call || !event_call->event.funcs ||
2925 	    !event_call->event.funcs->trace)
2926 		return;
2927 
2928 	file = fbuffer->trace_file;
2929 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2930 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2931 	     !filter_match_preds(file->filter, fbuffer->entry)))
2932 		return;
2933 
2934 	event = &fbuffer->trace_file->event_call->event;
2935 
2936 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2937 	trace_seq_init(&iter->seq);
2938 	iter->ent = fbuffer->entry;
2939 	event_call->event.funcs->trace(iter, 0, event);
2940 	trace_seq_putc(&iter->seq, 0);
2941 	printk("%s", iter->seq.buffer);
2942 
2943 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2944 }
2945 
2946 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2947 			     void *buffer, size_t *lenp,
2948 			     loff_t *ppos)
2949 {
2950 	int save_tracepoint_printk;
2951 	int ret;
2952 
2953 	mutex_lock(&tracepoint_printk_mutex);
2954 	save_tracepoint_printk = tracepoint_printk;
2955 
2956 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2957 
2958 	/*
2959 	 * This will force exiting early, as tracepoint_printk
2960 	 * is always zero when tracepoint_print_iter is not allocated.
2961 	 */
2962 	if (!tracepoint_print_iter)
2963 		tracepoint_printk = 0;
2964 
2965 	if (save_tracepoint_printk == tracepoint_printk)
2966 		goto out;
2967 
2968 	if (tracepoint_printk)
2969 		static_key_enable(&tracepoint_printk_key.key);
2970 	else
2971 		static_key_disable(&tracepoint_printk_key.key);
2972 
2973  out:
2974 	mutex_unlock(&tracepoint_printk_mutex);
2975 
2976 	return ret;
2977 }
2978 
2979 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2980 {
2981 	enum event_trigger_type tt = ETT_NONE;
2982 	struct trace_event_file *file = fbuffer->trace_file;
2983 
2984 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2985 			fbuffer->entry, &tt))
2986 		goto discard;
2987 
2988 	if (static_key_false(&tracepoint_printk_key.key))
2989 		output_printk(fbuffer);
2990 
2991 	if (static_branch_unlikely(&trace_event_exports_enabled))
2992 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2993 
2994 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2995 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2996 
2997 discard:
2998 	if (tt)
2999 		event_triggers_post_call(file, tt);
3000 
3001 }
3002 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3003 
3004 /*
3005  * Skip 3:
3006  *
3007  *   trace_buffer_unlock_commit_regs()
3008  *   trace_event_buffer_commit()
3009  *   trace_event_raw_event_xxx()
3010  */
3011 # define STACK_SKIP 3
3012 
3013 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3014 				     struct trace_buffer *buffer,
3015 				     struct ring_buffer_event *event,
3016 				     unsigned int trace_ctx,
3017 				     struct pt_regs *regs)
3018 {
3019 	__buffer_unlock_commit(buffer, event);
3020 
3021 	/*
3022 	 * If regs is not set, then skip the necessary functions.
3023 	 * Note, we can still get here via blktrace, wakeup tracer
3024 	 * and mmiotrace, but that's ok if they lose a function or
3025 	 * two. They are not that meaningful.
3026 	 */
3027 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3028 	ftrace_trace_userstack(tr, buffer, trace_ctx);
3029 }
3030 
3031 /*
3032  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3033  */
3034 void
3035 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3036 				   struct ring_buffer_event *event)
3037 {
3038 	__buffer_unlock_commit(buffer, event);
3039 }
3040 
3041 void
3042 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3043 	       parent_ip, unsigned int trace_ctx)
3044 {
3045 	struct trace_event_call *call = &event_function;
3046 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3047 	struct ring_buffer_event *event;
3048 	struct ftrace_entry *entry;
3049 
3050 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3051 					    trace_ctx);
3052 	if (!event)
3053 		return;
3054 	entry	= ring_buffer_event_data(event);
3055 	entry->ip			= ip;
3056 	entry->parent_ip		= parent_ip;
3057 
3058 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3059 		if (static_branch_unlikely(&trace_function_exports_enabled))
3060 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3061 		__buffer_unlock_commit(buffer, event);
3062 	}
3063 }
3064 
3065 #ifdef CONFIG_STACKTRACE
3066 
3067 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3068 #define FTRACE_KSTACK_NESTING	4
3069 
3070 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3071 
3072 struct ftrace_stack {
3073 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3074 };
3075 
3076 
3077 struct ftrace_stacks {
3078 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3079 };
3080 
3081 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3082 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3083 
3084 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3085 				 unsigned int trace_ctx,
3086 				 int skip, struct pt_regs *regs)
3087 {
3088 	struct trace_event_call *call = &event_kernel_stack;
3089 	struct ring_buffer_event *event;
3090 	unsigned int size, nr_entries;
3091 	struct ftrace_stack *fstack;
3092 	struct stack_entry *entry;
3093 	int stackidx;
3094 
3095 	/*
3096 	 * Add one, for this function and the call to stack_trace_save().
3097 	 * If regs is set, then these functions will not be in the way.
3098 	 */
3099 #ifndef CONFIG_UNWINDER_ORC
3100 	if (!regs)
3101 		skip++;
3102 #endif
3103 
3104 	preempt_disable_notrace();
3105 
3106 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3107 
3108 	/* This should never happen. If it does, yell once and skip */
3109 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3110 		goto out;
3111 
3112 	/*
3113 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3114 	 * interrupt will either see the value pre-increment or post-
3115 	 * increment. If the interrupt happens pre-increment it will have
3116 	 * restored the counter when it returns.  We just need a barrier to
3117 	 * keep gcc from moving things around.
3118 	 */
3119 	barrier();
3120 
3121 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3122 	size = ARRAY_SIZE(fstack->calls);
3123 
3124 	if (regs) {
3125 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3126 						   size, skip);
3127 	} else {
3128 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3129 	}
3130 
3131 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3132 				    struct_size(entry, caller, nr_entries),
3133 				    trace_ctx);
3134 	if (!event)
3135 		goto out;
3136 	entry = ring_buffer_event_data(event);
3137 
3138 	entry->size = nr_entries;
3139 	memcpy(&entry->caller, fstack->calls,
3140 	       flex_array_size(entry, caller, nr_entries));
3141 
3142 	if (!call_filter_check_discard(call, entry, buffer, event))
3143 		__buffer_unlock_commit(buffer, event);
3144 
3145  out:
3146 	/* Again, don't let gcc optimize things here */
3147 	barrier();
3148 	__this_cpu_dec(ftrace_stack_reserve);
3149 	preempt_enable_notrace();
3150 
3151 }
3152 
3153 static inline void ftrace_trace_stack(struct trace_array *tr,
3154 				      struct trace_buffer *buffer,
3155 				      unsigned int trace_ctx,
3156 				      int skip, struct pt_regs *regs)
3157 {
3158 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3159 		return;
3160 
3161 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3162 }
3163 
3164 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3165 		   int skip)
3166 {
3167 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3168 
3169 	if (rcu_is_watching()) {
3170 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3171 		return;
3172 	}
3173 
3174 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3175 		return;
3176 
3177 	/*
3178 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3179 	 * but if the above rcu_is_watching() failed, then the NMI
3180 	 * triggered someplace critical, and ct_irq_enter() should
3181 	 * not be called from NMI.
3182 	 */
3183 	if (unlikely(in_nmi()))
3184 		return;
3185 
3186 	ct_irq_enter_irqson();
3187 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3188 	ct_irq_exit_irqson();
3189 }
3190 
3191 /**
3192  * trace_dump_stack - record a stack back trace in the trace buffer
3193  * @skip: Number of functions to skip (helper handlers)
3194  */
3195 void trace_dump_stack(int skip)
3196 {
3197 	if (tracing_disabled || tracing_selftest_running)
3198 		return;
3199 
3200 #ifndef CONFIG_UNWINDER_ORC
3201 	/* Skip 1 to skip this function. */
3202 	skip++;
3203 #endif
3204 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3205 			     tracing_gen_ctx(), skip, NULL);
3206 }
3207 EXPORT_SYMBOL_GPL(trace_dump_stack);
3208 
3209 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3210 static DEFINE_PER_CPU(int, user_stack_count);
3211 
3212 static void
3213 ftrace_trace_userstack(struct trace_array *tr,
3214 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3215 {
3216 	struct trace_event_call *call = &event_user_stack;
3217 	struct ring_buffer_event *event;
3218 	struct userstack_entry *entry;
3219 
3220 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3221 		return;
3222 
3223 	/*
3224 	 * NMIs can not handle page faults, even with fixups.
3225 	 * Saving the user stack can (and often does) fault.
3226 	 */
3227 	if (unlikely(in_nmi()))
3228 		return;
3229 
3230 	/*
3231 	 * Prevent recursion, since the user stack tracing may
3232 	 * trigger other kernel events.
3233 	 */
3234 	preempt_disable();
3235 	if (__this_cpu_read(user_stack_count))
3236 		goto out;
3237 
3238 	__this_cpu_inc(user_stack_count);
3239 
3240 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3241 					    sizeof(*entry), trace_ctx);
3242 	if (!event)
3243 		goto out_drop_count;
3244 	entry	= ring_buffer_event_data(event);
3245 
3246 	entry->tgid		= current->tgid;
3247 	memset(&entry->caller, 0, sizeof(entry->caller));
3248 
3249 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3250 	if (!call_filter_check_discard(call, entry, buffer, event))
3251 		__buffer_unlock_commit(buffer, event);
3252 
3253  out_drop_count:
3254 	__this_cpu_dec(user_stack_count);
3255  out:
3256 	preempt_enable();
3257 }
3258 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3259 static void ftrace_trace_userstack(struct trace_array *tr,
3260 				   struct trace_buffer *buffer,
3261 				   unsigned int trace_ctx)
3262 {
3263 }
3264 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3265 
3266 #endif /* CONFIG_STACKTRACE */
3267 
3268 static inline void
3269 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3270 			  unsigned long long delta)
3271 {
3272 	entry->bottom_delta_ts = delta & U32_MAX;
3273 	entry->top_delta_ts = (delta >> 32);
3274 }
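
/*
 * The 64-bit delta is split into two 32-bit halves so the entry stays
 * compact; the output side reassembles it as:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */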
3275 
3276 void trace_last_func_repeats(struct trace_array *tr,
3277 			     struct trace_func_repeats *last_info,
3278 			     unsigned int trace_ctx)
3279 {
3280 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3281 	struct func_repeats_entry *entry;
3282 	struct ring_buffer_event *event;
3283 	u64 delta;
3284 
3285 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3286 					    sizeof(*entry), trace_ctx);
3287 	if (!event)
3288 		return;
3289 
3290 	delta = ring_buffer_event_time_stamp(buffer, event) -
3291 		last_info->ts_last_call;
3292 
3293 	entry = ring_buffer_event_data(event);
3294 	entry->ip = last_info->ip;
3295 	entry->parent_ip = last_info->parent_ip;
3296 	entry->count = last_info->count;
3297 	func_repeats_set_delta_ts(entry, delta);
3298 
3299 	__buffer_unlock_commit(buffer, event);
3300 }
3301 
3302 /* created for use with alloc_percpu */
3303 struct trace_buffer_struct {
3304 	int nesting;
3305 	char buffer[4][TRACE_BUF_SIZE];
3306 };
3307 
3308 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3309 
3310 /*
3311  * This allows for lockless recording.  If we're nested too deeply, then
3312  * this returns NULL.
3313  */
3314 static char *get_trace_buf(void)
3315 {
3316 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3317 
3318 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3319 		return NULL;
3320 
3321 	buffer->nesting++;
3322 
3323 	/* Interrupts must see nesting incremented before we use the buffer */
3324 	barrier();
3325 	return &buffer->buffer[buffer->nesting - 1][0];
3326 }
3327 
3328 static void put_trace_buf(void)
3329 {
3330 	/* Don't let the decrement of nesting leak before this */
3331 	barrier();
3332 	this_cpu_dec(trace_percpu_buffer->nesting);
3333 }
3334 
3335 static int alloc_percpu_trace_buffer(void)
3336 {
3337 	struct trace_buffer_struct __percpu *buffers;
3338 
3339 	if (trace_percpu_buffer)
3340 		return 0;
3341 
3342 	buffers = alloc_percpu(struct trace_buffer_struct);
3343 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3344 		return -ENOMEM;
3345 
3346 	trace_percpu_buffer = buffers;
3347 	return 0;
3348 }
3349 
3350 static int buffers_allocated;
3351 
3352 void trace_printk_init_buffers(void)
3353 {
3354 	if (buffers_allocated)
3355 		return;
3356 
3357 	if (alloc_percpu_trace_buffer())
3358 		return;
3359 
3360 	/* trace_printk() is for debug use only. Don't use it in production. */
3361 
3362 	pr_warn("\n");
3363 	pr_warn("**********************************************************\n");
3364 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3365 	pr_warn("**                                                      **\n");
3366 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3367 	pr_warn("**                                                      **\n");
3368 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3369 	pr_warn("** unsafe for production use.                           **\n");
3370 	pr_warn("**                                                      **\n");
3371 	pr_warn("** If you see this message and you are not debugging    **\n");
3372 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3373 	pr_warn("**                                                      **\n");
3374 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3375 	pr_warn("**********************************************************\n");
3376 
3377 	/* Expand the buffers to set size */
3378 	tracing_update_buffers(&global_trace);
3379 
3380 	buffers_allocated = 1;
3381 
3382 	/*
3383 	 * trace_printk_init_buffers() can be called by modules.
3384 	 * If that happens, then we need to start cmdline recording
3385 	 * directly here. If the global_trace.buffer is already
3386 	 * allocated here, then this was called by module code.
3387 	 */
3388 	if (global_trace.array_buffer.buffer)
3389 		tracing_start_cmdline_record();
3390 }
3391 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
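
/*
 * Illustrative usage, not code from this file: trace_printk() is used
 * like printk() from nearly any context while debugging, but the output
 * goes into the ring buffer instead of the console (flags and count
 * below stand in for whatever is being inspected):
 *
 *	trace_printk("entered with flags=0x%lx count=%d\n", flags, count);
 *
 * Building any such caller into the kernel is what allocates the buffers
 * above and prints the NOTICE banner, which is why trace_printk() must
 * not be left in production code.
 */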
3392 
3393 void trace_printk_start_comm(void)
3394 {
3395 	/* Start tracing comms if trace printk is set */
3396 	if (!buffers_allocated)
3397 		return;
3398 	tracing_start_cmdline_record();
3399 }
3400 
3401 static void trace_printk_start_stop_comm(int enabled)
3402 {
3403 	if (!buffers_allocated)
3404 		return;
3405 
3406 	if (enabled)
3407 		tracing_start_cmdline_record();
3408 	else
3409 		tracing_stop_cmdline_record();
3410 }
3411 
3412 /**
3413  * trace_vbprintk - write binary msg to tracing buffer
3414  * @ip:    The address of the caller
3415  * @fmt:   The string format to write to the buffer
3416  * @args:  Arguments for @fmt
3417  */
3418 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3419 {
3420 	struct trace_event_call *call = &event_bprint;
3421 	struct ring_buffer_event *event;
3422 	struct trace_buffer *buffer;
3423 	struct trace_array *tr = &global_trace;
3424 	struct bprint_entry *entry;
3425 	unsigned int trace_ctx;
3426 	char *tbuffer;
3427 	int len = 0, size;
3428 
3429 	if (unlikely(tracing_selftest_running || tracing_disabled))
3430 		return 0;
3431 
3432 	/* Don't pollute graph traces with trace_vprintk internals */
3433 	pause_graph_tracing();
3434 
3435 	trace_ctx = tracing_gen_ctx();
3436 	preempt_disable_notrace();
3437 
3438 	tbuffer = get_trace_buf();
3439 	if (!tbuffer) {
3440 		len = 0;
3441 		goto out_nobuffer;
3442 	}
3443 
3444 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3445 
3446 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3447 		goto out_put;
3448 
3449 	size = sizeof(*entry) + sizeof(u32) * len;
3450 	buffer = tr->array_buffer.buffer;
3451 	ring_buffer_nest_start(buffer);
3452 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3453 					    trace_ctx);
3454 	if (!event)
3455 		goto out;
3456 	entry = ring_buffer_event_data(event);
3457 	entry->ip			= ip;
3458 	entry->fmt			= fmt;
3459 
3460 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3461 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3462 		__buffer_unlock_commit(buffer, event);
3463 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3464 	}
3465 
3466 out:
3467 	ring_buffer_nest_end(buffer);
3468 out_put:
3469 	put_trace_buf();
3470 
3471 out_nobuffer:
3472 	preempt_enable_notrace();
3473 	unpause_graph_tracing();
3474 
3475 	return len;
3476 }
3477 EXPORT_SYMBOL_GPL(trace_vbprintk);
3478 
3479 __printf(3, 0)
3480 static int
3481 __trace_array_vprintk(struct trace_buffer *buffer,
3482 		      unsigned long ip, const char *fmt, va_list args)
3483 {
3484 	struct trace_event_call *call = &event_print;
3485 	struct ring_buffer_event *event;
3486 	int len = 0, size;
3487 	struct print_entry *entry;
3488 	unsigned int trace_ctx;
3489 	char *tbuffer;
3490 
3491 	if (tracing_disabled)
3492 		return 0;
3493 
3494 	/* Don't pollute graph traces with trace_vprintk internals */
3495 	pause_graph_tracing();
3496 
3497 	trace_ctx = tracing_gen_ctx();
3498 	preempt_disable_notrace();
3499 
3501 	tbuffer = get_trace_buf();
3502 	if (!tbuffer) {
3503 		len = 0;
3504 		goto out_nobuffer;
3505 	}
3506 
3507 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3508 
3509 	size = sizeof(*entry) + len + 1;
3510 	ring_buffer_nest_start(buffer);
3511 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3512 					    trace_ctx);
3513 	if (!event)
3514 		goto out;
3515 	entry = ring_buffer_event_data(event);
3516 	entry->ip = ip;
3517 
3518 	memcpy(&entry->buf, tbuffer, len + 1);
3519 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3520 		__buffer_unlock_commit(buffer, event);
3521 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3522 	}
3523 
3524 out:
3525 	ring_buffer_nest_end(buffer);
3526 	put_trace_buf();
3527 
3528 out_nobuffer:
3529 	preempt_enable_notrace();
3530 	unpause_graph_tracing();
3531 
3532 	return len;
3533 }
3534 
3535 __printf(3, 0)
3536 int trace_array_vprintk(struct trace_array *tr,
3537 			unsigned long ip, const char *fmt, va_list args)
3538 {
3539 	if (tracing_selftest_running && tr == &global_trace)
3540 		return 0;
3541 
3542 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3543 }
3544 
3545 /**
3546  * trace_array_printk - Print a message to a specific instance
3547  * @tr: The instance trace_array descriptor
3548  * @ip: The instruction pointer that this is called from.
3549  * @fmt: The format to print (printf format)
3550  *
3551  * If a subsystem sets up its own instance, it has the right to
3552  * printk strings into its tracing instance buffer using this
3553  * function. Note, this function will not write into the top level
3554  * buffer (use trace_printk() for that), as the top level buffer
3555  * should only contain events that can be individually disabled.
3556  * trace_printk() is only meant for debugging a kernel, and should
3557  * never be incorporated into normal use.
3558  *
3559  * trace_array_printk() can be used, as it will not add noise to the
3560  * top level tracing buffer.
3561  *
3562  * Note, trace_array_init_printk() must be called on @tr before this
3563  * can be used.
3564  */
3565 __printf(3, 0)
3566 int trace_array_printk(struct trace_array *tr,
3567 		       unsigned long ip, const char *fmt, ...)
3568 {
3569 	int ret;
3570 	va_list ap;
3571 
3572 	if (!tr)
3573 		return -ENOENT;
3574 
3575 	/* This is only allowed for created instances */
3576 	if (tr == &global_trace)
3577 		return 0;
3578 
3579 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3580 		return 0;
3581 
3582 	va_start(ap, fmt);
3583 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3584 	va_end(ap);
3585 	return ret;
3586 }
3587 EXPORT_SYMBOL_GPL(trace_array_printk);
3588 
3589 /**
3590  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3591  * @tr: The trace array to initialize the buffers for
3592  *
3593  * As trace_array_printk() only writes into instances, they are OK to
3594  * have in the kernel (unlike trace_printk()). This needs to be called
3595  * before trace_array_printk() can be used on a trace_array.
3596  */
3597 int trace_array_init_printk(struct trace_array *tr)
3598 {
3599 	if (!tr)
3600 		return -ENOENT;
3601 
3602 	/* This is only allowed for created instances */
3603 	if (tr == &global_trace)
3604 		return -EINVAL;
3605 
3606 	return alloc_percpu_trace_buffer();
3607 }
3608 EXPORT_SYMBOL_GPL(trace_array_init_printk);
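
/*
 * Illustrative sketch, not code from this file: a subsystem printing into
 * its own instance, per the kernel-doc above. It assumes "tr" was already
 * obtained from the instance-creation API (trace_array_get_by_name(),
 * whose exact signature varies across kernel versions); id and status are
 * hypothetical values.
 *
 *	if (trace_array_init_printk(tr))
 *		return -ENODEV;
 *
 *	trace_array_printk(tr, _THIS_IP_, "widget %d reset, status=%#x\n",
 *			   id, status);
 */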
3609 
3610 __printf(3, 4)
3611 int trace_array_printk_buf(struct trace_buffer *buffer,
3612 			   unsigned long ip, const char *fmt, ...)
3613 {
3614 	int ret;
3615 	va_list ap;
3616 
3617 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3618 		return 0;
3619 
3620 	va_start(ap, fmt);
3621 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3622 	va_end(ap);
3623 	return ret;
3624 }
3625 
3626 __printf(2, 0)
3627 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3628 {
3629 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3630 }
3631 EXPORT_SYMBOL_GPL(trace_vprintk);
3632 
3633 static void trace_iterator_increment(struct trace_iterator *iter)
3634 {
3635 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3636 
3637 	iter->idx++;
3638 	if (buf_iter)
3639 		ring_buffer_iter_advance(buf_iter);
3640 }
3641 
3642 static struct trace_entry *
3643 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3644 		unsigned long *lost_events)
3645 {
3646 	struct ring_buffer_event *event;
3647 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3648 
3649 	if (buf_iter) {
3650 		event = ring_buffer_iter_peek(buf_iter, ts);
3651 		if (lost_events)
3652 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3653 				(unsigned long)-1 : 0;
3654 	} else {
3655 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3656 					 lost_events);
3657 	}
3658 
3659 	if (event) {
3660 		iter->ent_size = ring_buffer_event_length(event);
3661 		return ring_buffer_event_data(event);
3662 	}
3663 	iter->ent_size = 0;
3664 	return NULL;
3665 }
3666 
3667 static struct trace_entry *
3668 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3669 		  unsigned long *missing_events, u64 *ent_ts)
3670 {
3671 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3672 	struct trace_entry *ent, *next = NULL;
3673 	unsigned long lost_events = 0, next_lost = 0;
3674 	int cpu_file = iter->cpu_file;
3675 	u64 next_ts = 0, ts;
3676 	int next_cpu = -1;
3677 	int next_size = 0;
3678 	int cpu;
3679 
3680 	/*
3681 	 * If we are in a per_cpu trace file, don't bother iterating over
3682 	 * all CPUs; peek at that CPU directly.
3683 	 */
3684 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3685 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3686 			return NULL;
3687 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3688 		if (ent_cpu)
3689 			*ent_cpu = cpu_file;
3690 
3691 		return ent;
3692 	}
3693 
3694 	for_each_tracing_cpu(cpu) {
3695 
3696 		if (ring_buffer_empty_cpu(buffer, cpu))
3697 			continue;
3698 
3699 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3700 
3701 		/*
3702 		 * Pick the entry with the smallest timestamp:
3703 		 */
3704 		if (ent && (!next || ts < next_ts)) {
3705 			next = ent;
3706 			next_cpu = cpu;
3707 			next_ts = ts;
3708 			next_lost = lost_events;
3709 			next_size = iter->ent_size;
3710 		}
3711 	}
3712 
3713 	iter->ent_size = next_size;
3714 
3715 	if (ent_cpu)
3716 		*ent_cpu = next_cpu;
3717 
3718 	if (ent_ts)
3719 		*ent_ts = next_ts;
3720 
3721 	if (missing_events)
3722 		*missing_events = next_lost;
3723 
3724 	return next;
3725 }
3726 
3727 #define STATIC_FMT_BUF_SIZE	128
3728 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3729 
3730 char *trace_iter_expand_format(struct trace_iterator *iter)
3731 {
3732 	char *tmp;
3733 
3734 	/*
3735 	 * iter->tr is NULL when used with tp_printk, which makes
3736 	 * this get called where it is not safe to call krealloc().
3737 	 */
3738 	if (!iter->tr || iter->fmt == static_fmt_buf)
3739 		return NULL;
3740 
3741 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3742 		       GFP_KERNEL);
3743 	if (tmp) {
3744 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3745 		iter->fmt = tmp;
3746 	}
3747 
3748 	return tmp;
3749 }
3750 
3751 /* Returns true if the string is safe to dereference from an event */
3752 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3753 			   bool star, int len)
3754 {
3755 	unsigned long addr = (unsigned long)str;
3756 	struct trace_event *trace_event;
3757 	struct trace_event_call *event;
3758 
3759 	/* Ignore strings with no length */
3760 	if (star && !len)
3761 		return true;
3762 
3763 	/* OK if part of the event data */
3764 	if ((addr >= (unsigned long)iter->ent) &&
3765 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3766 		return true;
3767 
3768 	/* OK if part of the temp seq buffer */
3769 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3770 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3771 		return true;
3772 
3773 	/* Core rodata can not be freed */
3774 	if (is_kernel_rodata(addr))
3775 		return true;
3776 
3777 	if (trace_is_tracepoint_string(str))
3778 		return true;
3779 
3780 	/*
3781 	 * Now this could be a module event, referencing core module
3782 	 * data, which is OK.
3783 	 */
3784 	if (!iter->ent)
3785 		return false;
3786 
3787 	trace_event = ftrace_find_event(iter->ent->type);
3788 	if (!trace_event)
3789 		return false;
3790 
3791 	event = container_of(trace_event, struct trace_event_call, event);
3792 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3793 		return false;
3794 
3795 	/* Would rather have rodata, but this will suffice */
3796 	if (within_module_core(addr, event->module))
3797 		return true;
3798 
3799 	return false;
3800 }
3801 
3802 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3803 
3804 static int test_can_verify_check(const char *fmt, ...)
3805 {
3806 	char buf[16];
3807 	va_list ap;
3808 	int ret;
3809 
3810 	/*
3811 	 * The verifier depends on vsnprintf() modifying the va_list that is
3812 	 * passed to it, which requires the va_list to be passed by reference.
3813 	 * Some architectures (like x86_32) pass it by value, which means that
3814 	 * vsnprintf() does not modify the caller's va_list, and the verifier
3815 	 * would then need to be able to understand all the values that
3816 	 * vsnprintf() can use. If the va_list is passed by value, the
3817 	 * verifier is disabled.
3818 	 */
3819 	va_start(ap, fmt);
3820 	vsnprintf(buf, 16, "%d", ap);
3821 	ret = va_arg(ap, int);
3822 	va_end(ap);
3823 
3824 	return ret;
3825 }
3826 
3827 static void test_can_verify(void)
3828 {
3829 	if (!test_can_verify_check("%d %d", 0, 1)) {
3830 		pr_info("trace event string verifier disabled\n");
3831 		static_branch_inc(&trace_no_verify);
3832 	}
3833 }
3834 
3835 /**
3836  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3837  * @iter: The iterator that holds the seq buffer and the event being printed
3838  * @fmt: The format used to print the event
3839  * @ap: The va_list holding the data to print from @fmt.
3840  *
3841  * This writes the data into the @iter->seq buffer using the data from
3842  * @fmt and @ap. If the format has a %s, then the source of the string
3843  * is examined to make sure it is safe to print, otherwise it will
3844  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3845  * pointer.
3846  */
3847 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3848 			 va_list ap)
3849 {
3850 	const char *p = fmt;
3851 	const char *str;
3852 	int i, j;
3853 
3854 	if (WARN_ON_ONCE(!fmt))
3855 		return;
3856 
3857 	if (static_branch_unlikely(&trace_no_verify))
3858 		goto print;
3859 
3860 	/* Don't bother checking when doing a ftrace_dump() */
3861 	if (iter->fmt == static_fmt_buf)
3862 		goto print;
3863 
3864 	while (*p) {
3865 		bool star = false;
3866 		int len = 0;
3867 
3868 		j = 0;
3869 
3870 		/* We only care about %s and variants */
3871 		for (i = 0; p[i]; i++) {
3872 			if (i + 1 >= iter->fmt_size) {
3873 				/*
3874 				 * If we can't expand the copy buffer,
3875 				 * just print it.
3876 				 */
3877 				if (!trace_iter_expand_format(iter))
3878 					goto print;
3879 			}
3880 
3881 			if (p[i] == '\\' && p[i+1]) {
3882 				i++;
3883 				continue;
3884 			}
3885 			if (p[i] == '%') {
3886 				/* Need to test cases like %08.*s */
3887 				for (j = 1; p[i+j]; j++) {
3888 					if (isdigit(p[i+j]) ||
3889 					    p[i+j] == '.')
3890 						continue;
3891 					if (p[i+j] == '*') {
3892 						star = true;
3893 						continue;
3894 					}
3895 					break;
3896 				}
3897 				if (p[i+j] == 's')
3898 					break;
3899 				star = false;
3900 			}
3901 			j = 0;
3902 		}
3903 		/* If no %s found then just print normally */
3904 		if (!p[i])
3905 			break;
3906 
3907 		/* Copy up to the %s, and print that */
3908 		strncpy(iter->fmt, p, i);
3909 		iter->fmt[i] = '\0';
3910 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3911 
3912 		/*
3913 		 * If iter->seq is full, the above call no longer guarantees
3914 		 * that ap is in sync with fmt processing, and further calls
3915 		 * to va_arg() can return wrong positional arguments.
3916 		 *
3917 		 * Ensure that ap is no longer used in this case.
3918 		 */
3919 		if (iter->seq.full) {
3920 			p = "";
3921 			break;
3922 		}
3923 
3924 		if (star)
3925 			len = va_arg(ap, int);
3926 
3927 		/* The ap now points to the string data of the %s */
3928 		str = va_arg(ap, const char *);
3929 
3930 		/*
3931 		 * If you hit this warning, it is likely that the
3932 		 * trace event in question used %s on a string that
3933 		 * was saved at the time of the event, but may not be
3934 		 * around when the trace is read. Use __string(),
3935 		 * around when the trace is read. Use the __string(),
3936 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3937 		 * macro instead. See samples/trace_events/trace-events-sample.h
3938 		 */
3939 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3940 			      "fmt: '%s' current_buffer: '%s'",
3941 			      fmt, seq_buf_str(&iter->seq.seq))) {
3942 			int ret;
3943 
3944 			/* Try to safely read the string */
3945 			if (star) {
3946 				if (len + 1 > iter->fmt_size)
3947 					len = iter->fmt_size - 1;
3948 				if (len < 0)
3949 					len = 0;
3950 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3951 				iter->fmt[len] = 0;
3952 				star = false;
3953 			} else {
3954 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3955 								  iter->fmt_size);
3956 			}
3957 			if (ret < 0)
3958 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3959 			else
3960 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3961 						 str, iter->fmt);
3962 			str = "[UNSAFE-MEMORY]";
3963 			strcpy(iter->fmt, "%s");
3964 		} else {
3965 			strncpy(iter->fmt, p + i, j + 1);
3966 			iter->fmt[j+1] = '\0';
3967 		}
3968 		if (star)
3969 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3970 		else
3971 			trace_seq_printf(&iter->seq, iter->fmt, str);
3972 
3973 		p += i + j + 1;
3974 	}
3975  print:
3976 	if (*p)
3977 		trace_seq_vprintf(&iter->seq, p, ap);
3978 }
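
/*
 * Example of the flow above for fmt = "comm=%s pid=%d": the text up to the
 * "%s" ("comm=") is copied into iter->fmt and printed, the string pointer
 * is then pulled with va_arg() and checked with trace_safe_str().  If it
 * points into the event itself, the temp seq buffer, core rodata, a
 * tracepoint string or the owning module's core, the "%s" is printed
 * normally; otherwise a one-time warning fires and the raw pointer (with a
 * best-effort copy of its contents) plus "[UNSAFE-MEMORY]" is printed in
 * its place.  The trailing " pid=%d" is handled by the final
 * trace_seq_vprintf() at the print label.
 */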
3979 
3980 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3981 {
3982 	const char *p, *new_fmt;
3983 	char *q;
3984 
3985 	if (WARN_ON_ONCE(!fmt))
3986 		return fmt;
3987 
3988 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3989 		return fmt;
3990 
3991 	p = fmt;
3992 	new_fmt = q = iter->fmt;
3993 	while (*p) {
3994 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3995 			if (!trace_iter_expand_format(iter))
3996 				return fmt;
3997 
3998 			q += iter->fmt - new_fmt;
3999 			new_fmt = iter->fmt;
4000 		}
4001 
4002 		*q++ = *p++;
4003 
4004 		/* Replace %p with %px */
4005 		if (p[-1] == '%') {
4006 			if (p[0] == '%') {
4007 				*q++ = *p++;
4008 			} else if (p[0] == 'p' && !isalnum(p[1])) {
4009 				*q++ = *p++;
4010 				*q++ = 'x';
4011 			}
4012 		}
4013 	}
4014 	*q = '\0';
4015 
4016 	return new_fmt;
4017 }
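
/*
 * As an illustration, with the hash-ptr option cleared a format such as
 * "skb=%p dev=%s rc=%d\n" comes back as "skb=%px dev=%s rc=%d\n": only a
 * bare "%p" (not followed by an alphanumeric, so pointer extensions like
 * "%pS" are untouched, and not a literal "%%p") has the 'x' appended so
 * the real address is printed instead of a hashed value.
 */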
4018 
4019 #define STATIC_TEMP_BUF_SIZE	128
4020 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4021 
4022 /* Find the next real entry, without updating the iterator itself */
4023 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4024 					  int *ent_cpu, u64 *ent_ts)
4025 {
4026 	/* __find_next_entry will reset ent_size */
4027 	int ent_size = iter->ent_size;
4028 	struct trace_entry *entry;
4029 
4030 	/*
4031 	 * If called from ftrace_dump(), then the iter->temp buffer
4032 	 * will be the static_temp_buf and not created from kmalloc.
4033 	 * If the entry size is greater than the buffer, we cannot
4034 	 * save it. Just return NULL in that case. This is only
4035 	 * used to add markers when two consecutive events' time
4036 	 * stamps have a large delta. See trace_print_lat_context().
4037 	 */
4038 	if (iter->temp == static_temp_buf &&
4039 	    STATIC_TEMP_BUF_SIZE < ent_size)
4040 		return NULL;
4041 
4042 	/*
4043 	 * The __find_next_entry() may call peek_next_entry(), which may
4044 	 * call ring_buffer_peek() that may make the contents of iter->ent
4045 	 * undefined. Need to copy iter->ent now.
4046 	 */
4047 	if (iter->ent && iter->ent != iter->temp) {
4048 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4049 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4050 			void *temp;
4051 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4052 			if (!temp)
4053 				return NULL;
4054 			kfree(iter->temp);
4055 			iter->temp = temp;
4056 			iter->temp_size = iter->ent_size;
4057 		}
4058 		memcpy(iter->temp, iter->ent, iter->ent_size);
4059 		iter->ent = iter->temp;
4060 	}
4061 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4062 	/* Put back the original ent_size */
4063 	iter->ent_size = ent_size;
4064 
4065 	return entry;
4066 }
4067 
4068 /* Find the next real entry, and increment the iterator to the next entry */
4069 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4070 {
4071 	iter->ent = __find_next_entry(iter, &iter->cpu,
4072 				      &iter->lost_events, &iter->ts);
4073 
4074 	if (iter->ent)
4075 		trace_iterator_increment(iter);
4076 
4077 	return iter->ent ? iter : NULL;
4078 }
4079 
4080 static void trace_consume(struct trace_iterator *iter)
4081 {
4082 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4083 			    &iter->lost_events);
4084 }
4085 
4086 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4087 {
4088 	struct trace_iterator *iter = m->private;
4089 	int i = (int)*pos;
4090 	void *ent;
4091 
4092 	WARN_ON_ONCE(iter->leftover);
4093 
4094 	(*pos)++;
4095 
4096 	/* can't go backwards */
4097 	if (iter->idx > i)
4098 		return NULL;
4099 
4100 	if (iter->idx < 0)
4101 		ent = trace_find_next_entry_inc(iter);
4102 	else
4103 		ent = iter;
4104 
4105 	while (ent && iter->idx < i)
4106 		ent = trace_find_next_entry_inc(iter);
4107 
4108 	iter->pos = *pos;
4109 
4110 	return ent;
4111 }
4112 
4113 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4114 {
4115 	struct ring_buffer_iter *buf_iter;
4116 	unsigned long entries = 0;
4117 	u64 ts;
4118 
4119 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4120 
4121 	buf_iter = trace_buffer_iter(iter, cpu);
4122 	if (!buf_iter)
4123 		return;
4124 
4125 	ring_buffer_iter_reset(buf_iter);
4126 
4127 	/*
4128 	 * With the max latency tracers, it is possible that a reset
4129 	 * never took place on a cpu. This is evident from timestamps
4130 	 * that fall before the start of the buffer.
4131 	 */
4132 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4133 		if (ts >= iter->array_buffer->time_start)
4134 			break;
4135 		entries++;
4136 		ring_buffer_iter_advance(buf_iter);
4137 	}
4138 
4139 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4140 }
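
/*
 * The skipped_entries count recorded here feeds get_total_entries_cpu():
 * entries that predate array_buffer->time_start are subtracted from the
 * per-cpu totals, so the "entries-in-buffer" header only counts what the
 * iterator will actually show.
 */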
4141 
4142 /*
4143  * The current tracer is copied into the iterator to avoid holding
4144  * a global lock for the duration of the read.
4145  */
4146 static void *s_start(struct seq_file *m, loff_t *pos)
4147 {
4148 	struct trace_iterator *iter = m->private;
4149 	struct trace_array *tr = iter->tr;
4150 	int cpu_file = iter->cpu_file;
4151 	void *p = NULL;
4152 	loff_t l = 0;
4153 	int cpu;
4154 
4155 	mutex_lock(&trace_types_lock);
4156 	if (unlikely(tr->current_trace != iter->trace)) {
4157 		/* Close iter->trace before switching to the new current tracer */
4158 		if (iter->trace->close)
4159 			iter->trace->close(iter);
4160 		iter->trace = tr->current_trace;
4161 		/* Reopen the new current tracer */
4162 		if (iter->trace->open)
4163 			iter->trace->open(iter);
4164 	}
4165 	mutex_unlock(&trace_types_lock);
4166 
4167 #ifdef CONFIG_TRACER_MAX_TRACE
4168 	if (iter->snapshot && iter->trace->use_max_tr)
4169 		return ERR_PTR(-EBUSY);
4170 #endif
4171 
4172 	if (*pos != iter->pos) {
4173 		iter->ent = NULL;
4174 		iter->cpu = 0;
4175 		iter->idx = -1;
4176 
4177 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4178 			for_each_tracing_cpu(cpu)
4179 				tracing_iter_reset(iter, cpu);
4180 		} else
4181 			tracing_iter_reset(iter, cpu_file);
4182 
4183 		iter->leftover = 0;
4184 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4185 			;
4186 
4187 	} else {
4188 		/*
4189 		 * If we overflowed the seq_file before, then we want
4190 		 * to just reuse the trace_seq buffer again.
4191 		 */
4192 		if (iter->leftover)
4193 			p = iter;
4194 		else {
4195 			l = *pos - 1;
4196 			p = s_next(m, p, &l);
4197 		}
4198 	}
4199 
4200 	trace_event_read_lock();
4201 	trace_access_lock(cpu_file);
4202 	return p;
4203 }
4204 
4205 static void s_stop(struct seq_file *m, void *p)
4206 {
4207 	struct trace_iterator *iter = m->private;
4208 
4209 #ifdef CONFIG_TRACER_MAX_TRACE
4210 	if (iter->snapshot && iter->trace->use_max_tr)
4211 		return;
4212 #endif
4213 
4214 	trace_access_unlock(iter->cpu_file);
4215 	trace_event_read_unlock();
4216 }
4217 
4218 static void
4219 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4220 		      unsigned long *entries, int cpu)
4221 {
4222 	unsigned long count;
4223 
4224 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4225 	/*
4226 	 * If this buffer has skipped entries, then we hold all
4227 	 * entries for the trace and we need to ignore the
4228 	 * ones before the time stamp.
4229 	 */
4230 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4231 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4232 		/* total is the same as the entries */
4233 		*total = count;
4234 	} else
4235 		*total = count +
4236 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4237 	*entries = count;
4238 }
4239 
4240 static void
4241 get_total_entries(struct array_buffer *buf,
4242 		  unsigned long *total, unsigned long *entries)
4243 {
4244 	unsigned long t, e;
4245 	int cpu;
4246 
4247 	*total = 0;
4248 	*entries = 0;
4249 
4250 	for_each_tracing_cpu(cpu) {
4251 		get_total_entries_cpu(buf, &t, &e, cpu);
4252 		*total += t;
4253 		*entries += e;
4254 	}
4255 }
4256 
4257 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4258 {
4259 	unsigned long total, entries;
4260 
4261 	if (!tr)
4262 		tr = &global_trace;
4263 
4264 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4265 
4266 	return entries;
4267 }
4268 
4269 unsigned long trace_total_entries(struct trace_array *tr)
4270 {
4271 	unsigned long total, entries;
4272 
4273 	if (!tr)
4274 		tr = &global_trace;
4275 
4276 	get_total_entries(&tr->array_buffer, &total, &entries);
4277 
4278 	return entries;
4279 }
4280 
4281 static void print_lat_help_header(struct seq_file *m)
4282 {
4283 	seq_puts(m, "#                    _------=> CPU#            \n"
4284 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4285 		    "#                  | / _----=> need-resched    \n"
4286 		    "#                  || / _---=> hardirq/softirq \n"
4287 		    "#                  ||| / _--=> preempt-depth   \n"
4288 		    "#                  |||| / _-=> migrate-disable \n"
4289 		    "#                  ||||| /     delay           \n"
4290 		    "#  cmd     pid     |||||| time  |   caller     \n"
4291 		    "#     \\   /        ||||||  \\    |    /       \n");
4292 }
4293 
4294 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4295 {
4296 	unsigned long total;
4297 	unsigned long entries;
4298 
4299 	get_total_entries(buf, &total, &entries);
4300 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4301 		   entries, total, num_online_cpus());
4302 	seq_puts(m, "#\n");
4303 }
4304 
4305 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4306 				   unsigned int flags)
4307 {
4308 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4309 
4310 	print_event_info(buf, m);
4311 
4312 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4313 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4314 }
4315 
4316 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4317 				       unsigned int flags)
4318 {
4319 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4320 	static const char space[] = "            ";
4321 	int prec = tgid ? 12 : 2;
4322 
4323 	print_event_info(buf, m);
4324 
4325 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4326 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4327 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4328 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4329 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4330 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4331 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4332 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4333 }
4334 
4335 void
4336 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4337 {
4338 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4339 	struct array_buffer *buf = iter->array_buffer;
4340 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4341 	struct tracer *type = iter->trace;
4342 	unsigned long entries;
4343 	unsigned long total;
4344 	const char *name = type->name;
4345 
4346 	get_total_entries(buf, &total, &entries);
4347 
4348 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4349 		   name, UTS_RELEASE);
4350 	seq_puts(m, "# -----------------------------------"
4351 		 "---------------------------------\n");
4352 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4353 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4354 		   nsecs_to_usecs(data->saved_latency),
4355 		   entries,
4356 		   total,
4357 		   buf->cpu,
4358 		   preempt_model_none()      ? "server" :
4359 		   preempt_model_voluntary() ? "desktop" :
4360 		   preempt_model_full()      ? "preempt" :
4361 		   preempt_model_rt()        ? "preempt_rt" :
4362 		   "unknown",
4363 		   /* These are reserved for later use */
4364 		   0, 0, 0, 0);
4365 #ifdef CONFIG_SMP
4366 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4367 #else
4368 	seq_puts(m, ")\n");
4369 #endif
4370 	seq_puts(m, "#    -----------------\n");
4371 	seq_printf(m, "#    | task: %.16s-%d "
4372 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4373 		   data->comm, data->pid,
4374 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4375 		   data->policy, data->rt_priority);
4376 	seq_puts(m, "#    -----------------\n");
4377 
4378 	if (data->critical_start) {
4379 		seq_puts(m, "#  => started at: ");
4380 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4381 		trace_print_seq(m, &iter->seq);
4382 		seq_puts(m, "\n#  => ended at:   ");
4383 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4384 		trace_print_seq(m, &iter->seq);
4385 		seq_puts(m, "\n#\n");
4386 	}
4387 
4388 	seq_puts(m, "#\n");
4389 }
4390 
4391 static void test_cpu_buff_start(struct trace_iterator *iter)
4392 {
4393 	struct trace_seq *s = &iter->seq;
4394 	struct trace_array *tr = iter->tr;
4395 
4396 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4397 		return;
4398 
4399 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4400 		return;
4401 
4402 	if (cpumask_available(iter->started) &&
4403 	    cpumask_test_cpu(iter->cpu, iter->started))
4404 		return;
4405 
4406 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4407 		return;
4408 
4409 	if (cpumask_available(iter->started))
4410 		cpumask_set_cpu(iter->cpu, iter->started);
4411 
4412 	/* Don't print started cpu buffer for the first entry of the trace */
4413 	if (iter->idx > 1)
4414 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4415 				iter->cpu);
4416 }
4417 
4418 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4419 {
4420 	struct trace_array *tr = iter->tr;
4421 	struct trace_seq *s = &iter->seq;
4422 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4423 	struct trace_entry *entry;
4424 	struct trace_event *event;
4425 
4426 	entry = iter->ent;
4427 
4428 	test_cpu_buff_start(iter);
4429 
4430 	event = ftrace_find_event(entry->type);
4431 
4432 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4433 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4434 			trace_print_lat_context(iter);
4435 		else
4436 			trace_print_context(iter);
4437 	}
4438 
4439 	if (trace_seq_has_overflowed(s))
4440 		return TRACE_TYPE_PARTIAL_LINE;
4441 
4442 	if (event) {
4443 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4444 			return print_event_fields(iter, event);
4445 		return event->funcs->trace(iter, sym_flags, event);
4446 	}
4447 
4448 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4449 
4450 	return trace_handle_return(s);
4451 }
4452 
4453 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4454 {
4455 	struct trace_array *tr = iter->tr;
4456 	struct trace_seq *s = &iter->seq;
4457 	struct trace_entry *entry;
4458 	struct trace_event *event;
4459 
4460 	entry = iter->ent;
4461 
4462 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4463 		trace_seq_printf(s, "%d %d %llu ",
4464 				 entry->pid, iter->cpu, iter->ts);
4465 
4466 	if (trace_seq_has_overflowed(s))
4467 		return TRACE_TYPE_PARTIAL_LINE;
4468 
4469 	event = ftrace_find_event(entry->type);
4470 	if (event)
4471 		return event->funcs->raw(iter, 0, event);
4472 
4473 	trace_seq_printf(s, "%d ?\n", entry->type);
4474 
4475 	return trace_handle_return(s);
4476 }
4477 
4478 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4479 {
4480 	struct trace_array *tr = iter->tr;
4481 	struct trace_seq *s = &iter->seq;
4482 	unsigned char newline = '\n';
4483 	struct trace_entry *entry;
4484 	struct trace_event *event;
4485 
4486 	entry = iter->ent;
4487 
4488 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4489 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4490 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4491 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4492 		if (trace_seq_has_overflowed(s))
4493 			return TRACE_TYPE_PARTIAL_LINE;
4494 	}
4495 
4496 	event = ftrace_find_event(entry->type);
4497 	if (event) {
4498 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4499 		if (ret != TRACE_TYPE_HANDLED)
4500 			return ret;
4501 	}
4502 
4503 	SEQ_PUT_FIELD(s, newline);
4504 
4505 	return trace_handle_return(s);
4506 }
4507 
4508 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4509 {
4510 	struct trace_array *tr = iter->tr;
4511 	struct trace_seq *s = &iter->seq;
4512 	struct trace_entry *entry;
4513 	struct trace_event *event;
4514 
4515 	entry = iter->ent;
4516 
4517 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4518 		SEQ_PUT_FIELD(s, entry->pid);
4519 		SEQ_PUT_FIELD(s, iter->cpu);
4520 		SEQ_PUT_FIELD(s, iter->ts);
4521 		if (trace_seq_has_overflowed(s))
4522 			return TRACE_TYPE_PARTIAL_LINE;
4523 	}
4524 
4525 	event = ftrace_find_event(entry->type);
4526 	return event ? event->funcs->binary(iter, 0, event) :
4527 		TRACE_TYPE_HANDLED;
4528 }
4529 
4530 int trace_empty(struct trace_iterator *iter)
4531 {
4532 	struct ring_buffer_iter *buf_iter;
4533 	int cpu;
4534 
4535 	/* If we are looking at one CPU buffer, only check that one */
4536 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4537 		cpu = iter->cpu_file;
4538 		buf_iter = trace_buffer_iter(iter, cpu);
4539 		if (buf_iter) {
4540 			if (!ring_buffer_iter_empty(buf_iter))
4541 				return 0;
4542 		} else {
4543 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4544 				return 0;
4545 		}
4546 		return 1;
4547 	}
4548 
4549 	for_each_tracing_cpu(cpu) {
4550 		buf_iter = trace_buffer_iter(iter, cpu);
4551 		if (buf_iter) {
4552 			if (!ring_buffer_iter_empty(buf_iter))
4553 				return 0;
4554 		} else {
4555 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4556 				return 0;
4557 		}
4558 	}
4559 
4560 	return 1;
4561 }
4562 
4563 /*  Called with trace_event_read_lock() held. */
4564 enum print_line_t print_trace_line(struct trace_iterator *iter)
4565 {
4566 	struct trace_array *tr = iter->tr;
4567 	unsigned long trace_flags = tr->trace_flags;
4568 	enum print_line_t ret;
4569 
4570 	if (iter->lost_events) {
4571 		if (iter->lost_events == (unsigned long)-1)
4572 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4573 					 iter->cpu);
4574 		else
4575 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4576 					 iter->cpu, iter->lost_events);
4577 		if (trace_seq_has_overflowed(&iter->seq))
4578 			return TRACE_TYPE_PARTIAL_LINE;
4579 	}
4580 
4581 	if (iter->trace && iter->trace->print_line) {
4582 		ret = iter->trace->print_line(iter);
4583 		if (ret != TRACE_TYPE_UNHANDLED)
4584 			return ret;
4585 	}
4586 
4587 	if (iter->ent->type == TRACE_BPUTS &&
4588 			trace_flags & TRACE_ITER_PRINTK &&
4589 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4590 		return trace_print_bputs_msg_only(iter);
4591 
4592 	if (iter->ent->type == TRACE_BPRINT &&
4593 			trace_flags & TRACE_ITER_PRINTK &&
4594 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4595 		return trace_print_bprintk_msg_only(iter);
4596 
4597 	if (iter->ent->type == TRACE_PRINT &&
4598 			trace_flags & TRACE_ITER_PRINTK &&
4599 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4600 		return trace_print_printk_msg_only(iter);
4601 
4602 	if (trace_flags & TRACE_ITER_BIN)
4603 		return print_bin_fmt(iter);
4604 
4605 	if (trace_flags & TRACE_ITER_HEX)
4606 		return print_hex_fmt(iter);
4607 
4608 	if (trace_flags & TRACE_ITER_RAW)
4609 		return print_raw_fmt(iter);
4610 
4611 	return print_trace_fmt(iter);
4612 }
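
/*
 * print_trace_line() resolves the output format in a fixed order: the lost
 * events banner (if any), the tracer's own ->print_line() callback, the
 * msg-only shortcuts for TRACE_BPUTS/TRACE_BPRINT/TRACE_PRINT (when both
 * the printk and printk-msg-only options are set), the bin/hex/raw output
 * options, and finally the default formatter print_trace_fmt().
 */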
4613 
4614 void trace_latency_header(struct seq_file *m)
4615 {
4616 	struct trace_iterator *iter = m->private;
4617 	struct trace_array *tr = iter->tr;
4618 
4619 	/* print nothing if the buffers are empty */
4620 	if (trace_empty(iter))
4621 		return;
4622 
4623 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4624 		print_trace_header(m, iter);
4625 
4626 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4627 		print_lat_help_header(m);
4628 }
4629 
4630 void trace_default_header(struct seq_file *m)
4631 {
4632 	struct trace_iterator *iter = m->private;
4633 	struct trace_array *tr = iter->tr;
4634 	unsigned long trace_flags = tr->trace_flags;
4635 
4636 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4637 		return;
4638 
4639 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4640 		/* print nothing if the buffers are empty */
4641 		if (trace_empty(iter))
4642 			return;
4643 		print_trace_header(m, iter);
4644 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4645 			print_lat_help_header(m);
4646 	} else {
4647 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4648 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4649 				print_func_help_header_irq(iter->array_buffer,
4650 							   m, trace_flags);
4651 			else
4652 				print_func_help_header(iter->array_buffer, m,
4653 						       trace_flags);
4654 		}
4655 	}
4656 }
4657 
4658 static void test_ftrace_alive(struct seq_file *m)
4659 {
4660 	if (!ftrace_is_dead())
4661 		return;
4662 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4663 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4664 }
4665 
4666 #ifdef CONFIG_TRACER_MAX_TRACE
4667 static void show_snapshot_main_help(struct seq_file *m)
4668 {
4669 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4670 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4671 		    "#                      Takes a snapshot of the main buffer.\n"
4672 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4673 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4674 		    "#                       is not a '0' or '1')\n");
4675 }
4676 
4677 static void show_snapshot_percpu_help(struct seq_file *m)
4678 {
4679 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4680 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4681 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4682 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4683 #else
4684 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4685 		    "#                     Must use main snapshot file to allocate.\n");
4686 #endif
4687 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4688 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4689 		    "#                       is not a '0' or '1')\n");
4690 }
4691 
4692 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4693 {
4694 	if (iter->tr->allocated_snapshot)
4695 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4696 	else
4697 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4698 
4699 	seq_puts(m, "# Snapshot commands:\n");
4700 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4701 		show_snapshot_main_help(m);
4702 	else
4703 		show_snapshot_percpu_help(m);
4704 }
4705 #else
4706 /* Should never be called */
4707 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4708 #endif
4709 
4710 static int s_show(struct seq_file *m, void *v)
4711 {
4712 	struct trace_iterator *iter = v;
4713 	int ret;
4714 
4715 	if (iter->ent == NULL) {
4716 		if (iter->tr) {
4717 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4718 			seq_puts(m, "#\n");
4719 			test_ftrace_alive(m);
4720 		}
4721 		if (iter->snapshot && trace_empty(iter))
4722 			print_snapshot_help(m, iter);
4723 		else if (iter->trace && iter->trace->print_header)
4724 			iter->trace->print_header(m);
4725 		else
4726 			trace_default_header(m);
4727 
4728 	} else if (iter->leftover) {
4729 		/*
4730 		 * If we filled the seq_file buffer earlier, we
4731 		 * want to just show it now.
4732 		 */
4733 		ret = trace_print_seq(m, &iter->seq);
4734 
4735 		/* ret should this time be zero, but you never know */
4736 		iter->leftover = ret;
4737 
4738 	} else {
4739 		ret = print_trace_line(iter);
4740 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4741 			iter->seq.full = 0;
4742 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4743 		}
4744 		ret = trace_print_seq(m, &iter->seq);
4745 		/*
4746 		 * If we overflow the seq_file buffer, then it will
4747 		 * ask us for this data again at the next s_start().
4748 		 * Use that instead.
4749 		 *  ret is 0 if seq_file write succeeded.
4750 		 *        -1 otherwise.
4751 		 */
4752 		iter->leftover = ret;
4753 	}
4754 
4755 	return 0;
4756 }
4757 
4758 /*
4759  * Should be used after trace_array_get(); trace_types_lock
4760  * ensures that i_cdev was already initialized.
4761  */
4762 static inline int tracing_get_cpu(struct inode *inode)
4763 {
4764 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4765 		return (long)inode->i_cdev - 1;
4766 	return RING_BUFFER_ALL_CPUS;
4767 }
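
/*
 * The per-cpu files presumably store (cpu + 1) in i_cdev when they are
 * created (see trace_create_cpu_file()), so a NULL i_cdev denotes the
 * top-level file (RING_BUFFER_ALL_CPUS) and CPU N is recovered by
 * subtracting one.
 */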
4768 
4769 static const struct seq_operations tracer_seq_ops = {
4770 	.start		= s_start,
4771 	.next		= s_next,
4772 	.stop		= s_stop,
4773 	.show		= s_show,
4774 };
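
/*
 * These callbacks make up the seq_file read path used by tracing_fops (set
 * up in __tracing_open()): s_start() resyncs the iterator with the
 * requested position, or reuses iter->seq when a previous s_show()
 * overflowed and set iter->leftover; s_next() walks entries with
 * trace_find_next_entry_inc(); s_show() formats one line through
 * print_trace_line(); and s_stop() drops the locks taken in s_start().
 */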
4775 
4776 /*
4777  * Note, as iter itself can be allocated and freed in different
4778  * ways, this function is only used to free its content, and not
4779  * the iterator itself. The only requirement on all the allocations
4780  * is that they must zero all fields (kzalloc), as freeing works with
4781  * either allocated content or NULL.
4782  */
4783 static void free_trace_iter_content(struct trace_iterator *iter)
4784 {
4785 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4786 	if (iter->fmt != static_fmt_buf)
4787 		kfree(iter->fmt);
4788 
4789 	kfree(iter->temp);
4790 	kfree(iter->buffer_iter);
4791 	mutex_destroy(&iter->mutex);
4792 	free_cpumask_var(iter->started);
4793 }
4794 
4795 static struct trace_iterator *
4796 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4797 {
4798 	struct trace_array *tr = inode->i_private;
4799 	struct trace_iterator *iter;
4800 	int cpu;
4801 
4802 	if (tracing_disabled)
4803 		return ERR_PTR(-ENODEV);
4804 
4805 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4806 	if (!iter)
4807 		return ERR_PTR(-ENOMEM);
4808 
4809 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4810 				    GFP_KERNEL);
4811 	if (!iter->buffer_iter)
4812 		goto release;
4813 
4814 	/*
4815 	 * trace_find_next_entry() may need to save off iter->ent.
4816 	 * It will place it into the iter->temp buffer. As most
4817 	 * events are less than 128 bytes, allocate a buffer of that size.
4818 	 * If one is greater, then trace_find_next_entry() will
4819 	 * allocate a new buffer to adjust for the bigger iter->ent.
4820 	 * It's not critical if it fails to get allocated here.
4821 	 */
4822 	iter->temp = kmalloc(128, GFP_KERNEL);
4823 	if (iter->temp)
4824 		iter->temp_size = 128;
4825 
4826 	/*
4827 	 * trace_event_printf() may need to modify the given format
4828 	 * string to replace %p with %px so that it shows the real address
4829 	 * instead of a hash value. However, that is only needed for event
4830 	 * tracing; other tracers may not need it. Defer the allocation
4831 	 * until it is needed.
4832 	 */
4833 	iter->fmt = NULL;
4834 	iter->fmt_size = 0;
4835 
4836 	mutex_lock(&trace_types_lock);
4837 	iter->trace = tr->current_trace;
4838 
4839 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4840 		goto fail;
4841 
4842 	iter->tr = tr;
4843 
4844 #ifdef CONFIG_TRACER_MAX_TRACE
4845 	/* Currently only the top directory has a snapshot */
4846 	if (tr->current_trace->print_max || snapshot)
4847 		iter->array_buffer = &tr->max_buffer;
4848 	else
4849 #endif
4850 		iter->array_buffer = &tr->array_buffer;
4851 	iter->snapshot = snapshot;
4852 	iter->pos = -1;
4853 	iter->cpu_file = tracing_get_cpu(inode);
4854 	mutex_init(&iter->mutex);
4855 
4856 	/* Notify the tracer early, before we stop tracing. */
4857 	if (iter->trace->open)
4858 		iter->trace->open(iter);
4859 
4860 	/* Annotate start of buffers if we had overruns */
4861 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4862 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4863 
4864 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4865 	if (trace_clocks[tr->clock_id].in_ns)
4866 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4867 
4868 	/*
4869 	 * If pause-on-trace is enabled, then stop the trace while
4870 	 * dumping, unless this is the "snapshot" file
4871 	 */
4872 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4873 		tracing_stop_tr(tr);
4874 
4875 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4876 		for_each_tracing_cpu(cpu) {
4877 			iter->buffer_iter[cpu] =
4878 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4879 							 cpu, GFP_KERNEL);
4880 		}
4881 		ring_buffer_read_prepare_sync();
4882 		for_each_tracing_cpu(cpu) {
4883 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4884 			tracing_iter_reset(iter, cpu);
4885 		}
4886 	} else {
4887 		cpu = iter->cpu_file;
4888 		iter->buffer_iter[cpu] =
4889 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4890 						 cpu, GFP_KERNEL);
4891 		ring_buffer_read_prepare_sync();
4892 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4893 		tracing_iter_reset(iter, cpu);
4894 	}
4895 
4896 	mutex_unlock(&trace_types_lock);
4897 
4898 	return iter;
4899 
4900  fail:
4901 	mutex_unlock(&trace_types_lock);
4902 	free_trace_iter_content(iter);
4903 release:
4904 	seq_release_private(inode, file);
4905 	return ERR_PTR(-ENOMEM);
4906 }
4907 
4908 int tracing_open_generic(struct inode *inode, struct file *filp)
4909 {
4910 	int ret;
4911 
4912 	ret = tracing_check_open_get_tr(NULL);
4913 	if (ret)
4914 		return ret;
4915 
4916 	filp->private_data = inode->i_private;
4917 	return 0;
4918 }
4919 
4920 bool tracing_is_disabled(void)
4921 {
4922 	return tracing_disabled ? true : false;
4923 }
4924 
4925 /*
4926  * Open and update trace_array ref count.
4927  * Must have the current trace_array passed to it.
4928  */
4929 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4930 {
4931 	struct trace_array *tr = inode->i_private;
4932 	int ret;
4933 
4934 	ret = tracing_check_open_get_tr(tr);
4935 	if (ret)
4936 		return ret;
4937 
4938 	filp->private_data = inode->i_private;
4939 
4940 	return 0;
4941 }
4942 
4943 /*
4944  * The private pointer of the inode is the trace_event_file.
4945  * Update the tr ref count associated to it.
4946  */
4947 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4948 {
4949 	struct trace_event_file *file = inode->i_private;
4950 	int ret;
4951 
4952 	ret = tracing_check_open_get_tr(file->tr);
4953 	if (ret)
4954 		return ret;
4955 
4956 	mutex_lock(&event_mutex);
4957 
4958 	/* Fail if the file is marked for removal */
4959 	if (file->flags & EVENT_FILE_FL_FREED) {
4960 		trace_array_put(file->tr);
4961 		ret = -ENODEV;
4962 	} else {
4963 		event_file_get(file);
4964 	}
4965 
4966 	mutex_unlock(&event_mutex);
4967 	if (ret)
4968 		return ret;
4969 
4970 	filp->private_data = inode->i_private;
4971 
4972 	return 0;
4973 }
4974 
4975 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4976 {
4977 	struct trace_event_file *file = inode->i_private;
4978 
4979 	trace_array_put(file->tr);
4980 	event_file_put(file);
4981 
4982 	return 0;
4983 }
4984 
4985 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4986 {
4987 	tracing_release_file_tr(inode, filp);
4988 	return single_release(inode, filp);
4989 }
4990 
4991 static int tracing_mark_open(struct inode *inode, struct file *filp)
4992 {
4993 	stream_open(inode, filp);
4994 	return tracing_open_generic_tr(inode, filp);
4995 }
4996 
4997 static int tracing_release(struct inode *inode, struct file *file)
4998 {
4999 	struct trace_array *tr = inode->i_private;
5000 	struct seq_file *m = file->private_data;
5001 	struct trace_iterator *iter;
5002 	int cpu;
5003 
5004 	if (!(file->f_mode & FMODE_READ)) {
5005 		trace_array_put(tr);
5006 		return 0;
5007 	}
5008 
5009 	/* Writes do not use seq_file */
5010 	iter = m->private;
5011 	mutex_lock(&trace_types_lock);
5012 
5013 	for_each_tracing_cpu(cpu) {
5014 		if (iter->buffer_iter[cpu])
5015 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
5016 	}
5017 
5018 	if (iter->trace && iter->trace->close)
5019 		iter->trace->close(iter);
5020 
5021 	if (!iter->snapshot && tr->stop_count)
5022 		/* reenable tracing if it was previously enabled */
5023 		tracing_start_tr(tr);
5024 
5025 	__trace_array_put(tr);
5026 
5027 	mutex_unlock(&trace_types_lock);
5028 
5029 	free_trace_iter_content(iter);
5030 	seq_release_private(inode, file);
5031 
5032 	return 0;
5033 }
5034 
5035 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5036 {
5037 	struct trace_array *tr = inode->i_private;
5038 
5039 	trace_array_put(tr);
5040 	return 0;
5041 }
5042 
5043 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5044 {
5045 	struct trace_array *tr = inode->i_private;
5046 
5047 	trace_array_put(tr);
5048 
5049 	return single_release(inode, file);
5050 }
5051 
5052 static int tracing_open(struct inode *inode, struct file *file)
5053 {
5054 	struct trace_array *tr = inode->i_private;
5055 	struct trace_iterator *iter;
5056 	int ret;
5057 
5058 	ret = tracing_check_open_get_tr(tr);
5059 	if (ret)
5060 		return ret;
5061 
5062 	/* If this file was open for write, then erase contents */
5063 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5064 		int cpu = tracing_get_cpu(inode);
5065 		struct array_buffer *trace_buf = &tr->array_buffer;
5066 
5067 #ifdef CONFIG_TRACER_MAX_TRACE
5068 		if (tr->current_trace->print_max)
5069 			trace_buf = &tr->max_buffer;
5070 #endif
5071 
5072 		if (cpu == RING_BUFFER_ALL_CPUS)
5073 			tracing_reset_online_cpus(trace_buf);
5074 		else
5075 			tracing_reset_cpu(trace_buf, cpu);
5076 	}
5077 
5078 	if (file->f_mode & FMODE_READ) {
5079 		iter = __tracing_open(inode, file, false);
5080 		if (IS_ERR(iter))
5081 			ret = PTR_ERR(iter);
5082 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5083 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5084 	}
5085 
5086 	if (ret < 0)
5087 		trace_array_put(tr);
5088 
5089 	return ret;
5090 }
5091 
5092 /*
5093  * Some tracers are not suitable for instance buffers.
5094  * A tracer is always available for the global array (toplevel)
5095  * or if it explicitly states that it is.
5096  */
5097 static bool
5098 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5099 {
5100 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5101 }
5102 
5103 /* Find the next tracer that this trace array may use */
5104 static struct tracer *
5105 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5106 {
5107 	while (t && !trace_ok_for_array(t, tr))
5108 		t = t->next;
5109 
5110 	return t;
5111 }
5112 
5113 static void *
5114 t_next(struct seq_file *m, void *v, loff_t *pos)
5115 {
5116 	struct trace_array *tr = m->private;
5117 	struct tracer *t = v;
5118 
5119 	(*pos)++;
5120 
5121 	if (t)
5122 		t = get_tracer_for_array(tr, t->next);
5123 
5124 	return t;
5125 }
5126 
5127 static void *t_start(struct seq_file *m, loff_t *pos)
5128 {
5129 	struct trace_array *tr = m->private;
5130 	struct tracer *t;
5131 	loff_t l = 0;
5132 
5133 	mutex_lock(&trace_types_lock);
5134 
5135 	t = get_tracer_for_array(tr, trace_types);
5136 	for (; t && l < *pos; t = t_next(m, t, &l))
5137 			;
5138 
5139 	return t;
5140 }
5141 
5142 static void t_stop(struct seq_file *m, void *p)
5143 {
5144 	mutex_unlock(&trace_types_lock);
5145 }
5146 
5147 static int t_show(struct seq_file *m, void *v)
5148 {
5149 	struct tracer *t = v;
5150 
5151 	if (!t)
5152 		return 0;
5153 
5154 	seq_puts(m, t->name);
5155 	if (t->next)
5156 		seq_putc(m, ' ');
5157 	else
5158 		seq_putc(m, '\n');
5159 
5160 	return 0;
5161 }
5162 
5163 static const struct seq_operations show_traces_seq_ops = {
5164 	.start		= t_start,
5165 	.next		= t_next,
5166 	.stop		= t_stop,
5167 	.show		= t_show,
5168 };
5169 
5170 static int show_traces_open(struct inode *inode, struct file *file)
5171 {
5172 	struct trace_array *tr = inode->i_private;
5173 	struct seq_file *m;
5174 	int ret;
5175 
5176 	ret = tracing_check_open_get_tr(tr);
5177 	if (ret)
5178 		return ret;
5179 
5180 	ret = seq_open(file, &show_traces_seq_ops);
5181 	if (ret) {
5182 		trace_array_put(tr);
5183 		return ret;
5184 	}
5185 
5186 	m = file->private_data;
5187 	m->private = tr;
5188 
5189 	return 0;
5190 }
5191 
5192 static int show_traces_release(struct inode *inode, struct file *file)
5193 {
5194 	struct trace_array *tr = inode->i_private;
5195 
5196 	trace_array_put(tr);
5197 	return seq_release(inode, file);
5198 }
5199 
5200 static ssize_t
5201 tracing_write_stub(struct file *filp, const char __user *ubuf,
5202 		   size_t count, loff_t *ppos)
5203 {
5204 	return count;
5205 }
5206 
5207 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5208 {
5209 	int ret;
5210 
5211 	if (file->f_mode & FMODE_READ)
5212 		ret = seq_lseek(file, offset, whence);
5213 	else
5214 		file->f_pos = ret = 0;
5215 
5216 	return ret;
5217 }
5218 
5219 static const struct file_operations tracing_fops = {
5220 	.open		= tracing_open,
5221 	.read		= seq_read,
5222 	.read_iter	= seq_read_iter,
5223 	.splice_read	= copy_splice_read,
5224 	.write		= tracing_write_stub,
5225 	.llseek		= tracing_lseek,
5226 	.release	= tracing_release,
5227 };
5228 
5229 static const struct file_operations show_traces_fops = {
5230 	.open		= show_traces_open,
5231 	.read		= seq_read,
5232 	.llseek		= seq_lseek,
5233 	.release	= show_traces_release,
5234 };
5235 
5236 static ssize_t
5237 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5238 		     size_t count, loff_t *ppos)
5239 {
5240 	struct trace_array *tr = file_inode(filp)->i_private;
5241 	char *mask_str;
5242 	int len;
5243 
5244 	len = snprintf(NULL, 0, "%*pb\n",
5245 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5246 	mask_str = kmalloc(len, GFP_KERNEL);
5247 	if (!mask_str)
5248 		return -ENOMEM;
5249 
5250 	len = snprintf(mask_str, len, "%*pb\n",
5251 		       cpumask_pr_args(tr->tracing_cpumask));
5252 	if (len >= count) {
5253 		count = -EINVAL;
5254 		goto out_err;
5255 	}
5256 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5257 
5258 out_err:
5259 	kfree(mask_str);
5260 
5261 	return count;
5262 }
5263 
5264 int tracing_set_cpumask(struct trace_array *tr,
5265 			cpumask_var_t tracing_cpumask_new)
5266 {
5267 	int cpu;
5268 
5269 	if (!tr)
5270 		return -EINVAL;
5271 
5272 	local_irq_disable();
5273 	arch_spin_lock(&tr->max_lock);
5274 	for_each_tracing_cpu(cpu) {
5275 		/*
5276 		 * Increase/decrease the disabled counter if we are
5277 		 * about to flip a bit in the cpumask:
5278 		 */
5279 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5280 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5281 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5282 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5283 #ifdef CONFIG_TRACER_MAX_TRACE
5284 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5285 #endif
5286 		}
5287 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5288 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5289 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5290 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5291 #ifdef CONFIG_TRACER_MAX_TRACE
5292 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5293 #endif
5294 		}
5295 	}
5296 	arch_spin_unlock(&tr->max_lock);
5297 	local_irq_enable();
5298 
5299 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5300 
5301 	return 0;
5302 }
5303 
5304 static ssize_t
5305 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5306 		      size_t count, loff_t *ppos)
5307 {
5308 	struct trace_array *tr = file_inode(filp)->i_private;
5309 	cpumask_var_t tracing_cpumask_new;
5310 	int err;
5311 
5312 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5313 		return -ENOMEM;
5314 
5315 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5316 	if (err)
5317 		goto err_free;
5318 
5319 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5320 	if (err)
5321 		goto err_free;
5322 
5323 	free_cpumask_var(tracing_cpumask_new);
5324 
5325 	return count;
5326 
5327 err_free:
5328 	free_cpumask_var(tracing_cpumask_new);
5329 
5330 	return err;
5331 }
5332 
5333 static const struct file_operations tracing_cpumask_fops = {
5334 	.open		= tracing_open_generic_tr,
5335 	.read		= tracing_cpumask_read,
5336 	.write		= tracing_cpumask_write,
5337 	.release	= tracing_release_generic_tr,
5338 	.llseek		= generic_file_llseek,
5339 };
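
/*
 * The mask is read and written in the usual hex cpumask format
 * ("%*pb" / cpumask_parse_user()).  As an illustrative example, writing
 * "3" to the tracing_cpumask file keeps tracing on CPUs 0 and 1: recording
 * is disabled on CPUs whose bit goes from set to clear and re-enabled on
 * CPUs whose bit goes from clear to set.
 */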
5340 
5341 static int tracing_trace_options_show(struct seq_file *m, void *v)
5342 {
5343 	struct tracer_opt *trace_opts;
5344 	struct trace_array *tr = m->private;
5345 	u32 tracer_flags;
5346 	int i;
5347 
5348 	mutex_lock(&trace_types_lock);
5349 	tracer_flags = tr->current_trace->flags->val;
5350 	trace_opts = tr->current_trace->flags->opts;
5351 
5352 	for (i = 0; trace_options[i]; i++) {
5353 		if (tr->trace_flags & (1 << i))
5354 			seq_printf(m, "%s\n", trace_options[i]);
5355 		else
5356 			seq_printf(m, "no%s\n", trace_options[i]);
5357 	}
5358 
5359 	for (i = 0; trace_opts[i].name; i++) {
5360 		if (tracer_flags & trace_opts[i].bit)
5361 			seq_printf(m, "%s\n", trace_opts[i].name);
5362 		else
5363 			seq_printf(m, "no%s\n", trace_opts[i].name);
5364 	}
5365 	mutex_unlock(&trace_types_lock);
5366 
5367 	return 0;
5368 }
5369 
5370 static int __set_tracer_option(struct trace_array *tr,
5371 			       struct tracer_flags *tracer_flags,
5372 			       struct tracer_opt *opts, int neg)
5373 {
5374 	struct tracer *trace = tracer_flags->trace;
5375 	int ret;
5376 
5377 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5378 	if (ret)
5379 		return ret;
5380 
5381 	if (neg)
5382 		tracer_flags->val &= ~opts->bit;
5383 	else
5384 		tracer_flags->val |= opts->bit;
5385 	return 0;
5386 }
5387 
5388 /* Try to assign a tracer specific option */
5389 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5390 {
5391 	struct tracer *trace = tr->current_trace;
5392 	struct tracer_flags *tracer_flags = trace->flags;
5393 	struct tracer_opt *opts = NULL;
5394 	int i;
5395 
5396 	for (i = 0; tracer_flags->opts[i].name; i++) {
5397 		opts = &tracer_flags->opts[i];
5398 
5399 		if (strcmp(cmp, opts->name) == 0)
5400 			return __set_tracer_option(tr, trace->flags, opts, neg);
5401 	}
5402 
5403 	return -EINVAL;
5404 }
5405 
5406 /* Some tracers require overwrite to stay enabled */
5407 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5408 {
5409 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5410 		return -1;
5411 
5412 	return 0;
5413 }
5414 
5415 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5416 {
5417 	int *map;
5418 
5419 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5420 	    (mask == TRACE_ITER_RECORD_CMD))
5421 		lockdep_assert_held(&event_mutex);
5422 
5423 	/* do nothing if flag is already set */
5424 	if (!!(tr->trace_flags & mask) == !!enabled)
5425 		return 0;
5426 
5427 	/* Give the tracer a chance to approve the change */
5428 	if (tr->current_trace->flag_changed)
5429 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5430 			return -EINVAL;
5431 
5432 	if (enabled)
5433 		tr->trace_flags |= mask;
5434 	else
5435 		tr->trace_flags &= ~mask;
5436 
5437 	if (mask == TRACE_ITER_RECORD_CMD)
5438 		trace_event_enable_cmd_record(enabled);
5439 
5440 	if (mask == TRACE_ITER_RECORD_TGID) {
5441 		if (!tgid_map) {
5442 			tgid_map_max = pid_max;
5443 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5444 				       GFP_KERNEL);
5445 
5446 			/*
5447 			 * Pairs with smp_load_acquire() in
5448 			 * trace_find_tgid_ptr() to ensure that if it observes
5449 			 * the tgid_map we just allocated then it also observes
5450 			 * the corresponding tgid_map_max value.
5451 			 */
5452 			smp_store_release(&tgid_map, map);
5453 		}
5454 		if (!tgid_map) {
5455 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5456 			return -ENOMEM;
5457 		}
5458 
5459 		trace_event_enable_tgid_record(enabled);
5460 	}
5461 
5462 	if (mask == TRACE_ITER_EVENT_FORK)
5463 		trace_event_follow_fork(tr, enabled);
5464 
5465 	if (mask == TRACE_ITER_FUNC_FORK)
5466 		ftrace_pid_follow_fork(tr, enabled);
5467 
5468 	if (mask == TRACE_ITER_OVERWRITE) {
5469 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5470 #ifdef CONFIG_TRACER_MAX_TRACE
5471 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5472 #endif
5473 	}
5474 
5475 	if (mask == TRACE_ITER_PRINTK) {
5476 		trace_printk_start_stop_comm(enabled);
5477 		trace_printk_control(enabled);
5478 	}
5479 
5480 	return 0;
5481 }
5482 
5483 int trace_set_options(struct trace_array *tr, char *option)
5484 {
5485 	char *cmp;
5486 	int neg = 0;
5487 	int ret;
5488 	size_t orig_len = strlen(option);
5489 	int len;
5490 
5491 	cmp = strstrip(option);
5492 
5493 	len = str_has_prefix(cmp, "no");
5494 	if (len)
5495 		neg = 1;
5496 
5497 	cmp += len;
5498 
5499 	mutex_lock(&event_mutex);
5500 	mutex_lock(&trace_types_lock);
5501 
5502 	ret = match_string(trace_options, -1, cmp);
5503 	/* If no option could be set, test the specific tracer options */
5504 	if (ret < 0)
5505 		ret = set_tracer_option(tr, cmp, neg);
5506 	else
5507 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5508 
5509 	mutex_unlock(&trace_types_lock);
5510 	mutex_unlock(&event_mutex);
5511 
5512 	/*
5513 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5514 	 * turn it back into a space.
5515 	 */
5516 	if (orig_len > strlen(option))
5517 		option[strlen(option)] = ' ';
5518 
5519 	return ret;
5520 }
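
/*
 * As an illustrative example, a write of "noannotate" is stripped to
 * "annotate" with neg set, matched against trace_options[] and cleared via
 * set_tracer_flag().  A name that does not match any core option falls
 * through to the current tracer's private options via set_tracer_option().
 */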
5521 
5522 static void __init apply_trace_boot_options(void)
5523 {
5524 	char *buf = trace_boot_options_buf;
5525 	char *option;
5526 
5527 	while (true) {
5528 		option = strsep(&buf, ",");
5529 
5530 		if (!option)
5531 			break;
5532 
5533 		if (*option)
5534 			trace_set_options(&global_trace, option);
5535 
5536 		/* Put back the comma to allow this to be called again */
5537 		if (buf)
5538 			*(buf - 1) = ',';
5539 	}
5540 }
5541 
5542 static ssize_t
5543 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5544 			size_t cnt, loff_t *ppos)
5545 {
5546 	struct seq_file *m = filp->private_data;
5547 	struct trace_array *tr = m->private;
5548 	char buf[64];
5549 	int ret;
5550 
5551 	if (cnt >= sizeof(buf))
5552 		return -EINVAL;
5553 
5554 	if (copy_from_user(buf, ubuf, cnt))
5555 		return -EFAULT;
5556 
5557 	buf[cnt] = 0;
5558 
5559 	ret = trace_set_options(tr, buf);
5560 	if (ret < 0)
5561 		return ret;
5562 
5563 	*ppos += cnt;
5564 
5565 	return cnt;
5566 }
5567 
5568 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5569 {
5570 	struct trace_array *tr = inode->i_private;
5571 	int ret;
5572 
5573 	ret = tracing_check_open_get_tr(tr);
5574 	if (ret)
5575 		return ret;
5576 
5577 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5578 	if (ret < 0)
5579 		trace_array_put(tr);
5580 
5581 	return ret;
5582 }
5583 
5584 static const struct file_operations tracing_iter_fops = {
5585 	.open		= tracing_trace_options_open,
5586 	.read		= seq_read,
5587 	.llseek		= seq_lseek,
5588 	.release	= tracing_single_release_tr,
5589 	.write		= tracing_trace_options_write,
5590 };
5591 
5592 static const char readme_msg[] =
5593 	"tracing mini-HOWTO:\n\n"
5594 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5595 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5596 	" Important files:\n"
5597 	"  trace\t\t\t- The static contents of the buffer\n"
5598 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5599 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5600 	"  current_tracer\t- function and latency tracers\n"
5601 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5602 	"  error_log\t- error log for failed commands (that support it)\n"
5603 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5604 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5605 	"  trace_clock\t\t- change the clock used to order events\n"
5606 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5607 	"      global:   Synced across CPUs but slows tracing down.\n"
5608 	"     counter:   Not a clock, but just an increment\n"
5609 	"      uptime:   Jiffy counter from time of boot\n"
5610 	"        perf:   Same clock that perf events use\n"
5611 #ifdef CONFIG_X86_64
5612 	"     x86-tsc:   TSC cycle counter\n"
5613 #endif
5614 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5615 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5616 	"    absolute:   Absolute (standalone) timestamp\n"
5617 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5618 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5619 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5620 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5621 	"\t\t\t  Remove sub-buffer with rmdir\n"
5622 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5623 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5624 	"\t\t\t  option name\n"
5625 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5626 #ifdef CONFIG_DYNAMIC_FTRACE
5627 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5628 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5629 	"\t\t\t  functions\n"
5630 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5631 	"\t     modules: Can select a group via module\n"
5632 	"\t      Format: :mod:<module-name>\n"
5633 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5634 	"\t    triggers: a command to perform when function is hit\n"
5635 	"\t      Format: <function>:<trigger>[:count]\n"
5636 	"\t     trigger: traceon, traceoff\n"
5637 	"\t\t      enable_event:<system>:<event>\n"
5638 	"\t\t      disable_event:<system>:<event>\n"
5639 #ifdef CONFIG_STACKTRACE
5640 	"\t\t      stacktrace\n"
5641 #endif
5642 #ifdef CONFIG_TRACER_SNAPSHOT
5643 	"\t\t      snapshot\n"
5644 #endif
5645 	"\t\t      dump\n"
5646 	"\t\t      cpudump\n"
5647 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5648 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5649 	"\t     The first one will disable tracing every time do_fault is hit\n"
5650 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5651 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5652 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5653 	"\t       the counter will not decrement. It only decrements when the\n"
5654 	"\t       trigger did work\n"
5655 	"\t     To remove trigger without count:\n"
5656 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5657 	"\t     To remove trigger with a count:\n"
5658 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5659 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5660 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5661 	"\t    modules: Can select a group via module command :mod:\n"
5662 	"\t    Does not accept triggers\n"
5663 #endif /* CONFIG_DYNAMIC_FTRACE */
5664 #ifdef CONFIG_FUNCTION_TRACER
5665 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5666 	"\t\t    (function)\n"
5667 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5668 	"\t\t    (function)\n"
5669 #endif
5670 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5671 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5672 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5673 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5674 #endif
5675 #ifdef CONFIG_TRACER_SNAPSHOT
5676 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5677 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5678 	"\t\t\t  information\n"
5679 #endif
5680 #ifdef CONFIG_STACK_TRACER
5681 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5682 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5683 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5684 	"\t\t\t  new trace)\n"
5685 #ifdef CONFIG_DYNAMIC_FTRACE
5686 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5687 	"\t\t\t  traces\n"
5688 #endif
5689 #endif /* CONFIG_STACK_TRACER */
5690 #ifdef CONFIG_DYNAMIC_EVENTS
5691 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5692 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5693 #endif
5694 #ifdef CONFIG_KPROBE_EVENTS
5695 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5696 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5697 #endif
5698 #ifdef CONFIG_UPROBE_EVENTS
5699 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5700 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5701 #endif
5702 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5703     defined(CONFIG_FPROBE_EVENTS)
5704 	"\t  accepts: event-definitions (one definition per line)\n"
5705 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5706 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5707 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5708 #endif
5709 #ifdef CONFIG_FPROBE_EVENTS
5710 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5711 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5712 #endif
5713 #ifdef CONFIG_HIST_TRIGGERS
5714 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5715 #endif
5716 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5717 	"\t           -:[<group>/][<event>]\n"
5718 #ifdef CONFIG_KPROBE_EVENTS
5719 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5720   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5721 #endif
5722 #ifdef CONFIG_UPROBE_EVENTS
5723   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5724 #endif
5725 	"\t     args: <name>=fetcharg[:type]\n"
5726 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5727 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5728 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5729 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5730 	"\t           <argname>[->field[->field|.field...]],\n"
5731 #else
5732 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5733 #endif
5734 #else
5735 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5736 #endif
5737 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5738 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5739 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5740 	"\t           symstr, <type>\\[<array-size>\\]\n"
5741 #ifdef CONFIG_HIST_TRIGGERS
5742 	"\t    field: <stype> <name>;\n"
5743 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5744 	"\t           [unsigned] char/int/long\n"
5745 #endif
5746 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5747 	"\t            of the <attached-group>/<attached-event>.\n"
5748 #endif
5749 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5750 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5751 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5752 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5753 	"\t\t\t  events\n"
5754 	"      filter\t\t- If set, only events passing filter are traced\n"
5755 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5756 	"\t\t\t  <event>:\n"
5757 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5758 	"      filter\t\t- If set, only events passing filter are traced\n"
5759 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5760 	"\t    Format: <trigger>[:count][if <filter>]\n"
5761 	"\t   trigger: traceon, traceoff\n"
5762 	"\t            enable_event:<system>:<event>\n"
5763 	"\t            disable_event:<system>:<event>\n"
5764 #ifdef CONFIG_HIST_TRIGGERS
5765 	"\t            enable_hist:<system>:<event>\n"
5766 	"\t            disable_hist:<system>:<event>\n"
5767 #endif
5768 #ifdef CONFIG_STACKTRACE
5769 	"\t\t    stacktrace\n"
5770 #endif
5771 #ifdef CONFIG_TRACER_SNAPSHOT
5772 	"\t\t    snapshot\n"
5773 #endif
5774 #ifdef CONFIG_HIST_TRIGGERS
5775 	"\t\t    hist (see below)\n"
5776 #endif
5777 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5778 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5779 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5780 	"\t                  events/block/block_unplug/trigger\n"
5781 	"\t   The first disables tracing every time block_unplug is hit.\n"
5782 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5783 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5784 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5785 	"\t   Like function triggers, the counter is only decremented if it\n"
5786 	"\t    enabled or disabled tracing.\n"
5787 	"\t   To remove a trigger without a count:\n"
5788 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5789 	"\t   To remove a trigger with a count:\n"
5790 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5791 	"\t   Filters can be ignored when removing a trigger.\n"
5792 #ifdef CONFIG_HIST_TRIGGERS
5793 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5794 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5795 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5796 	"\t            [:values=<field1[,field2,...]>]\n"
5797 	"\t            [:sort=<field1[,field2,...]>]\n"
5798 	"\t            [:size=#entries]\n"
5799 	"\t            [:pause][:continue][:clear]\n"
5800 	"\t            [:name=histname1]\n"
5801 	"\t            [:nohitcount]\n"
5802 	"\t            [:<handler>.<action>]\n"
5803 	"\t            [if <filter>]\n\n"
5804 	"\t    Note, special fields can be used as well:\n"
5805 	"\t            common_timestamp - to record current timestamp\n"
5806 	"\t            common_cpu - to record the CPU the event happened on\n"
5807 	"\n"
5808 	"\t    A hist trigger variable can be:\n"
5809 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5810 	"\t        - a reference to another variable e.g. y=$x,\n"
5811 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5812 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5813 	"\n"
5814 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5815 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5816 	"\t    variable reference, field or numeric literal.\n"
5817 	"\n"
5818 	"\t    When a matching event is hit, an entry is added to a hash\n"
5819 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5820 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5821 	"\t    correspond to fields in the event's format description.  Keys\n"
5822 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5823 	"\t    Compound keys consisting of up to two fields can be specified\n"
5824 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5825 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5826 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5827 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5828 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5829 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5830 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5831 	"\t    its histogram data will be shared with other triggers of the\n"
5832 	"\t    same name, and trigger hits will update this common data.\n\n"
5833 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5834 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5835 	"\t    triggers attached to an event, there will be a table for each\n"
5836 	"\t    trigger in the output.  The table displayed for a named\n"
5837 	"\t    trigger will be the same as any other instance having the\n"
5838 	"\t    same name.  The default format used to display a given field\n"
5839 	"\t    can be modified by appending any of the following modifiers\n"
5840 	"\t    to the field name, as applicable:\n\n"
5841 	"\t            .hex        display a number as a hex value\n"
5842 	"\t            .sym        display an address as a symbol\n"
5843 	"\t            .sym-offset display an address as a symbol and offset\n"
5844 	"\t            .execname   display a common_pid as a program name\n"
5845 	"\t            .syscall    display a syscall id as a syscall name\n"
5846 	"\t            .log2       display log2 value rather than raw number\n"
5847 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5848 	"\t            .usecs      display a common_timestamp in microseconds\n"
5849 	"\t            .percent    display a number as a percentage value\n"
5850 	"\t            .graph      display a bar-graph of a value\n\n"
5851 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5852 	"\t    trigger or to start a hist trigger but not log any events\n"
5853 	"\t    until told to do so.  'continue' can be used to start or\n"
5854 	"\t    restart a paused hist trigger.\n\n"
5855 	"\t    The 'clear' parameter will clear the contents of a running\n"
5856 	"\t    hist trigger and leave its current paused/active state\n"
5857 	"\t    unchanged.\n\n"
5858 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5859 	"\t    raw hitcount in the histogram.\n\n"
5860 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5861 	"\t    have one event conditionally start and stop another event's\n"
5862 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5863 	"\t    the enable_event and disable_event triggers.\n\n"
5864 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5865 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5866 	"\t        <handler>.<action>\n\n"
5867 	"\t    The available handlers are:\n\n"
5868 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5869 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5870 	"\t        onchange(var)            - invoke action if var changes\n\n"
5871 	"\t    The available actions are:\n\n"
5872 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5873 	"\t        save(field,...)                      - save current event fields\n"
5874 #ifdef CONFIG_TRACER_SNAPSHOT
5875 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5876 #endif
5877 #ifdef CONFIG_SYNTH_EVENTS
5878 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5879 	"\t  Write into this file to define/undefine new synthetic events.\n"
5880 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5881 #endif
5882 #endif
5883 ;
5884 
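/*
 * A minimal userspace sketch of the basic workflow described in the
 * mini-HOWTO above, assuming tracefs is mounted at the usual
 * /sys/kernel/tracing location: re-enable tracing via tracing_on, drop a
 * message into the ring buffer via trace_marker, then disable tracing
 * again.  Illustrative only; error handling is kept to a minimum.
 */
#if 0	/* standalone userspace sketch, not part of the kernel build */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* write a short string into a tracefs control file */
static int write_tracefs(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;
	ret = write(fd, val, strlen(val));
	close(fd);
	return ret < 0 ? -1 : 0;
}

int main(void)
{
	/* "echo 1 > tracing_on": quick way to re-enable tracing */
	write_tracefs("/sys/kernel/tracing/tracing_on", "1");
	/* writes into trace_marker end up in the kernel ring buffer */
	write_tracefs("/sys/kernel/tracing/trace_marker", "hello from userspace\n");
	/* "echo 0 > tracing_on": quick way to disable tracing */
	write_tracefs("/sys/kernel/tracing/tracing_on", "0");
	return 0;
}
#endif
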
5885 static ssize_t
5886 tracing_readme_read(struct file *filp, char __user *ubuf,
5887 		       size_t cnt, loff_t *ppos)
5888 {
5889 	return simple_read_from_buffer(ubuf, cnt, ppos,
5890 					readme_msg, strlen(readme_msg));
5891 }
5892 
5893 static const struct file_operations tracing_readme_fops = {
5894 	.open		= tracing_open_generic,
5895 	.read		= tracing_readme_read,
5896 	.llseek		= generic_file_llseek,
5897 };
5898 
5899 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5900 {
5901 	int pid = ++(*pos);
5902 
5903 	return trace_find_tgid_ptr(pid);
5904 }
5905 
5906 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5907 {
5908 	int pid = *pos;
5909 
5910 	return trace_find_tgid_ptr(pid);
5911 }
5912 
5913 static void saved_tgids_stop(struct seq_file *m, void *v)
5914 {
5915 }
5916 
5917 static int saved_tgids_show(struct seq_file *m, void *v)
5918 {
5919 	int *entry = (int *)v;
5920 	int pid = entry - tgid_map;
5921 	int tgid = *entry;
5922 
5923 	if (tgid == 0)
5924 		return SEQ_SKIP;
5925 
5926 	seq_printf(m, "%d %d\n", pid, tgid);
5927 	return 0;
5928 }
5929 
5930 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5931 	.start		= saved_tgids_start,
5932 	.stop		= saved_tgids_stop,
5933 	.next		= saved_tgids_next,
5934 	.show		= saved_tgids_show,
5935 };
5936 
5937 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5938 {
5939 	int ret;
5940 
5941 	ret = tracing_check_open_get_tr(NULL);
5942 	if (ret)
5943 		return ret;
5944 
5945 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5946 }
5947 
5948 
5949 static const struct file_operations tracing_saved_tgids_fops = {
5950 	.open		= tracing_saved_tgids_open,
5951 	.read		= seq_read,
5952 	.llseek		= seq_lseek,
5953 	.release	= seq_release,
5954 };
5955 
5956 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5957 {
5958 	unsigned int *ptr = v;
5959 
5960 	if (*pos || m->count)
5961 		ptr++;
5962 
5963 	(*pos)++;
5964 
5965 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5966 	     ptr++) {
5967 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5968 			continue;
5969 
5970 		return ptr;
5971 	}
5972 
5973 	return NULL;
5974 }
5975 
5976 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5977 {
5978 	void *v;
5979 	loff_t l = 0;
5980 
5981 	preempt_disable();
5982 	arch_spin_lock(&trace_cmdline_lock);
5983 
5984 	v = &savedcmd->map_cmdline_to_pid[0];
5985 	while (l <= *pos) {
5986 		v = saved_cmdlines_next(m, v, &l);
5987 		if (!v)
5988 			return NULL;
5989 	}
5990 
5991 	return v;
5992 }
5993 
5994 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5995 {
5996 	arch_spin_unlock(&trace_cmdline_lock);
5997 	preempt_enable();
5998 }
5999 
6000 static int saved_cmdlines_show(struct seq_file *m, void *v)
6001 {
6002 	char buf[TASK_COMM_LEN];
6003 	unsigned int *pid = v;
6004 
6005 	__trace_find_cmdline(*pid, buf);
6006 	seq_printf(m, "%d %s\n", *pid, buf);
6007 	return 0;
6008 }
6009 
6010 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6011 	.start		= saved_cmdlines_start,
6012 	.next		= saved_cmdlines_next,
6013 	.stop		= saved_cmdlines_stop,
6014 	.show		= saved_cmdlines_show,
6015 };
6016 
6017 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6018 {
6019 	int ret;
6020 
6021 	ret = tracing_check_open_get_tr(NULL);
6022 	if (ret)
6023 		return ret;
6024 
6025 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6026 }
6027 
6028 static const struct file_operations tracing_saved_cmdlines_fops = {
6029 	.open		= tracing_saved_cmdlines_open,
6030 	.read		= seq_read,
6031 	.llseek		= seq_lseek,
6032 	.release	= seq_release,
6033 };
6034 
6035 static ssize_t
6036 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6037 				 size_t cnt, loff_t *ppos)
6038 {
6039 	char buf[64];
6040 	int r;
6041 
6042 	preempt_disable();
6043 	arch_spin_lock(&trace_cmdline_lock);
6044 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6045 	arch_spin_unlock(&trace_cmdline_lock);
6046 	preempt_enable();
6047 
6048 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6049 }
6050 
6051 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6052 {
6053 	kfree(s->saved_cmdlines);
6054 	kfree(s->map_cmdline_to_pid);
6055 	kfree(s);
6056 }
6057 
6058 static int tracing_resize_saved_cmdlines(unsigned int val)
6059 {
6060 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
6061 
6062 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6063 	if (!s)
6064 		return -ENOMEM;
6065 
6066 	if (allocate_cmdlines_buffer(val, s) < 0) {
6067 		kfree(s);
6068 		return -ENOMEM;
6069 	}
6070 
6071 	preempt_disable();
6072 	arch_spin_lock(&trace_cmdline_lock);
6073 	savedcmd_temp = savedcmd;
6074 	savedcmd = s;
6075 	arch_spin_unlock(&trace_cmdline_lock);
6076 	preempt_enable();
6077 	free_saved_cmdlines_buffer(savedcmd_temp);
6078 
6079 	return 0;
6080 }
6081 
6082 static ssize_t
6083 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6084 				  size_t cnt, loff_t *ppos)
6085 {
6086 	unsigned long val;
6087 	int ret;
6088 
6089 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6090 	if (ret)
6091 		return ret;
6092 
6093 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6094 	if (!val || val > PID_MAX_DEFAULT)
6095 		return -EINVAL;
6096 
6097 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6098 	if (ret < 0)
6099 		return ret;
6100 
6101 	*ppos += cnt;
6102 
6103 	return cnt;
6104 }
6105 
6106 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6107 	.open		= tracing_open_generic,
6108 	.read		= tracing_saved_cmdlines_size_read,
6109 	.write		= tracing_saved_cmdlines_size_write,
6110 };
6111 
6112 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6113 static union trace_eval_map_item *
6114 update_eval_map(union trace_eval_map_item *ptr)
6115 {
6116 	if (!ptr->map.eval_string) {
6117 		if (ptr->tail.next) {
6118 			ptr = ptr->tail.next;
6119 			/* Set ptr to the next real item (skip head) */
6120 			ptr++;
6121 		} else
6122 			return NULL;
6123 	}
6124 	return ptr;
6125 }
6126 
6127 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6128 {
6129 	union trace_eval_map_item *ptr = v;
6130 
6131 	/*
6132 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6133 	 * This really should never happen.
6134 	 */
6135 	(*pos)++;
6136 	ptr = update_eval_map(ptr);
6137 	if (WARN_ON_ONCE(!ptr))
6138 		return NULL;
6139 
6140 	ptr++;
6141 	ptr = update_eval_map(ptr);
6142 
6143 	return ptr;
6144 }
6145 
6146 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6147 {
6148 	union trace_eval_map_item *v;
6149 	loff_t l = 0;
6150 
6151 	mutex_lock(&trace_eval_mutex);
6152 
6153 	v = trace_eval_maps;
6154 	if (v)
6155 		v++;
6156 
6157 	while (v && l < *pos) {
6158 		v = eval_map_next(m, v, &l);
6159 	}
6160 
6161 	return v;
6162 }
6163 
6164 static void eval_map_stop(struct seq_file *m, void *v)
6165 {
6166 	mutex_unlock(&trace_eval_mutex);
6167 }
6168 
6169 static int eval_map_show(struct seq_file *m, void *v)
6170 {
6171 	union trace_eval_map_item *ptr = v;
6172 
6173 	seq_printf(m, "%s %ld (%s)\n",
6174 		   ptr->map.eval_string, ptr->map.eval_value,
6175 		   ptr->map.system);
6176 
6177 	return 0;
6178 }
6179 
6180 static const struct seq_operations tracing_eval_map_seq_ops = {
6181 	.start		= eval_map_start,
6182 	.next		= eval_map_next,
6183 	.stop		= eval_map_stop,
6184 	.show		= eval_map_show,
6185 };
6186 
6187 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6188 {
6189 	int ret;
6190 
6191 	ret = tracing_check_open_get_tr(NULL);
6192 	if (ret)
6193 		return ret;
6194 
6195 	return seq_open(filp, &tracing_eval_map_seq_ops);
6196 }
6197 
6198 static const struct file_operations tracing_eval_map_fops = {
6199 	.open		= tracing_eval_map_open,
6200 	.read		= seq_read,
6201 	.llseek		= seq_lseek,
6202 	.release	= seq_release,
6203 };
6204 
6205 static inline union trace_eval_map_item *
6206 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6207 {
6208 	/* Return tail of array given the head */
6209 	return ptr + ptr->head.length + 1;
6210 }
6211 
6212 static void
6213 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6214 			   int len)
6215 {
6216 	struct trace_eval_map **stop;
6217 	struct trace_eval_map **map;
6218 	union trace_eval_map_item *map_array;
6219 	union trace_eval_map_item *ptr;
6220 
6221 	stop = start + len;
6222 
6223 	/*
6224 	 * The trace_eval_maps contains the map plus a head and tail item,
6225 	 * where the head holds the module and length of array, and the
6226 	 * tail holds a pointer to the next list.
6227 	 */
6228 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6229 	if (!map_array) {
6230 		pr_warn("Unable to allocate trace eval mapping\n");
6231 		return;
6232 	}
6233 
6234 	mutex_lock(&trace_eval_mutex);
6235 
6236 	if (!trace_eval_maps)
6237 		trace_eval_maps = map_array;
6238 	else {
6239 		ptr = trace_eval_maps;
6240 		for (;;) {
6241 			ptr = trace_eval_jmp_to_tail(ptr);
6242 			if (!ptr->tail.next)
6243 				break;
6244 			ptr = ptr->tail.next;
6245 
6246 		}
6247 		ptr->tail.next = map_array;
6248 	}
6249 	map_array->head.mod = mod;
6250 	map_array->head.length = len;
6251 	map_array++;
6252 
6253 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6254 		map_array->map = **map;
6255 		map_array++;
6256 	}
6257 	memset(map_array, 0, sizeof(*map_array));
6258 
6259 	mutex_unlock(&trace_eval_mutex);
6260 }
6261 
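/*
 * For reference, each chunk appended to trace_eval_maps by
 * trace_insert_eval_map_file() above occupies len + 2 items laid out as:
 *
 *   map_array[0]        head  (mod, length = len)
 *   map_array[1..len]   map   copies of the module's trace_eval_map entries
 *   map_array[len + 1]  tail  (zeroed; tail.next later points to the next
 *                              module's chunk, or stays NULL)
 *
 * trace_eval_jmp_to_tail() hops from a chunk's head to its tail using
 * head.length, and the seq_file iterator above skips the head/tail items
 * via update_eval_map().
 */
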
6262 static void trace_create_eval_file(struct dentry *d_tracer)
6263 {
6264 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6265 			  NULL, &tracing_eval_map_fops);
6266 }
6267 
6268 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6269 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6270 static inline void trace_insert_eval_map_file(struct module *mod,
6271 			      struct trace_eval_map **start, int len) { }
6272 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6273 
6274 static void trace_insert_eval_map(struct module *mod,
6275 				  struct trace_eval_map **start, int len)
6276 {
6277 	struct trace_eval_map **map;
6278 
6279 	if (len <= 0)
6280 		return;
6281 
6282 	map = start;
6283 
6284 	trace_event_eval_update(map, len);
6285 
6286 	trace_insert_eval_map_file(mod, start, len);
6287 }
6288 
6289 static ssize_t
6290 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6291 		       size_t cnt, loff_t *ppos)
6292 {
6293 	struct trace_array *tr = filp->private_data;
6294 	char buf[MAX_TRACER_SIZE+2];
6295 	int r;
6296 
6297 	mutex_lock(&trace_types_lock);
6298 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6299 	mutex_unlock(&trace_types_lock);
6300 
6301 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6302 }
6303 
6304 int tracer_init(struct tracer *t, struct trace_array *tr)
6305 {
6306 	tracing_reset_online_cpus(&tr->array_buffer);
6307 	return t->init(tr);
6308 }
6309 
6310 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6311 {
6312 	int cpu;
6313 
6314 	for_each_tracing_cpu(cpu)
6315 		per_cpu_ptr(buf->data, cpu)->entries = val;
6316 }
6317 
6318 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6319 {
6320 	if (cpu == RING_BUFFER_ALL_CPUS) {
6321 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6322 	} else {
6323 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6324 	}
6325 }
6326 
6327 #ifdef CONFIG_TRACER_MAX_TRACE
6328 /* resize @tr's buffer to the size of @size_tr's entries */
6329 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6330 					struct array_buffer *size_buf, int cpu_id)
6331 {
6332 	int cpu, ret = 0;
6333 
6334 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6335 		for_each_tracing_cpu(cpu) {
6336 			ret = ring_buffer_resize(trace_buf->buffer,
6337 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6338 			if (ret < 0)
6339 				break;
6340 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6341 				per_cpu_ptr(size_buf->data, cpu)->entries;
6342 		}
6343 	} else {
6344 		ret = ring_buffer_resize(trace_buf->buffer,
6345 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6346 		if (ret == 0)
6347 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6348 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6349 	}
6350 
6351 	return ret;
6352 }
6353 #endif /* CONFIG_TRACER_MAX_TRACE */
6354 
6355 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6356 					unsigned long size, int cpu)
6357 {
6358 	int ret;
6359 
6360 	/*
6361 	 * If kernel or user changes the size of the ring buffer
6362 	 * we use the size that was given, and we can forget about
6363 	 * expanding it later.
6364 	 */
6365 	trace_set_ring_buffer_expanded(tr);
6366 
6367 	/* May be called before buffers are initialized */
6368 	if (!tr->array_buffer.buffer)
6369 		return 0;
6370 
6371 	/* Do not allow tracing while resizing ring buffer */
6372 	tracing_stop_tr(tr);
6373 
6374 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6375 	if (ret < 0)
6376 		goto out_start;
6377 
6378 #ifdef CONFIG_TRACER_MAX_TRACE
6379 	if (!tr->allocated_snapshot)
6380 		goto out;
6381 
6382 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6383 	if (ret < 0) {
6384 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6385 						     &tr->array_buffer, cpu);
6386 		if (r < 0) {
6387 			/*
6388 			 * AARGH! We are left with a differently
6389 			 * sized max buffer!!!!
6390 			 * The max buffer is our "snapshot" buffer.
6391 			 * When a tracer needs a snapshot (one of the
6392 			 * latency tracers), it swaps the max buffer
6393 			 * with the saved snapshot. We succeeded in
6394 			 * updating the size of the main buffer, but failed
6395 			 * to update the size of the max buffer. Then, when we
6396 			 * tried to reset the main buffer to its original size,
6397 			 * we failed there too. This is very unlikely to
6398 			 * happen, but if it does, warn and kill all
6399 			 * tracing.
6400 			 */
6401 			WARN_ON(1);
6402 			tracing_disabled = 1;
6403 		}
6404 		goto out_start;
6405 	}
6406 
6407 	update_buffer_entries(&tr->max_buffer, cpu);
6408 
6409  out:
6410 #endif /* CONFIG_TRACER_MAX_TRACE */
6411 
6412 	update_buffer_entries(&tr->array_buffer, cpu);
6413  out_start:
6414 	tracing_start_tr(tr);
6415 	return ret;
6416 }
6417 
6418 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6419 				  unsigned long size, int cpu_id)
6420 {
6421 	int ret;
6422 
6423 	mutex_lock(&trace_types_lock);
6424 
6425 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6426 		/* make sure this cpu is enabled in the mask */
6427 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6428 			ret = -EINVAL;
6429 			goto out;
6430 		}
6431 	}
6432 
6433 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6434 	if (ret < 0)
6435 		ret = -ENOMEM;
6436 
6437 out:
6438 	mutex_unlock(&trace_types_lock);
6439 
6440 	return ret;
6441 }
6442 
6443 
6444 /**
6445  * tracing_update_buffers - used by tracing facility to expand ring buffers
6446  * @tr: The tracing instance
6447  *
6448  * To save memory when tracing is never used on a system that has it
6449  * configured in, the ring buffers are initially set to a minimum size.
6450  * Once a user starts to use the tracing facility, they need to grow
6451  * to their default size.
6452  *
6453  * This function is to be called when a tracer is about to be used.
6454  */
6455 int tracing_update_buffers(struct trace_array *tr)
6456 {
6457 	int ret = 0;
6458 
6459 	mutex_lock(&trace_types_lock);
6460 	if (!tr->ring_buffer_expanded)
6461 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6462 						RING_BUFFER_ALL_CPUS);
6463 	mutex_unlock(&trace_types_lock);
6464 
6465 	return ret;
6466 }
6467 
6468 struct trace_option_dentry;
6469 
6470 static void
6471 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6472 
6473 /*
6474  * Used to clear out the tracer before deletion of an instance.
6475  * Must have trace_types_lock held.
6476  */
6477 static void tracing_set_nop(struct trace_array *tr)
6478 {
6479 	if (tr->current_trace == &nop_trace)
6480 		return;
6481 
6482 	tr->current_trace->enabled--;
6483 
6484 	if (tr->current_trace->reset)
6485 		tr->current_trace->reset(tr);
6486 
6487 	tr->current_trace = &nop_trace;
6488 }
6489 
6490 static bool tracer_options_updated;
6491 
6492 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6493 {
6494 	/* Only enable if the directory has been created already. */
6495 	if (!tr->dir)
6496 		return;
6497 
6498 	/* Only create trace option files after update_tracer_options finishes */
6499 	if (!tracer_options_updated)
6500 		return;
6501 
6502 	create_trace_option_files(tr, t);
6503 }
6504 
6505 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6506 {
6507 	struct tracer *t;
6508 #ifdef CONFIG_TRACER_MAX_TRACE
6509 	bool had_max_tr;
6510 #endif
6511 	int ret = 0;
6512 
6513 	mutex_lock(&trace_types_lock);
6514 
6515 	if (!tr->ring_buffer_expanded) {
6516 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6517 						RING_BUFFER_ALL_CPUS);
6518 		if (ret < 0)
6519 			goto out;
6520 		ret = 0;
6521 	}
6522 
6523 	for (t = trace_types; t; t = t->next) {
6524 		if (strcmp(t->name, buf) == 0)
6525 			break;
6526 	}
6527 	if (!t) {
6528 		ret = -EINVAL;
6529 		goto out;
6530 	}
6531 	if (t == tr->current_trace)
6532 		goto out;
6533 
6534 #ifdef CONFIG_TRACER_SNAPSHOT
6535 	if (t->use_max_tr) {
6536 		local_irq_disable();
6537 		arch_spin_lock(&tr->max_lock);
6538 		if (tr->cond_snapshot)
6539 			ret = -EBUSY;
6540 		arch_spin_unlock(&tr->max_lock);
6541 		local_irq_enable();
6542 		if (ret)
6543 			goto out;
6544 	}
6545 #endif
6546 	/* Some tracers won't work on kernel command line */
6547 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6548 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6549 			t->name);
6550 		goto out;
6551 	}
6552 
6553 	/* Some tracers are only allowed for the top level buffer */
6554 	if (!trace_ok_for_array(t, tr)) {
6555 		ret = -EINVAL;
6556 		goto out;
6557 	}
6558 
6559 	/* If trace pipe files are being read, we can't change the tracer */
6560 	if (tr->trace_ref) {
6561 		ret = -EBUSY;
6562 		goto out;
6563 	}
6564 
6565 	trace_branch_disable();
6566 
6567 	tr->current_trace->enabled--;
6568 
6569 	if (tr->current_trace->reset)
6570 		tr->current_trace->reset(tr);
6571 
6572 #ifdef CONFIG_TRACER_MAX_TRACE
6573 	had_max_tr = tr->current_trace->use_max_tr;
6574 
6575 	/* Current trace needs to be nop_trace before synchronize_rcu */
6576 	tr->current_trace = &nop_trace;
6577 
6578 	if (had_max_tr && !t->use_max_tr) {
6579 		/*
6580 		 * We need to make sure that the update_max_tr sees that
6581 		 * current_trace changed to nop_trace to keep it from
6582 		 * swapping the buffers after we resize it.
6583 		 * The update_max_tr is called with interrupts disabled,
6584 		 * so a synchronize_rcu() is sufficient.
6585 		 */
6586 		synchronize_rcu();
6587 		free_snapshot(tr);
6588 	}
6589 
6590 	if (t->use_max_tr && !tr->allocated_snapshot) {
6591 		ret = tracing_alloc_snapshot_instance(tr);
6592 		if (ret < 0)
6593 			goto out;
6594 	}
6595 #else
6596 	tr->current_trace = &nop_trace;
6597 #endif
6598 
6599 	if (t->init) {
6600 		ret = tracer_init(t, tr);
6601 		if (ret)
6602 			goto out;
6603 	}
6604 
6605 	tr->current_trace = t;
6606 	tr->current_trace->enabled++;
6607 	trace_branch_enable(tr);
6608  out:
6609 	mutex_unlock(&trace_types_lock);
6610 
6611 	return ret;
6612 }
6613 
6614 static ssize_t
6615 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6616 			size_t cnt, loff_t *ppos)
6617 {
6618 	struct trace_array *tr = filp->private_data;
6619 	char buf[MAX_TRACER_SIZE+1];
6620 	char *name;
6621 	size_t ret;
6622 	int err;
6623 
6624 	ret = cnt;
6625 
6626 	if (cnt > MAX_TRACER_SIZE)
6627 		cnt = MAX_TRACER_SIZE;
6628 
6629 	if (copy_from_user(buf, ubuf, cnt))
6630 		return -EFAULT;
6631 
6632 	buf[cnt] = 0;
6633 
6634 	name = strim(buf);
6635 
6636 	err = tracing_set_tracer(tr, name);
6637 	if (err)
6638 		return err;
6639 
6640 	*ppos += ret;
6641 
6642 	return ret;
6643 }
6644 
6645 static ssize_t
6646 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6647 		   size_t cnt, loff_t *ppos)
6648 {
6649 	char buf[64];
6650 	int r;
6651 
6652 	r = snprintf(buf, sizeof(buf), "%ld\n",
6653 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6654 	if (r > sizeof(buf))
6655 		r = sizeof(buf);
6656 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6657 }
6658 
6659 static ssize_t
6660 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6661 		    size_t cnt, loff_t *ppos)
6662 {
6663 	unsigned long val;
6664 	int ret;
6665 
6666 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6667 	if (ret)
6668 		return ret;
6669 
6670 	*ptr = val * 1000;
6671 
6672 	return cnt;
6673 }
6674 
6675 static ssize_t
6676 tracing_thresh_read(struct file *filp, char __user *ubuf,
6677 		    size_t cnt, loff_t *ppos)
6678 {
6679 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6680 }
6681 
6682 static ssize_t
6683 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6684 		     size_t cnt, loff_t *ppos)
6685 {
6686 	struct trace_array *tr = filp->private_data;
6687 	int ret;
6688 
6689 	mutex_lock(&trace_types_lock);
6690 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6691 	if (ret < 0)
6692 		goto out;
6693 
6694 	if (tr->current_trace->update_thresh) {
6695 		ret = tr->current_trace->update_thresh(tr);
6696 		if (ret < 0)
6697 			goto out;
6698 	}
6699 
6700 	ret = cnt;
6701 out:
6702 	mutex_unlock(&trace_types_lock);
6703 
6704 	return ret;
6705 }
6706 
6707 #ifdef CONFIG_TRACER_MAX_TRACE
6708 
6709 static ssize_t
6710 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6711 		     size_t cnt, loff_t *ppos)
6712 {
6713 	struct trace_array *tr = filp->private_data;
6714 
6715 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6716 }
6717 
6718 static ssize_t
6719 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6720 		      size_t cnt, loff_t *ppos)
6721 {
6722 	struct trace_array *tr = filp->private_data;
6723 
6724 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6725 }
6726 
6727 #endif
6728 
6729 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6730 {
6731 	if (cpu == RING_BUFFER_ALL_CPUS) {
6732 		if (cpumask_empty(tr->pipe_cpumask)) {
6733 			cpumask_setall(tr->pipe_cpumask);
6734 			return 0;
6735 		}
6736 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6737 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6738 		return 0;
6739 	}
6740 	return -EBUSY;
6741 }
6742 
6743 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6744 {
6745 	if (cpu == RING_BUFFER_ALL_CPUS) {
6746 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6747 		cpumask_clear(tr->pipe_cpumask);
6748 	} else {
6749 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6750 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6751 	}
6752 }
6753 
6754 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6755 {
6756 	struct trace_array *tr = inode->i_private;
6757 	struct trace_iterator *iter;
6758 	int cpu;
6759 	int ret;
6760 
6761 	ret = tracing_check_open_get_tr(tr);
6762 	if (ret)
6763 		return ret;
6764 
6765 	mutex_lock(&trace_types_lock);
6766 	cpu = tracing_get_cpu(inode);
6767 	ret = open_pipe_on_cpu(tr, cpu);
6768 	if (ret)
6769 		goto fail_pipe_on_cpu;
6770 
6771 	/* create a buffer to store the information to pass to userspace */
6772 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6773 	if (!iter) {
6774 		ret = -ENOMEM;
6775 		goto fail_alloc_iter;
6776 	}
6777 
6778 	trace_seq_init(&iter->seq);
6779 	iter->trace = tr->current_trace;
6780 
6781 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6782 		ret = -ENOMEM;
6783 		goto fail;
6784 	}
6785 
6786 	/* trace pipe does not show start of buffer */
6787 	cpumask_setall(iter->started);
6788 
6789 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6790 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6791 
6792 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6793 	if (trace_clocks[tr->clock_id].in_ns)
6794 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6795 
6796 	iter->tr = tr;
6797 	iter->array_buffer = &tr->array_buffer;
6798 	iter->cpu_file = cpu;
6799 	mutex_init(&iter->mutex);
6800 	filp->private_data = iter;
6801 
6802 	if (iter->trace->pipe_open)
6803 		iter->trace->pipe_open(iter);
6804 
6805 	nonseekable_open(inode, filp);
6806 
6807 	tr->trace_ref++;
6808 
6809 	mutex_unlock(&trace_types_lock);
6810 	return ret;
6811 
6812 fail:
6813 	kfree(iter);
6814 fail_alloc_iter:
6815 	close_pipe_on_cpu(tr, cpu);
6816 fail_pipe_on_cpu:
6817 	__trace_array_put(tr);
6818 	mutex_unlock(&trace_types_lock);
6819 	return ret;
6820 }
6821 
6822 static int tracing_release_pipe(struct inode *inode, struct file *file)
6823 {
6824 	struct trace_iterator *iter = file->private_data;
6825 	struct trace_array *tr = inode->i_private;
6826 
6827 	mutex_lock(&trace_types_lock);
6828 
6829 	tr->trace_ref--;
6830 
6831 	if (iter->trace->pipe_close)
6832 		iter->trace->pipe_close(iter);
6833 	close_pipe_on_cpu(tr, iter->cpu_file);
6834 	mutex_unlock(&trace_types_lock);
6835 
6836 	free_trace_iter_content(iter);
6837 	kfree(iter);
6838 
6839 	trace_array_put(tr);
6840 
6841 	return 0;
6842 }
6843 
6844 static __poll_t
6845 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6846 {
6847 	struct trace_array *tr = iter->tr;
6848 
6849 	/* Iterators are static; they should be either filled or empty */
6850 	if (trace_buffer_iter(iter, iter->cpu_file))
6851 		return EPOLLIN | EPOLLRDNORM;
6852 
6853 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6854 		/*
6855 		 * Always select as readable when in blocking mode
6856 		 */
6857 		return EPOLLIN | EPOLLRDNORM;
6858 	else
6859 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6860 					     filp, poll_table, iter->tr->buffer_percent);
6861 }
6862 
6863 static __poll_t
6864 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6865 {
6866 	struct trace_iterator *iter = filp->private_data;
6867 
6868 	return trace_poll(iter, filp, poll_table);
6869 }
6870 
6871 /* Must be called with iter->mutex held. */
6872 static int tracing_wait_pipe(struct file *filp)
6873 {
6874 	struct trace_iterator *iter = filp->private_data;
6875 	int ret;
6876 
6877 	while (trace_empty(iter)) {
6878 
6879 		if ((filp->f_flags & O_NONBLOCK)) {
6880 			return -EAGAIN;
6881 		}
6882 
6883 		/*
6884 		 * We stop blocking only when tracing is disabled and we have
6885 		 * already read something; we keep blocking if tracing is
6886 		 * disabled but nothing has been read yet. This allows a user
6887 		 * to cat this file, and then enable tracing. But after we
6888 		 * have read something, we give an EOF when tracing is again disabled.
6889 		 *
6890 		 * iter->pos will be 0 if we haven't read anything.
6891 		 */
6892 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6893 			break;
6894 
6895 		mutex_unlock(&iter->mutex);
6896 
6897 		ret = wait_on_pipe(iter, 0);
6898 
6899 		mutex_lock(&iter->mutex);
6900 
6901 		if (ret)
6902 			return ret;
6903 	}
6904 
6905 	return 1;
6906 }
6907 
6908 /*
6909  * Consumer reader.
6910  */
6911 static ssize_t
6912 tracing_read_pipe(struct file *filp, char __user *ubuf,
6913 		  size_t cnt, loff_t *ppos)
6914 {
6915 	struct trace_iterator *iter = filp->private_data;
6916 	ssize_t sret;
6917 
6918 	/*
6919 	 * Avoid more than one consumer on a single file descriptor.
6920 	 * This is just a matter of trace coherency; the ring buffer itself
6921 	 * is protected.
6922 	 */
6923 	mutex_lock(&iter->mutex);
6924 
6925 	/* return any leftover data */
6926 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6927 	if (sret != -EBUSY)
6928 		goto out;
6929 
6930 	trace_seq_init(&iter->seq);
6931 
6932 	if (iter->trace->read) {
6933 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6934 		if (sret)
6935 			goto out;
6936 	}
6937 
6938 waitagain:
6939 	sret = tracing_wait_pipe(filp);
6940 	if (sret <= 0)
6941 		goto out;
6942 
6943 	/* stop when tracing is finished */
6944 	if (trace_empty(iter)) {
6945 		sret = 0;
6946 		goto out;
6947 	}
6948 
6949 	if (cnt >= PAGE_SIZE)
6950 		cnt = PAGE_SIZE - 1;
6951 
6952 	/* reset all but tr, trace, and overruns */
6953 	trace_iterator_reset(iter);
6954 	cpumask_clear(iter->started);
6955 	trace_seq_init(&iter->seq);
6956 
6957 	trace_event_read_lock();
6958 	trace_access_lock(iter->cpu_file);
6959 	while (trace_find_next_entry_inc(iter) != NULL) {
6960 		enum print_line_t ret;
6961 		int save_len = iter->seq.seq.len;
6962 
6963 		ret = print_trace_line(iter);
6964 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6965 			/*
6966 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6967 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6968 			 * In this case, we need to consume it; otherwise, the loop will peek
6969 			 * at this event next time, resulting in an infinite loop.
6970 			 */
6971 			if (save_len == 0) {
6972 				iter->seq.full = 0;
6973 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6974 				trace_consume(iter);
6975 				break;
6976 			}
6977 
6978 			/* In other cases, don't print partial lines */
6979 			iter->seq.seq.len = save_len;
6980 			break;
6981 		}
6982 		if (ret != TRACE_TYPE_NO_CONSUME)
6983 			trace_consume(iter);
6984 
6985 		if (trace_seq_used(&iter->seq) >= cnt)
6986 			break;
6987 
6988 		/*
6989 		 * Setting the full flag means we reached the trace_seq buffer
6990 		 * size and should have left via the partial output condition above.
6991 		 * If we get here, one of the trace_seq_* functions is not being used properly.
6992 		 */
6993 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6994 			  iter->ent->type);
6995 	}
6996 	trace_access_unlock(iter->cpu_file);
6997 	trace_event_read_unlock();
6998 
6999 	/* Now copy what we have to the user */
7000 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7001 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
7002 		trace_seq_init(&iter->seq);
7003 
7004 	/*
7005 	 * If there was nothing to send to user, in spite of consuming trace
7006 	 * entries, go back to wait for more entries.
7007 	 */
7008 	if (sret == -EBUSY)
7009 		goto waitagain;
7010 
7011 out:
7012 	mutex_unlock(&iter->mutex);
7013 
7014 	return sret;
7015 }
7016 
7017 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7018 				     unsigned int idx)
7019 {
7020 	__free_page(spd->pages[idx]);
7021 }
7022 
7023 static size_t
7024 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7025 {
7026 	size_t count;
7027 	int save_len;
7028 	int ret;
7029 
7030 	/* Seq buffer is page-sized, exactly what we need. */
7031 	for (;;) {
7032 		save_len = iter->seq.seq.len;
7033 		ret = print_trace_line(iter);
7034 
7035 		if (trace_seq_has_overflowed(&iter->seq)) {
7036 			iter->seq.seq.len = save_len;
7037 			break;
7038 		}
7039 
7040 		/*
7041 		 * This should not be hit, because it should only
7042 		 * be set if the iter->seq overflowed. But check it
7043 		 * anyway to be safe.
7044 		 */
7045 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
7046 			iter->seq.seq.len = save_len;
7047 			break;
7048 		}
7049 
7050 		count = trace_seq_used(&iter->seq) - save_len;
7051 		if (rem < count) {
7052 			rem = 0;
7053 			iter->seq.seq.len = save_len;
7054 			break;
7055 		}
7056 
7057 		if (ret != TRACE_TYPE_NO_CONSUME)
7058 			trace_consume(iter);
7059 		rem -= count;
7060 		if (!trace_find_next_entry_inc(iter))	{
7061 			rem = 0;
7062 			iter->ent = NULL;
7063 			break;
7064 		}
7065 	}
7066 
7067 	return rem;
7068 }
7069 
7070 static ssize_t tracing_splice_read_pipe(struct file *filp,
7071 					loff_t *ppos,
7072 					struct pipe_inode_info *pipe,
7073 					size_t len,
7074 					unsigned int flags)
7075 {
7076 	struct page *pages_def[PIPE_DEF_BUFFERS];
7077 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7078 	struct trace_iterator *iter = filp->private_data;
7079 	struct splice_pipe_desc spd = {
7080 		.pages		= pages_def,
7081 		.partial	= partial_def,
7082 		.nr_pages	= 0, /* This gets updated below. */
7083 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7084 		.ops		= &default_pipe_buf_ops,
7085 		.spd_release	= tracing_spd_release_pipe,
7086 	};
7087 	ssize_t ret;
7088 	size_t rem;
7089 	unsigned int i;
7090 
7091 	if (splice_grow_spd(pipe, &spd))
7092 		return -ENOMEM;
7093 
7094 	mutex_lock(&iter->mutex);
7095 
7096 	if (iter->trace->splice_read) {
7097 		ret = iter->trace->splice_read(iter, filp,
7098 					       ppos, pipe, len, flags);
7099 		if (ret)
7100 			goto out_err;
7101 	}
7102 
7103 	ret = tracing_wait_pipe(filp);
7104 	if (ret <= 0)
7105 		goto out_err;
7106 
7107 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7108 		ret = -EFAULT;
7109 		goto out_err;
7110 	}
7111 
7112 	trace_event_read_lock();
7113 	trace_access_lock(iter->cpu_file);
7114 
7115 	/* Fill as many pages as possible. */
7116 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7117 		spd.pages[i] = alloc_page(GFP_KERNEL);
7118 		if (!spd.pages[i])
7119 			break;
7120 
7121 		rem = tracing_fill_pipe_page(rem, iter);
7122 
7123 		/* Copy the data into the page, so we can start over. */
7124 		ret = trace_seq_to_buffer(&iter->seq,
7125 					  page_address(spd.pages[i]),
7126 					  trace_seq_used(&iter->seq));
7127 		if (ret < 0) {
7128 			__free_page(spd.pages[i]);
7129 			break;
7130 		}
7131 		spd.partial[i].offset = 0;
7132 		spd.partial[i].len = trace_seq_used(&iter->seq);
7133 
7134 		trace_seq_init(&iter->seq);
7135 	}
7136 
7137 	trace_access_unlock(iter->cpu_file);
7138 	trace_event_read_unlock();
7139 	mutex_unlock(&iter->mutex);
7140 
7141 	spd.nr_pages = i;
7142 
7143 	if (i)
7144 		ret = splice_to_pipe(pipe, &spd);
7145 	else
7146 		ret = 0;
7147 out:
7148 	splice_shrink_spd(&spd);
7149 	return ret;
7150 
7151 out_err:
7152 	mutex_unlock(&iter->mutex);
7153 	goto out;
7154 }
7155 
7156 static ssize_t
7157 tracing_entries_read(struct file *filp, char __user *ubuf,
7158 		     size_t cnt, loff_t *ppos)
7159 {
7160 	struct inode *inode = file_inode(filp);
7161 	struct trace_array *tr = inode->i_private;
7162 	int cpu = tracing_get_cpu(inode);
7163 	char buf[64];
7164 	int r = 0;
7165 	ssize_t ret;
7166 
7167 	mutex_lock(&trace_types_lock);
7168 
7169 	if (cpu == RING_BUFFER_ALL_CPUS) {
7170 		int cpu, buf_size_same;
7171 		unsigned long size;
7172 
7173 		size = 0;
7174 		buf_size_same = 1;
7175 		/* check if all cpu sizes are same */
7176 		for_each_tracing_cpu(cpu) {
7177 			/* fill in the size from first enabled cpu */
7178 			if (size == 0)
7179 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7180 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7181 				buf_size_same = 0;
7182 				break;
7183 			}
7184 		}
7185 
7186 		if (buf_size_same) {
7187 			if (!tr->ring_buffer_expanded)
7188 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7189 					    size >> 10,
7190 					    trace_buf_size >> 10);
7191 			else
7192 				r = sprintf(buf, "%lu\n", size >> 10);
7193 		} else
7194 			r = sprintf(buf, "X\n");
7195 	} else
7196 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7197 
7198 	mutex_unlock(&trace_types_lock);
7199 
7200 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7201 	return ret;
7202 }
7203 
7204 static ssize_t
7205 tracing_entries_write(struct file *filp, const char __user *ubuf,
7206 		      size_t cnt, loff_t *ppos)
7207 {
7208 	struct inode *inode = file_inode(filp);
7209 	struct trace_array *tr = inode->i_private;
7210 	unsigned long val;
7211 	int ret;
7212 
7213 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7214 	if (ret)
7215 		return ret;
7216 
7217 	/* must have at least 1 entry */
7218 	if (!val)
7219 		return -EINVAL;
7220 
7221 	/* value is in KB */
7222 	val <<= 10;
7223 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7224 	if (ret < 0)
7225 		return ret;
7226 
7227 	*ppos += cnt;
7228 
7229 	return cnt;
7230 }
7231 
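/*
 * tracing_entries_write() above interprets its input in KiB and shifts it
 * to bytes (val <<= 10) before resizing, so e.g. writing "1408" requests a
 * 1408 KiB buffer.  A minimal userspace sketch, assuming tracefs is mounted
 * at /sys/kernel/tracing: resize only CPU 0's buffer through the per-CPU
 * file, then read the top-level buffer_size_kb back (it prints "X" once
 * the per-CPU sizes differ).
 */
#if 0	/* standalone userspace sketch, not part of the kernel build */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	ssize_t n;
	int fd;

	/* per-CPU file: resizes only CPU 0, value in KiB */
	fd = open("/sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb", O_WRONLY);
	if (fd >= 0) {
		write(fd, "1408", strlen("1408"));
		close(fd);
	}

	/* top-level file: per-CPU size, or "X" if the sizes now differ */
	fd = open("/sys/kernel/tracing/buffer_size_kb", O_RDONLY);
	if (fd >= 0) {
		n = read(fd, buf, sizeof(buf) - 1);
		if (n > 0) {
			buf[n] = '\0';
			printf("buffer_size_kb: %s", buf);
		}
		close(fd);
	}
	return 0;
}
#endif
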
7232 static ssize_t
7233 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7234 				size_t cnt, loff_t *ppos)
7235 {
7236 	struct trace_array *tr = filp->private_data;
7237 	char buf[64];
7238 	int r, cpu;
7239 	unsigned long size = 0, expanded_size = 0;
7240 
7241 	mutex_lock(&trace_types_lock);
7242 	for_each_tracing_cpu(cpu) {
7243 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7244 		if (!tr->ring_buffer_expanded)
7245 			expanded_size += trace_buf_size >> 10;
7246 	}
7247 	if (tr->ring_buffer_expanded)
7248 		r = sprintf(buf, "%lu\n", size);
7249 	else
7250 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7251 	mutex_unlock(&trace_types_lock);
7252 
7253 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7254 }
7255 
7256 static ssize_t
7257 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7258 			  size_t cnt, loff_t *ppos)
7259 {
7260 	/*
7261 	 * There is no need to read what the user has written; this function
7262 	 * just makes sure that there is no error when "echo" is used
7263 	 */
7264 
7265 	*ppos += cnt;
7266 
7267 	return cnt;
7268 }
7269 
7270 static int
7271 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7272 {
7273 	struct trace_array *tr = inode->i_private;
7274 
7275 	/* disable tracing ? */
7276 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7277 		tracer_tracing_off(tr);
7278 	/* resize the ring buffer to 0 */
7279 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7280 
7281 	trace_array_put(tr);
7282 
7283 	return 0;
7284 }
7285 
7286 static ssize_t
7287 tracing_mark_write(struct file *filp, const char __user *ubuf,
7288 					size_t cnt, loff_t *fpos)
7289 {
7290 	struct trace_array *tr = filp->private_data;
7291 	struct ring_buffer_event *event;
7292 	enum event_trigger_type tt = ETT_NONE;
7293 	struct trace_buffer *buffer;
7294 	struct print_entry *entry;
7295 	ssize_t written;
7296 	int size;
7297 	int len;
7298 
7299 /* Used in tracing_mark_raw_write() as well */
7300 #define FAULTED_STR "<faulted>"
7301 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7302 
7303 	if (tracing_disabled)
7304 		return -EINVAL;
7305 
7306 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7307 		return -EINVAL;
7308 
7309 	if (cnt > TRACE_BUF_SIZE)
7310 		cnt = TRACE_BUF_SIZE;
7311 
7312 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7313 
7314 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7315 
7316 	/* If less than "<faulted>", then make sure we can still add that */
7317 	if (cnt < FAULTED_SIZE)
7318 		size += FAULTED_SIZE - cnt;
7319 
7320 	buffer = tr->array_buffer.buffer;
7321 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7322 					    tracing_gen_ctx());
7323 	if (unlikely(!event))
7324 		/* Ring buffer disabled, return as if not open for write */
7325 		return -EBADF;
7326 
7327 	entry = ring_buffer_event_data(event);
7328 	entry->ip = _THIS_IP_;
7329 
7330 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7331 	if (len) {
7332 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7333 		cnt = FAULTED_SIZE;
7334 		written = -EFAULT;
7335 	} else
7336 		written = cnt;
7337 
7338 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7339 		/* do not add \n before testing triggers, but add \0 */
7340 		entry->buf[cnt] = '\0';
7341 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7342 	}
7343 
7344 	if (entry->buf[cnt - 1] != '\n') {
7345 		entry->buf[cnt] = '\n';
7346 		entry->buf[cnt + 1] = '\0';
7347 	} else
7348 		entry->buf[cnt] = '\0';
7349 
7350 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7351 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7352 	__buffer_unlock_commit(buffer, event);
7353 
7354 	if (tt)
7355 		event_triggers_post_call(tr->trace_marker_file, tt);
7356 
7357 	return written;
7358 }
7359 
7360 /* Limit it for now to 3K (including tag) */
7361 #define RAW_DATA_MAX_SIZE (1024*3)
7362 
7363 static ssize_t
7364 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7365 					size_t cnt, loff_t *fpos)
7366 {
7367 	struct trace_array *tr = filp->private_data;
7368 	struct ring_buffer_event *event;
7369 	struct trace_buffer *buffer;
7370 	struct raw_data_entry *entry;
7371 	ssize_t written;
7372 	int size;
7373 	int len;
7374 
7375 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7376 
7377 	if (tracing_disabled)
7378 		return -EINVAL;
7379 
7380 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7381 		return -EINVAL;
7382 
7383 	/* The marker must at least have a tag id */
7384 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7385 		return -EINVAL;
7386 
7387 	if (cnt > TRACE_BUF_SIZE)
7388 		cnt = TRACE_BUF_SIZE;
7389 
7390 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7391 
7392 	size = sizeof(*entry) + cnt;
7393 	if (cnt < FAULT_SIZE_ID)
7394 		size += FAULT_SIZE_ID - cnt;
7395 
7396 	buffer = tr->array_buffer.buffer;
7397 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7398 					    tracing_gen_ctx());
7399 	if (!event)
7400 		/* Ring buffer disabled, return as if not open for write */
7401 		return -EBADF;
7402 
7403 	entry = ring_buffer_event_data(event);
7404 
7405 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7406 	if (len) {
7407 		entry->id = -1;
7408 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7409 		written = -EFAULT;
7410 	} else
7411 		written = cnt;
7412 
7413 	__buffer_unlock_commit(buffer, event);
7414 
7415 	return written;
7416 }
7417 
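/*
 * tracing_mark_raw_write() above requires at least a 4-byte tag id at the
 * start of the payload (it lands in entry->id), followed by arbitrary
 * binary data, with the whole write capped at RAW_DATA_MAX_SIZE.  A minimal
 * userspace sketch, assuming tracefs is mounted at /sys/kernel/tracing;
 * the tag and payload values are made up.
 */
#if 0	/* standalone userspace sketch, not part of the kernel build */
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	unsigned char buf[sizeof(uint32_t) + sizeof(uint64_t)];
	uint32_t tag = 0x1234;		/* hypothetical application tag id */
	uint64_t payload = 42;		/* hypothetical binary payload */
	int fd;

	/* tag id first, then the raw data */
	memcpy(buf, &tag, sizeof(tag));
	memcpy(buf + sizeof(tag), &payload, sizeof(payload));

	fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
	if (fd < 0)
		return 1;
	write(fd, buf, sizeof(buf));
	close(fd);
	return 0;
}
#endif
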
7418 static int tracing_clock_show(struct seq_file *m, void *v)
7419 {
7420 	struct trace_array *tr = m->private;
7421 	int i;
7422 
7423 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7424 		seq_printf(m,
7425 			"%s%s%s%s", i ? " " : "",
7426 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7427 			i == tr->clock_id ? "]" : "");
7428 	seq_putc(m, '\n');
7429 
7430 	return 0;
7431 }
7432 
7433 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7434 {
7435 	int i;
7436 
7437 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7438 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7439 			break;
7440 	}
7441 	if (i == ARRAY_SIZE(trace_clocks))
7442 		return -EINVAL;
7443 
7444 	mutex_lock(&trace_types_lock);
7445 
7446 	tr->clock_id = i;
7447 
7448 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7449 
7450 	/*
7451 	 * New clock may not be consistent with the previous clock.
7452 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7453 	 */
7454 	tracing_reset_online_cpus(&tr->array_buffer);
7455 
7456 #ifdef CONFIG_TRACER_MAX_TRACE
7457 	if (tr->max_buffer.buffer)
7458 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7459 	tracing_reset_online_cpus(&tr->max_buffer);
7460 #endif
7461 
7462 	mutex_unlock(&trace_types_lock);
7463 
7464 	return 0;
7465 }
7466 
7467 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7468 				   size_t cnt, loff_t *fpos)
7469 {
7470 	struct seq_file *m = filp->private_data;
7471 	struct trace_array *tr = m->private;
7472 	char buf[64];
7473 	const char *clockstr;
7474 	int ret;
7475 
7476 	if (cnt >= sizeof(buf))
7477 		return -EINVAL;
7478 
7479 	if (copy_from_user(buf, ubuf, cnt))
7480 		return -EFAULT;
7481 
7482 	buf[cnt] = 0;
7483 
7484 	clockstr = strstrip(buf);
7485 
7486 	ret = tracing_set_clock(tr, clockstr);
7487 	if (ret)
7488 		return ret;
7489 
7490 	*fpos += cnt;
7491 
7492 	return cnt;
7493 }
7494 
7495 static int tracing_clock_open(struct inode *inode, struct file *file)
7496 {
7497 	struct trace_array *tr = inode->i_private;
7498 	int ret;
7499 
7500 	ret = tracing_check_open_get_tr(tr);
7501 	if (ret)
7502 		return ret;
7503 
7504 	ret = single_open(file, tracing_clock_show, inode->i_private);
7505 	if (ret < 0)
7506 		trace_array_put(tr);
7507 
7508 	return ret;
7509 }
7510 
7511 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7512 {
7513 	struct trace_array *tr = m->private;
7514 
7515 	mutex_lock(&trace_types_lock);
7516 
7517 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7518 		seq_puts(m, "delta [absolute]\n");
7519 	else
7520 		seq_puts(m, "[delta] absolute\n");
7521 
7522 	mutex_unlock(&trace_types_lock);
7523 
7524 	return 0;
7525 }
7526 
7527 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7528 {
7529 	struct trace_array *tr = inode->i_private;
7530 	int ret;
7531 
7532 	ret = tracing_check_open_get_tr(tr);
7533 	if (ret)
7534 		return ret;
7535 
7536 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7537 	if (ret < 0)
7538 		trace_array_put(tr);
7539 
7540 	return ret;
7541 }
7542 
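/*
 * Return the timestamp of a ring buffer event. If the event is the per CPU
 * trace_buffered_event (i.e. it has not been committed yet and carries no
 * timestamp of its own), fall back to the current time of the ring buffer.
 */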
7543 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7544 {
7545 	if (rbe == this_cpu_read(trace_buffered_event))
7546 		return ring_buffer_time_stamp(buffer);
7547 
7548 	return ring_buffer_event_time_stamp(buffer, rbe);
7549 }
7550 
7551 /*
7552  * Set or disable using the per CPU trace_buffer_event when possible.
7553  */
7554 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7555 {
7556 	int ret = 0;
7557 
7558 	mutex_lock(&trace_types_lock);
7559 
7560 	if (set && tr->no_filter_buffering_ref++)
7561 		goto out;
7562 
7563 	if (!set) {
7564 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7565 			ret = -EINVAL;
7566 			goto out;
7567 		}
7568 
7569 		--tr->no_filter_buffering_ref;
7570 	}
7571  out:
7572 	mutex_unlock(&trace_types_lock);
7573 
7574 	return ret;
7575 }
7576 
7577 struct ftrace_buffer_info {
7578 	struct trace_iterator	iter;
7579 	void			*spare;
7580 	unsigned int		spare_cpu;
7581 	unsigned int		read;
7582 };
7583 
7584 #ifdef CONFIG_TRACER_SNAPSHOT
7585 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7586 {
7587 	struct trace_array *tr = inode->i_private;
7588 	struct trace_iterator *iter;
7589 	struct seq_file *m;
7590 	int ret;
7591 
7592 	ret = tracing_check_open_get_tr(tr);
7593 	if (ret)
7594 		return ret;
7595 
7596 	if (file->f_mode & FMODE_READ) {
7597 		iter = __tracing_open(inode, file, true);
7598 		if (IS_ERR(iter))
7599 			ret = PTR_ERR(iter);
7600 	} else {
7601 		/* Writes still need the seq_file to hold the private data */
7602 		ret = -ENOMEM;
7603 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7604 		if (!m)
7605 			goto out;
7606 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7607 		if (!iter) {
7608 			kfree(m);
7609 			goto out;
7610 		}
7611 		ret = 0;
7612 
7613 		iter->tr = tr;
7614 		iter->array_buffer = &tr->max_buffer;
7615 		iter->cpu_file = tracing_get_cpu(inode);
7616 		m->private = iter;
7617 		file->private_data = m;
7618 	}
7619 out:
7620 	if (ret < 0)
7621 		trace_array_put(tr);
7622 
7623 	return ret;
7624 }
7625 
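/*
 * Swap the snapshot buffer for a single CPU. Runs on the target CPU via
 * smp_call_function_single() from tracing_snapshot_write().
 */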
7626 static void tracing_swap_cpu_buffer(void *tr)
7627 {
7628 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7629 }
7630 
7631 static ssize_t
7632 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7633 		       loff_t *ppos)
7634 {
7635 	struct seq_file *m = filp->private_data;
7636 	struct trace_iterator *iter = m->private;
7637 	struct trace_array *tr = iter->tr;
7638 	unsigned long val;
7639 	int ret;
7640 
7641 	ret = tracing_update_buffers(tr);
7642 	if (ret < 0)
7643 		return ret;
7644 
7645 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7646 	if (ret)
7647 		return ret;
7648 
7649 	mutex_lock(&trace_types_lock);
7650 
7651 	if (tr->current_trace->use_max_tr) {
7652 		ret = -EBUSY;
7653 		goto out;
7654 	}
7655 
7656 	local_irq_disable();
7657 	arch_spin_lock(&tr->max_lock);
7658 	if (tr->cond_snapshot)
7659 		ret = -EBUSY;
7660 	arch_spin_unlock(&tr->max_lock);
7661 	local_irq_enable();
7662 	if (ret)
7663 		goto out;
7664 
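	/*
	 * A write of 0 frees the snapshot buffer, a write of 1 allocates it
	 * (if necessary) and swaps it with the main buffer, and any other
	 * value just clears the snapshot buffer's contents.
	 */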
7665 	switch (val) {
7666 	case 0:
7667 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7668 			ret = -EINVAL;
7669 			break;
7670 		}
7671 		if (tr->allocated_snapshot)
7672 			free_snapshot(tr);
7673 		break;
7674 	case 1:
7675 /* Only allow per-cpu swap if the ring buffer supports it */
7676 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7677 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7678 			ret = -EINVAL;
7679 			break;
7680 		}
7681 #endif
7682 		if (tr->allocated_snapshot)
7683 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7684 					&tr->array_buffer, iter->cpu_file);
7685 		else
7686 			ret = tracing_alloc_snapshot_instance(tr);
7687 		if (ret < 0)
7688 			break;
7689 		/* Now, we're going to swap */
7690 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7691 			local_irq_disable();
7692 			update_max_tr(tr, current, smp_processor_id(), NULL);
7693 			local_irq_enable();
7694 		} else {
7695 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7696 						 (void *)tr, 1);
7697 		}
7698 		break;
7699 	default:
7700 		if (tr->allocated_snapshot) {
7701 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7702 				tracing_reset_online_cpus(&tr->max_buffer);
7703 			else
7704 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7705 		}
7706 		break;
7707 	}
7708 
7709 	if (ret >= 0) {
7710 		*ppos += cnt;
7711 		ret = cnt;
7712 	}
7713 out:
7714 	mutex_unlock(&trace_types_lock);
7715 	return ret;
7716 }
7717 
7718 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7719 {
7720 	struct seq_file *m = file->private_data;
7721 	int ret;
7722 
7723 	ret = tracing_release(inode, file);
7724 
7725 	if (file->f_mode & FMODE_READ)
7726 		return ret;
7727 
7728 	/* If write only, the seq_file is just a stub */
7729 	if (m)
7730 		kfree(m->private);
7731 	kfree(m);
7732 
7733 	return 0;
7734 }
7735 
7736 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7737 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7738 				    size_t count, loff_t *ppos);
7739 static int tracing_buffers_release(struct inode *inode, struct file *file);
7740 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7741 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7742 
7743 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7744 {
7745 	struct ftrace_buffer_info *info;
7746 	int ret;
7747 
7748 	/* The following checks for tracefs lockdown */
7749 	ret = tracing_buffers_open(inode, filp);
7750 	if (ret < 0)
7751 		return ret;
7752 
7753 	info = filp->private_data;
7754 
7755 	if (info->iter.trace->use_max_tr) {
7756 		tracing_buffers_release(inode, filp);
7757 		return -EBUSY;
7758 	}
7759 
7760 	info->iter.snapshot = true;
7761 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7762 
7763 	return ret;
7764 }
7765 
7766 #endif /* CONFIG_TRACER_SNAPSHOT */
7767 
7768 
7769 static const struct file_operations tracing_thresh_fops = {
7770 	.open		= tracing_open_generic,
7771 	.read		= tracing_thresh_read,
7772 	.write		= tracing_thresh_write,
7773 	.llseek		= generic_file_llseek,
7774 };
7775 
7776 #ifdef CONFIG_TRACER_MAX_TRACE
7777 static const struct file_operations tracing_max_lat_fops = {
7778 	.open		= tracing_open_generic_tr,
7779 	.read		= tracing_max_lat_read,
7780 	.write		= tracing_max_lat_write,
7781 	.llseek		= generic_file_llseek,
7782 	.release	= tracing_release_generic_tr,
7783 };
7784 #endif
7785 
7786 static const struct file_operations set_tracer_fops = {
7787 	.open		= tracing_open_generic_tr,
7788 	.read		= tracing_set_trace_read,
7789 	.write		= tracing_set_trace_write,
7790 	.llseek		= generic_file_llseek,
7791 	.release	= tracing_release_generic_tr,
7792 };
7793 
7794 static const struct file_operations tracing_pipe_fops = {
7795 	.open		= tracing_open_pipe,
7796 	.poll		= tracing_poll_pipe,
7797 	.read		= tracing_read_pipe,
7798 	.splice_read	= tracing_splice_read_pipe,
7799 	.release	= tracing_release_pipe,
7800 	.llseek		= no_llseek,
7801 };
7802 
7803 static const struct file_operations tracing_entries_fops = {
7804 	.open		= tracing_open_generic_tr,
7805 	.read		= tracing_entries_read,
7806 	.write		= tracing_entries_write,
7807 	.llseek		= generic_file_llseek,
7808 	.release	= tracing_release_generic_tr,
7809 };
7810 
7811 static const struct file_operations tracing_total_entries_fops = {
7812 	.open		= tracing_open_generic_tr,
7813 	.read		= tracing_total_entries_read,
7814 	.llseek		= generic_file_llseek,
7815 	.release	= tracing_release_generic_tr,
7816 };
7817 
7818 static const struct file_operations tracing_free_buffer_fops = {
7819 	.open		= tracing_open_generic_tr,
7820 	.write		= tracing_free_buffer_write,
7821 	.release	= tracing_free_buffer_release,
7822 };
7823 
7824 static const struct file_operations tracing_mark_fops = {
7825 	.open		= tracing_mark_open,
7826 	.write		= tracing_mark_write,
7827 	.release	= tracing_release_generic_tr,
7828 };
7829 
7830 static const struct file_operations tracing_mark_raw_fops = {
7831 	.open		= tracing_mark_open,
7832 	.write		= tracing_mark_raw_write,
7833 	.release	= tracing_release_generic_tr,
7834 };
7835 
7836 static const struct file_operations trace_clock_fops = {
7837 	.open		= tracing_clock_open,
7838 	.read		= seq_read,
7839 	.llseek		= seq_lseek,
7840 	.release	= tracing_single_release_tr,
7841 	.write		= tracing_clock_write,
7842 };
7843 
7844 static const struct file_operations trace_time_stamp_mode_fops = {
7845 	.open		= tracing_time_stamp_mode_open,
7846 	.read		= seq_read,
7847 	.llseek		= seq_lseek,
7848 	.release	= tracing_single_release_tr,
7849 };
7850 
7851 #ifdef CONFIG_TRACER_SNAPSHOT
7852 static const struct file_operations snapshot_fops = {
7853 	.open		= tracing_snapshot_open,
7854 	.read		= seq_read,
7855 	.write		= tracing_snapshot_write,
7856 	.llseek		= tracing_lseek,
7857 	.release	= tracing_snapshot_release,
7858 };
7859 
7860 static const struct file_operations snapshot_raw_fops = {
7861 	.open		= snapshot_raw_open,
7862 	.read		= tracing_buffers_read,
7863 	.release	= tracing_buffers_release,
7864 	.splice_read	= tracing_buffers_splice_read,
7865 	.llseek		= no_llseek,
7866 };
7867 
7868 #endif /* CONFIG_TRACER_SNAPSHOT */
7869 
7870 /*
7871  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7872  * @filp: The active open file structure
7873  * @ubuf: The userspace provided buffer containing the value to write
7874  * @cnt: The number of bytes to read from the user buffer
7875  * @ppos: The current "file" position
7876  *
7877  * This function implements the write interface for a struct trace_min_max_param.
7878  * The filp->private_data must point to a trace_min_max_param structure that
7879  * defines where to write the value, the min and the max acceptable values,
7880  * and a lock to protect the write.
7881  */
7882 static ssize_t
7883 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7884 {
7885 	struct trace_min_max_param *param = filp->private_data;
7886 	u64 val;
7887 	int err;
7888 
7889 	if (!param)
7890 		return -EFAULT;
7891 
7892 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7893 	if (err)
7894 		return err;
7895 
7896 	if (param->lock)
7897 		mutex_lock(param->lock);
7898 
7899 	if (param->min && val < *param->min)
7900 		err = -EINVAL;
7901 
7902 	if (param->max && val > *param->max)
7903 		err = -EINVAL;
7904 
7905 	if (!err)
7906 		*param->val = val;
7907 
7908 	if (param->lock)
7909 		mutex_unlock(param->lock);
7910 
7911 	if (err)
7912 		return err;
7913 
7914 	return cnt;
7915 }
7916 
7917 /*
7918  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7919  * @filp: The active open file structure
7920  * @ubuf: The userspace provided buffer to read value into
7921  * @cnt: The maximum number of bytes to read
7922  * @ppos: The current "file" position
7923  *
7924  * This function implements the read interface for a struct trace_min_max_param.
7925  * The filp->private_data must point to a trace_min_max_param struct with valid
7926  * data.
7927  */
7928 static ssize_t
7929 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7930 {
7931 	struct trace_min_max_param *param = filp->private_data;
7932 	char buf[U64_STR_SIZE];
7933 	int len;
7934 	u64 val;
7935 
7936 	if (!param)
7937 		return -EFAULT;
7938 
7939 	val = *param->val;
7940 
7941 	if (cnt > sizeof(buf))
7942 		cnt = sizeof(buf);
7943 
7944 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7945 
7946 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7947 }
7948 
7949 const struct file_operations trace_min_max_fops = {
7950 	.open		= tracing_open_generic,
7951 	.read		= trace_min_max_read,
7952 	.write		= trace_min_max_write,
7953 };
7954 
7955 #define TRACING_LOG_ERRS_MAX	8
7956 #define TRACING_LOG_LOC_MAX	128
7957 
7958 #define CMD_PREFIX "  Command: "
7959 
7960 struct err_info {
7961 	const char	**errs;	/* ptr to loc-specific array of err strings */
7962 	u8		type;	/* index into errs -> specific err string */
7963 	u16		pos;	/* caret position */
7964 	u64		ts;
7965 };
7966 
7967 struct tracing_log_err {
7968 	struct list_head	list;
7969 	struct err_info		info;
7970 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7971 	char			*cmd;                     /* what caused err */
7972 };
7973 
7974 static DEFINE_MUTEX(tracing_err_log_lock);
7975 
7976 static struct tracing_log_err *alloc_tracing_log_err(int len)
7977 {
7978 	struct tracing_log_err *err;
7979 
7980 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7981 	if (!err)
7982 		return ERR_PTR(-ENOMEM);
7983 
7984 	err->cmd = kzalloc(len, GFP_KERNEL);
7985 	if (!err->cmd) {
7986 		kfree(err);
7987 		return ERR_PTR(-ENOMEM);
7988 	}
7989 
7990 	return err;
7991 }
7992 
7993 static void free_tracing_log_err(struct tracing_log_err *err)
7994 {
7995 	kfree(err->cmd);
7996 	kfree(err);
7997 }
7998 
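/*
 * Return a tracing_log_err to hold a new error. Fresh entries are allocated
 * until TRACING_LOG_ERRS_MAX exist; after that the oldest entry in the list
 * is recycled, with its cmd buffer replaced by one of the requested length.
 */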
7999 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8000 						   int len)
8001 {
8002 	struct tracing_log_err *err;
8003 	char *cmd;
8004 
8005 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8006 		err = alloc_tracing_log_err(len);
8007 		if (PTR_ERR(err) != -ENOMEM)
8008 			tr->n_err_log_entries++;
8009 
8010 		return err;
8011 	}
8012 	cmd = kzalloc(len, GFP_KERNEL);
8013 	if (!cmd)
8014 		return ERR_PTR(-ENOMEM);
8015 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8016 	kfree(err->cmd);
8017 	err->cmd = cmd;
8018 	list_del(&err->list);
8019 
8020 	return err;
8021 }
8022 
8023 /**
8024  * err_pos - find the position of a string within a command for error careting
8025  * @cmd: The tracing command that caused the error
8026  * @str: The string to position the caret at within @cmd
8027  *
8028  * Finds the position of the first occurrence of @str within @cmd.  The
8029  * return value can be passed to tracing_log_err() for caret placement
8030  * within @cmd.
8031  *
8032  * Returns the index within @cmd of the first occurrence of @str or 0
8033  * if @str was not found.
8034  */
8035 unsigned int err_pos(char *cmd, const char *str)
8036 {
8037 	char *found;
8038 
8039 	if (WARN_ON(!strlen(cmd)))
8040 		return 0;
8041 
8042 	found = strstr(cmd, str);
8043 	if (found)
8044 		return found - cmd;
8045 
8046 	return 0;
8047 }
8048 
8049 /**
8050  * tracing_log_err - write an error to the tracing error log
8051  * @tr: The associated trace array for the error (NULL for top level array)
8052  * @loc: A string describing where the error occurred
8053  * @cmd: The tracing command that caused the error
8054  * @errs: The array of loc-specific static error strings
8055  * @type: The index into errs[], which produces the specific static err string
8056  * @pos: The position the caret should be placed in the cmd
8057  *
8058  * Writes an error into tracing/error_log of the form:
8059  *
8060  * <loc>: error: <text>
8061  *   Command: <cmd>
8062  *              ^
8063  *
8064  * tracing/error_log is a small log file containing the last
8065  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8066  * unless there has been a tracing error, and the error log can be
8067  * cleared and have its memory freed by writing the empty string in
8068  * truncation mode to it i.e. echo > tracing/error_log.
8069  *
8070  * NOTE: the @errs array along with the @type param are used to
8071  * produce a static error string - this string is not copied and saved
8072  * when the error is logged - only a pointer to it is saved.  See
8073  * existing callers for examples of how static strings are typically
8074  * defined for use with tracing_log_err().
8075  */
8076 void tracing_log_err(struct trace_array *tr,
8077 		     const char *loc, const char *cmd,
8078 		     const char **errs, u8 type, u16 pos)
8079 {
8080 	struct tracing_log_err *err;
8081 	int len = 0;
8082 
8083 	if (!tr)
8084 		tr = &global_trace;
8085 
8086 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8087 
8088 	mutex_lock(&tracing_err_log_lock);
8089 	err = get_tracing_log_err(tr, len);
8090 	if (PTR_ERR(err) == -ENOMEM) {
8091 		mutex_unlock(&tracing_err_log_lock);
8092 		return;
8093 	}
8094 
8095 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8096 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8097 
8098 	err->info.errs = errs;
8099 	err->info.type = type;
8100 	err->info.pos = pos;
8101 	err->info.ts = local_clock();
8102 
8103 	list_add_tail(&err->list, &tr->err_log);
8104 	mutex_unlock(&tracing_err_log_lock);
8105 }
8106 
8107 static void clear_tracing_err_log(struct trace_array *tr)
8108 {
8109 	struct tracing_log_err *err, *next;
8110 
8111 	mutex_lock(&tracing_err_log_lock);
8112 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8113 		list_del(&err->list);
8114 		free_tracing_log_err(err);
8115 	}
8116 
8117 	tr->n_err_log_entries = 0;
8118 	mutex_unlock(&tracing_err_log_lock);
8119 }
8120 
8121 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8122 {
8123 	struct trace_array *tr = m->private;
8124 
8125 	mutex_lock(&tracing_err_log_lock);
8126 
8127 	return seq_list_start(&tr->err_log, *pos);
8128 }
8129 
8130 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8131 {
8132 	struct trace_array *tr = m->private;
8133 
8134 	return seq_list_next(v, &tr->err_log, pos);
8135 }
8136 
8137 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8138 {
8139 	mutex_unlock(&tracing_err_log_lock);
8140 }
8141 
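/*
 * Print enough spaces to place the '^' caret under the offending position
 * of the "  Command: " line shown above it.
 */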
8142 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8143 {
8144 	u16 i;
8145 
8146 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8147 		seq_putc(m, ' ');
8148 	for (i = 0; i < pos; i++)
8149 		seq_putc(m, ' ');
8150 	seq_puts(m, "^\n");
8151 }
8152 
8153 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8154 {
8155 	struct tracing_log_err *err = v;
8156 
8157 	if (err) {
8158 		const char *err_text = err->info.errs[err->info.type];
8159 		u64 sec = err->info.ts;
8160 		u32 nsec;
8161 
8162 		nsec = do_div(sec, NSEC_PER_SEC);
8163 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8164 			   err->loc, err_text);
8165 		seq_printf(m, "%s", err->cmd);
8166 		tracing_err_log_show_pos(m, err->info.pos);
8167 	}
8168 
8169 	return 0;
8170 }
8171 
8172 static const struct seq_operations tracing_err_log_seq_ops = {
8173 	.start  = tracing_err_log_seq_start,
8174 	.next   = tracing_err_log_seq_next,
8175 	.stop   = tracing_err_log_seq_stop,
8176 	.show   = tracing_err_log_seq_show
8177 };
8178 
8179 static int tracing_err_log_open(struct inode *inode, struct file *file)
8180 {
8181 	struct trace_array *tr = inode->i_private;
8182 	int ret = 0;
8183 
8184 	ret = tracing_check_open_get_tr(tr);
8185 	if (ret)
8186 		return ret;
8187 
8188 	/* If this file was opened for write, then erase contents */
8189 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8190 		clear_tracing_err_log(tr);
8191 
8192 	if (file->f_mode & FMODE_READ) {
8193 		ret = seq_open(file, &tracing_err_log_seq_ops);
8194 		if (!ret) {
8195 			struct seq_file *m = file->private_data;
8196 			m->private = tr;
8197 		} else {
8198 			trace_array_put(tr);
8199 		}
8200 	}
8201 	return ret;
8202 }
8203 
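/*
 * Writes to the error log are ignored; clearing the log is done in
 * tracing_err_log_open() when the file is opened for write with O_TRUNC.
 */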
8204 static ssize_t tracing_err_log_write(struct file *file,
8205 				     const char __user *buffer,
8206 				     size_t count, loff_t *ppos)
8207 {
8208 	return count;
8209 }
8210 
8211 static int tracing_err_log_release(struct inode *inode, struct file *file)
8212 {
8213 	struct trace_array *tr = inode->i_private;
8214 
8215 	trace_array_put(tr);
8216 
8217 	if (file->f_mode & FMODE_READ)
8218 		seq_release(inode, file);
8219 
8220 	return 0;
8221 }
8222 
8223 static const struct file_operations tracing_err_log_fops = {
8224 	.open           = tracing_err_log_open,
8225 	.write		= tracing_err_log_write,
8226 	.read           = seq_read,
8227 	.llseek         = tracing_lseek,
8228 	.release        = tracing_err_log_release,
8229 };
8230 
8231 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8232 {
8233 	struct trace_array *tr = inode->i_private;
8234 	struct ftrace_buffer_info *info;
8235 	int ret;
8236 
8237 	ret = tracing_check_open_get_tr(tr);
8238 	if (ret)
8239 		return ret;
8240 
8241 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8242 	if (!info) {
8243 		trace_array_put(tr);
8244 		return -ENOMEM;
8245 	}
8246 
8247 	mutex_lock(&trace_types_lock);
8248 
8249 	info->iter.tr		= tr;
8250 	info->iter.cpu_file	= tracing_get_cpu(inode);
8251 	info->iter.trace	= tr->current_trace;
8252 	info->iter.array_buffer = &tr->array_buffer;
8253 	info->spare		= NULL;
8254 	/* Force reading ring buffer for first read */
8255 	info->read		= (unsigned int)-1;
8256 
8257 	filp->private_data = info;
8258 
8259 	tr->trace_ref++;
8260 
8261 	mutex_unlock(&trace_types_lock);
8262 
8263 	ret = nonseekable_open(inode, filp);
8264 	if (ret < 0)
8265 		trace_array_put(tr);
8266 
8267 	return ret;
8268 }
8269 
8270 static __poll_t
8271 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8272 {
8273 	struct ftrace_buffer_info *info = filp->private_data;
8274 	struct trace_iterator *iter = &info->iter;
8275 
8276 	return trace_poll(iter, filp, poll_table);
8277 }
8278 
8279 static ssize_t
8280 tracing_buffers_read(struct file *filp, char __user *ubuf,
8281 		     size_t count, loff_t *ppos)
8282 {
8283 	struct ftrace_buffer_info *info = filp->private_data;
8284 	struct trace_iterator *iter = &info->iter;
8285 	ssize_t ret = 0;
8286 	ssize_t size;
8287 
8288 	if (!count)
8289 		return 0;
8290 
8291 #ifdef CONFIG_TRACER_MAX_TRACE
8292 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8293 		return -EBUSY;
8294 #endif
8295 
8296 	if (!info->spare) {
8297 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8298 							  iter->cpu_file);
8299 		if (IS_ERR(info->spare)) {
8300 			ret = PTR_ERR(info->spare);
8301 			info->spare = NULL;
8302 		} else {
8303 			info->spare_cpu = iter->cpu_file;
8304 		}
8305 	}
8306 	if (!info->spare)
8307 		return ret;
8308 
8309 	/* Do we have previous read data to read? */
8310 	if (info->read < PAGE_SIZE)
8311 		goto read;
8312 
8313  again:
8314 	trace_access_lock(iter->cpu_file);
8315 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8316 				    &info->spare,
8317 				    count,
8318 				    iter->cpu_file, 0);
8319 	trace_access_unlock(iter->cpu_file);
8320 
8321 	if (ret < 0) {
8322 		if (trace_empty(iter)) {
8323 			if ((filp->f_flags & O_NONBLOCK))
8324 				return -EAGAIN;
8325 
8326 			ret = wait_on_pipe(iter, 0);
8327 			if (ret)
8328 				return ret;
8329 
8330 			goto again;
8331 		}
8332 		return 0;
8333 	}
8334 
8335 	info->read = 0;
8336  read:
8337 	size = PAGE_SIZE - info->read;
8338 	if (size > count)
8339 		size = count;
8340 
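	/* copy_to_user() returns the number of bytes that could NOT be copied */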
8341 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8342 	if (ret == size)
8343 		return -EFAULT;
8344 
8345 	size -= ret;
8346 
8347 	*ppos += size;
8348 	info->read += size;
8349 
8350 	return size;
8351 }
8352 
8353 static int tracing_buffers_release(struct inode *inode, struct file *file)
8354 {
8355 	struct ftrace_buffer_info *info = file->private_data;
8356 	struct trace_iterator *iter = &info->iter;
8357 
8358 	mutex_lock(&trace_types_lock);
8359 
8360 	iter->tr->trace_ref--;
8361 
8362 	__trace_array_put(iter->tr);
8363 
8364 	iter->wait_index++;
8365 	/* Make sure the waiters see the new wait_index */
8366 	smp_wmb();
8367 
8368 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8369 
8370 	if (info->spare)
8371 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8372 					   info->spare_cpu, info->spare);
8373 	kvfree(info);
8374 
8375 	mutex_unlock(&trace_types_lock);
8376 
8377 	return 0;
8378 }
8379 
8380 struct buffer_ref {
8381 	struct trace_buffer	*buffer;
8382 	void			*page;
8383 	int			cpu;
8384 	refcount_t		refcount;
8385 };
8386 
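/*
 * Drop a reference on a buffer_ref. On the final put, return the read page
 * to the ring buffer and free the ref itself.
 */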
8387 static void buffer_ref_release(struct buffer_ref *ref)
8388 {
8389 	if (!refcount_dec_and_test(&ref->refcount))
8390 		return;
8391 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8392 	kfree(ref);
8393 }
8394 
8395 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8396 				    struct pipe_buffer *buf)
8397 {
8398 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8399 
8400 	buffer_ref_release(ref);
8401 	buf->private = 0;
8402 }
8403 
8404 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8405 				struct pipe_buffer *buf)
8406 {
8407 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8408 
8409 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8410 		return false;
8411 
8412 	refcount_inc(&ref->refcount);
8413 	return true;
8414 }
8415 
8416 /* Pipe buffer operations for a buffer. */
8417 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8418 	.release		= buffer_pipe_buf_release,
8419 	.get			= buffer_pipe_buf_get,
8420 };
8421 
8422 /*
8423  * Callback from splice_to_pipe(), if we need to release some pages
8424  * at the end of the spd in case we errored out in filling the pipe.
8425  */
8426 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8427 {
8428 	struct buffer_ref *ref =
8429 		(struct buffer_ref *)spd->partial[i].private;
8430 
8431 	buffer_ref_release(ref);
8432 	spd->partial[i].private = 0;
8433 }
8434 
8435 static ssize_t
8436 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8437 			    struct pipe_inode_info *pipe, size_t len,
8438 			    unsigned int flags)
8439 {
8440 	struct ftrace_buffer_info *info = file->private_data;
8441 	struct trace_iterator *iter = &info->iter;
8442 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8443 	struct page *pages_def[PIPE_DEF_BUFFERS];
8444 	struct splice_pipe_desc spd = {
8445 		.pages		= pages_def,
8446 		.partial	= partial_def,
8447 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8448 		.ops		= &buffer_pipe_buf_ops,
8449 		.spd_release	= buffer_spd_release,
8450 	};
8451 	struct buffer_ref *ref;
8452 	int entries, i;
8453 	ssize_t ret = 0;
8454 
8455 #ifdef CONFIG_TRACER_MAX_TRACE
8456 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8457 		return -EBUSY;
8458 #endif
8459 
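	/*
	 * Pages are spliced out of the ring buffer whole, so the offset must
	 * be page aligned and the length is rounded down to full pages (a
	 * request smaller than one page is rejected).
	 */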
8460 	if (*ppos & (PAGE_SIZE - 1))
8461 		return -EINVAL;
8462 
8463 	if (len & (PAGE_SIZE - 1)) {
8464 		if (len < PAGE_SIZE)
8465 			return -EINVAL;
8466 		len &= PAGE_MASK;
8467 	}
8468 
8469 	if (splice_grow_spd(pipe, &spd))
8470 		return -ENOMEM;
8471 
8472  again:
8473 	trace_access_lock(iter->cpu_file);
8474 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8475 
8476 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8477 		struct page *page;
8478 		int r;
8479 
8480 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8481 		if (!ref) {
8482 			ret = -ENOMEM;
8483 			break;
8484 		}
8485 
8486 		refcount_set(&ref->refcount, 1);
8487 		ref->buffer = iter->array_buffer->buffer;
8488 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8489 		if (IS_ERR(ref->page)) {
8490 			ret = PTR_ERR(ref->page);
8491 			ref->page = NULL;
8492 			kfree(ref);
8493 			break;
8494 		}
8495 		ref->cpu = iter->cpu_file;
8496 
8497 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8498 					  len, iter->cpu_file, 1);
8499 		if (r < 0) {
8500 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8501 						   ref->page);
8502 			kfree(ref);
8503 			break;
8504 		}
8505 
8506 		page = virt_to_page(ref->page);
8507 
8508 		spd.pages[i] = page;
8509 		spd.partial[i].len = PAGE_SIZE;
8510 		spd.partial[i].offset = 0;
8511 		spd.partial[i].private = (unsigned long)ref;
8512 		spd.nr_pages++;
8513 		*ppos += PAGE_SIZE;
8514 
8515 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8516 	}
8517 
8518 	trace_access_unlock(iter->cpu_file);
8519 	spd.nr_pages = i;
8520 
8521 	/* did we read anything? */
8522 	if (!spd.nr_pages) {
8523 		long wait_index;
8524 
8525 		if (ret)
8526 			goto out;
8527 
8528 		ret = -EAGAIN;
8529 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8530 			goto out;
8531 
8532 		wait_index = READ_ONCE(iter->wait_index);
8533 
8534 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8535 		if (ret)
8536 			goto out;
8537 
8538 		/* No need to wait after waking up when tracing is off */
8539 		if (!tracer_tracing_is_on(iter->tr))
8540 			goto out;
8541 
8542 		/* Make sure we see the new wait_index */
8543 		smp_rmb();
8544 		if (wait_index != iter->wait_index)
8545 			goto out;
8546 
8547 		goto again;
8548 	}
8549 
8550 	ret = splice_to_pipe(pipe, &spd);
8551 out:
8552 	splice_shrink_spd(&spd);
8553 
8554 	return ret;
8555 }
8556 
8557 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8558 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8559 {
8560 	struct ftrace_buffer_info *info = file->private_data;
8561 	struct trace_iterator *iter = &info->iter;
8562 
8563 	if (cmd)
8564 		return -ENOIOCTLCMD;
8565 
8566 	mutex_lock(&trace_types_lock);
8567 
8568 	iter->wait_index++;
8569 	/* Make sure the waiters see the new wait_index */
8570 	smp_wmb();
8571 
8572 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8573 
8574 	mutex_unlock(&trace_types_lock);
8575 	return 0;
8576 }
8577 
8578 static const struct file_operations tracing_buffers_fops = {
8579 	.open		= tracing_buffers_open,
8580 	.read		= tracing_buffers_read,
8581 	.poll		= tracing_buffers_poll,
8582 	.release	= tracing_buffers_release,
8583 	.splice_read	= tracing_buffers_splice_read,
8584 	.unlocked_ioctl = tracing_buffers_ioctl,
8585 	.llseek		= no_llseek,
8586 };
8587 
8588 static ssize_t
8589 tracing_stats_read(struct file *filp, char __user *ubuf,
8590 		   size_t count, loff_t *ppos)
8591 {
8592 	struct inode *inode = file_inode(filp);
8593 	struct trace_array *tr = inode->i_private;
8594 	struct array_buffer *trace_buf = &tr->array_buffer;
8595 	int cpu = tracing_get_cpu(inode);
8596 	struct trace_seq *s;
8597 	unsigned long cnt;
8598 	unsigned long long t;
8599 	unsigned long usec_rem;
8600 
8601 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8602 	if (!s)
8603 		return -ENOMEM;
8604 
8605 	trace_seq_init(s);
8606 
8607 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8608 	trace_seq_printf(s, "entries: %ld\n", cnt);
8609 
8610 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8611 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8612 
8613 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8614 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8615 
8616 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8617 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8618 
8619 	if (trace_clocks[tr->clock_id].in_ns) {
8620 		/* local or global for trace_clock */
8621 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8622 		usec_rem = do_div(t, USEC_PER_SEC);
8623 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8624 								t, usec_rem);
8625 
8626 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8627 		usec_rem = do_div(t, USEC_PER_SEC);
8628 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8629 	} else {
8630 		/* counter or tsc mode for trace_clock */
8631 		trace_seq_printf(s, "oldest event ts: %llu\n",
8632 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8633 
8634 		trace_seq_printf(s, "now ts: %llu\n",
8635 				ring_buffer_time_stamp(trace_buf->buffer));
8636 	}
8637 
8638 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8639 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8640 
8641 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8642 	trace_seq_printf(s, "read events: %ld\n", cnt);
8643 
8644 	count = simple_read_from_buffer(ubuf, count, ppos,
8645 					s->buffer, trace_seq_used(s));
8646 
8647 	kfree(s);
8648 
8649 	return count;
8650 }
8651 
8652 static const struct file_operations tracing_stats_fops = {
8653 	.open		= tracing_open_generic_tr,
8654 	.read		= tracing_stats_read,
8655 	.llseek		= generic_file_llseek,
8656 	.release	= tracing_release_generic_tr,
8657 };
8658 
8659 #ifdef CONFIG_DYNAMIC_FTRACE
8660 
8661 static ssize_t
8662 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8663 		  size_t cnt, loff_t *ppos)
8664 {
8665 	ssize_t ret;
8666 	char *buf;
8667 	int r;
8668 
8669 	/* 256 should be plenty to hold the amount needed */
8670 	buf = kmalloc(256, GFP_KERNEL);
8671 	if (!buf)
8672 		return -ENOMEM;
8673 
8674 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8675 		      ftrace_update_tot_cnt,
8676 		      ftrace_number_of_pages,
8677 		      ftrace_number_of_groups);
8678 
8679 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8680 	kfree(buf);
8681 	return ret;
8682 }
8683 
8684 static const struct file_operations tracing_dyn_info_fops = {
8685 	.open		= tracing_open_generic,
8686 	.read		= tracing_read_dyn_info,
8687 	.llseek		= generic_file_llseek,
8688 };
8689 #endif /* CONFIG_DYNAMIC_FTRACE */
8690 
8691 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8692 static void
8693 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8694 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8695 		void *data)
8696 {
8697 	tracing_snapshot_instance(tr);
8698 }
8699 
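/*
 * Like ftrace_snapshot(), but honors an optional count: once the count
 * registered with the probe has been decremented to zero, no further
 * snapshots are taken.
 */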
8700 static void
8701 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8702 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8703 		      void *data)
8704 {
8705 	struct ftrace_func_mapper *mapper = data;
8706 	long *count = NULL;
8707 
8708 	if (mapper)
8709 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8710 
8711 	if (count) {
8712 
8713 		if (*count <= 0)
8714 			return;
8715 
8716 		(*count)--;
8717 	}
8718 
8719 	tracing_snapshot_instance(tr);
8720 }
8721 
8722 static int
8723 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8724 		      struct ftrace_probe_ops *ops, void *data)
8725 {
8726 	struct ftrace_func_mapper *mapper = data;
8727 	long *count = NULL;
8728 
8729 	seq_printf(m, "%ps:", (void *)ip);
8730 
8731 	seq_puts(m, "snapshot");
8732 
8733 	if (mapper)
8734 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8735 
8736 	if (count)
8737 		seq_printf(m, ":count=%ld\n", *count);
8738 	else
8739 		seq_puts(m, ":unlimited\n");
8740 
8741 	return 0;
8742 }
8743 
8744 static int
8745 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8746 		     unsigned long ip, void *init_data, void **data)
8747 {
8748 	struct ftrace_func_mapper *mapper = *data;
8749 
8750 	if (!mapper) {
8751 		mapper = allocate_ftrace_func_mapper();
8752 		if (!mapper)
8753 			return -ENOMEM;
8754 		*data = mapper;
8755 	}
8756 
8757 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8758 }
8759 
8760 static void
8761 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8762 		     unsigned long ip, void *data)
8763 {
8764 	struct ftrace_func_mapper *mapper = data;
8765 
8766 	if (!ip) {
8767 		if (!mapper)
8768 			return;
8769 		free_ftrace_func_mapper(mapper, NULL);
8770 		return;
8771 	}
8772 
8773 	ftrace_func_mapper_remove_ip(mapper, ip);
8774 }
8775 
8776 static struct ftrace_probe_ops snapshot_probe_ops = {
8777 	.func			= ftrace_snapshot,
8778 	.print			= ftrace_snapshot_print,
8779 };
8780 
8781 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8782 	.func			= ftrace_count_snapshot,
8783 	.print			= ftrace_snapshot_print,
8784 	.init			= ftrace_snapshot_init,
8785 	.free			= ftrace_snapshot_free,
8786 };
8787 
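/*
 * Handle the "snapshot" command written to set_ftrace_filter, e.g.
 * "echo '<function>:snapshot[:count]' > set_ftrace_filter". A leading
 * '!' in the glob unregisters a previously installed probe.
 */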
8788 static int
8789 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8790 			       char *glob, char *cmd, char *param, int enable)
8791 {
8792 	struct ftrace_probe_ops *ops;
8793 	void *count = (void *)-1;
8794 	char *number;
8795 	int ret;
8796 
8797 	if (!tr)
8798 		return -ENODEV;
8799 
8800 	/* hash funcs only work with set_ftrace_filter */
8801 	if (!enable)
8802 		return -EINVAL;
8803 
8804 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8805 
8806 	if (glob[0] == '!')
8807 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8808 
8809 	if (!param)
8810 		goto out_reg;
8811 
8812 	number = strsep(&param, ":");
8813 
8814 	if (!strlen(number))
8815 		goto out_reg;
8816 
8817 	/*
8818 	 * We use the callback data field (which is a pointer)
8819 	 * as our counter.
8820 	 */
8821 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8822 	if (ret)
8823 		return ret;
8824 
8825  out_reg:
8826 	ret = tracing_alloc_snapshot_instance(tr);
8827 	if (ret < 0)
8828 		goto out;
8829 
8830 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8831 
8832  out:
8833 	return ret < 0 ? ret : 0;
8834 }
8835 
8836 static struct ftrace_func_command ftrace_snapshot_cmd = {
8837 	.name			= "snapshot",
8838 	.func			= ftrace_trace_snapshot_callback,
8839 };
8840 
8841 static __init int register_snapshot_cmd(void)
8842 {
8843 	return register_ftrace_command(&ftrace_snapshot_cmd);
8844 }
8845 #else
8846 static inline __init int register_snapshot_cmd(void) { return 0; }
8847 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8848 
8849 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8850 {
8851 	if (WARN_ON(!tr->dir))
8852 		return ERR_PTR(-ENODEV);
8853 
8854 	/* Top directory uses NULL as the parent */
8855 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8856 		return NULL;
8857 
8858 	/* All sub buffers have a descriptor */
8859 	return tr->dir;
8860 }
8861 
8862 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8863 {
8864 	struct dentry *d_tracer;
8865 
8866 	if (tr->percpu_dir)
8867 		return tr->percpu_dir;
8868 
8869 	d_tracer = tracing_get_dentry(tr);
8870 	if (IS_ERR(d_tracer))
8871 		return NULL;
8872 
8873 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8874 
8875 	MEM_FAIL(!tr->percpu_dir,
8876 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8877 
8878 	return tr->percpu_dir;
8879 }
8880 
8881 static struct dentry *
8882 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8883 		      void *data, long cpu, const struct file_operations *fops)
8884 {
8885 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8886 
8887 	if (ret) /* See tracing_get_cpu() */
8888 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8889 	return ret;
8890 }
8891 
8892 static void
8893 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8894 {
8895 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8896 	struct dentry *d_cpu;
8897 	char cpu_dir[30]; /* 30 characters should be more than enough */
8898 
8899 	if (!d_percpu)
8900 		return;
8901 
8902 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8903 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8904 	if (!d_cpu) {
8905 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8906 		return;
8907 	}
8908 
8909 	/* per cpu trace_pipe */
8910 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8911 				tr, cpu, &tracing_pipe_fops);
8912 
8913 	/* per cpu trace */
8914 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8915 				tr, cpu, &tracing_fops);
8916 
8917 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8918 				tr, cpu, &tracing_buffers_fops);
8919 
8920 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8921 				tr, cpu, &tracing_stats_fops);
8922 
8923 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8924 				tr, cpu, &tracing_entries_fops);
8925 
8926 #ifdef CONFIG_TRACER_SNAPSHOT
8927 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8928 				tr, cpu, &snapshot_fops);
8929 
8930 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8931 				tr, cpu, &snapshot_raw_fops);
8932 #endif
8933 }
8934 
8935 #ifdef CONFIG_FTRACE_SELFTEST
8936 /* Let selftest have access to static functions in this file */
8937 #include "trace_selftest.c"
8938 #endif
8939 
8940 static ssize_t
8941 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8942 			loff_t *ppos)
8943 {
8944 	struct trace_option_dentry *topt = filp->private_data;
8945 	char *buf;
8946 
8947 	if (topt->flags->val & topt->opt->bit)
8948 		buf = "1\n";
8949 	else
8950 		buf = "0\n";
8951 
8952 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8953 }
8954 
8955 static ssize_t
8956 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8957 			 loff_t *ppos)
8958 {
8959 	struct trace_option_dentry *topt = filp->private_data;
8960 	unsigned long val;
8961 	int ret;
8962 
8963 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8964 	if (ret)
8965 		return ret;
8966 
8967 	if (val != 0 && val != 1)
8968 		return -EINVAL;
8969 
8970 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8971 		mutex_lock(&trace_types_lock);
8972 		ret = __set_tracer_option(topt->tr, topt->flags,
8973 					  topt->opt, !val);
8974 		mutex_unlock(&trace_types_lock);
8975 		if (ret)
8976 			return ret;
8977 	}
8978 
8979 	*ppos += cnt;
8980 
8981 	return cnt;
8982 }
8983 
8984 static int tracing_open_options(struct inode *inode, struct file *filp)
8985 {
8986 	struct trace_option_dentry *topt = inode->i_private;
8987 	int ret;
8988 
8989 	ret = tracing_check_open_get_tr(topt->tr);
8990 	if (ret)
8991 		return ret;
8992 
8993 	filp->private_data = inode->i_private;
8994 	return 0;
8995 }
8996 
8997 static int tracing_release_options(struct inode *inode, struct file *file)
8998 {
8999 	struct trace_option_dentry *topt = file->private_data;
9000 
9001 	trace_array_put(topt->tr);
9002 	return 0;
9003 }
9004 
9005 static const struct file_operations trace_options_fops = {
9006 	.open = tracing_open_options,
9007 	.read = trace_options_read,
9008 	.write = trace_options_write,
9009 	.llseek	= generic_file_llseek,
9010 	.release = tracing_release_options,
9011 };
9012 
9013 /*
9014  * In order to pass in both the trace_array descriptor as well as the index
9015  * to the flag that the trace option file represents, the trace_array
9016  * has a character array of trace_flags_index[], which holds the index
9017  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9018  * The address of this character array is passed to the flag option file
9019  * read/write callbacks.
9020  *
9021  * In order to extract both the index and the trace_array descriptor,
9022  * get_tr_index() uses the following algorithm.
9023  *
9024  *   idx = *ptr;
9025  *
9026  * As the pointer holds the address of an index[] entry and index[i] == i
9027  * (remember index[1] == 1), dereferencing it yields the index itself.
9028  *
9029  * Then to get the trace_array descriptor, by subtracting that index
9030  * from the ptr, we get to the start of the index itself.
9031  *
9032  *   ptr - idx == &index[0]
9033  *
9034  * Then a simple container_of() from that pointer gets us to the
9035  * trace_array descriptor.
9036  */
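/*
 * For example, if data points at trace_flags_index[3], then *data == 3 and
 * (data - 3) == &trace_flags_index[0], which container_of() turns back into
 * the enclosing trace_array.
 */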
9037 static void get_tr_index(void *data, struct trace_array **ptr,
9038 			 unsigned int *pindex)
9039 {
9040 	*pindex = *(unsigned char *)data;
9041 
9042 	*ptr = container_of(data - *pindex, struct trace_array,
9043 			    trace_flags_index);
9044 }
9045 
9046 static ssize_t
9047 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9048 			loff_t *ppos)
9049 {
9050 	void *tr_index = filp->private_data;
9051 	struct trace_array *tr;
9052 	unsigned int index;
9053 	char *buf;
9054 
9055 	get_tr_index(tr_index, &tr, &index);
9056 
9057 	if (tr->trace_flags & (1 << index))
9058 		buf = "1\n";
9059 	else
9060 		buf = "0\n";
9061 
9062 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9063 }
9064 
9065 static ssize_t
9066 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9067 			 loff_t *ppos)
9068 {
9069 	void *tr_index = filp->private_data;
9070 	struct trace_array *tr;
9071 	unsigned int index;
9072 	unsigned long val;
9073 	int ret;
9074 
9075 	get_tr_index(tr_index, &tr, &index);
9076 
9077 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9078 	if (ret)
9079 		return ret;
9080 
9081 	if (val != 0 && val != 1)
9082 		return -EINVAL;
9083 
9084 	mutex_lock(&event_mutex);
9085 	mutex_lock(&trace_types_lock);
9086 	ret = set_tracer_flag(tr, 1 << index, val);
9087 	mutex_unlock(&trace_types_lock);
9088 	mutex_unlock(&event_mutex);
9089 
9090 	if (ret < 0)
9091 		return ret;
9092 
9093 	*ppos += cnt;
9094 
9095 	return cnt;
9096 }
9097 
9098 static const struct file_operations trace_options_core_fops = {
9099 	.open = tracing_open_generic,
9100 	.read = trace_options_core_read,
9101 	.write = trace_options_core_write,
9102 	.llseek = generic_file_llseek,
9103 };
9104 
9105 struct dentry *trace_create_file(const char *name,
9106 				 umode_t mode,
9107 				 struct dentry *parent,
9108 				 void *data,
9109 				 const struct file_operations *fops)
9110 {
9111 	struct dentry *ret;
9112 
9113 	ret = tracefs_create_file(name, mode, parent, data, fops);
9114 	if (!ret)
9115 		pr_warn("Could not create tracefs '%s' entry\n", name);
9116 
9117 	return ret;
9118 }
9119 
9120 
9121 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9122 {
9123 	struct dentry *d_tracer;
9124 
9125 	if (tr->options)
9126 		return tr->options;
9127 
9128 	d_tracer = tracing_get_dentry(tr);
9129 	if (IS_ERR(d_tracer))
9130 		return NULL;
9131 
9132 	tr->options = tracefs_create_dir("options", d_tracer);
9133 	if (!tr->options) {
9134 		pr_warn("Could not create tracefs directory 'options'\n");
9135 		return NULL;
9136 	}
9137 
9138 	return tr->options;
9139 }
9140 
9141 static void
9142 create_trace_option_file(struct trace_array *tr,
9143 			 struct trace_option_dentry *topt,
9144 			 struct tracer_flags *flags,
9145 			 struct tracer_opt *opt)
9146 {
9147 	struct dentry *t_options;
9148 
9149 	t_options = trace_options_init_dentry(tr);
9150 	if (!t_options)
9151 		return;
9152 
9153 	topt->flags = flags;
9154 	topt->opt = opt;
9155 	topt->tr = tr;
9156 
9157 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9158 					t_options, topt, &trace_options_fops);
9159 
9160 }
9161 
9162 static void
9163 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9164 {
9165 	struct trace_option_dentry *topts;
9166 	struct trace_options *tr_topts;
9167 	struct tracer_flags *flags;
9168 	struct tracer_opt *opts;
9169 	int cnt;
9170 	int i;
9171 
9172 	if (!tracer)
9173 		return;
9174 
9175 	flags = tracer->flags;
9176 
9177 	if (!flags || !flags->opts)
9178 		return;
9179 
9180 	/*
9181 	 * If this is an instance, only create flags for tracers
9182 	 * the instance may have.
9183 	 */
9184 	if (!trace_ok_for_array(tracer, tr))
9185 		return;
9186 
9187 	for (i = 0; i < tr->nr_topts; i++) {
9188 		/* Make sure there's no duplicate flags. */
9189 		/* Make sure there are no duplicate flags. */
9190 			return;
9191 	}
9192 
9193 	opts = flags->opts;
9194 
9195 	for (cnt = 0; opts[cnt].name; cnt++)
9196 		;
9197 
9198 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9199 	if (!topts)
9200 		return;
9201 
9202 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9203 			    GFP_KERNEL);
9204 	if (!tr_topts) {
9205 		kfree(topts);
9206 		return;
9207 	}
9208 
9209 	tr->topts = tr_topts;
9210 	tr->topts[tr->nr_topts].tracer = tracer;
9211 	tr->topts[tr->nr_topts].topts = topts;
9212 	tr->nr_topts++;
9213 
9214 	for (cnt = 0; opts[cnt].name; cnt++) {
9215 		create_trace_option_file(tr, &topts[cnt], flags,
9216 					 &opts[cnt]);
9217 		MEM_FAIL(topts[cnt].entry == NULL,
9218 			  "Failed to create trace option: %s",
9219 			  opts[cnt].name);
9220 	}
9221 }
9222 
9223 static struct dentry *
9224 create_trace_option_core_file(struct trace_array *tr,
9225 			      const char *option, long index)
9226 {
9227 	struct dentry *t_options;
9228 
9229 	t_options = trace_options_init_dentry(tr);
9230 	if (!t_options)
9231 		return NULL;
9232 
9233 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9234 				 (void *)&tr->trace_flags_index[index],
9235 				 &trace_options_core_fops);
9236 }
9237 
9238 static void create_trace_options_dir(struct trace_array *tr)
9239 {
9240 	struct dentry *t_options;
9241 	bool top_level = tr == &global_trace;
9242 	int i;
9243 
9244 	t_options = trace_options_init_dentry(tr);
9245 	if (!t_options)
9246 		return;
9247 
9248 	for (i = 0; trace_options[i]; i++) {
9249 		if (top_level ||
9250 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9251 			create_trace_option_core_file(tr, trace_options[i], i);
9252 	}
9253 }
9254 
9255 static ssize_t
9256 rb_simple_read(struct file *filp, char __user *ubuf,
9257 	       size_t cnt, loff_t *ppos)
9258 {
9259 	struct trace_array *tr = filp->private_data;
9260 	char buf[64];
9261 	int r;
9262 
9263 	r = tracer_tracing_is_on(tr);
9264 	r = sprintf(buf, "%d\n", r);
9265 
9266 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9267 }
9268 
9269 static ssize_t
9270 rb_simple_write(struct file *filp, const char __user *ubuf,
9271 		size_t cnt, loff_t *ppos)
9272 {
9273 	struct trace_array *tr = filp->private_data;
9274 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9275 	unsigned long val;
9276 	int ret;
9277 
9278 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9279 	if (ret)
9280 		return ret;
9281 
9282 	if (buffer) {
9283 		mutex_lock(&trace_types_lock);
9284 		if (!!val == tracer_tracing_is_on(tr)) {
9285 			val = 0; /* do nothing */
9286 		} else if (val) {
9287 			tracer_tracing_on(tr);
9288 			if (tr->current_trace->start)
9289 				tr->current_trace->start(tr);
9290 		} else {
9291 			tracer_tracing_off(tr);
9292 			if (tr->current_trace->stop)
9293 				tr->current_trace->stop(tr);
9294 			/* Wake up any waiters */
9295 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9296 		}
9297 		mutex_unlock(&trace_types_lock);
9298 	}
9299 
9300 	(*ppos)++;
9301 
9302 	return cnt;
9303 }
9304 
9305 static const struct file_operations rb_simple_fops = {
9306 	.open		= tracing_open_generic_tr,
9307 	.read		= rb_simple_read,
9308 	.write		= rb_simple_write,
9309 	.release	= tracing_release_generic_tr,
9310 	.llseek		= default_llseek,
9311 };
9312 
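/*
 * "buffer_percent" controls how full the ring buffer must be before a
 * blocked reader is woken up: 0 wakes the reader as soon as any data is
 * available, while 100 waits until the buffer is full.
 */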
9313 static ssize_t
9314 buffer_percent_read(struct file *filp, char __user *ubuf,
9315 		    size_t cnt, loff_t *ppos)
9316 {
9317 	struct trace_array *tr = filp->private_data;
9318 	char buf[64];
9319 	int r;
9320 
9321 	r = tr->buffer_percent;
9322 	r = sprintf(buf, "%d\n", r);
9323 
9324 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9325 }
9326 
9327 static ssize_t
9328 buffer_percent_write(struct file *filp, const char __user *ubuf,
9329 		     size_t cnt, loff_t *ppos)
9330 {
9331 	struct trace_array *tr = filp->private_data;
9332 	unsigned long val;
9333 	int ret;
9334 
9335 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9336 	if (ret)
9337 		return ret;
9338 
9339 	if (val > 100)
9340 		return -EINVAL;
9341 
9342 	tr->buffer_percent = val;
9343 
9344 	(*ppos)++;
9345 
9346 	return cnt;
9347 }
9348 
9349 static const struct file_operations buffer_percent_fops = {
9350 	.open		= tracing_open_generic_tr,
9351 	.read		= buffer_percent_read,
9352 	.write		= buffer_percent_write,
9353 	.release	= tracing_release_generic_tr,
9354 	.llseek		= default_llseek,
9355 };
9356 
9357 static struct dentry *trace_instance_dir;
9358 
9359 static void
9360 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9361 
9362 static int
9363 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9364 {
9365 	enum ring_buffer_flags rb_flags;
9366 
9367 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9368 
9369 	buf->tr = tr;
9370 
9371 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9372 	if (!buf->buffer)
9373 		return -ENOMEM;
9374 
9375 	buf->data = alloc_percpu(struct trace_array_cpu);
9376 	if (!buf->data) {
9377 		ring_buffer_free(buf->buffer);
9378 		buf->buffer = NULL;
9379 		return -ENOMEM;
9380 	}
9381 
9382 	/* Allocate the first page for all buffers */
9383 	set_buffer_entries(&tr->array_buffer,
9384 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9385 
9386 	return 0;
9387 }
9388 
9389 static void free_trace_buffer(struct array_buffer *buf)
9390 {
9391 	if (buf->buffer) {
9392 		ring_buffer_free(buf->buffer);
9393 		buf->buffer = NULL;
9394 		free_percpu(buf->data);
9395 		buf->data = NULL;
9396 	}
9397 }
9398 
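/*
 * Allocate the main trace buffer at the requested size. When the max trace
 * buffer is configured, it is normally allocated with a token size of 1 and
 * only resized to match the main buffer when a snapshot is taken, unless a
 * snapshot was requested on the kernel command line.
 */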
9399 static int allocate_trace_buffers(struct trace_array *tr, int size)
9400 {
9401 	int ret;
9402 
9403 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9404 	if (ret)
9405 		return ret;
9406 
9407 #ifdef CONFIG_TRACER_MAX_TRACE
9408 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9409 				    allocate_snapshot ? size : 1);
9410 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9411 		free_trace_buffer(&tr->array_buffer);
9412 		return -ENOMEM;
9413 	}
9414 	tr->allocated_snapshot = allocate_snapshot;
9415 
9416 	allocate_snapshot = false;
9417 #endif
9418 
9419 	return 0;
9420 }
9421 
9422 static void free_trace_buffers(struct trace_array *tr)
9423 {
9424 	if (!tr)
9425 		return;
9426 
9427 	free_trace_buffer(&tr->array_buffer);
9428 
9429 #ifdef CONFIG_TRACER_MAX_TRACE
9430 	free_trace_buffer(&tr->max_buffer);
9431 #endif
9432 }
9433 
9434 static void init_trace_flags_index(struct trace_array *tr)
9435 {
9436 	int i;
9437 
9438 	/* Used by the trace options files */
9439 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9440 		tr->trace_flags_index[i] = i;
9441 }
9442 
9443 static void __update_tracer_options(struct trace_array *tr)
9444 {
9445 	struct tracer *t;
9446 
9447 	for (t = trace_types; t; t = t->next)
9448 		add_tracer_options(tr, t);
9449 }
9450 
9451 static void update_tracer_options(struct trace_array *tr)
9452 {
9453 	mutex_lock(&trace_types_lock);
9454 	tracer_options_updated = true;
9455 	__update_tracer_options(tr);
9456 	mutex_unlock(&trace_types_lock);
9457 }
9458 
9459 /* Must have trace_types_lock held */
9460 struct trace_array *trace_array_find(const char *instance)
9461 {
9462 	struct trace_array *tr, *found = NULL;
9463 
9464 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9465 		if (tr->name && strcmp(tr->name, instance) == 0) {
9466 			found = tr;
9467 			break;
9468 		}
9469 	}
9470 
9471 	return found;
9472 }
9473 
9474 struct trace_array *trace_array_find_get(const char *instance)
9475 {
9476 	struct trace_array *tr;
9477 
9478 	mutex_lock(&trace_types_lock);
9479 	tr = trace_array_find(instance);
9480 	if (tr)
9481 		tr->ref++;
9482 	mutex_unlock(&trace_types_lock);
9483 
9484 	return tr;
9485 }
9486 
9487 static int trace_array_create_dir(struct trace_array *tr)
9488 {
9489 	int ret;
9490 
9491 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9492 	if (!tr->dir)
9493 		return -EINVAL;
9494 
9495 	ret = event_trace_add_tracer(tr->dir, tr);
9496 	if (ret) {
9497 		tracefs_remove(tr->dir);
9498 		return ret;
9499 	}
9500 
9501 	init_tracer_tracefs(tr, tr->dir);
9502 	__update_tracer_options(tr);
9503 
9504 	return ret;
9505 }
9506 
9507 static struct trace_array *trace_array_create(const char *name)
9508 {
9509 	struct trace_array *tr;
9510 	int ret;
9511 
9512 	ret = -ENOMEM;
9513 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9514 	if (!tr)
9515 		return ERR_PTR(ret);
9516 
9517 	tr->name = kstrdup(name, GFP_KERNEL);
9518 	if (!tr->name)
9519 		goto out_free_tr;
9520 
9521 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9522 		goto out_free_tr;
9523 
9524 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9525 		goto out_free_tr;
9526 
9527 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9528 
9529 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9530 
9531 	raw_spin_lock_init(&tr->start_lock);
9532 
9533 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9534 
9535 	tr->current_trace = &nop_trace;
9536 
9537 	INIT_LIST_HEAD(&tr->systems);
9538 	INIT_LIST_HEAD(&tr->events);
9539 	INIT_LIST_HEAD(&tr->hist_vars);
9540 	INIT_LIST_HEAD(&tr->err_log);
9541 
9542 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9543 		goto out_free_tr;
9544 
9545 	/* The ring buffer is expanded by default */
9546 	trace_set_ring_buffer_expanded(tr);
9547 
9548 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9549 		goto out_free_tr;
9550 
9551 	ftrace_init_trace_array(tr);
9552 
9553 	init_trace_flags_index(tr);
9554 
9555 	if (trace_instance_dir) {
9556 		ret = trace_array_create_dir(tr);
9557 		if (ret)
9558 			goto out_free_tr;
9559 	} else
9560 		__trace_early_add_events(tr);
9561 
9562 	list_add(&tr->list, &ftrace_trace_arrays);
9563 
9564 	tr->ref++;
9565 
9566 	return tr;
9567 
9568  out_free_tr:
9569 	ftrace_free_ftrace_ops(tr);
9570 	free_trace_buffers(tr);
9571 	free_cpumask_var(tr->pipe_cpumask);
9572 	free_cpumask_var(tr->tracing_cpumask);
9573 	kfree(tr->name);
9574 	kfree(tr);
9575 
9576 	return ERR_PTR(ret);
9577 }
9578 
9579 static int instance_mkdir(const char *name)
9580 {
9581 	struct trace_array *tr;
9582 	int ret;
9583 
9584 	mutex_lock(&event_mutex);
9585 	mutex_lock(&trace_types_lock);
9586 
9587 	ret = -EEXIST;
9588 	if (trace_array_find(name))
9589 		goto out_unlock;
9590 
9591 	tr = trace_array_create(name);
9592 
9593 	ret = PTR_ERR_OR_ZERO(tr);
9594 
9595 out_unlock:
9596 	mutex_unlock(&trace_types_lock);
9597 	mutex_unlock(&event_mutex);
9598 	return ret;
9599 }
9600 
9601 /**
9602  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9603  * @name: The name of the trace array to be looked up/created.
9604  *
9605  * Returns a pointer to the trace array with the given name, or
9606  * NULL if it cannot be created.
9607  *
9608  * NOTE: This function increments the reference counter associated with the
9609  * trace array returned, which makes sure it cannot be freed while in use.
9610  * Call trace_array_put() once the trace array is no longer needed.
9611  * If the trace_array is to be freed, trace_array_destroy() needs to
9612  * be called after the trace_array_put(), or user space can simply delete
9613  * it from the tracefs instances directory. But until the
9614  * trace_array_put() is called, user space cannot delete it.
9615  *
9616  */
9617 struct trace_array *trace_array_get_by_name(const char *name)
9618 {
9619 	struct trace_array *tr;
9620 
9621 	mutex_lock(&event_mutex);
9622 	mutex_lock(&trace_types_lock);
9623 
9624 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9625 		if (tr->name && strcmp(tr->name, name) == 0)
9626 			goto out_unlock;
9627 	}
9628 
9629 	tr = trace_array_create(name);
9630 
9631 	if (IS_ERR(tr))
9632 		tr = NULL;
9633 out_unlock:
9634 	if (tr)
9635 		tr->ref++;
9636 
9637 	mutex_unlock(&trace_types_lock);
9638 	mutex_unlock(&event_mutex);
9639 	return tr;
9640 }
9641 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
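
/*
 * Illustrative sketch (not part of this file): how a module might use the
 * instance API above. The instance name "my_instance" and the surrounding
 * error handling are made up; the calls themselves are the exported ones.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	// ... write events into the instance ...
 *
 *	trace_array_put(tr);		// drop our reference
 *	trace_array_destroy(tr);	// optional: remove the instance entirely
 */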
9642 
9643 static int __remove_instance(struct trace_array *tr)
9644 {
9645 	int i;
9646 
9647 	/* Reference counter for a newly created trace array = 1. */
9648 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9649 		return -EBUSY;
9650 
9651 	list_del(&tr->list);
9652 
9653 	/* Disable all the flags that were enabled coming in */
9654 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9655 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9656 			set_tracer_flag(tr, 1 << i, 0);
9657 	}
9658 
9659 	tracing_set_nop(tr);
9660 	clear_ftrace_function_probes(tr);
9661 	event_trace_del_tracer(tr);
9662 	ftrace_clear_pids(tr);
9663 	ftrace_destroy_function_files(tr);
9664 	tracefs_remove(tr->dir);
9665 	free_percpu(tr->last_func_repeats);
9666 	free_trace_buffers(tr);
9667 	clear_tracing_err_log(tr);
9668 
9669 	for (i = 0; i < tr->nr_topts; i++) {
9670 		kfree(tr->topts[i].topts);
9671 	}
9672 	kfree(tr->topts);
9673 
9674 	free_cpumask_var(tr->pipe_cpumask);
9675 	free_cpumask_var(tr->tracing_cpumask);
9676 	kfree(tr->name);
9677 	kfree(tr);
9678 
9679 	return 0;
9680 }
9681 
9682 int trace_array_destroy(struct trace_array *this_tr)
9683 {
9684 	struct trace_array *tr;
9685 	int ret;
9686 
9687 	if (!this_tr)
9688 		return -EINVAL;
9689 
9690 	mutex_lock(&event_mutex);
9691 	mutex_lock(&trace_types_lock);
9692 
9693 	ret = -ENODEV;
9694 
9695 	/* Make sure the trace array exists before destroying it. */
9696 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9697 		if (tr == this_tr) {
9698 			ret = __remove_instance(tr);
9699 			break;
9700 		}
9701 	}
9702 
9703 	mutex_unlock(&trace_types_lock);
9704 	mutex_unlock(&event_mutex);
9705 
9706 	return ret;
9707 }
9708 EXPORT_SYMBOL_GPL(trace_array_destroy);
9709 
9710 static int instance_rmdir(const char *name)
9711 {
9712 	struct trace_array *tr;
9713 	int ret;
9714 
9715 	mutex_lock(&event_mutex);
9716 	mutex_lock(&trace_types_lock);
9717 
9718 	ret = -ENODEV;
9719 	tr = trace_array_find(name);
9720 	if (tr)
9721 		ret = __remove_instance(tr);
9722 
9723 	mutex_unlock(&trace_types_lock);
9724 	mutex_unlock(&event_mutex);
9725 
9726 	return ret;
9727 }
9728 
9729 static __init void create_trace_instances(struct dentry *d_tracer)
9730 {
9731 	struct trace_array *tr;
9732 
9733 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9734 							 instance_mkdir,
9735 							 instance_rmdir);
9736 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9737 		return;
9738 
9739 	mutex_lock(&event_mutex);
9740 	mutex_lock(&trace_types_lock);
9741 
9742 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9743 		if (!tr->name)
9744 			continue;
9745 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9746 			     "Failed to create instance directory\n"))
9747 			break;
9748 	}
9749 
9750 	mutex_unlock(&trace_types_lock);
9751 	mutex_unlock(&event_mutex);
9752 }
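
/*
 * For reference (user-space view, not created by this file): the callbacks
 * registered above are what run when an instance is created or removed
 * through tracefs, e.g. (illustrative instance name):
 *
 *	mkdir /sys/kernel/tracing/instances/foo	  -> instance_mkdir("foo")
 *	rmdir /sys/kernel/tracing/instances/foo	  -> instance_rmdir("foo")
 */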
9753 
9754 static void
9755 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9756 {
9757 	int cpu;
9758 
9759 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9760 			tr, &show_traces_fops);
9761 
9762 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9763 			tr, &set_tracer_fops);
9764 
9765 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9766 			  tr, &tracing_cpumask_fops);
9767 
9768 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9769 			  tr, &tracing_iter_fops);
9770 
9771 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9772 			  tr, &tracing_fops);
9773 
9774 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9775 			  tr, &tracing_pipe_fops);
9776 
9777 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9778 			  tr, &tracing_entries_fops);
9779 
9780 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9781 			  tr, &tracing_total_entries_fops);
9782 
9783 	trace_create_file("free_buffer", 0200, d_tracer,
9784 			  tr, &tracing_free_buffer_fops);
9785 
9786 	trace_create_file("trace_marker", 0220, d_tracer,
9787 			  tr, &tracing_mark_fops);
9788 
9789 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9790 
9791 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9792 			  tr, &tracing_mark_raw_fops);
9793 
9794 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9795 			  &trace_clock_fops);
9796 
9797 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9798 			  tr, &rb_simple_fops);
9799 
9800 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9801 			  &trace_time_stamp_mode_fops);
9802 
9803 	tr->buffer_percent = 50;
9804 
9805 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9806 			tr, &buffer_percent_fops);
9807 
9808 	create_trace_options_dir(tr);
9809 
9810 #ifdef CONFIG_TRACER_MAX_TRACE
9811 	trace_create_maxlat_file(tr, d_tracer);
9812 #endif
9813 
9814 	if (ftrace_create_function_files(tr, d_tracer))
9815 		MEM_FAIL(1, "Could not allocate function filter files");
9816 
9817 #ifdef CONFIG_TRACER_SNAPSHOT
9818 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9819 			  tr, &snapshot_fops);
9820 #endif
9821 
9822 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9823 			  tr, &tracing_err_log_fops);
9824 
9825 	for_each_tracing_cpu(cpu)
9826 		tracing_init_tracefs_percpu(tr, cpu);
9827 
9828 	ftrace_init_tracefs(tr, d_tracer);
9829 }
9830 
9831 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9832 {
9833 	struct vfsmount *mnt;
9834 	struct file_system_type *type;
9835 
9836 	/*
9837 	 * To maintain backward compatibility for tools that mount
9838 	 * debugfs to get to the tracing facility, tracefs is automatically
9839 	 * mounted to the debugfs/tracing directory.
9840 	 */
9841 	type = get_fs_type("tracefs");
9842 	if (!type)
9843 		return NULL;
9844 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9845 	put_filesystem(type);
9846 	if (IS_ERR(mnt))
9847 		return NULL;
9848 	mntget(mnt);
9849 
9850 	return mnt;
9851 }
9852 
9853 /**
9854  * tracing_init_dentry - initialize top level trace array
9855  *
9856  * This is called when creating files or directories in the tracing
9857  * directory. It is called via fs_initcall() by any of the boot up code
9858  * and returns 0 once the top level tracing directory exists, -errno otherwise.
9859  */
9860 int tracing_init_dentry(void)
9861 {
9862 	struct trace_array *tr = &global_trace;
9863 
9864 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9865 		pr_warn("Tracing disabled due to lockdown\n");
9866 		return -EPERM;
9867 	}
9868 
9869 	/* The top level trace array uses NULL as parent */
9870 	if (tr->dir)
9871 		return 0;
9872 
9873 	if (WARN_ON(!tracefs_initialized()))
9874 		return -ENODEV;
9875 
9876 	/*
9877 	 * As there may still be users that expect the tracing
9878 	 * files to exist in debugfs/tracing, we must automount
9879 	 * the tracefs file system there, so older tools still
9880 	 * work with the newer kernel.
9881 	 */
9882 	tr->dir = debugfs_create_automount("tracing", NULL,
9883 					   trace_automount, NULL);
9884 
9885 	return 0;
9886 }
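
/*
 * Illustrative effect of the automount above (assuming tracefs and debugfs
 * are mounted in their usual locations): both of these paths reach the same
 * files, so older tools that only know about debugfs keep working:
 *
 *	/sys/kernel/tracing/trace
 *	/sys/kernel/debug/tracing/trace
 */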
9887 
9888 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9889 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9890 
9891 static struct workqueue_struct *eval_map_wq __initdata;
9892 static struct work_struct eval_map_work __initdata;
9893 static struct work_struct tracerfs_init_work __initdata;
9894 
9895 static void __init eval_map_work_func(struct work_struct *work)
9896 {
9897 	int len;
9898 
9899 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9900 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9901 }
9902 
9903 static int __init trace_eval_init(void)
9904 {
9905 	INIT_WORK(&eval_map_work, eval_map_work_func);
9906 
9907 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9908 	if (!eval_map_wq) {
9909 		pr_err("Unable to allocate eval_map_wq\n");
9910 		/* Fall back to doing the work synchronously here */
9911 		eval_map_work_func(&eval_map_work);
9912 		return -ENOMEM;
9913 	}
9914 
9915 	queue_work(eval_map_wq, &eval_map_work);
9916 	return 0;
9917 }
9918 
9919 subsys_initcall(trace_eval_init);
9920 
9921 static int __init trace_eval_sync(void)
9922 {
9923 	/* Make sure the eval map updates are finished */
9924 	if (eval_map_wq)
9925 		destroy_workqueue(eval_map_wq);
9926 	return 0;
9927 }
9928 
9929 late_initcall_sync(trace_eval_sync);
9930 
9931 
9932 #ifdef CONFIG_MODULES
9933 static void trace_module_add_evals(struct module *mod)
9934 {
9935 	if (!mod->num_trace_evals)
9936 		return;
9937 
9938 	/*
9939 	 * Modules with bad taint do not have events created; do
9940 	 * not bother with their eval maps (enums) either.
9941 	 */
9942 	if (trace_module_has_bad_taint(mod))
9943 		return;
9944 
9945 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9946 }
9947 
9948 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9949 static void trace_module_remove_evals(struct module *mod)
9950 {
9951 	union trace_eval_map_item *map;
9952 	union trace_eval_map_item **last = &trace_eval_maps;
9953 
9954 	if (!mod->num_trace_evals)
9955 		return;
9956 
9957 	mutex_lock(&trace_eval_mutex);
9958 
9959 	map = trace_eval_maps;
9960 
9961 	while (map) {
9962 		if (map->head.mod == mod)
9963 			break;
9964 		map = trace_eval_jmp_to_tail(map);
9965 		last = &map->tail.next;
9966 		map = map->tail.next;
9967 	}
9968 	if (!map)
9969 		goto out;
9970 
9971 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9972 	kfree(map);
9973  out:
9974 	mutex_unlock(&trace_eval_mutex);
9975 }
9976 #else
9977 static inline void trace_module_remove_evals(struct module *mod) { }
9978 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9979 
9980 static int trace_module_notify(struct notifier_block *self,
9981 			       unsigned long val, void *data)
9982 {
9983 	struct module *mod = data;
9984 
9985 	switch (val) {
9986 	case MODULE_STATE_COMING:
9987 		trace_module_add_evals(mod);
9988 		break;
9989 	case MODULE_STATE_GOING:
9990 		trace_module_remove_evals(mod);
9991 		break;
9992 	}
9993 
9994 	return NOTIFY_OK;
9995 }
9996 
9997 static struct notifier_block trace_module_nb = {
9998 	.notifier_call = trace_module_notify,
9999 	.priority = 0,
10000 };
10001 #endif /* CONFIG_MODULES */
10002 
10003 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10004 {
10005 
10006 	event_trace_init();
10007 
10008 	init_tracer_tracefs(&global_trace, NULL);
10009 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10010 
10011 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10012 			&global_trace, &tracing_thresh_fops);
10013 
10014 	trace_create_file("README", TRACE_MODE_READ, NULL,
10015 			NULL, &tracing_readme_fops);
10016 
10017 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10018 			NULL, &tracing_saved_cmdlines_fops);
10019 
10020 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10021 			  NULL, &tracing_saved_cmdlines_size_fops);
10022 
10023 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10024 			NULL, &tracing_saved_tgids_fops);
10025 
10026 	trace_create_eval_file(NULL);
10027 
10028 #ifdef CONFIG_MODULES
10029 	register_module_notifier(&trace_module_nb);
10030 #endif
10031 
10032 #ifdef CONFIG_DYNAMIC_FTRACE
10033 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10034 			NULL, &tracing_dyn_info_fops);
10035 #endif
10036 
10037 	create_trace_instances(NULL);
10038 
10039 	update_tracer_options(&global_trace);
10040 }
10041 
10042 static __init int tracer_init_tracefs(void)
10043 {
10044 	int ret;
10045 
10046 	trace_access_lock_init();
10047 
10048 	ret = tracing_init_dentry();
10049 	if (ret)
10050 		return 0;
10051 
10052 	if (eval_map_wq) {
10053 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10054 		queue_work(eval_map_wq, &tracerfs_init_work);
10055 	} else {
10056 		tracer_init_tracefs_work_func(NULL);
10057 	}
10058 
10059 	rv_init_interface();
10060 
10061 	return 0;
10062 }
10063 
10064 fs_initcall(tracer_init_tracefs);
10065 
10066 static int trace_die_panic_handler(struct notifier_block *self,
10067 				unsigned long ev, void *unused);
10068 
10069 static struct notifier_block trace_panic_notifier = {
10070 	.notifier_call = trace_die_panic_handler,
10071 	.priority = INT_MAX - 1,
10072 };
10073 
10074 static struct notifier_block trace_die_notifier = {
10075 	.notifier_call = trace_die_panic_handler,
10076 	.priority = INT_MAX - 1,
10077 };
10078 
10079 /*
10080  * The idea is to execute the following die/panic callback early, in order
10081  * to avoid showing irrelevant information in the trace (like other panic
10082  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10083  * warnings get disabled (to prevent potential log flooding).
10084  */
10085 static int trace_die_panic_handler(struct notifier_block *self,
10086 				unsigned long ev, void *unused)
10087 {
10088 	if (!ftrace_dump_on_oops)
10089 		return NOTIFY_DONE;
10090 
10091 	/* The die notifier requires DIE_OOPS to trigger */
10092 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10093 		return NOTIFY_DONE;
10094 
10095 	ftrace_dump(ftrace_dump_on_oops);
10096 
10097 	return NOTIFY_DONE;
10098 }
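
/*
 * Example of arming the dump above (illustrative; see the kernel's boot
 * parameter and sysctl documentation for the authoritative values): boot
 * with "ftrace_dump_on_oops" on the command line, or set it at run time:
 *
 *	sysctl kernel.ftrace_dump_on_oops=1	# dump all CPUs on an oops
 *	sysctl kernel.ftrace_dump_on_oops=2	# dump only the oopsing CPU
 */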
10099 
10100 /*
10101  * printk output is limited to a maximum of 1024 characters; we don't need it that big.
10102  * Nothing should be printing 1000 characters anyway.
10103  */
10104 #define TRACE_MAX_PRINT		1000
10105 
10106 /*
10107  * Define here KERN_TRACE so that we have one place to modify
10108  * it if we decide to change what log level the ftrace dump
10109  * should be at.
10110  */
10111 #define KERN_TRACE		KERN_EMERG
10112 
10113 void
10114 trace_printk_seq(struct trace_seq *s)
10115 {
10116 	/* Probably should print a warning here. */
10117 	if (s->seq.len >= TRACE_MAX_PRINT)
10118 		s->seq.len = TRACE_MAX_PRINT;
10119 
10120 	/*
10121 	 * More paranoid code. Although the buffer size is set to
10122 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10123 	 * an extra layer of protection.
10124 	 */
10125 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10126 		s->seq.len = s->seq.size - 1;
10127 
10128 	/* Should already be NUL terminated, but we are paranoid. */
10129 	s->buffer[s->seq.len] = 0;
10130 
10131 	printk(KERN_TRACE "%s", s->buffer);
10132 
10133 	trace_seq_init(s);
10134 }
10135 
10136 void trace_init_global_iter(struct trace_iterator *iter)
10137 {
10138 	iter->tr = &global_trace;
10139 	iter->trace = iter->tr->current_trace;
10140 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10141 	iter->array_buffer = &global_trace.array_buffer;
10142 
10143 	if (iter->trace && iter->trace->open)
10144 		iter->trace->open(iter);
10145 
10146 	/* Annotate start of buffers if we had overruns */
10147 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10148 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10149 
10150 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10151 	if (trace_clocks[iter->tr->clock_id].in_ns)
10152 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10153 
10154 	/* Can not use kmalloc for iter.temp and iter.fmt */
10155 	iter->temp = static_temp_buf;
10156 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10157 	iter->fmt = static_fmt_buf;
10158 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10159 }
10160 
10161 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10162 {
10163 	/* use static because iter can be a bit big for the stack */
10164 	static struct trace_iterator iter;
10165 	static atomic_t dump_running;
10166 	struct trace_array *tr = &global_trace;
10167 	unsigned int old_userobj;
10168 	unsigned long flags;
10169 	int cnt = 0, cpu;
10170 
10171 	/* Only allow one dump user at a time. */
10172 	if (atomic_inc_return(&dump_running) != 1) {
10173 		atomic_dec(&dump_running);
10174 		return;
10175 	}
10176 
10177 	/*
10178 	 * Always turn off tracing when we dump.
10179 	 * We don't need to show trace output of what happens
10180 	 * between multiple crashes.
10181 	 *
10182 	 * If the user does a sysrq-z, then they can re-enable
10183 	 * tracing with echo 1 > tracing_on.
10184 	 */
10185 	tracing_off();
10186 
10187 	local_irq_save(flags);
10188 
10189 	/* Simulate the iterator */
10190 	trace_init_global_iter(&iter);
10191 
10192 	for_each_tracing_cpu(cpu) {
10193 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10194 	}
10195 
10196 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10197 
10198 	/* don't look at user memory in panic mode */
10199 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10200 
10201 	switch (oops_dump_mode) {
10202 	case DUMP_ALL:
10203 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10204 		break;
10205 	case DUMP_ORIG:
10206 		iter.cpu_file = raw_smp_processor_id();
10207 		break;
10208 	case DUMP_NONE:
10209 		goto out_enable;
10210 	default:
10211 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10212 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10213 	}
10214 
10215 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
10216 
10217 	/* Did function tracer already get disabled? */
10218 	if (ftrace_is_dead()) {
10219 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10220 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10221 	}
10222 
10223 	/*
10224 	 * We need to stop all tracing on all CPUs to read
10225 	 * the next buffer. This is a bit expensive, but is
10226 	 * not done often. We print everything we can read,
10227 	 * and then release the locks again.
10228 	 */
10229 
10230 	while (!trace_empty(&iter)) {
10231 
10232 		if (!cnt)
10233 			printk(KERN_TRACE "---------------------------------\n");
10234 
10235 		cnt++;
10236 
10237 		trace_iterator_reset(&iter);
10238 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10239 
10240 		if (trace_find_next_entry_inc(&iter) != NULL) {
10241 			int ret;
10242 
10243 			ret = print_trace_line(&iter);
10244 			if (ret != TRACE_TYPE_NO_CONSUME)
10245 				trace_consume(&iter);
10246 		}
10247 		touch_nmi_watchdog();
10248 
10249 		trace_printk_seq(&iter.seq);
10250 	}
10251 
10252 	if (!cnt)
10253 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10254 	else
10255 		printk(KERN_TRACE "---------------------------------\n");
10256 
10257  out_enable:
10258 	tr->trace_flags |= old_userobj;
10259 
10260 	for_each_tracing_cpu(cpu) {
10261 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10262 	}
10263 	atomic_dec(&dump_running);
10264 	local_irq_restore(flags);
10265 }
10266 EXPORT_SYMBOL_GPL(ftrace_dump);
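
/*
 * Illustrative use of the export above: a driver debugging a hang might
 * spill the trace buffers to the console from an error path (hypothetical
 * call site and condition, not part of this file):
 *
 *	if (WARN_ON(hw_wedged))
 *		ftrace_dump(DUMP_ALL);
 */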
10267 
10268 #define WRITE_BUFSIZE  4096
10269 
10270 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10271 				size_t count, loff_t *ppos,
10272 				int (*createfn)(const char *))
10273 {
10274 	char *kbuf, *buf, *tmp;
10275 	int ret = 0;
10276 	size_t done = 0;
10277 	size_t size;
10278 
10279 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10280 	if (!kbuf)
10281 		return -ENOMEM;
10282 
10283 	while (done < count) {
10284 		size = count - done;
10285 
10286 		if (size >= WRITE_BUFSIZE)
10287 			size = WRITE_BUFSIZE - 1;
10288 
10289 		if (copy_from_user(kbuf, buffer + done, size)) {
10290 			ret = -EFAULT;
10291 			goto out;
10292 		}
10293 		kbuf[size] = '\0';
10294 		buf = kbuf;
10295 		do {
10296 			tmp = strchr(buf, '\n');
10297 			if (tmp) {
10298 				*tmp = '\0';
10299 				size = tmp - buf + 1;
10300 			} else {
10301 				size = strlen(buf);
10302 				if (done + size < count) {
10303 					if (buf != kbuf)
10304 						break;
10305 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10306 					pr_warn("Line length is too long: Should be less than %d\n",
10307 						WRITE_BUFSIZE - 2);
10308 					ret = -EINVAL;
10309 					goto out;
10310 				}
10311 			}
10312 			done += size;
10313 
10314 			/* Remove comments */
10315 			tmp = strchr(buf, '#');
10316 
10317 			if (tmp)
10318 				*tmp = '\0';
10319 
10320 			ret = createfn(buf);
10321 			if (ret)
10322 				goto out;
10323 			buf += size;
10324 
10325 		} while (done < count);
10326 	}
10327 	ret = done;
10328 
10329 out:
10330 	kfree(kbuf);
10331 
10332 	return ret;
10333 }
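
/*
 * Sketch of a typical caller (hypothetical names, modeled on the dynamic
 * event write handlers): trace_parse_run_command() splits the user buffer
 * into lines, strips '#' comments, and hands each line to @createfn.
 *
 *	static int create_my_cmd(const char *raw_command)
 *	{
 *		// parse one command line; return 0 or -errno
 *		return 0;
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       create_my_cmd);
 *	}
 */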
10334 
10335 #ifdef CONFIG_TRACER_MAX_TRACE
10336 __init static bool tr_needs_alloc_snapshot(const char *name)
10337 {
10338 	char *test;
10339 	int len = strlen(name);
10340 	bool ret;
10341 
10342 	if (!boot_snapshot_index)
10343 		return false;
10344 
10345 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10346 	    boot_snapshot_info[len] == '\t')
10347 		return true;
10348 
10349 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10350 	if (!test)
10351 		return false;
10352 
10353 	sprintf(test, "\t%s\t", name);
10354 	ret = strstr(boot_snapshot_info, test) != NULL;
10355 	kfree(test);
10356 	return ret;
10357 }
10358 
10359 __init static void do_allocate_snapshot(const char *name)
10360 {
10361 	if (!tr_needs_alloc_snapshot(name))
10362 		return;
10363 
10364 	/*
10365 	 * When allocate_snapshot is set, the next call to
10366 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10367 	 * will allocate the snapshot buffer. That will also clear
10368 	 * this flag.
10369 	 */
10370 	allocate_snapshot = true;
10371 }
10372 #else
10373 static inline void do_allocate_snapshot(const char *name) { }
10374 #endif
10375 
10376 __init static void enable_instances(void)
10377 {
10378 	struct trace_array *tr;
10379 	char *curr_str;
10380 	char *str;
10381 	char *tok;
10382 
10383 	/* A tab is always appended */
10384 	boot_instance_info[boot_instance_index - 1] = '\0';
10385 	str = boot_instance_info;
10386 
10387 	while ((curr_str = strsep(&str, "\t"))) {
10388 
10389 		tok = strsep(&curr_str, ",");
10390 
10391 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10392 			do_allocate_snapshot(tok);
10393 
10394 		tr = trace_array_get_by_name(tok);
10395 		if (!tr) {
10396 			pr_warn("Failed to create instance buffer %s\n", curr_str);
10397 			continue;
10398 		}
10399 		/* Allow user space to delete it */
10400 		trace_array_put(tr);
10401 
10402 		while ((tok = strsep(&curr_str, ","))) {
10403 			early_enable_events(tr, tok, true);
10404 		}
10405 	}
10406 }
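
/*
 * Example boot parameter handled above (illustrative instance and event
 * names):
 *
 *	trace_instance=foo,sched:sched_switch,sched:sched_wakeup
 *
 * creates the "foo" instance early in boot and enables the listed events in
 * it; a snapshot buffer is also allocated if "foo" appears in
 * ftrace_boot_snapshot= as well.
 */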
10407 
10408 __init static int tracer_alloc_buffers(void)
10409 {
10410 	int ring_buf_size;
10411 	int ret = -ENOMEM;
10412 
10413 
10414 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10415 		pr_warn("Tracing disabled due to lockdown\n");
10416 		return -EPERM;
10417 	}
10418 
10419 	/*
10420 	 * Make sure we don't accidentally add more trace options
10421 	 * than we have bits for.
10422 	 */
10423 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10424 
10425 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10426 		goto out;
10427 
10428 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10429 		goto out_free_buffer_mask;
10430 
10431 	/* Only allocate trace_printk buffers if a trace_printk exists */
10432 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10433 		/* Must be called before global_trace.buffer is allocated */
10434 		trace_printk_init_buffers();
10435 
10436 	/* To save memory, keep the ring buffer size to its minimum */
10437 	if (global_trace.ring_buffer_expanded)
10438 		ring_buf_size = trace_buf_size;
10439 	else
10440 		ring_buf_size = 1;
10441 
10442 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10443 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10444 
10445 	raw_spin_lock_init(&global_trace.start_lock);
10446 
10447 	/*
10448 	 * The prepare callback allocates some memory for the ring buffer. We
10449 	 * don't free the buffer if the CPU goes down. If we were to free
10450 	 * the buffer, then the user would lose any trace that was in the
10451 	 * buffer. The memory will be removed once the "instance" is removed.
10452 	 */
10453 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10454 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10455 				      NULL);
10456 	if (ret < 0)
10457 		goto out_free_cpumask;
10458 	/* Used for event triggers */
10459 	ret = -ENOMEM;
10460 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10461 	if (!temp_buffer)
10462 		goto out_rm_hp_state;
10463 
10464 	if (trace_create_savedcmd() < 0)
10465 		goto out_free_temp_buffer;
10466 
10467 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10468 		goto out_free_savedcmd;
10469 
10470 	/* TODO: make the number of buffers hot pluggable with CPUS */
10471 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10472 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10473 		goto out_free_pipe_cpumask;
10474 	}
10475 	if (global_trace.buffer_disabled)
10476 		tracing_off();
10477 
10478 	if (trace_boot_clock) {
10479 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10480 		if (ret < 0)
10481 			pr_warn("Trace clock %s not defined, going back to default\n",
10482 				trace_boot_clock);
10483 	}
10484 
10485 	/*
10486 	 * register_tracer() might reference current_trace, so it
10487 	 * needs to be set before we register anything. This is
10488 	 * just a bootstrap of current_trace anyway.
10489 	 */
10490 	global_trace.current_trace = &nop_trace;
10491 
10492 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10493 
10494 	ftrace_init_global_array_ops(&global_trace);
10495 
10496 	init_trace_flags_index(&global_trace);
10497 
10498 	register_tracer(&nop_trace);
10499 
10500 	/* Function tracing may start here (via kernel command line) */
10501 	init_function_trace();
10502 
10503 	/* All seems OK, enable tracing */
10504 	tracing_disabled = 0;
10505 
10506 	atomic_notifier_chain_register(&panic_notifier_list,
10507 				       &trace_panic_notifier);
10508 
10509 	register_die_notifier(&trace_die_notifier);
10510 
10511 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10512 
10513 	INIT_LIST_HEAD(&global_trace.systems);
10514 	INIT_LIST_HEAD(&global_trace.events);
10515 	INIT_LIST_HEAD(&global_trace.hist_vars);
10516 	INIT_LIST_HEAD(&global_trace.err_log);
10517 	list_add(&global_trace.list, &ftrace_trace_arrays);
10518 
10519 	apply_trace_boot_options();
10520 
10521 	register_snapshot_cmd();
10522 
10523 	test_can_verify();
10524 
10525 	return 0;
10526 
10527 out_free_pipe_cpumask:
10528 	free_cpumask_var(global_trace.pipe_cpumask);
10529 out_free_savedcmd:
10530 	free_saved_cmdlines_buffer(savedcmd);
10531 out_free_temp_buffer:
10532 	ring_buffer_free(temp_buffer);
10533 out_rm_hp_state:
10534 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10535 out_free_cpumask:
10536 	free_cpumask_var(global_trace.tracing_cpumask);
10537 out_free_buffer_mask:
10538 	free_cpumask_var(tracing_buffer_mask);
10539 out:
10540 	return ret;
10541 }
10542 
10543 void __init ftrace_boot_snapshot(void)
10544 {
10545 #ifdef CONFIG_TRACER_MAX_TRACE
10546 	struct trace_array *tr;
10547 
10548 	if (!snapshot_at_boot)
10549 		return;
10550 
10551 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10552 		if (!tr->allocated_snapshot)
10553 			continue;
10554 
10555 		tracing_snapshot_instance(tr);
10556 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10557 	}
10558 #endif
10559 }
10560 
10561 void __init early_trace_init(void)
10562 {
10563 	if (tracepoint_printk) {
10564 		tracepoint_print_iter =
10565 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10566 		if (MEM_FAIL(!tracepoint_print_iter,
10567 			     "Failed to allocate trace iterator\n"))
10568 			tracepoint_printk = 0;
10569 		else
10570 			static_key_enable(&tracepoint_printk_key.key);
10571 	}
10572 	tracer_alloc_buffers();
10573 
10574 	init_events();
10575 }
10576 
10577 void __init trace_init(void)
10578 {
10579 	trace_event_init();
10580 
10581 	if (boot_instance_index)
10582 		enable_instances();
10583 }
10584 
10585 __init static void clear_boot_tracer(void)
10586 {
10587 	/*
10588 	 * The buffer holding the default bootup tracer name is in an init section.
10589 	 * This function is called at late_initcall time. If we did not
10590 	 * find the boot tracer, then clear it out, to prevent
10591 	 * later registration from accessing the buffer that is
10592 	 * about to be freed.
10593 	 */
10594 	if (!default_bootup_tracer)
10595 		return;
10596 
10597 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10598 	       default_bootup_tracer);
10599 	default_bootup_tracer = NULL;
10600 }
10601 
10602 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10603 __init static void tracing_set_default_clock(void)
10604 {
10605 	/* sched_clock_stable() is determined in late_initcall */
10606 	if (!trace_boot_clock && !sched_clock_stable()) {
10607 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10608 			pr_warn("Can not set tracing clock due to lockdown\n");
10609 			return;
10610 		}
10611 
10612 		printk(KERN_WARNING
10613 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10614 		       "If you want to keep using the local clock, then add:\n"
10615 		       "  \"trace_clock=local\"\n"
10616 		       "on the kernel command line\n");
10617 		tracing_set_clock(&global_trace, "global");
10618 	}
10619 }
10620 #else
10621 static inline void tracing_set_default_clock(void) { }
10622 #endif
10623 
10624 __init static int late_trace_init(void)
10625 {
10626 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10627 		static_key_disable(&tracepoint_printk_key.key);
10628 		tracepoint_printk = 0;
10629 	}
10630 
10631 	tracing_set_default_clock();
10632 	clear_boot_tracer();
10633 	return 0;
10634 }
10635 
10636 late_initcall_sync(late_trace_init);
10637